[
  {
    "path": ".clang-format",
    "content": "# http://clang.llvm.org/docs/ClangFormatStyleOptions.html\n# Defines the Google C++ style for automatic reformatting.\nBasedOnStyle: Google\nMaxEmptyLinesToKeep: 2\nDerivePointerAlignment: false\nPointerAlignment: Right\nAllowShortFunctionsOnASingleLine: Empty\nIncludeBlocks: Merge\nIncludeCategories:\n  - Regex:           '^<linux/[0-9A-Za-z]+'\n    Priority:        100\n  - Regex:           '^<mach/[0-9A-Za-z]+'\n    Priority:        101\n  - Regex:           '^<mach-o/[0-9A-Za-z]+'\n    Priority:        102\n  - Regex:           '^<sys/[0-9A-Za-z]+'\n    Priority:        103\n  - Regex:           '^<[0-9A-Za-z]+\\.h>$'\n    Priority:        200\n  - Regex:           '^<[0-9A-Za-z_]+>$'\n    Priority:        201\n  - Regex:           '^<[0-9A-Za-z_]+\\.[0-9A-Za-z]+>$'\n    Priority:        202\n  - Regex:           '^<[0-9A-Za-z_]+/[0-9A-Za-z]+'\n    Priority:        203\n  - Regex:           '^\\\"[0-9A-Za-z_]+/[0-9A-Za-z]+'\n    Priority:        300\n  - Regex:           '^\\\"[0-9A-Za-z_]+\\.[0-9A-Za-z]+\\\"$'\n    Priority:        301\n  - Regex:           '.*'\n    Priority:        1000\n"
  },
  {
    "path": ".git/HEAD",
    "content": "ref: refs/heads/main\n"
  },
  {
    "path": ".git/config",
    "content": "[core]\n\trepositoryformatversion = 1\n\tfilemode = true\n\tbare = false\n\tlogallrefupdates = true\n[remote \"origin\"]\n\turl = https://github.com/alibaba/zvec\n\ttagOpt = --no-tags\n\tfetch = +refs/heads/main:refs/remotes/origin/main\n\tpromisor = true\n\tpartialclonefilter = blob:limit=1048576\n[branch \"main\"]\n\tremote = origin\n\tmerge = refs/heads/main\n"
  },
  {
    "path": ".git/description",
    "content": "Unnamed repository; edit this file 'description' to name the repository.\n"
  },
  {
    "path": ".git/hooks/applypatch-msg.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to check the commit log message taken by\n# applypatch from an e-mail message.\n#\n# The hook should exit with non-zero status after issuing an\n# appropriate message if it wants to stop the commit.  The hook is\n# allowed to edit the commit message file.\n#\n# To enable this hook, rename this file to \"applypatch-msg\".\n\n. git-sh-setup\ncommitmsg=\"$(git rev-parse --git-path hooks/commit-msg)\"\ntest -x \"$commitmsg\" && exec \"$commitmsg\" ${1+\"$@\"}\n:\n"
  },
  {
    "path": ".git/hooks/commit-msg.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to check the commit log message.\n# Called by \"git commit\" with one argument, the name of the file\n# that has the commit message.  The hook should exit with non-zero\n# status after issuing an appropriate message if it wants to stop the\n# commit.  The hook is allowed to edit the commit message file.\n#\n# To enable this hook, rename this file to \"commit-msg\".\n\n# Uncomment the below to add a Signed-off-by line to the message.\n# Doing this in a hook is a bad idea in general, but the prepare-commit-msg\n# hook is more suited to it.\n#\n# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\\(.*>\\).*$/Signed-off-by: \\1/p')\n# grep -qs \"^$SOB\" \"$1\" || echo \"$SOB\" >> \"$1\"\n\n# This example catches duplicate Signed-off-by lines.\n\ntest \"\" = \"$(grep '^Signed-off-by: ' \"$1\" |\n\t sort | uniq -c | sed -e '/^[ \t]*1[ \t]/d')\" || {\n\techo >&2 Duplicate Signed-off-by lines.\n\texit 1\n}\n"
  },
  {
    "path": ".git/hooks/fsmonitor-watchman.sample",
    "content": "#!/usr/bin/perl\n\nuse strict;\nuse warnings;\nuse IPC::Open2;\n\n# An example hook script to integrate Watchman\n# (https://facebook.github.io/watchman/) with git to speed up detecting\n# new and modified files.\n#\n# The hook is passed a version (currently 2) and last update token\n# formatted as a string and outputs to stdout a new update token and\n# all files that have been modified since the update token. Paths must\n# be relative to the root of the working tree and separated by a single NUL.\n#\n# To enable this hook, rename this file to \"query-watchman\" and set\n# 'git config core.fsmonitor .git/hooks/query-watchman'\n#\nmy ($version, $last_update_token) = @ARGV;\n\n# Uncomment for debugging\n# print STDERR \"$0 $version $last_update_token\\n\";\n\n# Check the hook interface version\nif ($version ne 2) {\n\tdie \"Unsupported query-fsmonitor hook version '$version'.\\n\" .\n\t    \"Falling back to scanning...\\n\";\n}\n\nmy $git_work_tree = get_working_dir();\n\nmy $retry = 1;\n\nmy $json_pkg;\neval {\n\trequire JSON::XS;\n\t$json_pkg = \"JSON::XS\";\n\t1;\n} or do {\n\trequire JSON::PP;\n\t$json_pkg = \"JSON::PP\";\n};\n\nlaunch_watchman();\n\nsub launch_watchman {\n\tmy $o = watchman_query();\n\tif (is_work_tree_watched($o)) {\n\t\toutput_result($o->{clock}, @{$o->{files}});\n\t}\n}\n\nsub output_result {\n\tmy ($clockid, @files) = @_;\n\n\t# Uncomment for debugging watchman output\n\t# open (my $fh, \">\", \".git/watchman-output.out\");\n\t# binmode $fh, \":utf8\";\n\t# print $fh \"$clockid\\n@files\\n\";\n\t# close $fh;\n\n\tbinmode STDOUT, \":utf8\";\n\tprint $clockid;\n\tprint \"\\0\";\n\tlocal $, = \"\\0\";\n\tprint @files;\n}\n\nsub watchman_clock {\n\tmy $response = qx/watchman clock \"$git_work_tree\"/;\n\tdie \"Failed to get clock id on '$git_work_tree'.\\n\" .\n\t\t\"Falling back to scanning...\\n\" if $? 
!= 0;\n\n\treturn $json_pkg->new->utf8->decode($response);\n}\n\nsub watchman_query {\n\tmy $pid = open2(\\*CHLD_OUT, \\*CHLD_IN, 'watchman -j --no-pretty')\n\tor die \"open2() failed: $!\\n\" .\n\t\"Falling back to scanning...\\n\";\n\n\t# In the query expression below we're asking for names of files that\n\t# changed since $last_update_token but not from the .git folder.\n\t#\n\t# To accomplish this, we're using the \"since\" generator to use the\n\t# recency index to select candidate nodes and \"fields\" to limit the\n\t# output to file names only. Then we're using the \"expression\" term to\n\t# further constrain the results.\n\tmy $last_update_line = \"\";\n\tif (substr($last_update_token, 0, 1) eq \"c\") {\n\t\t$last_update_token = \"\\\"$last_update_token\\\"\";\n\t\t$last_update_line = qq[\\n\"since\": $last_update_token,];\n\t}\n\tmy $query = <<\"\tEND\";\n\t\t[\"query\", \"$git_work_tree\", {$last_update_line\n\t\t\t\"fields\": [\"name\"],\n\t\t\t\"expression\": [\"not\", [\"dirname\", \".git\"]]\n\t\t}]\n\tEND\n\n\t# Uncomment for debugging the watchman query\n\t# open (my $fh, \">\", \".git/watchman-query.json\");\n\t# print $fh $query;\n\t# close $fh;\n\n\tprint CHLD_IN $query;\n\tclose CHLD_IN;\n\tmy $response = do {local $/; <CHLD_OUT>};\n\n\t# Uncomment for debugging the watch response\n\t# open ($fh, \">\", \".git/watchman-response.json\");\n\t# print $fh $response;\n\t# close $fh;\n\n\tdie \"Watchman: command returned no output.\\n\" .\n\t\"Falling back to scanning...\\n\" if $response eq \"\";\n\tdie \"Watchman: command returned invalid output: $response\\n\" .\n\t\"Falling back to scanning...\\n\" unless $response =~ /^\\{/;\n\n\treturn $json_pkg->new->utf8->decode($response);\n}\n\nsub is_work_tree_watched {\n\tmy ($output) = @_;\n\tmy $error = $output->{error};\n\tif ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) {\n\t\t$retry--;\n\t\tmy $response = qx/watchman watch 
\"$git_work_tree\"/;\n\t\tdie \"Failed to make watchman watch '$git_work_tree'.\\n\" .\n\t\t    \"Falling back to scanning...\\n\" if $? != 0;\n\t\t$output = $json_pkg->new->utf8->decode($response);\n\t\t$error = $output->{error};\n\t\tdie \"Watchman: $error.\\n\" .\n\t\t\"Falling back to scanning...\\n\" if $error;\n\n\t\t# Uncomment for debugging watchman output\n\t\t# open (my $fh, \">\", \".git/watchman-output.out\");\n\t\t# close $fh;\n\n\t\t# Watchman will always return all files on the first query so\n\t\t# return the fast \"everything is dirty\" flag to git and do the\n\t\t# Watchman query just to get it over with now so we won't pay\n\t\t# the cost in git to look up each individual file.\n\t\tmy $o = watchman_clock();\n\t\t$error = $output->{error};\n\n\t\tdie \"Watchman: $error.\\n\" .\n\t\t\"Falling back to scanning...\\n\" if $error;\n\n\t\toutput_result($o->{clock}, (\"/\"));\n\t\t$last_update_token = $o->{clock};\n\n\t\teval { launch_watchman() };\n\t\treturn 0;\n\t}\n\n\tdie \"Watchman: $error.\\n\" .\n\t\"Falling back to scanning...\\n\" if $error;\n\n\treturn 1;\n}\n\nsub get_working_dir {\n\tmy $working_dir;\n\tif ($^O =~ 'msys' || $^O =~ 'cygwin') {\n\t\t$working_dir = Win32::GetCwd();\n\t\t$working_dir =~ tr/\\\\/\\//;\n\t} else {\n\t\trequire Cwd;\n\t\t$working_dir = Cwd::cwd();\n\t}\n\n\treturn $working_dir;\n}\n"
  },
  {
    "path": ".git/hooks/post-update.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to prepare a packed repository for use over\n# dumb transports.\n#\n# To enable this hook, rename this file to \"post-update\".\n\nexec git update-server-info\n"
  },
  {
    "path": ".git/hooks/pre-applypatch.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to verify what is about to be committed\n# by applypatch from an e-mail message.\n#\n# The hook should exit with non-zero status after issuing an\n# appropriate message if it wants to stop the commit.\n#\n# To enable this hook, rename this file to \"pre-applypatch\".\n\n. git-sh-setup\nprecommit=\"$(git rev-parse --git-path hooks/pre-commit)\"\ntest -x \"$precommit\" && exec \"$precommit\" ${1+\"$@\"}\n:\n"
  },
  {
    "path": ".git/hooks/pre-commit.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to verify what is about to be committed.\n# Called by \"git commit\" with no arguments.  The hook should\n# exit with non-zero status after issuing an appropriate message if\n# it wants to stop the commit.\n#\n# To enable this hook, rename this file to \"pre-commit\".\n\nif git rev-parse --verify HEAD >/dev/null 2>&1\nthen\n\tagainst=HEAD\nelse\n\t# Initial commit: diff against an empty tree object\n\tagainst=$(git hash-object -t tree /dev/null)\nfi\n\n# If you want to allow non-ASCII filenames set this variable to true.\nallownonascii=$(git config --type=bool hooks.allownonascii)\n\n# Redirect output to stderr.\nexec 1>&2\n\n# Cross platform projects tend to avoid non-ASCII filenames; prevent\n# them from being added to the repository. We exploit the fact that the\n# printable range starts at the space character and ends with tilde.\nif [ \"$allownonascii\" != \"true\" ] &&\n\t# Note that the use of brackets around a tr range is ok here, (it's\n\t# even required, for portability to Solaris 10's /usr/bin/tr), since\n\t# the square bracket bytes happen to fall in the designated range.\n\ttest $(git diff-index --cached --name-only --diff-filter=A -z $against |\n\t  LC_ALL=C tr -d '[ -~]\\0' | wc -c) != 0\nthen\n\tcat <<\\EOF\nError: Attempt to add a non-ASCII file name.\n\nThis can cause problems if you want to work with people on other platforms.\n\nTo be portable it is advisable to rename the file.\n\nIf you know what you are doing you can disable this check using:\n\n  git config hooks.allownonascii true\nEOF\n\texit 1\nfi\n\n# If there are whitespace errors, print the offending file names and fail.\nexec git diff-index --check --cached $against --\n"
  },
  {
    "path": ".git/hooks/pre-merge-commit.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to verify what is about to be committed.\n# Called by \"git merge\" with no arguments.  The hook should\n# exit with non-zero status after issuing an appropriate message to\n# stderr if it wants to stop the merge commit.\n#\n# To enable this hook, rename this file to \"pre-merge-commit\".\n\n. git-sh-setup\ntest -x \"$GIT_DIR/hooks/pre-commit\" &&\n        exec \"$GIT_DIR/hooks/pre-commit\"\n:\n"
  },
  {
    "path": ".git/hooks/pre-push.sample",
    "content": "#!/bin/sh\n\n# An example hook script to verify what is about to be pushed.  Called by \"git\n# push\" after it has checked the remote status, but before anything has been\n# pushed.  If this script exits with a non-zero status nothing will be pushed.\n#\n# This hook is called with the following parameters:\n#\n# $1 -- Name of the remote to which the push is being done\n# $2 -- URL to which the push is being done\n#\n# If pushing without using a named remote those arguments will be equal.\n#\n# Information about the commits which are being pushed is supplied as lines to\n# the standard input in the form:\n#\n#   <local ref> <local oid> <remote ref> <remote oid>\n#\n# This sample shows how to prevent push of commits where the log message starts\n# with \"WIP\" (work in progress).\n\nremote=\"$1\"\nurl=\"$2\"\n\nzero=$(git hash-object --stdin </dev/null | tr '[0-9a-f]' '0')\n\nwhile read local_ref local_oid remote_ref remote_oid\ndo\n\tif test \"$local_oid\" = \"$zero\"\n\tthen\n\t\t# Handle delete\n\t\t:\n\telse\n\t\tif test \"$remote_oid\" = \"$zero\"\n\t\tthen\n\t\t\t# New branch, examine all commits\n\t\t\trange=\"$local_oid\"\n\t\telse\n\t\t\t# Update to existing branch, examine new commits\n\t\t\trange=\"$remote_oid..$local_oid\"\n\t\tfi\n\n\t\t# Check for WIP commit\n\t\tcommit=$(git rev-list -n 1 --grep '^WIP' \"$range\")\n\t\tif test -n \"$commit\"\n\t\tthen\n\t\t\techo >&2 \"Found WIP commit in $local_ref, not pushing\"\n\t\t\texit 1\n\t\tfi\n\tfi\ndone\n\nexit 0\n"
  },
  {
    "path": ".git/hooks/pre-rebase.sample",
    "content": "#!/bin/sh\n#\n# Copyright (c) 2006, 2008 Junio C Hamano\n#\n# The \"pre-rebase\" hook is run just before \"git rebase\" starts doing\n# its job, and can prevent the command from running by exiting with\n# non-zero status.\n#\n# The hook is called with the following parameters:\n#\n# $1 -- the upstream the series was forked from.\n# $2 -- the branch being rebased (or empty when rebasing the current branch).\n#\n# This sample shows how to prevent topic branches that are already\n# merged to 'next' branch from getting rebased, because allowing it\n# would result in rebasing already published history.\n\npublish=next\nbasebranch=\"$1\"\nif test \"$#\" = 2\nthen\n\ttopic=\"refs/heads/$2\"\nelse\n\ttopic=`git symbolic-ref HEAD` ||\n\texit 0 ;# we do not interrupt rebasing detached HEAD\nfi\n\ncase \"$topic\" in\nrefs/heads/??/*)\n\t;;\n*)\n\texit 0 ;# we do not interrupt others.\n\t;;\nesac\n\n# Now we are dealing with a topic branch being rebased\n# on top of master.  Is it OK to rebase it?\n\n# Does the topic really exist?\ngit show-ref -q \"$topic\" || {\n\techo >&2 \"No such branch $topic\"\n\texit 1\n}\n\n# Is topic fully merged to master?\nnot_in_master=`git rev-list --pretty=oneline ^master \"$topic\"`\nif test -z \"$not_in_master\"\nthen\n\techo >&2 \"$topic is fully merged to master; better remove it.\"\n\texit 1 ;# we could allow it, but there is no point.\nfi\n\n# Is topic ever merged to next?  
If so you should not be rebasing it.\nonly_next_1=`git rev-list ^master \"^$topic\" ${publish} | sort`\nonly_next_2=`git rev-list ^master           ${publish} | sort`\nif test \"$only_next_1\" = \"$only_next_2\"\nthen\n\tnot_in_topic=`git rev-list \"^$topic\" master`\n\tif test -z \"$not_in_topic\"\n\tthen\n\t\techo >&2 \"$topic is already up to date with master\"\n\t\texit 1 ;# we could allow it, but there is no point.\n\telse\n\t\texit 0\n\tfi\nelse\n\tnot_in_next=`git rev-list --pretty=oneline ^${publish} \"$topic\"`\n\t/usr/bin/perl -e '\n\t\tmy $topic = $ARGV[0];\n\t\tmy $msg = \"* $topic has commits already merged to public branch:\\n\";\n\t\tmy (%not_in_next) = map {\n\t\t\t/^([0-9a-f]+) /;\n\t\t\t($1 => 1);\n\t\t} split(/\\n/, $ARGV[1]);\n\t\tfor my $elem (map {\n\t\t\t\t/^([0-9a-f]+) (.*)$/;\n\t\t\t\t[$1 => $2];\n\t\t\t} split(/\\n/, $ARGV[2])) {\n\t\t\tif (!exists $not_in_next{$elem->[0]}) {\n\t\t\t\tif ($msg) {\n\t\t\t\t\tprint STDERR $msg;\n\t\t\t\t\tundef $msg;\n\t\t\t\t}\n\t\t\t\tprint STDERR \" $elem->[1]\\n\";\n\t\t\t}\n\t\t}\n\t' \"$topic\" \"$not_in_next\" \"$not_in_master\"\n\texit 1\nfi\n\n<<\\DOC_END\n\nThis sample hook safeguards topic branches that have been\npublished from being rewound.\n\nThe workflow assumed here is:\n\n * Once a topic branch forks from \"master\", \"master\" is never\n   merged into it again (either directly or indirectly).\n\n * Once a topic branch is fully cooked and merged into \"master\",\n   it is deleted.  If you need to build on top of it to correct\n   earlier mistakes, a new topic branch is created by forking at\n   the tip of the \"master\".  
This is not strictly necessary, but\n   it makes it easier to keep your history simple.\n\n * Whenever you need to test or publish your changes to topic\n   branches, merge them into \"next\" branch.\n\nThe script, being an example, hardcodes the publish branch name\nto be \"next\", but it is trivial to make it configurable via\n$GIT_DIR/config mechanism.\n\nWith this workflow, you would want to know:\n\n(1) ... if a topic branch has ever been merged to \"next\".  Young\n    topic branches can have stupid mistakes you would rather\n    clean up before publishing, and things that have not been\n    merged into other branches can be easily rebased without\n    affecting other people.  But once it is published, you would\n    not want to rewind it.\n\n(2) ... if a topic branch has been fully merged to \"master\".\n    Then you can delete it.  More importantly, you should not\n    build on top of it -- other people may already want to\n    change things related to the topic as patches against your\n    \"master\", so if you need further changes, it is better to\n    fork the topic (perhaps with the same name) afresh from the\n    tip of \"master\".\n\nLet's look at this example:\n\n\t\t   o---o---o---o---o---o---o---o---o---o \"next\"\n\t\t  /       /           /           /\n\t\t /   a---a---b A     /           /\n\t\t/   /               /           /\n\t       /   /   c---c---c---c B         /\n\t      /   /   /             \\         /\n\t     /   /   /   b---b C     \\       /\n\t    /   /   /   /             \\     /\n    ---o---o---o---o---o---o---o---o---o---o---o \"master\"\n\n\nA, B and C are topic branches.\n\n * A has one fix since it was merged up to \"next\".\n\n * B has finished.  
It has been fully merged up to \"master\" and \"next\",\n   and is ready to be deleted.\n\n * C has not merged to \"next\" at all.\n\nWe would want to allow C to be rebased, refuse A, and encourage\nB to be deleted.\n\nTo compute (1):\n\n\tgit rev-list ^master ^topic next\n\tgit rev-list ^master        next\n\n\tif these match, topic has not merged in next at all.\n\nTo compute (2):\n\n\tgit rev-list master..topic\n\n\tif this is empty, it is fully merged to \"master\".\n\nDOC_END\n"
  },
  {
    "path": ".git/hooks/pre-receive.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to make use of push options.\n# The example simply echoes all push options that start with 'echoback='\n# and rejects all pushes when the \"reject\" push option is used.\n#\n# To enable this hook, rename this file to \"pre-receive\".\n\nif test -n \"$GIT_PUSH_OPTION_COUNT\"\nthen\n\ti=0\n\twhile test \"$i\" -lt \"$GIT_PUSH_OPTION_COUNT\"\n\tdo\n\t\teval \"value=\\$GIT_PUSH_OPTION_$i\"\n\t\tcase \"$value\" in\n\t\techoback=*)\n\t\t\techo \"echo from the pre-receive-hook: ${value#*=}\" >&2\n\t\t\t;;\n\t\treject)\n\t\t\texit 1\n\t\tesac\n\t\ti=$((i + 1))\n\tdone\nfi\n"
  },
  {
    "path": ".git/hooks/prepare-commit-msg.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to prepare the commit log message.\n# Called by \"git commit\" with the name of the file that has the\n# commit message, followed by the description of the commit\n# message's source.  The hook's purpose is to edit the commit\n# message file.  If the hook fails with a non-zero status,\n# the commit is aborted.\n#\n# To enable this hook, rename this file to \"prepare-commit-msg\".\n\n# This hook includes three examples. The first one removes the\n# \"# Please enter the commit message...\" help message.\n#\n# The second includes the output of \"git diff --name-status -r\"\n# into the message, just before the \"git status\" output.  It is\n# commented because it doesn't cope with --amend or with squashed\n# commits.\n#\n# The third example adds a Signed-off-by line to the message, that can\n# still be edited.  This is rarely a good idea.\n\nCOMMIT_MSG_FILE=$1\nCOMMIT_SOURCE=$2\nSHA1=$3\n\n/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' \"$COMMIT_MSG_FILE\"\n\n# case \"$COMMIT_SOURCE,$SHA1\" in\n#  ,|template,)\n#    /usr/bin/perl -i.bak -pe '\n#       print \"\\n\" . `git diff --cached --name-status -r`\n# \t if /^#/ && $first++ == 0' \"$COMMIT_MSG_FILE\" ;;\n#  *) ;;\n# esac\n\n# SOB=$(git var GIT_COMMITTER_IDENT | sed -n 's/^\\(.*>\\).*$/Signed-off-by: \\1/p')\n# git interpret-trailers --in-place --trailer \"$SOB\" \"$COMMIT_MSG_FILE\"\n# if test -z \"$COMMIT_SOURCE\"\n# then\n#   /usr/bin/perl -i.bak -pe 'print \"\\n\" if !$first_line++' \"$COMMIT_MSG_FILE\"\n# fi\n"
  },
  {
    "path": ".git/hooks/push-to-checkout.sample",
    "content": "#!/bin/sh\n\n# An example hook script to update a checked-out tree on a git push.\n#\n# This hook is invoked by git-receive-pack(1) when it reacts to git\n# push and updates reference(s) in its repository, and when the push\n# tries to update the branch that is currently checked out and the\n# receive.denyCurrentBranch configuration variable is set to\n# updateInstead.\n#\n# By default, such a push is refused if the working tree and the index\n# of the remote repository has any difference from the currently\n# checked out commit; when both the working tree and the index match\n# the current commit, they are updated to match the newly pushed tip\n# of the branch. This hook is to be used to override the default\n# behaviour; however the code below reimplements the default behaviour\n# as a starting point for convenient modification.\n#\n# The hook receives the commit with which the tip of the current\n# branch is going to be updated:\ncommit=$1\n\n# It can exit with a non-zero status to refuse the push (when it does\n# so, it must not modify the index or the working tree).\ndie () {\n\techo >&2 \"$*\"\n\texit 1\n}\n\n# Or it can make any necessary changes to the working tree and to the\n# index to bring them to the desired state when the tip of the current\n# branch is updated to the new commit, and exit with a zero status.\n#\n# For example, the hook can simply run git read-tree -u -m HEAD \"$1\"\n# in order to emulate git fetch that is run in the reverse direction\n# with git push, as the two-tree form of git read-tree -u -m is\n# essentially the same as git switch or git checkout that switches\n# branches while keeping the local changes in the working tree that do\n# not interfere with the difference between the branches.\n\n# The below is a more-or-less exact translation to shell of the C code\n# for the default behaviour for git's push-to-checkout hook defined in\n# the push_to_deploy() function in builtin/receive-pack.c.\n#\n# Note that the hook 
will be executed from the repository directory,\n# not from the working tree, so if you want to perform operations on\n# the working tree, you will have to adapt your code accordingly, e.g.\n# by adding \"cd ..\" or using relative paths.\n\nif ! git update-index -q --ignore-submodules --refresh\nthen\n\tdie \"Up-to-date check failed\"\nfi\n\nif ! git diff-files --quiet --ignore-submodules --\nthen\n\tdie \"Working directory has unstaged changes\"\nfi\n\n# This is a rough translation of:\n#\n#   head_has_history() ? \"HEAD\" : EMPTY_TREE_SHA1_HEX\nif git cat-file -e HEAD 2>/dev/null\nthen\n\thead=HEAD\nelse\n\thead=$(git hash-object -t tree --stdin </dev/null)\nfi\n\nif ! git diff-index --quiet --cached --ignore-submodules $head --\nthen\n\tdie \"Working directory has staged changes\"\nfi\n\nif ! git read-tree -u -m \"$commit\"\nthen\n\tdie \"Could not update working tree to new HEAD\"\nfi\n"
  },
  {
    "path": ".git/hooks/sendemail-validate.sample",
    "content": "#!/bin/sh\n\n# An example hook script to validate a patch (and/or patch series) before\n# sending it via email.\n#\n# The hook should exit with non-zero status after issuing an appropriate\n# message if it wants to prevent the email(s) from being sent.\n#\n# To enable this hook, rename this file to \"sendemail-validate\".\n#\n# By default, it will only check that the patch(es) can be applied on top of\n# the default upstream branch without conflicts in a secondary worktree. After\n# validation (successful or not) of the last patch of a series, the worktree\n# will be deleted.\n#\n# The following config variables can be set to change the default remote and\n# remote ref that are used to apply the patches against:\n#\n#   sendemail.validateRemote (default: origin)\n#   sendemail.validateRemoteRef (default: HEAD)\n#\n# Replace the TODO placeholders with appropriate checks according to your\n# needs.\n\nvalidate_cover_letter () {\n\tfile=\"$1\"\n\t# TODO: Replace with appropriate checks (e.g. spell checking).\n\ttrue\n}\n\nvalidate_patch () {\n\tfile=\"$1\"\n\t# Ensure that the patch applies without conflicts.\n\tgit am -3 \"$file\" || return\n\t# TODO: Replace with appropriate checks for this patch\n\t# (e.g. checkpatch.pl).\n\ttrue\n}\n\nvalidate_series () {\n\t# TODO: Replace with appropriate checks for the whole series\n\t# (e.g. 
quick build, coding style checks, etc.).\n\ttrue\n}\n\n# main -------------------------------------------------------------------------\n\nif test \"$GIT_SENDEMAIL_FILE_COUNTER\" = 1\nthen\n\tremote=$(git config --default origin --get sendemail.validateRemote) &&\n\tref=$(git config --default HEAD --get sendemail.validateRemoteRef) &&\n\tworktree=$(mktemp --tmpdir -d sendemail-validate.XXXXXXX) &&\n\tgit worktree add -fd --checkout \"$worktree\" \"refs/remotes/$remote/$ref\" &&\n\tgit config --replace-all sendemail.validateWorktree \"$worktree\"\nelse\n\tworktree=$(git config --get sendemail.validateWorktree)\nfi || {\n\techo \"sendemail-validate: error: failed to prepare worktree\" >&2\n\texit 1\n}\n\nunset GIT_DIR GIT_WORK_TREE\ncd \"$worktree\" &&\n\nif grep -q \"^diff --git \" \"$1\"\nthen\n\tvalidate_patch \"$1\"\nelse\n\tvalidate_cover_letter \"$1\"\nfi &&\n\nif test \"$GIT_SENDEMAIL_FILE_COUNTER\" = \"$GIT_SENDEMAIL_FILE_TOTAL\"\nthen\n\tgit config --unset-all sendemail.validateWorktree &&\n\ttrap 'git worktree remove -ff \"$worktree\"' EXIT &&\n\tvalidate_series\nfi\n"
  },
  {
    "path": ".git/hooks/update.sample",
    "content": "#!/bin/sh\n#\n# An example hook script to block unannotated tags from entering.\n# Called by \"git receive-pack\" with arguments: refname sha1-old sha1-new\n#\n# To enable this hook, rename this file to \"update\".\n#\n# Config\n# ------\n# hooks.allowunannotated\n#   This boolean sets whether unannotated tags will be allowed into the\n#   repository.  By default they won't be.\n# hooks.allowdeletetag\n#   This boolean sets whether deleting tags will be allowed in the\n#   repository.  By default they won't be.\n# hooks.allowmodifytag\n#   This boolean sets whether a tag may be modified after creation. By default\n#   it won't be.\n# hooks.allowdeletebranch\n#   This boolean sets whether deleting branches will be allowed in the\n#   repository.  By default they won't be.\n# hooks.denycreatebranch\n#   This boolean sets whether remotely creating branches will be denied\n#   in the repository.  By default this is allowed.\n#\n\n# --- Command line\nrefname=\"$1\"\noldrev=\"$2\"\nnewrev=\"$3\"\n\n# --- Safety check\nif [ -z \"$GIT_DIR\" ]; then\n\techo \"Don't run this script from the command line.\" >&2\n\techo \" (if you want, you could supply GIT_DIR then run\" >&2\n\techo \"  $0 <ref> <oldrev> <newrev>)\" >&2\n\texit 1\nfi\n\nif [ -z \"$refname\" -o -z \"$oldrev\" -o -z \"$newrev\" ]; then\n\techo \"usage: $0 <ref> <oldrev> <newrev>\" >&2\n\texit 1\nfi\n\n# --- Config\nallowunannotated=$(git config --type=bool hooks.allowunannotated)\nallowdeletebranch=$(git config --type=bool hooks.allowdeletebranch)\ndenycreatebranch=$(git config --type=bool hooks.denycreatebranch)\nallowdeletetag=$(git config --type=bool hooks.allowdeletetag)\nallowmodifytag=$(git config --type=bool hooks.allowmodifytag)\n\n# check for no description\nprojectdesc=$(sed -e '1q' \"$GIT_DIR/description\")\ncase \"$projectdesc\" in\n\"Unnamed repository\"* | \"\")\n\techo \"*** Project description file hasn't been set\" >&2\n\texit 1\n\t;;\nesac\n\n# --- Check types\n# if $newrev is 
0000...0000, it's a commit to delete a ref.\nzero=$(git hash-object --stdin </dev/null | tr '[0-9a-f]' '0')\nif [ \"$newrev\" = \"$zero\" ]; then\n\tnewrev_type=delete\nelse\n\tnewrev_type=$(git cat-file -t $newrev)\nfi\n\ncase \"$refname\",\"$newrev_type\" in\n\trefs/tags/*,commit)\n\t\t# un-annotated tag\n\t\tshort_refname=${refname##refs/tags/}\n\t\tif [ \"$allowunannotated\" != \"true\" ]; then\n\t\t\techo \"*** The un-annotated tag, $short_refname, is not allowed in this repository\" >&2\n\t\t\techo \"*** Use 'git tag [ -a | -s ]' for tags you want to propagate.\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\trefs/tags/*,delete)\n\t\t# delete tag\n\t\tif [ \"$allowdeletetag\" != \"true\" ]; then\n\t\t\techo \"*** Deleting a tag is not allowed in this repository\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\trefs/tags/*,tag)\n\t\t# annotated tag\n\t\tif [ \"$allowmodifytag\" != \"true\" ] && git rev-parse $refname > /dev/null 2>&1\n\t\tthen\n\t\t\techo \"*** Tag '$refname' already exists.\" >&2\n\t\t\techo \"*** Modifying a tag is not allowed in this repository.\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\trefs/heads/*,commit)\n\t\t# branch\n\t\tif [ \"$oldrev\" = \"$zero\" -a \"$denycreatebranch\" = \"true\" ]; then\n\t\t\techo \"*** Creating a branch is not allowed in this repository\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\trefs/heads/*,delete)\n\t\t# delete branch\n\t\tif [ \"$allowdeletebranch\" != \"true\" ]; then\n\t\t\techo \"*** Deleting a branch is not allowed in this repository\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\trefs/remotes/*,commit)\n\t\t# tracking branch\n\t\t;;\n\trefs/remotes/*,delete)\n\t\t# delete tracking branch\n\t\tif [ \"$allowdeletebranch\" != \"true\" ]; then\n\t\t\techo \"*** Deleting a tracking branch is not allowed in this repository\" >&2\n\t\t\texit 1\n\t\tfi\n\t\t;;\n\t*)\n\t\t# Anything else (is there anything else?)\n\t\techo \"*** Update hook: unknown type of update to ref $refname of type $newrev_type\" >&2\n\t\texit 1\n\t\t;;\nesac\n\n# --- 
Finished\nexit 0\n"
  },
  {
    "path": ".git/info/exclude",
    "content": "# git ls-files --others --exclude-from=.git/info/exclude\n# Lines that start with '#' are comments.\n# For a project mostly in C, the following would be a good set of\n# exclude patterns (uncomment them if you want to use them):\n# *.[oa]\n# *~\n"
  },
  {
    "path": ".git/logs/HEAD",
    "content": "0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000\tclone: from https://github.com/alibaba/zvec\n"
  },
  {
    "path": ".git/logs/refs/heads/main",
    "content": "0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000\tclone: from https://github.com/alibaba/zvec\n"
  },
  {
    "path": ".git/logs/refs/remotes/origin/HEAD",
    "content": "0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000\tclone: from https://github.com/alibaba/zvec\n"
  },
  {
    "path": ".git/objects/pack/pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.promisor",
    "content": "b49833bf56a0e102b8ac1ff95ed7766545f5bd1e refs/heads/main\n"
  },
  {
    "path": ".git/packed-refs",
    "content": "# pack-refs with: peeled fully-peeled sorted \nb49833bf56a0e102b8ac1ff95ed7766545f5bd1e refs/remotes/origin/main\n"
  },
  {
    "path": ".git/refs/heads/main",
    "content": "b49833bf56a0e102b8ac1ff95ed7766545f5bd1e\n"
  },
  {
    "path": ".git/refs/remotes/origin/HEAD",
    "content": "ref: refs/remotes/origin/main\n"
  },
  {
    "path": ".git/shallow",
    "content": "b49833bf56a0e102b8ac1ff95ed7766545f5bd1e\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/benchmark.yml",
    "content": "name: Benchmarking\ndescription: Add, update, or fix benchmark cases for zvec\ntitle: \"[Benchmark]: \"\nlabels: [\"benchmark\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Use this for benchmark-related work: new test cases, CI integration, or performance regression tracking.\n  \n  - type: input\n    id: benchmark_type\n    attributes:\n      label: Benchmark Type\n      description: e.g., filtered search, batch insert, recall@k, ARM64 vs x86\n    validations:\n      required: true\n  \n  - type: textarea\n    id: goal\n    attributes:\n      label: Goal\n      description: What performance aspect are you measuring or improving?\n    validations:\n      required: true\n  \n  - type: textarea\n    id: methodology\n    attributes:\n      label: Methodology\n      description: Dataset, query size, hardware, metrics (latency, throughput, memory)\n    validations:\n      required: true\n  \n  - type: textarea\n    id: baseline\n    attributes:\n      label: Baseline (if applicable)\n      description: Current performance numbers or competing systems for comparison.\n    validations:\n      required: false\n  \n  - type: textarea\n    id: ci_integration\n    attributes:\n      label: CI Integration Plan\n      description: Should this run in CI? How often?\n    validations:\n      required: false"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.yml",
    "content": "name: Bug Report\ndescription: Report a bug or unexpected behavior (e.g., crash, incorrect vector query, memory leak)\ntitle: \"[Bug]: \"\nlabels: [\"bug\", \"triage\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thank you for reporting! Please provide detailed info so we can reproduce and fix it quickly.\n\n  - type: textarea\n    id: description\n    attributes:\n      label: Description\n      description: What happened? What did you expect?\n      placeholder: |\n        e.g. \"Query with vector field crashes when using Zvec Python API\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: steps_to_reproduce\n    attributes:\n      label: Steps to Reproduce\n      description: Exact steps to trigger the issue (code snippets welcome)\n      placeholder: |\n        1. Build Zvec with CMake (Debug/Release)\n        2. Run Python script: `python test.py`\n        3. Call `collection.query(VectorQuery())`\n        4. Process segfaults / hangs / returns wrong results\n      render: python\n    validations:\n      required: true\n\n  - type: textarea\n    id: logs_or_trace\n    attributes:\n      label: Logs / Stack Trace\n      description: Paste relevant logs, LLDB/GDB backtrace, or CI failures\n      placeholder: |\n        Thread 1 \"python\" received signal SIGSEGV, Segmentation fault.\n        0x0000000104a2c3f0 in std::__1::shared_ptr<...>::...\n      render: shell\n    validations:\n      required: false\n\n  - type: input\n    id: os\n    attributes:\n      label: Operating System\n      placeholder: macOS 14 (M1), Ubuntu 22.04, Windows 11 (WSL2)\n    validations:\n      required: true\n\n  - type: input\n    id: build_env\n    attributes:\n      label: Build & Runtime Environment\n      description: Compiler, CMake, Python, key dependencies\n      placeholder: |\n        clang 15.0.0, CMake 4.1.2, Python 3.11.9, magic_enum v0.9.7 (via git submodule)\n    validations:\n      required: true\n\n  - 
type: checkboxes\n    id: additional_context\n    attributes:\n      label: Additional Context\n      options:\n        - label: I've checked `git status` — no uncommitted submodule changes\n        - label: I built with `CMAKE_BUILD_TYPE=Debug`\n        - label: This occurs with or without `COVERAGE=ON`\n        - label: The issue involves Python ↔ C++ integration (pybind11)"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\ncontact_links:\n  - name: Documentation\n    url: https://zvec.org/en/\n    about: Check the quickstart, build guide, and API docs first.\n\n  - name: Python API Examples\n    url: https://zvec.org/en/docs/quickstart/\n    about: See working usage examples.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/enhancement.yml",
    "content": "name: Enhancement\ndescription: Improve an existing feature or component\ntitle: \"[Enhance]: \"\nlabels: [\"enhancement\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        This template is for improving existing functionality (e.g., performance, usability, robustness).\n  \n  - type: input\n    id: component\n    attributes:\n      label: Affected Component\n      description: e.g., HNSW index, buffer manager, Python API\n    validations:\n      required: true\n  \n  - type: textarea\n    id: current\n    attributes:\n      label: Current Behavior\n      description: What is the current state and its limitations?\n    validations:\n      required: true\n  \n  - type: textarea\n    id: desired\n    attributes:\n      label: Desired Improvement\n      description: What should be improved and how?\n    validations:\n      required: true\n  \n  - type: textarea\n    id: impact\n    attributes:\n      label: Impact\n      description: How will this benefit users? (e.g., faster queries, lower memory, easier integration)\n    validations:\n      required: true"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.yml",
    "content": "name: Feature Request\ndescription: Suggest a new feature or improvement (e.g., better memory control, new query option)\ntitle: \"[Feature]: \"\nlabels: [\"feature\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for your idea! Help us understand the motivation and scope.\n\n  - type: textarea\n    id: problem_or_motivation\n    attributes:\n      label: Problem / Motivation\n      description: What problem does this solve? Why is it needed?\n      placeholder: |\n        e.g. \"Current vector queries don't allow filtering by metadata + distance threshold at once\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: proposed_solution\n    attributes:\n      label: Proposed Solution\n      description: How should it work? API sketch or pseudocode welcome.\n      placeholder: |\n        Add `filter=` and `max_distance=` args to `Zvec.query()`:\n        ```python\n        results = db.query(vector, filter=\"category == 'A'\", max_distance=0.5)\n        ```\n      render: python\n    validations:\n      required: false\n\n  - type: textarea\n    id: alternatives\n    attributes:\n      label: Alternatives Considered\n      description: Are there workarounds? Why not use them?\n    validations:\n      required: false\n\n  - type: dropdown\n    id: impact_area\n    attributes:\n      label: Affected Area\n      multiple: true\n      options:\n        - C++ Core (storage, indexing)\n        - Python API / Bindings\n        - Build System (CMake, Homebrew pkg)\n        - Testing / CI / Coverage\n        - Documentation\n    validations:\n      required: false"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/integration.yml",
    "content": "name: Ecosystem Integration\ndescription: Integrate zvec with external frameworks (e.g., LangChain, LlamaIndex)\ntitle: \"[Integration]: \"\nlabels: [\"integration\"]\nbody:\n  - type: input\n    id: framework\n    attributes:\n      label: Target Framework\n      description: e.g., LangChain, LlamaIndex, Haystack\n    validations:\n      required: true\n  \n  - type: textarea\n    id: motivation\n    attributes:\n      label: Motivation\n      description: Why integrate with this framework? Who benefits?\n    validations:\n      required: true\n  \n  - type: textarea\n    id: interface\n    attributes:\n      label: Required Interface\n      description: What adapter or interface must be implemented? (e.g., VectorStore base class)\n    validations:\n      required: true\n  \n  - type: textarea\n    id: reference\n    attributes:\n      label: Reference Implementations\n      description: Links to similar integrations in other vector DBs.\n    validations:\n      required: false"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/profiling.yml",
    "content": "name: Profiling / Investigation\ndescription: Profile performance, compatibility, or behavior in a specific scenario\ntitle: \"[Profile]: \"\nlabels: [\"profile\"]\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Use this for tasks like performance profiling, architecture compatibility checks, or feasibility studies.\n  \n  - type: input\n    id: scenario\n    attributes:\n      label: Target Scenario\n      description: e.g., ARM64 deployment, high-concurrency load, large dataset ingestion\n    validations:\n      required: true\n  \n  - type: textarea\n    id: objective\n    attributes:\n      label: Objective\n      description: What do you want to learn or validate?\n    validations:\n      required: true\n  \n  - type: textarea\n    id: methodology\n    attributes:\n      label: Proposed Methodology\n      description: How will you conduct the investigation? (tools, metrics, test data)\n    validations:\n      required: true\n  \n  - type: textarea\n    id: expected_outcome\n    attributes:\n      label: Expected Outcome\n      description: What deliverables are expected? (e.g., report, optimization PR, benchmark results)\n    validations:\n      required: true"
  },
  {
    "path": ".github/codecov.yml",
    "content": "codecov:\n  require_ci_to_pass: true\n\ncoverage:\n  precision: 2\n  round: down\n  range: \"60...75\"\n\n  status:\n    project:\n      default: false\n    patch:\n      default: false\n\n\nparsers:\n  gcov:\n    branch_detection:\n      conditional: true\n      loop: true\n      method: false\n      macro: false\n\ncomment:\n  require_changes: false\n  layout: \"reach,diff,flags,tree\"\n  behavior: default\n\n\nignore:\n  - \"thirdparty/\"\n  - \"tests/\""
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  # GitHub Actions dependencies\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n      day: \"monday\"\n      time: \"02:00\"\n      timezone: \"Asia/Shanghai\"\n    labels:\n      - \"dependencies\"\n      - \"github-actions\"\n    commit-message:\n      prefix: \"ci\"\n      include: \"scope\"\n    open-pull-requests-limit: 5\n"
  },
  {
    "path": ".github/workflows/01-ci-pipeline.yml",
    "content": "name: Main\n\non:\n  push:\n    branches: [ \"main\" ]\n    paths-ignore:\n      - '**.md'\n  merge_group:\n  pull_request:\n    branches: [ \"main\" ]\n    paths-ignore:\n      - '**.md'\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  # Code quality checks (fast, run first)\n  lint:\n    uses: ./.github/workflows/02-lint-check.yml\n\n  # Main build and test matrix\n  build-and-test-macos-arm64:\n    name: Build & Test (macos-arm64)\n    needs: lint\n    uses: ./.github/workflows/03-macos-linux-build.yml\n    with:\n      platform: macos-arm64\n      os: macos-15\n\n  build-and-test-linux-arm64:\n    name: Build & Test (linux-arm64)\n    needs: lint\n    uses: ./.github/workflows/03-macos-linux-build.yml\n    with:\n      platform: linux-arm64\n      os: ubuntu-24.04-arm\n\n  build-and-test-linux-x64:\n    name: Build & Test (linux-x64)\n    needs: lint\n    uses: ./.github/workflows/03-macos-linux-build.yml\n    with:\n      platform: linux-x64\n      os: ubuntu-24.04\n\n  build-android:\n    name: Build & Test (android)\n    needs: lint\n    uses: ./.github/workflows/04-android-build.yml\n"
  },
  {
    "path": ".github/workflows/02-lint-check.yml",
    "content": "name: Lint\n\non:\n  workflow_call:\n\njobs:\n  lint:\n    name: Code Quality Checks\n    runs-on: ubuntu-24.04\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6\n\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: '3.10'\n          cache: 'pip'\n          cache-dependency-path: 'pyproject.toml'\n\n      - name: Install linting tools\n        run: |\n          python -m pip install --upgrade pip \\\n            ruff==v0.14.4 \\\n            clang-format==18.1.8\n        shell: bash\n\n      - name: Run Ruff Linter\n        run: python -m ruff check .\n        shell: bash\n\n      - name: Run Ruff Formatter Check\n        run: python -m ruff format --check .\n        shell: bash\n\n      - name: Run clang-format Check\n        run: |\n          CPP_FILES=$(find . -type f \\( -name \"*.cpp\" -o -name \"*.h\" -o -name \"*.hpp\" -o -name \"*.cc\" -o -name \"*.cxx\" \\) \\\n            ! -path \"./build/*\" \\\n            ! -path \"./tests/*\" \\\n            ! -path \"./scripts/*\" \\\n            ! -path \"./python/*\" \\\n            ! -path \"./thirdparty/*\" \\\n            ! -path \"./.git/*\")\n\n          if [ -z \"$CPP_FILES\" ]; then\n            echo \"No C++ files found to check.\"\n            exit 0\n          fi\n\n          clang-format --dry-run --Werror $CPP_FILES\n        shell: bash\n"
  },
  {
    "path": ".github/workflows/03-macos-linux-build.yml",
    "content": "name: MacOS & Linux Build\n\non:\n  workflow_call:\n    inputs:\n      platform:\n        description: 'Platform identifier'\n        required: true\n        type: string\n      os:\n        description: 'GitHub Actions runner OS'\n        required: true\n        type: string\n\npermissions:\n  contents: read\n\njobs:\n  # Build and test matrix (parallel execution)\n  build-and-test:\n    name: Build & Test (${{ inputs.platform }})\n    runs-on: ${{ inputs.os }}\n    \n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - os: ${{ inputs.os }}\n            platform: ${{ inputs.platform }}\n            arch_flag: \"\"  # Use appropriate architecture\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6\n        with:\n          submodules: recursive\n\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: '3.10'\n          cache: 'pip'\n          cache-dependency-path: 'pyproject.toml'\n\n      - name: Set up environment variables\n        run: |\n          # Set number of processors for parallel builds\n          if [[ \"${{ matrix.platform }}\" == \"macos-arm64\" ]]; then\n            NPROC=$(sysctl -n hw.ncpu 2>/dev/null || echo 2)\n          else\n            NPROC=$(nproc 2>/dev/null || echo 2)\n          fi\n          echo \"NPROC=$NPROC\" >> $GITHUB_ENV\n          echo \"Using $NPROC parallel jobs for builds\"\n          \n          # Add Python user base bin to PATH for pip-installed CLI tools\n          echo \"$(python -c 'import site; print(site.USER_BASE)')/bin\" >> $GITHUB_PATH\n        shell: bash\n\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip \\\n            pybind11==3.0 \\\n            cmake==3.30.0 \\\n            ninja==1.11.1 \\\n            pytest \\\n            scikit-build-core \\\n            setuptools_scm\n        shell: bash\n\n      - name: Build from source\n     
   run: |\n          cd \"$GITHUB_WORKSPACE\"\n          \n          CMAKE_GENERATOR=\"Unix Makefiles\" \\\n          CMAKE_BUILD_PARALLEL_LEVEL=\"$NPROC\" \\\n          python -m pip install -v . \\\n            --no-build-isolation \\\n            --config-settings='cmake.define.BUILD_TOOLS=\"ON\"' \\\n            ${{ matrix.arch_flag }}\n        shell: bash\n\n      - name: Run C++ Tests\n        run: |\n          cd \"$GITHUB_WORKSPACE/build\"\n          make unittest -j$NPROC\n        shell: bash\n\n      - name: Run Python Tests\n        run: |\n          cd \"$GITHUB_WORKSPACE\"\n          python -m pytest python/tests/\n        shell: bash\n\n      - name: Run C++ Examples\n        run: |\n          cd \"$GITHUB_WORKSPACE/examples/c++\"\n          mkdir build && cd build\n          cmake .. -DCMAKE_BUILD_TYPE=Release\n          make -j $NPROC\n          ./db-example\n          ./core-example\n          ./ailego-example\n        shell: bash\n"
  },
  {
    "path": ".github/workflows/04-android-build.yml",
    "content": "name: Android Cross Build\n\non:\n  workflow_call:\n\npermissions:\n  contents: read\n\njobs:\n  build-android:\n    # sdkmanager and other Android tools are x86‑only; ARM runners fail with exit code 1\n    # switch back to an x86 image so the setup-android action can install the SDK\n    runs-on: ubuntu-24.04\n    strategy:\n      fail-fast: false\n      matrix:\n        abi: [x86_64]\n        api: [21]\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6\n\n      - name: Cache dependencies\n        uses: actions/cache@v5\n        with:\n          path: |\n            ~/.ccache\n          key: ${{ runner.os }}-dependencies-cache-${{ hashFiles('**/CMakeLists.txt', 'thirdparty/**') }}-stl-fix\n\n      - name: Install dependencies\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y --no-install-recommends \\\n            cmake ninja-build git ca-certificates python3 \\\n            build-essential make ccache\n\n      - name: Setup Java 17\n        uses: actions/setup-java@v5\n        with:\n          distribution: temurin\n          java-version: '17'\n\n      - name: Setup Android SDK\n        uses: android-actions/setup-android@v3\n\n      - name: Install NDK (side by side)\n        shell: bash\n        run: |\n          sdkmanager \"ndk;26.1.10909125\"\n\n      - name: Cache host protoc build\n        uses: actions/cache@v5\n        with:\n          path: build-host\n          key: ${{ runner.os }}-host-protoc-${{ hashFiles('src/**', 'CMakeLists.txt') }}-stl-fix\n          restore-keys: |\n            ${{ runner.os }}-host-protoc-\n\n      - name: Use host env to compile protoc\n        shell: bash\n        run: |\n          git submodule update --init\n          if [ ! 
-d \"build-host\" ]; then\n            export CCACHE_BASEDIR=\"$GITHUB_WORKSPACE\"\n            export CCACHE_NOHASHDIR=1\n            export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros\n\n            cmake -S . -B build-host -G Ninja \\\n              -DCMAKE_C_COMPILER_LAUNCHER=ccache \\\n              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n            cmake --build build-host --target protoc --parallel\n          else\n            echo \"Using cached host protoc build\"\n          fi\n\n      - name: Cache Android build\n        uses: actions/cache@v5\n        with:\n          path: build-android-${{ matrix.abi }}\n          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**', 'thirdparty/**') }}-stl-fix-3\n\n      - name: Configure and Build\n        shell: bash\n        run: |\n          git submodule foreach --recursive 'git stash --include-untracked'\n\n          export ANDROID_SDK_ROOT=\"$ANDROID_HOME\"\n          export ANDROID_NDK_HOME=\"$ANDROID_SDK_ROOT/ndk/26.1.10909125\"\n\n          export CCACHE_BASEDIR=\"$GITHUB_WORKSPACE\"\n          export CCACHE_NOHASHDIR=1\n          export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros\n\n          if [ ! -d \"build-android-${{ matrix.abi }}\" ]; then\n            cmake -S . 
-B build-android-${{ matrix.abi }} -G Ninja \\\n              -DCMAKE_BUILD_TYPE=Release \\\n              -DCMAKE_TOOLCHAIN_FILE=\"$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake\" \\\n              -DANDROID_ABI=${{ matrix.abi }} \\\n              -DANDROID_PLATFORM=android-${{ matrix.api }} \\\n              -DANDROID_STL=c++_static \\\n              -DBUILD_PYTHON_BINDINGS=OFF \\\n              -DENABLE_NATIVE=OFF \\\n              -DAUTO_DETECT_ARCH=OFF \\\n              -DBUILD_TOOLS=OFF \\\n              -DGLOBAL_CC_PROTOBUF_PROTOC=\"$GITHUB_WORKSPACE/build-host/bin/protoc\" \\\n              -DCMAKE_C_COMPILER_LAUNCHER=ccache \\\n              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \\\n              -DCMAKE_VERBOSE_MAKEFILE=ON\n            cmake --build build-android-${{ matrix.abi }} --parallel --verbose\n          else\n            echo \"Using cached Android build directory\"\n          fi\n\n      - name: Cache examples build\n        uses: actions/cache@v5\n        with:\n          path: examples/c++/build-android-examples-${{ matrix.abi }}\n          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}-stl-fix-3\n\n      - name: Build examples\n        shell: bash\n        run: |\n          export ANDROID_SDK_ROOT=\"$ANDROID_HOME\"\n          export ANDROID_NDK_HOME=\"$ANDROID_SDK_ROOT/ndk/26.1.10909125\"\n\n          if [ ! 
-d \"examples/c++/build-android-examples-${{ matrix.abi }}\" ]; then\n            cmake -S examples/c++ -B examples/c++/build-android-examples-${{ matrix.abi }} -G Ninja \\\n              -DCMAKE_TOOLCHAIN_FILE=\"$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake\" \\\n              -DANDROID_ABI=${{ matrix.abi }} \\\n              -DANDROID_PLATFORM=android-${{ matrix.api }} \\\n              -DANDROID_STL=c++_static \\\n              -DCMAKE_BUILD_TYPE=Release \\\n              -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \\\n              -DHOST_BUILD_DIR=\"build-android-${{ matrix.abi }}\" \\\n              -DCMAKE_C_COMPILER_LAUNCHER=ccache \\\n              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache\n            cmake --build examples/c++/build-android-examples-${{ matrix.abi }} --parallel\n          else\n            echo \"Using cached examples build\"\n          fi\n\n      - name: Run on Android emulator (arm64) and verify\n        uses: reactivecircus/android-emulator-runner@v2\n        with:\n          api-level: ${{ matrix.api }}\n          arch: ${{ matrix.abi }}\n          # target: google_apis\n          # emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -netdelay none -netspeed full\n          # disable-animations: true\n          script: |\n            adb wait-for-device\n\n            echo \"Device ABI:\"\n            adb shell getprop ro.product.cpu.abi\n            adb shell getprop ro.product.cpu.abilist\n\n            echo \"=== CPU ISA / Instruction Set Support ===\"\n            echo \"--- /proc/cpuinfo flags ---\"\n            adb shell 'cat /proc/cpuinfo | grep -E \"^(Features|flags)\"'\n\n            echo \"Checking binary sizes:\"\n            ls -lah examples/c++/build-android-examples-${{ matrix.abi }}/\n\n            # Push executables to device\n            adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/\n            adb push examples/c++/build-android-examples-${{ 
matrix.abi }}/core-example /data/local/tmp/\n            adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/\n\n            adb shell chmod 755 /data/local/tmp/ailego-example\n            adb shell chmod 755 /data/local/tmp/core-example\n            adb shell chmod 755 /data/local/tmp/db-example\n\n            echo \"File info on device:\"\n            adb shell ls -la /data/local/tmp/ailego-example\n            adb shell ls -la /data/local/tmp/core-example\n            adb shell ls -la /data/local/tmp/db-example\n\n            echo \"Running ailego example:\"\n            adb shell 'cd /data/local/tmp && ./ailego-example'\n\n            echo \"Running core example:\"\n            adb shell 'cd /data/local/tmp && ./core-example'\n\n            echo \"Running db example:\"\n            adb shell 'cd /data/local/tmp && ./db-example'\n"
  },
  {
    "path": ".github/workflows/_build_wheel_job.yml",
    "content": "name: \"(Reusable) Build, Publish and Smoke-test a Wheel\"\n\non:\n  workflow_call:\n    inputs:\n      runner:\n        description: \"GitHub Actions runner label\"\n        required: true\n        type: string\n      pypi_repository_url:\n        description: \"PyPI repository URL (empty string means official PyPI)\"\n        required: false\n        type: string\n        default: \"\"\n    secrets:\n      PYPI_API_TOKEN:\n        required: true\n\njobs:\n  build_publish_test:\n    name: Build / publish / smoke-test on ${{ inputs.runner }}\n    runs-on: ${{ inputs.runner }}\n    permissions:\n      contents: read\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6\n        with:\n          submodules: recursive\n\n      - name: Set up Python (for cibuildwheel controller)\n        uses: actions/setup-python@v6\n        with:\n          python-version: '3.11'\n\n      - name: Install cibuildwheel\n        run: |\n          pip install --upgrade pip\n          pip install cibuildwheel==3.4.0\n\n      - name: Build wheels using cibuildwheel\n        run: |\n          python -m cibuildwheel --output-dir wheelhouse\n          # Save list of built wheels for publishing\n          ls wheelhouse/*.whl | tee $GITHUB_STEP_SUMMARY\n          echo \"wheels=$(ls wheelhouse/*.whl | tr '\\n' ' ')\" >> $GITHUB_ENV\n\n      - name: Publish to PyPI\n        if: success() && github.event_name == 'workflow_dispatch'\n        env:\n          TWINE_USERNAME: __token__\n          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}\n          TWINE_REPOSITORY_URL: ${{ inputs.pypi_repository_url }}\n        run: |\n          pip install twine\n          twine upload --skip-existing --verbose wheelhouse/*.whl\n\n      - name: Smoke test from PyPI\n        if: success() && github.event_name == 'workflow_dispatch'\n        shell: bash\n        env:\n          PYPI_REPOSITORY_URL: ${{ inputs.pypi_repository_url }}\n        run: |\n          # Extract 
version from wheel filename (e.g. zvec-0.2.1.dev24-cp311-...whl -> 0.2.1.dev24)\n          WHEEL_FILE=$(ls wheelhouse/zvec-*.whl | head -1)\n          ZVEC_VERSION=$(basename \"$WHEEL_FILE\" | sed 's/zvec-\\([^-]*\\)-.*/\\1/')\n\n          # Build index-url flags: use TestPyPI when repository URL is set, otherwise official PyPI\n          if [ -n \"$PYPI_REPOSITORY_URL\" ]; then\n            INDEX_FLAGS=\"--index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/\"\n            echo \"Waiting for zvec==$ZVEC_VERSION to become available on TestPyPI...\"\n          else\n            INDEX_FLAGS=\"\"\n            echo \"Waiting for zvec==$ZVEC_VERSION to become available on PyPI...\"\n          fi\n          # Poll until the version is available (max 5 minutes)\n          FOUND=0\n          for i in $(seq 1 30); do\n            if pip install $INDEX_FLAGS --dry-run \"zvec==$ZVEC_VERSION\" > /dev/null 2>&1; then\n              echo \"Version $ZVEC_VERSION is available.\"\n              FOUND=1\n              break\n            fi\n            echo \"Attempt $i/30: not yet available, retrying in 10s...\"\n            sleep 10\n          done\n\n          if [ \"$FOUND\" -eq 0 ]; then\n            echo \"ERROR: Timed out (5 min) waiting for zvec==$ZVEC_VERSION on PyPI. Aborting smoke test.\"\n            exit 1\n          fi\n\n          # Create a clean venv and install\n          python -m venv test_env\n          source test_env/bin/activate\n          pip install --upgrade pip\n          pip install $INDEX_FLAGS \"zvec==$ZVEC_VERSION\"\n          pip install --upgrade pip\n          pip install $INDEX_FLAGS \"zvec==$ZVEC_VERSION\"\n          # Run a simple smoke test\n          python -c \"import zvec; print('Import OK:', zvec.__version__)\"\n"
  },
  {
    "path": ".github/workflows/build_test_wheel.yml",
    "content": "name: Build Test PyPi Wheels\n\non:\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  build_wheels_linux_x64:\n    name: Build wheels on ubuntu-24.04 (x64) for TestPyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: ubuntu-24.04\n      pypi_repository_url: https://test.pypi.org/legacy/\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}\n\n  build_wheels_linux_arm64:\n    name: Build wheels on ubuntu-24.04-arm (arm64) for TestPyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: ubuntu-24.04-arm\n      pypi_repository_url: https://test.pypi.org/legacy/\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}\n\n  build_wheels_macos_arm64:\n    name: Build wheels on macos-15 (arm64) for TestPyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: macos-15\n      pypi_repository_url: https://test.pypi.org/legacy/\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/build_wheel.yml",
    "content": "name: Build Wheels\n\non:\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  build_wheels_linux_x64:\n    name: Build wheels on ubuntu-24.04 (x64) for PyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: ubuntu-24.04\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}\n\n  build_wheels_linux_arm64:\n    name: Build wheels on ubuntu-24.04-arm (arm64) for PyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: ubuntu-24.04-arm\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}\n\n  build_wheels_macos_arm64:\n    name: Build wheels on macos-15 (arm64) for PyPi\n    uses: ./.github/workflows/_build_wheel_job.yml\n    with:\n      runner: macos-15\n    secrets:\n      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/continuous_bench.yml",
    "content": "name: Continuous Benchmark\non:\n  push:\n    branches: [ \"main\", \"ci/continuous_bench_squash\" ]\n    paths-ignore:\n      - '**.md'\n  workflow_dispatch:\n\nconcurrency:\n  group: cb-${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  benchmark:\n    runs-on: vdbbench\n    steps:\n      - uses: actions/checkout@v6\n\n      - name: Run VectorDBBench\n        env:\n          DATABASE_URL: ${{ secrets.DATABASE_URL }}\n        run: |\n          bash .github/workflows/scripts/run_vdb.sh"
  },
  {
    "path": ".github/workflows/docker/Dockerfile.linux_x64_glibc228",
    "content": "# =============================================================================\n# Dockerfile.linux_x64_glibc228\n# Purpose: Ubuntu 18.10 gcc-9 + glibc 2.28 + CMake 3.30.0 + PyBind11 build environment\n# Warning: ubuntu:18.10 is EOL; use only for glibc 2.28 compatibility testing.\n# =============================================================================\n\n# Use official Ubuntu 18.10 (Cosmic Cuttlefish)\n# glibc version: 2.28 (confirmed via `ldd --version`)\nFROM ubuntu:18.10\n\n# Replace Ubuntu mirror with old-releases.ubuntu.com for older glibc compatibility\nRUN sed -i 's|http://\\(.*\\)/ubuntu|http://old-releases.ubuntu.com/ubuntu|g' /etc/apt/sources.list && \\\n    sed -i 's|http://security.ubuntu.com/ubuntu|http://old-releases.ubuntu.com/ubuntu|g' /etc/apt/sources.list\n\n# Add Ubuntu 20.04 (focal) repo for GCC 9 ONLY\nRUN echo \"deb http://archive.ubuntu.com/ubuntu/ focal main universe\" >> /etc/apt/sources.list && \\\n    echo \"deb http://security.ubuntu.com/ubuntu/ focal-security main universe\" >> /etc/apt/sources.list\n\n# Prevent interactive prompts & set non-root user\nENV DEBIAN_FRONTEND=noninteractive \\\n    TZ=Etc/UTC\n\n# Create non-root user for safety (optional but recommended)\nRUN useradd -m -u 1000 builder && \\\n    mkdir -p /workspace && chown builder:builder /workspace\n\n# Install base system dependencies\nRUN apt-get update && \\\n    apt-get install -y --no-install-recommends \\\n        build-essential \\\n        gcc-9 g++-9 \\\n        ninja-build git curl ca-certificates vim wget lcov gnupg clang-format-18\\\n        rsync lsb-release \\\n        uuid-dev zlib1g-dev libssl-dev libffi-dev \\\n        pybind11-dev && \\\n    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 \\\n                        --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \\\n    rm -rf /var/lib/apt/lists/*\n\n# Install Miniforge (Conda) as root, then assign to builder\nENV MINIFORGE_VERSION=\"latest\"\nENV 
MINIFORGE_HOME=\"/opt/miniforge3\"\n\nRUN curl -sSL \"https://github.com/conda-forge/miniforge/releases/${MINIFORGE_VERSION}/download/Miniforge3-Linux-x86_64.sh\" -o miniforge.sh && \\\n    bash miniforge.sh -b -p ${MINIFORGE_HOME} && \\\n    rm miniforge.sh && \\\n    chown -R builder:builder ${MINIFORGE_HOME}\n\n# Switch to non-root user\nUSER builder\nENV PATH=\"${MINIFORGE_HOME}/bin:${PATH}\"\nWORKDIR /workspace\n\n# Create conda envs for supported Python versions\nRUN conda create -n py310 python=3.10 -y && \\\n    conda create -n py311 python=3.11 -y && \\\n    conda create -n py312 python=3.12 -y\nRUN conda clean --all -f -y\n\n# Install CMake 3.30.0 from Kitware official binary\n# Ref: https://github.com/Kitware/CMake/releases/tag/v3.30.0\nRUN mkdir -p /tmp/cmake && cd /tmp/cmake && \\\n    curl -sSL -o cmake.tar.gz \\\n        \"https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0-linux-x86_64.tar.gz\" && \\\n    tar -xzf cmake.tar.gz --strip-components=1 -C /tmp/cmake && \\\n    mkdir -p /home/builder/.local && \\\n    mv * /home/builder/.local/ && \\\n    chown -R builder:builder /home/builder/.local && \\\n    rm -rf /tmp/cmake\n\n# Add CMake to PATH\nENV PATH=\"/home/builder/.local/bin:${PATH}\"\n\n# Verify installations\nRUN cmake --version && \\\n    conda info && \\\n    conda env list && \\\n    python --version && \\\n    gcc --version && \\\n    ldd --version | head -n1\n\n# Final setup\nWORKDIR /workspace"
  },
  {
    "path": ".github/workflows/nightly_coverage.yml",
    "content": "name: Nightly Coverage Report\n\non:\n  schedule:\n    # Runs daily at 00:00 CST (China Standard Time) = 16:00 UTC\n    - cron: '0 16 * * *'\n\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  coverage:\n    name: Nightly Coverage Report\n    runs-on: ubuntu-24.04\n\n    strategy:\n      matrix:\n        python-version: ['3.10']\n      fail-fast: false\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6\n        with:\n          ref: main  # Always use main for nightly\n          submodules: recursive\n\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.python-version }}\n          cache: 'pip'\n          cache-dependency-path: 'pyproject.toml'\n\n      - name: Set up environment variables\n        run: |\n          # Set number of processors for parallel builds\n          NPROC=$(nproc 2>/dev/null || echo 2)\n          echo \"NPROC=$NPROC\" >> $GITHUB_ENV\n          echo \"Using $NPROC parallel jobs for builds\"\n          \n          # Add Python user base bin to PATH for pip-installed CLI tools\n          echo \"$(python -c 'import site; print(site.USER_BASE)')/bin\" >> $GITHUB_PATH\n        shell: bash\n\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip \\\n            cmake==3.30.0 \\\n            ninja==1.11.1 \\\n            pytest \\\n            pytest-cov \\\n            scikit-build-core \\\n            setuptools_scm\n        shell: bash\n\n      - name: Build with COVERAGE config\n        run: |\n          cd \"$GITHUB_WORKSPACE\"\n          \n          CMAKE_GENERATOR=\"Unix Makefiles\" \\\n          CMAKE_BUILD_PARALLEL_LEVEL=\"$NPROC\" \\\n          python -m pip install -v . 
\\\n            --no-build-isolation \\\n            --config-settings=\"cmake.build-type=COVERAGE\" \\\n            --config-settings='cmake.define.ENABLE_ZEN3=\"ON\"'\n        shell: bash\n\n      - name: Run Python Tests with Coverage\n        run: |\n          cd \"$GITHUB_WORKSPACE\"\n          python -m pytest python/tests/ --cov=zvec --cov-report=xml\n        shell: bash\n\n      - name: Run C++ Tests and Generate Coverage\n        run: |\n          cd \"$GITHUB_WORKSPACE/build\"\n          make unittest -j$NPROC\n          cd \"$GITHUB_WORKSPACE\"\n          # Ensure gcov.sh is executable\n          chmod +x scripts/gcov.sh\n          bash scripts/gcov.sh -k\n        shell: bash\n\n      - name: Upload Coverage to Codecov\n        uses: codecov/codecov-action@v5\n        with:\n          files: ./proxima-zvec-filtered.lcov.info,./coverage.xml\n          flags: python,cpp,nightly\n          name: nightly-linux-py${{ matrix.python-version }}\n          token: ${{ secrets.CODECOV_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/scripts/run_vdb.sh",
    "content": "set -e\n\nQUANTIZE_TYPE_LIST=\"int8 int4 fp16 fp32\"\nCASE_TYPE_LIST=\"Performance768D1M Performance768D10M Performance1536D500K\" # respectively test cosine, ip # Performance960D1M l2 metrics\nLOG_FILE=\"bench.log\"\nDATE=$(date +%Y-%m-%d_%H-%M-%S)\nNPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)\n\n# COMMIT_ID = branch-date-sha\nCOMMIT_ID=${GITHUB_REF_NAME}-\"$DATE\"-$(echo ${GITHUB_WORKFLOW_SHA} | cut -c1-8)\nCOMMIT_ID=$(echo \"$COMMIT_ID\" | sed 's/\\//_/g')\necho \"COMMIT_ID: $COMMIT_ID\"\necho \"GITHUB_WORKFLOW_SHA: $GITHUB_WORKFLOW_SHA\"\necho \"workspace: $GITHUB_WORKSPACE\"\nDB_LABEL_PREFIX=\"Zvec16c64g-$COMMIT_ID\"\n\n# install zvec\ngit submodule update --init\n\n# for debug\n#cd ..\n#export SKBUILD_BUILD_DIR=\"$GITHUB_WORKSPACE/../build\"\npwd\n\npython3 -m venv .venv\nsource .venv/bin/activate\npip install cmake ninja psycopg2-binary loguru fire\npip install -e /opt/VectorDBBench\n\nCMAKE_GENERATOR=\"Unix Makefiles\" \\\nCMAKE_BUILD_PARALLEL_LEVEL=\"$NPROC\" \\\npip install -v \"$GITHUB_WORKSPACE\"\n\nfor CASE_TYPE in $CASE_TYPE_LIST; do\n    echo \"Running VectorDBBench for $CASE_TYPE\"\n    DATASET_DESC=\"\"\n    if [ \"$CASE_TYPE\" == \"Performance768D1M\" ]; then\n        DATASET_DESC=\"Performance768D1M - Cohere Cosine\"\n    elif [ \"$CASE_TYPE\" == \"Performance768D10M\" ]; then\n        DATASET_DESC=\"Performance768D10M - Cohere Cosine\"\n    else\n        DATASET_DESC=\"Performance1536D500K - OpenAI IP\"\n    fi\n\n    for QUANTIZE_TYPE in $QUANTIZE_TYPE_LIST; do\n        DB_LABEL=\"$DB_LABEL_PREFIX-$CASE_TYPE-$QUANTIZE_TYPE\"\n        echo \"Running VectorDBBench for $DB_LABEL\"\n\n        VDB_PARAMS=\"--path ${DB_LABEL} --db-label ${DB_LABEL} --case-type ${CASE_TYPE} --num-concurrency 12,14,16,18,20\"\n        if [ \"$CASE_TYPE\" == \"Performance768D1M\" ]; then\n            VDB_PARAMS=\"${VDB_PARAMS} --m 15 --ef-search 180\"\n        elif [ \"$CASE_TYPE\" == \"Performance768D10M\" ]; then\n      
      VDB_PARAMS=\"${VDB_PARAMS} --m 50 --ef-search 118 --is-using-refiner\"\n        else #Performance1536D500K using default params + refiner to monitor performance degradation\n            VDB_PARAMS=\"${VDB_PARAMS} --m 50 --ef-search 100 --is-using-refiner\"\n        fi\n\n        if [ \"$QUANTIZE_TYPE\" == \"fp32\" ]; then\n            vectordbbench zvec ${VDB_PARAMS} 2>&1 | tee $LOG_FILE\n        else\n            vectordbbench zvec ${VDB_PARAMS} --quantize-type \"${QUANTIZE_TYPE}\" 2>&1 | tee $LOG_FILE\n        fi\n\n        RESULT_JSON_PATH=$(grep -o \"/opt/VectorDBBench/.*\\.json\" $LOG_FILE)\n        QPS=$(jq -r '.results[0].metrics.qps' \"$RESULT_JSON_PATH\")\n        RECALL=$(jq -r '.results[0].metrics.recall' \"$RESULT_JSON_PATH\")\n        LATENCY_P99=$(jq -r '.results[0].metrics.serial_latency_p99' \"$RESULT_JSON_PATH\")\n        LOAD_DURATION=$(jq -r '.results[0].metrics.load_duration' \"$RESULT_JSON_PATH\")\n\n        #quote the var to avoid space in the label\n        label_list=\"case_type=\\\"${CASE_TYPE}\\\",dataset_desc=\\\"${DATASET_DESC}\\\",db_label=\\\"${DB_LABEL}\\\",commit=\\\"${COMMIT_ID}\\\",date=\\\"${DATE}\\\",quantize_type=\\\"${QUANTIZE_TYPE}\\\"\"\n        # replace `/` with `_` in label_list\n        label_list=$(echo \"$label_list\" | sed 's/\\//_/g')\n        cat <<EOF > prom_metrics.txt\n        # TYPE vdb_bench_qps gauge\n        vdb_bench_qps{$label_list} $QPS\n        # TYPE vdb_bench_recall gauge\n        vdb_bench_recall{$label_list} $RECALL\n        # TYPE vdb_bench_latency_p99 gauge\n        vdb_bench_latency_p99{$label_list} $LATENCY_P99\n        # TYPE vdb_bench_load_duration gauge\n        vdb_bench_load_duration{$label_list} $LOAD_DURATION\nEOF\n        echo \"prom_metrics:\"\n        cat prom_metrics.txt\n        curl --data-binary @prom_metrics.txt \"http://47.93.34.27:9091/metrics/job/benchmarks-${CASE_TYPE}/case_type/${CASE_TYPE}/quantize_type/${QUANTIZE_TYPE}\" -v\n    done\ndone"
  },
  {
    "path": ".gitignore",
    "content": ".*\n*~\nbazel-*\nbuild*\nbin/*\nlib/*\nvar/*\nvenv*\ntests/integration/conf/*\ntests/de_integration/conf/*\n**/__pycache__/*\ntests/bench/log/*\ntests/integration/integration\ntests/integration/log\ntests/integration/*.log\ntests/de_integration/log\ntests/de_integration/*.log\n!.git*\n!.clang-format\n!.circleci\n!.drone.yml\nsdk/python/dist/\ncompile_commands.json\ndist\nhtml\n*.lcov.info\n\n# Dependencies\n/node_modules\n\n# Production\n/build\n\n# Generated files\n.docusaurus\n.cache-loader\n\n# Misc\n.DS_Store\n.env.local\n.env.development.local\n.env.test.local\n.env.production.local\n\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\n\nallure-*\n\n!build_android.sh"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"thirdparty/googletest/googletest-1.10.0\"]\n\tpath = thirdparty/googletest/googletest-1.10.0\n\turl = https://github.com/google/googletest.git\n[submodule \"thirdparty/sparsehash/sparsehash-2.0.4\"]\n\tpath = thirdparty/sparsehash/sparsehash-2.0.4\n\turl = https://github.com/sparsehash/sparsehash.git\n\tignore = untracked\n[submodule \"thirdparty/gflags/gflags-2.2.2\"]\n\tpath = thirdparty/gflags/gflags-2.2.2\n\turl = https://github.com/gflags/gflags.git\n[submodule \"thirdparty/rocksdb/rocksdb-8.1.1\"]\n\tpath = thirdparty/rocksdb/rocksdb-8.1.1\n\turl = https://github.com/facebook/rocksdb.git\n\tignore = all\n[submodule \"thirdparty/yaml-cpp/yaml-cpp-0.6.3\"]\n\tpath = thirdparty/yaml-cpp/yaml-cpp-0.6.3\n\turl = https://github.com/jbeder/yaml-cpp.git\n[submodule \"thirdparty/arrow/apache-arrow-21.0.0\"]\n\tpath = thirdparty/arrow/apache-arrow-21.0.0\n\turl = https://github.com/apache/arrow.git\n\tignore = all\n[submodule \"thirdparty/CRoaring/CRoaring-2.0.4\"]\n\tpath = thirdparty/CRoaring/CRoaring-2.0.4\n\turl = https://github.com/RoaringBitmap/CRoaring.git\n[submodule \"thirdparty/glog/glog-0.5.0\"]\n\tpath = thirdparty/glog/glog-0.5.0\n\turl = https://github.com/google/glog.git\n\tignore = all\n[submodule \"thirdparty/protobuf/protobuf-3.21.12\"]\n\tpath = thirdparty/protobuf/protobuf-3.21.12\n\turl = https://github.com/protocolbuffers/protobuf.git\n[submodule \"thirdparty/lz4/lz4-1.9.4\"]\n\tpath = thirdparty/lz4/lz4-1.9.4\n\turl = https://github.com/lz4/lz4.git\n[submodule \"thirdparty/antlr/antlr4\"]\n\tpath = thirdparty/antlr/antlr4\n\turl = https://github.com/antlr/antlr4.git\n\tignore = all\n[submodule \"thirdparty/magic_enum/magic_enum-0.9.7\"]\n\tpath = thirdparty/magic_enum/magic_enum-0.9.7\n\turl = https://github.com/Neargye/magic_enum.git\n\tignore = all\n[submodule \"thirdparty/RaBitQ-Library/RaBitQ-Library-0.1\"]\n\tpath = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1\n\turl = 
https://github.com/VectorDB-NTU/RaBitQ-Library.git\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.13)\ncmake_policy(SET CMP0077 NEW)\nproject(zvec)\nset(CC_CXX_STANDARD 17)\n\nset(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -Wall -Werror=return-type\")\nset(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -Wall -Werror=return-type\")\n\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    set(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed\")\n    set(CMAKE_SHARED_LINKER_FLAGS \"${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed\")\nendif()\n\nif(NOT DEFINED PROJECT_ROOT_DIR OR NOT PROJECT_ROOT_DIR)\n    set(PROJECT_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH \"Root directory of the project\" FORCE)\nendif()\n\nmessage(STATUS \"PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}\")\n\ninclude(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(NOT ANDROID AND AUTO_DETECT_ARCH AND CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|i686|i386|x64\")\n  setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)\n  message(STATUS \"best compiler march, sse: \" ${MATH_MARCH_FLAG_SSE} \", avx2: \" ${MATH_MARCH_FLAG_AVX2} \", avx512: \" ${MATH_MARCH_FLAG_AVX512} \", avx512fp16: \" ${MATH_MARCH_FLAG_AVX512FP16})\nendif()\n\ninclude_directories(${PROJECT_ROOT_DIR}/src/include)\ninclude_directories(${PROJECT_ROOT_DIR}/src)\n\noption(BUILD_PYTHON_BINDINGS \"Build Python bindings using pybind11\" OFF)\nmessage(STATUS \"BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}\")\n\noption(BUILD_TOOLS \"Build tools\" ON)\nmessage(STATUS \"BUILD_TOOLS:${BUILD_TOOLS}\")\n\noption(RABITQ_ENABLE_AVX512 \"Compile RaBitQ with AVX-512 support\" OFF)\n\nif(CMAKE_SYSTEM_NAME STREQUAL \"Linux\" AND CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|amd64|AMD64\" AND NOT ANDROID)\n  include(CheckCCompilerFlag)\n\n  check_c_compiler_flag(\"-mavx2\" COMPILER_SUPPORTS_AVX2)\n  check_c_compiler_flag(\"-mavx512f -mavx512bw -mavx512vl\" COMPILER_SUPPORTS_AVX512)\n\n  
if(COMPILER_SUPPORTS_AVX2 OR COMPILER_SUPPORTS_AVX512)\n    set(RABITQ_SUPPORTED ON)\n    add_definitions(-DRABITQ_SUPPORTED=1)\n    if(RABITQ_ENABLE_AVX512 AND COMPILER_SUPPORTS_AVX512)\n      add_definitions(-DRABITQ_COMPILED_AVX512=1)\n      set(RABITQ_ARCH_FLAG \"${MATH_MARCH_FLAG_AVX512}\")\n    else()\n      set(RABITQ_ARCH_FLAG \"${MATH_MARCH_FLAG_AVX2}\")\n    endif()\n  else()\n    set(RABITQ_SUPPORTED OFF)\n    add_definitions(-DRABITQ_SUPPORTED=0)\n    message(STATUS \"RaBitQ support disabled - compiler does not support AVX2 or AVX-512\")\n  endif()\nelse()\n  set(RABITQ_SUPPORTED OFF)\n  add_definitions(-DRABITQ_SUPPORTED=0)\n  message(STATUS \"RaBitQ support disabled - only supported on Linux x86_64\")\nendif()\nmessage(STATUS \"RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}\")\n\noption(USE_OSS_MIRROR \"Use OSS mirror for faster third-party downloads\" OFF)\nif(DEFINED ENV{USE_OSS_MIRROR} AND NOT \"$ENV{USE_OSS_MIRROR}\" STREQUAL \"\")\n  set(USE_OSS_MIRROR \"$ENV{USE_OSS_MIRROR}\" CACHE BOOL \"Use OSS mirror for faster third-party downloads\" FORCE)\nendif()\nmessage(STATUS \"USE_OSS_MIRROR:${USE_OSS_MIRROR}\")\n\ncc_directory(thirdparty)\ncc_directories(src)\ncc_directories(tests)\n\nif(BUILD_TOOLS)\n    cc_directories(tools)\nendif()\n\ngit_version(GIT_SRCS_VER ${PROJECT_ROOT_DIR})\nset(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER})\nset(CPACK_PACKAGE_NAME zvec)\ninclude(CPack)\n\nif(BUILD_PYTHON_BINDINGS)\n    if(APPLE)\n        set(CMAKE_STRIP \"\")\n        message(STATUS \"Disabled strip on macOS to preserve code signature\")\n    endif()\n\n    include(GNUInstallDirs)\n    if(DEFINED SKBUILD_PLATLIB_DIR)\n        set(ZVEC_PY_INSTALL_DIR \"${SKBUILD_PLATLIB_DIR}\")\n    elseif(DEFINED Python_SITEARCH)\n        set(ZVEC_PY_INSTALL_DIR \"${Python_SITEARCH}\")\n    else()\n        set(ZVEC_PY_INSTALL_DIR \"${CMAKE_INSTALL_LIBDIR}\")\n    endif()\n\n    message(STATUS \"Zvec install path: ${ZVEC_PY_INSTALL_DIR}\")\n    install(TARGETS _zvec LIBRARY DESTINATION 
${ZVEC_PY_INSTALL_DIR})\nendif()\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Zvec Code of Conduct\n\n## Our Pledge\n\nWe pledge to foster an open, respectful, and harassment-free environment for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation.\n\n## Expected Behavior\n\n- Use welcoming and inclusive language\n- Respect differing viewpoints and experiences\n- Gracefully accept constructive criticism\n- Focus on what is best for the community\n- Show empathy and kindness toward others\n\n## Unacceptable Behavior\n\n- Harassment, intimidation, or discriminatory conduct\n- Trolling, insulting, or derogatory comments\n- Public or private harassment\n- Publishing others’ private information without consent\n- Any conduct that would reasonably be considered inappropriate in a professional setting\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team \nat **zvec@alibaba-inc.com** (replace with your contact). All complaints will be reviewed and investigated promptly \nand fairly.\n\nThe project team is obligated to respect the privacy and security of the reporter.\n\nConsequences may include:\n- A formal warning\n- Temporary or permanent ban from project spaces\n- Removal of contributions (e.g. comments, PRs)\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1, available at  \nhttps://www.contributor-covenant.org/version/2/1/code_of_conduct.html\n\n[homepage]: https://www.contributor-covenant.org"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Zvec\n\nFirst off, thank you for considering contributing to Zvec! 🙌  \nWhether you're reporting a bug, proposing a feature, improving documentation, or submitting code — every contribution helps make Zvec better.\n\n## Code of Conduct\n\nBy participating, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). Please be respectful, collaborative, and inclusive.\n\n---\n\n## Development Setup\n\n### Prerequisites\n- Python 3.10 - 3.12\n- CMake ≥ 3.26, < 4.0 (`cmake --version`)\n- A C++17-compatible compiler (e.g., `g++-11+`, `clang++`, Apple Clang on macOS)\n\n### Clone & Initialize\n\n```bash\ngit clone --recursive https://github.com/alibaba/zvec.git\ncd zvec\n```\n\n> 💡 **Tip**  \n> - Forgot `--recursive`? Run:  \n>   ```bash\n>   git submodule update --init --recursive\n>   ```\n> - Set up pre-commit hooks:  \n>   ```bash\n>   pip install pre-commit && pre-commit install\n>   ```\n\n### Build from Source (Editable Install)\n```bash\npip install -e \".[dev]\"\n# This installs dev dependencies (pytest, ruff, etc.) 
and builds the C++ extension in-place\n```\n\n> ✅ Verify:\n> ```bash\n> python -c \"import zvec; print('Success!')\"\n> ```\n\n---\n\n## Testing\n\n### Run All Tests\n```bash\npytest python/tests/ -v\n```\n\n### Run with Coverage (Debug/CI)\n```bash\npytest python/tests/ --cov=zvec --cov-report=term-missing\n```\n\n> 🔎 See full rules in `[tool.ruff]` section of `pyproject.toml`.\n\n---\n\n## Build Customization\n\nYou can control build behavior via environment variables or `pyproject.toml`:\n\n| Option | How to Set | Description |\n|--------|------------|-------------|\n| **Build Type** | `CMAKE_BUILD_TYPE=Debug` | `Debug`, `Release`, or `Coverage` (for gcov/lcov) |\n| **Generator** | `CMAKE_GENERATOR=\"Unix Makefiles\"` | Default: `Ninja`; use Make if preferred |\n| **AVX-512** | `ENABLE_SKYLAKE_AVX512=ON` | Enable AVX-512 optimizations (x86_64 only) |\n\nExample (Debug + Make):\n```bash\nCMAKE_BUILD_TYPE=Debug CMAKE_GENERATOR=\"Unix Makefiles\" pip install -v .\n```\n\n---\n\n## Submitting Changes\n\n1. Fork the repo and create a feature branch (`feat/...`, `fix/...`, `docs/...`)\n2. Write clear commit messages (e.g., `fix(query): handle null vector in dense_fp32`)\n3. Ensure tests pass & linter is clean\n4. Open a Pull Request to `main`\n5. Link related issue (e.g., `Closes #123`)\n\n✅ **PRs should include**:\n- Test coverage for new behavior\n- Updates to documentation (if applicable)\n- Reasoning behind non-obvious design choices\n\n---\n\n## Documentation\n\n- User guides: `docs/` (built with MkDocs)\n- API reference: generated from docstrings (follow [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings))\n- Build & deploy: `mkdocs serve` / `mkdocs build`\n\n---\n\n## Need Help\n\n- Browse [existing issues](https://github.com/alibaba/zvec/issues)\n- For sensitive/security issues: email `zvec@alibaba-inc.com`\n\n---\n\n✨ Thanks again for being part of Zvec!\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License."
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_log_2.svg\" />\n    <img src=\"https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_logo_1.svg\" width=\"400\" alt=\"zvec logo\" />\n  </picture>\n</div>\n\n<p align=\"center\">\n  <a href=\"https://codecov.io/github/alibaba/zvec\"><img src=\"https://codecov.io/github/alibaba/zvec/graph/badge.svg?token=O81CT45B66\" alt=\"Code Coverage\"/></a>\n  <a href=\"https://github.com/alibaba/zvec/actions/workflows/01-ci-pipeline.yml\"><img src=\"https://github.com/alibaba/zvec/actions/workflows/01-ci-pipeline.yml/badge.svg?branch=main\" alt=\"Main\"/></a>\n  <a href=\"https://github.com/alibaba/zvec/blob/main/LICENSE\"><img src=\"https://img.shields.io/badge/license-Apache%202.0-blue.svg\" alt=\"License\"/></a>\n  <a href=\"https://pypi.org/project/zvec/\"><img src=\"https://img.shields.io/pypi/v/zvec.svg\" alt=\"PyPI Release\"/></a>\n  <a href=\"https://pypi.org/project/zvec/\"><img src=\"https://img.shields.io/pypi/pyversions/zvec.svg\" alt=\"Python Versions\"/></a>\n  <a href=\"https://www.npmjs.com/package/@zvec/zvec\"><img src=\"https://img.shields.io/npm/v/@zvec/zvec.svg\" alt=\"npm Release\"/></a>\n</p>\n\n<p align=\"center\">\n  <a href=\"https://trendshift.io/repositories/20830\" target=\"_blank\"><img src=\"https://trendshift.io/api/badge/repositories/20830\" alt=\"alibaba%2Fzvec | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"/></a>\n</p>\n\n<p align=\"center\">\n  <a href=\"https://zvec.org/en/docs/quickstart/\">🚀 <strong>Quickstart</strong> </a> |\n  <a href=\"https://zvec.org/en/\">🏠 <strong>Home</strong> </a> |\n  <a href=\"https://zvec.org/en/docs/\">📚 <strong>Docs</strong> </a> |\n  <a href=\"https://zvec.org/en/docs/benchmarks/\">📊 <strong>Benchmarks</strong> </a> |\n  <a href=\"https://deepwiki.com/alibaba/zvec\">🔎 <strong>DeepWiki</strong> </a> |\n  <a 
href=\"https://discord.gg/rKddFBBu9z\">🎮 <strong>Discord</strong> </a>\n</p>\n\n**Zvec** is an open-source, in-process vector database — lightweight, lightning-fast, and designed to embed directly into applications. Built on **Proxima** (Alibaba's battle-tested vector search engine), it delivers production-grade, low-latency, scalable similarity search with minimal setup.\n\n## 💫 Features\n\n- **Blazing Fast**: Searches billions of vectors in milliseconds.\n- **Simple, Just Works**: [Install](#-installation) and start searching in seconds. No servers, no config, no fuss.\n- **Dense + Sparse Vectors**: Work with both dense and sparse embeddings, with native support for multi-vector queries in a single call.\n- **Hybrid Search**: Combine semantic similarity with structured filters for precise results.\n- **Runs Anywhere**: As an in-process library, Zvec runs wherever your code runs — notebooks, servers, CLI tools, or even edge devices.\n\n## 📦 Installation\n\n### [Python](https://pypi.org/project/zvec/)\n\n**Requirements**: Python 3.10 - 3.12\n\n```bash\npip install zvec\n```\n\n### [Node.js](https://www.npmjs.com/package/@zvec/zvec)\n\n```bash\nnpm install @zvec/zvec\n```\n\n### ✅ Supported Platforms\n\n- Linux (x86_64, ARM64)\n- macOS (ARM64)\n\n### 🛠️ Building from Source\n\nIf you prefer to build Zvec from source, please check the [Building from Source](https://zvec.org/en/docs/build/) guide.\n\n## ⚡ One-Minute Example\n\n```python\nimport zvec\n\n# Define collection schema\nschema = zvec.CollectionSchema(\n    name=\"example\",\n    vectors=zvec.VectorSchema(\"embedding\", zvec.DataType.VECTOR_FP32, 4),\n)\n\n# Create collection\ncollection = zvec.create_and_open(path=\"./zvec_example\", schema=schema)\n\n# Insert documents\ncollection.insert([\n    zvec.Doc(id=\"doc_1\", vectors={\"embedding\": [0.1, 0.2, 0.3, 0.4]}),\n    zvec.Doc(id=\"doc_2\", vectors={\"embedding\": [0.2, 0.3, 0.4, 0.1]}),\n])\n\n# Search by vector similarity\nresults = collection.query(\n   
 zvec.VectorQuery(\"embedding\", vector=[0.4, 0.3, 0.3, 0.1]),\n    topk=10\n)\n\n# Results: list of {'id': str, 'score': float, ...}, sorted by relevance\nprint(results)\n```\n\n## 📈 Performance at Scale\n\nZvec delivers exceptional speed and efficiency, making it ideal for demanding production workloads.\n\n<img src=\"https://zvec.oss-cn-hongkong.aliyuncs.com/qps_10M.svg\" width=\"800\" alt=\"Zvec Performance Benchmarks\" />\n\nFor detailed benchmark methodology, configurations, and complete results, please see our [Benchmarks documentation](https://zvec.org/en/docs/benchmarks/).\n\n## 🤝 Join Our Community\n\n<div align=\"center\">\n\nStay updated and get support — scan or click:\n\n<div align=\"center\">\n\n| 💬 DingTalk | 📱 WeChat | 🎮 Discord |\n|:---:|:---:|:---:|\n| <img src=\"https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/dingding.png\" width=\"150\"/> | <img src=\"https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/wechat.png?v=3\" width=\"150\"/> | [![Discord](https://img.shields.io/badge/Discord-Join%20Server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/rKddFBBu9z) |\n| Scan to join | Scan to join | Click to join |\n\n</div>\n\n</div>\n\n## ❤️ Contributing\n\nWe welcome and appreciate contributions from the community! Whether you're fixing a bug, adding a feature, or improving documentation, your help makes Zvec better for everyone.\n\nCheck out our [Contributing Guide](./CONTRIBUTING.md) to get started!\n"
  },
  {
    "path": "cmake/bazel.cmake",
    "content": "##\n##  The following functions used by user's CMakeLists.txt:\n##\n\n##  1. Functions for C/C++\n##\n##  1.1. Add a subdirectory to the build\n##    cc_directory(<source_dir> [binary_dir])\n##\n##  1.2. Add subdirectories to the build\n##    cc_directories(<source_dir1> [source_dir2 ...])\n##\n##  1.3. Build a C/C++ static or shared library\n##    cc_library(\n##        NAME <name>\n##        [STATIC] [SHARED] [STRICT] [ALWAYS_LINK] [EXCLUDE] [PACKED] [SRCS_NO_GLOB]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [PACKED_EXCLUDES pattern1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  1.4. Build a C/C++ executable program\n##    cc_binary(\n##        NAME <name>\n##        [STRICT] [PACKED]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  1.5. Build a C/C++ executable test program\n##    cc_test(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  1.6. Add existing test cases to a test suite\n##    cc_test_suite(<suite_name> [test_name ...])\n##\n##  1.7. 
Import a C/C++ static or shared library\n##    cc_import(\n##        NAME <name>\n##        [STATIC | SHARED] [PACKED]\n##        PATH <file>\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEPS target1 ...]\n##        [IMPLIB <file>]\n##        [PACKED_EXCLUDES pattern1 ...]\n##      )\n##\n##  1.8. Import a C/C++ interface library\n##    cc_interface(\n##        NAME <name>\n##        [PACKED]\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEPS target1 ...]\n##        [PACKED_EXCLUDES pattern1 ...]\n##      )\n##\n##  1.9. Build a C/C++ executable google test program\n##    cc_gtest(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  1.10. Build a C/C++ executable google mock program\n##    cc_gmock(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  1.11. Build a C++ protobuf static or shared library\n##    cc_proto_library(\n##        NAME <name>\n##        [STATIC] [SHARED] [STRICT] [EXCLUDE] [PACKED]\n##        SRCS <file1.proto> [file2.proto ...]\n##        [PROTOROOT path]\n##        [CXXFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [VERSION <version>]\n##        [PROTOBUF_VERSION <Protobuf version>]\n##      )\n##\n\n##  2. Functions for CUDA\n##\n##  2.1. 
Add a subdirectory to the build\n##    cuda_directory(<source_dir> [binary_dir])\n##\n##  2.2. Add subdirectories to the build\n##    cuda_directories(<source_dir1> [source_dir2 ...])\n##\n##  2.3. Build a CUDA static or shared library\n##    cuda_library(\n##        NAME <name>\n##        [STATIC] [SHARED] [STRICT] [ALWAYS_LINK] [EXCLUDE] [PACKED]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [CUDAFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [PACKED_EXCLUDES pattern1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  2.4. Build a CUDA executable program\n##    cuda_binary(\n##        NAME <name>\n##        [STRICT] [PACKED]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [CUDAFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  2.5. Build a CUDA executable test program\n##    cuda_test(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [CUDAFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  2.6. Add existing test cases to a test suite\n##    cuda_test_suite(<suite_name> [test_name ...])\n##\n##  2.7. 
Import a C/C++/CUDA static or shared library\n##    cuda_import(\n##        NAME <name>\n##        [STATIC | SHARED] [PACKED]\n##        PATH <file>\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEPS target1 ...]\n##        [IMPLIB <file>]\n##        [PACKED_EXCLUDES pattern1 ...]\n##      )\n##\n##  2.8. Import a C/C++/CUDA interface library\n##    cuda_interface(\n##        NAME <name>\n##        [PACKED]\n##        [INCS dir1 ...]\n##        [PUBINCS public_dir1 ...]\n##        [DEPS target1 ...]\n##        [PACKED_EXCLUDES pattern1 ...]\n##      )\n##\n##  2.9. Build a CUDA executable google test program\n##    cuda_gtest(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [CUDAFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n##  2.10. Build a CUDA executable google mock program\n##    cuda_gmock(\n##        NAME <name>\n##        [STRICT]\n##        SRCS <file1> [file2 ...]\n##        [INCS dir1 ...]\n##        [DEFS DEF1=1 ...]\n##        [LIBS lib1 ...]\n##        [CFLAGS flag1 ...]\n##        [CXXFLAGS flag1 ...]\n##        [CUDAFLAGS flag1 ...]\n##        [LDFLAGS flag1 ...]\n##        [DEPS target1 ...]\n##        [ARGS args1 ...]\n##        [VERSION <version>]\n##      )\n##\n\n##  3. Utility functions\n##\n##  3.1. Download a git repository\n##    git_repository(\n##        NAME <name>\n##        URL <url>\n##        [TAG <tag>]\n##        [PATH <local path>]\n##      )\n##\n##  3.2. Download a hg repository\n##    hg_repository(\n##        NAME <name>\n##        URL <url>\n##        [TAG <tag>]\n##        [PATH <local path>]\n##      )\n##\n##  3.3. 
Download a svn repository\n##    svn_repository(\n##        NAME <name>\n##        URL <url>\n##        [REV <rev>]\n##        [PATH <local path>]\n##      )\n##\n##  3.4. Download a http archive\n##    http_archive(\n##        NAME <name>\n##        URL <url>\n##        [SHA256 <sha256 value> | SHA1 <sha1 value> | MD5 <md5 value>]\n##        [PATH <local path>]\n##      )\n##\n##  3.5. Retrieve a version string from GIT\n##    git_version(\n##        <result variable>\n##        <repository path>\n##      )\n##\n##  3.6. Retrieve a version string from HG\n##    hg_version(\n##        <result variable>\n##        <repository path>\n##      )\n##\n##  3.7. Retrieve a version string from SVN\n##    svn_version(\n##        <result variable>\n##        <repository path>\n##      )\n##\n\ncmake_minimum_required(VERSION 3.1 FATAL_ERROR)\ninclude(CMakeParseArguments)\n\n# Using AppleClang instead of Clang (Compiler id)\nif(POLICY CMP0025)\n  cmake_policy(SET CMP0025 NEW)\nendif()\n\n# Enable unit testing\nenable_testing()\n\n# Add unittest target\nif(NOT TARGET unittest)\n  add_custom_target(\n      unittest\n      COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure\n      --build-config $<CONFIGURATION>\n    )\nendif()\n\n# Directories of target output\nif(NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)\nendif()\nif(NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)\n  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)\nendif()\nif(NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)\n  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)\nendif()\n\n# RPATH settings\nset(CMAKE_MACOSX_RPATH ON)\nif(NOT ${CMAKE_SYSTEM_NAME} MATCHES \"Darwin\")\n  set(CMAKE_SKIP_BUILD_RPATH ON)\n  set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)\n  if(${CMAKE_SIZEOF_VOID_P} EQUAL \"8\")\n    set(CMAKE_INSTALL_RPATH \"$ORIGIN/../lib64:$ORIGIN/../lib:$ORIGIN\")\n  else()\n    set(CMAKE_INSTALL_RPATH \"$ORIGIN/../lib:$ORIGIN\")\n  endif()\nelse()\n  
set(CMAKE_INSTALL_RPATH \"@loader_path/../lib:@loader_path\")\nendif()\n\n# Define standard installation directories\nif(NOT CMAKE_INSTALL_LIBDIR)\n  set(CMAKE_INSTALL_LIBDIR lib)\nendif()\nif(NOT CMAKE_INSTALL_BINDIR)\n  set(CMAKE_INSTALL_BINDIR bin)\nendif()\nif(NOT CMAKE_INSTALL_INCDIR)\n  set(CMAKE_INSTALL_INCDIR include)\nendif()\nif(NOT CMAKE_INSTALL_ETCDIR)\n  set(CMAKE_INSTALL_ETCDIR etc)\nendif()\n\n# Generates a compile_commands.json\nset(CMAKE_EXPORT_COMPILE_COMMANDS ON)\nset(CMAKE_POSITION_INDEPENDENT_CODE ON)\nset(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)\n\nif(APPLE OR ANDROID)\n    option(CLANG_USE_LIBCXX \"Use libc++ instead of libstdc++\" ON)\nelse()\n    option(CLANG_USE_LIBCXX \"Use libc++ instead of libstdc++\" OFF)\nendif()\n\nset(CLANG_STDLIB_OPTION \"\")\nif(CLANG_USE_LIBCXX)\n    set(CLANG_STDLIB_OPTION \"-stdlib=libc++\")\nelse()\n    set(CLANG_STDLIB_OPTION \"-stdlib=libstdc++\")\nendif()\n\nif(NOT MSVC)\n  # Use color in diagnostics\n  set(\n      _COMPILER_FLAGS\n      \"$<$<C_COMPILER_ID:Clang>:-fcolor-diagnostics;${CLANG_STDLIB_OPTION}>\"\n      \"$<$<C_COMPILER_ID:AppleClang>:-fcolor-diagnostics>\"\n      \"$<$<C_COMPILER_ID:GNU>:-fdiagnostics-color=always>\"\n    )\n  add_compile_options(\n      \"$<$<COMPILE_LANGUAGE:C>:${_COMPILER_FLAGS}>\"\n      \"$<$<COMPILE_LANGUAGE:CXX>:${_COMPILER_FLAGS}>\"\n    )\n  unset(_COMPILER_FLAGS)\nelse()\n  # Replace the default compiling flags\n  set(\n      _COMPILER_FLAGS\n      CMAKE_CXX_FLAGS\n      CMAKE_CXX_FLAGS_DEBUG\n      CMAKE_CXX_FLAGS_RELEASE\n      CMAKE_CXX_FLAGS_RELWITHDEBINFO\n      CMAKE_CXX_FLAGS_MINSIZEREL\n      CMAKE_C_FLAGS\n      CMAKE_C_FLAGS_DEBUG\n      CMAKE_C_FLAGS_RELEASE\n      CMAKE_C_FLAGS_RELWITHDEBINFO\n      CMAKE_C_FLAGS_MINSIZEREL\n    )\n  foreach(COMPILER_FLAG ${_COMPILER_FLAGS})\n    string(REPLACE \"/MT\" \"/MD\" ${COMPILER_FLAG} \"${${COMPILER_FLAG}}\")\n    string(REGEX REPLACE \"/W[0-9]\" \"\" ${COMPILER_FLAG} \"${${COMPILER_FLAG}}\")\n  endforeach()\n  
unset(_COMPILER_FLAGS)\n  add_definitions(-D_CRT_SECURE_NO_WARNINGS)\n  # Build shared library as default\n  set(BUILD_SHARED_LIBS ON)\nendif()\n\nset(CMAKE_C_FLAGS_ASAN ${CMAKE_C_FLAGS_DEBUG})\nset(CMAKE_CXX_FLAGS_ASAN ${CMAKE_CXX_FLAGS_DEBUG})\nset(CMAKE_EXE_LINKER_FLAGS_ASAN ${CMAKE_EXE_LINKER_FLAGS_DEBUG})\nset(CMAKE_SHARED_LINKER_FLAGS_ASAN ${CMAKE_SHARED_LINKER_FLAGS_DEBUG})\nset(CMAKE_STATIC_LINKER_FLAGS_ASAN ${CMAKE_STATIC_LINKER_FLAGS_DEBUG})\nset(CMAKE_MODULE_LINKER_FLAGS_ASAN ${CMAKE_MODULE_LINKER_FLAGS_DEBUG})\nset(CMAKE_C_FLAGS_COVERAGE ${CMAKE_C_FLAGS_DEBUG})\nset(CMAKE_CXX_FLAGS_COVERAGE ${CMAKE_CXX_FLAGS_DEBUG})\nset(CMAKE_EXE_LINKER_FLAGS_COVERAGE ${CMAKE_EXE_LINKER_FLAGS_DEBUG})\nset(CMAKE_SHARED_LINKER_FLAGS_COVERAGE ${CMAKE_SHARED_LINKER_FLAGS_DEBUG})\nset(CMAKE_STATIC_LINKER_FLAGS_COVERAGE ${CMAKE_STATIC_LINKER_FLAGS_DEBUG})\nset(CMAKE_MODULE_LINKER_FLAGS_COVERAGE ${CMAKE_MODULE_LINKER_FLAGS_DEBUG})\n\n# C/C++ ASAN compile flags\nset(\n    BAZEL_CC_ASAN_COMPILE_FLAGS\n    \"$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:Clang>:-fsanitize=address>>\"\n    \"$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:AppleClang>:-fsanitize=address>>\"\n    \"$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:GNU>:-fsanitize=address>>\"\n    \"$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:MSVC>:/fsanitize=address>>\"\n  )\n\n# C/C++ COVERAGE compile flags\nset(\n    BAZEL_CC_COVERAGE_COMPILE_FLAGS\n    \"$<$<CONFIG:COVERAGE>:$<$<CXX_COMPILER_ID:Clang>:--coverage>>\"\n    \"$<$<CONFIG:COVERAGE>:$<$<CXX_COMPILER_ID:AppleClang>:--coverage>>\"\n    \"$<$<CONFIG:COVERAGE>:$<$<CXX_COMPILER_ID:GNU>:--coverage>>\"\n  )\n\n# C/C++ strict compile flags\nif(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)\n  set(\n      BAZEL_CC_STRICT_COMPILE_FLAGS\n      \"$<$<CXX_COMPILER_ID:Clang>:-Wall;-Wextra;-Wshadow>\"\n      \"$<$<CXX_COMPILER_ID:AppleClang>:-Wall;-Wextra;-Wshadow>\"\n      \"$<$<CXX_COMPILER_ID:GNU>:-Wall;-Wextra;-Wshadow-local;-Wno-misleading-indentation>\"\n      \"$<$<CXX_COMPILER_ID:MSVC>:/W4>\"\n    
  ${BAZEL_CC_ASAN_COMPILE_FLAGS}\n      ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}\n    )\nelse()\n  set(\n      BAZEL_CC_STRICT_COMPILE_FLAGS\n      \"$<$<CXX_COMPILER_ID:Clang>:-Wall;-Wextra;-Wshadow>\"\n      \"$<$<CXX_COMPILER_ID:AppleClang>:-Wall;-Wextra;-Wshadow>\"\n      \"$<$<CXX_COMPILER_ID:GNU>:-Wall;-Wextra;-Wshadow;-Wno-misleading-indentation>\"\n      \"$<$<CXX_COMPILER_ID:MSVC>:/W4>\"\n      ${BAZEL_CC_ASAN_COMPILE_FLAGS}\n      ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}\n    )\nendif()\n\n\n# C/C++ strict link flags\nset(\n    BAZEL_CC_STRICT_LINK_FLAGS\n    \"$<$<CXX_COMPILER_ID:Clang>:${CLANG_STDLIB_OPTION}>\"\n    ${BAZEL_CC_ASAN_COMPILE_FLAGS}\n    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}\n  )\n\n# C/C++ unstrict compile flags\nset(\n    BAZEL_CC_UNSTRICT_COMPILE_FLAGS\n    \"$<$<CXX_COMPILER_ID:Clang>:-Wall>\"\n    \"$<$<CXX_COMPILER_ID:AppleClang>:-Wall>\"\n    \"$<$<CXX_COMPILER_ID:GNU>:-Wall>\"\n    \"$<$<CXX_COMPILER_ID:MSVC>:/W3>\"\n    ${BAZEL_CC_ASAN_COMPILE_FLAGS}\n    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}\n  )\n\n# C/C++ unstrict link flags\nset(\n    BAZEL_CC_UNSTRICT_LINK_FLAGS\n    \"$<$<CXX_COMPILER_ID:Clang>:${CLANG_STDLIB_OPTION}>\"\n    ${BAZEL_CC_ASAN_COMPILE_FLAGS}\n    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}\n  )\n\n# CUDA strict compile flags\nset(\n    BAZEL_CUDA_STRICT_COMPILE_FLAGS\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:Clang>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:AppleClang>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:GNU>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:MSVC>:/W4>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:Clang>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:AppleClang>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:GNU>:-Wall;-Wextra;-Wshadow>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:MSVC>:/W4>>\"\n    
\"$<$<CONFIG:DEBUG>:$<$<COMPILE_LANGUAGE:CUDA>:-G>>\"\n  )\n\n# CUDA strict link flags\nset(BAZEL_CUDA_STRICT_LINK_FLAGS \"\")\n\n# CUDA unstrict compile flags\nset(\n    BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:Clang>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:AppleClang>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:GNU>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:MSVC>:/W3>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:Clang>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:AppleClang>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:GNU>:-Wall>>\"\n    \"$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:MSVC>:/W3>>\"\n    \"$<$<CONFIG:DEBUG>:$<$<COMPILE_LANGUAGE:CUDA>:-G>>\"\n  )\n\n# CUDA unstrict link flags\nset(BAZEL_CUDA_UNSTRICT_LINK_FLAGS \"\")\n\n## Find workspace directory\nfunction(_find_workspace_directory _RESULT)\n  # Find Workspace.cmake folder\n  set(_CURRENT_WORKSPACE_DIR ${CMAKE_CURRENT_SOURCE_DIR})\n  get_filename_component(\n      _PARENT_WORKSPACE_DIR ${_CURRENT_WORKSPACE_DIR} DIRECTORY\n    )\n  while(NOT (\"${_CURRENT_WORKSPACE_DIR}\" STREQUAL \"${_PARENT_WORKSPACE_DIR}\"))\n    if(EXISTS \"${_CURRENT_WORKSPACE_DIR}/Workspace.cmake\")\n      set(${_RESULT} ${_CURRENT_WORKSPACE_DIR} PARENT_SCOPE)\n      message(STATUS \"Found workspace at ${${_RESULT}}\")\n      break()\n    endif()\n\n    # Find next parent folder\n    set(_CURRENT_WORKSPACE_DIR ${_PARENT_WORKSPACE_DIR})\n    get_filename_component(\n        _PARENT_WORKSPACE_DIR ${_CURRENT_WORKSPACE_DIR} DIRECTORY\n      )\n  endwhile()\nendfunction()\n\n## Retrieve absolute paths\nfunction(_absolute_paths _RESULT)\n  foreach(FILEPATH ${ARGN})\n    if(NOT IS_ABSOLUTE ${FILEPATH})\n      get_filename_component(FILEPATH ${FILEPATH} ABSOLUTE)\n    endif()\n    list(APPEND FILEPATHS ${FILEPATH})\n  endforeach()\n  set(${_RESULT} \"${FILEPATHS}\" 
PARENT_SCOPE)\nendfunction()\n\n## Add both shared and static library\nmacro(_add_library _NAME _OPTION)\n  add_library(${_NAME}_objects OBJECT ${_OPTION} ${ARGN})\n  add_library(\n      ${_NAME}_static STATIC ${_OPTION} $<TARGET_OBJECTS:${_NAME}_objects>\n    )\n  add_library(\n      ${_NAME} SHARED ${_OPTION} $<TARGET_OBJECTS:${_NAME}_objects>\n    )\n  add_dependencies(${_NAME} ${_NAME}_static)\n  if(NOT MSVC)\n    set_property(TARGET ${_NAME}_static PROPERTY OUTPUT_NAME ${_NAME})\n  endif()\nendmacro()\n\n## Link dependencies\nfunction(_targets_link_dependencies _NAME)\n  foreach(LIB ${ARGN})\n    if(TARGET ${LIB})\n      list(APPEND LIBS_DEPS ${LIB})\n      list(\n          APPEND LIBS_INCS\n          \"$<TARGET_PROPERTY:${LIB},INTERFACE_INCLUDE_DIRECTORIES>\"\n        )\n    endif()\n  endforeach()\n\n  if(LIBS_DEPS)\n    add_dependencies(${_NAME} ${LIBS_DEPS})\n    target_include_directories(${_NAME} PRIVATE \"${LIBS_INCS}\")\n  endif()\nendfunction()\n\n## Link libraries\nfunction(_target_link_libraries _NAME)\n  function(_collect_always_link_libs LIB_LIST RESULT_VAR)\n    if(NOT _COLLECT_ALWAYS_LINK_VISITED)\n      set(_COLLECT_ALWAYS_LINK_VISITED \"\" PARENT_SCOPE)\n    endif()\n\n    set(LOCAL_RESULT \"\")\n    foreach(LIB ${LIB_LIST})\n      if(NOT TARGET ${LIB})\n        continue()\n      endif()\n\n      list(FIND _COLLECT_ALWAYS_LINK_VISITED ${LIB} ALREADY_VISITED)\n      if(NOT ALREADY_VISITED EQUAL -1)\n        continue()\n      endif()\n\n      list(APPEND _COLLECT_ALWAYS_LINK_VISITED ${LIB})\n      set(_COLLECT_ALWAYS_LINK_VISITED \"${_COLLECT_ALWAYS_LINK_VISITED}\" PARENT_SCOPE)\n\n      get_target_property(ALWAYS_LINK ${LIB} ALWAYS_LINK)\n      if(ALWAYS_LINK)\n        list(APPEND LOCAL_RESULT ${LIB})\n      endif()\n\n      get_target_property(DEP_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)\n      if(DEP_LIBS)\n        _collect_always_link_libs(\"${DEP_LIBS}\" DEP_ALWAYS_LINK_LIBS)\n        list(APPEND LOCAL_RESULT ${DEP_ALWAYS_LINK_LIBS})\n      
endif()\n\n      get_target_property(LINK_LIBS ${LIB} LINK_LIBRARIES)\n      if(LINK_LIBS)\n        _collect_always_link_libs(\"${LINK_LIBS}\" LINK_ALWAYS_LINK_LIBS)\n        list(APPEND LOCAL_RESULT ${LINK_ALWAYS_LINK_LIBS})\n      endif()\n    endforeach()\n\n    list(REMOVE_DUPLICATES LOCAL_RESULT)\n    set(${RESULT_VAR} \"${LOCAL_RESULT}\" PARENT_SCOPE)\n  endfunction()\n\n  _collect_always_link_libs(\"${ARGN}\" ALL_ALWAYS_LINK_LIBS)\n\n  set(ALL_LIBS_TO_PROCESS ${ARGN})\n  foreach(ALWAYS_LIB ${ALL_ALWAYS_LINK_LIBS})\n    list(FIND ARGN ${ALWAYS_LIB} FOUND_INDEX)\n    if(FOUND_INDEX EQUAL -1)\n      list(APPEND ALL_LIBS_TO_PROCESS ${ALWAYS_LIB})\n    endif()\n  endforeach()\n\n  list(REMOVE_DUPLICATES ALL_LIBS_TO_PROCESS)\n\n  foreach(LIB ${ALL_LIBS_TO_PROCESS})\n    if(NOT TARGET ${LIB})\n      list(APPEND LINK_LIBS ${LIB})\n      continue()\n    endif()\n\n    list(FIND ALL_ALWAYS_LINK_LIBS ${LIB} IS_ALWAYS_LINK)\n    if(IS_ALWAYS_LINK EQUAL -1)\n      list(APPEND LINK_LIBS ${LIB})\n      continue()\n    endif()\n\n    if(NOT MSVC)\n      if(NOT ${CMAKE_SYSTEM_NAME} MATCHES \"Darwin\")\n        list(APPEND LINK_LIBS -Wl,--whole-archive ${LIB} -Wl,--no-whole-archive)\n      else()\n        list(APPEND LINK_LIBS -Wl,-force_load ${LIB})\n      endif()\n    else()\n      # Microsoft Visual C++\n      list(APPEND LINK_LIBS /WHOLEARCHIVE:$<TARGET_FILE:${LIB}>)\n      get_target_property(OTHER_LINK_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)\n      if(OTHER_LINK_LIBS)\n        foreach(OTHER_LIB ${OTHER_LINK_LIBS})\n          list(FIND ALL_LIBS_TO_PROCESS ${OTHER_LIB} FOUND_INDEX)\n          if(FOUND_INDEX EQUAL -1)\n            list(APPEND LINK_LIBS ${OTHER_LIB})\n          endif()\n        endforeach()\n      endif()\n      list(APPEND LIBS_DEPS ${LIB})\n      list(\n          APPEND LIBS_INCS\n          \"$<TARGET_PROPERTY:${LIB},INTERFACE_INCLUDE_DIRECTORIES>\"\n        )\n    endif()\n  endforeach()\n\n  target_link_libraries(${_NAME} ${LINK_LIBS})\n  if(LIBS_DEPS)\n  
  add_dependencies(${_NAME} ${LIBS_DEPS})\n    target_include_directories(${_NAME} PRIVATE \"${LIBS_INCS}\")\n  endif()\nendfunction()\n\n## Add a subdirectory to the build\nfunction(cc_directory)\n  add_subdirectory(${ARGN})\nendfunction()\n\n## Add subdirectories to the build\nfunction(cc_directories)\n  foreach(SRC_DIR ${ARGN})\n    add_subdirectory(${SRC_DIR})\n  endforeach()\nendfunction()\n\n## Set the properties of target\nfunction(_cc_target_properties)\n  cmake_parse_arguments(\n      CC_ARGS \"STRICT;ALWAYS_LINK\" \"NAME;VERSION;C_STANDARD;CXX_STANDARD\"\n      \"INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name provided.\")\n  endif()\n\n  get_target_property(TARGET_TYPE ${CC_ARGS_NAME} TYPE)\n  if((\"${TARGET_TYPE}\" STREQUAL \"SHARED_LIBRARY\") OR\n      (\"${TARGET_TYPE}\" STREQUAL \"STATIC_LIBRARY\") OR\n      (\"${TARGET_TYPE}\" STREQUAL \"EXECUTABLE\"))\n    set(TARGET_LINKABLE TRUE)\n  endif()\n\n  if(CC_ARGS_ALWAYS_LINK)\n    if((\"${TARGET_TYPE}\" STREQUAL \"STATIC_LIBRARY\") OR\n        (\"${TARGET_TYPE}\" STREQUAL \"OBJECT_LIBRARY\"))\n      set_property(TARGET ${CC_ARGS_NAME} PROPERTY ALWAYS_LINK TRUE)\n    endif()\n  endif()\n\n  # Set the warning level of compiling\n  if(CC_ARGS_STRICT)\n    target_compile_options(\n        ${CC_ARGS_NAME} PRIVATE \"${BAZEL_CC_STRICT_COMPILE_FLAGS}\"\n      )\n    if(TARGET_LINKABLE)\n      target_link_libraries(${CC_ARGS_NAME} \"${BAZEL_CC_STRICT_LINK_FLAGS}\")\n    endif()\n  else()\n    target_compile_options(\n        ${CC_ARGS_NAME} PRIVATE \"${BAZEL_CC_UNSTRICT_COMPILE_FLAGS}\"\n      )\n    if(TARGET_LINKABLE)\n      target_link_libraries(${CC_ARGS_NAME} \"${BAZEL_CC_UNSTRICT_LINK_FLAGS}\")\n    endif()\n  endif()\n\n  if(CC_ARGS_DEFS)\n    target_compile_definitions(${CC_ARGS_NAME} PRIVATE \"${CC_ARGS_DEFS}\")\n  endif()\n\n  if(CC_ARGS_CFLAGS OR CC_ARGS_CXXFLAGS)\n    target_compile_options(\n        
${CC_ARGS_NAME} PRIVATE\n        \"$<$<COMPILE_LANGUAGE:C>:${CC_ARGS_CFLAGS}>\"\n        \"$<$<COMPILE_LANGUAGE:CXX>:${CC_ARGS_CXXFLAGS}>\"\n      )\n  endif()\n\n  if(CC_ARGS_LDFLAGS)\n    string(REPLACE \";\" \" \" CC_ARGS_LDFLAGS \"${CC_ARGS_LDFLAGS}\")\n    set_property(\n        TARGET ${CC_ARGS_NAME} PROPERTY LINK_FLAGS \"${CC_ARGS_LDFLAGS}\"\n      )\n  endif()\n\n  if(CC_ARGS_INCS)\n    _absolute_paths(INC_DIRS ${CC_ARGS_INCS})\n    target_include_directories(${CC_ARGS_NAME} PRIVATE \"${INC_DIRS}\")\n  endif()\n\n  if(BAZEL_WORKSPACE_DIR)\n    target_include_directories(${CC_ARGS_NAME} PRIVATE \"${BAZEL_WORKSPACE_DIR}\")\n  endif()\n\n  if(CC_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})\n    target_include_directories(${CC_ARGS_NAME} PUBLIC \"${INC_DIRS}\")\n  endif()\n\n  if(CC_ARGS_LIBS)\n    if(NOT TARGET_LINKABLE)\n      _targets_link_dependencies(${CC_ARGS_NAME} ${CC_ARGS_LIBS})\n    else()\n      if (\"${TARGET_TYPE}\" STREQUAL \"EXECUTABLE\")\n        _target_link_libraries(${CC_ARGS_NAME} \"${CC_ARGS_LIBS}\")\n      else()\n        target_link_libraries(${CC_ARGS_NAME} \"${CC_ARGS_LIBS}\")\n      endif()\n    endif()\n  endif()\n\n  if(CC_ARGS_DEPS)\n    add_dependencies(${CC_ARGS_NAME} \"${CC_ARGS_DEPS}\")\n  endif()\n\n  if(CC_ARGS_VERSION)\n    set_property(\n        TARGET ${CC_ARGS_NAME} PROPERTY VERSION \"${CC_ARGS_VERSION}\"\n      )\n  endif()\n\n  if(NOT CC_C_STANDARD)\n    set(CC_C_STANDARD 99)\n  endif()\n\n  if(NOT CC_CXX_STANDARD)\n    set(CC_CXX_STANDARD 11)\n  endif()\n\n  set_target_properties(\n      ${CC_ARGS_NAME} PROPERTIES DEFINE_SYMBOL \"\"\n      C_STANDARD ${CC_C_STANDARD} CXX_STANDARD ${CC_CXX_STANDARD}\n      C_STANDARD_REQUIRED ON C_EXTENSIONS ON\n      CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF\n      WINDOWS_EXPORT_ALL_SYMBOLS ON\n    )\nendfunction()\n\n## Build a C/C++ static or shared library\nfunction(cc_library)\n  cmake_parse_arguments(\n      CC_ARGS\n      
\"STATIC;SHARED;EXCLUDE;PACKED;SRCS_NO_GLOB\"\n      \"NAME;VERSION\"\n      \"SRCS;INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;PACKED_EXCLUDES\"\n      ${ARGN}\n  )\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name provided.\")\n  endif()\n\n  if(CC_ARGS_SRCS_NO_GLOB)\n    set(SOURCE_FILES ${CC_ARGS_SRCS})\n    if(NOT SOURCE_FILES)\n      message(FATAL_ERROR \"No source files provided for ${CC_ARGS_NAME} (SRCS_NO_GLOB mode).\")\n    endif()\n  else()\n    set(SOURCE_FILES \"\")\n    foreach(_src IN LISTS CC_ARGS_SRCS)\n      if(IS_ABSOLUTE \"${_src}\" OR NOT \"${_src}\" MATCHES \"[*?]\")\n        list(APPEND SOURCE_FILES \"${_src}\")\n      else()\n        file(GLOB _globbed_srcs ${_src})\n        list(APPEND SOURCE_FILES ${_globbed_srcs})\n      endif()\n    endforeach()\n    if(NOT SOURCE_FILES)\n      message(FATAL_ERROR \"No source files found for ${CC_ARGS_NAME} after globbing.\")\n    endif()\n  endif()\n\n  if(CC_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CC_ARGS_NAME}\")\n    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CC_ARGS_VERSION}\")\n  endif()\n\n  if(CC_ARGS_EXCLUDE)\n    set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)\n  endif()\n\n  if(CC_ARGS_SHARED AND CC_ARGS_STATIC)\n    _add_library(${CC_ARGS_NAME} \"${EXCLUDE_OPTION}\" ${SOURCE_FILES})\n  elseif(CC_ARGS_SHARED)\n    add_library(${CC_ARGS_NAME} SHARED ${EXCLUDE_OPTION} ${SOURCE_FILES})\n  elseif(CC_ARGS_STATIC)\n    add_library(${CC_ARGS_NAME} STATIC ${EXCLUDE_OPTION} ${SOURCE_FILES})\n  else()\n    add_library(${CC_ARGS_NAME} ${EXCLUDE_OPTION} ${SOURCE_FILES})\n  endif()\n\n  if(TARGET ${CC_ARGS_NAME}_objects)\n    _cc_target_properties(\n        NAME \"${CC_ARGS_NAME}_objects\"\n        INCS \"${CC_ARGS_INCS};${CC_ARGS_PUBINCS}\"\n        DEFS \"${CC_ARGS_DEFS}\"\n        LIBS \"${CC_ARGS_LIBS}\"\n        CFLAGS \"${CC_ARGS_CFLAGS}\"\n        CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n        LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n        DEPS 
\"${CC_ARGS_DEPS}\"\n        \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n  endif()\n\n  if(TARGET ${CC_ARGS_NAME}_static)\n    _cc_target_properties(\n        NAME \"${CC_ARGS_NAME}_static\"\n        INCS \"${CC_ARGS_INCS}\"\n        PUBINCS \"${CC_ARGS_PUBINCS}\"\n        DEFS \"${CC_ARGS_DEFS}\"\n        LIBS \"${CC_ARGS_LIBS}\"\n        CFLAGS \"${CC_ARGS_CFLAGS}\"\n        CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n        LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n        DEPS \"${CC_ARGS_DEPS}\"\n        \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n    if(CC_ARGS_PACKED)\n      install(\n        TARGETS ${CC_ARGS_NAME}_static\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n      )\n    endif()\n  endif()\n\n  _cc_target_properties(\n      NAME \"${CC_ARGS_NAME}\"\n      INCS \"${CC_ARGS_INCS}\"\n      PUBINCS \"${CC_ARGS_PUBINCS}\"\n      DEFS \"${CC_ARGS_DEFS}\"\n      LIBS \"${CC_ARGS_LIBS}\"\n      CFLAGS \"${CC_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      VERSION \"${CC_ARGS_VERSION}\"\n      \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n  )\n  if(CC_ARGS_PACKED)\n    install(\n        TARGETS ${CC_ARGS_NAME}\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        LIBRARY DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n    )\n    if(CC_ARGS_PUBINCS)\n      foreach(PACKED_EXCLUDE ${CC_ARGS_PACKED_EXCLUDES})\n        list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n      endforeach()\n      install(\n          DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n          FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\" PATTERN \"*.hxx\"\n          ${PATTERN_EXCLUDES}\n      )\n    endif()\n  endif()\nendfunction()\n\n## Build a C/C++ executable program\nfunction(cc_binary)\n  cmake_parse_arguments(\n      CC_ARGS \"PACKED\" \"NAME;VERSION\"\n     \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n\n  if(NOT CC_ARGS_NAME)\n    
message(FATAL_ERROR \"No target name provided.\")\n  endif()\n\n  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})\n  if(NOT CC_ARGS_SRCS)\n    message(FATAL_ERROR \"No source files found of ${CC_ARGS_NAME}.\")\n  endif()\n\n  if(CC_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CC_ARGS_NAME}\")\n    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CC_ARGS_VERSION}\")\n  endif()\n  add_executable(${CC_ARGS_NAME} ${CC_ARGS_SRCS})\n\n  if(CC_ARGS_PACKED)\n    install(\n        TARGETS ${CC_ARGS_NAME} RUNTIME DESTINATION \"${CMAKE_INSTALL_BINDIR}\"\n      )\n  endif()\n\n  _cc_target_properties(\n      NAME \"${CC_ARGS_NAME}\"\n      INCS \"${CC_ARGS_INCS}\"\n      DEFS \"${CC_ARGS_DEFS}\"\n      LIBS \"${CC_ARGS_LIBS}\"\n      CFLAGS \"${CC_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      VERSION \"${CC_ARGS_VERSION}\"\n      \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n    )\nendfunction()\n\n## Build a C/C++ executable test program\nfunction(cc_test)\n  cmake_parse_arguments(\n      CC_ARGS \"\" \"NAME;VERSION\"\n      \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n    )\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name provided.\")\n  endif()\n\n  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})\n  if(NOT CC_ARGS_SRCS)\n    message(FATAL_ERROR \"No source files found of ${CC_ARGS_NAME}.\")\n  endif()\n\n  if(CC_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CC_ARGS_NAME}\")\n    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CC_ARGS_VERSION}\")\n  endif()\n  add_executable(${CC_ARGS_NAME} EXCLUDE_FROM_ALL ${CC_ARGS_SRCS})\n\n  _cc_target_properties(\n      NAME \"${CC_ARGS_NAME}\"\n      INCS \"${CC_ARGS_INCS}\"\n      DEFS \"${CC_ARGS_DEFS}\"\n      LIBS \"${CC_ARGS_LIBS}\"\n      CFLAGS \"${CC_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      
\"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n  add_dependencies(unittest ${CC_ARGS_NAME})\n  add_custom_target(\n      unittest.${CC_ARGS_NAME}\n      COMMAND $<TARGET_FILE:${CC_ARGS_NAME}> \"${CC_ARGS_ARGS}\"\n      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}\n      DEPENDS ${CC_ARGS_NAME}\n    )\n  add_test(\n      NAME ${CC_ARGS_NAME}\n      COMMAND $<TARGET_FILE:${CC_ARGS_NAME}> \"${CC_ARGS_ARGS}\"\n      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}\n    )\nendfunction()\n\n## Add existing test cases to a test suite\nfunction(cc_test_suite _NAME)\n  if(NOT TARGET unittest.${_NAME})\n    add_custom_target(unittest.${_NAME} COMMAND \"\")\n  endif()\n  foreach(TEST_TARGET ${ARGN})\n    list(APPEND TEST_TARGETS unittest.${TEST_TARGET})\n  endforeach()\n  if(TEST_TARGETS)\n    add_dependencies(unittest.${_NAME} ${TEST_TARGETS})\n  endif()\nendfunction()\n\n## Import a C/C++ static or shared library\nfunction(cc_import)\n  cmake_parse_arguments(\n      CC_ARGS \"STATIC;SHARED;PACKED\"\n      \"NAME;PATH;IMPLIB\" \"INCS;PUBINCS;DEPS;PACKED_EXCLUDES\" ${ARGN}\n    )\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB CC_ARGS_PATH ${CC_ARGS_PATH})\n  if(NOT CC_ARGS_PATH)\n    message(FATAL_ERROR \"No imported target file found of ${CC_ARGS_NAME}.\")\n  endif()\n  if(MSVC AND CC_ARGS_SHARED AND NOT CC_ARGS_IMPLIB)\n    string(REGEX REPLACE\n        \".[Dd][Ll][Ll]$\" \".lib\" CC_ARGS_IMPLIB ${CC_ARGS_PATH}\n      )\n  endif()\n\n  if(CC_ARGS_SHARED)\n    add_library(${CC_ARGS_NAME} SHARED IMPORTED GLOBAL)\n  elseif(CC_ARGS_STATIC)\n    add_library(${CC_ARGS_NAME} STATIC IMPORTED GLOBAL)\n  else()\n    add_library(${CC_ARGS_NAME} UNKNOWN IMPORTED GLOBAL)\n  endif()\n\n  set_property(\n      TARGET ${CC_ARGS_NAME} PROPERTY IMPORTED_LOCATION ${CC_ARGS_PATH}\n    )\n  if(MSVC AND CC_ARGS_SHARED)\n    set_property(\n        TARGET ${CC_ARGS_NAME} PROPERTY IMPORTED_IMPLIB ${CC_ARGS_IMPLIB}\n      )\n  endif()\n\n  if(CC_ARGS_INCS)\n   
 _absolute_paths(INC_DIRS ${CC_ARGS_INCS})\n    foreach(INC_DIR ${INC_DIRS})\n      set_property(\n          TARGET ${CC_ARGS_NAME} APPEND PROPERTY\n          INTERFACE_INCLUDE_DIRECTORIES \"${INC_DIR}\"\n        )\n    endforeach()\n  endif()\n\n  if(CC_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})\n    foreach(INC_DIR ${INC_DIRS})\n      set_property(\n          TARGET ${CC_ARGS_NAME} APPEND PROPERTY\n          INTERFACE_INCLUDE_DIRECTORIES \"${INC_DIR}\"\n        )\n    endforeach()\n  endif()\n\n  if(CC_ARGS_DEPS)\n    add_dependencies(${CC_ARGS_NAME} \"${CC_ARGS_DEPS}\")\n  endif()\n\n  if(CC_ARGS_PACKED)\n    install(\n        TARGETS ${CC_ARGS_NAME}\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        LIBRARY DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n      )\n    if(CC_ARGS_PUBINCS)\n      foreach(PACKED_EXCLUDE ${CC_ARGS_PACKED_EXCLUDES})\n        list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n      endforeach()\n      install(\n          DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n          FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\" PATTERN \"*.hxx\"\n          ${PATTERN_EXCLUDES}\n        )\n    endif()\n  endif()\nendfunction()\n\n## Import a C/C++ interface library\nfunction(cc_interface)\n  cmake_parse_arguments(\n      CC_ARGS \"PACKED\" \"NAME\" \"INCS;PUBINCS;DEPS;PACKED_EXCLUDES\" ${ARGN}\n    )\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  add_library(${CC_ARGS_NAME} INTERFACE GLOBAL)\n  if(CC_ARGS_INCS)\n    _absolute_paths(INC_DIRS ${CC_ARGS_INCS})\n    target_include_directories(${CC_ARGS_NAME} INTERFACE \"${INC_DIRS}\")\n  endif()\n\n  if(CC_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})\n    target_include_directories(${CC_ARGS_NAME} INTERFACE \"${INC_DIRS}\")\n  endif()\n\n  if(CC_ARGS_DEPS)\n    add_dependencies(${CC_ARGS_NAME} \"${CC_ARGS_DEPS}\")\n  endif()\n\n  if(CC_ARGS_PACKED AND 
CC_ARGS_PUBINCS)\n    foreach(PACKED_EXCLUDE ${CC_ARGS_PACKED_EXCLUDES})\n      list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n    endforeach()\n    install(\n        DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n        FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\" PATTERN \"*.hxx\"\n        ${PATTERN_EXCLUDES}\n      )\n  endif()\nendfunction()\n\n## Find gtest library\nfunction(_find_gtest)\n  if(DEFINED FIND_GTEST_LIBS AND DEFINED FIND_GTEST_INCS)\n    return()\n  endif()\n\n  if(NOT TARGET gtest OR NOT TARGET gtest_main)\n    # Find gtest using 'find_package'\n    find_package(GTest REQUIRED)\n    set(\n        FIND_GTEST_INCS \"${GTEST_INCLUDE_DIRS}\"\n        CACHE STRING \"GTest includes\"\n      )\n    set(\n        FIND_GTEST_LIBS \"${GTEST_BOTH_LIBRARIES}\"\n        CACHE STRING \"GTest libraries\"\n      )\n  else()\n    # Find gtest using target names\n    set(FIND_GTEST_INCS \"\" CACHE STRING \"GTest includes\")\n    set(FIND_GTEST_LIBS \"gtest;gtest_main\" CACHE STRING \"GTest libraries\")\n  endif()\nendfunction()\n\n## Build a C/C++ executable google test program\nfunction(cc_gtest)\n  cmake_parse_arguments(\n    CC_ARGS \"\" \"NAME;VERSION\"\n    \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n  )\n  _find_gtest()\n  cc_test(\n      NAME \"${CC_ARGS_NAME}\"\n      VERSION \"${CC_ARGS_VERSION}\"\n      SRCS \"${CC_ARGS_SRCS}\"\n      INCS \"${CC_ARGS_INCS};${FIND_GTEST_INCS}\"\n      DEFS \"${CC_ARGS_DEFS}\"\n      LIBS \"${CC_ARGS_LIBS};${FIND_GTEST_LIBS}\"\n      CFLAGS \"${CC_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      ARGS \"${CC_ARGS_ARGS}\"\n    )\nendfunction()\n\n## Find gmock library\nfunction(_find_gmock)\n  if(DEFINED FIND_GMOCK_LIBS AND DEFINED FIND_GMOCK_INCS)\n    return()\n  endif()\n\n  if(NOT TARGET gmock OR NOT TARGET gmock_main)\n    # Find gmock/gtest using 'find_package'\n    
find_package(GMock REQUIRED)\n    find_package(GTest REQUIRED)\n    set(\n        FIND_GMOCK_INCS \"${GMOCK_INCLUDE_DIRS};${GTEST_INCLUDE_DIRS}\"\n        CACHE STRING \"GMock includes\"\n      )\n    set(\n        FIND_GMOCK_LIBS \"${GMOCK_BOTH_LIBRARIES};${GTEST_LIBRARIES}\"\n        CACHE STRING \"GMock libraries\"\n      )\n  else()\n    # Find gmock using target names\n    set(FIND_GMOCK_INCS \"\" CACHE STRING \"GMock includes\")\n    set(FIND_GMOCK_LIBS \"gmock;gmock_main\" CACHE STRING \"GMock libraries\")\n  endif()\nendfunction()\n\n## Build a C/C++ executable google mock program\nfunction(cc_gmock)\n  cmake_parse_arguments(\n    CC_ARGS \"\" \"NAME;VERSION\"\n    \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n  )\n  _find_gmock()\n  cc_test(\n      NAME \"${CC_ARGS_NAME}\"\n      VERSION \"${CC_ARGS_VERSION}\"\n      SRCS \"${CC_ARGS_SRCS}\"\n      INCS \"${CC_ARGS_INCS};${FIND_GMOCK_INCS}\"\n      DEFS \"${CC_ARGS_DEFS}\"\n      LIBS \"${CC_ARGS_LIBS};${FIND_GMOCK_LIBS}\"\n      CFLAGS \"${CC_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      ARGS \"${CC_ARGS_ARGS}\"\n    )\nendfunction()\n\n## Find protobuf library\nfunction(_find_protobuf _VERSION)\n  if(DEFINED CC_PROTOBUF_PROTOC_${_VERSION})\n    return()\n  endif()\n\n  # Find protobuf using 'find_package'\n  if(NOT TARGET protoc OR NOT TARGET libprotobuf)\n    find_package(Protobuf ${_VERSION} REQUIRED)\n    set(\n        CC_PROTOBUF_PROTOC_${_VERSION}\n        \"${PROTOBUF_PROTOC_EXECUTABLE}\" CACHE PATH \"Protobuf compiler\"\n      )\n    set(\n        CC_PROTOBUF_INCS_${_VERSION}\n        \"${PROTOBUF_INCLUDE_DIRS}\" CACHE STRING \"Protobuf includes\"\n      )\n    set(\n        CC_PROTOBUF_LIBS_${_VERSION}\n        \"${PROTOBUF_LIBRARIES}\" CACHE STRING \"Protobuf libraries\"\n      )\n    return()\n  endif()\n\n  # Find protobuf using target names\n  get_target_property(protoc_VERSION protoc 
VERSION)\n  get_target_property(libprotobuf_VERSION libprotobuf VERSION)\n  if(_VERSION)\n    if(${protoc_VERSION} VERSION_LESS ${_VERSION})\n      message(\n          FATAL_ERROR\n          \"The 'protoc' version is ${protoc_VERSION}, less than ${_VERSION}.\"\n        )\n    endif()\n    if(${libprotobuf_VERSION} VERSION_LESS ${_VERSION})\n      message(\n          FATAL_ERROR\n          \"The 'libprotobuf' version is ${libprotobuf_VERSION}, \"\n          \"less than ${_VERSION}.\"\n        )\n    endif()\n  endif()\n\n  message(STATUS \"Found binary 'protoc ${protoc_VERSION}'\")\n  message(STATUS \"Found library 'libprotobuf ${libprotobuf_VERSION}'\")\n  set(\n      CC_PROTOBUF_PROTOC_${_VERSION}\n      \"$<TARGET_FILE:protoc>\" CACHE PATH \"Protobuf compiler\"\n    )\n  get_target_property(protoc_SOURCE_DIR protoc SOURCE_DIR)\n  get_filename_component(protoc_INCLUDE_DIR ${protoc_SOURCE_DIR}/../src ABSOLUTE)\n  set(\n      CC_PROTOBUF_INCS_${_VERSION}\n      \"${protoc_INCLUDE_DIR}\" CACHE STRING \"Protobuf includes\"\n    )\n  set(\n      CC_PROTOBUF_LIBS_${_VERSION} libprotobuf CACHE STRING \"Protobuf libraries\"\n    )\nendfunction()\n\n## Build a C++ protobuf static or shared library\nfunction(cc_proto_library)\n  cmake_parse_arguments(\n      CC_ARGS \"STATIC;SHARED;EXCLUDE;PACKED\"\n      \"NAME;VERSION;PROTOROOT;PROTOBUF_VERSION\"\n      \"SRCS;CXXFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n\n  _find_protobuf(\"${CC_ARGS_PROTOBUF_VERSION}\")\n  set(CC_PROTOBUF_PROTOC ${CC_PROTOBUF_PROTOC_${CC_ARGS_PROTOBUF_VERSION}})\n  if(DEFINED GLOBAL_CC_PROTOBUF_PROTOC)\n    set(CC_PROTOBUF_PROTOC ${GLOBAL_CC_PROTOBUF_PROTOC})\n  endif()\n  set(CC_PROTOBUF_INCS ${CC_PROTOBUF_INCS_${CC_ARGS_PROTOBUF_VERSION}})\n  set(CC_PROTOBUF_LIBS ${CC_PROTOBUF_LIBS_${CC_ARGS_PROTOBUF_VERSION}})\n\n  if(NOT CC_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})\n  if(NOT CC_ARGS_SRCS)\n    message(FATAL_ERROR \"No source 
files found of ${CC_ARGS_NAME}.\")\n  endif()\n\n  if(CC_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CC_ARGS_NAME}\")\n    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CC_ARGS_VERSION}\")\n  endif()\n\n  if(CC_ARGS_EXCLUDE)\n    set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)\n  endif()\n\n  set(PROTO_ROOT ${CMAKE_CURRENT_SOURCE_DIR})\n  if(CC_ARGS_PROTOROOT)\n    get_filename_component(PROTO_ROOT ${CC_ARGS_PROTOROOT} ABSOLUTE)\n  endif()\n\n  # Compile proto files to C++ sources\n  set(CPP_OUTPATH \"${CMAKE_CURRENT_BINARY_DIR}\")\n  foreach(PROTO_FILE ${CC_ARGS_SRCS})\n    get_filename_component(PROTO_FILE ${PROTO_FILE} ABSOLUTE)\n\n    if(NOT ${PROTO_FILE} MATCHES \"\\\\.proto$$\")\n      message(FATAL_ERROR \"Unrecognized proto file ${PROTO_FILE}\")\n    endif()\n    if(NOT ${PROTO_FILE} MATCHES \"^${PROTO_ROOT}\")\n      message(FATAL_ERROR \"'${PROTO_FILE}' NOT IN '${PROTO_ROOT}'\")\n    endif()\n\n    string(\n        REGEX REPLACE \"^${PROTO_ROOT}(/?)\" \"\" ROOT_CLEANED_FILE ${PROTO_FILE}\n      )\n    string(REGEX REPLACE \"\\\\.proto$$\" \"\" EXT_CLEANED_FILE ${ROOT_CLEANED_FILE})\n    set(CPP_FILE \"${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.cc\")\n    set(HDR_FILE \"${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.h\")\n    set(INJ_FILE \"${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.cmake\")\n    file(RELATIVE_PATH REL_CPP_FILE ${CMAKE_BINARY_DIR} ${CPP_FILE})\n\n    set(INJECTED_SCRIPT\n        \"foreach(SRC ${EXT_CLEANED_FILE}.pb.cc ${EXT_CLEANED_FILE}.pb.h)\\n\"\n        \"  file(READ \\$\\{SRC\\} SRC_CODE)\\n\"\n        \"  file(REMOVE \\$\\{SRC\\})\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#ifdef __GNUC__\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#pragma GCC diagnostic push\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#pragma GCC diagnostic ignored \\\\\\\"-Wshadow\\\\\\\"\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#pragma GCC diagnostic ignored \\\\\\\"-Wunused-parameter\\\\\\\"\\\\n\\\")\\n\"\n        \"  
file(APPEND \\$\\{SRC\\} \\\"#endif\\\\n\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"\\$\\{SRC_CODE\\}\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"\\\\n#ifdef __GNUC__\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#pragma GCC diagnostic pop\\\\n\\\")\\n\"\n        \"  file(APPEND \\$\\{SRC\\} \\\"#endif\\\\n\\\")\\n\"\n        \"endforeach()\\n\"\n      )\n    file(WRITE \"${INJ_FILE}\" ${INJECTED_SCRIPT})\n\n    add_custom_command(\n        OUTPUT \"${CPP_FILE}\" \"${HDR_FILE}\"\n        # COMMAND ${CMAKE_COMMAND} -E make_directory ${CPP_OUTPATH}\n        COMMAND ${CC_PROTOBUF_PROTOC}\n        --cpp_out \"${CPP_OUTPATH}\" --python_out \"${CPP_OUTPATH}\"\n        --proto_path \"${PROTO_ROOT}\" --proto_path \"${CC_PROTOBUF_INCS}\" \"${PROTO_FILE}\"\n\n        COMMAND ${CMAKE_COMMAND} -P \"${INJ_FILE}\"\n        DEPENDS \"${PROTO_FILE}\"\n        COMMENT \"Generating CXX source ${REL_CPP_FILE}\"\n        VERBATIM\n      )\n    list(APPEND CC_SRCS \"${CPP_FILE}\" \"${HDR_FILE}\")\n  endforeach()\n\n  # Compile C++ sources\n  if(CC_ARGS_SHARED AND CC_ARGS_STATIC)\n    _add_library(${CC_ARGS_NAME} \"${EXCLUDE_OPTION}\" \"${CC_SRCS}\")\n  elseif(CC_ARGS_SHARED)\n    add_library(${CC_ARGS_NAME} SHARED ${EXCLUDE_OPTION} ${CC_SRCS})\n  elseif(CC_ARGS_STATIC)\n    add_library(${CC_ARGS_NAME} STATIC ${EXCLUDE_OPTION} ${CC_SRCS})\n  else()\n    add_library(${CC_ARGS_NAME} ${EXCLUDE_OPTION} ${CC_SRCS})\n  endif()\n\n  if(TARGET ${CC_ARGS_NAME}_objects)\n    _cc_target_properties(\n        NAME \"${CC_ARGS_NAME}_objects\"\n        INCS \"${CPP_OUTPATH};${CC_PROTOBUF_INCS}\"\n        LIBS \"${CC_PROTOBUF_LIBS}\"\n        CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n        LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n        DEPS \"${CC_ARGS_DEPS}\"\n        \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n      )\n  endif()\n\n  if(TARGET ${CC_ARGS_NAME}_static)\n    _cc_target_properties(\n        NAME \"${CC_ARGS_NAME}_static\"\n        PUBINCS 
\"${CPP_OUTPATH};${CC_PROTOBUF_INCS}\"\n        LIBS \"${CC_PROTOBUF_LIBS}\"\n        CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n        LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n        DEPS \"${CC_ARGS_DEPS}\"\n        \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n      )\n    if(CC_ARGS_PACKED)\n      install(\n          TARGETS ${CC_ARGS_NAME}_static\n          ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        )\n    endif()\n  endif()\n\n  _cc_target_properties(\n      NAME \"${CC_ARGS_NAME}\"\n      PUBINCS \"${CPP_OUTPATH};${CC_PROTOBUF_INCS}\"\n      LIBS \"${CC_PROTOBUF_LIBS}\"\n      CXXFLAGS \"${CC_ARGS_CXXFLAGS}\"\n      LDFLAGS \"${CC_ARGS_LDFLAGS}\"\n      DEPS \"${CC_ARGS_DEPS}\"\n      VERSION \"${CC_ARGS_VERSION}\"\n      \"${CC_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n  if(CC_ARGS_PACKED)\n    install(\n        TARGETS ${CC_ARGS_NAME}\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        LIBRARY DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n      )\n  endif()\nendfunction()\n\n## Add a subdirectory to the build\nfunction(cuda_directory)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n  cc_directory(${ARGN})\nendfunction()\n\n## Add subdirectories to the build\nfunction(cuda_directories)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n  cc_directories(${ARGN})\nendfunction()\n\n## Set the properties of cuda target\nfunction(_cuda_target_properties)\n  cmake_parse_arguments(\n      CUDA_ARGS \"STRICT;ALWAYS_LINK\" \"NAME;VERSION;C_STANDARD;CXX_STANDARD\"\n      \"INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  get_target_property(TARGET_TYPE ${CUDA_ARGS_NAME} TYPE)\n  if((\"${TARGET_TYPE}\" STREQUAL \"SHARED_LIBRARY\") OR\n      (\"${TARGET_TYPE}\" STREQUAL \"STATIC_LIBRARY\") OR\n      (\"${TARGET_TYPE}\" STREQUAL \"EXECUTABLE\"))\n    
set(TARGET_LINKABLE TRUE)\n  endif()\n\n  if(CUDA_ARGS_ALWAYS_LINK)\n    if((\"${TARGET_TYPE}\" STREQUAL \"STATIC_LIBRARY\") OR\n        (\"${TARGET_TYPE}\" STREQUAL \"OBJECT_LIBRARY\"))\n      set_property(TARGET ${CUDA_ARGS_NAME} PROPERTY ALWAYS_LINK TRUE)\n    endif()\n  endif()\n\n  # Set the warning level of compiling\n  if(CUDA_ARGS_STRICT)\n    target_compile_options(\n        ${CUDA_ARGS_NAME} PRIVATE \"${BAZEL_CUDA_STRICT_COMPILE_FLAGS}\"\n      )\n    if(TARGET_LINKABLE)\n      target_link_libraries(\n          ${CUDA_ARGS_NAME} \"${BAZEL_CUDA_STRICT_LINK_FLAGS}\"\n        )\n    endif()\n  else()\n    target_compile_options(\n        ${CUDA_ARGS_NAME} PRIVATE \"${BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS}\"\n      )\n    if(TARGET_LINKABLE)\n      target_link_libraries(\n          ${CUDA_ARGS_NAME} \"${BAZEL_CUDA_UNSTRICT_LINK_FLAGS}\"\n        )\n    endif()\n  endif()\n\n  target_compile_options(\n      ${CUDA_ARGS_NAME} PRIVATE\n      \"$<$<COMPILE_LANGUAGE:CUDA>:-ccbin=${CMAKE_CXX_COMPILER}>\"\n    )\n\n  if(CUDA_ARGS_DEFS)\n    target_compile_definitions(${CUDA_ARGS_NAME} PRIVATE \"${CUDA_ARGS_DEFS}\")\n  endif()\n\n  if(CUDA_ARGS_CFLAGS OR CUDA_ARGS_CXXFLAGS OR CUDA_ARGS_CUDAFLAGS)\n    target_compile_options(\n        ${CUDA_ARGS_NAME} PRIVATE\n        \"$<$<COMPILE_LANGUAGE:C>:${CUDA_ARGS_CFLAGS}>\"\n        \"$<$<COMPILE_LANGUAGE:CXX>:${CUDA_ARGS_CXXFLAGS}>\"\n        \"$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_ARGS_CUDAFLAGS}>\"\n      )\n  endif()\n\n  if(CUDA_ARGS_LDFLAGS)\n    string(REPLACE \";\" \" \" CUDA_ARGS_LDFLAGS \"${CUDA_ARGS_LDFLAGS}\")\n    set_property(\n        TARGET ${CUDA_ARGS_NAME} PROPERTY LINK_FLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      )\n  endif()\n\n  if(CUDA_ARGS_INCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})\n    target_include_directories(${CUDA_ARGS_NAME} PRIVATE \"${INC_DIRS}\")\n  endif()\n\n  target_include_directories(\n      ${CUDA_ARGS_NAME} PRIVATE \"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}\"\n    )\n\n  
if(BAZEL_WORKSPACE_DIR)\n    target_include_directories(\n        ${CUDA_ARGS_NAME} PRIVATE \"${BAZEL_WORKSPACE_DIR}\"\n      )\n  endif()\n\n  if(CUDA_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})\n    target_include_directories(${CUDA_ARGS_NAME} PUBLIC \"${INC_DIRS}\")\n  endif()\n\n  if(CUDA_ARGS_LIBS)\n    if(NOT TARGET_LINKABLE)\n      _targets_link_dependencies(${CUDA_ARGS_NAME} ${CUDA_ARGS_LIBS})\n    else()\n      if (\"${TARGET_TYPE}\" STREQUAL \"EXECUTABLE\")\n        _target_link_libraries(${CUDA_ARGS_NAME} \"${CUDA_ARGS_LIBS}\")\n      else()\n        target_link_libraries(${CUDA_ARGS_NAME} \"${CUDA_ARGS_LIBS}\")\n      endif()\n    endif()\n  endif()\n\n  if(CUDA_ARGS_DEPS)\n    add_dependencies(${CUDA_ARGS_NAME} \"${CUDA_ARGS_DEPS}\")\n  endif()\n\n  if(CUDA_ARGS_VERSION)\n    set_property(\n        TARGET ${CUDA_ARGS_NAME} PROPERTY VERSION \"${CUDA_ARGS_VERSION}\"\n      )\n  endif()\n\n  if(NOT CUDA_C_STANDARD)\n    set(CUDA_C_STANDARD 99)\n  endif()\n\n  if(NOT CUDA_CXX_STANDARD)\n    set(CUDA_CXX_STANDARD 11)\n  endif()\n\n  set_target_properties(\n      ${CUDA_ARGS_NAME} PROPERTIES DEFINE_SYMBOL \"\"\n      C_STANDARD ${CUDA_C_STANDARD} CXX_STANDARD ${CUDA_CXX_STANDARD}\n      C_STANDARD_REQUIRED ON C_EXTENSIONS ON\n      CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF\n      CUDA_STANDARD 11 CUDA_STANDARD_REQUIRED ON CUDA_EXTENSIONS OFF\n      WINDOWS_EXPORT_ALL_SYMBOLS ON\n    )\nendfunction()\n\n## Build a CUDA static or shared library\nfunction(cuda_library)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n\n  cmake_parse_arguments(\n      CUDA_ARGS \"STATIC;SHARED;EXCLUDE;PACKED\" \"NAME;VERSION\"\n      \"SRCS;INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;PACKED_EXCS\"\n      ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})\n  if(NOT CUDA_ARGS_SRCS)\n 
   message(FATAL_ERROR \"No source files found of ${CUDA_ARGS_NAME}.\")\n  endif()\n\n  if(CUDA_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CUDA_ARGS_NAME}\")\n    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CUDA_ARGS_VERSION}\")\n  endif()\n\n  if(CUDA_ARGS_EXCLUDE)\n    set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)\n  endif()\n\n  if(CUDA_ARGS_SHARED AND CUDA_ARGS_STATIC)\n    _add_library(${CUDA_ARGS_NAME} \"${EXCLUDE_OPTION}\" \"${CUDA_ARGS_SRCS}\")\n  elseif(CUDA_ARGS_SHARED)\n    add_library(${CUDA_ARGS_NAME} SHARED ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})\n  elseif(CUDA_ARGS_STATIC)\n    add_library(${CUDA_ARGS_NAME} STATIC ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})\n  else()\n    add_library(${CUDA_ARGS_NAME} ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})\n  endif()\n\n  if(TARGET ${CUDA_ARGS_NAME}_objects)\n    _cuda_target_properties(\n        NAME \"${CUDA_ARGS_NAME}_objects\"\n        INCS \"${CUDA_ARGS_INCS};${CUDA_ARGS_PUBINCS}\"\n        DEFS \"${CUDA_ARGS_DEFS}\"\n        LIBS \"${CUDA_ARGS_LIBS}\"\n        CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n        CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n        CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n        LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n        DEPS \"${CUDA_ARGS_DEPS}\"\n        \"${CUDA_ARGS_UNPARSED_ARGUMENTS}\"\n      )\n  endif()\n\n  if(TARGET ${CUDA_ARGS_NAME}_static)\n    _cuda_target_properties(\n        NAME \"${CUDA_ARGS_NAME}_static\"\n        INCS \"${CUDA_ARGS_INCS}\"\n        PUBINCS \"${CUDA_ARGS_PUBINCS}\"\n        DEFS \"${CUDA_ARGS_DEFS}\"\n        LIBS \"${CUDA_ARGS_LIBS}\"\n        CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n        CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n        CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n        LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n        DEPS \"${CUDA_ARGS_DEPS}\"\n        \"${CUDA_ARGS_UNPARSED_ARGUMENTS}\"\n      )\n    if(CUDA_ARGS_PACKED)\n      install(\n          TARGETS ${CUDA_ARGS_NAME}_static\n          ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        )\n    
endif()\n  endif()\n\n  _cuda_target_properties(\n      NAME \"${CUDA_ARGS_NAME}\"\n      INCS \"${CUDA_ARGS_INCS}\"\n      PUBINCS \"${CUDA_ARGS_PUBINCS}\"\n      DEFS \"${CUDA_ARGS_DEFS}\"\n      LIBS \"${CUDA_ARGS_LIBS}\"\n      CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n      CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n      LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      DEPS \"${CUDA_ARGS_DEPS}\"\n      VERSION \"${CUDA_ARGS_VERSION}\"\n      \"${CUDA_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n  if(CUDA_ARGS_PACKED)\n    install(\n        TARGETS ${CUDA_ARGS_NAME}\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        LIBRARY DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n      )\n    if(CUDA_ARGS_PUBINCS)\n      foreach(PACKED_EXCLUDE ${CUDA_ARGS_PACKED_EXCS})\n        list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n      endforeach()\n      install(\n          DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n          FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\"\n          PATTERN \"*.hxx\" PATTERN \"*.cuh\"\n          ${PATTERN_EXCLUDES}\n        )\n    endif()\n  endif()\nendfunction()\n\n## Build a CUDA executable program\nfunction(cuda_binary)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n\n  cmake_parse_arguments(\n      CUDA_ARGS \"PACKED\" \"NAME;VERSION\"\n     \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name provided.\")\n  endif()\n\n  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})\n  if(NOT CUDA_ARGS_SRCS)\n    message(FATAL_ERROR \"No source files found of ${CUDA_ARGS_NAME}.\")\n  endif()\n\n  if(CUDA_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CUDA_ARGS_NAME}\")\n    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CUDA_ARGS_VERSION}\")\n  endif()\n  add_executable(${CUDA_ARGS_NAME} ${CUDA_ARGS_SRCS})\n\n  
if(CUDA_ARGS_PACKED)\n    install(\n        TARGETS ${CUDA_ARGS_NAME} RUNTIME DESTINATION \"${CMAKE_INSTALL_BINDIR}\"\n      )\n  endif()\n\n  _cuda_target_properties(\n      NAME \"${CUDA_ARGS_NAME}\"\n      INCS \"${CUDA_ARGS_INCS}\"\n      DEFS \"${CUDA_ARGS_DEFS}\"\n      LIBS \"${CUDA_ARGS_LIBS}\"\n      CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n      CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n      LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      DEPS \"${CUDA_ARGS_DEPS}\"\n      VERSION \"${CUDA_ARGS_VERSION}\"\n      \"${CUDA_ARGS_UNPARSED_ARGUMENTS}\"\n    )\nendfunction()\n\n## Build a CUDA executable test program\nfunction(cuda_test)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n\n  cmake_parse_arguments(\n      CUDA_ARGS \"\" \"NAME;VERSION\"\n      \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})\n  if(NOT CUDA_ARGS_SRCS)\n    message(FATAL_ERROR \"No source files found of ${CUDA_ARGS_NAME}.\")\n  endif()\n\n  if(CUDA_ARGS_VERSION)\n    string(REPLACE \"-\" \"_\" MACRO_PREFIX \"${CUDA_ARGS_NAME}\")\n    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION=\"${CUDA_ARGS_VERSION}\")\n  endif()\n  add_executable(${CUDA_ARGS_NAME} EXCLUDE_FROM_ALL ${CUDA_ARGS_SRCS})\n\n  _cuda_target_properties(\n      NAME \"${CUDA_ARGS_NAME}\"\n      INCS \"${CUDA_ARGS_INCS}\"\n      DEFS \"${CUDA_ARGS_DEFS}\"\n      LIBS \"${CUDA_ARGS_LIBS}\"\n      CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n      CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n      LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      DEPS \"${CUDA_ARGS_DEPS}\"\n      \"${CUDA_ARGS_UNPARSED_ARGUMENTS}\"\n    )\n  add_dependencies(unittest ${CUDA_ARGS_NAME})\n  add_custom_target(\n      unittest.${CUDA_ARGS_NAME}\n      COMMAND 
$<TARGET_FILE:${CUDA_ARGS_NAME}> \"${CUDA_ARGS_ARGS}\"\n      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}\n      DEPENDS ${CUDA_ARGS_NAME}\n    )\n  add_test(\n      NAME ${CUDA_ARGS_NAME}\n      COMMAND $<TARGET_FILE:${CUDA_ARGS_NAME}> \"${CUDA_ARGS_ARGS}\"\n      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}\n    )\nendfunction()\n\n## Add existing test cases to a test suite\nfunction(cuda_test_suite)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n  cc_test_suite(${ARGN})\nendfunction()\n\n## Import a C/C++/CUDA static or shared library\nfunction(cuda_import)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n\n  cmake_parse_arguments(\n      CUDA_ARGS \"STATIC;SHARED;PACKED\"\n      \"NAME;PATH;IMPLIB\" \"INCS;PUBINCS;DEPS;PACKED_EXCLUDES\" ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB CUDA_ARGS_PATH ${CUDA_ARGS_PATH})\n  if(NOT CUDA_ARGS_PATH)\n    message(FATAL_ERROR \"No imported target file found of ${CUDA_ARGS_NAME}.\")\n  endif()\n  if(MSVC AND CUDA_ARGS_SHARED AND NOT CUDA_ARGS_IMPLIB)\n    string(REGEX REPLACE\n        \".[Dd][Ll][Ll]$\" \".lib\" CUDA_ARGS_IMPLIB ${CUDA_ARGS_PATH}\n      )\n  endif()\n\n  if(CUDA_ARGS_SHARED)\n    add_library(${CUDA_ARGS_NAME} SHARED IMPORTED GLOBAL)\n  elseif(CUDA_ARGS_STATIC)\n    add_library(${CUDA_ARGS_NAME} STATIC IMPORTED GLOBAL)\n  else()\n    add_library(${CUDA_ARGS_NAME} UNKNOWN IMPORTED GLOBAL)\n  endif()\n\n  set_property(\n      TARGET ${CUDA_ARGS_NAME} PROPERTY IMPORTED_LOCATION ${CUDA_ARGS_PATH}\n    )\n  if(MSVC AND CUDA_ARGS_SHARED)\n    set_property(\n        TARGET ${CUDA_ARGS_NAME} PROPERTY IMPORTED_IMPLIB ${CUDA_ARGS_IMPLIB}\n      )\n  endif()\n\n  if(CUDA_ARGS_INCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})\n    foreach(INC_DIR ${INC_DIRS})\n      set_property(\n          TARGET ${CUDA_ARGS_NAME} APPEND PROPERTY\n          
INTERFACE_INCLUDE_DIRECTORIES \"${INC_DIR}\"\n        )\n    endforeach()\n  endif()\n\n  if(CUDA_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})\n    foreach(INC_DIR ${INC_DIRS})\n      set_property(\n          TARGET ${CUDA_ARGS_NAME} APPEND PROPERTY\n          INTERFACE_INCLUDE_DIRECTORIES \"${INC_DIR}\"\n        )\n    endforeach()\n  endif()\n\n  if(CUDA_ARGS_DEPS)\n    add_dependencies(${CUDA_ARGS_NAME} \"${CUDA_ARGS_DEPS}\")\n  endif()\n\n  if(CUDA_ARGS_PACKED)\n    install(\n        TARGETS ${CUDA_ARGS_NAME}\n        ARCHIVE DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n        LIBRARY DESTINATION \"${CMAKE_INSTALL_LIBDIR}\"\n      )\n    if(CUDA_ARGS_PUBINCS)\n      foreach(PACKED_EXCLUDE ${CUDA_ARGS_PACKED_EXCLUDES})\n        list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n      endforeach()\n      install(\n          DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n          FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\"\n          PATTERN \"*.hxx\" PATTERN \"*.cuh\"\n          ${PATTERN_EXCLUDES}\n        )\n    endif()\n  endif()\nendfunction()\n\n## Import a C/C++/CUDA interface library\nfunction(cuda_interface)\n  if(NOT CMAKE_CUDA_COMPILER)\n    message(FATAL_ERROR \"No CUDA language supported.\")\n  endif()\n\n  cmake_parse_arguments(\n      CUDA_ARGS \"PACKED\" \"NAME\" \"INCS;PUBINCS;DEPS;PACKED_EXCLUDES\" ${ARGN}\n    )\n\n  if(NOT CUDA_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  add_library(${CUDA_ARGS_NAME} INTERFACE GLOBAL)\n  if(CUDA_ARGS_INCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})\n    target_include_directories(${CUDA_ARGS_NAME} INTERFACE \"${INC_DIRS}\")\n  endif()\n\n  if(CUDA_ARGS_PUBINCS)\n    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})\n    target_include_directories(${CUDA_ARGS_NAME} INTERFACE \"${INC_DIRS}\")\n  endif()\n\n  if(CUDA_ARGS_DEPS)\n    add_dependencies(${CUDA_ARGS_NAME} \"${CUDA_ARGS_DEPS}\")\n  endif()\n\n  
if(CUDA_ARGS_PACKED AND CUDA_ARGS_PUBINCS)\n    foreach(PACKED_EXCLUDE ${CUDA_ARGS_PACKED_EXCLUDES})\n      list(APPEND PATTERN_EXCLUDES \"PATTERN;${PACKED_EXCLUDE};EXCLUDE\")\n    endforeach()\n    install(\n        DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}\n        FILES_MATCHING PATTERN \"*.h\" PATTERN \"*.hpp\"\n        PATTERN \"*.hxx\" PATTERN \"*.cuh\"\n        ${PATTERN_EXCLUDES}\n      )\n  endif()\nendfunction()\n\n## Build a C/C++/CUDA executable google test program\nfunction(cuda_gtest)\n  cmake_parse_arguments(\n      CUDA_ARGS \"\" \"NAME;VERSION\"\n      \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n    )\n  _find_gtest()\n  cuda_test(\n      NAME \"${CUDA_ARGS_NAME}\"\n      VERSION \"${CUDA_ARGS_VERSION}\"\n      SRCS \"${CUDA_ARGS_SRCS}\"\n      INCS \"${CUDA_ARGS_INCS};${FIND_GTEST_INCS}\"\n      DEFS \"${CUDA_ARGS_DEFS}\"\n      LIBS \"${CUDA_ARGS_LIBS};${FIND_GTEST_LIBS}\"\n      CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n      CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n      LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      DEPS \"${CUDA_ARGS_DEPS}\"\n      ARGS \"${CUDA_ARGS_ARGS}\"\n    )\nendfunction()\n\n## Build a C/C++/CUDA executable google mock program\nfunction(cuda_gmock)\n  cmake_parse_arguments(\n      CUDA_ARGS \"\" \"NAME;VERSION\"\n      \"SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS\" ${ARGN}\n    )\n  _find_gmock()\n  cuda_test(\n      NAME \"${CUDA_ARGS_NAME}\"\n      VERSION \"${CUDA_ARGS_VERSION}\"\n      SRCS \"${CUDA_ARGS_SRCS}\"\n      INCS \"${CUDA_ARGS_INCS};${FIND_GMOCK_INCS}\"\n      DEFS \"${CUDA_ARGS_DEFS}\"\n      LIBS \"${CUDA_ARGS_LIBS};${FIND_GMOCK_LIBS}\"\n      CFLAGS \"${CUDA_ARGS_CFLAGS}\"\n      CXXFLAGS \"${CUDA_ARGS_CXXFLAGS}\"\n      CUDAFLAGS \"${CUDA_ARGS_CUDAFLAGS}\"\n      LDFLAGS \"${CUDA_ARGS_LDFLAGS}\"\n      DEPS \"${CUDA_ARGS_DEPS}\"\n      ARGS \"${CUDA_ARGS_ARGS}\"\n    )\nendfunction()\n\n## Add a 
subdirectory to the build\nfunction(go_directory)\n  add_subdirectory(${ARGN})\nendfunction()\n\n## Add subdirectories to the build\nfunction(go_directories)\n  foreach(SRC_DIR ${ARGN})\n    add_subdirectory(${SRC_DIR})\n  endforeach()\nendfunction()\n\n## Build a go executable program\nfunction(go_binary)\n  find_program(\n      GO_EXECUTABLE go PATHS $ENV{HOME}/go ENV GOROOT GOPATH PATH_SUFFIXES bin\n    )\n  if(NOT GO_EXECUTABLE)\n    message(FATAL_ERROR \"No go language compiler found.\")\n  endif()\n\n  cmake_parse_arguments(\n      GO_ARGS \"PACKED\" \"NAME\"\n      \"GOPATH;SRCS;ASMFLAGS;GCFLAGS;LDFLAGS;DEPS\" ${ARGN}\n    )\n  if(NOT GO_ARGS_NAME)\n    message(FATAL_ERROR \"No target name privated.\")\n  endif()\n\n  file(GLOB GO_ARGS_SRCS ${GO_ARGS_SRCS})\n  if(NOT GO_ARGS_SRCS)\n    message(FATAL_ERROR \"No source files/directories found of ${GO_ARGS_NAME}.\")\n  endif()\n\n  if(${CMAKE_SYSTEM_NAME} MATCHES \"Windows\")\n    string(REPLACE \";\" \"\\;\" GO_ARGS_GOPATH \"${GO_ARGS_GOPATH}\")\n  else()\n    string(REPLACE \";\" \":\" GO_ARGS_GOPATH \"${GO_ARGS_GOPATH}\")\n  endif()\n\n  set(\n      GO_OUTPUT_FILE\n      ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${GO_ARGS_NAME}${CMAKE_EXECUTABLE_SUFFIX}\n    )\n  file(RELATIVE_PATH GO_OUTPUT_REL_FILE ${CMAKE_BINARY_DIR} ${GO_OUTPUT_FILE})\n  add_custom_target(\n      ${GO_ARGS_NAME}\n      COMMAND ${CMAKE_COMMAND} -E env GOPATH=\"${GO_ARGS_GOPATH}\"\n      \"${GO_EXECUTABLE}\" build -v -buildmode=exe\n      -compiler=gc -gcflags=\"${GO_ARGS_GCFLAGS}\" -asmflags=\"${GO_ARGS_ASMFLAGS}\"\n      -ldflags=\"${GO_ARGS_LDFLAGS}\"\n      -o \"${GO_OUTPUT_FILE}\" \"${GO_ARGS_SRCS}\"\n      WORKING_DIRECTORY \"${CMAKE_CURRENT_BINARY_DIR}\"\n      DEPENDS \"${GO_ARGS_DEPS}\"\n      COMMENT \"Building GO executable ${GO_OUTPUT_REL_FILE}\"\n    )\n  if(GO_ARGS_PACKED)\n    install(PROGRAMS ${GO_OUTPUT_FILE} DESTINATION \"${CMAKE_INSTALL_BINDIR}\")\n  endif()\nendfunction()\n\n## Fetch content\nfunction(_fetch_content)\n  
cmake_parse_arguments(\n      DL_ARGS \"\"\n      \"NAME;PATH;GIT_URL;GIT_TAG;HG_URL;HG_TAG;SVN_URL;SVN_REV;URL;URL_HASH\"\n      \"\" ${ARGN}\n    )\n\n  if(NOT DL_ARGS_NAME)\n    message(FATAL_ERROR \"No fetch name privated.\")\n  endif()\n\n  if(NOT DL_ARGS_PATH)\n    # Download to current source directory\n    set(DL_ARGS_PATH \"${CMAKE_CURRENT_SOURCE_DIR}/${DL_ARGS_NAME}\")\n  endif()\n\n  set(\n      CMAKELISTS_CONTENT\n      \"cmake_minimum_required(VERSION 3.1)\\n\"\n      \"project(${DL_ARGS_NAME})\\n\"\n      \"include(ExternalProject)\\n\"\n      \"ExternalProject_Add(\\n\"\n      \"    ${DL_ARGS_NAME}\\n\"\n      \"    PREFIX \\\"external\\\"\\n\"\n      \"    GIT_REPOSITORY \\\"${DL_ARGS_GIT_URL}\\\"\\n\"\n      \"    GIT_TAG \\\"${DL_ARGS_GIT_TAG}\\\"\\n\"\n      \"    HG_REPOSITORY \\\"${DL_ARGS_HG_URL}\\\"\\n\"\n      \"    HG_TAG \\\"${DL_ARGS_HG_TAG}\\\"\\n\"\n      \"    SVN_REPOSITORY \\\"${DL_ARGS_SVN_URL}\\\"\\n\"\n      \"    SVN_REVISION \\\"${DL_ARGS_SVN_REV}\\\"\\n\"\n      \"    URL \\\"${DL_ARGS_URL}\\\"\\n\"\n      \"    URL_HASH \\\"${DL_ARGS_URL_HASH}\\\"\\n\"\n      \"    SOURCE_DIR \\\"${DL_ARGS_PATH}\\\"\\n\"\n      \"    BINARY_DIR \\\"\\\"\\n\"\n      \"    CONFIGURE_COMMAND \\\"\\\"\\n\"\n      \"    BUILD_COMMAND \\\"\\\"\\n\"\n      \"    INSTALL_COMMAND \\\"\\\"\\n\"\n      \"    TEST_COMMAND \\\"\\\"\\n\"\n      \"    LOG_DOWNLOAD ON\\n\"\n      \"  )\\n\"\n    )\n  set(\n      CMAKELISTS_DIRECTORY\n      \"${PROJECT_BINARY_DIR}/downloads/${DL_ARGS_NAME}\"\n    )\n  add_custom_target(\n      external.${DL_ARGS_NAME}\n      COMMAND \"${CMAKE_COMMAND}\" -G \"${CMAKE_GENERATOR}\" . 
&&\n              \"${CMAKE_COMMAND}\" --build .\n      WORKING_DIRECTORY \"${CMAKELISTS_DIRECTORY}\"\n    )\n\n  # Write a cmake script into folder\n  file(WRITE \"${CMAKELISTS_DIRECTORY}/CMakeLists.txt\" ${CMAKELISTS_CONTENT})\n\n  execute_process(\n      COMMAND \"${CMAKE_COMMAND}\" -G \"${CMAKE_GENERATOR}\" .\n      WORKING_DIRECTORY \"${CMAKELISTS_DIRECTORY}\"\n    )\n  execute_process(\n      COMMAND \"${CMAKE_COMMAND}\" --build .\n      WORKING_DIRECTORY \"${CMAKELISTS_DIRECTORY}\"\n    )\nendfunction()\n\n## Download a git repository\nfunction(git_repository)\n  cmake_parse_arguments(GIT_ARGS \"\" \"NAME;PATH;URL;TAG\" \"\" ${ARGN})\n\n  if(NOT GIT_ARGS_NAME)\n    message(FATAL_ERROR \"No repository name privated.\")\n  endif()\n  if(NOT GIT_ARGS_URL)\n    message(FATAL_ERROR \"No repository URL privated.\")\n  endif()\n\n  if(GIT_ARGS_PATH AND NOT IS_ABSOLUTE ${GIT_ARGS_PATH})\n    get_filename_component(GIT_ARGS_PATH ${GIT_ARGS_PATH} ABSOLUTE)\n  endif()\n\n  _fetch_content(\n      NAME \"${GIT_ARGS_NAME}\"\n      PATH \"${GIT_ARGS_PATH}\"\n      GIT_URL \"${GIT_ARGS_URL}\"\n      GIT_TAG \"${GIT_ARGS_TAG}\"\n    )\nendfunction()\n\n## Download a hg repository\nfunction(hg_repository)\n  cmake_parse_arguments(HG_ARGS \"\" \"NAME;PATH;URL;TAG\" \"\" ${ARGN})\n\n  if(NOT HG_ARGS_NAME)\n    message(FATAL_ERROR \"No repository name privated.\")\n  endif()\n  if(NOT HG_ARGS_URL)\n    message(FATAL_ERROR \"No repository URL privated.\")\n  endif()\n\n  if(HG_ARGS_PATH AND NOT IS_ABSOLUTE ${HG_ARGS_PATH})\n    get_filename_component(HG_ARGS_PATH ${HG_ARGS_PATH} ABSOLUTE)\n  endif()\n\n  _fetch_content(\n      NAME \"${HG_ARGS_NAME}\"\n      PATH \"${HG_ARGS_PATH}\"\n      HG_URL \"${HG_ARGS_URL}\"\n      HG_TAG \"${HG_ARGS_TAG}\"\n    )\nendfunction()\n\n## Download a svn repository\nfunction(svn_repository)\n  cmake_parse_arguments(SVN_ARGS \"\" \"NAME;PATH;URL;REV\" \"\" ${ARGN})\n\n  if(NOT SVN_ARGS_NAME)\n    message(FATAL_ERROR \"No repository name 
privated.\")\n  endif()\n  if(NOT SVN_ARGS_URL)\n    message(FATAL_ERROR \"No repository URL privated.\")\n  endif()\n\n  if(SVN_ARGS_PATH AND NOT IS_ABSOLUTE ${SVN_ARGS_PATH})\n    get_filename_component(SVN_ARGS_PATH ${SVN_ARGS_PATH} ABSOLUTE)\n  endif()\n\n  _fetch_content(\n      NAME \"${SVN_ARGS_NAME}\"\n      PATH \"${SVN_ARGS_PATH}\"\n      SVN_URL \"${SVN_ARGS_URL}\"\n      SVN_REV \"${SVN_ARGS_REV}\"\n    )\nendfunction()\n\n## Download a http archive\nfunction(http_archive)\n  cmake_parse_arguments(HTTP_ARGS \"\" \"NAME;PATH;URL;SHA256;SHA1;MD5\" \"\" ${ARGN})\n\n  if(NOT HTTP_ARGS_NAME)\n    message(FATAL_ERROR \"No archive name privated.\")\n  endif()\n  if(NOT HTTP_ARGS_URL)\n    message(FATAL_ERROR \"No archive URL privated.\")\n  endif()\n\n  if(HTTP_ARGS_PATH AND NOT IS_ABSOLUTE ${HTTP_ARGS_PATH})\n    get_filename_component(HTTP_ARGS_PATH ${HTTP_ARGS_PATH} ABSOLUTE)\n  endif()\n\n  if(HTTP_ARGS_SHA256)\n    set(HTTP_URL_HASH \"SHA256=${HTTP_ARGS_SHA256}\")\n  elseif(HTTP_ARGS_SHA1)\n    set(HTTP_URL_HASH \"SHA1=${HTTP_ARGS_SHA1}\")\n  elseif(HTTP_ARGS_MD5)\n    set(HTTP_URL_HASH \"MD5=${HTTP_ARGS_MD5}\")\n  else()\n    set(HTTP_URL_HASH \"\")\n  endif()\n\n  _fetch_content(\n      NAME \"${HTTP_ARGS_NAME}\"\n      PATH \"${HTTP_ARGS_PATH}\"\n      URL \"${HTTP_ARGS_URL}\"\n      URL_HASH \"${HTTP_URL_HASH}\"\n    )\nendfunction()\n\n## Retrieve a version string from GIT\nfunction(git_version _RESULT _SOURCES_DIR)\n  find_package(Git REQUIRED)\n\n  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})\n    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)\n  endif()\n\n  # git describe --tags\n  execute_process(\n      COMMAND \"${GIT_EXECUTABLE}\" describe --tags\n      WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n      RESULT_VARIABLE GIT_VER_RESULT\n      OUTPUT_VARIABLE GIT_VER_OUTPUT\n      ERROR_VARIABLE GIT_VER_ERROR\n    )\n  if(GIT_VER_RESULT EQUAL 0)\n    string(STRIP ${GIT_VER_OUTPUT} GIT_VER_OUTPUT)\n    set(${_RESULT} \"${GIT_VER_OUTPUT}\" 
PARENT_SCOPE)\n    return()\n  endif()\n\n  # git rev-parse --short HEAD\n  execute_process(\n      COMMAND \"${GIT_EXECUTABLE}\" rev-parse --short HEAD\n      WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n      RESULT_VARIABLE GIT_VER_RESULT\n      OUTPUT_VARIABLE GIT_VER_OUTPUT\n      ERROR_VARIABLE GIT_VER_ERROR\n    )\n  if(GIT_VER_RESULT EQUAL 0)\n    string(STRIP ${GIT_VER_OUTPUT} GIT_VER_OUTPUT)\n    set(${_RESULT} \"g${GIT_VER_OUTPUT}\" PARENT_SCOPE)\n    return()\n  endif()\n\n  set(${_RESULT} \"\" PARENT_SCOPE)\nendfunction()\n\n## Retrieve a version string from HG\nfunction(hg_version _RESULT _SOURCES_DIR)\n  find_package(Hg REQUIRED)\n\n  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})\n    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)\n  endif()\n\n  # hg log -T \"{latesttagdistance}\" -r .\n  execute_process(\n      COMMAND \"${HG_EXECUTABLE}\" log -T \"{latesttagdistance}\" -r .\n      WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n      RESULT_VARIABLE HG_VER_RESULT\n      OUTPUT_VARIABLE HG_VER_OUTPUT\n      ERROR_VARIABLE HG_VER_ERROR\n    )\n  if(HG_VER_RESULT EQUAL 0)\n    string(STRIP ${HG_VER_OUTPUT} HG_VER_OUTPUT)\n    if(HG_VER_OUTPUT STREQUAL \"0\")\n      # hg log -T \"{latesttag}\" -r .\n      execute_process(\n          COMMAND \"${HG_EXECUTABLE}\" log -T \"{latesttag}\" -r .\n          WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n          RESULT_VARIABLE HG_VER_RESULT\n          OUTPUT_VARIABLE HG_VER_OUTPUT\n          ERROR_VARIABLE HG_VER_ERROR\n        )\n    else()\n      # hg log -T \"{latesttag}-{latesttagdistance}-h{node|short}\" -r .\n      execute_process(\n          COMMAND \"${HG_EXECUTABLE}\" log\n          -T \"{latesttag}-{latesttagdistance}-h{node|short}\" -r .\n          WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n          RESULT_VARIABLE HG_VER_RESULT\n          OUTPUT_VARIABLE HG_VER_OUTPUT\n          ERROR_VARIABLE HG_VER_ERROR\n        )\n    endif()\n\n    if(HG_VER_RESULT EQUAL 0)\n      string(STRIP ${HG_VER_OUTPUT} HG_VER_OUTPUT)\n  
    if(NOT HG_VER_OUTPUT MATCHES \"^null.*\")\n        set(${_RESULT} \"${HG_VER_OUTPUT}\" PARENT_SCOPE)\n        return()\n      endif()\n    endif()\n  endif()\n\n  # hg log -T \"h{node|short}\" -r .\n  execute_process(\n      COMMAND \"${HG_EXECUTABLE}\" log -T \"h{node|short}\" -r .\n      WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n      RESULT_VARIABLE HG_VER_RESULT\n      OUTPUT_VARIABLE HG_VER_OUTPUT\n      ERROR_VARIABLE HG_VER_ERROR\n    )\n  if(HG_VER_RESULT EQUAL 0)\n    string(STRIP ${HG_VER_OUTPUT} HG_VER_OUTPUT)\n    set(${_RESULT} \"${HG_VER_OUTPUT}\" PARENT_SCOPE)\n    return()\n  endif()\n\n  set(${_RESULT} \"\" PARENT_SCOPE)\nendfunction()\n\n## Retrieve a version string from SVN\nfunction(svn_version _RESULT _SOURCES_DIR)\n  find_package(Subversion REQUIRED)\n\n  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})\n    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)\n  endif()\n\n  # svn info --show-item revision\n  execute_process(\n      COMMAND \"${Subversion_SVN_EXECUTABLE}\" info --show-item revision\n      WORKING_DIRECTORY \"${_SOURCES_DIR}\"\n      RESULT_VARIABLE SVN_VER_RESULT\n      OUTPUT_VARIABLE SVN_VER_OUTPUT\n      ERROR_VARIABLE SVN_VER_ERROR\n    )\n  if(SVN_VER_RESULT EQUAL 0)\n    string(STRIP ${SVN_VER_OUTPUT} SVN_VER_OUTPUT)\n    set(${_RESULT} \"r${SVN_VER_OUTPUT}\" PARENT_SCOPE)\n    return()\n  endif()\n\n  set(${_RESULT} \"\" PARENT_SCOPE)\nendfunction()\n\n_find_workspace_directory(BAZEL_WORKSPACE_DIR)\nif(BAZEL_WORKSPACE_DIR)\n  include(\"${BAZEL_WORKSPACE_DIR}/Workspace.cmake\")\nendif()\n"
  },
  {
    "path": "cmake/option.cmake",
    "content": "## https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures\n## https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures\n## https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html\n\n## Intel Microarchitectures\noption(ENABLE_NEHALEM \"Enable Intel Nehalem CPU microarchitecture\" OFF)\noption(ENABLE_SANDYBRIDGE \"Enable Intel Sandy Bridge CPU microarchitecture\" OFF)\noption(ENABLE_HASWELL \"Enable Intel Haswell CPU microarchitecture\" OFF)\noption(ENABLE_BROADWELL \"Enable Intel Broadwell CPU microarchitecture\" OFF)\noption(ENABLE_SKYLAKE \"Enable Intel Skylake CPU microarchitecture\" OFF)\noption(ENABLE_SKYLAKE_AVX512 \"Enable Intel Skylake Server CPU microarchitecture\" OFF)\noption(ENABLE_ICELAKE \"Enable Intel Icelake CPU microarchitecture\" OFF)\noption(ENABLE_SAPPHIRERAPIDS \"Enable Intel Sapphire Rapids Server CPU microarchitecture\" OFF)\noption(ENABLE_EMERALDRAPIDS \"Enable Intel Emerald Rapids Server CPU microarchitecture\" OFF)\noption(ENABLE_GRANITERAPIDS \"Enable Intel Granite Rapids Server CPU microarchitecture\" OFF)\n\noption(ENABLE_NATIVE \"Enable native CPU microarchitecture\" OFF)\n\n## AMD Microarchitectures\noption(ENABLE_ZEN1 \"Enable AMD Zen+ Family 17h CPU microarchitecture\" OFF)\noption(ENABLE_ZEN2 \"Enable AMD Zen 2 Family 17h CPU microarchitecture\" OFF)\noption(ENABLE_ZEN3 \"Enable AMD Zen 3 Family 19h CPU microarchitecture\" OFF)\n\n## ARM architectures\noption(ENABLE_ARMV8A \"Enable ARMv8-a architecture\" OFF)\noption(ENABLE_ARMV8.1A \"Enable ARMv8.1-a architecture\" OFF)\noption(ENABLE_ARMV8.2A \"Enable ARMv8.2-a architecture\" OFF)\noption(ENABLE_ARMV8.3A \"Enable ARMv8.3-a architecture\" OFF)\noption(ENABLE_ARMV8.4A \"Enable ARMv8.4-a architecture\" OFF)\noption(ENABLE_ARMV8.5A \"Enable ARMv8.5-a architecture\" OFF)\noption(ENABLE_ARMV8.6A \"Enable ARMv8.6-a architecture\" OFF)\n\n## OpenMP option\noption(ENABLE_OPENMP \"Enable OpenMP support\" OFF)\n\nset(ARCH_OPTIONS\n  ENABLE_NEHALEM ENABLE_SANDYBRIDGE ENABLE_HASWELL ENABLE_BROADWELL ENABLE_SKYLAKE\n  ENABLE_SKYLAKE_AVX512 ENABLE_ICELAKE ENABLE_SAPPHIRERAPIDS ENABLE_EMERALDRAPIDS ENABLE_GRANITERAPIDS\n  ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3\n  ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A\n  ENABLE_ARMV8.5A ENABLE_ARMV8.6A\n  ENABLE_NATIVE\n)\n\n## Auto detection is switched off as soon as any explicit ENABLE_* architecture\n## option listed in ARCH_OPTIONS is turned on.\noption(AUTO_DETECT_ARCH \"Auto detect CPU microarchitecture\" ON)\nforeach(opt IN LISTS ARCH_OPTIONS)\n  if(${opt})\n    set(AUTO_DETECT_ARCH OFF)\n    break()\n  endif()\nendforeach()\n\ninclude(CheckCCompilerFlag)\n\n## Append _FLAG to the flag string stored in the variable named _RESULT and\n## publish the result in the caller's scope.\n## The flag is added at most once: when it is already present as a\n## space-delimited entry the variable is left untouched, so flags that were\n## set earlier are never discarded.\nfunction(_AppendFlags _RESULT _FLAG)\n  if(\"${${_RESULT}}\" STREQUAL \"\")\n    set(${_RESULT} \"${_FLAG}\" PARENT_SCOPE)\n    return()\n  endif()\n\n  # Literal search on space-padded strings: unlike MATCHES this does not treat\n  # characters such as '.' or '+' inside a flag as regex operators.\n  string(FIND \" ${${_RESULT}} \" \" ${_FLAG} \" _flag_pos)\n  if(_flag_pos EQUAL -1)\n    set(${_RESULT} \"${${_RESULT}} ${_FLAG}\" PARENT_SCOPE)\n  endif()\nendfunction()\n\n## Probe FLAG with the C compiler. When it is accepted, append it to\n## CMAKE_C_FLAGS and CMAKE_CXX_FLAGS and set <VAR_NAME>_ENABLED to ON.\n## When it is rejected, raise a fatal error if OPTION_NAME is turned on,\n## otherwise set <VAR_NAME>_ENABLED to OFF.\nmacro(add_arch_flag FLAG VAR_NAME OPTION_NAME)\n  check_c_compiler_flag(\"${FLAG}\" COMPILER_SUPPORT_${VAR_NAME})\n  if(COMPILER_SUPPORT_${VAR_NAME})\n    _AppendFlags(CMAKE_C_FLAGS \"${FLAG}\")\n    _AppendFlags(CMAKE_CXX_FLAGS \"${FLAG}\")\n    set(${VAR_NAME}_ENABLED ON)\n  else()\n    if(${OPTION_NAME})\n      message(FATAL_ERROR \"Compiler does not support required flag: '${FLAG}' for ${OPTION_NAME}\")\n    else()\n      set(${VAR_NAME}_ENABLED OFF)\n    endif()\n  endif()\nendmacro()\n\n## Append -march=armv8 to the caller's C/C++ flags when the C compiler accepts\n## it; otherwise only emit a warning.\nfunction(_setup_armv8_march)\n  set(_arch \"armv8\")\n  check_c_compiler_flag(\"-march=${_arch}\" _COMP_SUPP_${_arch})\n  if(_COMP_SUPP_${_arch})\n    _AppendFlags(CMAKE_C_FLAGS \"-march=${_arch}\")\n    _AppendFlags(CMAKE_CXX_FLAGS \"-march=${_arch}\")\n    # _AppendFlags updated this function's scope; forward to the caller.\n    set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\" PARENT_SCOPE)\n    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\" PARENT_SCOPE)\n    return()\n  else()\n    message(WARNING \"No ARMv8 march flag supported by compiler.\")\n  endif()\nendfunction()\n\n## Append -march=x86-64 to the caller's C/C++ flags when the C compiler accepts\n## it; otherwise only emit a warning.\nfunction(_setup_x86_march)\n  set(_arch \"x86-64\")\n  check_c_compiler_flag(\"-march=${_arch}\" _COMP_SUPP_${_arch})\n  if(_COMP_SUPP_${_arch})\n    _AppendFlags(CMAKE_C_FLAGS \"-march=${_arch}\")\n    _AppendFlags(CMAKE_CXX_FLAGS \"-march=${_arch}\")\n    # _AppendFlags updated this function's scope; forward to the caller.\n    set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\" PARENT_SCOPE)\n    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\" PARENT_SCOPE)\n    return()\n  else()\n    message(WARNING \"No known x86 march flag supported; falling back to generic.\")\n  endif()\nendfunction()\n\n## Store per-ISA -march flags in the caller's variables named by the arguments.\n## SSE and AVX2 get fixed values; AVX512 and AVX512FP16 get the first entry of\n## their candidate list that the C compiler accepts.\nfunction(setup_compiler_march_for_x86 VAR_NAME_SSE VAR_NAME_AVX2 VAR_NAME_AVX512 VAR_NAME_AVX512FP16)\n  #sse\n  set(${VAR_NAME_SSE} \"-march=corei7\" PARENT_SCOPE)\n\n  #avx 2\n  set(${VAR_NAME_AVX2} \"-march=core-avx2\" PARENT_SCOPE)\n\n  #avx512\n  set(_x86_flags_avx512 \"icelake-server\" \"skylake-avx512\" \"core-avx2\" \"x86-64\")\n  foreach(_arch_avx512 IN LISTS _x86_flags_avx512)\n    check_c_compiler_flag(\"-march=${_arch_avx512}\" _COMP_SUPP_${_arch_avx512})\n    if(_COMP_SUPP_${_arch_avx512})\n      set(${VAR_NAME_AVX512} \"-march=${_arch_avx512}\" PARENT_SCOPE)\n      break()\n    endif()\n  endforeach()\n\n  #avx512fp16\n  set(_x86_flags_avx512fp16\n    \"sapphirerapids\" \"icelake-server\" \"skylake-avx512\" \"core-avx2\" \"x86-64\"\n  )\n  foreach(_arch_avx512fp16 IN LISTS _x86_flags_avx512fp16)\n    check_c_compiler_flag(\"-march=${_arch_avx512fp16}\" _COMP_SUPP_${_arch_avx512fp16})\n    if(_COMP_SUPP_${_arch_avx512fp16})\n      set(${VAR_NAME_AVX512FP16} \"-march=${_arch_avx512fp16}\" PARENT_SCOPE)\n      break()\n    endif()\n  endforeach()\nendfunction()\n\nif(MSVC)\n  # Prefer higher ISAs\n  foreach(_isa IN ITEMS \"AVX512\" \"AVX2\" \"AVX\" \"SSE2\")\n    check_c_compiler_flag(\"/arch:${_isa}\" _COMP_SUPP_${_isa})\n    if(_COMP_SUPP_${_isa})\n      _AppendFlags(CMAKE_C_FLAGS \"/arch:${_isa}\")\n      _AppendFlags(CMAKE_CXX_FLAGS \"/arch:${_isa}\")\n      message(STATUS \"MSVC: enabled /arch:${_isa}\")\n      break()\n    endif()\n  endforeach()\n  # NOTE(review): this return() also skips the OpenMP section at the end of\n  # this file, so ENABLE_OPENMP has no effect with MSVC - confirm intended.\n  return()\nendif()\n\nif(NOT AUTO_DETECT_ARCH)\n  if(ENABLE_NATIVE)\n    add_arch_flag(\"-march=native\" NATIVE ENABLE_NATIVE)\n  endif()\n\n  if(ENABLE_ZEN3)\n    add_arch_flag(\"-march=znver3\" ZNVER3 ENABLE_ZEN3)\n  endif()\n\n  if(ENABLE_ZEN2)\n    add_arch_flag(\"-march=znver2\" ZNVER2 ENABLE_ZEN2)\n  endif()\n\n  if(ENABLE_ZEN1)\n    add_arch_flag(\"-march=znver1\" ZNVER1 ENABLE_ZEN1)\n  endif()\n\n  if(ENABLE_GRANITERAPIDS)\n    add_arch_flag(\"-march=graniterapids\" GRANITERAPIDS ENABLE_GRANITERAPIDS)\n  endif()\n\n  if(ENABLE_EMERALDRAPIDS)\n    add_arch_flag(\"-march=emeraldrapids\" EMERALDRAPIDS ENABLE_EMERALDRAPIDS)\n  endif()\n\n  if(ENABLE_SAPPHIRERAPIDS)\n    add_arch_flag(\"-march=sapphirerapids\" SAPPHIRERAPIDS ENABLE_SAPPHIRERAPIDS)\n  endif()\n\n  if(ENABLE_ICELAKE)\n    add_arch_flag(\"-march=icelake-server\" ICELAKE ENABLE_ICELAKE)\n  endif()\n\n  if(ENABLE_SKYLAKE_AVX512)\n    add_arch_flag(\"-march=skylake-avx512\" SKYLAKE_AVX512 ENABLE_SKYLAKE_AVX512)\n  endif()\n\n  if(ENABLE_SKYLAKE)\n    add_arch_flag(\"-march=skylake\" SKYLAKE ENABLE_SKYLAKE)\n  endif()\n\n  if(ENABLE_BROADWELL)\n    add_arch_flag(\"-march=broadwell\" BROADWELL ENABLE_BROADWELL)\n  endif()\n\n  if(ENABLE_HASWELL)\n    add_arch_flag(\"-march=haswell\" HASWELL ENABLE_HASWELL)\n  endif()\n\n  if(ENABLE_SANDYBRIDGE)\n    add_arch_flag(\"-march=sandybridge\" SANDYBRIDGE ENABLE_SANDYBRIDGE)\n  endif()\n\n  if(ENABLE_NEHALEM)\n    add_arch_flag(\"-march=nehalem\" NEHALEM ENABLE_NEHALEM)\n  endif()\n\n  # ARM, newest first. Only one -march= can be in effect, so only the highest\n  # enabled architecture level is applied and lower ones are ignored.\n  if(ENABLE_ARMV8.6A)\n    add_arch_flag(\"-march=armv8.6-a\" ARMV86A ENABLE_ARMV8.6A)\n  elseif(ENABLE_ARMV8.5A)\n    add_arch_flag(\"-march=armv8.5-a\" ARMV85A ENABLE_ARMV8.5A)\n  elseif(ENABLE_ARMV8.4A)\n    add_arch_flag(\"-march=armv8.4-a\" ARMV84A ENABLE_ARMV8.4A)\n  elseif(ENABLE_ARMV8.3A)\n    add_arch_flag(\"-march=armv8.3-a\" ARMV83A ENABLE_ARMV8.3A)\n  elseif(ENABLE_ARMV8.2A)\n    add_arch_flag(\"-march=armv8.2-a\" ARMV82A ENABLE_ARMV8.2A)\n  elseif(ENABLE_ARMV8.1A)\n    add_arch_flag(\"-march=armv8.1-a\" ARMV81A ENABLE_ARMV8.1A)\n  elseif(ENABLE_ARMV8A)\n    add_arch_flag(\"-march=armv8-a\" ARMV8A ENABLE_ARMV8A)\n  endif()\n\nelse()\n  # AUTO DETECT\n  # Heuristic: detect host architecture and probe appropriate flags\n  if(CMAKE_SYSTEM_PROCESSOR MATCHES \"aarch64|arm64|ARM64\")\n    _setup_armv8_march()\n  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|i686|i386|x64\")\n    _setup_x86_march()\n  else()\n    message(WARNING \"Unknown host architecture: ${CMAKE_SYSTEM_PROCESSOR}; no -march= set.\")\n  endif()\nendif()\n\n# -----------------------------\n# OpenMP\n# -----------------------------\nif(ENABLE_OPENMP)\n  find_package(OpenMP REQUIRED)\n  if(OpenMP_C_FLAGS)\n    _AppendFlags(CMAKE_C_FLAGS \"${OpenMP_C_FLAGS}\")\n  endif()\n  if(OpenMP_CXX_FLAGS)\n    _AppendFlags(CMAKE_CXX_FLAGS \"${OpenMP_CXX_FLAGS}\")\n  endif()\nendif()\n"
  },
  {
    "path": "cmake/utils.cmake",
    "content": "## Apply a patch file to a directory tree at most once.\n##\n##   patch_name  identifier used to name the marker file\n##   target_dir  directory in which `patch -p1` is run\n##   patch_file  patch to apply; a relative path is resolved against\n##               CMAKE_CURRENT_SOURCE_DIR\n##\n## On success the marker file \"<target_dir>/.<patch_name>_patched\" is written;\n## when that marker already exists the call returns without doing anything.\n## A missing patch file or a failing `patch` run is a fatal error.\nfunction(apply_patch_once patch_name target_dir patch_file)\n    set(mark_file \"${target_dir}/.${patch_name}_patched\")\n\n    if(EXISTS \"${mark_file}\")\n        return()\n    endif()\n\n    # `patch` runs with target_dir as its working directory, so a relative\n    # patch path would be looked up there rather than where the caller meant.\n    get_filename_component(patch_file \"${patch_file}\" ABSOLUTE)\n\n    if(NOT EXISTS \"${patch_file}\")\n        message(FATAL_ERROR \"Patch file '${patch_file}' not found!\")\n    endif()\n\n    execute_process(\n        COMMAND patch -p1 -i \"${patch_file}\"\n        WORKING_DIRECTORY \"${target_dir}\"\n        RESULT_VARIABLE patch_result\n        OUTPUT_VARIABLE patch_stdout\n        ERROR_VARIABLE patch_stderr\n    )\n\n    if(NOT patch_result EQUAL 0)\n        # Report the result value and both output streams: the diagnostics of a\n        # failed run are not guaranteed to be on stderr alone.\n        message(FATAL_ERROR\n            \"Failed to apply patch '${patch_name}' to ${target_dir} \"\n            \"(result: ${patch_result}):\\n${patch_stdout}\\n${patch_stderr}\")\n    endif()\n\n    # Only reached after a successful run; marks the tree as patched.\n    file(WRITE \"${mark_file}\" \"patched\")\nendfunction()\n"
  },
  {
    "path": "examples/c++/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.13)\ncmake_policy(SET CMP0077 NEW)\nproject(zvec-example-c++)\nset(CMAKE_CXX_STANDARD 17)\n# C++17 is requested above; make it a hard requirement so CMake does not\n# silently fall back to an older standard.\nset(CMAKE_CXX_STANDARD_REQUIRED ON)\n\n# Enable compile_commands.json\nset(CMAKE_EXPORT_COMPILE_COMMANDS ON)\n\n# --- Paths to Zvec and dependencies ---\n# Allow custom host build directory, default to \"build\"\nif(NOT DEFINED HOST_BUILD_DIR)\n    set(HOST_BUILD_DIR \"build\")\nendif()\n\n# All paths are taken three levels above this project's binary directory.\nset(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include)\nset(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib)\nset(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib)\n\n# Add include and library search paths\ninclude_directories(${ZVEC_INCLUDE_DIR})\nlink_directories(${ZVEC_LIB_DIR} ${ZVEC_DEPENDENCY_LIB_DIR})\n\n# --- Determine debug/release library names ---\nif(CMAKE_BUILD_TYPE STREQUAL \"Debug\")\n    set(GLOG_LIB glogd)\n    set(GFLAGS_LIB gflags_nothreads_debug)\n    set(PROTOBUF_LIB protobufd)\nelse()\n    set(GLOG_LIB glog)\n    set(GFLAGS_LIB gflags_nothreads)\n    set(PROTOBUF_LIB protobuf)\nendif()\n\n# --- Dependency groups ---\nfind_package(Threads REQUIRED)\n\nset(zvec_ailego_deps\n    arrow\n    parquet\n    arrow_bundled_dependencies\n    ${CMAKE_THREAD_LIBS_INIT}\n    ${CMAKE_DL_LIBS}\n)\n\nset(zvec_core_deps\n    zvec_turbo\n)\n\nset(zvec_db_deps\n    roaring\n    rocksdb\n    arrow\n    arrow_acero\n    arrow_bundled_dependencies\n    arrow_compute\n    arrow_dataset\n    parquet\n    antlr4-runtime\n    ${GLOG_LIB}\n    ${GFLAGS_LIB}\n    ${PROTOBUF_LIB}\n    lz4\n)\n\n# --- Create INTERFACE targets for Zvec components ---\n\n# zvec_ailego: links libzvec_ailego.a + its deps\nadd_library(zvec-ailego INTERFACE)\ntarget_link_libraries(zvec-ailego INTERFACE\n    -lzvec_ailego\n    ${zvec_ailego_deps}\n)\n\n# zvec_core: linked as a whole archive (--whole-archive on Linux/Android,\n# -force_load on Apple) together with zvec-ailego and the core deps.\nadd_library(zvec-core INTERFACE)\nif(CMAKE_SYSTEM_NAME STREQUAL \"Linux\" OR ANDROID)\n    target_link_libraries(zvec-core INTERFACE\n        -Wl,--whole-archive\n        zvec_core\n        -Wl,--no-whole-archive\n        -Wl,--start-group\n        zvec-ailego\n        ${zvec_core_deps}\n        -Wl,--end-group\n    )\nelseif(APPLE)\n    target_link_libraries(zvec-core INTERFACE\n        -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_core.a\n        zvec-ailego\n        ${zvec_core_deps}\n    )\nelse()\n    message(FATAL_ERROR \"Unsupported platform: ${CMAKE_SYSTEM_NAME}\")\nendif()\n\n# zvec_db: links libzvec_db.a + all deps\nadd_library(zvec-db INTERFACE)\nif(CMAKE_SYSTEM_NAME STREQUAL \"Linux\" OR ANDROID)\n    target_link_libraries(zvec-db INTERFACE\n        zvec_db\n        zvec-core\n        zvec-ailego\n        -Wl,--start-group\n        ${zvec_db_deps}\n        -Wl,--end-group\n    )\nelseif(APPLE)\n    target_link_libraries(zvec-db INTERFACE\n        zvec_db\n        zvec-core\n        zvec-ailego\n        ${zvec_db_deps}\n    )\nelse()\n    message(FATAL_ERROR \"Unsupported platform: ${CMAKE_SYSTEM_NAME}\")\nendif()\n\n\n# --- Example executables ---\nadd_executable(db-example db/main.cc)\ntarget_link_libraries(db-example PRIVATE\n    zvec-db\n)\nif(ANDROID)\n    target_link_libraries(db-example PRIVATE\n        log\n    )\nendif()\n\nadd_executable(core-example core/main.cc)\ntarget_link_libraries(core-example PRIVATE\n    zvec-core\n)\n\nadd_executable(ailego-example ailego/main.cc)\ntarget_link_libraries(ailego-example PRIVATE\n    zvec-ailego\n)\n\n# Android release builds: strip symbols and optimize for size\nif(CMAKE_BUILD_TYPE STREQUAL \"Release\" AND ANDROID)\n    foreach(example_target db-example core-example ailego-example)\n        add_custom_command(TARGET ${example_target} POST_BUILD\n            COMMAND ${CMAKE_STRIP} \"$<TARGET_FILE:${example_target}>\"\n            COMMENT \"Stripping symbols from ${example_target}\")\n    endforeach()\n\n    set_property(TARGET db-example core-example ailego-example\n                 PROPERTY COMPILE_FLAGS \"-Os\")\n    set_property(TARGET db-example core-example ailego-example\n                 PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)\nendif()\n"
  },
  {
    "path": "examples/c++/ailego/main.cc",
    "content": "#include <iostream>\n#include <string>\n#include <zvec/ailego/utility/string_helper.h>\n\n// Minimal ailego example: tests whether a string starts with a given prefix\n// and prints the outcome to stdout.\nint main() {\n  std::string text{\"hello world\"};\n  const auto has_prefix =\n      zvec::ailego::StringHelper::StartsWith(text, \"hello\");\n\n  std::cout << has_prefix << std::endl;\n  return 0;\n}\n"
  },
  {
    "path": "examples/c++/core/main.cc",
    "content": "#include <cstdint>\n#include <cstdio>\n#include <cstdlib>\n#include <iostream>\n#include <string>\n#include <vector>\n#include <zvec/core/interface/index.h>\n#include <zvec/core/interface/index_factory.h>\n#include <zvec/core/interface/index_param.h>\n#include <zvec/core/interface/index_param_builders.h>\n\nusing namespace zvec::core_interface;\n\nconstexpr uint32_t kDimension = 64;\nconst std::string index_name{\"test.index\"};\n\n// Creates an index from `param`, opens it at `index_name`, adds `doc_num`\n// vectors under keys 0..doc_num-1 (every component of vector i is\n// i / 10.0f + 0.1f) and trains it. Prints a message and returns nullptr as\n// soon as one of these steps fails.\nIndex::Pointer create_index(const BaseIndexParam::Pointer &param,\n                            int doc_num = 10) {\n  auto index = IndexFactory::CreateAndInitIndex(*param);\n  if (!index) {\n    std::cout << \"Failed to create index.\" << std::endl;\n    return nullptr;\n  }\n\n  int ret = index->Open(\n      index_name, StorageOptions{StorageOptions::StorageType::kMMAP, true});\n  if (ret != 0) {\n    std::cout << \"Failed to open index.\" << std::endl;\n    return nullptr;\n  }\n\n  for (int i = 0; i < doc_num; ++i) {\n    std::vector<float> vector(kDimension, i / 10.0f + 0.1f);\n    VectorData vector_data;\n    vector_data.vector = DenseVector{vector.data()};\n    ret = index->Add(vector_data, i);\n    if (ret != 0) {\n      std::cout << \"Failed to add to index.\" << std::endl;\n      return nullptr;\n    }\n  }\n\n  ret = index->Train();\n  if (ret != 0) {\n    std::cout << \"Failed to train index.\" << std::endl;\n    return nullptr;\n  }\n\n  return index;\n}\n\n// Builds a one-document HNSW index, runs a single top-10 query against it and\n// prints the best hit. Returns 0 on success and -1 on any failure.\nint main() {\n  // Remove the index file left behind by a previous run. The return value is\n  // ignored on purpose: the file not existing is the normal case.\n  std::remove(index_name.c_str());\n\n  auto param = HNSWIndexParamBuilder()\n                   .WithMetricType(MetricType::kInnerProduct)\n                   .WithDataType(DataType::DT_FP32)\n                   .WithDimension(kDimension)\n                   .WithIsSparse(false)\n                   .Build();\n  auto index = create_index(param, 1);\n  if (!index) {\n    // create_index() has already reported which step failed.\n    return -1;\n  }\n  std::cout << \"index stats: \" << index->GetDocCount() << std::endl;\n\n  // query\n  auto query_param = HNSWQueryParamBuilder()\n                         .with_topk(10)\n                         .with_fetch_vector(true)\n                         .with_ef_search(20)\n                         .build();\n\n  SearchResult result;\n  VectorData query;\n  std::vector<float> vector(kDimension, 0.1f);\n  query.vector = DenseVector{vector.data()};\n  int ret = index->Search(query, query_param, &result);\n  if (ret != 0) {\n    std::cout << \"Failed to search index.\" << std::endl;\n    return -1;\n  }\n\n  std::cout << \"query results: \" << result.doc_list_.size() << std::endl;\n  if (result.doc_list_.size() == 0) {\n    std::cout << \"No results found.\" << std::endl;\n    return -1;\n  }\n\n  std::cout << \"key: \" << result.doc_list_[0].key()\n            << \", score: \" << result.doc_list_[0].score() << std::endl;\n\n  return 0;\n}\n"
  },
  {
    "path": "examples/c++/db/main.cc",
    "content": "#include <cstdlib>\n#include <string>\n#include <vector>\n#include <zvec/db/collection.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n\nusing namespace zvec;\n\nDoc create_doc(const uint64_t doc_id, const CollectionSchema &schema,\n               std::string pk = \"\") {\n  Doc new_doc;\n  if (pk.empty()) {\n    pk = \"pk_\" + std::to_string(doc_id);\n  }\n  new_doc.set_pk(pk);\n\n  for (auto &field : schema.fields()) {\n    switch (field->data_type()) {\n      case DataType::BINARY: {\n        std::string binary_str(\"binary_\" + std::to_string(doc_id));\n        new_doc.set<std::string>(field->name(), binary_str);\n        break;\n      }\n      case DataType::BOOL:\n        new_doc.set<bool>(field->name(), doc_id % 10 == 0);\n        break;\n      case DataType::INT32:\n        new_doc.set<int32_t>(field->name(), (int32_t)doc_id);\n        break;\n      case DataType::INT64:\n        new_doc.set<int64_t>(field->name(), (int64_t)doc_id);\n        break;\n      case DataType::UINT32:\n        new_doc.set<uint32_t>(field->name(), (uint32_t)doc_id);\n        break;\n      case DataType::UINT64:\n        new_doc.set<uint64_t>(field->name(), (uint64_t)doc_id);\n        break;\n      case DataType::FLOAT:\n        new_doc.set<float>(field->name(), (float)doc_id);\n        break;\n      case DataType::DOUBLE:\n        new_doc.set<double>(field->name(), (double)doc_id);\n        break;\n      case DataType::STRING:\n        new_doc.set<std::string>(field->name(),\n                                 \"value_\" + std::to_string(doc_id));\n        break;\n      case DataType::ARRAY_BINARY: {\n        std::vector<std::string> bin_vec;\n        for (size_t i = 0; i < (doc_id % 10); i++) {\n          bin_vec.push_back(\"bin_\" + std::to_string(i));\n        }\n        new_doc.set<std::vector<std::string>>(field->name(), bin_vec);\n        break;\n      }\n      case DataType::ARRAY_BOOL:\n        
new_doc.set<std::vector<bool>>(field->name(),\n                                       std::vector<bool>(10, doc_id % 10 == 0));\n        break;\n      case DataType::ARRAY_INT32:\n        new_doc.set<std::vector<int32_t>>(\n            field->name(), std::vector<int32_t>(10, (int32_t)doc_id));\n        break;\n      case DataType::ARRAY_INT64:\n        new_doc.set<std::vector<int64_t>>(\n            field->name(), std::vector<int64_t>(10, (int64_t)doc_id));\n        break;\n      case DataType::ARRAY_UINT32:\n        new_doc.set<std::vector<uint32_t>>(\n            field->name(), std::vector<uint32_t>(10, (uint32_t)doc_id));\n        break;\n      case DataType::ARRAY_UINT64:\n        new_doc.set<std::vector<uint64_t>>(\n            field->name(), std::vector<uint64_t>(10, (uint64_t)doc_id));\n        break;\n      case DataType::ARRAY_FLOAT:\n        new_doc.set<std::vector<float>>(field->name(),\n                                        std::vector<float>(10, (float)doc_id));\n        break;\n      case DataType::ARRAY_DOUBLE:\n        new_doc.set<std::vector<double>>(\n            field->name(), std::vector<double>(10, (double)doc_id));\n        break;\n      case DataType::ARRAY_STRING:\n        new_doc.set<std::vector<std::string>>(\n            field->name(),\n            std::vector<std::string>(10, \"value_\" + std::to_string(doc_id)));\n        break;\n      case DataType::VECTOR_BINARY32:\n        new_doc.set<std::vector<uint32_t>>(\n            field->name(),\n            std::vector<uint32_t>(field->dimension(), uint32_t(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_BINARY64:\n        new_doc.set<std::vector<uint64_t>>(\n            field->name(),\n            std::vector<uint64_t>(field->dimension(), uint64_t(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP32:\n        new_doc.set<std::vector<float>>(\n            field->name(),\n            std::vector<float>(field->dimension(), float(doc_id + 0.1)));\n        break;\n   
   case DataType::VECTOR_FP64:\n        new_doc.set<std::vector<double>>(\n            field->name(),\n            std::vector<double>(field->dimension(), double(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP16:\n        new_doc.set<std::vector<zvec::float16_t>>(\n            field->name(), std::vector<zvec::float16_t>(\n                               field->dimension(), static_cast<zvec::float16_t>(\n                                                       float(doc_id + 0.1))));\n        break;\n      case DataType::VECTOR_INT8:\n        new_doc.set<std::vector<int8_t>>(\n            field->name(),\n            std::vector<int8_t>(field->dimension(), (int8_t)doc_id));\n        break;\n      case DataType::VECTOR_INT16:\n        new_doc.set<std::vector<int16_t>>(\n            field->name(),\n            std::vector<int16_t>(field->dimension(), (int16_t)doc_id));\n        break;\n      case DataType::SPARSE_VECTOR_FP16: {\n        std::vector<uint32_t> indices;\n        std::vector<zvec::float16_t> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(zvec::float16_t(float(doc_id + 0.1)));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<zvec::float16_t>>\n            sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        new_doc.set<\n            std::pair<std::vector<uint32_t>, std::vector<zvec::float16_t>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP32: {\n        std::vector<uint32_t> indices;\n        std::vector<float> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(float(doc_id + 0.1));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        
new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      default:\n        std::cout << \"Unsupported data type: \" << field->name() << std::endl;\n        throw std::runtime_error(\"Unsupported vector data type\");\n    }\n  }\n\n  return new_doc;\n}\n\nCollectionSchema::Ptr create_schema() {\n  auto schema = std::make_shared<CollectionSchema>(\"demo\");\n  schema->set_max_doc_count_per_segment(1000);\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"id\", DataType::INT64, false, std::make_shared<InvertIndexParams>(true)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"name\", DataType::STRING, false,\n      std::make_shared<InvertIndexParams>(false)));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"weight\", DataType::FLOAT, true));\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense\", DataType::VECTOR_FP32, 128, false,\n      std::make_shared<HnswIndexParams>(MetricType::IP)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"sparse\", DataType::SPARSE_VECTOR_FP32, 0, false,\n      std::make_shared<HnswIndexParams>(MetricType::IP)));\n\n  return schema;\n}\n\nint main() {\n  std::string path = \"./demo\";\n  std::string rm_cmd = \"rm -rf \" + path;\n  system(rm_cmd.c_str());\n\n  auto schema = create_schema();\n  CollectionOptions options{false, true};\n\n  auto result = Collection::CreateAndOpen(path, *schema, options);\n  if (!result.has_value()) {\n    std::cout << result.error().message() << std::endl;\n    return -1;\n  }\n\n  std::cout << \"init stats: \" << result.value()->Stats().value().to_string()\n            << std::endl;\n\n  auto coll = std::move(result).value();\n\n  // insert docs\n  {\n    auto doc1 = create_doc(0, *schema);\n    std::vector<Doc> docs{doc1};\n    auto res = coll->Insert(docs);\n    if (!res.has_value()) {\n      std::cout << res.error().message() << std::endl;\n      
return -1;\n    }\n    std::cout << \"after insert stats \" << coll->Stats().value().to_string()\n              << std::endl;\n  }\n\n  // optimize\n  {\n    auto res = coll->Optimize();\n    if (!res.ok()) {\n      std::cout << res.message() << std::endl;\n      return -1;\n    }\n    std::cout << \"after optimize stats \" << coll->Stats().value().to_string()\n              << std::endl;\n  }\n\n  // query\n  {\n    VectorQuery query;\n    query.topk_ = 10;\n    query.field_name_ = \"dense\";\n    query.include_vector_ = true;\n    std::vector<float> query_vector = std::vector<float>(128, 0.1);\n    query.query_vector_.assign((char *)query_vector.data(),\n                               query_vector.size() * sizeof(float));\n    auto res = coll->Query(query);\n    if (!res.has_value()) {\n      std::cout << res.error().message() << std::endl;\n      return -1;\n    }\n    std::cout << \"query result: doc_count[\" << res.value().size() << \"]\"\n              << std::endl;\n    std::cout << \"first doc: \" << res.value()[0]->to_detail_string()\n              << std::endl;\n  }\n\n  // close and reopen\n  coll.reset();\n  options.read_only_ = true;\n  result = Collection::Open(path, options);\n  if (!result.has_value()) {\n    std::cout << result.error().message() << std::endl;\n    return -1;\n  }\n  std::cout << \"reopen stats: \" << result.value()->Stats().value().to_string()\n            << std::endl;\n\n  return 0;\n}"
  },
  {
    "path": "pyproject.toml",
    "content": "######################################################################################################\n# Zvec: High-Performance Vector Database with PyBind11 & C++ Backend\n######################################################################################################\n[project]\nname = \"zvec\"\ndynamic = [\"version\"]\ndescription = \"A high-performance vector database engine with native C++ backend and Python bindings\"\nreadme = \"README.md\"\nlicense = { text = \"Apache-2.0\" }\nauthors = [\n    { name = \"zvec\", email = \"zvec@alibaba-inc.com\" },\n]\nmaintainers = [\n    { name = \"Zvec Core Team\", email = \"zvec@alibaba-inc.com\" },\n]\nrequires-python = \">=3.9\"\nclassifiers = [\n    \"Development Status :: 3 - Alpha\",\n    \"Intended Audience :: Developers\",\n    \"Intended Audience :: Education\",\n    \"Intended Audience :: Science/Research\",\n    \"License :: OSI Approved :: Apache Software License\",\n    \"Operating System :: POSIX :: Linux\",\n    \"Operating System :: MacOS\",\n    \"Programming Language :: C++\",\n    \"Programming Language :: Python :: 3 :: Only\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n    \"Topic :: Database\",\n    \"Topic :: Scientific/Engineering :: Artificial Intelligence\",\n    \"Topic :: Software Development :: Libraries :: Python Modules\",\n]\nkeywords = [\n    \"vector-database\", \"ann\", \"nearest-neighbor\"\n]\ndependencies = [\n    \"numpy >=1.23\",\n]\n\n[project.urls]\nHomepage = \"https://github.com/alibaba/zvec\"\nRepository = \"https://github.com/alibaba/zvec\"\n\"Bug Tracker\" = \"https://github.com/alibaba/zvec/issues\"\n\"Documentation\" = \"https://zvec.org\"\n\n[project.optional-dependencies]\ntest = [\n    \"pytest >=8.0\",\n    \"pytest-cov >=4.1\",\n    \"pytest-mock 
>=3.12\",\n    \"cibuildwheel == 3.4.0\",\n]\ndocs = [\n    \"mkdocs >=1.5\",\n    \"mkdocs-material >=9.5\",\n    \"mkdocstrings[python] >=0.24\",\n]\ndev = [\n    \"ruff >=0.4\",\n    \"black >=24.0\",\n    \"mypy >=1.8\",\n    \"pre-commit >=3.6\",\n    \"build >=1.0\",\n    \"twine >=4.0\",\n    \"numpy >=1.23\",\n    # Inherit test deps\n    \"pytest >=8.0\",\n    \"pytest-cov >=4.1\",\n    \"pytest-mock >=3.12\",\n    \"cibuildwheel == 3.4.0\",\n    # Inherit docs deps\n    \"mkdocs >=1.5\",\n    \"mkdocs-material >=9.5\",\n    \"mkdocstrings[python] >=0.24\",\n    \"pybind11-stubgen>=2.5.5\",\n    \"pybind11 >=3.0\",\n]\n######################################################################################################\n# BUILD SYSTEM CONFIGURATION (scikit-build-core)\n######################################################################################################\n[build-system]\nrequires = [\n    \"scikit-build-core >=0.11\",\n    \"pybind11 >=3.0\",\n    \"setuptools_scm>=8.0\",\n    \"cmake>=3.26,<4.0\",\n    \"ninja>=1.11\",\n]\nbuild-backend = \"scikit_build_core.build\"\n\n[tool.scikit-build]\n# Core settings\nminimum-version = \"0.11\"\nmetadata.version.provider = \"scikit_build_core.metadata.setuptools_scm\"\n\n# CMake configuration\ncmake.version = \">=3.26,<4.0\"\nninja.version = \">=1.11\"\ncmake.build-type = \"Release\"\ninstall.strip = true  # Strip symbols in release builds to reduce wheel size\n\n# Build directory\nbuild-dir = \"build\"\n\n# Platform support\nwheel.expand-macos-universal-tags = true\nwheel.packages = [\"python/zvec\"]\n\n# Source distribution\nsdist.include = [\n    \"README.md\",\n    \"LICENSE\",\n    \"pyproject.toml\",\n    \"CMakeLists.txt\",\n    \"src/**/*\",\n    \"stub/zvec/**/*\",\n    \"python/zvec/py.typed\",\n]\n\n# CMake defines (env-overridable)\n[tool.scikit-build.cmake.define]\nBUILD_TOOLS = \"OFF\"\nBUILD_PYTHON_BINDINGS = \"ON\"\n#CMAKE_VERBOSE_MAKEFILE = \"ON\"\n\n# Setuptools config for test 
pypi\n[tool.setuptools_scm]\nlocal_scheme = \"no-local-version\"\nversion_scheme = \"guess-next-dev\"\nfallback_version = \"0.2.1b1\"\n######################################################################################################\n# TESTING & QUALITY\n######################################################################################################\n[tool.pytest.ini_options]\nminversion = \"8.0\"\naddopts = [\n    \"-ra\",\n    \"--showlocals\",\n    \"--strict-markers\",\n    \"--strict-config\",\n    \"--tb=short\",\n]\nxfail_strict = true\nlog_cli_level = \"INFO\"\nfilterwarnings = [\n    \"error\",\n    \"ignore::pytest.PytestCacheWarning\",\n    # Ignore numpy deprecation warnings in tests (if any)\n    \"ignore:.*numpy.*:DeprecationWarning\",\n]\ntestpaths = [\"python/tests\"]\nmarkers = [\n    \"title: Custom marker for test title/description\",\n    # \"slow: marks tests as slow\",\n]\n\n######################################################################################################\n# BUILD WHEEL\n######################################################################################################\n[tool.cibuildwheel]\nbuild = [\n    \"cp310-*\",\n    \"cp311-*\",\n    \"cp312-*\",\n    \"cp313-*\",\n    \"cp314-*\",\n]\nbuild-frontend = \"build\"\ntest-requires = [\"pytest\", \"numpy\"]\ntest-command = \"cd {project} && pytest python/tests -v --tb=short\"\nbuild-verbosity = 1\n\n[tool.cibuildwheel.linux]\narchs = [\"auto\"]\nenvironment = { CMAKE_GENERATOR = \"Unix Makefiles\", CMAKE_BUILD_PARALLEL_LEVEL = \"16\" }\nmanylinux-x86_64-image = \"manylinux_2_28\"\nmanylinux-aarch64-image = \"manylinux_2_28\"\n# Skip 32-bit builds and musllinux\nskip = [\"*-manylinux_i686\", \"*-musllinux*\"]\n\n[tool.cibuildwheel.macos]\narchs = [\"arm64\"]\n# Inherits CMAKE_GENERATOR and CMAKE_BUILD_PARALLEL_LEVEL from [tool.cibuildwheel] won't work;\n# platform-level environment overrides the top-level entirely, so all vars must be listed 
here\nenvironment = { CMAKE_GENERATOR = \"Unix Makefiles\", CMAKE_BUILD_PARALLEL_LEVEL = \"16\", MACOSX_DEPLOYMENT_TARGET = \"11.0\" }\n######################################################################################################\n# CODE QUALITY & FORMATTING (Ruff)\n######################################################################################################\n[tool.ruff]\ntarget-version = \"py310\"\nline-length = 88\nexclude = [\n    \"build/\",\n    \"dist/\",\n    \".git/\",\n    \".venv/\",\n    \"venv/\",\n    \"thirdparty\",\n]\n\n[tool.ruff.lint]\nextend-select = [\n    \"B\",    # flake8-bugbear\n    \"I\",    # isort\n    \"ARG\",  # flake8-unused-arguments\n    \"C4\",   # flake8-comprehensions\n    \"EM\",   # flake8-errmsg\n    \"ICN\",  # flake8-import-conventions\n    \"G\",    # flake8-logging-format\n    \"PGH\",  # pygrep-hooks\n    \"PIE\",  # flake8-pie\n    \"PL\",   # pylint\n    \"PT\",   # flake8-pytest-style\n    \"PTH\",  # flake8-use-pathlib\n    \"RET\",  # flake8-return\n    \"RUF\",  # Ruff-specific\n    \"SIM\",  # flake8-simplify\n    \"T20\",  # flake8-print\n    \"UP\",   # pyupgrade\n    \"YTT\",  # flake8-2020\n    \"EXE\",  # flake8-executable\n    \"NPY\",  # NumPy-specific\n    \"PD\",   # pandas-vet\n]\nignore = [\n    \"PLR0913\",  # Too many arguments (common in bindings)\n    \"PLR2004\",  # Magic value used in comparison\n    \"UP045\", \"UP007\",  # Use list() instead of [] (breaks C++ init)\n    \"EM101\", \"EM102\",  # Exception messages as literals (ok in tests/utils)\n    \"B008\",     # Mutable default args (cautiously allowed in config)\n    \"E731\",     # Lambda assignment (used in callbacks)\n    \"B019\",     # `functools.lru_cache` on methods (handled manually)\n    \"PLR0912\",  # Too many branches\n    \"PLC0105\",  # Ignore contravariant\n    \"RUF002\",   # Ignore Unicode\n]\nfixable = [\"ALL\"]\nunfixable = []\n\n# Ignore all errors in docstrings\n[tool.ruff.lint.pydocstyle]\nconvention = 
\"google\"  # or \"numpy\", \"pep257\"\nignore-decorators = [\"typing.overload\"]\n\n[tool.ruff.lint.flake8-type-checking]\n# Don't check code examples in docstrings\nquote-annotations = true\n\n[tool.ruff.lint.isort]\nrequired-imports = [\"from __future__ import annotations\"]\nknown-first-party = [\"zvec\"]\n\n[tool.ruff.lint.per-file-ignores]\n\"python/tests/**\" = [\"ALL\"]\n\"bench/core/**\" = [\"ALL\"]\n\"python/zvec/__init__.py\" = [\n    \"F401\",   # Unused import (for __all__)\n    \"E402\",   # Module level import not at top (C++ module init order)\n    \"PLE0605\", # Invalid format for __all__\n    \"RUF022\", # __all__ is not sorted\n]\n\"python/zvec/model/doc.py\" = [\n    \"RUF023\",   # Unused sort (for __slot__)\n]\n\"python/zvec/extension/**\" = [\n    \"PLC0415\",  # Import outside top-level (dynamic imports in _get_model)\n]\n\n[tool.ruff.format]\nindent-style = \"space\"\nquote-style = \"double\"\nline-ending = \"lf\"\nskip-magic-trailing-comma = false\n"
  },
  {
    "path": "python/tests/detail/distance_helper.py",
    "content": "import logging\nimport math\nimport numpy as np\n\nfrom zvec import (\n    MetricType,\n    DataType,\n    QuantizeType,\n    Doc,\n    CollectionSchema,\n    FieldSchema,\n    VectorSchema,\n)\n\nfrom typing import Dict\n\n\ndef is_float_equal(actual, expected, rel_tol=1e-5, abs_tol=1e-8):\n    if actual is None and expected is None:\n        return True\n    return math.isclose(actual, expected, rel_tol=rel_tol, abs_tol=abs_tol)\n\n\ndef is_dense_vector_equal(vec1, vec2, rtol=1e-5, atol=1e-8):\n    \"\"\"Compare two dense vectors with tolerance.\"\"\"\n    return np.allclose(vec1, vec2, rtol=rtol, atol=atol)\n\n\ndef is_sparse_vector_equal(vec1, vec2, rtol=1e-5, atol=1e-8):\n    \"\"\"Compare two sparse vectors with tolerance.\"\"\"\n    # Check if they have the same keys\n    if set(vec1.keys()) != set(vec2.keys()):\n        return False\n\n    # Check if all values are close\n    for key in vec1:\n        if not math.isclose(vec1[key], vec2[key], rel_tol=rtol, abs_tol=atol):\n            return False\n\n    return True\n\n\ndef is_float_array_equal(arr1, arr2, rtol=1e-5, atol=1e-8):\n    \"\"\"Compare two float arrays with tolerance.\"\"\"\n    return np.allclose(arr1, arr2, rtol=rtol, atol=atol)\n\n\ndef is_double_array_equal(arr1, arr2, rtol=1e-9, atol=1e-12):\n    \"\"\"Compare two double arrays with tolerance.\"\"\"\n    return np.allclose(arr1, arr2, rtol=rtol, atol=atol)\n\n\ndef is_int_array_equal(arr1, arr2):\n    \"\"\"Compare two integer arrays with exact equality.\"\"\"\n    return np.array_equal(arr1, arr2)\n\n\ndef cosine_distance_dense(\n    vec1,\n    vec2,\n    dtype: DataType = DataType.VECTOR_FP32,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16:\n        # More stable conversion to float16 to avoid numerical issues\n        vec1 = [float(np.float16(a)) for a in vec1]\n        vec2 = [float(np.float16(b)) for b in vec2]\n    elif dtype == 
DataType.VECTOR_INT8:\n        # For INT8 vectors, convert to integers for proper calculation\n        vec1 = [\n            int(round(min(max(val, -128), 127))) for val in vec1\n        ]  # Clamp to valid INT8 range\n        vec2 = [\n            int(round(min(max(val, -128), 127))) for val in vec2\n        ]  # Clamp to valid INT8 range\n\n    dot_product = sum(a * b for a, b in zip(vec1, vec2))\n\n    magnitude1 = math.sqrt(sum(a * a for a in vec1))\n    magnitude2 = math.sqrt(sum(b * b for b in vec2))\n\n    if magnitude1 == 0 or magnitude2 == 0:\n        return 1.0  # Zero vector case - maximum distance\n\n    cosine_similarity = dot_product / (magnitude1 * magnitude2)\n\n    # Clamp to [-1, 1] range to handle floating-point precision errors\n    cosine_similarity = max(-1.0, min(1.0, cosine_similarity))\n\n    # For identical vectors (within floating point precision), ensure cosine distance is 0.0\n    # This is especially important for low-precision types which have limited precision\n    if (\n        dtype == DataType.VECTOR_FP16\n        or quantize_type == QuantizeType.FP16\n        or dtype == DataType.VECTOR_INT8\n    ):\n        if (\n            abs(cosine_similarity - 1.0) < 1e-3\n        ):  # Handle precision issues for low-precision types\n            cosine_similarity = 1.0\n\n    # Return cosine distance (1 - cosine similarity) to maintain compatibility\n    # with system internal processing and existing test expectations\n    return 1.0 - cosine_similarity\n\n\ndef dp_distance_dense(\n    vec1,\n    vec2,\n    dtype: DataType = DataType.VECTOR_FP32,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16:\n        # More stable computation to avoid numerical issues\n        products = [\n            float(np.float16(a)) * float(np.float16(b)) for a, b in zip(vec1, vec2)\n        ]\n        return sum(products)\n    elif dtype == DataType.VECTOR_INT8:\n        # 
For INT8 vectors, convert to integers for proper calculation\n        products = [\n            int(round(min(max(a, -128), 127))) * int(round(min(max(b, -128), 127)))\n            for a, b in zip(vec1, vec2)\n        ]\n        return sum(products)\n    return sum(a * b for a, b in zip(vec1, vec2))\n\n\ndef euclidean_distance_dense(\n    vec1,\n    vec2,\n    dtype: DataType = DataType.VECTOR_FP32,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16:\n        # Convert to float16 and compute squared differences safely\n        # Use a more stable computation to avoid overflow\n        squared_diffs = []\n        for a, b in zip(vec1, vec2):\n            diff = np.float16(a) - np.float16(b)\n            squared_diff = float(diff) * float(\n                diff\n            )  # Convert to float for multiplication\n            squared_diffs.append(squared_diff)\n        squared_distance = sum(squared_diffs)\n    elif dtype == DataType.VECTOR_INT8:\n        # For INT8 vectors, convert to integers and handle potential scaling\n        # INT8 values might be treated differently in the library implementation\n        vec1_int = [\n            int(round(min(max(val, -128), 127))) for val in vec1\n        ]  # Clamp to valid INT8 range\n        vec2_int = [\n            int(round(min(max(val, -128), 127))) for val in vec2\n        ]  # Clamp to valid INT8 range\n        # Use float type to prevent overflow when summing large squared differences\n        squared_distance = sum(float(a - b) ** 2 for a, b in zip(vec1_int, vec2_int))\n    else:\n        squared_distance = sum((a - b) ** 2 for a, b in zip(vec1, vec2))\n\n    return squared_distance  # Return squared distance for INT8\n\n\ndef distance_dense(\n    vec1,\n    vec2,\n    metric: MetricType,\n    data_type: DataType = DataType.VECTOR_FP32,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    if metric == 
MetricType.COSINE:\n        return cosine_distance_dense(vec1, vec2, data_type, quantize_type)\n    elif metric == MetricType.L2:\n        return euclidean_distance_dense(vec1, vec2, data_type, quantize_type)\n    elif metric == MetricType.IP:\n        return dp_distance_dense(vec1, vec2, data_type, quantize_type)\n    else:\n        raise ValueError(\"Unsupported metric type\")\n\n\ndef dp_distance_sparse(\n    vec1,\n    vec2,\n    data_type: DataType = DataType.SPARSE_VECTOR_FP32,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    dot_product = 0.0\n    for dim in set(vec1.keys()) & set(vec2.keys()):\n        print(\"dim,vec1,vec2:\\n\")\n        print(dim, vec1, vec2)\n        if (\n            data_type == DataType.SPARSE_VECTOR_FP16\n            or quantize_type == QuantizeType.FP16\n        ):\n            vec1[dim] = np.float16(vec1[dim])\n            vec2[dim] = np.float16(vec2[dim])\n        dot_product += vec1[dim] * vec2[dim]\n    return dot_product\n\n\ndef distance(\n    vec1,\n    vec2,\n    metric: MetricType,\n    data_type: DataType,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    is_sparse = (\n        data_type == DataType.SPARSE_VECTOR_FP32\n        or data_type == DataType.SPARSE_VECTOR_FP16\n    )\n\n    if is_sparse:\n        if metric != MetricType.IP:\n            raise ValueError(\"Unsupported metric type for sparse vectors\")\n\n    if is_sparse:\n        return dp_distance_sparse(vec1, vec2, data_type, quantize_type)\n    else:\n        return distance_dense(vec1, vec2, metric, data_type, quantize_type)\n\n\ndef distance_recall(\n    vec1,\n    vec2,\n    metric: MetricType,\n    data_type: DataType,\n    quantize_type: QuantizeType = QuantizeType.UNDEFINED,\n):\n    is_sparse = (\n        data_type == DataType.SPARSE_VECTOR_FP32\n        or data_type == DataType.SPARSE_VECTOR_FP16\n    )\n\n    if is_sparse:\n        return dp_distance_sparse(vec1, vec2, data_type, quantize_type)\n    else:\n        
if data_type in [DataType.VECTOR_FP32, DataType.VECTOR_FP16]:\n            return distance_dense(vec1, vec2, metric, data_type, quantize_type)\n        elif data_type in [DataType.VECTOR_INT8] and metric in [\n            MetricType.L2,\n            MetricType.IP,\n        ]:\n            return distance_dense(vec1, vec2, metric, data_type, quantize_type)\n        else:\n            return dp_distance_dense(vec1, vec2, data_type, quantize_type)\n\n\ndef calculate_rrf_score(rank, k=60):\n    return 1.0 / (k + rank + 1)\n\n\ndef calculate_multi_vector_rrf_scores(query_results: Dict[str, Doc], k=60):\n    rrf_scores = {}\n\n    for vector_name, docs in query_results.items():\n        for rank, doc in enumerate(docs):\n            doc_id = doc.id\n            rrf_score = calculate_rrf_score(rank, k)\n            if doc_id in rrf_scores:\n                rrf_scores[doc_id] += rrf_score\n            else:\n                rrf_scores[doc_id] = rrf_score\n\n    return rrf_scores\n\n\ndef calculate_multi_vector_weighted_scores(\n    query_results: Dict[str, Doc], weights: Dict[str, float], metric: MetricType\n):\n    def _normalize_score(score: float, metric: MetricType) -> float:\n        if metric == MetricType.L2:\n            return 1.0 - 2 * math.atan(score) / math.pi\n        if metric == MetricType.IP:\n            return 0.5 + math.atan(score) / math.pi\n        if metric == MetricType.COSINE:\n            return 1.0 - score / 2.0\n        raise ValueError(\"Unsupported metric type\")\n\n    weighted_scores = {}\n\n    for vector_name, docs in query_results.items():\n        weight = weights.get(vector_name, 1.0)\n\n        for doc in docs:\n            doc_id = doc.id\n            weighted_score = (_normalize_score(doc.score, metric)) * weight\n            if doc_id in weighted_scores:\n                weighted_scores[doc_id] += weighted_score\n            else:\n                weighted_scores[doc_id] = weighted_score\n\n    return weighted_scores\n\n\ndef 
is_field_equal(field1, field2, schema: FieldSchema) -> bool:\n    if field1 is None and field2 is None:\n        return True\n    if field1 is None or field2 is None:\n        return False\n\n    if schema.data_type == DataType.ARRAY_FLOAT:\n        return is_float_array_equal(field1, field2)\n    elif schema.data_type == DataType.ARRAY_DOUBLE:\n        return is_double_array_equal(field1, field2)\n    elif schema.data_type in [\n        DataType.ARRAY_INT32,\n        DataType.ARRAY_INT64,\n        DataType.ARRAY_BOOL,\n        DataType.ARRAY_STRING,\n        DataType.ARRAY_UINT32,\n        DataType.ARRAY_UINT64,\n        DataType.ARRAY_INT64,\n    ]:\n        return is_int_array_equal(field1, field2)\n    elif schema.data_type in [DataType.FLOAT, DataType.DOUBLE]:\n        return is_float_equal(field1, field2)\n\n    return field1 == field2\n\n\ndef is_vector_equal(vec1, vec2, schema: VectorSchema) -> bool:\n    if (\n        schema.data_type == DataType.SPARSE_VECTOR_FP16\n        or schema.data_type == DataType.VECTOR_FP16\n    ):\n        # skip fp16 vector equal\n        return True\n\n    is_sparse = (\n        schema.data_type == DataType.SPARSE_VECTOR_FP32\n        or schema.data_type == DataType.SPARSE_VECTOR_FP16\n    )\n\n    if is_sparse:\n        return is_sparse_vector_equal(vec1, vec2)\n    else:\n        return is_dense_vector_equal(vec1, vec2)\n\n\ndef is_doc_equal(\n    doc1: Doc,\n    doc2: Doc,\n    schema: CollectionSchema,\n    except_score: bool = True,\n    include_vector: bool = True,\n):\n    if doc1.id != doc2.id:\n        logging.error(\"doc ids are not equal\")\n        return False\n\n    reduce_field_names = set(doc1.field_names() + doc2.field_names())\n    reduce_vector_names = set(doc1.vector_names() + doc2.vector_names())\n\n    is_doc1_fields_empty = doc1.fields is None or doc1.fields == {}\n    is_doc2_fields_empty = doc2.fields is None or doc2.fields == {}\n\n    if is_doc1_fields_empty or is_doc2_fields_empty:\n        if 
is_doc1_fields_empty != is_doc2_fields_empty:\n            return False\n    else:\n        for field_name in reduce_field_names:\n            field_schema = schema.field(field_name)\n            if field_schema is None:\n                return False\n            if is_field_equal(\n                doc1.field(field_name), doc2.field(field_name), field_schema\n            ):\n                continue\n            else:\n                logging.error(f\"{field_name} are not equal\")\n                return False\n\n    if include_vector:\n        is_doc1_vectors_empty = doc1.vectors is None or doc1.vectors == {}\n        is_doc2_vectors_empty = doc2.vectors is None or doc2.vectors == {}\n\n        if is_doc1_vectors_empty or is_doc2_vectors_empty:\n            if is_doc1_fields_empty != is_doc2_vectors_empty:\n                return False\n        else:\n            for vector_name in reduce_vector_names:\n                vector_schema = schema.vector(vector_name)\n                if vector_schema is None:\n                    return False\n                if is_vector_equal(\n                    doc1.vector(vector_name), doc2.vector(vector_name), vector_schema\n                ):\n                    continue\n                else:\n                    return False\n\n    return True\n"
  },
  {
    "path": "python/tests/detail/doc_helper.py",
    "content": "from zvec import CollectionSchema, Doc\n\nfrom support_helper import *\n\nimport numpy as np\nfrom typing import Literal, Optional, Union, Tuple\n\nimport random\nimport string\nimport math\n\n\ndef generate_constant_vector(\n    i: int, dimension: int, dtype: Literal[\"int8\", \"float16\", \"float32\"] = \"float32\"\n):\n    if dtype == \"int8\":\n        vec = [(i % 127)] * dimension\n        vec[i % dimension] = (i + 1) % 127\n    else:\n        base_val = (i % 1000) / 256.0\n        special_val = ((i + 1) % 1000) / 256.0\n        vec = [base_val] * dimension\n        vec[i % dimension] = special_val\n\n    return vec\n\n\ndef generate_constant_vector_recall(\n    i: int, dimension: int, dtype: Literal[\"int8\", \"float16\", \"float32\"] = \"float32\"\n):\n    if dtype == \"int8\":\n        vec = [(i % 127)] * dimension\n        vec[i % dimension] = (i + 1) % 127\n    else:\n        base_val = math.sin((i) * 1000) / 256.0\n        special_val = math.sin((i + 1) * 1000) / 256.0\n        vec = [base_val] * dimension\n        vec[i % dimension] = special_val\n\n    return vec\n\n\ndef generate_sparse_vector(i: int):\n    return {i: i + 0.1}\n\n\ndef generate_vectordict(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields = {}\n    doc_vectors = {}\n    for field in schema.fields:\n        if field.data_type == DataType.BOOL:\n            doc_fields[field.name] = i % 2 == 0\n        elif field.data_type == DataType.INT32:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.UINT32:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.INT64:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.UINT64:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.FLOAT:\n            doc_fields[field.name] = float(i) + 0.1\n        elif field.data_type == DataType.DOUBLE:\n            
doc_fields[field.name] = float(i) + 0.11\n        elif field.data_type == DataType.STRING:\n            doc_fields[field.name] = f\"test_{i}\"\n        elif field.data_type == DataType.ARRAY_BOOL:\n            doc_fields[field.name] = [i % 2 == 0, i % 3 == 0]\n        elif field.data_type == DataType.ARRAY_INT32:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT32:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_INT64:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT64:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_FLOAT:\n            doc_fields[field.name] = [float(i + 0.1), float(i + 1.1), float(i + 2.1)]\n        elif field.data_type == DataType.ARRAY_DOUBLE:\n            doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)]\n        elif field.data_type == DataType.ARRAY_STRING:\n            doc_fields[field.name] = [f\"test_{i}\", f\"test_{i + 1}\", f\"test_{i + 2}\"]\n        else:\n            raise ValueError(f\"Unsupported field type: {field.data_type}\")\n    for vector in schema.vectors:\n        if vector.data_type == DataType.VECTOR_FP16:\n            doc_vectors[vector.name] = generate_constant_vector(\n                i, vector.dimension, \"float16\"\n            )\n        elif vector.data_type == DataType.VECTOR_FP32:\n            doc_vectors[vector.name] = generate_constant_vector(\n                i, vector.dimension, \"float32\"\n            )\n        elif vector.data_type == DataType.VECTOR_INT8:\n            doc_vectors[vector.name] = generate_constant_vector(\n                i,\n                vector.dimension,\n                \"int8\",\n            )\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP32:\n            doc_vectors[vector.name] = generate_sparse_vector(i)\n        
elif vector.data_type == DataType.SPARSE_VECTOR_FP16:\n            doc_vectors[vector.name] = generate_sparse_vector(i)\n        else:\n            raise ValueError(f\"Unsupported vector type: {vector.data_type}\")\n    return doc_fields, doc_vectors\n\n\ndef generate_vectordict_recall(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields = {}\n    doc_vectors = {}\n    for field in schema.fields:\n        if field.data_type == DataType.BOOL:\n            doc_fields[field.name] = i % 2 == 0\n        elif field.data_type == DataType.INT32:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.UINT32:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.INT64:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.UINT64:\n            doc_fields[field.name] = i\n        elif field.data_type == DataType.FLOAT:\n            doc_fields[field.name] = float(i) + 0.1\n        elif field.data_type == DataType.DOUBLE:\n            doc_fields[field.name] = float(i) + 0.11\n        elif field.data_type == DataType.STRING:\n            doc_fields[field.name] = f\"test_{i}\"\n        elif field.data_type == DataType.ARRAY_BOOL:\n            doc_fields[field.name] = [i % 2 == 0, i % 3 == 0]\n        elif field.data_type == DataType.ARRAY_INT32:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT32:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_INT64:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT64:\n            doc_fields[field.name] = [i, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_FLOAT:\n            doc_fields[field.name] = [float(i + 0.1), float(i + 1.1), float(i + 2.1)]\n        elif field.data_type == DataType.ARRAY_DOUBLE:\n            
doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)]\n        elif field.data_type == DataType.ARRAY_STRING:\n            doc_fields[field.name] = [f\"test_{i}\", f\"test_{i + 1}\", f\"test_{i + 2}\"]\n        else:\n            raise ValueError(f\"Unsupported field type: {field.data_type}\")\n    for vector in schema.vectors:\n        if vector.data_type == DataType.VECTOR_FP16:\n            doc_vectors[vector.name] = generate_constant_vector_recall(\n                i, vector.dimension, \"float16\"\n            )\n        elif vector.data_type == DataType.VECTOR_FP32:\n            doc_vectors[vector.name] = generate_constant_vector_recall(\n                i, vector.dimension, \"float32\"\n            )\n        elif vector.data_type == DataType.VECTOR_INT8:\n            doc_vectors[vector.name] = generate_constant_vector_recall(\n                i,\n                vector.dimension,\n                \"int8\",\n            )\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP32:\n            doc_vectors[vector.name] = generate_sparse_vector(i)\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP16:\n            doc_vectors[vector.name] = generate_sparse_vector(i)\n        else:\n            raise ValueError(f\"Unsupported vector type: {vector.data_type}\")\n    return doc_fields, doc_vectors\n\n\ndef generate_vectordict_update(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields = {}\n    doc_vectors = {}\n    for field in schema.fields:\n        if field.data_type == DataType.BOOL:\n            doc_fields[field.name] = (i + 1) % 2 == 0\n        elif field.data_type == DataType.INT32:\n            doc_fields[field.name] = i + 1\n        elif field.data_type == DataType.UINT32:\n            doc_fields[field.name] = i + 1\n        elif field.data_type == DataType.INT64:\n            doc_fields[field.name] = i + 1\n        elif field.data_type == DataType.UINT64:\n            
doc_fields[field.name] = i + 1\n        elif field.data_type == DataType.FLOAT:\n            doc_fields[field.name] = float(i + 1) + 0.1\n        elif field.data_type == DataType.DOUBLE:\n            doc_fields[field.name] = float(i + 1) + 0.11\n        elif field.data_type == DataType.STRING:\n            doc_fields[field.name] = f\"test_{i + 1}\"\n        elif field.data_type == DataType.ARRAY_BOOL:\n            doc_fields[field.name] = [(i + 1) % 2 == 0, (i + 1) % 3 == 0]\n        elif field.data_type == DataType.ARRAY_INT32:\n            doc_fields[field.name] = [i + 1, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT32:\n            doc_fields[field.name] = [i + 1, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_INT64:\n            doc_fields[field.name] = [i + 1, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_UINT64:\n            doc_fields[field.name] = [i + 1, i + 1, i + 2]\n        elif field.data_type == DataType.ARRAY_FLOAT:\n            doc_fields[field.name] = [float(i + 1.1), float(i + 2.1), float(i + 3.1)]\n        elif field.data_type == DataType.ARRAY_DOUBLE:\n            doc_fields[field.name] = [float(i + 1.11), float(i + 2.11), float(i + 3.11)]\n        elif field.data_type == DataType.ARRAY_STRING:\n            doc_fields[field.name] = [f\"test_{i + 1}\", f\"test_{i + 2}\", f\"test_{i + 3}\"]\n        else:\n            raise ValueError(f\"Unsupported field type: {field.data_type}\")\n    for vector in schema.vectors:\n        if vector.data_type == DataType.VECTOR_FP16:\n            doc_vectors[vector.name] = generate_constant_vector(\n                i + 1, vector.dimension, \"float16\"\n            )\n        elif vector.data_type == DataType.VECTOR_FP32:\n            doc_vectors[vector.name] = generate_constant_vector(\n                i + 1, vector.dimension, \"float32\"\n            )\n        elif vector.data_type == DataType.VECTOR_INT8:\n            doc_vectors[vector.name] = 
generate_constant_vector(\n                i + 1,\n                vector.dimension,\n                \"int8\",\n            )\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP32:\n            doc_vectors[vector.name] = generate_sparse_vector(i + 1)\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP16:\n            doc_vectors[vector.name] = generate_sparse_vector(i + 1)\n        else:\n            raise ValueError(f\"Unsupported vector type: {vector.data_type}\")\n    return doc_fields, doc_vectors\n\n\ndef generate_doc(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields, doc_vectors = generate_vectordict(i, schema)\n    doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n    return doc\n\n\ndef generate_doc_recall(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields, doc_vectors = generate_vectordict_recall(i, schema)\n    doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n    return doc\n\n\ndef generate_update_doc(i: int, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n    doc_fields, doc_vectors = generate_vectordict_update(i, schema)\n    doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n    return doc\n\n\ndef generate_doc_random(i, schema: CollectionSchema) -> Doc:\n    doc_fields = {}\n    doc_vectors = {}\n\n    random.seed(i)\n\n    for field in schema.fields:\n        if field.data_type == DataType.BOOL:\n            doc_fields[field.name] = random.choice([True, False])\n        elif field.data_type == DataType.INT32:\n            doc_fields[field.name] = random.randint(-2147483648, 2147483647)\n        elif field.data_type == DataType.UINT32:\n            doc_fields[field.name] = random.randint(0, 4294967295)\n        elif field.data_type == DataType.INT64:\n            doc_fields[field.name] = random.randint(\n                -9223372036854775808, 9223372036854775807\n        
    )\n        elif field.data_type == DataType.UINT64:\n            doc_fields[field.name] = random.randint(0, 18446744073709551615)\n        elif field.data_type == DataType.FLOAT:\n            doc_fields[field.name] = random.uniform(-3.4028235e38, 3.4028235e38)\n        elif field.data_type == DataType.DOUBLE:\n            doc_fields[field.name] = random.uniform(\n                -1.7976931348623157e308, 1.7976931348623157e308\n            )\n        elif field.data_type == DataType.STRING:\n            length = random.randint(1, 999)\n            doc_fields[field.name] = \"\".join(\n                random.choices(string.ascii_letters + string.digits, k=length)\n            )\n        elif field.data_type == DataType.ARRAY_BOOL:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.choice([True, False]) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_INT32:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(-2147483648, 2147483647) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_UINT32:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(0, 4294967295) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_INT64:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(-9223372036854775808, 9223372036854775807)\n                for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_UINT64:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(0, 18446744073709551615) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_FLOAT:\n           
 array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.uniform(-3.4028235e38, 3.4028235e38) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_DOUBLE:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)\n                for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_STRING:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                \"\".join(\n                    random.choices(\n                        string.ascii_letters + string.digits, k=random.randint(1, 100)\n                    )\n                )\n                for _ in range(array_length)\n            ]\n        else:\n            raise ValueError(f\"Unsupported field type: {field.data_type}\")\n\n    for vector in schema.vectors:\n        if vector.data_type == DataType.VECTOR_FP16:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), DEFAULT_VECTOR_DIMENSION, \"float16\"\n            )\n        elif vector.data_type == DataType.VECTOR_FP32:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), DEFAULT_VECTOR_DIMENSION, \"float32\"\n            )\n        elif vector.data_type == DataType.VECTOR_INT8:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), DEFAULT_VECTOR_DIMENSION, \"int8\"\n            )\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP32:\n            doc_vectors[vector.name] = generate_sparse_vector(random.randint(1, 100))\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP16:\n            doc_vectors[vector.name] = generate_sparse_vector(random.randint(1, 100))\n        else:\n            raise 
ValueError(f\"Unsupported vector type: {vector.data_type}\")\n\n    doc = Doc(id=i, fields=doc_fields, vectors=doc_vectors)\n    return doc\n\n\ndef generate_vectordict_random(schema: CollectionSchema):\n    doc_fields = {}\n    doc_vectors = {}\n    for field in schema.fields:\n        if field.data_type == DataType.BOOL:\n            doc_fields[field.name] = random.choice([True, False])\n        elif field.data_type == DataType.INT32:\n            doc_fields[field.name] = random.randint(-2147483648, 2147483647)\n        elif field.data_type == DataType.UINT32:\n            doc_fields[field.name] = random.randint(0, 4294967295)\n        elif field.data_type == DataType.INT64:\n            doc_fields[field.name] = random.randint(\n                -9223372036854775808, 9223372036854775807\n            )\n        elif field.data_type == DataType.UINT64:\n            doc_fields[field.name] = random.randint(0, 18446744073709551615)\n        elif field.data_type == DataType.FLOAT:\n            doc_fields[field.name] = random.uniform(-3.4028235e38, 3.4028235e38)\n        elif field.data_type == DataType.DOUBLE:\n            doc_fields[field.name] = random.uniform(\n                -1.7976931348623157e308, 1.7976931348623157e308\n            )\n        elif field.data_type == DataType.STRING:\n            length = random.randint(1, 999)\n            doc_fields[field.name] = \"\".join(\n                random.choices(string.ascii_letters + string.digits, k=length)\n            )\n        elif field.data_type == DataType.ARRAY_BOOL:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.choice([True, False]) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_INT32:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(-2147483648, 2147483647) for _ in range(array_length)\n            ]\n        elif 
field.data_type == DataType.ARRAY_UINT32:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(0, 4294967295) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_INT64:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(-9223372036854775808, 9223372036854775807)\n                for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_UINT64:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.randint(0, 18446744073709551615) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_FLOAT:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.uniform(-3.4028235e38, 3.4028235e38) for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_DOUBLE:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)\n                for _ in range(array_length)\n            ]\n        elif field.data_type == DataType.ARRAY_STRING:\n            array_length = random.randint(0, 10)\n            doc_fields[field.name] = [\n                \"\".join(\n                    random.choices(\n                        string.ascii_letters + string.digits, k=random.randint(1, 100)\n                    )\n                )\n                for _ in range(array_length)\n            ]\n        else:\n            raise ValueError(f\"Unsupported field type: {field.data_type}\")\n\n    for vector in schema.vectors:\n        if vector.data_type == DataType.VECTOR_FP16:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), vector.dimension, 
\"float16\"\n            )\n        elif vector.data_type == DataType.VECTOR_FP32:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), vector.dimension, \"float32\"\n            )\n        elif vector.data_type == DataType.VECTOR_INT8:\n            doc_vectors[vector.name] = generate_constant_vector(\n                random.randint(1, 100), vector.dimension, \"int8\"\n            )\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP32:\n            doc_vectors[vector.name] = generate_sparse_vector(random.randint(1, 100))\n        elif vector.data_type == DataType.SPARSE_VECTOR_FP16:\n            doc_vectors[vector.name] = generate_sparse_vector(random.randint(1, 100))\n        else:\n            raise ValueError(f\"Unsupported vector type: {vector.data_type}\")\n\n    return doc_fields, doc_vectors\n"
  },
  {
    "path": "python/tests/detail/fixture_helper.py",
    "content": "import pytest\nimport logging\n\nfrom typing import Any, Generator\nfrom zvec.typing import DataType, StatusCode, MetricType, QuantizeType\nimport zvec\nfrom zvec import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    FlatIndexParam,\n    IVFIndexParam,\n    FieldSchema,\n    VectorSchema,\n    CollectionSchema,\n    Collection,\n    Doc,\n    VectorQuery,\n)\n\nfrom support_helper import *\n\n\n@pytest.fixture(scope=\"session\")\ndef basic_schema(collection_name=\"test_collection\") -> CollectionSchema:\n    return CollectionSchema(\n        name=collection_name if len(collection_name) > 0 else \"test_collection\",\n        fields=[\n            FieldSchema(\n                \"id\",\n                DataType.INT64,\n                nullable=False,\n                index_param=InvertIndexParam(enable_range_optimization=True),\n            ),\n            FieldSchema(\n                \"name\", DataType.STRING, nullable=False, index_param=InvertIndexParam()\n            ),\n            FieldSchema(\"weight\", DataType.FLOAT, nullable=True),\n        ],\n        vectors=[\n            VectorSchema(\n                \"dense\",\n                DataType.VECTOR_FP32,\n                dimension=128,\n                index_param=HnswIndexParam(),\n            ),\n            VectorSchema(\n                \"sparse\", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()\n            ),\n        ],\n    )\n\n\n@pytest.fixture(scope=\"session\")\ndef full_schema(\n    nullable: bool = False,\n    has_index: bool = False,\n) -> CollectionSchema:\n    scalar_index_param = None\n    vector_index_param = None\n    if has_index:\n        scalar_index_param = InvertIndexParam(enable_range_optimization=True)\n        vector_index_param = HnswIndexParam()\n\n    fields = []\n    for k, v in DEFAULT_SCALAR_FIELD_NAME.items():\n        fields.append(\n            FieldSchema(\n                v,\n                k,\n                
nullable=nullable,\n                index_param=scalar_index_param,\n            )\n        )\n    vetors = []\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        vetors.append(\n            VectorSchema(\n                v,\n                k,\n                dimension=DEFAULT_VECTOR_DIMENSION,\n                index_param=vector_index_param,\n            )\n        )\n\n    return CollectionSchema(\n        name=\"full_collection\",\n        fields=fields,\n        vectors=vetors,\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_schema_new(request) -> CollectionSchema:\n    if hasattr(request, \"param\"):\n        nullable, has_index, vector_index = request.param\n    else:\n        nullable, has_index, vector_index = True, False, HnswIndexParam()\n\n    scalar_index_param = None\n    vector_index_param = None\n    if has_index:\n        scalar_index_param = InvertIndexParam(enable_range_optimization=True)\n        vector_index_param = vector_index\n\n    fields = []\n    for k, v in DEFAULT_SCALAR_FIELD_NAME.items():\n        fields.append(\n            FieldSchema(\n                v,\n                k,\n                nullable=nullable,\n                index_param=scalar_index_param,\n            )\n        )\n    vectors = []\n\n    if vector_index_param in [\n        HnswIndexParam(),\n        FlatIndexParam(),\n        HnswIndexParam(\n            metric_type=MetricType.IP,\n            m=16,\n            ef_construction=100,\n        ),\n        FlatIndexParam(\n            metric_type=MetricType.IP,\n        ),\n    ]:\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            vectors.append(\n                VectorSchema(\n                    v,\n                    k,\n                    dimension=DEFAULT_VECTOR_DIMENSION,\n                    index_param=vector_index_param,\n                )\n            )\n    elif vector_index_param in [\n        IVFIndexParam(),\n        IVFIndexParam(\n            
metric_type=MetricType.IP,\n            n_list=100,\n            n_iters=10,\n            use_soar=False,\n        ),\n        IVFIndexParam(\n            metric_type=MetricType.L2,\n            n_list=200,\n            n_iters=20,\n            use_soar=True,\n        ),\n        (\n            IVFIndexParam(\n                metric_type=MetricType.COSINE,\n                n_list=150,\n                n_iters=15,\n                use_soar=False,\n            )\n        ),\n        (\n            HnswIndexParam(\n                metric_type=MetricType.COSINE,\n                m=24,\n                ef_construction=150,\n            )\n        ),\n        (\n            HnswIndexParam(\n                metric_type=MetricType.L2,\n                m=32,\n                ef_construction=200,\n            )\n        ),\n        (\n            FlatIndexParam(\n                metric_type=MetricType.COSINE,\n            )\n        ),\n        (\n            FlatIndexParam(\n                metric_type=MetricType.L2,\n            )\n        ),\n    ]:\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=DEFAULT_VECTOR_DIMENSION,\n                        index_param=vector_index_param,\n                    )\n                )\n            elif v in [\"vector_int8_field\"] and vector_index_param in [\n                IVFIndexParam(\n                    metric_type=MetricType.L2,\n                    n_list=200,\n                    n_iters=20,\n                    use_soar=True,\n                ),\n                (\n                    HnswIndexParam(\n                        metric_type=MetricType.L2,\n                        m=32,\n                        ef_construction=200,\n                    )\n                ),\n                (\n       
             FlatIndexParam(\n                        metric_type=MetricType.L2,\n                    )\n                ),\n            ]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=DEFAULT_VECTOR_DIMENSION,\n                        index_param=vector_index_param,\n                    )\n                )\n            else:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=DEFAULT_VECTOR_DIMENSION,\n                        index_param=HnswIndexParam(),\n                    )\n                )\n    else:\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=DEFAULT_VECTOR_DIMENSION,\n                        index_param=vector_index_param,\n                    )\n                )\n            else:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=DEFAULT_VECTOR_DIMENSION,\n                        index_param=HnswIndexParam(),\n                    )\n                )\n\n    return CollectionSchema(\n        name=\"full_collection_new\",\n        fields=fields,\n        vectors=vectors,\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_schema_ivf(request) -> CollectionSchema:\n    if hasattr(request, \"param\"):\n        nullable, has_index, vector_index = request.param\n    else:\n        nullable, has_index, vector_index = True, False, IVFIndexParam()\n\n    scalar_index_param = None\n    vector_index_param = None\n    if has_index:\n        scalar_index_param = 
InvertIndexParam(enable_range_optimization=True)\n        vector_index_param = vector_index\n\n    fields = []\n    for k, v in DEFAULT_SCALAR_FIELD_NAME.items():\n        fields.append(\n            FieldSchema(\n                v,\n                k,\n                nullable=nullable,\n                index_param=scalar_index_param,\n            )\n        )\n    vectors = []\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n            vectors.append(\n                VectorSchema(\n                    v,\n                    k,\n                    dimension=DEFAULT_VECTOR_DIMENSION,\n                    index_param=vector_index_param,\n                )\n            )\n\n    return CollectionSchema(\n        name=\"full_collection_ivf\",\n        fields=fields,\n        vectors=vectors,\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_schema_1024(request) -> CollectionSchema:\n    if hasattr(request, \"param\"):\n        nullable, has_index, vector_index = request.param\n    else:\n        nullable, has_index, vector_index = True, False, HnswIndexParam()\n\n    scalar_index_param = None\n    vector_index_param = None\n    if has_index:\n        scalar_index_param = InvertIndexParam(enable_range_optimization=True)\n        vector_index_param = vector_index\n\n    fields = []\n    for k, v in DEFAULT_SCALAR_FIELD_NAME.items():\n        fields.append(\n            FieldSchema(\n                v,\n                k,\n                nullable=nullable,\n                index_param=scalar_index_param,\n            )\n        )\n    vectors = []\n\n    if vector_index_param in [\n        HnswIndexParam(),\n        FlatIndexParam(),\n        HnswIndexParam(\n            metric_type=MetricType.IP,\n            m=16,\n            ef_construction=100,\n        ),\n        FlatIndexParam(\n            metric_type=MetricType.IP,\n        ),\n    ]:\n        for k, v in 
DEFAULT_VECTOR_FIELD_NAME.items():\n            vectors.append(\n                VectorSchema(\n                    v,\n                    k,\n                    dimension=VECTOR_DIMENSION_1024,\n                    index_param=vector_index_param,\n                )\n            )\n    elif vector_index_param in [\n        IVFIndexParam(),\n        IVFIndexParam(\n            metric_type=MetricType.IP,\n            n_list=100,\n            n_iters=10,\n            use_soar=False,\n        ),\n        IVFIndexParam(\n            metric_type=MetricType.L2,\n            n_list=200,\n            n_iters=20,\n            use_soar=True,\n        ),\n        IVFIndexParam(\n            metric_type=MetricType.COSINE,\n            n_list=150,\n            n_iters=15,\n            use_soar=False,\n        ),\n    ]:\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=VECTOR_DIMENSION_1024,\n                        index_param=vector_index_param,\n                    )\n                )\n            elif v in [\"vector_int8_field\"] and vector_index_param in [\n                IVFIndexParam(\n                    metric_type=MetricType.L2,\n                    n_list=200,\n                    n_iters=20,\n                    use_soar=True,\n                ),\n                IVFIndexParam(\n                    metric_type=MetricType.COSINE,\n                    n_list=150,\n                    n_iters=15,\n                    use_soar=False,\n                ),\n            ]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=VECTOR_DIMENSION_1024,\n                        index_param=vector_index_param,\n                    )\n          
      )\n            else:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=VECTOR_DIMENSION_1024,\n                        index_param=HnswIndexParam(),\n                    )\n                )\n    else:\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\", \"vector_int8_field\"]:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=VECTOR_DIMENSION_1024,\n                        index_param=vector_index_param,\n                    )\n                )\n            else:\n                vectors.append(\n                    VectorSchema(\n                        v,\n                        k,\n                        dimension=VECTOR_DIMENSION_1024,\n                        index_param=HnswIndexParam(),\n                    )\n                )\n\n    return CollectionSchema(\n        name=\"full_collection_new\",\n        fields=fields,\n        vectors=vectors,\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef single_vector_schema(\n    data_type: DataType,\n) -> CollectionSchema:\n    vector_schema = [\n        VectorSchema(\n            DEFAULT_VECTOR_FIELD_NAME[data_type],\n            data_type,\n            DEFAULT_VECTOR_DIMENSION,\n        )\n    ]\n\n    return CollectionSchema(\n        name=\"full_collection\",\n        vectors=vector_schema,\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef single_vector_schema_with_index_param(\n    data_type: DataType, index_param\n) -> CollectionSchema:\n    vector_schema = [\n        VectorSchema(\n            DEFAULT_VECTOR_FIELD_NAME[data_type],\n            data_type,\n            DEFAULT_VECTOR_DIMENSION,\n            index_param,\n        )\n    ]\n\n    return CollectionSchema(\n        name=\"full_collection\",\n       
 vectors=vector_schema,\n    )\n\n\ndef create_collection_fixture(\n    collection_temp_dir, schema: CollectionSchema, collection_option: CollectionOption\n) -> Generator[Any, Any, Collection]:\n    \"\"\"Common helper function to create and manage collection fixtures.\"\"\"\n    coll = zvec.create_and_open(\n        path=str(collection_temp_dir),\n        schema=schema,\n        option=collection_option,\n    )\n\n    assert coll is not None, \"Failed to create and open collection\"\n    assert coll.path == str(collection_temp_dir)\n    assert coll.schema.name == schema.name\n    assert list(coll.schema.fields) == list(schema.fields)\n    assert list(coll.schema.vectors) == list(schema.vectors)\n    assert coll.option.read_only == collection_option.read_only\n    assert coll.option.enable_mmap == collection_option.enable_mmap\n\n    try:\n        yield coll\n    finally:\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                logging.warning(f\"Warning: failed to destroy collection: {e}\")\n\n\n@pytest.fixture(scope=\"function\")\ndef basic_collection(\n    collection_temp_dir, basic_schema, collection_option\n) -> Generator[Any, Any, Collection]:\n    yield from create_collection_fixture(\n        collection_temp_dir, basic_schema, collection_option\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef collection_option():\n    return CollectionOption(read_only=False, enable_mmap=True)\n\n\n@pytest.fixture(scope=\"function\")\ndef collection_temp_dir(tmp_path_factory):\n    temp_dir = tmp_path_factory.mktemp(\"zvec\")\n    collection_path = temp_dir / \"test_collection_path\"\n    return str(collection_path)\n\n\n@pytest.fixture(scope=\"function\")\ndef full_collection(\n    collection_temp_dir,\n    full_schema,\n    collection_option,\n    nullable: bool = True,\n    has_index: bool = False,\n) -> Generator[Any, Any, Collection]:\n    yield from 
create_collection_fixture(\n        collection_temp_dir, full_schema, collection_option\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_collection_new(\n    collection_temp_dir, full_schema_new, collection_option\n) -> Generator[Any, Any, Collection]:\n    yield from create_collection_fixture(\n        collection_temp_dir, full_schema_new, collection_option\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_collection_ivf(\n    collection_temp_dir, full_schema_ivf, collection_option\n) -> Generator[Any, Any, Collection]:\n    yield from create_collection_fixture(\n        collection_temp_dir, full_schema_ivf, collection_option\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef full_collection_1024(\n    collection_temp_dir, full_schema_1024, collection_option\n) -> Generator[Any, Any, Collection]:\n    yield from create_collection_fixture(\n        collection_temp_dir, full_schema_1024, collection_option\n    )\n\n\n@pytest.fixture\ndef sample_field_list(nullable: bool = True, scalar_index_param=None, name_prefix=\"\"):\n    field_list = []\n    for k, v in DEFAULT_SCALAR_FIELD_NAME.items():\n        field_list.append(\n            FieldSchema(\n                f\"{name_prefix}_{v}\" if len(name_prefix) > 0 else v,\n                k,\n                nullable=nullable,\n                index_param=scalar_index_param,\n            )\n        )\n    return field_list\n\n\n@pytest.fixture\ndef sample_vector_list(vector_index_param=None, name_prefix=\"\"):\n    vector_list = []\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        vector_list.append(\n            VectorSchema(\n                f\"{name_prefix}_{v}\" if len(name_prefix) > 0 else v,\n                k,\n                dimension=DEFAULT_VECTOR_DIMENSION,\n                index_param=vector_index_param,\n            )\n        )\n    return vector_list\n"
  },
  {
    "path": "python/tests/detail/params_helper.py",
    "content": "from zvec import (\n    CollectionOption,\n    IndexOption,\n    OptimizeOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    IVFIndexParam,\n    FlatIndexParam,\n    AlterColumnOption,\n    AddColumnOption,\n    DataType,\n    MetricType,\n    QuantizeType,\n)\n\n\nVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP = {\n    DataType.VECTOR_FP32: [\n        HnswIndexParam(),\n        HnswIndexParam(\n            metric_type=MetricType.IP,\n            m=16,\n            ef_construction=100,\n            quantize_type=QuantizeType.INT8,\n        ),\n        HnswIndexParam(\n            metric_type=MetricType.COSINE,\n            m=24,\n            ef_construction=150,\n            quantize_type=QuantizeType.INT4,\n        ),\n        HnswIndexParam(\n            metric_type=MetricType.L2,\n            m=32,\n            ef_construction=200,\n            quantize_type=QuantizeType.FP16,\n        ),\n        FlatIndexParam(),\n        FlatIndexParam(metric_type=MetricType.IP, quantize_type=QuantizeType.INT4),\n        FlatIndexParam(metric_type=MetricType.L2, quantize_type=QuantizeType.INT8),\n        FlatIndexParam(metric_type=MetricType.COSINE, quantize_type=QuantizeType.FP16),\n        IVFIndexParam(),\n        IVFIndexParam(\n            metric_type=MetricType.IP,\n            quantize_type=QuantizeType.INT4,\n            n_list=100,\n            n_iters=10,\n            use_soar=False,\n        ),\n        IVFIndexParam(\n            metric_type=MetricType.L2,\n            quantize_type=QuantizeType.INT8,\n            n_list=200,\n            n_iters=20,\n            use_soar=True,\n        ),\n        IVFIndexParam(\n            metric_type=MetricType.COSINE,\n            quantize_type=QuantizeType.FP16,\n            n_list=150,\n            n_iters=15,\n            use_soar=False,\n        ),\n    ],\n    DataType.VECTOR_FP16: [\n        HnswIndexParam(),\n        FlatIndexParam(),\n        # IVFIndexParam(),\n    ],\n    DataType.VECTOR_INT8: [\n       
 HnswIndexParam(),\n        FlatIndexParam(),\n        # IVFIndexParam(),\n    ],\n    DataType.SPARSE_VECTOR_FP32: [\n        HnswIndexParam(),\n        FlatIndexParam(),\n        HnswIndexParam(\n            metric_type=MetricType.IP,\n            m=16,\n            ef_construction=100,\n            quantize_type=QuantizeType.FP16,\n        ),\n    ],\n    DataType.SPARSE_VECTOR_FP16: [\n        HnswIndexParam(),\n        FlatIndexParam(),\n        HnswIndexParam(\n            metric_type=MetricType.IP,\n            m=16,\n            ef_construction=100,\n        ),\n    ],\n}\n\nVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS = [\n    (data_type, param)\n    for data_type, params in VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP.items()\n    for param in params\n]\n\nINVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP = {\n    DataType.VECTOR_FP32: [\n        InvertIndexParam(),\n    ],\n    DataType.VECTOR_FP16: [\n        InvertIndexParam(),\n    ],\n    DataType.VECTOR_INT8: [\n        InvertIndexParam(),\n    ],\n    DataType.SPARSE_VECTOR_FP32: [\n        HnswIndexParam(metric_type=MetricType.L2),\n        FlatIndexParam(metric_type=MetricType.COSINE),\n        IVFIndexParam(),\n        InvertIndexParam(),\n    ],\n    DataType.SPARSE_VECTOR_FP16: [\n        HnswIndexParam(metric_type=MetricType.L2),\n        FlatIndexParam(metric_type=MetricType.COSINE),\n        IVFIndexParam(),\n        InvertIndexParam(),\n    ],\n}\n\nINVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS = [\n    (data_type, param)\n    for data_type, params in INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP.items()\n    for param in params\n]\n\nCOLLECTION_NAME_MAX_LENGTH = 64\n\nCOLLECTION_NAME_VALID_LIST = [\n    \"col\",\n    \"C0llECTION\",\n    \"Collection1\",\n    \"collection_2\",\n    \"123collection-\",\n    \"a\" * COLLECTION_NAME_MAX_LENGTH,\n]\n\nCOLLECTION_NAME_INVALID_LIST = [\n    \"l\",\n    \"1C\",\n    \"\",\n    \" \",\n    None,\n    
\"abcdefghijklmnopqrstuvwxzy123456abcdefghijklmnopqrstuvwxzy1234561\",\n    \"test/\",\n    \"!@#$%^&*()test\",\n]\n\nFIELD_NAME_VALID_LIST = [\n    \"1\",\n    \"12\",\n    \"col\",\n    \"ID\",\n    \"name1\",\n    \"Weigt_12-\",\n    \"123age\",\n    \"name_with_underscores\",\n    \"123numeric_start\",\n    \"name-with-dashes\",\n]\n\nFIELD_NAME_INVALID_LIST = [\n    \"\",\n    \" \",\n    None,\n    \"abcdefghijklmnopqrstuvwxzy1234561\",\n    \"test/\",\n    \"!@#$%^&*()test\",\n    \"name@with#special$chars\",\n    \"name with spaces\",\n]\n\nFIELD_LIST_MAX_LENGTH = 1024\nVECTOR_LIST_MAX_LENGTH = 5\nDENSE_VECTOR_MAX_DIMENSION = 20000\nSPARSE_VECTOR_MAX_DIMENSION = 4096\n\nFIELD_VECTOR_LIST_DIMENSION_VALID_LIST = [\n    # field_list_len, vector_list_len, dimension\n    (1, 1, 1),\n    (2, 2, 512),\n    (512, 3, 1024),\n    (1024, 4, 20000),\n]\n\nFIELD_VECTOR_LIST_DIMENSION_INVALID_LIST = [\n    # field_list_len, vector_list_len, dimension\n    (1, 1, 0),\n    (1, 1, -1),\n    (1, 1, \"1\"),\n    (1, 1, 20001),\n]\n\n\nINCOMPATIBLE_CONSTRUCTOR_ERROR_MSG = \"incompatible constructor arguments\"\nSCHEMA_VALIDATE_ERROR_MSG = \"schema validate failed\"\nCREATE_READ_ONLY_ERROR_MSG = \"Unable to create collection with read-only mode\"\nINCOMPATIBLE_FUNCTION_ERROR_MSG = \"incompatible function arguments\"\nINVALID_PATH_ERROR_MSG = \"path validate failed\"\nINDEX_NON_EXISTENT_COLUMN_ERROR_MSG = \"not found in schema\"\nACCESS_DESTROYED_COLLECTION_ERROR_MSG = \"is already destroyed\"\nCOLLECTION_PATH_NOT_EXIST_ERROR_MSG = \"not exist\"\nNOT_SUPPORT_ADD_COLUMN_ERROR_MSG = \"Only support basic numeric data type\"\nNOT_EXIST_COLUMN_TO_DROP_ERROR_MSG = \"Column not exists\"\n"
  },
  {
    "path": "python/tests/detail/support_helper.py",
    "content": "from zvec import (\n    CollectionOption,\n    IndexOption,\n    OptimizeOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    IVFIndexParam,\n    FlatIndexParam,\n    DataType,\n    IndexType,\n    QuantizeType,\n)\n\nSUPPORT_SCALAR_DATA_TYPES = [\n    DataType.BOOL,\n    DataType.FLOAT,\n    DataType.DOUBLE,\n    DataType.INT32,\n    DataType.INT64,\n    DataType.UINT32,\n    DataType.UINT64,\n    DataType.STRING,\n    DataType.ARRAY_BOOL,\n    DataType.ARRAY_FLOAT,\n    DataType.ARRAY_DOUBLE,\n    DataType.ARRAY_INT32,\n    DataType.ARRAY_INT64,\n    DataType.ARRAY_UINT32,\n    DataType.ARRAY_UINT64,\n    DataType.ARRAY_STRING,\n]\n\nDEFAULT_SCALAR_FIELD_NAME = {\n    DataType.BOOL: \"bool_field\",\n    DataType.FLOAT: \"float_field\",\n    DataType.DOUBLE: \"double_field\",\n    DataType.INT32: \"int32_field\",\n    DataType.INT64: \"int64_field\",\n    DataType.UINT32: \"uint32_field\",\n    DataType.UINT64: \"uint64_field\",\n    DataType.STRING: \"string_field\",\n    DataType.ARRAY_BOOL: \"array_bool_field\",\n    DataType.ARRAY_FLOAT: \"array_float_field\",\n    DataType.ARRAY_DOUBLE: \"array_double_field\",\n    DataType.ARRAY_INT32: \"array_int32_field\",\n    DataType.ARRAY_INT64: \"array_int64_field\",\n    DataType.ARRAY_UINT32: \"array_uint32_field\",\n    DataType.ARRAY_UINT64: \"array_uint64_field\",\n    DataType.ARRAY_STRING: \"array_string_field\",\n}\n\nSUPPORT_SCALAR_INDEX_TYPES = [\n    IndexType.INVERT,\n]\n\nSUPPORT_VECTOR_DATA_TYPES = [\n    DataType.VECTOR_FP16,\n    DataType.VECTOR_FP32,\n    DataType.VECTOR_INT8,\n    DataType.SPARSE_VECTOR_FP32,\n    DataType.SPARSE_VECTOR_FP16,\n]\n\nSUPPORT_VECTOR_INDEX_TYPES = [\n    IndexType.FLAT,\n    IndexType.HNSW,\n    IndexType.IVF,\n]\n\nDEFAULT_VECTOR_FIELD_NAME = {\n    DataType.VECTOR_FP16: \"vector_fp16_field\",\n    DataType.VECTOR_FP32: \"vector_fp32_field\",\n    DataType.VECTOR_INT8: \"vector_int8_field\",\n    DataType.SPARSE_VECTOR_FP32: 
\"sparse_vector_fp32_field\",\n    DataType.SPARSE_VECTOR_FP16: \"sparse_vector_fp16_field\",\n}\n\nDEFAULT_VECTOR_DIMENSION = 128\nVECTOR_DIMENSION_1024 = 4\nSUPPORT_VECTOR_DATA_TYPE_INDEX_MAP = {\n    DataType.VECTOR_FP16: [IndexType.FLAT, IndexType.HNSW, IndexType.IVF],\n    DataType.VECTOR_FP32: [IndexType.FLAT, IndexType.HNSW, IndexType.IVF],\n    DataType.VECTOR_INT8: [IndexType.FLAT, IndexType.HNSW],\n    DataType.SPARSE_VECTOR_FP32: [IndexType.FLAT, IndexType.HNSW],\n    DataType.SPARSE_VECTOR_FP16: [IndexType.FLAT, IndexType.HNSW],\n}\n\nSUPPORT_VECTOR_DATA_TYPE_INDEX_MAP_PARAMS = [\n    (data_type, index_type)\n    for data_type, index_types in SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP.items()\n    for index_type in index_types\n]\n\nDEFAULT_INDEX_PARAMS = {\n    IndexType.FLAT: FlatIndexParam(),\n    IndexType.HNSW: HnswIndexParam(),\n    IndexType.IVF: IVFIndexParam(),\n    IndexType.INVERT: InvertIndexParam(),\n}\n\nSUPPORT_VECTOR_DATA_TYPE_QUANT_MAP = {\n    DataType.VECTOR_FP32: [QuantizeType.FP16, QuantizeType.INT8, QuantizeType.INT4],\n    DataType.SPARSE_VECTOR_FP32: [QuantizeType.FP16],\n}\n\nSUPPORT_ADD_COLUMN_DATA_TYPE = [\n    DataType.INT32,\n    DataType.UINT32,\n    DataType.INT64,\n    DataType.UINT64,\n    DataType.FLOAT,\n    DataType.DOUBLE,\n]\n\nNOT_SUPPORT_ADD_COLUMN_DATA_TYPE = [\n    DataType.BOOL,\n    DataType.STRING,\n    DataType.ARRAY_BOOL,\n    DataType.ARRAY_INT32,\n    DataType.ARRAY_INT64,\n    DataType.ARRAY_UINT32,\n    DataType.ARRAY_UINT64,\n    DataType.ARRAY_FLOAT,\n    DataType.ARRAY_DOUBLE,\n    DataType.ARRAY_STRING,\n]\n"
  },
  {
    "path": "python/tests/detail/test_collection_concurrency.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport logging\nimport pytest\nimport threading\nimport numpy as np\nimport zvec\n\nfrom zvec import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    Collection,\n    Doc,\n    DataType,\n    FieldSchema,\n    VectorSchema,\n)\n\n\nclass TestCollectionConcurrency:\n    @pytest.fixture(scope=\"function\")\n    def test_collection(self, tmp_path_factory):\n        \"\"\"Fixture to create a test collection\"\"\"\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n                FieldSchema(\"weight\", DataType.FLOAT, nullable=True),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                ),\n                VectorSchema(\n                    
\"sparse\", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()\n                ),\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None, \"Failed to create and open collection\"\n\n        yield coll\n\n        # Clean up\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n    def test_concurrent_read_write(self, test_collection: Collection):\n        results = []\n\n        def insert_docs(thread_id):\n            try:\n                docs = [\n                    Doc(\n                        id=f\"{thread_id}_{i}\",\n                        fields={\n                            \"id\": int(f\"{thread_id}{i}\"),\n                            \"name\": f\"thread_{thread_id}_doc_{i}\",\n                            \"weight\": float(i),\n                        },\n                        vectors={\n                            \"dense\": np.random.random(128).tolist(),\n                            \"sparse\": {1: float(i), 2: float(i * 2)},\n                        },\n                    )\n                    for i in range(5)\n                ]\n\n                result = test_collection.insert(docs)\n                results.append((thread_id, \"insert\", len(result)))\n            except Exception as e:\n                results.append((thread_id, \"insert_exception\", str(e)))\n\n        def query_docs(thread_id):\n            try:\n                result = test_collection.query(filter=\"id > 0\", topk=10)\n              
  results.append((thread_id, \"query\", len(result)))\n            except Exception as e:\n                results.append((thread_id, \"query_exception\", str(e)))\n\n        # Create threads for concurrent operations\n        threads = []\n\n        # Start insert threads\n        for i in range(3):\n            thread = threading.Thread(target=insert_docs, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Start query threads\n        for i in range(3):\n            thread = threading.Thread(target=query_docs, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Analyze results\n        insert_results = [r for r in results if r[1] == \"insert\"]\n        query_results = [r for r in results if r[1] == \"query\"]\n\n        logging.info(\n            f\"Concurrent read/write results - Inserts: {len(insert_results)}, Queries: {len(query_results)}\"\n        )\n\n        # At least some operations should succeed\n        assert len(insert_results) + len(query_results) > 0\n\n    def test_concurrent_query(self, test_collection: Collection):\n        # First insert some data\n        docs = [\n            Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, \"name\": f\"test_{i}\", \"weight\": float(i)},\n                vectors={\n                    \"dense\": np.random.random(128).tolist(),\n                    \"sparse\": {1: float(i), 2: float(i * 2)},\n                },\n            )\n            for i in range(20)\n        ]\n\n        insert_result = test_collection.insert(docs)\n        assert len(insert_result) == 20\n\n        results = []\n\n        def query_operation(thread_id):\n            \"\"\"Perform query operation from a thread\"\"\"\n            try:\n                result = test_collection.query(filter=f\"id > {thread_id}\", topk=5)\n                
results.append((thread_id, \"query\", len(result)))\n            except Exception as e:\n                results.append((thread_id, \"query_exception\", str(e)))\n\n        # Create multiple threads for concurrent queries\n        threads = []\n        for i in range(5):\n            thread = threading.Thread(target=query_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Analyze results\n        query_results = [r for r in results if r[1] == \"query\"]\n        logging.info(f\"Concurrent query results - Queries: {len(query_results)}\")\n\n        # All query operations should succeed\n        assert len(query_results) == 5\n\n    def test_concurrent_modifications(self, test_collection: Collection):\n        # First insert some data\n        docs = [\n            Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, \"name\": f\"test_{i}\", \"weight\": float(i)},\n                vectors={\n                    \"dense\": np.random.random(128).tolist(),\n                    \"sparse\": {1: float(i), 2: float(i * 2)},\n                },\n            )\n            for i in range(10)\n        ]\n\n        insert_result = test_collection.insert(docs)\n        assert len(insert_result) == 10\n\n        results = []\n\n        def update_operation(thread_id):\n            \"\"\"Perform update operation from a thread\"\"\"\n            try:\n                # Each thread updates different documents\n                update_docs = [\n                    Doc(\n                        id=f\"{i}\",\n                        fields={\n                            \"id\": i,\n                            \"name\": f\"updated_by_thread_{thread_id}\",\n                            \"weight\": float(i + thread_id),\n                        },\n                        vectors={\n                            \"dense\": 
np.random.random(128).tolist(),\n                            \"sparse\": {1: float(i) + 0.5, 2: float(i * 2) + 0.5},\n                        },\n                    )\n                    for i in range(thread_id * 2, thread_id * 2 + 2)\n                ]\n\n                result = test_collection.update(update_docs)\n                results.append((thread_id, \"update\", len(result)))\n            except Exception as e:\n                results.append((thread_id, \"update_exception\", str(e)))\n\n        def delete_operation(thread_id):\n            \"\"\"Perform delete operation from a thread\"\"\"\n            try:\n                # Each thread deletes different documents\n                delete_ids = [f\"{thread_id * 2 + 2}\", f\"{thread_id * 2 + 3}\"]\n                result = test_collection.delete(delete_ids)\n                results.append((thread_id, \"delete\", len(result)))\n            except Exception as e:\n                results.append((thread_id, \"delete_exception\", str(e)))\n\n        # Create threads for concurrent operations\n        threads = []\n\n        # Start update threads\n        for i in range(3):\n            thread = threading.Thread(target=update_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Start delete threads\n        for i in range(2):\n            thread = threading.Thread(target=delete_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Analyze results\n        update_results = [r for r in results if r[1] == \"update\"]\n        delete_results = [r for r in results if r[1] == \"delete\"]\n\n        logging.info(\n            f\"Concurrent modification results - Updates: {len(update_results)}, Deletes: {len(delete_results)}\"\n        )\n\n        # At least some operations should succeed\n        assert 
len(update_results) + len(delete_results) > 0\n\n    def test_read_write_locking(self, test_collection: Collection):\n        # Perform operations that should be thread-safe\n        docs = [\n            Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, \"name\": f\"test_{i}\", \"weight\": float(i)},\n                vectors={\n                    \"dense\": np.random.random(128).tolist(),\n                    \"sparse\": {1: float(i), 2: float(i * 2)},\n                },\n            )\n            for i in range(5)\n        ]\n\n        # Insert data\n        insert_result = test_collection.insert(docs)\n        assert len(insert_result) == 5\n\n        # Concurrent operations should not cause data corruption\n        results = []\n\n        def mixed_operation(thread_id):\n            \"\"\"Perform mixed operations from a thread\"\"\"\n            try:\n                # Mix of read and write operations\n                if thread_id % 2 == 0:\n                    # Read operation\n                    result = test_collection.fetch([f\"{thread_id % 5}\"])\n                    results.append((thread_id, \"read\", len(result)))\n                else:\n                    # Write operation\n                    doc = Doc(\n                        id=f\"{thread_id % 5}\",\n                        fields={\n                            \"id\": thread_id % 5,\n                            \"name\": f\"mixed_op_{thread_id}\",\n                            \"weight\": float(thread_id),\n                        },\n                        vectors={\n                            \"dense\": np.random.random(128).tolist(),\n                            \"sparse\": {1: float(thread_id), 2: float(thread_id * 2)},\n                        },\n                    )\n                    result = test_collection.upsert(doc)\n                    results.append((thread_id, \"write\", len(result)))\n            except Exception as e:\n                
results.append((thread_id, \"exception\", str(e)))\n\n        # Create multiple threads\n        threads = []\n        for i in range(10):\n            thread = threading.Thread(target=mixed_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Verify that the collection is still in a consistent state\n        final_result = test_collection.query()\n        assert len(final_result) >= 0  # Should not crash or return corrupted data\n\n    def test_race_condition_detection(self, test_collection: Collection):\n        # Insert initial data\n        docs = [\n            Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, \"name\": f\"initial_{i}\", \"weight\": float(i)},\n                vectors={\n                    \"dense\": np.random.random(128).tolist(),\n                    \"sparse\": {1: float(i), 2: float(i * 2)},\n                },\n            )\n            for i in range(10)\n        ]\n\n        insert_result = test_collection.insert(docs)\n        assert len(insert_result) == 10\n\n        # Perform many rapid concurrent operations\n        operation_count = 100\n        results = []\n\n        def rapid_operation(op_id):\n            \"\"\"Perform rapid operations\"\"\"\n            try:\n                # Alternate between different types of operations\n                if op_id % 4 == 0:\n                    # Insert\n                    doc = Doc(\n                        id=f\"rapid_{op_id}\",\n                        fields={\n                            \"id\": op_id,\n                            \"name\": f\"rapid_{op_id}\",\n                            \"weight\": float(op_id),\n                        },\n                        vectors={\n                            \"dense\": np.random.random(128).tolist(),\n                            \"sparse\": {1: float(op_id), 2: 
float(op_id * 2)},\n                        },\n                    )\n                    result = test_collection.insert(doc)\n                    results.append((\"insert\", len(result)))\n                elif op_id % 4 == 1:\n                    # Update\n                    doc = Doc(\n                        id=f\"{op_id % 10}\",\n                        fields={\n                            \"id\": op_id % 10,\n                            \"name\": f\"rapid_update_{op_id}\",\n                            \"weight\": float(op_id),\n                        },\n                        vectors={\n                            \"dense\": np.random.random(128).tolist(),\n                            \"sparse\": {1: float(op_id), 2: float(op_id * 2)},\n                        },\n                    )\n                    result = test_collection.update(doc)\n                    results.append((\"update\", len(result)))\n                elif op_id % 4 == 2:\n                    # Query\n                    result = test_collection.query(filter=f\"id > {op_id % 5}\", topk=3)\n                    results.append((\"query\", len(result)))\n                else:\n                    # Fetch\n                    result = test_collection.fetch([f\"{op_id % 10}\"])\n                    results.append((\"fetch\", len(result)))\n            except Exception as e:\n                results.append((\"exception\", str(e)))\n\n        # Create many threads for rapid concurrent operations\n        threads = []\n        for i in range(operation_count):\n            thread = threading.Thread(target=rapid_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Verify collection is still functional\n        final_query = test_collection.query()\n        assert len(final_query) >= 0  # Should not be corrupted\n\n        logging.info(\n          
  f\"Rapid concurrent operations completed - Total operations: {len(results)}\"\n        )\n"
  },
  {
    "path": "python/tests/detail/test_collection_create_and_open.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport threading\nimport os\n\nfrom distance_helper import *\nfrom fixture_helper import *\nfrom doc_helper import *\nfrom params_helper import *\n\n\ndef check_collection_info(\n    coll: Collection, schema: CollectionSchema, option: CollectionOption, path: str\n):\n    assert coll is not None, \"Failed to create and open collection\"\n    assert coll.path == path\n    assert coll.schema.name == schema.name\n    assert list(coll.schema.fields) == list(schema.fields)\n    assert list(coll.schema.vectors) == list(schema.vectors)\n    assert coll.option.read_only == option.read_only\n    assert coll.option.enable_mmap == option.enable_mmap\n\n\ndef check_collection_basic(coll: Collection, optimize: bool = False):\n    schema = coll.schema\n\n    docs = [generate_doc(i, schema) for i in range(10)]\n\n    results = coll.insert(docs=docs)\n    assert len(results) == len(docs)\n    for result in results:\n        assert result.ok()\n\n    assert coll.stats.doc_count == len(docs)\n\n    def check_fetch_query():\n        results = coll.fetch([str(i) for i in range(len(docs))])\n        assert len(results) == len(docs)\n        for i in range(len(docs)):\n            assert str(i) in results\n\n        results = coll.query()\n        assert len(results) == len(docs)\n\n    check_fetch_query()\n\n    if optimize:\n        coll.optimize()\n        
check_fetch_query()\n\n\ndef check_collection_full(coll: Collection):\n    test_doc = generate_doc(1, coll.schema)\n\n    insert_result = coll.insert(test_doc)\n    assert insert_result.ok()\n\n    stats = coll.stats\n    assert stats.doc_count == 1\n\n    fetched_docs = coll.fetch(ids=[\"1\"])\n    assert len(fetched_docs) == 1\n    assert \"1\" in fetched_docs\n    assert fetched_docs[\"1\"] is not None\n    assert is_doc_equal(fetched_docs[\"1\"], test_doc, coll.schema)\n\n    query_result = coll.query()\n    assert len(query_result) == 1\n\n    updated_doc = Doc(\n        id=\"1\",\n        fields={\"int32_field\": 1},\n        vectors={\"vector_fp32_field\": [0.2] * 128},\n    )\n    update_result = coll.update(updated_doc)\n    assert update_result.ok()\n\n    upserted_doc = generate_doc(1, coll.schema)\n    upsert_result = coll.upsert(upserted_doc)\n    assert upsert_result.ok()\n\n    # 8. Delete document\n    delete_result = coll.delete(\"1\")\n    assert delete_result.ok()\n\n    # Verify document was deleted\n    stats = coll.stats\n    assert stats.doc_count == 0\n\n\nvalid_collection_options = [\n    # (read_only, enable_mmap)\n    (False, True),\n    (False, False),\n]\ninvalid_collection_options = [\n    # (read_only, enable_mmap)\n    (True, True),\n    (True, False),\n]\nduplicate_names_test = [\n    (\"field1\", \"field1\", \"vector1\", \"vector2\"),\n    (\"field1\", \"field2\", \"vector1\", \"vector1\"),\n    (\n        \"shared_name1\",\n        \"shared_name2\",\n        \"shared_name1\",\n        \"shared_name2\",\n    ),\n]\nlong_names = [\n    \"a\" * 100,  # 100 characters\n    \"b\" * 200,  # 200 characters\n]\n\nvalid_path_list = [\n    \"/tmp/nonexistent/directory/test_collection\",\n    \"test/collection/with/slashes\",\n]\ninvalid_path_list = [\n    \"invalid:path\",\n    \"\",\n    \"test_collection_with_spaces \",\n    \"test@#$%collection\",\n]\n\n\nclass TestCreateAndOpen:\n    @pytest.mark.parametrize(\"collection_name\", 
COLLECTION_NAME_VALID_LIST)\n    def test_valid_collection_name(\n        self,\n        collection_temp_dir,\n        collection_name,\n        collection_option,\n        sample_field_list,\n        sample_vector_list,\n    ):\n        collection_schema = zvec.CollectionSchema(\n            name=collection_name,\n            fields=sample_field_list,\n            vectors=sample_vector_list,\n        )\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        check_collection_info(\n            coll, collection_schema, collection_option, collection_temp_dir\n        )\n        check_collection_basic(coll)\n\n        coll.destroy()\n\n    @pytest.mark.parametrize(\"collection_name\", COLLECTION_NAME_INVALID_LIST)\n    def test_invalid_collection_name(\n        self,\n        collection_temp_dir,\n        collection_name,\n        collection_option,\n        sample_field_list,\n        sample_vector_list,\n    ):\n        with pytest.raises(Exception) as exc_info:\n            collection_schema = zvec.CollectionSchema(\n                name=collection_name,\n                fields=sample_field_list,\n                vectors=sample_vector_list,\n            )\n\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    @pytest.mark.parametrize(\"name_prefix\", FIELD_NAME_VALID_LIST)\n    def test_valid_field_vector_name(\n        self,\n        collection_temp_dir,\n        collection_option,\n        name_prefix,\n        sample_field_list,\n        sample_vector_list,\n    ):\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=sample_field_list,\n            
vectors=sample_vector_list,\n        )\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        check_collection_info(\n            coll, collection_schema, collection_option, collection_temp_dir\n        )\n        check_collection_basic(coll)\n\n        coll.destroy()\n\n    @pytest.mark.parametrize(\"field_name\", FIELD_NAME_INVALID_LIST)\n    def test_invalid_field_name(\n        self, collection_temp_dir, collection_option, field_name\n    ):\n        with pytest.raises(Exception) as exc_info:\n            field_list = [FieldSchema(field_name, DataType.STRING)]\n            vector_list = [\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ]\n\n            collection_schema = zvec.CollectionSchema(\n                name=\"collection_name\", fields=field_list, vectors=vector_list\n            )\n\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    @pytest.mark.parametrize(\"vector_name\", FIELD_NAME_INVALID_LIST)\n    def test_invalid_vector_name(\n        self, collection_temp_dir, collection_option, vector_name\n    ):\n        with pytest.raises(Exception) as exc_info:\n            field_list = [\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                )\n            ]\n            vector_list = [\n                VectorSchema(vector_name, DataType.VECTOR_FP32, 
dimension=128)\n            ]\n\n            collection_schema = zvec.CollectionSchema(\n                name=\"collection_name\", fields=field_list, vectors=vector_list\n            )\n\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    @pytest.mark.parametrize(\n        \"field_list_len,vector_list_len,dimension\",\n        FIELD_VECTOR_LIST_DIMENSION_VALID_LIST,\n    )\n    def test_valid_field_vector_size_dimension(\n        self,\n        collection_temp_dir,\n        collection_option,\n        field_list_len,\n        vector_list_len,\n        dimension,\n    ):\n        field_list = []\n        vector_list = []\n        for i in range(0, field_list_len):\n            field_list.append(\n                FieldSchema(\"id_\" + str(i), DataType.INT64, nullable=True)\n            )\n\n        for i in range(0, vector_list_len):\n            vector_list.append(\n                VectorSchema(\n                    \"dense_vector_\" + str(i),\n                    DataType.VECTOR_FP32,\n                    dimension=dimension,\n                    index_param=HnswIndexParam(),\n                )\n            )\n\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_dense_vector_list\", fields=field_list, vectors=vector_list\n        )\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        check_collection_info(\n            coll, collection_schema, collection_option, collection_temp_dir\n        )\n        check_collection_basic(coll)\n\n        coll.destroy()\n\n    @pytest.mark.parametrize(\n        \"field_list_len,vector_list_len,dimension\",\n        
FIELD_VECTOR_LIST_DIMENSION_INVALID_LIST,\n    )\n    def test_invalid_field_vector_size_dimension(\n        self,\n        collection_temp_dir,\n        collection_option,\n        vector_list_len,\n        field_list_len,\n        dimension,\n    ):\n        with pytest.raises(Exception) as exc_info:\n            field_list = []\n            vector_list = []\n            for i in range(0, field_list_len):\n                field_list.append(\n                    FieldSchema(\n                        \"id_\" + str(i),\n                        DataType.INT64,\n                        nullable=False,\n                    )\n                )\n\n            for i in range(0, vector_list_len):\n                vector_list.append(\n                    VectorSchema(\n                        \"dense_vector_\" + str(i),\n                        DataType.VECTOR_FP32,\n                        dimension=dimension,\n                        index_param=HnswIndexParam(),\n                    )\n                )\n\n            collection_schema = zvec.CollectionSchema(\n                name=\"test_dense_vector_list\", fields=field_list, vectors=vector_list\n            )\n\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    def test_valid_single_vector_field_construction(\n        self, collection_temp_dir, collection_option\n    ):\n        field = FieldSchema(\n            \"id\",\n            DataType.INT64,\n            nullable=True,\n            index_param=InvertIndexParam(enable_range_optimization=True),\n        )\n\n        vector = VectorSchema(\n            \"dense_vector\",\n            DataType.VECTOR_FP32,\n            dimension=128,\n            index_param=HnswIndexParam(),\n        )\n\n        collection_schema = 
zvec.CollectionSchema(\n            name=\"test_single_dense_vector_non_list\",\n            fields=field,\n            vectors=vector,  # Non-list form\n        )\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        check_collection_info(\n            coll, collection_schema, collection_option, collection_temp_dir\n        )\n        check_collection_basic(coll)\n        coll.destroy()\n\n    def test_collection_concurrent_create(\n        self, collection_temp_dir, basic_schema, collection_option\n    ):\n        results = []\n        errors = []\n        lock = threading.Lock()\n\n        # Function to be executed by each thread\n        def create_collection_thread(thread_id):\n            try:\n                coll = zvec.create_and_open(\n                    path=collection_temp_dir,\n                    schema=basic_schema,\n                    option=collection_option,\n                )\n                with lock:\n                    results.append((thread_id, coll))\n            except Exception as e:\n                with lock:\n                    errors.append((thread_id, str(e)))\n\n        threads = []\n        for i in range(5):\n            thread = threading.Thread(target=create_collection_thread, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        for thread in threads:\n            thread.join()\n        assert len(results) == 1, (\n            f\"Expected exactly one successful creation, but got {len(results)}\"\n        )\n        assert len(errors) == 4, (\n            f\"Expected exactly four failures, but got {len(errors)}\"\n        )\n\n        successful_thread_id, successful_collection = results[0]\n        assert successful_collection is not None, (\n            \"Successful creation should return a valid collection\"\n        )\n        assert successful_collection.path == 
collection_temp_dir, (\n            \"Collection path mismatch\"\n        )\n\n    def test_create_open_loop(\n        self, collection_temp_dir, collection_option, full_schema\n    ):\n        for cycle in range(10):\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=full_schema,\n                option=collection_option,\n            )\n            assert coll is not None, (\n                f\"Failed to create and open collection in cycle {cycle}\"\n            )\n            assert coll.path == collection_temp_dir, (\n                f\"Collection path mismatch in cycle {cycle}\"\n            )\n\n            del coll\n\n            reopened_coll = zvec.open(\n                path=collection_temp_dir, option=collection_option\n            )\n            assert reopened_coll is not None, (\n                f\"Failed to reopen collection in cycle {cycle}\"\n            )\n            assert reopened_coll.path == collection_temp_dir, (\n                f\"Reopened collection path mismatch in cycle {cycle}\"\n            )\n\n            check_collection_full(reopened_coll)\n\n            reopened_coll.destroy()\n\n    @pytest.mark.parametrize(\n        \"data_type, index_param\", VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS\n    )\n    def test_valid_vector_index_params(\n        self,\n        data_type,\n        index_param,\n        single_vector_schema_with_index_param,\n        collection_temp_dir,\n        collection_option,\n    ):\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=single_vector_schema_with_index_param,\n            option=collection_option,\n        )\n\n        check_collection_info(\n            coll,\n            single_vector_schema_with_index_param,\n            collection_option,\n            collection_temp_dir,\n        )\n\n        check_collection_basic(coll, True)\n\n    @pytest.mark.parametrize(\n        \"data_type, 
index_param\", INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS\n    )\n    def test_invalid_vector_index_params(\n        self,\n        data_type,\n        index_param,\n        single_vector_schema_with_index_param,\n        collection_temp_dir,\n        collection_option,\n    ):\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=single_vector_schema_with_index_param,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    def test_open_concurrent_same_path(self, tmp_path_factory, collection_option):\n        \"\"\"Test concurrent opening of the same collection path.\n\n        - Multi-threading concurrency: 5 threads simultaneously open the same collection\n        - Result verification: Verify that only one can open successfully, others must fail\n        \"\"\"\n        # Create a temporary directory and path for the collection\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"concurrent_open_test_collection\"\n\n        # First, create a collection that we'll try to open concurrently\n        field_list = [\n            FieldSchema(\n                \"id\",\n                DataType.INT64,\n                nullable=False,\n                index_param=InvertIndexParam(enable_range_optimization=True),\n            ),\n            FieldSchema(\n                \"name\", DataType.STRING, nullable=False, index_param=InvertIndexParam()\n            ),\n        ]\n\n        vector_list = [\n            VectorSchema(\n                \"dense_vector\",\n                DataType.VECTOR_FP32,\n                dimension=128,\n                index_param=HnswIndexParam(),\n            )\n        ]\n\n        collection_schema = zvec.CollectionSchema(\n            name=\"concurrent_open_test_collection\",\n            
fields=field_list,\n            vectors=vector_list,\n        )\n\n        # Create the collection first\n        coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        # Close the collection so we can test opening it\n        if hasattr(coll, \"close\") and coll is not None:\n            coll.close()\n\n        # Shared variables to collect results from threads\n        results = []\n        errors = []\n\n        # Lock for thread-safe operations\n        lock = threading.Lock()\n        # Clean up the created collection reference\n        del coll\n\n        # Function to be executed by each thread\n        def open_collection_thread(thread_id):\n            try:\n                reopened_coll = zvec.open(\n                    path=str(collection_path), option=collection_option\n                )\n                with lock:\n                    results.append((thread_id, reopened_coll))\n                # Clean up the collection if opened successfully\n                if hasattr(reopened_coll, \"close\") and reopened_coll is not None:\n                    reopened_coll.close()\n            except Exception as e:\n                with lock:\n                    errors.append((thread_id, str(e)))\n\n        # Create and start 5 threads\n        threads = []\n        for i in range(5):\n            thread = threading.Thread(target=open_collection_thread, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Verify results:\n        # 1. Only one open should succeed (exactly one collection in results)\n        # 2. 
Others should fail (4 errors in errors)\n        assert len(results) == 1, (\n            f\"Expected exactly one successful open, but got {len(results)}\"\n        )\n        assert len(errors) == 4, (\n            f\"Expected exactly four failures, but got {len(errors)}\"\n        )\n\n        # Additional verification: check that the successful open has a valid collection\n        successful_thread_id, successful_collection = results[0]\n        assert successful_collection is not None, (\n            \"Successful open should return a valid collection\"\n        )\n        assert successful_collection.path == str(collection_path), (\n            \"Collection path mismatch\"\n        )\n\n    @pytest.mark.parametrize(\"read_only,enable_mmap\", valid_collection_options)\n    def test_valid_option(\n        self, collection_temp_dir, basic_schema, read_only, enable_mmap\n    ):\n        option = CollectionOption(read_only=read_only, enable_mmap=enable_mmap)\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=basic_schema,\n            option=option,\n        )\n\n        check_collection_info(coll, basic_schema, option, collection_temp_dir)\n        check_collection_basic(coll)\n\n        coll.destroy()\n\n    def test_valid_none_option(self, collection_temp_dir, basic_schema):\n        zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=basic_schema,\n            option=None,\n        )\n\n    @pytest.mark.parametrize(\"read_only,enable_mmap\", invalid_collection_options)\n    def test_invalid_option(\n        self, collection_temp_dir, basic_schema, read_only, enable_mmap\n    ):\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=basic_schema,\n                option=CollectionOption(read_only=read_only, enable_mmap=enable_mmap),\n            )\n\n        assert 
CREATE_READ_ONLY_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    @pytest.mark.parametrize(\n        \"field_name1,field_name2,vector_name1,vector_name2\",\n        duplicate_names_test,\n    )\n    def test_duplicate_field_names(\n        self,\n        collection_temp_dir,\n        collection_option,\n        field_name1,\n        field_name2,\n        vector_name1,\n        vector_name2,\n    ):\n        with pytest.raises(Exception) as exc_info:\n            collection_schema = zvec.CollectionSchema(\n                name=\"test_collection\",\n                fields=[\n                    FieldSchema(\n                        field_name1,\n                        DataType.INT64,\n                        nullable=False,\n                        index_param=InvertIndexParam(enable_range_optimization=True),\n                    ),\n                    FieldSchema(\n                        field_name2,\n                        DataType.INT64,\n                        nullable=False,\n                        index_param=InvertIndexParam(enable_range_optimization=True),\n                    ),\n                ],\n                vectors=[\n                    VectorSchema(\n                        vector_name1,\n                        DataType.VECTOR_FP32,\n                        dimension=128,\n                        index_param=HnswIndexParam(),\n                    ),\n                    VectorSchema(\n                        vector_name2,\n                        DataType.VECTOR_FP32,\n                        dimension=128,\n                        index_param=HnswIndexParam(),\n                    ),\n                ],\n            )\n\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    
@pytest.mark.parametrize(\"long_name\", long_names)\n    def test_invalid_long_field_names(\n        self, collection_option, collection_temp_dir, long_name\n    ):\n        collection_schema = zvec.CollectionSchema(\n            name=long_name,\n            fields=[\n                FieldSchema(\n                    long_name + \"_field\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    long_name + \"_vector\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    def test_invalid_empty_fields_and_vectors(\n        self, collection_temp_dir, collection_option\n    ):\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[],  # Empty fields\n            vectors=[],  # Empty vectors\n        )\n\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=collection_temp_dir,\n                schema=collection_schema,\n                option=collection_option,\n            )\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n\n    @pytest.mark.parametrize(\"valid_path\", valid_path_list)\n    def test_valid_path(self, basic_schema, collection_option, valid_path):\n        if os.path.exists(valid_path):\n            import shutil\n\n            
shutil.rmtree(valid_path)\n\n        coll = zvec.create_and_open(\n            path=valid_path, schema=basic_schema, option=collection_option\n        )\n\n        check_collection_info(coll, basic_schema, collection_option, valid_path)\n\n        coll.destroy()\n\n    @pytest.mark.parametrize(\"invalid_path\", invalid_path_list)\n    def test_invalid_path(self, basic_schema, collection_option, invalid_path):\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=invalid_path, schema=basic_schema, option=collection_option\n            )\n\n        assert INVALID_PATH_ERROR_MSG in str(exc_info.value), str(exc_info.value)\n"
  },
  {
    "path": "python/tests/detail/test_collection_ddl.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nfrom distance_helper import *\nfrom fixture_helper import *\nfrom doc_helper import *\nfrom params_helper import *\n\n\nclass TestDDL:\n    def test_collection_stats(self, basic_collection: Collection):\n        assert basic_collection.stats is not None\n        stats = basic_collection.stats\n        assert stats.doc_count == 0\n        assert len(stats.index_completeness) == 2\n        assert stats.index_completeness[\"dense\"] == 1\n        assert stats.index_completeness[\"sparse\"] == 1\n\n    def test_collection_destroy(\n        self, basic_collection: Collection, collection_temp_dir, collection_option\n    ):\n        doc = generate_doc(1, basic_collection.schema)\n\n        result = basic_collection.insert(doc)\n        assert bool(result)\n        assert result.ok()\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        basic_collection.destroy()\n\n        with pytest.raises(Exception) as exc_info:\n            stats = basic_collection.stats\n        assert ACCESS_DESTROYED_COLLECTION_ERROR_MSG in str(exc_info.value)\n\n        with pytest.raises(Exception) as exc_info:\n            zvec.open(path=collection_temp_dir, option=collection_option)\n        assert COLLECTION_PATH_NOT_EXIST_ERROR_MSG in str(exc_info.value)\n\n    def test_collection_flush(self, basic_collection: 
Collection):\n        doc = generate_doc(1, basic_collection.schema)\n\n        result = basic_collection.insert(doc)\n        assert bool(result)\n        assert result.ok()\n\n        basic_collection.flush()\n\n        fetched_docs = basic_collection.fetch([\"1\"])\n        assert \"1\" in fetched_docs\n        assert fetched_docs[\"1\"].id == \"1\"\n\n\nclass TestIndexDDL:\n    @pytest.mark.parametrize(\"field_name\", DEFAULT_SCALAR_FIELD_NAME.values())\n    @pytest.mark.parametrize(\"index_type\", SUPPORT_SCALAR_INDEX_TYPES)\n    def test_scalar_index_operation(\n        self,\n        full_collection: Collection,\n        field_name: str,\n        index_type: IndexType,\n    ):\n        # INSERT 0~5 Doc\n        docs = [generate_doc(i, full_collection.schema) for i in range(5)]\n\n        result = full_collection.insert(docs)\n        assert len(result) == 5\n        for item in result:\n            assert item.ok()\n\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 5\n\n        if field_name in [\"bool_field\"]:\n            query_filter = f\"{field_name} = true\"\n        elif field_name in [\"double_field\", \"float_field\"]:\n            query_filter = f\"{field_name} >= 3.0\"\n        elif field_name in [\n            \"int32_field\",\n            \"int64_field\",\n            \"uint32_field\",\n            \"uint64_field\",\n        ]:\n            query_filter = f\"{field_name} >= 30\"\n        elif field_name in [\"string_field\"]:\n            query_filter = f\"{field_name} >= 'test_3'\"\n        elif field_name in [\"array_bool_field\"]:\n            query_filter = f\"{field_name} contain_any (false)\"\n        elif field_name in [\"array_double_field\", \"array_float_field\"]:\n            query_filter = f\"{field_name} contain_any (3.0, 4.0)\"\n        elif field_name in [\n            \"array_int64_field\",\n            \"array_int32_field\",\n            \"array_uint64_field\",\n         
   \"array_uint32_field\",\n        ]:\n            query_filter = f\"{field_name} contain_any (3, 4)\"\n        elif field_name == \"array_string_field\":\n            query_filter = f\"{field_name} contain_any ('test_3', 'test_4')\"\n        else:\n            assert False, f\"Unsupported field type for index creation: {field_name}\"\n\n        query_result_before = full_collection.query(filter=query_filter, topk=10)\n\n        if index_type not in DEFAULT_INDEX_PARAMS:\n            pytest.fail(f\"Unsupported index type for index creation: {index_type}\")\n        index_param = DEFAULT_INDEX_PARAMS[index_type]\n\n        full_collection.create_index(\n            field_name=field_name, index_param=index_param, option=IndexOption()\n        )\n        stats_after_create = full_collection.stats\n        assert stats_after_create is not None\n        assert stats_after_create.doc_count == 5\n\n        query_result_after = full_collection.query(filter=query_filter, topk=10)\n\n        assert len(query_result_before) == len(query_result_after), (\n            f\"Query result count mismatch for {field_name} with index type {index_type}: before={len(query_result_before)}, after={len(query_result_after)}\"\n        )\n\n        before_ids = set(doc.id for doc in query_result_before)\n        after_ids = set(doc.id for doc in query_result_after)\n        assert before_ids == after_ids, (\n            f\"Query result IDs mismatch for {field_name} with index type {index_type}: before={before_ids}, after={after_ids}\"\n        )\n\n        # INSERT 5~8 Doc\n        new_docs = [generate_doc(i, full_collection.schema) for i in range(5, 8)]\n\n        result = full_collection.insert(new_docs)\n        assert len(result) == 3\n        for item in result:\n            assert item.ok()\n\n        stats_after_insert1 = full_collection.stats\n        assert stats_after_insert1 is not None\n        assert stats_after_insert1.doc_count == 8\n\n        fetched_docs = 
full_collection.fetch([f\"{i}\" for i in range(5, 8)])\n        assert len(fetched_docs) == 3\n\n        for i in range(5, 8):\n            doc_id = f\"{i}\"\n            assert doc_id in fetched_docs\n\n        query_result = full_collection.query(filter=query_filter, topk=20)\n        assert len(query_result) >= len(query_result_before)\n\n        full_collection.drop_index(field_name=field_name)\n\n        # Insert 8~10 Doc\n        more_docs = [generate_doc(i, full_collection.schema) for i in range(8, 10)]\n\n        result = full_collection.insert(more_docs)\n        assert len(result) == 2\n        for item in result:\n            assert item.ok()\n\n        stats_after_insert2 = full_collection.stats\n        assert stats_after_insert2 is not None\n        assert stats_after_insert2.doc_count == 10\n\n        fetched_docs = full_collection.fetch([f\"{i}\" for i in range(8, 10)])\n        assert len(fetched_docs) == 2\n\n        for i in range(8, 10):\n            doc_id = f\"{i}\"\n            assert doc_id in fetched_docs\n\n        query_result = full_collection.query(filter=query_filter, topk=20)\n        assert len(query_result) >= len(query_result_before)\n\n        final_stats = full_collection.stats\n        assert final_stats is not None\n        assert final_stats.doc_count == 10\n        full_collection.destroy()\n\n    @pytest.mark.parametrize(\"field_name\", DEFAULT_SCALAR_FIELD_NAME.values())\n    @pytest.mark.parametrize(\"index_type\", SUPPORT_SCALAR_INDEX_TYPES)\n    def test_duplicate_create_index(\n        self, full_collection: Collection, field_name: str, index_type: IndexType\n    ):\n        docs = [generate_doc(i, full_collection.schema) for i in range(10)]\n\n        result = full_collection.insert(docs)\n        assert bool(result)\n        for item in result:\n            assert item.ok()\n\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 10\n\n        if field_name in 
[\"bool_field\"]:\n            query_filter = f\"{field_name} = true\"\n        elif field_name in [\"double_field\", \"float_field\"]:\n            query_filter = f\"{field_name} >= 3.0\"\n        elif field_name in [\n            \"int32_field\",\n            \"int64_field\",\n            \"uint32_field\",\n            \"uint64_field\",\n        ]:\n            query_filter = f\"{field_name} >= 30\"\n        elif field_name in [\"string_field\"]:\n            query_filter = f\"{field_name} >= 'test_3'\"\n        elif field_name in [\"array_bool_field\"]:\n            query_filter = f\"{field_name} contain_any (false)\"\n        elif field_name in [\"array_double_field\", \"array_float_field\"]:\n            query_filter = f\"{field_name} contain_any (3.0, 4.0)\"\n        elif field_name in [\n            \"array_int64_field\",\n            \"array_int32_field\",\n            \"array_uint64_field\",\n            \"array_uint32_field\",\n        ]:\n            query_filter = f\"{field_name} contain_any (3, 4)\"\n        elif field_name == \"array_string_field\":\n            query_filter = f\"{field_name} contain_any ('test_3', 'test_4')\"\n        else:\n            assert False, f\"Unsupported field type for index creation: {field_name}\"\n\n        query_result_before = full_collection.query(filter=query_filter, topk=5)\n\n        if index_type not in DEFAULT_INDEX_PARAMS:\n            pytest.fail(f\"Unsupported index type for index creation: {index_type}\")\n        index_param = DEFAULT_INDEX_PARAMS[index_type]\n\n        full_collection.create_index(\n            field_name=field_name, index_param=index_param, option=IndexOption()\n        )\n\n        query_result_after = full_collection.query(filter=query_filter, topk=5)\n\n        assert len(query_result_before) == len(query_result_after), (\n            f\"Query result count mismatch: before={len(query_result_before)}, after={len(query_result_after)}\"\n        )\n\n        before_ids = set(doc.id for 
doc in query_result_before)\n        after_ids = set(doc.id for doc in query_result_after)\n        assert before_ids == after_ids, (\n            f\"Query result IDs mismatch: before={before_ids}, after={after_ids}\"\n        )\n\n        full_collection.create_index(\n            field_name=field_name, index_param=index_param, option=IndexOption()\n        )\n\n    def test_optimize(self, full_collection: Collection):\n        docs = [generate_doc(i, full_collection.schema) for i in range(10)]\n\n        result = full_collection.insert(docs)\n        assert bool(result)\n        for item in result:\n            assert item.ok()\n\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 10\n\n        full_collection.optimize(option=OptimizeOption())\n\n        fetched_docs = full_collection.fetch([\"1\"])\n        assert \"1\" in fetched_docs\n        assert fetched_docs[\"1\"].id == \"1\"\n\n    @pytest.mark.parametrize(\n        \"vector_type, index_type\", SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP_PARAMS\n    )\n    def test_vector_index_operation(\n        self,\n        full_collection: Collection,\n        vector_type: DataType,\n        index_type: IndexType,\n    ):\n        vector_field_name = DEFAULT_VECTOR_FIELD_NAME[vector_type]\n\n        docs = [generate_doc(i, full_collection.schema) for i in range(5)]\n\n        result = full_collection.insert(docs)\n        assert len(result) == 5, (\n            f\"Expected 5 insertion results, got {len(result)} for vector type {vector_type} and index type {index_type}\"\n        )\n        for i, item in enumerate(result):\n            assert item.ok(), (\n                f\"Before create_index,result={result},Insertion result {i} is not OK for vector type {vector_type} and index type {index_type} and result={result}\"\n            )\n\n        stats = full_collection.stats\n        assert stats is not None, (\n            f\"stats is None for vector type {vector_type} 
and index type {index_type}\"\n        )\n        assert stats.doc_count == 5, (\n            f\"doc_count!=5 for vector type {vector_type} and index type {index_type}\"\n        )\n\n        if index_type not in DEFAULT_INDEX_PARAMS:\n            pytest.fail(\n                f\"Unsupported index type {index_type} for vector type {vector_type} in test_vector_index_operation\"\n            )\n        index_param = DEFAULT_INDEX_PARAMS[index_type]\n\n        full_collection.create_index(\n            field_name=vector_field_name,\n            index_param=index_param,\n            option=IndexOption(),\n        )\n\n        stats_after_create = full_collection.stats\n        assert stats_after_create is not None, (\n            f\"stats_after_create_index is None for vector type {vector_type} and index type {index_type}\"\n        )\n\n        new_docs = [generate_doc(i, full_collection.schema) for i in range(5, 8)]\n\n        result = full_collection.insert(new_docs)\n        assert len(result) == 3, (\n            f\"Expected 3 insertion results, got {len(result)} for vector type {vector_type} and index type {index_type}\"\n        )\n        for i, item in enumerate(result):\n            assert item.ok(), (\n                f\"Before drop_index,result={result},Insertion result {i} is not OK for vector type {vector_type} and index type {index_type}\"\n            )\n\n        stats_after_insert1 = full_collection.stats\n        assert stats_after_insert1 is not None, (\n            f\"stats_after_insert1 is None for vector type {vector_type} and index type {index_type}\"\n        )\n        assert stats_after_insert1.doc_count == 8, (\n            f\"Expected 8 documents, got {stats_after_insert1.doc_count} for vector type {vector_type} and index type {index_type}\"\n        )\n\n        fetched_docs = full_collection.fetch([f\"{i}\" for i in range(5, 8)])\n        assert len(fetched_docs) == 3, (\n            f\"Expected 3 fetched 
documents, got {len(fetched_docs)} for vector type {vector_type} and index type {index_type}\"\n        )\n\n        for i in range(5, 8):\n            doc_id = f\"{i}\"\n            assert doc_id in fetched_docs, (\n                f\"Document ID {doc_id} not found in fetched results for vector type {vector_type} and index type {index_type}\"\n            )\n            assert fetched_docs[doc_id].id == doc_id, (\n                f\"Document {doc_id} has incorrect ID field value for vector type {vector_type} and index type {index_type}\"\n            )\n\n        full_collection.drop_index(field_name=vector_field_name)\n\n        more_docs = [generate_doc(i, full_collection.schema) for i in range(8, 10)]\n        result = full_collection.insert(more_docs)\n        assert len(result) == 2, (\n            f\"Expected 2 insertion results, got {len(result)} for vector type {vector_type} and index type {index_type}\"\n        )\n        for i, item in enumerate(result):\n            assert item.ok(), (\n                f\"After drop_index,Insertion result {i} is not OK for vector type {vector_type} and index type {index_type} and result={result}\"\n            )\n\n        # Verify document count after second insertion\n        stats_after_insert2 = full_collection.stats\n        assert stats_after_insert2 is not None, (\n            f\"stats_after_insert2 is None for vector type {vector_type} and index type {index_type}\"\n        )\n        assert stats_after_insert2.doc_count == 10, (\n            f\"Expected 10 documents, got {stats_after_insert2.doc_count} for vector type {vector_type} and index type {index_type}\"\n        )\n\n        # Fetch data\n        fetched_docs = full_collection.fetch([f\"{i}\" for i in range(8, 10)])\n        assert len(fetched_docs) == 2, (\n            f\"Expected 2 fetched documents, got {len(fetched_docs)} for vector type {vector_type} and index type {index_type}\"\n        )\n\n        # Verify fetched documents have correct data\n 
       for i in range(8, 10):\n            doc_id = f\"{i}\"\n            assert doc_id in fetched_docs, (\n                f\"Document ID {doc_id} not found in fetched results for vector type {vector_type} and index type {index_type}\"\n            )\n            assert fetched_docs[doc_id].id == doc_id, (\n                f\"Document {doc_id} has incorrect ID field value for vector type {vector_type} and index type {index_type}\"\n            )\n\n        # Final verification\n        final_stats = full_collection.stats\n        assert final_stats is not None, (\n            f\"final_stats is None for vector type {vector_type} and index type {index_type}\"\n        )\n        assert final_stats.doc_count == 10, (\n            f\"Expected 10 documents, got {final_stats.doc_count} for vector type {vector_type} and index type {index_type}\"\n        )\n        full_collection.destroy()\n\n    @staticmethod\n    def create_collection(\n        collection_path, collection_option: CollectionOption\n    ) -> Collection:\n        schema = CollectionSchema(\n            name=\"test_collection_invalid_vector_index\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=True,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                ),\n            ],\n        )\n        coll = zvec.create_and_open(\n            path=collection_path, schema=schema, option=collection_option\n        )\n   
     assert coll is not None, \"Failed to create and open collection\"\n        return coll\n\n    @staticmethod\n    def check_error_message(exc_info, invalid_name):\n        if type(invalid_name) is str:\n            assert INDEX_NON_EXISTENT_COLUMN_ERROR_MSG in str(exc_info.value), (\n                \"Error message is unreasonable: e=\" + str(exc_info.value)\n            )\n        else:\n            assert INCOMPATIBLE_FUNCTION_ERROR_MSG in str(exc_info.value), (\n                \"Error message is unreasonable: e=\" + str(exc_info.value)\n            )\n\n    @pytest.mark.parametrize(\n        \"invalid_field_name,invalid_vector_name\",\n        [\n            (\"\", \"\"),  # Empty string\n            (\" \", \" \"),  # Space only\n            (\"v\" * 33, \"v\" * 33),  # Too long (33 characters, exceeds 32)\n            (\"vector name\", \"vector_name\"),  # Contains space\n            (\"vector@name\", \"vector@name\"),  # Contains special character\n            (\"vector/name\", \"vector/name\"),  # Contains slash\n            (\"vector\\\\name\", \"vector\\\\name\"),  # Contains backslash\n            (\"vector.name\", \"vector.name\"),  # Contains dot\n            (\"vector$data\", \"vector$data\"),  # Contains dollar sign\n            (\"vector+name\", \"vector+name\"),  # Contains plus sign\n            (\"vector=name\", \"vector=name\"),  # Contains equals sign\n            (None, None),  # None value,\n            (1, 1),\n            (1.1, 1.1),\n        ],\n    )\n    def test_invalid_field_and_vector_name(\n        self,\n        collection_temp_dir,\n        collection_option: CollectionOption,\n        invalid_field_name: Any,\n        invalid_vector_name: Any,\n    ):\n        coll = self.create_collection(collection_temp_dir, collection_option)\n        with pytest.raises(Exception) as exc_info:\n            coll.create_index(\n                field_name=invalid_vector_name,\n                index_param=HnswIndexParam(),\n                
option=IndexOption(),\n            )\n        self.check_error_message(exc_info, invalid_vector_name)\n        with pytest.raises(Exception) as exc_info:\n            coll.create_index(\n                field_name=invalid_field_name,\n                index_param=InvertIndexParam(),\n                option=IndexOption(),\n            )\n        self.check_error_message(exc_info, invalid_field_name)\n        coll.destroy()\n        coll = self.create_collection(collection_temp_dir, collection_option)\n        with pytest.raises(Exception) as exc_info:\n            coll.drop_index(field_name=invalid_vector_name)\n        self.check_error_message(exc_info, invalid_vector_name)\n        with pytest.raises(Exception) as exc_info:\n            coll.drop_index(field_name=invalid_field_name)\n        self.check_error_message(exc_info, invalid_field_name)\n        coll.destroy()\n\n    @pytest.mark.parametrize(\n        \"field_name,vector_name\",\n        [\n            (\"2\", \"3\"),\n            (\"col\", \"co1\"),\n            (\"ID\", \"IM\"),\n            (\"name-1\", \"name2\"),\n            (\"Weigt_12\", \"Weigt_13\"),\n            (\"123age\", \"123agl\"),\n        ],\n    )\n    def test_valid_field_and_vector_name(\n        self,\n        collection_temp_dir,\n        collection_option: CollectionOption,\n        field_name: str,\n        vector_name: str,\n    ):\n        schema = zvec.CollectionSchema(\n            name=\"test_index_names\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(field_name, DataType.STRING, nullable=True),\n            ],\n            vectors=[\n                VectorSchema(\n                    vector_name,\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n             
       index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir, schema=schema, option=collection_option\n        )\n\n        assert coll is not None, (\n            f\"Failed to create and open collection with field_name={field_name}, vector_name={vector_name}\"\n        )\n\n        # Insert some data\n        docs = [\n            Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, field_name: f\"value_{i}\"},\n                vectors={vector_name: [float(j % 10) for j in range(128)]},\n            )\n            for i in range(5)\n        ]\n\n        result = coll.insert(docs)\n        assert len(result) == 5, (\n            f\"Expected 5 insertion results, got {len(result)} for field_name={field_name}, vector_name={vector_name}\"\n        )\n        for item in result:\n            assert item.ok(), (\n                f\"Insertion failed for field_name={field_name}, vector_name={vector_name}: {item}\"\n            )\n\n        # Create index on field\n        coll.create_index(\n            field_name=field_name,\n            index_param=InvertIndexParam(),\n            option=IndexOption(),\n        )\n\n        # Create index on vector\n        coll.create_index(\n            field_name=vector_name,\n            index_param=HnswIndexParam(),\n            option=IndexOption(),\n        )\n\n        # Verify indexes were created successfully\n        stats = coll.stats\n        assert stats is not None, (\n            f\"Stats is None for field_name={field_name}, vector_name={vector_name}\"\n        )\n\n        coll.destroy()\n\n    def test_compicated_workflow(\n        self,\n        collection_temp_dir,\n        basic_schema: CollectionSchema,\n        collection_option: CollectionOption,\n    ):\n        \"\"\"\n        Test the complete workflow:\n        1. Create collection\n        2. Create index\n        3. Insert doc\n        4. 
Upsert\n        5. Update doc\n        6. Fetch doc\n        7. Query doc\n        8. Drop index\n        9. Insert doc\n        10. Update doc\n        11. Upsert doc\n        12. Fetch doc\n        13. Query doc\n        14. Flush\n        15. Destroy\n        \"\"\"\n        # Step 1: Create collection\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n            schema=basic_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None, \"Failed to create and open collection\"\n        assert coll.path == collection_temp_dir\n        assert coll.schema.name == basic_schema.name\n        assert coll.stats.doc_count == 0\n\n        # Step 2: Create index\n        coll.create_index(\n            field_name=\"name\", index_param=InvertIndexParam(), option=IndexOption()\n        )\n        # Verify index was created\n        stats = coll.stats\n        assert stats is not None, \"coll.stats is None!\"\n\n        # Step 3: Insert doc\n        doc1 = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test1\", \"weight\": 80.5},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = coll.insert(doc1)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 1\n\n        # Step 4: Upsert (existing doc)\n        doc1_updated = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test1_updated\", \"weight\": 85.0},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.5, 2: 2.5},\n            },\n        )\n\n        result = coll.upsert(doc1_updated)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 1\n\n        # Step 5: Update doc\n        doc2 = Doc(\n            id=\"2\",\n            fields={\"id\": 2, 
\"name\": \"test2\", \"weight\": 90.0},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 3.0, 2: 4.0},\n            },\n        )\n\n        # First insert doc2\n        result = coll.insert(doc2)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 2\n\n        # Then update it\n        doc2_updated = Doc(\n            id=\"2\",\n            fields={\"id\": 2, \"name\": \"test2_updated\", \"weight\": 95.0},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 3.5, 2: 4.5},\n            },\n        )\n\n        result = coll.update(doc2_updated)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 2\n\n        # Step 6: Fetch doc\n        fetched_docs = coll.fetch([\"1\", \"2\"])\n        assert len(fetched_docs) == 2\n        assert \"1\" in fetched_docs\n        assert \"2\" in fetched_docs\n        assert fetched_docs[\"1\"].field(\"name\") == \"test1_updated\"\n        assert fetched_docs[\"2\"].field(\"name\") == \"test2_updated\"\n\n        # Step 7: Query doc\n        query_result = coll.query(filter=\"id >= 1\", topk=10)\n        assert len(query_result) == 2\n\n        # Step 8: Drop index\n        coll.drop_index(field_name=\"name\")\n\n        # Step 9: Insert doc\n        doc3 = Doc(\n            id=\"3\",\n            fields={\"id\": 3, \"name\": \"test3\", \"weight\": 100.0},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 5.0, 2: 6.0},\n            },\n        )\n\n        result = coll.insert(doc3)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 3\n\n        # Step 10: Update doc\n        doc3_updated = Doc(\n            id=\"3\",\n            fields={\"id\": 3, \"name\": \"test3_updated\", \"weight\": 105.0},\n          
  vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 5.5, 2: 6.5},\n            },\n        )\n\n        result = coll.update(doc3_updated)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 3\n\n        # Step 11: Upsert doc\n        doc4 = Doc(\n            id=\"4\",\n            fields={\"id\": 4, \"name\": \"test4\", \"weight\": 110.0},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 7.0, 2: 8.0},\n            },\n        )\n\n        result = coll.upsert(doc4)\n        assert bool(result)\n        assert result.ok()\n        assert coll.stats.doc_count == 4\n\n        # Step 12: Fetch doc\n        fetched_docs = coll.fetch([\"3\", \"4\"])\n        assert len(fetched_docs) == 2\n        assert \"3\" in fetched_docs\n        assert \"4\" in fetched_docs\n        assert fetched_docs[\"3\"].field(\"name\") == \"test3_updated\"\n        assert fetched_docs[\"4\"].field(\"name\") == \"test4\"\n\n        # Step 13: Query doc\n        query_result = coll.query(filter=\"id >= 3\", topk=10)\n        assert len(query_result) == 2\n\n        # Step 14: Flush\n        coll.flush()\n\n        # Verify data is still accessible after flush\n        fetched_docs = coll.fetch([\"1\", \"2\", \"3\", \"4\"])\n        assert len(fetched_docs) == 4\n\n        # Step 15: Destroy\n        coll.destroy()\n\n    @pytest.mark.parametrize(\n        \"data_type, index_param\", VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS\n    )\n    def test_vector_index_params(\n        self,\n        collection_temp_dir,\n        collection_option: CollectionOption,\n        data_type: DataType,\n        index_param,\n        single_vector_schema,\n    ):\n        vector_name = DEFAULT_VECTOR_FIELD_NAME[data_type]\n        dimension = DEFAULT_VECTOR_DIMENSION\n\n        coll = zvec.create_and_open(\n            path=collection_temp_dir,\n   
         schema=single_vector_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None, (\n            f\"Failed to create and open collection, {data_type}, {index_param}\"\n        )\n\n        docs = {str(i): generate_doc(i, single_vector_schema) for i in range(5)}\n        result = coll.insert(docs.values())\n        assert len(result) == len(docs), (\n            f\"Expected 5 results, got {len(result)}, {data_type}, {index_param}\"\n        )\n        for item in result:\n            assert item.ok(), f\"Insertion failed for, {data_type}, {index_param}\"\n\n        def check_result(\n            label: str, metric_type: MetricType, quantize_type: QuantizeType\n        ):\n            query_vector = [1] * dimension\n            if data_type in [DataType.SPARSE_VECTOR_FP16, DataType.SPARSE_VECTOR_FP32]:\n                query_vector = {1: 1}\n\n            fetch_result = coll.fetch([str(i) for i in range(len(docs))])\n            assert len(fetch_result) == len(docs), (\n                f\"{label}, Expected 5 fetched docs, got {len(fetch_result)}, {data_type}, {index_param}\"\n            )\n            for i in range(len(docs)):\n                doc_id = str(i)\n                assert doc_id in fetch_result, (\n                    f\"{label}, Document ID '{doc_id}' not found, {data_type}, {index_param}\"\n                )\n                fetched_doc = fetch_result[doc_id]\n                # Verify doc equal\n                assert is_doc_equal(fetched_doc, docs[doc_id], single_vector_schema), (\n                    f\"{label}, doc not equal, insert: {docs[doc_id]}, fetched: {fetched_doc}, {data_type}, {index_param}\"\n                )\n\n            query_result: list[Doc] = coll.query(\n                VectorQuery(field_name=vector_name, vector=query_vector),\n                include_vector=False,\n                topk=len(docs),\n            )\n            assert len(query_result) == len(docs), (\n                
f\"{label}, Expected {len(docs)} result, got {len(query_result)}, {data_type}, {index_param}\"\n            )\n            inserted_ids = [str(i) for i in range(len(docs))]\n            queried_ids = [doc.id for doc in query_result]\n            assert set(inserted_ids) == set(queried_ids), (\n                f\"{label}, inserted_ids != queried_ids, insert: {inserted_ids}, query: {queried_ids}, {data_type}, {index_param}\"\n            )\n\n            last_score = None\n            for i, doc in enumerate(query_result):\n                # Get the document's vector for comparison\n                expect_doc = generate_doc(int(doc.id), single_vector_schema)\n                doc_vector = expect_doc.vector(vector_name)\n                expected_score = distance(\n                    doc_vector,\n                    query_vector,\n                    metric_type,\n                    data_type,\n                    quantize_type,\n                )\n                print(f\"query: {doc}, expect_core: {expected_score}\")\n                if quantize_type is QuantizeType.UNDEFINED:\n                    assert is_float_equal(doc.score, expected_score), (\n                        f\"{label} top{i} pk{doc.id} score {doc.score:6f} expected:{expected_score:6f}, {data_type}, {index_param}\"\n                    )\n                if last_score is not None:\n                    if metric_type == MetricType.IP:\n                        assert last_score >= doc.score, (\n                            f\"{label}, score not sorted, last_score: {last_score}, current_score: {doc.score}, {data_type}, {index_param}\"\n                        )\n                    else:\n                        assert last_score <= doc.score, (\n                            f\"{label}, score not sorted, last_score: {last_score}, current_score: {doc.score}, {data_type}, {index_param}\"\n                        )\n                last_score = doc.score\n\n        # default metric_type=IP, 
quantize_type=None\n        check_result(\"pre_create_index\", MetricType.IP, QuantizeType.UNDEFINED)\n\n        # create index\n        coll.create_index(\n            field_name=vector_name,\n            index_param=index_param,\n            option=IndexOption(),\n        )\n        check_result(\n            \"post_create_index\", index_param.metric_type, index_param.quantize_type\n        )\n\n        coll.drop_index(field_name=vector_name)\n        check_result(\"post_drop_index\", MetricType.IP, QuantizeType.UNDEFINED)\n\n        new_docs = {str(i): generate_doc(i, single_vector_schema) for i in range(5, 8)}\n        new_result = coll.insert(new_docs.values())\n        assert len(new_result) == len(new_docs), (\n            f\"Expected {len(new_docs)} insertion results for new docs, got {len(new_result)} for vector {vector_name}\"\n        )\n        for item in new_result:\n            assert item.ok(), (\n                f\"New document insertion failed for vector {vector_name}: {item}\"\n            )\n        docs |= new_docs\n        coll.create_index(\n            field_name=vector_name,\n            index_param=index_param,\n            option=IndexOption(),\n        )\n\n        check_result(\n            \"post_create_index2\", index_param.metric_type, index_param.quantize_type\n        )\n        coll.destroy()\n\n\nclass TestColumnDDL:\n    def test_add_column(self, basic_collection: Collection):\n        basic_collection.add_column(\n            field_schema=FieldSchema(\"income\", DataType.INT32),\n            expression=\"'weight' * 2\",  # Simple expression\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, \"income\": 1},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 
result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_add_column_with_default_option(self, basic_collection: Collection):\n        # Add a new column with default option\n        basic_collection.add_column(\n            field_schema=FieldSchema(\"test_column_default\", DataType.INT32),\n            expression=\"100\",\n            option=AddColumnOption(),  # Default option\n        )\n        # Verify column was added by inserting data\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, \"test_column_default\": 1},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\"concurrency\", [0, 1, 4, 8])\n    def test_add_column_with_various_concurrency_options(\n        self, basic_collection: Collection, concurrency\n    ):\n        field_name = f\"test_column_concurrent_{concurrency}\"\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, DataType.INT32),\n            expression=\"100\",\n            option=AddColumnOption(concurrency=concurrency),\n        )\n\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, field_name: 200},\n            vectors={\n 
               \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\"data_type\", SUPPORT_ADD_COLUMN_DATA_TYPE)\n    def test_add_column_valid_data_types(self, basic_collection: Collection, data_type):\n        field_name = f\"test_field_{data_type.name.lower()}\"\n\n        # Add a new column with specific data type\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, data_type),\n            expression=\"1\" if data_type != DataType.STRING else \"'test'\",\n        )\n\n        # Verify column was added by inserting data\n        if data_type == DataType.STRING:\n            field_value = \"test_value\"\n        elif data_type in [DataType.ARRAY_STRING]:\n            field_value = [\"test_value\"]\n        elif data_type in [DataType.ARRAY_INT32, DataType.ARRAY_INT64]:\n            field_value = [1, 2, 3]\n        elif data_type in [DataType.ARRAY_FLOAT, DataType.ARRAY_DOUBLE]:\n            field_value = [1.1, 2.2, 3.3]\n        elif data_type == DataType.ARRAY_BOOL:\n            field_value = [True, False]\n        elif data_type in [DataType.FLOAT, DataType.DOUBLE]:\n            field_value = 1.5\n        elif data_type in [DataType.INT32, DataType.INT64]:\n            field_value = 100\n        elif data_type == DataType.BOOL:\n            field_value = True\n        else:\n            field_value = 1\n\n        doc = Doc(\n            id=\"1\",\n            fields={\n                \"id\": 1,\n                \"name\": \"test\",\n                \"weight\": 80.5,\n        
        field_name: field_value,\n            },\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\"data_type\", NOT_SUPPORT_ADD_COLUMN_DATA_TYPE)\n    def test_add_column_invalid_data_types(\n        self, basic_collection: Collection, data_type\n    ):\n        with pytest.raises(Exception) as exc_info:\n            field_name = f\"test_field_{data_type.name.lower()}\"\n\n            # Add a new column with specific data type\n            basic_collection.add_column(\n                field_schema=FieldSchema(field_name, data_type),\n                expression=\"1\" if data_type != DataType.STRING else \"'test'\",\n            )\n\n        assert NOT_SUPPORT_ADD_COLUMN_ERROR_MSG in str(exc_info.value)\n\n    @pytest.mark.parametrize(\"nullable\", [True, False])\n    def test_add_column_with_nullable_options(\n        self, basic_collection: Collection, nullable\n    ):\n        field_name = f\"test_field_nullable_{str(nullable).lower()}\"\n\n        # Add a new column with specific nullable option\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, DataType.INT32, nullable=nullable),\n            expression=\"100\",\n        )\n\n        # Verify column was added by inserting data\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n    
            \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        # Verify column was added by inserting data\n        doc = Doc(\n            id=\"2\",\n            fields={\"id\": 2, \"name\": \"test\", \"weight\": 80.5, field_name: None},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        if nullable:\n            result = basic_collection.insert(doc)\n            assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n            assert result.ok(), (\n                f\"result={result},Insert operation failed with code = {result.code()}\"\n            )\n        else:\n            with pytest.raises(ValueError) as e:\n                basic_collection.insert(doc)\n            assert (\n                \"Field 'test_field_nullable_false': expected non-nullable type\"\n                in str(e.value)\n            )\n\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        if nullable:\n            assert stats.doc_count == 2\n        else:\n            assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\n        \"expression\",\n        [\n            \"1\",  # Constant integer\n            \"1.5\",  # Constant float\n            \"'test'\",  # Constant string\n            \"id\",  # Reference to existing field\n            \"weight * 2\",  # Simple arithmetic\n            \"weight + id\",  # Complex arithmetic\n            \"CASE WHEN weight > 50 THEN 1 
ELSE 0 END\",  # Conditional expression\n        ],\n    )\n    def test_add_column_with_different_expressions(\n        self, basic_collection: Collection, expression\n    ):\n        field_name = f\"test_field_expr_{abs(hash(expression)) % 1000}\"\n\n        # Add a new column with specific expression\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, DataType.INT32),\n            expression=expression,\n        )\n\n        # Verify column was added by inserting data\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_add_column_with_index_param(self, basic_collection: Collection):\n        basic_collection.add_column(\n            field_schema=FieldSchema(\n                \"indexed_field\",\n                DataType.INT32,\n                index_param=InvertIndexParam(enable_range_optimization=True),\n            ),\n            expression=\"id * 2\",\n        )\n\n        # Verify column was added by inserting data\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, \"indexed_field\": 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), 
f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        # Verify document was inserted\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\n        \"field_name\",\n        [\n            \"a\",  # Minimum length\n            \"a\" * 32,  # Maximum length (32 characters)\n            \"valid_field_name_123\",  # Alphanumeric with underscore\n            \"Valid-Field-Name\",  # With hyphens\n            \"_underscore_start\",  # Starting with underscore\n            \"field_name_with_123_numbers\",  # Numbers in middle\n            \"FIELD_NAME_UPPERCASE\",  # Uppercase\n            # \"field_with_nums_123_and_hyphens-456\",  # Complex valid name within limit\n        ],\n    )\n    def test_add_column_with_valid_field_names(\n        self, basic_collection: Collection, field_name\n    ):\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, DataType.INT32), expression=\"200\"\n        )\n\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, field_name: 300},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\n        \"invalid_field_name\",\n        [\n            \"\",  # Empty string\n            \" \",  # Space only\n            \"a\" * 33,  # Too long (33 
characters, exceeds 32)\n            \"field name\",  # Contains space\n            \"field.name\",  # Contains dot\n            \"field@name\",  # Contains special character\n            \"field/name\",  # Contains slash\n            \"field\\\\name\",  # Contains backslash\n            \"field$name\",  # Contains dollar sign\n            \"field+name\",  # Contains plus sign\n            \"field=name\",  # Contains equals sign\n            None,  # None value\n        ],\n    )\n    def test_add_column_with_invalid_field_names(\n        self, basic_collection: Collection, invalid_field_name\n    ):\n        with pytest.raises(Exception) as exc_info:\n            basic_collection.add_column(\n                field_schema=FieldSchema(invalid_field_name, DataType.INT32),\n                expression=\"100\",\n            )\n\n        if invalid_field_name is None:\n            assert \"validate failed\" in str(exc_info.value), (\n                \"Error message is unreasonable: e=\" + str(exc_info.value)\n            )\n        else:\n            assert (\n                \"invalid\" in str(exc_info.value).lower()\n                or \"name\" in str(exc_info.value).lower()\n            )\n\n    def test_alter_column_rename(self, basic_collection: Collection):\n        basic_collection.alter_column(\n            old_name=\"weight\",\n            new_name=\"mass\",\n            option=AlterColumnOption(),\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"mass\": 80.5},  # Use new name\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = 
basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_alter_column_non_exist(self, basic_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            basic_collection.alter_column(\n                old_name=\"non_existing\",\n                new_name=\"new_name\",\n                field_schema=FieldSchema(\"new_name\", DataType.STRING),\n            )\n        assert \"column non_existing not found\" in str(exc_info.value), (\n            \"Error message is unreasonable: e=\" + str(exc_info.value)\n        )\n\n    def test_alter_column_with_default_option(self, basic_collection: Collection):\n        basic_collection.add_column(\n            field_schema=FieldSchema(\"original_field\", DataType.INT32), expression=\"100\"\n        )\n\n        basic_collection.alter_column(\n            old_name=\"original_field\",\n            new_name=\"renamed_field\",\n            option=AlterColumnOption(),\n        )\n\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, \"renamed_field\": 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\"concurrency\", [0, 1, 4, 8])\n    def test_alter_column_with_various_concurrency_options(\n        self, basic_collection: Collection, concurrency\n    ):\n        old_field_name = f\"orig_field_{concurrency}\"\n        new_field_name = f\"modified_field_{concurrency}\"\n\n        
basic_collection.add_column(\n            field_schema=FieldSchema(old_field_name, DataType.INT32),\n            expression=\"100\",\n        )\n\n        basic_collection.alter_column(\n            old_name=old_field_name,\n            new_name=new_field_name,\n            option=AlterColumnOption(concurrency=concurrency),\n        )\n\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, new_field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\n        \"old_field_name,new_field_name\",\n        [\n            (\"a\", \"new_a\"),  # Minimum length\n            (\n                \"abcdefghijklmnopqrstuvwxyz123456\",\n                \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n            ),  # Maximum length (32 characters)\n            (\"valid_field_name_123\", \"new_valid_field\"),  # Alphanumeric with underscore\n            (\"Valid-Field-Name\", \"New-Field-Name\"),  # With hyphens\n            (\"_underscore_start\", \"new_underscore\"),  # Starting with underscore\n            (\"field_name_with_123_numbers\", \"new_with_nums\"),  # Numbers in middle\n            (\"FIELD_NAME_UPPERCASE\", \"new_uppercase\"),  # Uppercase\n            (\n                \"field_with_nums_3_and_hyphens-6\",\n                \"new_field_hyphens\",\n            ),  # Complex valid name\n        ],\n    )\n    def test_alter_column_field_name_valid(\n        self, basic_collection: Collection, old_field_name, 
new_field_name\n    ):\n        basic_collection.add_column(\n            field_schema=FieldSchema(old_field_name, DataType.INT32),\n            expression=\"100\",\n        )\n        basic_collection.alter_column(\n            old_name=old_field_name,\n            new_name=new_field_name,\n            option=AlterColumnOption(),\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, new_field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    @pytest.mark.parametrize(\n        \"valid_old_name,invalid_new_name\",\n        [\n            (\"temp_field\", \"\"),  # Empty new name\n            (\"temp_field\", \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"),  # Too long new name\n            (\"temp_field\", \"field name\"),  # New name with space\n            (\"temp_field\", \"field.name\"),  # New name with dot\n            (\"temp_field\", \"field@name\"),  # New name with special character\n            (\"temp_field\", \"field/name\"),  # New name with slash\n            (\"temp_field\", \"field\\\\name\"),  # New name with backslash\n            (\"temp_field\", \"field$name\"),  # New name with dollar sign\n            (\"temp_field\", \"field+name\"),  # New name with plus sign\n            (\"temp_field\", \"field=name\"),  # New name with equals sign\n            (\"temp_field\", None),  # None new name\n        ],\n    )\n    def test_alter_column_with_invalid_field_names(\n        self, basic_collection: 
Collection, valid_old_name, invalid_new_name\n    ):\n        basic_collection.add_column(\n            field_schema=FieldSchema(\"temp_field\", DataType.INT32), expression=\"100\"\n        )\n        with pytest.raises(Exception) as exc_info:\n            basic_collection.alter_column(\n                old_name=valid_old_name,\n                new_name=invalid_new_name if invalid_new_name is not None else \"\",\n                field_schema=FieldSchema(\n                    invalid_new_name if invalid_new_name is not None else \"\",\n                    DataType.INT32,\n                ),\n            )\n\n        assert (\n            \"invalid\" in str(exc_info.value).lower()\n            or \"name\" in str(exc_info.value).lower()\n            or \"incompatible\" in str(exc_info.value).lower()\n        )\n\n    def test_drop_column_exist(self, basic_collection: Collection):\n        basic_collection.add_column(\n            field_schema=FieldSchema(\"temp_field\", DataType.INT32), expression=\"100\"\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, \"temp_field\": 1},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        basic_collection.drop_column(\"temp_field\")\n        doc = Doc(\n            id=\"2\",\n            fields={\"id\": 2, \"name\": \"test\", \"weight\": 80.5, \"temp_field\": 1},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 
2.0},\n            },\n        )\n        with pytest.raises(Exception) as exc_info:\n            result = basic_collection.insert(doc)\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value)\n\n    def test_drop_column_non_exist(self, basic_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            basic_collection.drop_column(\"non_existing_column\")\n        assert NOT_EXIST_COLUMN_TO_DROP_ERROR_MSG in str(exc_info.value)\n\n    @pytest.mark.parametrize(\n        \"field_name\",\n        [\n            \"a\",  # Minimum length\n            \"a\" * 32,  # Maximum length (32 characters)\n            \"valid_field_name_123\",  # Alphanumeric with underscore\n            \"Valid-Field-Name\",  # With hyphens\n            \"_underscore_start\",  # Starting with underscore\n            \"field_name_with_123_numbers\",  # Numbers in middle\n            \"FIELD_NAME_UPPERCASE\",  # Uppercase\n            \"field_with_nums_3_and_hyphens-6\",  # Complex valid name within limit\n        ],\n    )\n    def test_drop_column_field_name_valid(\n        self, basic_collection: Collection, field_name\n    ):\n        basic_collection.add_column(\n            field_schema=FieldSchema(field_name, DataType.INT32), expression=\"100\"\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5, field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        result = basic_collection.insert(doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.ok(), (\n            f\"result={result},Insert operation failed with code = {result.code()}\"\n        )\n\n        stats = basic_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        basic_collection.drop_column(field_name)\n\n  
      doc = Doc(\n            id=\"2\",\n            fields={\"id\": 2, \"name\": \"test\", \"weight\": 80.5, field_name: 200},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n        with pytest.raises(Exception) as exc_info:\n            result = basic_collection.insert(doc)\n\n        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value)\n"
  },
  {
    "path": "python/tests/detail/test_collection_dml.py",
    "content": "import logging\nimport pytest\n\n\nfrom zvec import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    FieldSchema,\n    VectorSchema,\n    CollectionSchema,\n    Collection,\n    Doc,\n    VectorQuery,\n    StatusCode,\n)\nfrom distance_helper import *\nfrom fixture_helper import *\nfrom doc_helper import *\n\nMaximum = 1024\n\nDOCID_VALID_LIST = [\n    \"1valid_Id\",\n    \"123.45\",\n    \"123abc\",\n    \"-!@#$%+=.123abc_+\",\n    \"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789012\",\n]\nDOCID_INVALID_LIST = [\n    None,\n    \"\",\n    \"()qsd123\",\n    \" \",\n    \"/&AS12\",\n    \"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890121\",\n]\n\nFIELD_VALUE_VALID_LIST = [\n    (\n        \"bool_field\",\n        [\n            None,\n            True,\n            False,\n        ],\n    ),\n    (\n        \"float_field\",\n        [\n            None,\n            0.0,\n            -1.0,\n            1.0,\n            3.4028235e38,\n            -3.4028235e38,\n            1.17549435e-38,\n            -1.17549435e-38,\n            float(\"inf\"),\n            float(\"-inf\"),\n        ],\n    ),\n    (\n        \"double_field\",\n        [\n            None,\n            0.0,\n            -1.0,\n            1.0,\n            1.7976931348623157e308,\n            -1.7976931348623157e308,\n            2.2250738585072014e-308,\n            -2.2250738585072014e-308,\n            float(\"inf\"),\n            float(\"-inf\"),\n        ],\n    ),\n    (\n        \"int32_field\",\n        [\n            None,\n            0,\n            1,\n            -1,\n            2147483647,\n            -2147483648,\n        ],\n    ),\n    (\n        \"int64_field\",\n        [\n            None,\n            0,\n            1,\n            -1,\n            9223372036854775807,\n            -9223372036854775808,\n        
],\n    ),\n    (\n        \"uint32_field\",\n        [\n            None,\n            0,\n            1,\n            4294967295,\n        ],\n    ),\n    (\n        \"uint64_field\",\n        [\n            None,\n            0,\n            1,\n            18446744073709551615,\n        ],\n    ),\n    (\n        \"string_field\",\n        [\n            None,\n            \"\",\n            \"a\",\n            \"test_name\",\n            \"这是一个中文名称测试\",\n            \"a\" * 1000,\n        ],\n    ),\n    (\n        \"array_bool_field\",\n        [\n            None,\n            [],\n            [True],\n            [False, True],\n            [True, False, True, False] * 10,\n        ],\n    ),\n    (\n        \"array_float_field\",\n        [\n            None,\n            [],\n            [0.0],\n            [1.0, 2.0, 3.0],\n            [3.4028235e38, -3.4028235e38],\n        ],\n    ),\n    (\n        \"array_double_field\",\n        [\n            None,\n            [],\n            [0.0],\n            [1.0, 2.0, 3.0],\n            [1.7976931348623157e308, -1.7976931348623157e308],\n        ],\n    ),\n    (\n        \"array_int32_field\",\n        [\n            None,\n            [],\n            [0],\n            [1, 2, 3],\n            [2147483647, -2147483648],\n        ],\n    ),\n    (\n        \"array_int64_field\",\n        [\n            None,\n            [],\n            [0],\n            [1, 2, 3],\n            [9223372036854775807, -9223372036854775808],\n        ],\n    ),\n    (\n        \"array_uint32_field\",\n        [\n            None,\n            [],\n            [0],\n            [1, 2, 3],\n            [4294967295],\n        ],\n    ),\n    (\n        \"array_uint64_field\",\n        [\n            None,\n            [],\n            [0],\n            [1, 2, 3],\n            [18446744073709551615],\n        ],\n    ),\n    (\n        \"array_string_field\",\n        [\n            None,\n            [],\n            [\"\"],\n    
        [\"a\", \"b\", \"c\"],\n            [\"test_string\", \"测试字符串\"],\n            [\"a\" * 100] * 5,\n        ],\n    ),\n]\nFIELD_VALUE_INVALID_LIST = [\n    (\n        \"bool_field\",\n        [\n            \"True\",\n            \"False\",\n            \"\",\n        ],\n    ),\n    (\"float_field\", [\"invalid\", [1.0], {\"value\": 1.0}]),\n    (\"double_field\", [\"invalid\", [1.0], {\"value\": 1.0}]),\n    (\n        \"int32_field\",\n        [\n            \"invalid\",\n            [1],\n            {\"value\": 1},\n            2147483648,\n            -2147483649,\n        ],\n    ),\n    (\n        \"int64_field\",\n        [\n            \"invalid\",\n            [1],\n            {\"value\": 1},\n            9223372036854775808,\n            -9223372036854775809,\n        ],\n    ),\n    (\n        \"uint32_field\",\n        [\n            \"invalid\",\n            [1],\n            {\"value\": 1},\n            4294967296,\n            -1,\n        ],\n    ),\n    (\n        \"uint64_field\",\n        [\n            \"invalid\",\n            [1],\n            {\"value\": 1},\n            18446744073709551616,\n            -1,\n        ],\n    ),\n    (\n        \"string_field\",\n        [\n            123,\n            12.34,\n            True,\n            [\"array\"],\n            {\"key\": \"value\"},\n        ],\n    ),\n    (\n        \"array_bool_field\",\n        [\n            True,\n            False,\n            [True, \"invalid\"],\n            {\"key\": True},\n        ],\n    ),\n    (\n        \"array_float_field\",\n        [\n            [1.0, \"invalid\"],\n            [1.0, None],\n            \"invalid\",\n            [1.0, [2.0]],\n            1.0,\n        ],\n    ),\n    (\n        \"array_double_field\",\n        [\n            [1.0, \"invalid\"],\n            [1.0, None],\n            \"invalid\",\n            [1.0, [2.0]],\n            1.0,\n        ],\n    ),\n    (\n        \"array_int32_field\",\n        [\n            
[1, \"invalid\"],\n            [1, None],\n            \"invalid\",\n            [1, [2]],\n            1,\n        ],\n    ),\n    (\n        \"array_int64_field\",\n        [\n            [1, \"invalid\"],\n            [1, None],\n            \"invalid\",\n            [1, [2]],\n            1,\n        ],\n    ),\n    (\n        \"array_uint32_field\",\n        [\n            [1, \"invalid\"],\n            [1, None],\n            [1, -1],\n            \"invalid\",\n            [1, [2]],\n            1,\n        ],\n    ),\n    (\n        \"array_uint64_field\",\n        [\n            [1, \"invalid\"],\n            [1, None],\n            [1, -1],\n            \"invalid\",\n            [1, [2]],\n            1,\n        ],\n    ),\n    (\n        \"array_string_field\",\n        [\n            [\"valid\", 123],\n            [\"valid\", None],\n            \"invalid\",\n            [[\"nested\"]],\n            123,\n        ],\n    ),\n]\n\nVECTOR_VALUE_VALID_LIST = [\n    (\n        \"vector_fp32_field\",\n        [\n            [0.0] * 128,\n            [1.0] * 128,\n            [-1.0] * 128,\n            [float(\"inf\")] * 128,\n            [float(\"-inf\")] * 128,\n            [i / 128.0 for i in range(128)],\n            [-i / 128.0 for i in range(128)],\n        ],\n    ),\n    (\n        \"vector_fp16_field\",\n        [\n            [0.0] * 128,\n            [1.0] * 128,\n            [-1.0] * 128,\n            [float(\"inf\")] * 128,\n            [float(\"-inf\")] * 128,\n            [i / 128.0 for i in range(128)],\n            [-i / 128.0 for i in range(128)],\n        ],\n    ),\n    (\"vector_int8_field\", [[100] * 128, [0] * 128, [-100] * 128]),\n    (\n        \"sparse_vector_fp32_field\",\n        [\n            {0: 1.0},\n            {0: 0.0, 1: 1.0, 2: -1.0},\n            {0: float(\"inf\"), 1: float(\"-inf\")},\n            {i: float(i) for i in range(10)},\n            {128: 1.0, 256: -1.0, 512: 0.5},\n        ],\n    ),\n    (\n        
\"sparse_vector_fp16_field\",\n        [\n            {0: 1.0},\n            {0: 0.0, 1: 1.0, 2: -1.0},\n            {0: float(\"inf\"), 1: float(\"-inf\")},\n            {i: float(i) for i in range(10)},\n            {128: 1.0, 256: -1.0, 512: 0.5},\n        ],\n    ),\n]\nVECTOR_VALUE_INVALID_LIST = [\n    (\n        \"vector_fp32_field\",\n        [\n            None,\n            [],\n            [0.0] * 127,\n            [0.0] * 129,\n            [0.0] * 1000,\n            [\"invalid\"],\n            [0, 1, 2],\n            [None] * 128,\n        ],\n    ),\n    (\n        \"vector_fp16_field\",\n        [\n            None,\n            [],\n            [0.0] * 127,\n            [0.0] * 129,\n            [0.0] * 1000,\n            [\"invalid\"],\n            [0, 1, 2],\n            [None] * 128,\n        ],\n    ),\n    (\n        \"vector_int8_field\",\n        [\n            None,\n            [],\n            [1] * 127,\n            [10] * 129,\n            [0] * 1000,\n            [\"invalid\"],\n            [0, 1, 2],\n            [None] * 128,\n        ],\n    ),\n    (\n        \"sparse_vector_fp32_field\",\n        [\n            None,\n            \"invalid\",\n            {None: 1.0},\n            {\"0\": 1.0},\n            {0: \"invalid\"},\n            {0: None},\n            {-1: 1.0},\n        ],\n    ),\n    (\n        \"sparse_vector_fp16_field\",\n        [\n            None,\n            \"invalid\",\n            {None: 1.0},\n            {\"0\": 1.0},\n            {0: \"invalid\"},\n            {0: None},\n            {-1: 1.0},\n        ],\n    ),\n]\n\nUPDATE_PARTIAL_VALUE = [\n    (\n        \"partial_fields\",\n        {\"string_field\": \"partially_updated_test\", \"float_field\": 95.5},\n        {},\n    ),\n    (\"dense_vector_only\", {}, {\"vector_fp32_field\": [0.3] * 128}),\n    (\"dense_vector_only\", {}, {\"vector_fp16_field\": [0.6] * 128}),\n    (\"dense_vector_only\", {}, {\"vector_int8_field\": [3] * 128}),\n    
(\"sparse_vector_only\", {}, {\"sparse_vector_fp32_field\": {1: 2.0, 2: 3.0, 4: 4.0}}),\n    (\n        \"sparse_vector_only\",\n        {},\n        {\"sparse_vector_fp16_field\": {10: 2.1, 20: 3.1, 40: 4.1}},\n    ),\n    (\n        \"fields_and_vectors\",\n        {\"string_field\": \"fully_updated_test\", \"bool_field\": False},\n        {\n            \"vector_fp32_field\": [0.4] * 128,\n            \"sparse_vector_fp32_field\": {1: 3.0, 3: 5.0},\n        },\n    ),\n]\n\n\n# ==================== helper ====================\ndef singledoc_and_check(\n    collection: Collection, insert_doc, operator=\"insert\", is_delete=1\n):\n    if operator == \"insert\":\n        result = collection.insert(insert_doc)\n    elif operator == \"upsert\":\n        result = collection.upsert(insert_doc)\n    elif operator == \"update\":\n        result = collection.update(insert_doc)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert bool(result)\n    assert result.ok()\n\n    stats = collection.stats\n    assert stats is not None\n    assert stats.doc_count == 1\n\n    fetched_docs = collection.fetch([insert_doc.id])\n    assert len(fetched_docs) == 1\n    assert insert_doc.id in fetched_docs\n\n    fetched_doc = fetched_docs[insert_doc.id]\n\n    assert is_doc_equal(fetched_doc, insert_doc, collection.schema)\n    assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n    assert fetched_doc.score == 0.0, (\n        \"Fetch operation should return default score of 0.0\"\n    )\n\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        if v != {}:\n            query_result = collection.query(\n                VectorQuery(field_name=v, vector=insert_doc.vectors[v]),\n                topk=10,\n            )\n            assert len(query_result) > 0, (\n                f\"Expected at least 1 query result, but got {len(query_result)}\"\n            )\n\n            found_doc = None\n            for doc in query_result:\n    
            if doc.id == insert_doc.id:\n                    found_doc = doc\n                    break\n            assert found_doc is not None, (\n                f\"Inserted document {insert_doc.id} not found in query results\"\n            )\n            assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False)\n    if is_delete == 1:\n        collection.delete(insert_doc.id)\n        assert collection.stats.doc_count == 0, \"Document should be deleted\"\n\n\ndef updatedoc_partial_check(\n    collection, update_doc_partial, update_doc_full, operator=\"update\", is_delete=1\n):\n    if operator == \"upsert\":\n        result = collection.upsert(update_doc_partial)\n    elif operator == \"update\":\n        result = collection.update(update_doc_partial)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert bool(result)\n    assert result.ok()\n\n    stats = collection.stats\n    assert stats is not None\n    assert stats.doc_count == 1\n\n    fetched_docs = collection.fetch([update_doc_partial.id])\n    assert len(fetched_docs) == 1, (\n        f\"fetched_docs={fetched_docs},Expected 1 fetched document, but got {len(fetched_docs)}\"\n    )\n    assert update_doc_partial.id in fetched_docs, (\n        f\"Expected document ID {update_doc_partial.id} in fetched documents\"\n    )\n\n    fetched_doc = fetched_docs[update_doc_partial.id]\n    assert is_doc_equal(fetched_doc, update_doc_full, collection.schema)\n    assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n    assert fetched_doc.score == 0.0, (\n        \"Fetch operation should return default score of 0.0\"\n    )\n\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        if v != {}:\n            query_result = collection.query(\n                VectorQuery(field_name=v, vector=update_doc_full.vectors[v]),\n                topk=10,\n            )\n            assert len(query_result) > 0, (\n                f\"Expected at least 1 
query result, but got {len(query_result)}\"\n            )\n\n            found_doc = None\n            for doc in query_result:\n                if doc.id == update_doc_partial.id:\n                    found_doc = doc\n                    break\n            assert found_doc is not None, (\n                f\"Inserted document {update_doc_partial.id} not found in query results\"\n            )\n            assert is_doc_equal(\n                found_doc, update_doc_full, collection.schema, True, False\n            )\n    if is_delete == 1:\n        collection.delete(update_doc_partial.id)\n        assert collection.stats.doc_count == 0, \"Document should be deleted\"\n\n\ndef batchdoc_and_check(collection, multiple_docs, doc_num, operator=\"insert\"):\n    if operator == \"insert\":\n        result = collection.insert(multiple_docs)\n    elif operator == \"upsert\":\n        result = collection.upsert(multiple_docs)\n\n    elif operator == \"update\":\n        result = collection.update(multiple_docs)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok(), (\n            f\"result={result},Insert operation failed with code {item.code()}\"\n        )\n\n    stats = collection.stats\n    assert stats is not None, \"Collection stats should not be None\"\n    assert stats.doc_count == len(multiple_docs), (\n        f\"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}\"\n    )\n\n    doc_ids = [doc.id for doc in multiple_docs]\n    fetched_docs = collection.fetch(doc_ids)\n    assert len(fetched_docs) == len(multiple_docs), (\n        f\"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}\"\n    )\n\n    for original_doc in multiple_docs:\n        assert original_doc.id in fetched_docs, (\n            f\"Expected document ID {original_doc.id} in fetched documents\"\n        
)\n        fetched_doc = fetched_docs[original_doc.id]\n\n        assert is_doc_equal(fetched_doc, original_doc, collection.schema)\n\n        assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n        assert fetched_doc.score == 0.0, (\n            \"Fetch operation should return default score of 0.0\"\n        )\n\n    first_doc = multiple_docs[doc_num - 1]\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        query_result = collection.query(\n            VectorQuery(field_name=v, vector=first_doc.vectors[v]),\n            topk=1024,\n        )\n        assert len(query_result) > 0, (\n            f\"Expected at least 1 query result, but got {len(query_result)}\"\n        )\n\n        found_doc = None\n\n        for doc in query_result:\n            if doc.id == first_doc.id:\n                found_doc = doc\n                break\n        assert found_doc is not None, (\n            f\"Inserted document {first_doc.id} not found in query results\"\n        )\n\n        assert is_doc_equal(found_doc, first_doc, collection.schema, True, False)\n\n\n# ==================== Tests ====================\n# ----------------------------\n# Collection Insert Test Case\n# ----------------------------\n\n\nclass TestCollectionInsert:\n    def test_insert(self, full_collection: Collection):\n        single_doc = generate_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, single_doc)\n\n    @pytest.mark.parametrize(\"doc_num\", [1, 5, Maximum])\n    def test_insert_batch(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num)\n\n    def test_insert_duplicate(self, full_collection: Collection):\n        insert_doc = generate_doc(1, full_collection.schema)\n\n        result = full_collection.insert(insert_doc)\n        assert result.code().value 
== 0\n        assert result.ok()\n\n        # Verify documents were inserted\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        insert_doc_duplicate = full_collection.insert(insert_doc)\n        assert bool(insert_doc_duplicate)\n        assert insert_doc_duplicate.code() == StatusCode.ALREADY_EXISTS, (\n            f\"Second insert operation should fail with ALREADY_EXISTS, but got code {insert_doc_duplicate.code()}\"\n        )\n\n        stats = full_collection.stats\n        assert stats is not None, \"Collection stats should not be None\"\n        assert stats.doc_count == 1, (\n            f\"Document count should still be 1 after failed insert, but got {stats.doc_count}\"\n        )\n\n    @pytest.mark.parametrize(\"doc_id\", DOCID_VALID_LIST)\n    def test_insert_docid_valid(self, full_collection: Collection, doc_id):\n        insert_doc = generate_doc_random(doc_id, full_collection.schema)\n        singledoc_and_check(full_collection, insert_doc)\n\n    @pytest.mark.parametrize(\"doc_id\", DOCID_INVALID_LIST)\n    def test_insert_docid_invalid(self, full_collection: Collection, doc_id):\n        insert_doc = generate_doc_random(doc_id, full_collection.schema)\n\n        with pytest.raises(Exception) as exc_info:\n            full_collection.insert(insert_doc)\n\n        assert exc_info.value is not None\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_VALID_LIST)\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],\n        indirect=True,\n    )\n    def test_insert_fields_valid(\n        self, full_collection_new: Collection, field_name: str, field_values, request\n    ):\n        for i, field_value in enumerate(field_values):\n            doc_id = str(field_value) if 
field_name == \"id\" else str(i)\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_new.schema\n            )\n            full_schema_params = request.getfixturevalue(\"full_schema_new\")\n            target_field = None\n            for field in full_schema_params.fields:\n                if field.name == field_name:\n                    target_field = field\n                    break\n            doc_fields[field_name] = field_value\n            insert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)\n            if target_field and not target_field.nullable and field_value is None:\n                with pytest.raises(Exception) as exc_info:\n                    full_collection_new.insert(insert_doc)\n                assert exc_info.value is not None\n            else:\n                singledoc_and_check(full_collection_new, insert_doc)\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_INVALID_LIST)\n    def test_insert_fields_invalid(\n        self, full_collection: Collection, field_name: str, field_values\n    ):\n        for i, field_value in enumerate(field_values):\n            doc_id = str(field_value) if field_name == \"id\" else str(i)\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n            doc_fields[field_name] = field_value\n            insert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)\n\n            with pytest.raises(Exception) as exc_info:\n                full_collection.insert(insert_doc)\n            assert exc_info.value is not None\n            stats = full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_VALID_LIST)\n    def test_insert_vector_valid(\n        self, full_collection: Collection, vector_field: str, vector_values\n    ):\n        for i, vector_value in 
enumerate(vector_values):\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n\n            doc_vectors[vector_field] = vector_value\n\n            insert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n\n            singledoc_and_check(full_collection, insert_doc)\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_INVALID_LIST)\n    def test_insert_vector_invalid(\n        self, full_collection: Collection, vector_field: str, vector_values\n    ):\n        for i, vector_value in enumerate(vector_values):\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n            doc_vectors[vector_field] = vector_value\n            insert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n            with pytest.raises(Exception) as exc_info:\n                full_collection.insert(insert_doc)\n\n            assert exc_info.value is not None\n            stats = full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n\nclass TestCollectionUpdate:\n    def test_update(self, full_collection: Collection):\n        insert_doc = generate_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, insert_doc, is_delete=0)\n        updated_doc = generate_update_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, updated_doc, operator=\"update\")\n\n    @pytest.mark.parametrize(\"doc_num\", [1, 5, Maximum])\n    def test_update_batch(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num)\n        multiple_update_docs = [\n            generate_update_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(\n            full_collection, multiple_update_docs, 
doc_num, operator=\"update\"\n        )\n\n    def test_empty_collection_update(self, full_collection: Collection):\n        updated_doc = generate_update_doc(1, full_collection.schema)\n        result = full_collection.update(updated_doc)\n        assert bool(result), f\"Expected 1 result, but got {len(result)}\"\n        assert result.code() == StatusCode.NOT_FOUND, (\n            f\"Update operation should fail with NOT_FOUND, but got code {result.code()}\"\n        )\n        fetched_docs = full_collection.fetch([updated_doc.id])\n        assert len(fetched_docs) == 0\n\n        stats = full_collection.stats\n        assert stats is not None, \"Collection stats should not be None\"\n        assert stats.doc_count == 0, (\n            f\"Document count should be 0, but got {stats.doc_count}\"\n        )\n\n    @pytest.mark.parametrize(\"doc_num\", [1, 5, Maximum])\n    def test_empty_collection_update_batch(self, full_collection: Collection, doc_num):\n        multiple_update_docs = [\n            generate_update_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        result = full_collection.update(multiple_update_docs)\n        assert len(result) == len(multiple_update_docs), (\n            f\"Expected {len(multiple_update_docs)} results, but got {len(result)}\"\n        )\n        for item in result:\n            assert item.code() == StatusCode.NOT_FOUND, (\n                f\"Update operation should fail with NOT_FOUND, but got code {item.code()}\"\n            )\n\n        stats = full_collection.stats\n        assert stats is not None, \"Collection stats should not be None\"\n        assert stats.doc_count == 0, (\n            f\"Document count should be 0, but got {stats.doc_count}\"\n        )\n\n        doc_ids = [doc.id for doc in multiple_update_docs]\n        fetched_docs = full_collection.fetch(doc_ids)\n        assert len(fetched_docs) == 0\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_VALID_LIST)\n   
 @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],\n        indirect=True,\n    )\n    def test_update_fields_valid(\n        self, full_collection_new: Collection, field_name: str, field_values, request\n    ):\n        for i, field_value in enumerate(field_values):\n            insert_doc = generate_doc(i, full_collection_new.schema)\n            singledoc_and_check(full_collection_new, insert_doc, is_delete=0)\n            update_doc_fields, update_doc_vectors = generate_vectordict_random(\n                full_collection_new.schema\n            )\n            full_schema_params = request.getfixturevalue(\"full_schema_new\")\n            target_field = None\n            for field in full_schema_params.fields:\n                if field.name == field_name:\n                    target_field = field\n                    break\n            update_doc_fields[field_name] = field_value\n            update_doc = Doc(\n                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors\n            )\n            if target_field and not target_field.nullable and field_value is None:\n                with pytest.raises(Exception) as exc_info:\n                    update_doc_fields[field_name] = field_value\n                    full_collection_new.update(update_doc)\n                assert exc_info.value is not None\n                full_collection_new.delete(insert_doc.id)\n            else:\n                singledoc_and_check(\n                    full_collection_new, update_doc, operator=\"update\", is_delete=1\n                )\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_INVALID_LIST)\n    def test_update_fields_invalid(\n        self, full_collection: Collection, field_name: str, field_values\n    ):\n        for i, field_value in enumerate(field_values):\n            insert_doc = generate_doc(i, full_collection.schema)\n            
singledoc_and_check(full_collection, insert_doc, is_delete=0)\n            update_doc_fields, update_doc_vectors = generate_vectordict_random(\n                full_collection.schema\n            )\n            update_doc_fields[field_name] = field_value\n            update_doc = Doc(\n                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors\n            )\n\n            with pytest.raises(Exception) as exc_info:\n                full_collection.update(update_doc)\n\n            assert exc_info.value is not None\n            full_collection.delete(insert_doc.id)\n            stats = full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_VALID_LIST)\n    def test_update_doc_vector_valid(\n        self,\n        full_collection: Collection,\n        collection_temp_dir,\n        collection_option,\n        vector_field: str,\n        vector_values,\n    ):\n        for i, vector_value in enumerate(vector_values):\n            insert_doc = generate_doc(i, full_collection.schema)\n            singledoc_and_check(full_collection, insert_doc, is_delete=0)\n            update_doc_fields, update_doc_vectors = generate_vectordict_random(\n                full_collection.schema\n            )\n            update_doc_vectors[vector_field] = vector_value\n            update_doc = Doc(\n                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors\n            )\n            singledoc_and_check(full_collection, update_doc, operator=\"update\")\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_INVALID_LIST)\n    def test_update_doc_vector_invalid(\n        self,\n        full_collection: Collection,\n        collection_temp_dir,\n        collection_option,\n        vector_field: str,\n        vector_values,\n    ):\n        for i, vector_value in enumerate(vector_values):\n            
insert_doc = generate_doc(i, full_collection.schema)\n            singledoc_and_check(full_collection, insert_doc, is_delete=0)\n            update_doc_fields, update_doc_vectors = generate_vectordict_random(\n                full_collection.schema\n            )\n            update_doc_vectors[vector_field] = vector_value\n            update_doc = Doc(\n                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors\n            )\n            with pytest.raises(Exception) as exc_info:\n                full_collection.update(update_doc)\n            assert exc_info.value is not None\n            full_collection.delete(insert_doc.id)\n            stats = full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\n        \"update_type, fields_to_update, vectors_to_update\", UPDATE_PARTIAL_VALUE\n    )\n    def test_update_partial_fields(\n        self,\n        full_collection: Collection,\n        collection_temp_dir,\n        collection_option,\n        update_type: str,\n        fields_to_update: dict,\n        vectors_to_update: dict,\n        doc_id=1,\n    ):\n        insert_doc = generate_doc(doc_id, full_collection.schema)\n        singledoc_and_check(full_collection, insert_doc, is_delete=0)\n\n        update_doc_fields, update_doc_vectors = insert_doc.fields, insert_doc.vectors\n        for k, v in fields_to_update.items():\n            update_doc_fields[k] = v\n        for k, v in vectors_to_update.items():\n            update_doc_vectors[k] = v\n\n        update_doc_full = Doc(\n            id=str(doc_id), fields=update_doc_fields, vectors=update_doc_vectors\n        )\n\n        update_doc_partial = Doc(\n            id=str(doc_id), fields=fields_to_update, vectors=vectors_to_update\n        )\n\n        updatedoc_partial_check(\n            full_collection,\n            update_doc_partial,\n            update_doc_full,\n            operator=\"update\",\n         
   is_delete=1,\n        )\n\n\nclass TestCollectionUpsert:\n    def test_new_doc_upsert(self, full_collection: Collection):\n        single_doc = generate_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, single_doc, operator=\"upsert\", is_delete=1)\n\n    @pytest.mark.parametrize(\"doc_num\", [1, 5, Maximum])\n    def test_new_doc_upsert_batch(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"upsert\")\n\n    def test_existing_doc_upsert(self, full_collection: Collection):\n        insert_doc = generate_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, insert_doc, is_delete=0)\n        updated_doc = generate_update_doc(1, full_collection.schema)\n        singledoc_and_check(full_collection, updated_doc, operator=\"upsert\")\n\n    @pytest.mark.parametrize(\"doc_id\", DOCID_VALID_LIST)\n    def test_upsert_docid_valid(self, full_collection: Collection, doc_id):\n        upsert_doc = generate_doc_random(doc_id, full_collection.schema)\n        singledoc_and_check(full_collection, upsert_doc, operator=\"upsert\", is_delete=1)\n\n    @pytest.mark.parametrize(\"doc_id\", DOCID_INVALID_LIST)\n    def test_upsert_docid_invalid(self, full_collection: Collection, doc_id):\n        upsert_doc = generate_doc_random(doc_id, full_collection.schema)\n\n        with pytest.raises(Exception) as exc_info:\n            full_collection.upsert(upsert_doc)\n\n        assert exc_info.value is not None\n\n        stats = full_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_VALID_LIST)\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],\n        
indirect=True,\n    )\n    def test_upsert_fields_valid(\n        self, full_collection_new: Collection, field_name: str, field_values, request\n    ):\n        for i, field_value in enumerate(field_values):\n            doc_id = str(field_value) if field_name == \"id\" else str(i)\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_new.schema\n            )\n\n            full_schema_params = request.getfixturevalue(\"full_schema_new\")\n            target_field = None\n            for field in full_schema_params.fields:\n                if field.name == field_name:\n                    target_field = field\n                    break\n            doc_fields[field_name] = field_value\n            upsert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)\n            if target_field and not target_field.nullable and field_value is None:\n                with pytest.raises(Exception) as exc_info:\n                    full_collection_new.upsert(upsert_doc)\n                assert exc_info.value is not None\n            else:\n                singledoc_and_check(\n                    full_collection_new, upsert_doc, operator=\"upsert\", is_delete=1\n                )\n\n    @pytest.mark.parametrize(\"field_name, field_values\", FIELD_VALUE_INVALID_LIST)\n    def test_upsert_fields_invalid(\n        self, full_collection: Collection, field_name: str, field_values\n    ):\n        for i, field_value in enumerate(field_values):\n            doc_id = str(field_value) if field_name == \"id\" else str(i)\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n            doc_fields[field_name] = field_value\n            upsert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)\n\n            with pytest.raises(Exception) as exc_info:\n                full_collection.upsert(upsert_doc)\n            assert exc_info.value is not None\n            stats = 
full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_VALID_LIST)\n    def test_upsert_vector_valid(\n        self, full_collection: Collection, vector_field: str, vector_values\n    ):\n        for i, vector_value in enumerate(vector_values):\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n\n            doc_vectors[vector_field] = vector_value\n\n            upsert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n\n            singledoc_and_check(\n                full_collection, upsert_doc, operator=\"upsert\", is_delete=1\n            )\n\n    @pytest.mark.parametrize(\"vector_field, vector_values\", VECTOR_VALUE_INVALID_LIST)\n    def test_upsert_vector_invalid(\n        self, full_collection: Collection, vector_field: str, vector_values\n    ):\n        for i, vector_value in enumerate(vector_values):\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n            doc_vectors[vector_field] = vector_value\n            upsert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)\n            with pytest.raises(Exception) as exc_info:\n                full_collection.upsert(upsert_doc)\n\n            assert exc_info.value is not None\n            stats = full_collection.stats\n            assert stats is not None\n            assert stats.doc_count == 0\n\n\nclass TestCollectionDelete:\n    @pytest.mark.parametrize(\"doc_num\", [1, 5, Maximum])\n    def test_delete_batch(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        doc_ids = [doc.id for doc in multiple_docs]\n        result = full_collection.delete(doc_ids)\n        assert 
len(result) == len(doc_ids)\n        for item in result:\n            assert item.ok()\n\n    def test_delete_non_exist(self, full_collection: Collection):\n        result = full_collection.delete(\"non_existing_id\")\n        assert result.code().value == 1\n        assert result.code() == StatusCode.NOT_FOUND\n\n    @pytest.mark.parametrize(\"doc_num\", [5])\n    def test_delete_batch_part_non_exist(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        doc_ids = [doc.id for doc in multiple_docs]\n        doc_ids.extend([str(doc_num), str(doc_num + 1)])\n        result = full_collection.delete(doc_ids)\n\n        assert len(result) == len(doc_ids)\n        for i in range(len(result)):\n            if i < doc_num:\n                assert result[i].ok()\n            else:\n                assert result[i].code().value == 1\n                assert result[i].code() == StatusCode.NOT_FOUND\n\n    @pytest.mark.parametrize(\"doc_num\", [5])\n    def test_delete_by_filter(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        result = full_collection.delete_by_filter(\"int32_field > 0\")\n        assert result is None\n\n    def test_delete_empty_ids(self, full_collection: Collection):\n        result = full_collection.delete([])\n        assert len(result) == 0\n"
  },
  {
    "path": "python/tests/detail/test_collection_dql.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\nfrom zvec.typing import DataType, StatusCode, MetricType, QuantizeType\nfrom zvec.model import Collection, Doc, VectorQuery\nfrom zvec.model.param import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    FlatIndexParam,\n    IVFIndexParam,\n    HnswQueryParam,\n    IVFQueryParam,\n)\n\n\nfrom zvec.model.schema import FieldSchema, VectorSchema\nfrom zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker\nfrom distance_helper import *\n\nfrom zvec import StatusCode\nfrom distance_helper import *\nfrom fixture_helper import *\nfrom doc_helper import *\nfrom params_helper import *\n\n\n# ==================== helper ====================\ndef batchdoc_and_check(\n    collection: Collection, multiple_docs, doc_num, operator=\"insert\"\n):\n    if operator == \"insert\":\n        result = collection.insert(multiple_docs)\n    elif operator == \"upsert\":\n        result = collection.upsert(multiple_docs)\n\n    elif operator == \"update\":\n        result = collection.update(multiple_docs)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok(), (\n            f\"result={result},Insert operation failed with code {item.code()}\"\n        )\n\n    stats = collection.stats\n    assert stats is not None, \"Collection stats should not be None\"\n    
assert stats.doc_count == len(multiple_docs), (\n        f\"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}\"\n    )\n\n    doc_ids = [doc.id for doc in multiple_docs]\n    fetched_docs = collection.fetch(doc_ids)\n    assert len(fetched_docs) == len(multiple_docs), (\n        f\"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}\"\n    )\n\n    for original_doc in multiple_docs:\n        assert original_doc.id in fetched_docs, (\n            f\"Expected document ID {original_doc.id} in fetched documents\"\n        )\n        fetched_doc = fetched_docs[original_doc.id]\n\n        assert is_doc_equal(fetched_doc, original_doc, collection.schema)\n\n        assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n        assert fetched_doc.score == 0.0, (\n            \"Fetch operation should return default score of 0.0\"\n        )\n\n    first_doc = multiple_docs[doc_num - 1]\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        query_result = collection.query(\n            VectorQuery(field_name=v, vector=first_doc.vectors[v]),\n            topk=1024,\n            include_vector=True,\n        )\n        assert len(query_result) > 0, (\n            f\"Expected at least 1 query result, but got {len(query_result)}\"\n        )\n\n        found_doc = None\n\n        for doc in query_result:\n            if doc.id == first_doc.id:\n                found_doc = doc\n                break\n        assert found_doc is not None, (\n            f\"Inserted document {first_doc.id} not found in query results\"\n        )\n\n        assert is_doc_equal(found_doc, first_doc, collection.schema)\n        assert hasattr(found_doc, \"score\")\n        assert isinstance(found_doc.score, (int, float))\n\n\ndef batchdoc_and_check_ivf(\n    collection: Collection, multiple_docs, doc_num, operator=\"insert\"\n):\n    if operator == \"insert\":\n        result = 
collection.insert(multiple_docs)\n    elif operator == \"upsert\":\n        result = collection.upsert(multiple_docs)\n\n    elif operator == \"update\":\n        result = collection.update(multiple_docs)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok(), (\n            f\"result={result},Insert operation failed with code {item.code()}\"\n        )\n\n    stats = collection.stats\n    assert stats is not None, \"Collection stats should not be None\"\n    assert stats.doc_count == len(multiple_docs), (\n        f\"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}\"\n    )\n\n    doc_ids = [doc.id for doc in multiple_docs]\n    fetched_docs = collection.fetch(doc_ids)\n    assert len(fetched_docs) == len(multiple_docs), (\n        f\"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}\"\n    )\n\n    for original_doc in multiple_docs:\n        assert original_doc.id in fetched_docs, (\n            f\"Expected document ID {original_doc.id} in fetched documents\"\n        )\n        fetched_doc = fetched_docs[original_doc.id]\n\n        assert is_doc_equal(fetched_doc, original_doc, collection.schema)\n\n        assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n        assert fetched_doc.score == 0.0, (\n            \"Fetch operation should return default score of 0.0\"\n        )\n\n    first_doc = multiple_docs[doc_num - 1]\n    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n        if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n            query_result = collection.query(\n                VectorQuery(field_name=v, vector=first_doc.vectors[v]),\n                topk=1024,\n                include_vector=True,\n            )\n            assert len(query_result) > 0, (\n                f\"Expected at least 1 query result, but 
got {len(query_result)}\"\n            )\n\n            found_doc = None\n\n            for doc in query_result:\n                if doc.id == first_doc.id:\n                    found_doc = doc\n                    break\n            assert found_doc is not None, (\n                f\"Inserted document {first_doc.id} not found in query results\"\n            )\n\n            assert is_doc_equal(found_doc, first_doc, collection.schema)\n            assert hasattr(found_doc, \"score\")\n            assert isinstance(found_doc.score, (int, float))\n\n\ndef single_querydoc_check(\n    multiple_docs,\n    query_result,\n    full_collection: Collection,\n    is_by_vector=0,\n    query_vector=None,\n    data_type=None,\n    vector_name=None,\n    metric_type=MetricType.IP,\n    id_include_vector: bool = False,\n    is_output_fields=0,\n):\n    for original_doc in multiple_docs:\n        for doc in query_result:\n            if doc.id == original_doc.id:\n                found_doc = doc\n                if is_output_fields == 0:\n                    assert is_doc_equal(\n                        found_doc,\n                        original_doc,\n                        full_collection.schema,\n                        True,\n                        id_include_vector,\n                    )\n                assert hasattr(found_doc, \"score\")\n                # assert found_doc.score >= 0.0\n                if not id_include_vector:\n                    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n                        assert found_doc.vector(v) == {}\n                else:\n                    for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n                        assert found_doc.vector(v) != {}\n                if is_by_vector:\n                    prev_score = float(\"inf\")\n                    for i, doc in enumerate(query_result):\n                        doc_vector = full_collection.fetch(doc.id)[doc.id].vector(\n                            vector_name\n         
               )\n                        expected_score = distance(\n                            query_vector, doc_vector, metric_type, data_type, k\n                        )\n                        if (\n                            full_collection.schema.vector(vector_name).data_type\n                            != DataType.VECTOR_FP16\n                        ):\n                            assert abs(doc.score - expected_score) < 0.001, (\n                                f\"{data_type} {vector_name} :Expected score {expected_score:.6f}, but got {doc.score:.6f} for document {doc.id}\"\n                            )\n                        assert doc.score <= prev_score, (\n                            f\"{data_type} {vector_name} :Scores should be in descending order. Current: {doc.score}, Previous: {prev_score}\"\n                        )\n                        prev_score = doc.score\n\n\ndef multi_querydoc_check(multiple_docs, query_result, full_collection):\n    for original_doc in multiple_docs:\n        for doc in query_result:\n            if doc.id == original_doc.id:\n                found_doc = doc\n                assert is_doc_equal(\n                    found_doc, original_doc, full_collection.schema, False, False\n                )\n                assert hasattr(found_doc, \"score\"), (\n                    \"Document should have a score attribute\"\n                )\n                assert found_doc.score >= 0.0, (\n                    \"Fetch operation should return default score of 0.0\"\n                )\n                for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n                    assert found_doc.vector(v) == {}\n\n\n# ==================== Tests ====================\nclass TestCollectionFetch:\n    def test_fetch_non_existing(self, full_collection: Collection):\n        result = full_collection.fetch(ids=[\"non_existing_id1\", \"non_existing_id2\"])\n        assert len(result) == 0\n\n    @pytest.mark.parametrize(\"doc_num\", 
[3])\n    def test_fetch_partial_non_existing(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        fetch_id_list = [doc.id for doc in multiple_docs]\n        fetch_id_list.append(\"non_existing_id\")\n        result = full_collection.fetch(ids=fetch_id_list)\n\n        assert len(result) == doc_num\n        assert \"non_existing_id\" not in result.keys()\n\n    def test_fetch_empty_ids(self, full_collection: Collection):\n        result = full_collection.fetch(ids=[])\n        assert len(result) == 0, (\n            f\"Expected 0 results for empty ID list, but got {len(result)}\"\n        )\n\n\nclass TestCollectionQuery:\n    @pytest.mark.parametrize(\"doc_num\", [5])\n    def test_query_with_no_condition(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        query_result = full_collection.query()\n        assert len(query_result) == doc_num\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_filter_empty(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        result1 = full_collection.query(filter=\"\")\n        assert len(result1) == doc_num\n        single_querydoc_check(multiple_docs, result1, full_collection)\n        result2 = full_collection.query(filter=None)\n        assert len(result2) == doc_num\n        single_querydoc_check(multiple_docs, result2, 
full_collection)\n        ids1 = set(doc.id for doc in result1)\n        ids2 = set(doc.id for doc in result2)\n        assert ids1 == ids2\n\n    @pytest.mark.parametrize(\"field_name\", [\"int32_field\"])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_filter_single_condition(\n        self, full_collection: Collection, doc_num, field_name\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        filter = field_name + \" > 5\"\n        query_result = full_collection.query(filter=filter)\n        assert len(query_result) == doc_num - 6\n\n        returned_doc_ids = set()\n        for doc in query_result:\n            returned_doc_ids.add(doc.id)\n\n        expected_doc_ids = set(str(i) for i in range(6, doc_num))\n\n        for doc in query_result:\n            assert doc.id in expected_doc_ids\n            assert int(doc.field(field_name)) > 5\n\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"field_name\", [\"int32_field\"])\n    @pytest.mark.parametrize(\n        \"filter\",\n        [\n            \"int32_field > 3 and int32_field < 9\",\n            \"int32_field >= 5 and int32_field <= 7\",\n        ],\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_filter_and(\n        self, full_collection: Collection, doc_num, field_name, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        filter = field_name + \" > 3 and \" + field_name + \" < 9\"\n        query_result = full_collection.query(filter=filter)\n        if filter == \"int32_field > 3 and int32_field < 9\":\n            assert len(query_result) == doc_num 
- 4 - 1\n            expected_doc_ids = set(str(i) for i in range(4, 9))\n\n            for doc in query_result:\n                assert doc.id in expected_doc_ids\n                field_value = int(doc.field(field_name))\n                assert field_value > 3 and field_value < 9\n        else:\n            assert len(query_result) == 3\n            expected_doc_ids = set(str(i) for i in range(5, 8))\n\n            for doc in query_result:\n                assert doc.id in expected_doc_ids\n                field_value = int(doc.field(field_name))\n                assert field_value >= 5 and field_value <= 7\n\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"field_name\", [\"int32_field\"])\n    @pytest.mark.parametrize(\n        \"filter\",\n        [\n            \"int32_field < 3 or int32_field > 8\",\n            \"int32_field = 3 or int32_field = 7\",\n            \"int32_field <= 3 or int32_field >= 8\",\n        ],\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_filter_or(\n        self, full_collection: Collection, doc_num, field_name, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        query_result = full_collection.query(filter=filter)\n        if filter == \"int32_field < 3 or int32_field > 8\":\n            assert len(query_result) == 4\n            expected_doc_ids = set([str(0), str(1), str(2), str(9)])\n            for doc in query_result:\n                assert doc.id in expected_doc_ids\n                field_value = int(doc.field(field_name))\n                assert field_value < 3 or field_value > 8\n        elif filter == \"int32_field = 3 or int32_field = 7\":\n            assert len(query_result) == 2\n            expected_doc_ids = set([str(3), str(7)])\n            for doc 
in query_result:\n                assert doc.id in expected_doc_ids\n                field_value = int(doc.field(field_name))\n                assert field_value == 3 or field_value == 7\n        else:\n            assert len(query_result) == 6\n            expected_doc_ids = set([str(0), str(1), str(2), str(3), str(8), str(9)])\n            for doc in query_result:\n                assert doc.id in expected_doc_ids\n                field_value = int(doc.field(field_name))\n                assert field_value <= 3 or field_value >= 8\n\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"field_names\", [(\"int32_field\", \"bool_field\")])\n    @pytest.mark.parametrize(\n        \"filter\",\n        [\n            \"(int32_field < 3 or int32_field > 8) and bool_field = false\",\n            \"(int32_field > 2 and int32_field < 5) or (int32_field > 7 and bool_field = true)\",\n        ],\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_filter_parentheses(\n        self, full_collection: Collection, doc_num, field_names, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        query_result = full_collection.query(filter=filter)\n        if filter == \"(int32_field < 3 or int32_field > 8) and bool_field = false\":\n            assert len(query_result) == 2\n            expected_doc_ids = set([str(1), str(9)])\n            for doc in query_result:\n                assert doc.id in expected_doc_ids\n                assert (\n                    int(doc.field(field_names[0])) < 3\n                    or int(doc.field(field_names[0])) > 8\n                ) and doc.field(field_names[1]) == False\n        else:\n            assert len(query_result) == 3\n            expected_doc_ids = set([str(3), str(4), str(8)])\n   
         for doc in query_result:\n                assert doc.id in expected_doc_ids\n                assert (\n                    (\n                        int(doc.field(field_names[0])) > 2\n                        and int(doc.field(field_names[0])) < 5\n                    )\n                    or (doc.field(field_names[0])) > 7\n                    and doc.field(field_names[1]) == True\n                )\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\n        \"filter\",\n        [\n            \"int32_field >\",\n            \"int32_field = 'string'\",\n            \"nonexistent_field = 5\",\n            \"int32_field > 5 and\",\n            \"int32_field > > 5\",\n        ],\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_filter_invalid(self, full_collection: Collection, doc_num, filter):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        with pytest.raises(Exception) as exc_info:\n            full_collection.query(filter=filter)\n        if filter in [\"int32_field = 'string'\", \"nonexistent_field = 5\"]:\n            assert \"Analyze sql info failed\" in str(exc_info.value)\n        else:\n            assert \"Invalid filter\" in str(exc_info.value)\n\n    @pytest.mark.parametrize(\"field_name\", [\"int32_field\"])\n    @pytest.mark.parametrize(\"topk_value\", [1, 5, 10, 50, 100, 500, 1000, 1024])\n    def test_query_with_filter_topk_valid(\n        self, full_collection: Collection, topk_value: int, field_name\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(topk_value)\n        ]\n        batchdoc_and_check(\n            full_collection, multiple_docs, topk_value, operator=\"insert\"\n        )\n        filter = (\n            field_name + f\" 
>={topk_value - 1} and \" + field_name + f\" <={topk_value}\"\n        )\n        print(\"filter:\\n\")\n        print(filter)\n        query_result = full_collection.query(filter=filter, topk=topk_value)\n        assert len(query_result) == 1\n        expected_doc_ids = [str(topk_value - 1)]\n\n        for doc in query_result:\n            assert doc.id in expected_doc_ids\n            field_value = int(doc.field(field_name))\n            assert field_value >= topk_value - 1 and field_value <= topk_value\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"field_name\", [\"int32_field\"])\n    @pytest.mark.parametrize(\"topk_value\", [1, 5, 10, 50, 100, 500, 1000, 1024])\n    def test_query_without_filter_topk_valid(\n        self, full_collection: Collection, topk_value: int, field_name\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(topk_value)\n        ]\n        batchdoc_and_check(\n            full_collection, multiple_docs, topk_value, operator=\"insert\"\n        )\n\n        query_result = full_collection.query(topk=topk_value)\n        assert len(query_result) == topk_value\n        single_querydoc_check(multiple_docs, query_result, full_collection)\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_with_include_vector(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        query_result = full_collection.query(include_vector=True)\n        assert len(query_result) > 0\n        single_querydoc_check(\n            multiple_docs, query_result, full_collection, id_include_vector=1\n        )\n\n    @pytest.mark.parametrize(\"output_fields\", [[\"int32_field\", \"int64_field\"]])\n    @pytest.mark.parametrize(\"doc_num\", 
[10])\n    def test_query_with_output_fields(\n        self, full_collection: Collection, doc_num, output_fields\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        query_result = full_collection.query(output_fields=output_fields)\n        assert len(query_result) > 0\n        for doc in query_result:\n            field_names = doc.field_names()\n            assert field_names == output_fields\n\n    @pytest.mark.parametrize(\n        \"filter\",\n        [\n            \"int32_field >= 10 and int32_field <= 20\",\n            \"int32_field = 3 and int32_field = 8\",\n        ],\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_empty_result(self, full_collection: Collection, doc_num, filter):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        result = full_collection.query(filter=filter)\n        assert len(result) == 0\n\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [(True, True, HnswIndexParam()), (False, True, FlatIndexParam())],\n        indirect=True,\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_by_id(\n        self, full_collection_new: Collection, doc_num, full_schema_new\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection_new.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(\n            full_collection_new, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            query_result = full_collection_new.query(VectorQuery(field_name=v, id=\"1\"))\n            assert len(query_result) > 0\n            query_doc = 
full_collection_new.fetch(ids=[\"1\"])\n            query_vector = query_doc[\"1\"].vector(v)\n            single_querydoc_check(\n                multiple_docs,\n                query_result,\n                full_collection_new,\n                is_by_vector=1,\n                query_vector=query_vector,\n                data_type=k,\n                vector_name=v,\n            )\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_by_id_ivf(self, full_collection_ivf: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection_ivf.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check_ivf(\n            full_collection_ivf, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                query_result = full_collection_ivf.query(\n                    VectorQuery(field_name=v, id=\"1\")\n                )\n                assert len(query_result) > 0\n                query_doc = full_collection_ivf.fetch(ids=[\"1\"])\n                query_vector = query_doc[\"1\"].vector(v)\n                single_querydoc_check(\n                    multiple_docs,\n                    query_result,\n                    full_collection_ivf,\n                    is_by_vector=1,\n                    query_vector=query_vector,\n                    data_type=k,\n                    vector_name=v,\n                )\n\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [(True, True, HnswIndexParam()), (False, True, FlatIndexParam())],\n        indirect=True,\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"topk\", [None, 1024])\n    @pytest.mark.parametrize(\"filter\", [None, \"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_by_vector(\n        self, full_collection_new: Collection, doc_num, full_schema_new, topk, 
filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection_new.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(\n            full_collection_new, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_new.schema\n            )\n            query_vector = doc_vectors[v]\n            if topk and filter:\n                query_result = full_collection_new.query(\n                    filter=filter,\n                    vectors=VectorQuery(field_name=v, vector=query_vector),\n                    topk=topk,\n                )\n            elif topk and not filter:\n                query_result = full_collection_new.query(\n                    VectorQuery(field_name=v, vector=query_vector), topk=topk\n                )\n            elif not topk and filter:\n                query_result = full_collection_new.query(\n                    filter=filter,\n                    vectors=VectorQuery(field_name=v, vector=query_vector),\n                )\n            else:\n                query_result = full_collection_new.query(\n                    VectorQuery(field_name=v, vector=query_vector)\n                )\n            assert len(query_result) > 0, (\n                f\"Expected at least 1 query result, but got {len(query_result)}\"\n            )\n            single_querydoc_check(\n                multiple_docs,\n                query_result,\n                full_collection_new,\n                is_by_vector=1,\n                query_vector=query_vector,\n                data_type=k,\n                vector_name=v,\n            )\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_by_vector_ivf(self, full_collection_ivf: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection_ivf.schema) for i in 
range(doc_num)\n        ]\n        batchdoc_and_check_ivf(\n            full_collection_ivf, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                doc_fields, doc_vectors = generate_vectordict_random(\n                    full_collection_ivf.schema\n                )\n                query_vector = doc_vectors[v]\n                query_result = full_collection_ivf.query(\n                    VectorQuery(field_name=v, vector=query_vector),\n                    topk=1024,\n                )\n                assert len(query_result) > 0, (\n                    f\"Expected at least 1 query result, but got {len(query_result)}\"\n                )\n                single_querydoc_check(\n                    multiple_docs,\n                    query_result,\n                    full_collection_ivf,\n                    is_by_vector=1,\n                    query_vector=query_vector,\n                    data_type=k,\n                    vector_name=v,\n                )\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_multivector_rrf(self, full_collection: Collection, doc_num):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n        single_query_results = {}\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            single_query_results[v] = full_collection.query(\n                VectorQuery(field_name=v, vector=doc_vectors[v])\n            )\n        expected_rrf_scores = calculate_multi_vector_rrf_scores(single_query_results)\n        multi_query_vectors = []\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            
multi_query_vectors.append(VectorQuery(field_name=v, vector=doc_vectors[v]))\n\n        rrf_reranker = RrfReRanker(topn=3)\n        multi_query_result = full_collection.query(\n            vectors=multi_query_vectors,\n            reranker=rrf_reranker,\n        )\n        assert len(multi_query_result) > 0, (\n            f\"Expected at least 1 result, but got {len(multi_query_result)}\"\n        )\n\n        multi_querydoc_check(multiple_docs, multi_query_result, full_collection)\n\n        prev_score = float(\"inf\")\n        for i, doc in enumerate(multi_query_result):\n            doc_id = doc.id\n            assert doc_id in expected_rrf_scores, (\n                f\"Document {doc_id} should be in expected RRF scores\"\n            )\n            expected_score = expected_rrf_scores[doc_id]\n            actual_score = doc.score\n            assert abs(actual_score - expected_score) < 1e-10, (\n                f\"RRF score mismatch for document {doc_id}: expected {expected_score}, got {actual_score}\"\n            )\n            assert doc.score <= prev_score, (\n                f\"Scores should be in descending order. 
Current: {doc.score}, Previous: {prev_score}\"\n            )\n            prev_score = doc.score\n\n    @pytest.mark.parametrize(\n        \"weights\",\n        [\n            {\n                \"vector_fp32_field\": 0.3,\n                \"vector_fp16_field\": 0.2,\n                \"vector_int8_field\": 0.3,\n                \"sparse_vector_fp32_field\": 0.1,\n                \"sparse_vector_fp16_field\": 0.1,\n            }\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"metric_type\", [MetricType.L2, MetricType.IP, MetricType.COSINE]\n    )\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_multivector_weighted(\n        self, full_collection: Collection, doc_num, weights, metric_type\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n\n        weighted_reranker = WeightedReRanker(\n            topn=3, weights=weights, metric=MetricType.IP\n        )\n\n        single_query_results = {}\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            single_query_results[v] = full_collection.query(\n                VectorQuery(field_name=v, vector=doc_vectors[v])\n            )\n        expected_weighted_scores = calculate_multi_vector_weighted_scores(\n            single_query_results, weights, MetricType.IP\n        )\n\n        multi_query_vectors = []\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            multi_query_vectors.append(VectorQuery(field_name=v, vector=doc_vectors[v]))\n\n        multi_query_result = full_collection.query(\n            vectors=multi_query_vectors,\n            reranker=weighted_reranker,\n        )\n        assert len(multi_query_result) > 0, (\n            f\"Expected at least 1 result, but got {len(multi_query_result)}\"\n        
)\n\n        multi_querydoc_check(multiple_docs, multi_query_result, full_collection)\n\n        prev_score = float(\"inf\")\n        for i, doc in enumerate(multi_query_result):\n            doc_id = doc.id\n            assert doc_id in expected_weighted_scores, (\n                f\"Document {doc_id} should be in expected  scores\"\n            )\n            expected_score = expected_weighted_scores[doc_id]\n            actual_score = doc.score\n            assert abs(actual_score - expected_score) < 1e-10, (\n                f\"score mismatch for document {doc_id}: expected {expected_score}, got {actual_score}\"\n            )\n            assert doc.score <= prev_score, (\n                f\"Scores should be in descending order. Current: {doc.score}, Previous: {prev_score}\"\n            )\n            prev_score = doc.score\n\n    @pytest.mark.parametrize(\"topk\", [5])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_consistency(\n        self, full_collection: Collection, filter, doc_num, topk\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        results = []\n        for i in range(5):\n            query_result = full_collection.query(filter=filter, topk=topk)\n            single_querydoc_check(multiple_docs, query_result, full_collection)\n\n            results.append(query_result)\n        assert len(results) == 5\n        expected_count = len(results[0])\n        for i, result in enumerate(results):\n            assert len(result) == expected_count\n\n        expected_ids = set(doc.id for doc in results[0])\n        for i, result in enumerate(results):\n            result_ids = set(doc.id for doc in result)\n            assert result_ids == expected_ids\n\n        for i, result in 
enumerate(results):\n            result_ids = [doc.id for doc in result]\n            expected_sorted_ids = sorted(result_ids, key=lambda x: int(x))\n            assert result_ids == expected_sorted_ids\n\n    @pytest.mark.parametrize(\"ef\", [0, 100, 1024, 2048])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"topk\", [1024])\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_vector_with_HnswQueryParam_valid(\n        self,\n        full_collection_new: Collection,\n        doc_num,\n        full_schema_new,\n        topk,\n        filter,\n        ef,\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection_new.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(\n            full_collection_new, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_new.schema\n            )\n            query_vector = doc_vectors[v]\n            query_result = full_collection_new.query(\n                filter=filter,\n                vectors=VectorQuery(\n                    field_name=v, vector=query_vector, param=HnswQueryParam(ef=ef)\n                ),\n                topk=topk,\n            )\n            assert len(query_result) > 0, (\n                f\"Expected at least 1 query result, but got {len(query_result)}\"\n            )\n            single_querydoc_check(\n                multiple_docs,\n                query_result,\n                full_collection_new,\n                is_by_vector=1,\n                query_vector=query_vector,\n                data_type=k,\n                vector_name=v,\n            )\n\n    @pytest.mark.parametrize(\"ef\", [None, \"invalid\", 10.5])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"topk\", [10])\n    
@pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_vector_with_HnswQueryParam_invalid(\n        self, full_collection: Collection, doc_num, topk, ef, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)\n            query_vector = doc_vectors[v]\n            with pytest.raises(Exception) as exc_info:\n                full_collection.query(\n                    filter=filter,\n                    vectors=VectorQuery(\n                        field_name=v, vector=query_vector, param=HnswQueryParam(ef=ef)\n                    ),\n                    topk=topk,\n                )\n            assert INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG in str(exc_info.value)\n\n    @pytest.mark.parametrize(\"nprobe\", [1, 10, 100, 2048])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"topk\", [10])\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_vector_with_IVFQueryParam_valid(\n        self, full_collection_ivf: Collection, nprobe, doc_num, topk, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection_ivf.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check_ivf(\n            full_collection_ivf, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_ivf.schema\n            )\n            if v in [\"vector_fp32_field\"]:\n                query_vector = doc_vectors[v]\n\n                query_result = full_collection_ivf.query(\n            
        filter=filter,\n                    vectors=VectorQuery(\n                        field_name=v,\n                        vector=query_vector,\n                        param=IVFQueryParam(nprobe=nprobe),\n                    ),\n                    topk=topk,\n                )\n                assert len(query_result) > 0\n                single_querydoc_check(\n                    multiple_docs,\n                    query_result,\n                    full_collection_ivf,\n                    is_by_vector=1,\n                    query_vector=query_vector,\n                    data_type=k,\n                    vector_name=v,\n                )\n\n    @pytest.mark.parametrize(\"nprobe\", [None, 10.5])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\"topk\", [10])\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    def test_query_vector_with_IVFQueryParam_invalid(\n        self, full_collection_ivf: Collection, nprobe, doc_num, topk, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection_ivf.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check_ivf(\n            full_collection_ivf, multiple_docs, doc_num, operator=\"insert\"\n        )\n        for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n            doc_fields, doc_vectors = generate_vectordict_random(\n                full_collection_ivf.schema\n            )\n            if v in [\"vector_fp32_field\"]:\n                print(\"v:\\n\")\n                print(v)\n                query_vector = doc_vectors[v]\n                with pytest.raises(Exception) as exc_info:\n                    full_collection_ivf.query(\n                        # filter=filter,\n                        vectors=VectorQuery(\n                            field_name=v,\n                            vector=query_vector,\n                            param=IVFQueryParam(nprobe=nprobe),\n                        ),\n 
                       topk=topk,\n                    )\n                assert INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG in str(exc_info.value)\n\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_vector_with_param_invalid(\n        self, full_collection: Collection, doc_num, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        with pytest.raises(Exception) as exc_info:\n            for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n                doc_fields, doc_vectors = generate_vectordict_random(\n                    full_collection.schema\n                )\n                query_vector = doc_vectors[v]\n                if v in [\"vector_fp16_field\", \"vector_fp32_field\"]:\n                    full_collection.query(\n                        filter=filter,\n                        vectors=VectorQuery(\n                            field_name=v, vector=query_vector, param=HnswIndexParam()\n                        ),\n                    )\n        assert INCOMPATIBLE_FUNCTION_ERROR_MSG in str(exc_info.value)\n\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    @pytest.mark.parametrize(\n        \"test_case_name,vector_query,expected_error_msg\",\n        [\n            (\n                \"Non-existent vector field name\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"nonexistent_vector\", vector=ref_dense_vector\n                ),\n                \"Expected exception for non-existent vector field name\",\n            ),\n            (\n                \"Invalid vector data type for dense vector (string instead of list)\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\", 
vector=\"invalid_vector_data\"\n                ),\n                \"Expected exception for invalid dense vector data type\",\n            ),\n            (\n                \"Invalid vector data type for sparse vector (list instead of dict)\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"sparse_fp32\", vector=[1.0, 2.0, 3.0]\n                ),\n                \"Expected exception for invalid sparse vector data type\",\n            ),\n            (\n                \"Empty vector data for dense vector\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\", vector=[]\n                ),\n                \"Expected exception for empty dense vector data\",\n            ),\n            (\n                \"Invalid dimension for dense vector\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\", vector=[1.0, 2.0]\n                ),  # Only 2 dimensions instead of 128\n                \"Expected exception for invalid dense vector dimension\",\n            ),\n            (\n                \"Non-existent document ID for by_id query\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\", id=\"999\"\n                ),  # Non-existent ID\n                \"Expected exception for non-existent document ID\",\n            ),\n            (\n                \"Both vector and id specified (invalid combination)\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\", vector=ref_dense_vector, id=\"5\"\n                ),\n                \"Expected exception for specifying both vector and id\",\n            ),\n            (\n                \"Neither vector nor id specified\",\n                lambda ref_dense_vector: VectorQuery(\n                    field_name=\"vector_fp32_field\"\n                ),  # 
Neither vector nor id\n                \"Expected exception for specifying neither vector nor id\",\n            ),\n        ],\n    )\n    def test_query_vector_with_vectors_invalid(\n        self,\n        full_collection: Collection,\n        doc_num,\n        test_case_name,\n        vector_query,\n        expected_error_msg,\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n        ref_doc_result = full_collection.fetch(ids=[\"5\"])\n        assert \"5\" in ref_doc_result\n        ref_doc = ref_doc_result[\"5\"]\n        ref_dense_vector = ref_doc.vector(\"vector_fp32_field\")\n\n        with pytest.raises(Exception) as exc_info:\n            full_collection.query(vectors=[vector_query(ref_dense_vector)])\n        assert exc_info.value is not None, expected_error_msg\n\n    @pytest.mark.parametrize(\"filter\", [\"int32_field >= 3 and int32_field <= 7\"])\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_query_invalid_param_incompatible_type(\n        self, full_collection: Collection, doc_num, filter\n    ):\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(doc_num)\n        ]\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        with pytest.raises(Exception) as exc_info:\n            for k, v in DEFAULT_VECTOR_FIELD_NAME.items():\n                doc_fields, doc_vectors = generate_vectordict_random(\n                    full_collection.schema\n                )\n                query_vector = doc_vectors[v]\n                full_collection.query(\n                    filter=filter,\n                    vectors=VectorQuery(field_name=v, vector=query_vector),\n                    param=HnswIndexParam(),\n                    topk=3,\n                )\n\n        assert \"query() got an unexpected 
keyword argument 'param'\" in str(\n            exc_info.value\n        )\n\n\nclass TestRRFScoreCalculation:\n    class MockDoc:\n        def __init__(self, id, score=0.0):\n            self._id = id\n            self._score = score\n\n        @property\n        def id(self):\n            return self._id\n\n        @property\n        def score(self):\n            return self._score\n\n        @score.setter\n        def score(self, score):\n            self._score = score\n\n    def test_rrf_score_calculation_formula(self):\n        k = 60\n\n        assert abs(calculate_rrf_score(0, k) - 1.0 / 61) < 1e-10, (\n            \"RRF score for rank 0 should be 1/61\"\n        )\n        assert abs(calculate_rrf_score(1, k) - 1.0 / 62) < 1e-10, (\n            \"RRF score for rank 1 should be 1/62\"\n        )\n        assert abs(calculate_rrf_score(2, k) - 1.0 / 63) < 1e-10, (\n            \"RRF score for rank 2 should be 1/63\"\n        )\n        assert abs(calculate_rrf_score(10, k) - 1.0 / 71) < 1e-10, (\n            \"RRF score for rank 10 should be 1/71\"\n        )\n\n        k = 10\n        assert abs(calculate_rrf_score(0, k) - 1.0 / 11) < 1e-10, (\n            \"RRF score for rank 0 with k=10 should be 1/11\"\n        )\n        assert abs(calculate_rrf_score(1, k) - 1.0 / 12) < 1e-10, (\n            \"RRF score for rank 1 with k=10 should be 1/12\"\n        )\n\n    def test_multi_vector_rrf_scores(self):\n        query1_results = [self.MockDoc(\"1\"), self.MockDoc(\"2\"), self.MockDoc(\"3\")]\n        query2_results = [self.MockDoc(\"3\"), self.MockDoc(\"1\"), self.MockDoc(\"4\")]\n        query3_results = [self.MockDoc(\"2\"), self.MockDoc(\"4\"), self.MockDoc(\"5\")]\n        query_results = {\n            \"vector1\": query1_results,\n            \"vector2\": query2_results,\n            \"vector3\": query3_results,\n        }\n        rrf_scores = calculate_multi_vector_rrf_scores(query_results, k=60)\n\n        expected_doc1_score = 1.0 / 61 + 1.0 / 62\n  
      assert abs(rrf_scores[\"1\"] - expected_doc1_score) < 1e-10, (\n            f\"RRF score for doc1 mismatch: expected {expected_doc1_score}, got {rrf_scores['1']}\"\n        )\n        expected_doc2_score = 1.0 / 62 + 1.0 / 61\n        assert abs(rrf_scores[\"2\"] - expected_doc2_score) < 1e-10, (\n            f\"RRF score for doc2 mismatch: expected {expected_doc2_score}, got {rrf_scores['2']}\"\n        )\n        expected_doc3_score = 1.0 / 63 + 1.0 / 61\n        assert abs(rrf_scores[\"3\"] - expected_doc3_score) < 1e-10, (\n            f\"RRF score for doc3 mismatch: expected {expected_doc3_score}, got {rrf_scores['3']}\"\n        )\n        expected_doc4_score = 1.0 / 63 + 1.0 / 62\n        assert abs(rrf_scores[\"4\"] - expected_doc4_score) < 1e-10, (\n            f\"RRF score for doc4 mismatch: expected {expected_doc4_score}, got {rrf_scores['4']}\"\n        )\n\n        expected_doc5_score = 1.0 / 63\n        assert abs(rrf_scores[\"5\"] - expected_doc5_score) < 1e-10, (\n            f\"RRF score for doc5 mismatch: expected {expected_doc5_score}, got {rrf_scores['5']}\"\n        )\n        sorted_scores = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)\n        expected_order = [\"1\", \"2\", \"3\", \"4\", \"5\"]\n        actual_order = [item[0] for item in sorted_scores]\n        assert actual_order == expected_order, (\n            f\"RRF score ranking mismatch: expected {expected_order}, got {actual_order}\"\n        )\n\n\nclass TestCollectionConcurrencyOperations:\n    @pytest.mark.parametrize(\"doc_num\", [10])\n    def test_concurrent_insert_update_upsert_query(\n        self, full_collection: Collection, doc_num\n    ):\n        import threading\n\n        results = []\n        errors = []\n        multiple_docs = [\n            generate_doc(i, full_collection.schema) for i in range(1000, 1010)\n        ]\n\n        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator=\"insert\")\n\n        def 
insert_operation(thread_id):\n            try:\n                multiple_docs = [\n                    generate_doc(i, full_collection.schema)\n                    for i in range(thread_id, thread_id + 5)\n                ]\n                result = full_collection.insert(multiple_docs)\n                results.append((\"insert\", thread_id, len(result)))\n            except Exception as e:\n                errors.append((\"insert\", thread_id, str(e)))\n\n        def update_operation(thread_id):\n            try:\n                multiple_docs = [\n                    generate_doc_random(i, full_collection.schema)\n                    for i in range(1000, 1001)\n                ]\n                result = full_collection.update(multiple_docs)\n                results.append((\"update\", thread_id, len(result)))\n            except Exception as e:\n                errors.append((\"update\", thread_id, str(e)))\n\n        def upsert_operation(thread_id):\n            try:\n                multiple_docs = [\n                    generate_doc(i, full_collection.schema)\n                    for i in range(thread_id, thread_id + 5)\n                ]\n                result = full_collection.upsert(multiple_docs)\n                results.append((\"upsert\", thread_id, len(result)))\n            except Exception as e:\n                errors.append((\"upsert\", thread_id, str(e)))\n\n        def query_operation(thread_id):\n            try:\n                if thread_id % 3 == 0:\n                    result = full_collection.query(filter=\"int32_field > 1\", topk=5)\n                elif thread_id % 3 == 1:\n                    result = full_collection.query(filter=\"bool_field = true\", topk=3)\n                else:\n                    query_vector = [0.1] * 128\n                    result = full_collection.query(\n                        VectorQuery(\n                            field_name=\"vector_fp32_field\", vector=query_vector\n                        ),\n        
                topk=3,\n                    )\n\n                results.append((\"query\", thread_id, len(result)))\n            except Exception as e:\n                errors.append((\"query\", thread_id, str(e)))\n\n        def delete_operation(thread_id):\n            try:\n                # Delete some existing documents\n                delete_ids = (\n                    [f\"{thread_id + 1}\", f\"{thread_id + 2}\"]\n                    if thread_id < 5\n                    else [f\"{thread_id % 5 + 1}\"]\n                )\n                result = full_collection.delete(delete_ids)\n                results.append((\"delete\", thread_id, len(result)))\n            except Exception as e:\n                errors.append((\"delete\", thread_id, str(e)))\n\n        threads = []\n        for i in range(1):\n            thread = threading.Thread(target=insert_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n        for i in range(1):\n            thread = threading.Thread(target=update_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n        for i in range(1):\n            thread = threading.Thread(target=upsert_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n        for i in range(1):\n            thread = threading.Thread(target=query_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n        for i in range(1):\n            thread = threading.Thread(target=delete_operation, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        for thread in threads:\n            thread.join()\n\n        insert_results = [r for r in results if r[0] == \"insert\"]\n        update_results = [r for r in results if r[0] == \"update\"]\n        upsert_results = [r for r in results if r[0] == \"upsert\"]\n        query_results = [r for r in results if r[0] == \"query\"]\n        delete_results = [r for r 
in results if r[0] == \"delete\"]\n\n        assert (\n            len(insert_results)\n            + len(update_results)\n            + len(upsert_results)\n            + len(query_results)\n            + len(delete_results)\n            > 0\n        ), f\"No operations succeeded. Errors: {errors}\"\n\n        critical_errors = [\n            e for e in errors if \"critical\" in e[2].lower() or \"fatal\" in e[2].lower()\n        ]\n        assert len(critical_errors) == 0, f\"Critical errors occurred: {critical_errors}\"\n"
  },
  {
    "path": "python/tests/detail/test_collection_exception.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\nimport logging\nimport pytest\nimport numpy as np\nimport zvec\n\nfrom zvec import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    DataType,\n    Collection,\n    Doc,\n    FieldSchema,\n    VectorSchema,\n    VectorQuery,\n)\n\n\nclass TestCollectionExceptionHandling:\n    @pytest.fixture(scope=\"function\")\n    def test_collection(self, tmp_path_factory):\n        \"\"\"Fixture to create a test collection\"\"\"\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n                FieldSchema(\"weight\", DataType.FLOAT, nullable=True),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                ),\n                VectorSchema(\n                
    \"sparse\", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()\n                ),\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None, \"Failed to create and open collection\"\n\n        yield coll\n\n        # Clean up\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n    def test_create_and_open_missing_path(self, tmp_path_factory):\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                schema=collection_schema, option=collection_option\n            )\n    
    assert exc_info.value is not None, (\n            \"Expected exception for missing path parameter\"\n        )\n\n    def test_create_and_open_missing_schema(self, tmp_path_factory):\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.create_and_open(\n                path=str(collection_path), option=collection_option\n            )\n        assert exc_info.value is not None, (\n            \"Expected exception for missing schema parameter\"\n        )\n\n    def test_open_missing_path(self):\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        with pytest.raises(Exception) as exc_info:\n            coll = zvec.open(option=collection_option)\n        assert exc_info.value is not None, (\n            \"Expected exception for missing path parameter\"\n        )\n\n    def test_insert_missing_docs(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            result = test_collection.insert()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing docs parameter\"\n        )\n\n    def test_update_missing_docs(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            result = test_collection.update()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing docs parameter\"\n        )\n\n    def test_upsert_missing_docs(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            result = test_collection.upsert()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing docs parameter\"\n        )\n\n    def test_delete_missing_ids(self, test_collection: Collection):\n        with 
pytest.raises(Exception) as exc_info:\n            result = test_collection.delete()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing ids parameter\"\n        )\n\n    def test_fetch_missing_ids(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            result = test_collection.fetch()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing ids parameter\"\n        )\n\n    def test_query_missing_vectorquery_field_name(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            result = test_collection.query(vectors=[VectorQuery()])\n        assert exc_info.value is not None, (\n            \"Expected exception for missing VectorQuery field_name parameter\"\n        )\n\n    def test_add_column_missing_field_schema(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            test_collection.add_column()\n        assert exc_info.value is not None, (\n            \"Expected exception for missing field_schema parameter\"\n        )\n\n    def test_alter_column_missing_old_name(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            test_collection.alter_column(new_name=\"new_name\")\n        assert exc_info.value is not None, (\n            \"Expected exception for missing old_name parameter\"\n        )\n\n    def test_alter_column_missing_new_name(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            test_collection.alter_column(old_name=\"old_name\")\n        assert exc_info.value is not None, (\n            \"Expected exception for missing new_name parameter\"\n        )\n\n    def test_drop_column_missing_field_name(self, test_collection: Collection):\n        with pytest.raises(Exception) as exc_info:\n            test_collection.drop_column()\n        assert exc_info.value 
is not None, (\n            \"Expected exception for missing field_name parameter\"\n        )\n\n    def test_invalid_parameter_types(self, test_collection: Collection):\n        # This test depends on specific implementation details\n        # Generally, we would expect TypeErrors or similar exceptions\n        pass\n\n    def test_missing_required_parameters(self, test_collection: Collection):\n        # This test depends on specific implementation details\n        # Generally, we would expect TypeErrors or similar exceptions\n        pass\n\n    def test_empty_collection_operations(self, tmp_path_factory):\n        collection_schema = zvec.CollectionSchema(\n            name=\"empty_test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"empty_test_collection\"\n\n        coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None, \"Failed to create and open collection\"\n\n        # Test fetch on empty collection\n        result = coll.fetch([\"1\"])\n        
assert len(result) >= 0  # May be empty or have special handling\n\n        # Test query on empty collection\n        result = coll.query()\n        assert len(result) == 0\n\n        # Test update on empty collection\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\"},\n            vectors={\"dense\": np.random.random(128).tolist()},\n        )\n\n        result = coll.update(doc)\n        # Should handle gracefully, possibly with NOT_FOUND status\n\n        # Clean up\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n    def test_resource_management(self, test_collection: Collection):\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, \"name\": \"test\", \"weight\": 80.5},\n            vectors={\n                \"dense\": np.random.random(128).tolist(),\n                \"sparse\": {1: 1.0, 2: 2.0},\n            },\n        )\n\n        # Insert\n        result = test_collection.insert(doc)\n        assert result.ok()\n\n        # Fetch\n        result = test_collection.fetch([\"1\"])\n        assert len(result) == 1\n\n        # Query\n        result = test_collection.query()\n        assert len(result) >= 0\n\n        # Update\n        result = test_collection.update(doc)\n        assert result.ok()\n\n        # Delete\n        result = test_collection.delete(\"1\")\n        assert result.ok()\n\n    def test_exception_resource_cleanup(self, test_collection: Collection):\n        # This test would need to simulate exception conditions\n        # which is difficult without specific failure injection points\n        pass\n"
  },
  {
    "path": "python/tests/detail/test_collection_open.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\nimport threading\nimport numpy as np\n\nfrom fixture_helper import *\n\nCOLLECTION_OPTION_TEST_CASES_VALID = [\n    # (read_only, enable_mmap, description)\n    (False, True, \"Read-write with mmap enabled\"),\n    (False, False, \"Read-write with mmap disabled\"),\n    (True, True, \"Read-only with mmap enabled\"),\n    (True, False, \"Read-only with mmap disabled\"),\n]\n\n# Test data for invalid paths\nINVALID_PATH_LIST = [\n    \"/nonexistent/directory/test_collection\",\n    \"invalid:path\",\n    \"\",  # Empty path\n]\n\n\n@pytest.fixture(scope=\"session\")\ndef collection_schema():\n    return zvec.CollectionSchema(\n        name=\"test_collection\",\n        fields=[\n            FieldSchema(\n                \"id\",\n                DataType.INT64,\n                nullable=False,\n                index_param=InvertIndexParam(enable_range_optimization=True),\n            ),\n            FieldSchema(\n                \"name\", DataType.STRING, nullable=False, index_param=InvertIndexParam()\n            ),\n            FieldSchema(\n                \"weight\", DataType.FLOAT, nullable=False, index_param=InvertIndexParam()\n            ),\n        ],\n        vectors=[\n            VectorSchema(\n                \"dense\",\n                DataType.VECTOR_FP32,\n                dimension=128,\n                index_param=HnswIndexParam(),\n      
      ),\n            VectorSchema(\n                \"sparse\", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()\n            ),\n        ],\n    )\n\n\n@pytest.fixture\ndef single_doc():\n    id = 0\n    return Doc(\n        id=f\"{id}\",\n        fields={\"id\": id, \"name\": \"test\"},\n        vectors={\n            \"dense\": [id + 0.1] * 128,\n        },\n    )\n\n\n@pytest.fixture(scope=\"function\")\ndef test_collection(\n    tmp_path_factory, collection_schema, collection_option\n) -> Generator[Any, Any, Collection]:\n    temp_dir = tmp_path_factory.mktemp(\"zvec\")\n    collection_path = temp_dir / \"test_collection\"\n\n    coll = zvec.create_and_open(\n        path=str(collection_path), schema=collection_schema, option=collection_option\n    )\n\n    assert coll is not None, \"Failed to create and open collection\"\n    assert coll.path == str(collection_path)\n    assert coll.schema.name == collection_schema.name\n    assert list(coll.schema.fields) == list(collection_schema.fields)\n    assert list(coll.schema.vectors) == list(collection_schema.vectors)\n    assert coll.option.read_only == collection_option.read_only\n    assert coll.option.enable_mmap == collection_option.enable_mmap\n\n    try:\n        yield coll\n    finally:\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n\nclass TestCollectionOpen:\n    def test_open_basic_functionality(\n        self, tmp_path_factory, collection_schema, collection_option\n    ):\n        import sys\n        import time\n        import os\n\n        # Create unique temp directory\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        # Ensure the path exists\n        collection_path_str = str(collection_path)\n        print(f\"DEBUG: Collection path: 
{collection_path_str}\")\n        print(f\"DEBUG: Temp directory exists: {temp_dir.exists()}\")\n\n        # Create and open collection first\n        created_coll = zvec.create_and_open(\n            path=collection_path_str, schema=collection_schema, option=collection_option\n        )\n\n        assert created_coll is not None, (\n            f\"Failed to create collection, returned None instead of valid Collection object. Path: {collection_path_str}\"\n        )\n        assert created_coll.path == collection_path_str, (\n            f\"Collection path mismatch. Expected: {collection_path_str}, Actual: {created_coll.path}\"\n        )\n        assert created_coll.schema.name == \"test_collection\", (\n            f\"Collection schema name mismatch. Expected: test_collection, Actual: {created_coll.schema.name}\"\n        )\n\n        # Insert multiple documents to verify persistence\n        docs = []\n        for i in range(3):\n            doc = Doc(\n                id=f\"{i}\",\n                fields={\"id\": i, \"name\": f\"test_{i}\", \"weight\": float(i * 10)},\n                vectors={\n                    \"dense\": [float(j + i) for j in range(128)],\n                    \"sparse\": {j: float(j + i) for j in range(5)},\n                },\n            )\n            docs.append(doc)\n\n        result = created_coll.insert(docs)\n        assert len(result) == 3, f\"Expected 3 insertion results, but got {len(result)}\"\n        for i, res in enumerate(result):\n            assert res.ok(), (\n                f\"Insertion result {i} is not OK. 
Status code: {res.code()}, Message: {res.message()}\"\n            )\n\n        # Verify documents were inserted using fetch interface\n        fetched_docs_after_insert = created_coll.fetch([\"0\", \"1\", \"2\"])\n        assert len(fetched_docs_after_insert) == 3, (\n            f\"Expected 3 fetched documents after insertion, but got {len(fetched_docs_after_insert)}\"\n        )\n        assert \"0\" in fetched_docs_after_insert, (\n            \"Document with ID '0' not found in fetched results after insertion\"\n        )\n        assert \"1\" in fetched_docs_after_insert, (\n            \"Document with ID '1' not found in fetched results after insertion\"\n        )\n        assert \"2\" in fetched_docs_after_insert, (\n            \"Document with ID '2' not found in fetched results after insertion\"\n        )\n\n        # Verify fetched document content after insertion\n        for i in range(3):\n            doc = fetched_docs_after_insert[f\"{i}\"]\n            assert doc is not None, (\n                f\"Fetched document with ID '{i}' is None after insertion\"\n            )\n            assert doc.id == f\"{i}\", (\n                f\"Document ID mismatch for document '{i}' after insertion. Expected: {i}, Actual: {doc.id}\"\n            )\n            assert doc.field(\"id\") == i, (\n                f\"Document id field mismatch for document '{i}' after insertion. Expected: {i}, Actual: {doc.field('id')}\"\n            )\n            assert doc.field(\"name\") == f\"test_{i}\", (\n                f\"Document name field mismatch for document '{i}' after insertion. Expected: test_{i}, Actual: {doc.field('name')}\"\n            )\n            assert doc.field(\"weight\") == float(i * 10), (\n                f\"Document weight field mismatch for document '{i}' after insertion. 
Expected: {float(i * 10)}, Actual: {doc.field('weight')}\"\n            )\n\n            # Verify vector access after insertion\n            assert doc.vector(\"dense\") is not None, (\n                f\"Document {i} should have dense vector after insertion\"\n            )\n            assert doc.vector(\"sparse\") is not None, (\n                f\"Document {i} should have sparse vector after insertion\"\n            )\n\n            # Verify vector types after insertion\n            assert isinstance(doc.vector(\"dense\"), list), (\n                f\"Document {i} dense vector should be dict after insertion, got {type(doc.vector('dense'))}\"\n            )\n            assert isinstance(doc.vector(\"sparse\"), dict), (\n                f\"Document {i} sparse vector should be dict after insertion, got {type(doc.vector('sparse'))}\"\n            )\n\n        # Verify documents were inserted using stats\n        stats = created_coll.stats\n        assert stats is not None, \"Collection stats should not be None\"\n        assert stats.doc_count == 3, (\n            f\"Document count mismatch after insertion. 
Expected: 3, Actual: {stats.doc_count}\"\n        )\n\n        # Store the collection path before cleanup\n        collection_path = created_coll.path\n\n        # Clean up the created collection reference\n        del created_coll\n\n        # Wait and verify the path still exists\n        print(f\"DEBUG: Collection path after destroy: {collection_path}\")\n        print(f\"DEBUG: Path exists after destroy: {os.path.exists(collection_path)}\")\n\n        # Now open the existing collection\n        try:\n            print(f\"DEBUG: Path exists before open: {os.path.exists(collection_path)}\")\n\n            # List contents of parent directory for debugging\n            parent_dir = os.path.dirname(collection_path)\n            if os.path.exists(parent_dir):\n                print(f\"DEBUG: Parent directory contents: {os.listdir(parent_dir)}\")\n\n            opened_coll = zvec.open(path=collection_path, option=collection_option)\n\n            assert opened_coll is not None, (\n                f\"Failed to open existing collection at path: {collection_path}. Returned None instead of valid Collection object\"\n            )\n            assert opened_coll.path == collection_path, (\n                f\"Opened collection path mismatch. Expected: {collection_path}, Actual: {opened_coll.path}\"\n            )\n            assert opened_coll.schema.name == \"test_collection\", (\n                f\"Opened collection schema name mismatch. 
Expected: test_collection, Actual: {opened_coll.schema.name}\"\n            )\n\n            # Check reference count of opened collection\n            opened_ref_count = sys.getrefcount(opened_coll)\n            print(f\"DEBUG: Reference count of opened collection: {opened_ref_count}\")\n\n            # Verify data persistence\n            # Verify data persistence using fetch interface\n            fetched_docs = opened_coll.fetch([\"0\", \"1\", \"2\"])\n            assert len(fetched_docs) == 3, (\n                f\"Expected 3 fetched documents after reopening, but got {len(fetched_docs)}\"\n            )\n            assert \"0\" in fetched_docs, (\n                \"Document with ID '0' not found in fetched results after reopening\"\n            )\n            assert \"1\" in fetched_docs, (\n                \"Document with ID '1' not found in fetched results after reopening\"\n            )\n            assert \"2\" in fetched_docs, (\n                \"Document with ID '2' not found in fetched results after reopening\"\n            )\n\n            # Verify fetched document content after reopening collection\n            for i in range(3):\n                doc = fetched_docs[f\"{i}\"]\n                assert doc is not None, (\n                    f\"Fetched document with ID '{i}' is None after reopening collection\"\n                )\n                assert doc.id == f\"{i}\", (\n                    f\"Document ID mismatch for document '{i}' after reopening. Expected: {i}, Actual: {doc.id}\"\n                )\n                assert doc.field(\"id\") == i, (\n                    f\"Document id field mismatch for document '{i}' after reopening. Expected: {i}, Actual: {doc.field('id')}\"\n                )\n                assert doc.field(\"name\") == f\"test_{i}\", (\n                    f\"Document name field mismatch for document '{i}' after reopening. 
Expected: test_{i}, Actual: {doc.field('name')}\"\n                )\n                assert doc.field(\"weight\") == float(i * 10), (\n                    f\"Document weight field mismatch for document '{i}' after reopening. Expected: {float(i * 10)}, Actual: {doc.field('weight')}\"\n                )\n\n                # Verify vector access after reopening\n                assert doc.vector(\"dense\") is not None, (\n                    f\"Document {i} should have dense vector after reopening\"\n                )\n                assert doc.vector(\"sparse\") is not None, (\n                    f\"Document {i} should have sparse vector after reopening\"\n                )\n\n                # Verify vector types after reopening\n                assert isinstance(doc.vector(\"dense\"), list), (\n                    f\"Document {i} dense vector should be dict after reopening, got {type(doc.vector('dense'))}\"\n                )\n                assert isinstance(doc.vector(\"sparse\"), dict), (\n                    f\"Document {i} sparse vector should be dict after reopening, got {type(doc.vector('sparse'))}\"\n                )\n\n                # Verify score attribute exists\n                assert hasattr(doc, \"score\"), (\n                    f\"Document {i} should have a score attribute after reopening\"\n                )\n                assert isinstance(doc.score, (int, float)), (\n                    f\"Document {i} score should be numeric after reopening, got {type(doc.score)}\"\n                )\n                # For fetch operations, score is typically 0.0\n                assert doc.score == 0.0, (\n                    f\"Document {i} score should be 0.0 for fetch operation after reopening, but got {doc.score}\"\n                )\n\n            # Test query functionality\n            query_result = opened_coll.query(include_vector=True)\n            assert len(query_result) == 3, (\n                f\"Expected 3 query results, but got 
{len(query_result)}\"\n            )\n\n            # Verify query results have proper structure and content with detailed validation\n            returned_doc_ids = set()\n            for doc in query_result:\n                # Verify basic document structure\n                assert doc.id is not None, f\"Query result document should have an ID\"\n                assert doc.id in [\"0\", \"1\", \"2\"], (\n                    f\"Query result document ID should be one of ['0', '1', '2'], but got {doc.id}\"\n                )\n                returned_doc_ids.add(doc.id)\n\n                # Verify field access\n                assert doc.field(\"id\") is not None, (\n                    f\"Document {doc.id} should have id field\"\n                )\n                assert doc.field(\"name\") is not None, (\n                    f\"Document {doc.id} should have name field\"\n                )\n                assert doc.field(\"weight\") is not None, (\n                    f\"Document {doc.id} should have weight field\"\n                )\n\n                # Verify field values\n                expected_id = int(doc.id)\n                assert doc.field(\"id\") == expected_id, (\n                    f\"Document {doc.id} id field mismatch. Expected: {expected_id}, Actual: {doc.field('id')}\"\n                )\n                assert doc.field(\"name\") == f\"test_{expected_id}\", (\n                    f\"Document {doc.id} name field mismatch. Expected: test_{expected_id}, Actual: {doc.field('name')}\"\n                )\n                assert doc.field(\"weight\") == float(expected_id * 10), (\n                    f\"Document {doc.id} weight field mismatch. 
Expected: {float(expected_id * 10)}, Actual: {doc.field('weight')}\"\n                )\n\n                # Verify vector access\n                assert doc.vector(\"dense\") is not None, (\n                    f\"Document {doc.id} should have dense vector\"\n                )\n                assert doc.vector(\"sparse\") is not None, (\n                    f\"Document {doc.id} should have sparse vector\"\n                )\n\n                # Verify vector types\n                assert isinstance(doc.vector(\"dense\"), list), (\n                    f\"Document {doc.id} dense vector should be list, got {type(doc.vector('dense'))}\"\n                )\n                assert isinstance(doc.vector(\"sparse\"), dict), (\n                    f\"Document {doc.id} sparse vector should be dict, got {type(doc.vector('sparse'))}\"\n                )\n\n                # Verify score attribute exists\n                assert hasattr(doc, \"score\"), (\n                    f\"Document {doc.id} should have a score attribute\"\n                )\n                assert isinstance(doc.score, (int, float)), (\n                    f\"Document {doc.id} score should be numeric, got {type(doc.score)}\"\n                )\n\n            # Verify all expected documents are returned\n            expected_doc_ids = {\"0\", \"1\", \"2\"}\n            assert returned_doc_ids == expected_doc_ids, (\n                f\"Query should return all expected documents. Expected: {expected_doc_ids}, Actual: {returned_doc_ids}\"\n            )\n\n            # === Enhanced validation based on test_collection_dql_operations.py ===\n\n            # Verify vector field names accessibility for all documents\n            for doc in query_result:\n                vector_names = doc.vector_names()\n                expected_vector_names = {\"dense\", \"sparse\"}\n                assert set(vector_names) == expected_vector_names, (\n                    f\"Document {doc.id} vector names mismatch. 
Expected: {expected_vector_names}, Actual: {set(vector_names)}\"\n                )\n\n                # Verify all vector fields can be accessed\n                for vector_name in expected_vector_names:\n                    vector_data = doc.vector(vector_name)\n                    assert vector_data is not None, (\n                        f\"Document {doc.id} should have accessible vector '{vector_name}'\"\n                    )\n                    if vector_name == \"dense\":\n                        assert isinstance(vector_data, list), (\n                            f\"Document {doc.id} vector '{vector_name}' should be list, got {type(vector_data)}\"\n                        )\n                    else:\n                        assert isinstance(vector_data, dict), (\n                            f\"Document {doc.id} vector '{vector_name}' should be dict, got {type(vector_data)}\"\n                        )\n\n            # Test query with filter\n            filtered_result = opened_coll.query(filter=\"id >= 1\", include_vector=True)\n            assert len(filtered_result) == 2, (\n                f\"Expected 2 filtered query results (id >= 1), but got {len(filtered_result)}\"\n            )\n\n            # Verify filtered query results\n            filtered_doc_ids = set()\n            for doc in filtered_result:\n                assert doc.id is not None, (\n                    f\"Filtered query result document should have an ID\"\n                )\n                assert doc.id in [\"1\", \"2\"], (\n                    f\"Filtered query result document ID should be one of ['1', '2'], but got {doc.id}\"\n                )\n                filtered_doc_ids.add(doc.id)\n\n                # Verify filter condition is satisfied\n                doc_id = int(doc.id)\n                assert doc_id >= 1, (\n                    f\"Document {doc.id} should satisfy filter condition id >= 1\"\n                )\n\n                # Verify document structure\n      
          assert doc.field(\"id\") is not None, (\n                    f\"Document {doc.id} should have id field\"\n                )\n                assert doc.field(\"name\") is not None, (\n                    f\"Document {doc.id} should have name field\"\n                )\n                assert doc.field(\"weight\") is not None, (\n                    f\"Document {doc.id} should have weight field\"\n                )\n\n                # Verify field values\n                assert doc.field(\"id\") == doc_id, (\n                    f\"Document {doc.id} id field mismatch. Expected: {doc_id}, Actual: {doc.field('id')}\"\n                )\n                assert doc.field(\"name\") == f\"test_{doc_id}\", (\n                    f\"Document {doc.id} name field mismatch. Expected: test_{doc_id}, Actual: {doc.field('name')}\"\n                )\n                assert doc.field(\"weight\") == float(doc_id * 10), (\n                    f\"Document {doc.id} weight field mismatch. Expected: {float(doc_id * 10)}, Actual: {doc.field('weight')}\"\n                )\n\n                # Verify vector access\n                assert doc.vector(\"dense\") is not None, (\n                    f\"Document {doc.id} should have dense vector\"\n                )\n                assert doc.vector(\"sparse\") is not None, (\n                    f\"Document {doc.id} should have sparse vector\"\n                )\n\n                # Verify score attribute exists\n                assert hasattr(doc, \"score\"), (\n                    f\"Document {doc.id} should have a score attribute\"\n                )\n                assert isinstance(doc.score, (int, float)), (\n                    f\"Document {doc.id} score should be numeric, got {type(doc.score)}\"\n                )\n\n            # Verify filtered documents\n            expected_filtered_ids = {\"1\", \"2\"}\n            assert filtered_doc_ids == expected_filtered_ids, (\n                f\"Filtered query should return 
expected documents. Expected: {expected_filtered_ids}, Actual: {filtered_doc_ids}\"\n            )\n\n            # Test vector query functionality for dense vectors\n            query_vector_dense = [0.1] * 128\n            vector_query_result = opened_coll.query(\n                VectorQuery(field_name=\"dense\", vector=query_vector_dense)\n            )\n            assert len(vector_query_result) > 0, (\n                f\"Expected at least 1 vector query result, but got {len(vector_query_result)}\"\n            )\n\n            # Verify vector query results structure\n            for doc in vector_query_result[:3]:  # Check first 3 results\n                assert doc.id is not None, (\n                    f\"Vector query result document should have an ID\"\n                )\n                assert doc.id in [\"0\", \"1\", \"2\"], (\n                    f\"Vector query result document ID should be one of ['0', '1', '2'], but got {doc.id}\"\n                )\n\n                # Verify document structure\n                assert doc.field(\"id\") is not None, (\n                    f\"Document {doc.id} should have id field\"\n                )\n                assert doc.field(\"name\") is not None, (\n                    f\"Document {doc.id} should have name field\"\n                )\n                assert doc.field(\"weight\") is not None, (\n                    f\"Document {doc.id} should have weight field\"\n                )\n\n                # Verify vector access\n                assert doc.vector(\"dense\") is not None, (\n                    f\"Document {doc.id} should have dense vector\"\n                )\n                assert doc.vector(\"sparse\") is not None, (\n                    f\"Document {doc.id} should have sparse vector\"\n                )\n\n                # Verify score attribute exists and is numeric\n                assert hasattr(doc, \"score\"), (\n                    f\"Document {doc.id} should have a score attribute\"\n      
          )\n                assert isinstance(doc.score, (int, float)), (\n                    f\"Document {doc.id} score should be numeric, got {type(doc.score)}\"\n                )\n\n                # For dense vector queries, score should typically be non-negative (depending on metric)\n                # Note: This may vary based on the metric type used\n                assert doc.score >= 0 or doc.score < 0, (\n                    f\"Document {doc.id} score should be a valid number\"\n                )\n\n            # Test vector query functionality for sparse vectors\n            query_vector_sparse = {1: 1.0, 2: 2.0, 3: 3.0}\n            sparse_vector_query_result = opened_coll.query(\n                VectorQuery(field_name=\"sparse\", vector=query_vector_sparse)\n            )\n            assert len(sparse_vector_query_result) > 0, (\n                f\"Expected at least 1 sparse vector query result, but got {len(sparse_vector_query_result)}\"\n            )\n\n            # Verify sparse vector query results structure\n            for doc in sparse_vector_query_result[:3]:  # Check first 3 results\n                assert doc.id is not None, (\n                    f\"Sparse vector query result document should have an ID\"\n                )\n                assert doc.id in [\"0\", \"1\", \"2\"], (\n                    f\"Sparse vector query result document ID should be one of ['0', '1', '2'], but got {doc.id}\"\n                )\n\n                # Verify document structure\n                assert doc.field(\"id\") is not None, (\n                    f\"Document {doc.id} should have id field\"\n                )\n                assert doc.field(\"name\") is not None, (\n                    f\"Document {doc.id} should have name field\"\n                )\n                assert doc.field(\"weight\") is not None, (\n                    f\"Document {doc.id} should have weight field\"\n                )\n\n                # Verify vector access\n        
        assert doc.vector(\"dense\") is not None, (\n                    f\"Document {doc.id} should have dense vector\"\n                )\n                assert doc.vector(\"sparse\") is not None, (\n                    f\"Document {doc.id} should have sparse vector\"\n                )\n\n                # Verify score attribute exists and is numeric\n                assert hasattr(doc, \"score\"), (\n                    f\"Document {doc.id} should have a score attribute\"\n                )\n                assert isinstance(doc.score, (int, float)), (\n                    f\"Document {doc.id} score should be numeric, got {type(doc.score)}\"\n                )\n\n            # Clean up\n            if hasattr(opened_coll, \"destroy\") and opened_coll is not None:\n                opened_coll.destroy()\n                print(\"DEBUG: Opened collection destroyed successfully\")\n\n        except Exception as e:\n            logging.error(\"Exception occurred: [{}]\".format(e))\n            raise e\n\n    @pytest.mark.parametrize(\n        \"read_only,enable_mmap,description\", COLLECTION_OPTION_TEST_CASES_VALID\n    )\n    @pytest.mark.parametrize(\"createAndopen_enable_mmap\", [True, False])\n    def test_open_with_different_collection_options_valid(\n        self,\n        tmp_path_factory,\n        createAndopen_enable_mmap,\n        read_only,\n        enable_mmap,\n        description,\n        collection_schema,\n    ):\n        # Create collection with initial option\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        initial_option = CollectionOption(\n            read_only=False, enable_mmap=createAndopen_enable_mmap\n        )\n\n        # Create and open collection first\n        created_coll = zvec.create_and_open(\n            path=str(collection_path), schema=collection_schema, option=initial_option\n        )\n\n        assert created_coll is not None, \"Failed to create 
collection\"\n\n        # Clean up the created collection reference\n        del created_coll\n\n        # Now open with different options\n        collection_option = CollectionOption(\n            read_only=read_only, enable_mmap=enable_mmap\n        )\n\n        try:\n            opened_coll = zvec.open(path=str(collection_path), option=collection_option)\n\n            assert opened_coll is not None, (\n                f\"Failed to open collection with option: {description}. Returned None instead of valid Collection object. Path: {collection_path}\"\n            )\n            assert opened_coll.path == str(collection_path), (\n                f\"Opened collection path mismatch. Expected: {collection_path}, Actual: {opened_coll.path}\"\n            )\n            assert opened_coll.schema.name == collection_schema.name, (\n                f\"Opened collection schema name mismatch. Expected: {collection_schema.name}, Actual: {opened_coll.schema.name}\"\n            )\n            assert opened_coll.option.read_only == read_only, (\n                f\"Opened collection read_only option mismatch. Expected: {read_only}, Actual: {opened_coll.option.read_only}\"\n            )\n            assert opened_coll.option.enable_mmap == createAndopen_enable_mmap, (\n                f\"Opened collection mmap option mismatch. 
Expected: {createAndopen_enable_mmap}, Actual: {opened_coll.option.enable_mmap}\"\n            )\n\n            # Clean up\n            if (\n                hasattr(opened_coll, \"destroy\")\n                and opened_coll is not None\n                and read_only == False\n            ):\n                opened_coll.destroy()\n\n        except Exception as e:\n            logging.error(\"Exception occurred: [{}]\".format(e))\n            pytest.fail(f\"Failed to open collection with different options: {e}\")\n\n    def test_open_with_none_option(self, tmp_path_factory, collection_schema):\n        # Create collection\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        initial_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        # Create and open collection first\n        created_coll = zvec.create_and_open(\n            path=str(collection_path), schema=collection_schema, option=initial_option\n        )\n\n        assert created_coll is not None, (\n            f\"Failed to create collection. Returned None instead of valid Collection object. 
Path: {collection_path}\"\n        )\n\n        # Clean up the created collection reference\n        del created_coll\n\n        # Now open with None option\n        with pytest.raises(Exception) as exc_info:\n            zvec.open(path=str(collection_path), option=None)\n\n        assert \"incompatible function arguments\" in str(exc_info.value), (\n            f\"Expected 'incompatible function arguments' error, but got: {exc_info.value}\"\n        )\n\n    def test_reopen_collection(self, tmp_path_factory):\n        # Prepare schema\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        # Create collection\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        # Create and open collection\n        coll1 = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert coll1 is not None, \"Failed to create and open collection\"\n\n        # Insert some data\n        doc = Doc(\n            id=\"1\",\n            fields={\"id\": 1, 
\"name\": \"test\"},\n            vectors={\"dense\": np.random.random(128).tolist()},\n        )\n\n        result = coll1.insert(doc)\n        assert result.ok()\n\n        # Close the first collection (delete reference)\n        del coll1\n\n        # Reopen the collection\n        coll2 = zvec.open(path=str(collection_path), option=collection_option)\n\n        assert coll2 is not None, \"Failed to reopen collection\"\n        assert coll2.path == str(collection_path)\n        assert coll2.schema.name == collection_schema.name\n\n        # Verify data is still there\n        fetched_docs = coll2.fetch([\"1\"])\n        assert \"1\" in fetched_docs\n        fetched_doc = fetched_docs[\"1\"]\n        assert fetched_doc.id == \"1\"\n        assert fetched_doc.field(\"name\") == \"test\"\n\n        # Clean up\n        if hasattr(coll2, \"destroy\") and coll2 is not None:\n            try:\n                coll2.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n    def test_open_concurrent_same_path(self, tmp_path_factory):\n        # First create a collection\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        
collection_option = CollectionOption(read_only=False, enable_mmap=True)\n\n        # Create collection path\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        # First create the collection\n        created_coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert created_coll is not None, \"Failed to create collection\"\n\n        # Close the collection so we can test concurrent opening\n        if hasattr(created_coll, \"close\") and created_coll is not None:\n            created_coll.close()\n\n        # Shared variables to collect results from threads\n        results = []\n        errors = []\n\n        # Lock for thread-safe operations\n        lock = threading.Lock()\n        # Clean up the created collection reference\n        del created_coll\n\n        # Function to be executed by each thread\n        def open_collection_thread(thread_id):\n            try:\n                coll = zvec.open(path=str(collection_path), option=collection_option)\n                with lock:\n                    results.append((thread_id, coll))\n                # Close the collection if opened successfully\n                if hasattr(coll, \"close\") and coll is not None:\n                    coll.close()\n            except Exception as e:\n                with lock:\n                    errors.append((thread_id, str(e)))\n\n        # Create 5 threads to call open concurrently\n        threads = []\n        for i in range(5):\n            thread = threading.Thread(target=open_collection_thread, args=(i,))\n            threads.append(thread)\n            thread.start()\n\n        # Wait for all threads to complete\n        for thread in threads:\n            thread.join()\n\n        # Verify concurrency safety: only one should succeed, others should fail\n        assert len(results) == 1, 
(\n            f\"Expected exactly one successful open, but got {len(results)}\"\n        )\n        assert len(errors) == 4, (\n            f\"Expected exactly four failures, but got {len(errors)}\"\n        )\n\n        # Additional verification: check that the successful open has a valid collection\n        successful_thread_id, successful_collection = results[0]\n        assert successful_collection is not None, (\n            \"Successful open should return a valid collection\"\n        )\n        assert successful_collection.path == str(collection_path), (\n            \"Collection path mismatch\"\n        )\n\n        # Clean up the successfully opened collection\n        if (\n            hasattr(successful_collection, \"destroy\")\n            and successful_collection is not None\n        ):\n            try:\n                successful_collection.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n    def test_open_with_corrupted_files(self, tmp_path_factory):\n        # First create a collection\n        collection_schema = zvec.CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                )\n            ],\n        )\n\n        collection_option = CollectionOption(read_only=False, 
enable_mmap=True)\n\n        # Create collection path\n        temp_dir = tmp_path_factory.mktemp(\"zvec\")\n        collection_path = temp_dir / \"test_collection\"\n\n        # First create the collection\n        created_coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert created_coll is not None, \"Failed to create collection\"\n\n        # Close the collection so we can manipulate its files\n        if hasattr(created_coll, \"close\") and created_coll is not None:\n            created_coll.close()\n\n        # Test case 1: Delete some files in the collection directory (simulate partial corruption)\n        import os\n        import shutil\n        import random\n\n        # Get the collection directory path\n        collection_dir = str(collection_path)\n\n        # List all files in the collection directory\n        files_in_dir = []\n        for root, dirs, files in os.walk(collection_dir):\n            for file in files:\n                files_in_dir.append(os.path.join(root, file))\n\n        # Randomly delete approximately half of the files to simulate partial corruption\n        if files_in_dir:\n            # Shuffle the list to randomly select files\n            random.shuffle(files_in_dir)\n            files_to_delete = files_in_dir[: len(files_in_dir) // 2]\n            for file_path in files_to_delete:\n                try:\n                    os.remove(file_path)\n                except Exception as e:\n                    pass  # Ignore errors during deletion\n\n        # Try to open the collection with missing files - should raise an exception\n        with pytest.raises(Exception):\n            zvec.open(path=str(collection_path), option=collection_option)\n\n        # Test case 2: Delete all files in the collection directory (simulate complete corruption)\n        # Recreate the collection\n        recreated_coll = 
zvec.create_and_open(\n            path=str(collection_path) + \"_all\",\n            schema=collection_schema,\n            option=collection_option,\n        )\n\n        assert recreated_coll is not None, \"Failed to recreate collection\"\n\n        # Close the collection so we can manipulate its files\n        if hasattr(recreated_coll, \"close\") and recreated_coll is not None:\n            recreated_coll.close()\n\n        # Delete all files in the collection directory\n        try:\n            shutil.rmtree(collection_dir)\n            os.makedirs(collection_dir)  # Recreate empty directory\n        except Exception as e:\n            pass  # Ignore errors during deletion\n\n        # Try to open the collection with missing files - should raise an exception\n        with pytest.raises(Exception):\n            zvec.open(path=str(collection_path), option=collection_option)\n"
  },
  {
    "path": "python/tests/detail/test_collection_recall.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport pytest\n\nfrom zvec.typing import DataType, StatusCode, MetricType, QuantizeType\nfrom zvec.model import Collection, Doc, VectorQuery\nfrom zvec.model.param import (\n    CollectionOption,\n    InvertIndexParam,\n    HnswIndexParam,\n    FlatIndexParam,\n    IVFIndexParam,\n    HnswQueryParam,\n    IVFQueryParam,\n)\n\nfrom zvec.model.schema import FieldSchema, VectorSchema\nfrom zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker\nfrom distance_helper import *\n\nfrom zvec import StatusCode\nfrom distance_helper import *\nfrom fixture_helper import *\nfrom doc_helper import *\nfrom params_helper import *\n\nimport time\n\n\n# ==================== helper ====================\ndef batchdoc_and_check(collection: Collection, multiple_docs, operator=\"insert\"):\n    if operator == \"insert\":\n        result = collection.insert(multiple_docs)\n    elif operator == \"upsert\":\n        result = collection.upsert(multiple_docs)\n\n    elif operator == \"update\":\n        result = collection.update(multiple_docs)\n    else:\n        logging.error(\"operator value is error!\")\n\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok(), (\n            f\"result={result},Insert operation failed with code {item.code()}\"\n        )\n\n    stats = collection.stats\n    assert stats is not None, 
\"Collection stats should not be None\"\n    \"\"\"assert stats.doc_count == len(multiple_docs), (\n        f\"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}\"\n    )\"\"\"\n\n    doc_ids = [doc.id for doc in multiple_docs]\n    fetched_docs = collection.fetch(doc_ids)\n    assert len(fetched_docs) == len(multiple_docs), (\n        f\"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}\"\n    )\n\n    for original_doc in multiple_docs:\n        assert original_doc.id in fetched_docs, (\n            f\"Expected document ID {original_doc.id} in fetched documents\"\n        )\n        fetched_doc = fetched_docs[original_doc.id]\n\n        assert is_doc_equal(fetched_doc, original_doc, collection.schema)\n\n        assert hasattr(fetched_doc, \"score\"), \"Document should have a score attribute\"\n        assert fetched_doc.score == 0.0, (\n            \"Fetch operation should return default score of 0.0\"\n        )\n\n\ndef compute_exact_similarity_scores(\n    vectors_a,\n    vectors_b,\n    metric_type=MetricType.IP,\n    DataType=DataType.VECTOR_FP32,\n    QuantizeType=QuantizeType.UNDEFINED,\n):\n    similarities = []\n    for i, vec_a in enumerate(vectors_a):\n        for j, vec_b in enumerate(vectors_b):\n            similarity = distance_recall(vec_a, vec_b, metric_type, DataType)\n            similarities.append((j, similarity))\n\n    # For L2,COSINE metric, smaller distances mean higher similarity, so sort in ascending order\n    if (\n        metric_type in [MetricType.L2]\n        and DataType\n        in [DataType.VECTOR_FP32, DataType.VECTOR_FP16, DataType.VECTOR_INT8]\n    ) or (\n        metric_type in [MetricType.COSINE]\n        and DataType in [DataType.VECTOR_FP32, DataType.VECTOR_FP16]\n    ):\n        similarities.sort(key=lambda x: x[1], reverse=False)  # Ascending order for L2\n\n    else:\n        similarities.sort(\n            key=lambda x: x[1], 
reverse=True\n        )  # Descending order for others\n\n    # Special handling for COSINE in FP16 to address precision issues\n    if metric_type == MetricType.COSINE and DataType == DataType.VECTOR_FP16:\n        # Clamp values to valid cosine distance range [0, 2] and handle floating point errors\n        similarities = [(idx, max(0.0, min(2.0, score))) for idx, score in similarities]\n\n    return similarities\n\n\ndef get_ground_truth_for_vector_query(\n    collection,\n    query_vector,\n    field_name,\n    all_docs,\n    query_idx,\n    metric_type,\n    k,\n    use_exact_computation=False,\n):\n    if use_exact_computation:\n        all_vectors = [doc.vectors[field_name] for doc in all_docs]\n\n        for d, f in DEFAULT_VECTOR_FIELD_NAME.items():\n            if field_name == f:\n                DataType = d\n                break\n        similarities = compute_exact_similarity_scores(\n            [query_vector],\n            all_vectors,\n            metric_type,\n            DataType=DataType,\n            QuantizeType=QuantizeType,\n        )\n\n        if metric_type == MetricType.COSINE and DataType == DataType.VECTOR_FP16:\n            # Filter out tiny non-zero values that may be caused by precision errors\n            similarities = [\n                (idx, max(0.0, min(2.0, score))) for idx, score in similarities\n            ]\n\n        ground_truth_ids_scores = similarities[:k]\n        print(\"Get the most similar k document IDs k:,ground_truth_ids_scores\")\n        print(k, ground_truth_ids_scores)\n        return ground_truth_ids_scores\n\n    else:\n        full_result = collection.query(\n            VectorQuery(field_name=field_name, vector=query_vector),\n            topk=min(len(all_docs), 1024),\n            include_vector=True,\n        )\n\n        ground_truth_ids_scores = [\n            (result.id, result.score) for result in full_result[:k]\n        ]\n\n        if not ground_truth_ids_scores:\n            
ground_truth_ids_scores = [(all_docs[query_idx].id, 0)]\n\n        return ground_truth_ids_scores\n\n\ndef get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type, k):\n    ground_truth_map = {}\n\n    for field_name, query_vectors in query_vectors_map.items():\n        ground_truth_map[field_name] = {}\n\n        for i, query_vector in enumerate(query_vectors):\n            # Get the ground truth for this query\n            relevant_doc_ids_scores = get_ground_truth_for_vector_query(\n                collection, query_vector, field_name, test_docs, i, metric_type, k, True\n            )\n            ground_truth_map[field_name][i] = relevant_doc_ids_scores\n\n    print(\"ground_truth_map:\\n\")\n    print(ground_truth_map)\n    return ground_truth_map\n\n\ndef calculate_recall_at_k(\n    collection: Collection,\n    test_docs,\n    query_vectors_map,\n    schema,\n    k=1,\n    expected_doc_ids_scores_map=None,\n    tolerance=0.01,\n):\n    recall_stats = {}\n\n    for field_name, query_vectors in query_vectors_map.items():\n        recall_stats[field_name] = {\n            \"relevant_retrieved_count\": 0,\n            \"total_relevant_count\": 0,\n            \"retrieved_count\": 0,\n            \"recall_at_k\": 0.0,\n        }\n\n        for i, query_vector in enumerate(query_vectors):\n            print(\"Starting %dth query\" % i)\n\n            query_result_list = collection.query(\n                VectorQuery(field_name=field_name, vector=query_vector),\n                topk=1024,\n                include_vector=True,\n            )\n            retrieved_count = len(query_result_list)\n\n            query_result_ids_scores = []\n            for word in query_result_list:\n                query_result_ids_scores.append((word.id, word.score))\n\n            recall_stats[field_name][\"retrieved_count\"] += retrieved_count\n\n            print(\"expected_doc_ids_scores_map:\\n\")\n            print(expected_doc_ids_scores_map)\n            if 
i in (expected_doc_ids_scores_map[field_name]):\n                expected_relevant_ids_scores = expected_doc_ids_scores_map[field_name][\n                    i\n                ]\n            print(\n                \"field_name,i,expected_relevant_ids_scores, query_result_ids_scores:\\n\"\n            )\n            print(\n                field_name,\n                i,\n                \"\\n\",\n                expected_relevant_ids_scores,\n                \"\\n\",\n                len(query_result_ids_scores),\n                query_result_ids_scores,\n            )\n\n            # Update total relevant documents count\n            recall_stats[field_name][\"total_relevant_count\"] += len(\n                expected_relevant_ids_scores\n            )\n\n            relevant_found_count = 0\n            for ids_scores_except in expected_relevant_ids_scores:\n                for ids_scores_result in query_result_ids_scores[:k]:\n                    if int(ids_scores_result[0]) == int(ids_scores_except[0]):\n                        relevant_found_count += 1\n                        break\n                    elif (\n                        int(ids_scores_result[0]) != int(ids_scores_except[0])\n                        and abs(ids_scores_result[1] - ids_scores_except[1])\n                        <= tolerance\n                    ):\n                        print(\"IDs are not equal, but the error is small, tolerance\")\n                        print(\n                            ids_scores_result[0],\n                            ids_scores_except[0],\n                            ids_scores_result[1],\n                            ids_scores_except[1],\n                            tolerance,\n                        )\n                        relevant_found_count += 1\n                        break\n                    else:\n                        continue\n\n            recall_stats[field_name][\"relevant_retrieved_count\"] += relevant_found_count\n\n        # 
Calculate Recall@K\n        if recall_stats[field_name][\"total_relevant_count\"] > 0:\n            recall_stats[field_name][\"recall_at_k\"] = (\n                recall_stats[field_name][\"relevant_retrieved_count\"]\n                / recall_stats[field_name][\"total_relevant_count\"]\n            )\n\n    return recall_stats\n\n\nclass TestRecall:\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [\n            (True, True, HnswIndexParam()),\n            (False, True, IVFIndexParam()),\n            (False, True, FlatIndexParam()),  # ——ok\n            (\n                True,\n                True,\n                HnswIndexParam(\n                    metric_type=MetricType.IP,\n                    m=16,\n                    ef_construction=100,\n                ),\n            ),\n            (\n                True,\n                True,\n                HnswIndexParam(\n                    metric_type=MetricType.COSINE,\n                    m=24,\n                    ef_construction=150,\n                ),\n            ),\n            (\n                True,\n                True,\n                HnswIndexParam(\n                    metric_type=MetricType.L2,\n                    m=32,\n                    ef_construction=200,\n                ),\n            ),\n            (\n                False,\n                True,\n                FlatIndexParam(\n                    metric_type=MetricType.IP,\n                ),\n            ),\n            (\n                True,\n                True,\n                FlatIndexParam(\n                    metric_type=MetricType.COSINE,\n                ),\n            ),\n            (\n                True,\n                True,\n                FlatIndexParam(\n                    metric_type=MetricType.L2,\n                ),\n            ),\n            (\n                True,\n                True,\n                IVFIndexParam(\n                    metric_type=MetricType.IP,\n     
               n_list=100,\n                    n_iters=10,\n                    use_soar=False,\n                ),\n            ),\n            (\n                True,\n                True,\n                IVFIndexParam(\n                    metric_type=MetricType.L2,\n                    n_list=200,\n                    n_iters=20,\n                    use_soar=True,\n                ),\n            ),\n            (\n                True,\n                True,\n                IVFIndexParam(\n                    metric_type=MetricType.COSINE,\n                    n_list=150,\n                    n_iters=15,\n                    use_soar=False,\n                ),\n            ),\n        ],\n        indirect=True,\n    )\n    @pytest.mark.parametrize(\"doc_num\", [500])\n    @pytest.mark.parametrize(\"query_num\", [10])\n    @pytest.mark.parametrize(\"top_k\", [1])\n    def test_recall_with_single_vector_valid_500(\n        self,\n        full_collection_new: Collection,\n        doc_num,\n        query_num,\n        top_k,\n        full_schema_new,\n        request,\n    ):\n        full_schema_params = request.getfixturevalue(\"full_schema_new\")\n\n        for vector_para in full_schema_params.vectors:\n            if vector_para.name == \"vector_fp32_field\":\n                metric_type = vector_para.index_param.metric_type\n                break\n\n        multiple_docs = [\n            generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)\n        ]\n        print(\"len(multiple_docs):\\n\")\n        print(len(multiple_docs))\n        # print(multiple_docs)\n\n        for i in range(10):\n            if i != 0:\n                pass\n                # print(multiple_docs[i * 1000:1000 * (i + 1)])\n            batchdoc_and_check(\n                full_collection_new,\n                multiple_docs[i * 1000 : 1000 * (i + 1)],\n                operator=\"insert\",\n            )\n\n        stats = full_collection_new.stats\n        
assert stats.doc_count == len(multiple_docs)\n\n        doc_ids = [\"0\", \"1\"]\n        fetched_docs = full_collection_new.fetch(doc_ids)\n        print(\"fetched_docs,multiple_docs\")\n        print(\n            fetched_docs[doc_ids[0]].vectors[\"sparse_vector_fp32_field\"],\n            fetched_docs[doc_ids[0]].vectors[\"sparse_vector_fp16_field\"],\n            fetched_docs[doc_ids[1]].vectors[\"sparse_vector_fp32_field\"],\n            fetched_docs[doc_ids[1]].vectors[\"sparse_vector_fp16_field\"],\n            \"\\n\",\n            multiple_docs[0].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[0].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[1].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[1].vectors[\"sparse_vector_fp16_field\"],\n        )\n\n        full_collection_new.optimize(option=OptimizeOption())\n\n        time.sleep(2)\n\n        query_vectors_map = {}\n        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():\n            query_vectors_map[field_name] = [\n                multiple_docs[i].vectors[field_name] for i in range(query_num)\n            ]\n\n        # Get ground truth mapping\n        ground_truth_map = get_ground_truth_map(\n            full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k\n        )\n\n        # Validate ground truth mapping structure\n        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():\n            assert field_name in ground_truth_map\n            field_gt = ground_truth_map[field_name]\n            assert len(field_gt) == query_num\n\n            for query_idx in range(query_num):\n                assert query_idx in field_gt\n                relevant_ids = field_gt[query_idx]\n                assert isinstance(relevant_ids, list)\n                assert len(relevant_ids) <= top_k\n\n        # Print ground truth statistics\n        print(f\"Ground Truth for Top-{top_k} Retrieval:\")\n        for field_name, field_gt in 
ground_truth_map.items():\n            print(f\"  {field_name}:\")\n            for query_idx, relevant_ids in field_gt.items():\n                print(\n                    f\" Query {query_idx}: {len(relevant_ids)} relevant docs - {relevant_ids[:5]}{'...' if len(relevant_ids) > 5 else ''}\"\n                )\n\n        # Calculate Recall@K using ground truth\n        recall_at_k_stats = calculate_recall_at_k(\n            full_collection_new,\n            multiple_docs,\n            query_vectors_map,\n            full_schema_new,\n            k=top_k,\n            expected_doc_ids_scores_map=ground_truth_map,\n            tolerance=0.01,\n        )\n        print(\"ground_truth_map:\\n\")\n        print(ground_truth_map)\n\n        print(\"(recall_at_k_stats:\\n\")\n        print(recall_at_k_stats)\n        print(\"metric_type:\")\n        print(metric_type)\n        # Print Recall@K statistics\n        print(f\"Recall@{top_k} using Ground Truth:\")\n        for field_name, stats in recall_at_k_stats.items():\n            print(f\"  {field_name}:\")\n            print(\n                f\"    Relevant Retrieved: {stats['relevant_retrieved_count']}/{stats['total_relevant_count']}\"\n            )\n            print(f\"    Recall@{top_k}: {stats['recall_at_k']:.4f}\")\n        for k, v in recall_at_k_stats.items():\n            assert v[\"recall_at_k\"] == 1.0\n\n    @pytest.mark.parametrize(\n        \"full_schema_new\",\n        [\n            (True, True, HnswIndexParam()),\n            (False, True, IVFIndexParam()),\n            (False, True, FlatIndexParam()),  # ——ok\n            (\n                True,\n                True,\n                HnswIndexParam(\n                    metric_type=MetricType.IP,\n                    m=16,\n                    ef_construction=100,\n                ),\n            ),\n            (\n                True,\n                True,\n                HnswIndexParam(\n                    metric_type=MetricType.COSINE,\n   
                 m=24,\n                    ef_construction=150,\n                ),\n            ),\n            # (True, True, HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200, )),\n            (\n                False,\n                True,\n                FlatIndexParam(\n                    metric_type=MetricType.IP,\n                ),\n            ),\n            (\n                True,\n                True,\n                FlatIndexParam(\n                    metric_type=MetricType.COSINE,\n                ),\n            ),\n            # (True, True, FlatIndexParam(metric_type=MetricType.L2, )),\n            (\n                True,\n                True,\n                IVFIndexParam(\n                    metric_type=MetricType.IP,\n                    n_list=100,\n                    n_iters=10,\n                    use_soar=False,\n                ),\n            ),\n            (\n                True,\n                True,\n                IVFIndexParam(\n                    metric_type=MetricType.L2,\n                    n_list=200,\n                    n_iters=20,\n                    use_soar=True,\n                ),\n            ),\n            # (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),\n        ],\n        indirect=True,\n    )\n    @pytest.mark.parametrize(\"doc_num\", [2000])\n    @pytest.mark.parametrize(\"query_num\", [2])\n    @pytest.mark.parametrize(\"top_k\", [1])\n    @pytest.mark.skip(reason=\"known bug\")\n    def test_recall_with_single_vector_valid_2000(\n        self,\n        full_collection_new: Collection,\n        doc_num,\n        query_num,\n        top_k,\n        full_schema_new,\n        request,\n    ):\n        full_schema_params = request.getfixturevalue(\"full_schema_new\")\n\n        for vector_para in full_schema_params.vectors:\n            if vector_para.name == \"vector_fp32_field\":\n                metric_type = 
vector_para.index_param.metric_type\n                break\n\n        multiple_docs = [\n            generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)\n        ]\n        print(\"len(multiple_docs):\\n\")\n        print(len(multiple_docs))\n        # print(multiple_docs)\n\n        for i in range(10):\n            if i != 0:\n                pass\n                # print(multiple_docs[i * 1000:1000 * (i + 1)])\n            batchdoc_and_check(\n                full_collection_new,\n                multiple_docs[i * 1000 : 1000 * (i + 1)],\n                operator=\"insert\",\n            )\n\n        stats = full_collection_new.stats\n        assert stats.doc_count == len(multiple_docs)\n\n        doc_ids = [\"0\", \"1\"]\n        fetched_docs = full_collection_new.fetch(doc_ids)\n        print(\"fetched_docs,multiple_docs\")\n        print(\n            fetched_docs[doc_ids[0]].vectors[\"sparse_vector_fp32_field\"],\n            fetched_docs[doc_ids[0]].vectors[\"sparse_vector_fp16_field\"],\n            fetched_docs[doc_ids[1]].vectors[\"sparse_vector_fp32_field\"],\n            fetched_docs[doc_ids[1]].vectors[\"sparse_vector_fp16_field\"],\n            \"\\n\",\n            multiple_docs[0].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[0].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[1].vectors[\"sparse_vector_fp32_field\"],\n            multiple_docs[1].vectors[\"sparse_vector_fp16_field\"],\n        )\n\n        full_collection_new.optimize(option=OptimizeOption())\n\n        time.sleep(2)\n\n        query_vectors_map = {}\n        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():\n            query_vectors_map[field_name] = [\n                multiple_docs[i].vectors[field_name] for i in range(query_num)\n            ]\n\n        # Get ground truth mapping\n        ground_truth_map = get_ground_truth_map(\n            full_collection_new, multiple_docs, query_vectors_map, metric_type, 
top_k\n        )\n\n        # Validate ground truth mapping structure\n        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():\n            assert field_name in ground_truth_map\n            field_gt = ground_truth_map[field_name]\n            assert len(field_gt) == query_num\n\n            for query_idx in range(query_num):\n                assert query_idx in field_gt\n                relevant_ids = field_gt[query_idx]\n                assert isinstance(relevant_ids, list)\n                assert len(relevant_ids) <= top_k\n\n        # Print ground truth statistics\n        print(f\"Ground Truth for Top-{top_k} Retrieval:\")\n        for field_name, field_gt in ground_truth_map.items():\n            print(f\"  {field_name}:\")\n            for query_idx, relevant_ids in field_gt.items():\n                print(\n                    f\" Query {query_idx}: {len(relevant_ids)} relevant docs - {relevant_ids[:5]}{'...' if len(relevant_ids) > 5 else ''}\"\n                )\n\n        # Calculate Recall@K using ground truth\n        recall_at_k_stats = calculate_recall_at_k(\n            full_collection_new,\n            multiple_docs,\n            query_vectors_map,\n            full_schema_new,\n            k=top_k,\n            expected_doc_ids_scores_map=ground_truth_map,\n            tolerance=0.01,\n        )\n        print(\"ground_truth_map:\\n\")\n        print(ground_truth_map)\n\n        print(\"(recall_at_k_stats:\\n\")\n        print(recall_at_k_stats)\n        print(\"metric_type:\")\n        print(metric_type)\n        # Print Recall@K statistics\n        print(f\"Recall@{top_k} using Ground Truth:\")\n        for field_name, stats in recall_at_k_stats.items():\n            print(f\"  {field_name}:\")\n            print(\n                f\"    Relevant Retrieved: {stats['relevant_retrieved_count']}/{stats['total_relevant_count']}\"\n            )\n            print(f\"    Recall@{top_k}: {stats['recall_at_k']:.4f}\")\n        for k, v in 
recall_at_k_stats.items():\n            assert v[\"recall_at_k\"] == 1.0\n"
  },
  {
    "path": "python/tests/detail/test_db_config.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport logging\nimport pytest\nimport tempfile\nimport os\nimport sys\nimport subprocess\n\nimport zvec\nimport zvec\nfrom zvec import LogType, LogLevel\n\n# Error messages\nINITIALIZATION_ERROR_MSG = \"initialization failed\"\nRUNTIME_ERROR_MSG = \"RuntimeError\"\nVALUE_ERROR_MSG = \"ValueError\"\nTYPE_ERROR_MSG = \"TypeError\"\n\n\n# ==================== helper ====================\ndef run_in_subprocess(func):\n    def wrapper(*args, **kwargs):\n        if os.getenv(\"RUNNING_IN_SUBPROCESS\"):\n            return func(*args, **kwargs)\n\n        env = os.environ.copy()\n        env[\"RUNNING_IN_SUBPROCESS\"] = \"1\"\n        env[\"PYTEST_CURRENT_TEST\"] = func.__name__\n\n        import inspect\n\n        filepath = inspect.getfile(func)\n        qualname = func.__qualname__.replace(\".\", \"::\")\n        test_id = f\"{filepath}::{qualname}\"\n\n        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))\n        env[\"PYTHONPATH\"] = project_root + \":\" + env.get(\"PYTHONPATH\", \"\")\n\n        cmd = [sys.executable, \"-m\", \"pytest\", \"-v\", \"-s\", test_id]\n\n        result = subprocess.run(cmd, env=env, capture_output=True, text=True)\n        if result.returncode != 0:\n            pytest.fail(\n                f\"Subprocess test {func.__name__} failed with code {result.returncode}\\n\"\n                
f\"STDOUT:\\n{result.stdout}\\nSTDERR:\\n{result.stderr}\"\n            )\n\n    return wrapper\n\n\n# ==================== Fixtures ====================\n@pytest.fixture(scope=\"function\")\ndef temp_log_dir(tmp_path_factory):\n    return tmp_path_factory.mktemp(\"logs\")\n\n\n# ==================== Tests ====================\nclass TestDbConfigInitialization:\n    @run_in_subprocess\n    def test_init_default(self):\n        # default config\n        # log_type: Optional[LogType] = LogType.CONSOLE,\n        # log_level: Optional[LogLevel] = LogLevel.WARN,\n        # log_dir: Optional[str] = \"./logs\",\n        # log_basename: Optional[str] = \"zvec.log\",\n        # log_file_size: Optional[int] = 2048,\n        # log_overdue_days: Optional[int] = 7,\n        zvec.init()\n\n    @run_in_subprocess\n    def test_init_file_logger(self):\n        from pathlib import Path\n        import shutil\n\n        zvec.init(\n            log_level=LogLevel.DEBUG,\n            log_type=LogType.FILE,\n        )\n        # assert logdir exist\n        log_dir = Path(\"./logs\")\n        assert log_dir.exists()\n\n        # validate write log\n        col = zvec.create_and_open(\n            \"/tmp/test/1\",\n            zvec.CollectionSchema(\n                name=\"test\",\n                vectors=zvec.VectorSchema(\n                    dimension=4,\n                    data_type=zvec.DataType.VECTOR_FP32,\n                    name=\"image\",\n                ),\n            ),\n        )\n        col.insert(docs=[zvec.Doc(id=\"1\", vectors={\"image\": [1.0, 2.0, 3.0, 4.0]})])\n        assert any(log_dir.glob(\"zvec.log.*\"))\n\n        # clear\n        col.destroy()\n        shutil.rmtree(log_dir, ignore_errors=True)\n\n    @run_in_subprocess\n    def test_init_with_mixed_config(self):\n        zvec.init(\n            memory_limit_mb=128,\n            log_type=LogType.FILE,\n            query_threads=1,\n            log_level=LogLevel.WARN,\n        )\n\n    
@run_in_subprocess\n    def test_repeated_initialization(self):\n        # Calling init() repeatedly is allowed:\n        # it succeeds but becomes a no-op after the first successful init()\n        zvec.init()\n\n\nclass TestDbConfigMemoryLimitValidation:\n    @run_in_subprocess\n    def test_memory_limit_min_valid(self):\n        # MIN_MEMORY_LIMIT_BYTES is 100M\n        with pytest.raises(RuntimeError):\n            zvec.init(memory_limit_mb=99)\n\n    @run_in_subprocess\n    def test_memory_limit_invalid_value(self):\n        # memory_limit_mb must >= 0 and must be int and if None, set default value\n        with pytest.raises(ValueError):\n            zvec.init(memory_limit_mb=0)\n        with pytest.raises(ValueError):\n            zvec.init(memory_limit_mb=-1)\n        with pytest.raises(TypeError):\n            zvec.init(memory_limit_mb=\"512\")\n        with pytest.raises(TypeError):\n            zvec.init(memory_limit_mb=512.5)\n\n\nclass TestDbConfigThreadValidation:\n    @run_in_subprocess\n    def test_query_threads(self):\n        zvec.init(query_threads=1)\n\n    @run_in_subprocess\n    def test_query_threads_invalid(self):\n        # query_threads must >= 0 and must be int and if None, set default value\n        with pytest.raises(ValueError):\n            zvec.init(query_threads=0)\n        with pytest.raises(ValueError):\n            zvec.init(query_threads=-1)\n        with pytest.raises(TypeError):\n            zvec.init(query_threads=\"value\")\n        with pytest.raises(TypeError):\n            zvec.init(query_threads=512.5)\n        with pytest.raises(TypeError):\n            zvec.init(query_threads=\"512\")\n\n    @run_in_subprocess\n    def test_optimize_threads(self):\n        zvec.init(optimize_threads=1)\n\n    @run_in_subprocess\n    def test_optimize_threads_invalid(self):\n        # optimize_threads must >= 0 and must be int and if None, set default value\n        with pytest.raises(ValueError):\n            
zvec.init(optimize_threads=0)\n        with pytest.raises(ValueError):\n            zvec.init(optimize_threads=-1)\n        with pytest.raises(TypeError):\n            zvec.init(optimize_threads=\"value\")\n        with pytest.raises(TypeError):\n            zvec.init(optimize_threads=512.5)\n        with pytest.raises(TypeError):\n            zvec.init(optimize_threads=\"512\")\n\n\nclass TestDbConfigRatioValidation:\n    @run_in_subprocess\n    def test_init_invert_to_forward_scan_ratio(self):\n        # must be in [0,1]\n        zvec.init(invert_to_forward_scan_ratio=0.8)\n\n    @run_in_subprocess\n    def test_init_invert_to_forward_scan_ratio_invalid(self):\n        with pytest.raises(ValueError):\n            zvec.init(invert_to_forward_scan_ratio=1.1)\n        with pytest.raises(ValueError):\n            zvec.init(invert_to_forward_scan_ratio=-0.1)\n        with pytest.raises(TypeError):\n            zvec.init(invert_to_forward_scan_ratio=\"0.8\")\n\n    @run_in_subprocess\n    def test_init_brute_force_by_keys_ratio(self):\n        zvec.init(brute_force_by_keys_ratio=0.8)\n\n    @run_in_subprocess\n    def test_init_brute_force_by_keys_ratio_invalid(self):\n        with pytest.raises(ValueError):\n            zvec.init(brute_force_by_keys_ratio=1.1)\n        with pytest.raises(ValueError):\n            zvec.init(brute_force_by_keys_ratio=-0.1)\n        with pytest.raises(TypeError):\n            zvec.init(brute_force_by_keys_ratio=\"0.8\")\n\n\nclass TestDbConfigLogValidation:\n    @run_in_subprocess\n    def test_log_type_valid(self):\n        zvec.init(log_type=LogType.CONSOLE)\n\n    @run_in_subprocess\n    def test_log_type_invalid(self):\n        with pytest.raises(TypeError):\n            zvec.init(log_type=\"FILE\")\n        with pytest.raises(TypeError):\n            zvec.init(log_type=\"\")\n        with pytest.raises(TypeError):\n            zvec.init(log_type=\"invalid\")\n        with pytest.raises(TypeError):\n            
zvec.init(log_type=123)\n\n    @run_in_subprocess\n    def test_log_level_valid(self):\n        zvec.init(log_level=LogLevel.ERROR)\n\n    @run_in_subprocess\n    def test_log_level_invalid(self):\n        with pytest.raises(TypeError):\n            zvec.init(log_level=\"WARN\")\n        with pytest.raises(TypeError):\n            zvec.init(log_level=\"\")\n        with pytest.raises(TypeError):\n            zvec.init(log_level=\"invalid\")\n        with pytest.raises(TypeError):\n            zvec.init(log_level=123)\n\n    @run_in_subprocess\n    def test_init_file_logger(self):\n        from pathlib import Path\n        import shutil\n\n        temp_dir = tempfile.mkdtemp(prefix=\"log_test_\")\n        abs_temp_dir = os.path.abspath(temp_dir)\n\n        zvec.init(\n            log_level=LogLevel.DEBUG,\n            log_type=LogType.FILE,\n            log_dir=abs_temp_dir,\n            log_basename=\"test\",\n        )\n\n        # assert logdir exist\n        log_dir = Path(abs_temp_dir)\n        assert log_dir.exists()\n\n        # validate write log\n        col = zvec.create_and_open(\n            \"/tmp/test/1\",\n            zvec.CollectionSchema(\n                name=\"test\",\n                vectors=zvec.VectorSchema(\n                    dimension=4,\n                    data_type=zvec.DataType.VECTOR_FP32,\n                    name=\"image\",\n                ),\n            ),\n        )\n        col.insert(docs=[zvec.Doc(id=\"1\", vectors={\"image\": [1.0, 2.0, 3.0, 4.0]})])\n        assert any(log_dir.glob(\"test.*\"))\n\n        # clear\n        col.destroy()\n        shutil.rmtree(log_dir, ignore_errors=True)\n\n    @run_in_subprocess\n    def test_log_file_size_invalid(self):\n        with pytest.raises(TypeError):\n            zvec.init(log_type=LogType.FILE, log_file_size=\"df\")\n\n        with pytest.raises(ValueError):\n            zvec.init(log_type=LogType.FILE, log_file_size=0)\n\n        with pytest.raises(ValueError):\n            
zvec.init(log_type=LogType.FILE, log_file_size=-1)\n\n    @run_in_subprocess\n    def test_log_overdue_days_invalid(self):\n        with pytest.raises(TypeError):\n            zvec.init(log_type=LogType.FILE, log_overdue_days=\"df\")\n\n        with pytest.raises(ValueError):\n            zvec.init(log_type=LogType.FILE, log_overdue_days=0)\n\n        with pytest.raises(ValueError):\n            zvec.init(log_type=LogType.FILE, log_overdue_days=-1)\n"
  },
  {
    "path": "python/tests/test_collection.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\n\nimport pytest\nimport zvec\nfrom zvec import (\n    Collection,\n    CollectionOption,\n    DataType,\n    Doc,\n    FieldSchema,\n    HnswIndexParam,\n    InvertIndexParam,\n    LogLevel,\n    LogType,\n    VectorSchema,\n    StatusCode,\n    IndexOption,\n    IndexType,\n    VectorQuery,\n    OptimizeOption,\n)\n\n# ==================== Common ====================\n\n\n@pytest.fixture(scope=\"session\")\ndef collection_schema():\n    return zvec.CollectionSchema(\n        name=\"test_collection\",\n        fields=[\n            FieldSchema(\n                \"id\",\n                DataType.INT64,\n                nullable=False,\n                index_param=InvertIndexParam(enable_range_optimization=True),\n            ),\n            FieldSchema(\n                \"name\", DataType.STRING, nullable=False, index_param=InvertIndexParam()\n            ),\n            FieldSchema(\"weight\", DataType.FLOAT, nullable=True),\n            FieldSchema(\"height\", DataType.INT32, nullable=True),\n        ],\n        vectors=[\n            VectorSchema(\n                \"dense\",\n                DataType.VECTOR_FP32,\n                dimension=128,\n                index_param=HnswIndexParam(),\n            ),\n            VectorSchema(\n                \"sparse\", DataType.SPARSE_VECTOR_FP32, 
index_param=HnswIndexParam()\n            ),\n        ],\n    )\n\n\n@pytest.fixture(scope=\"session\")\ndef collection_option():\n    return CollectionOption(read_only=False, enable_mmap=True)\n\n\n@pytest.fixture\ndef single_doc():\n    id = 0\n    return Doc(\n        id=f\"{id}\",\n        fields={\"id\": id, \"name\": \"test\", \"weight\": 80.0, \"height\": id + 140},\n        vectors={\"dense\": [id + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n    )\n\n\n@pytest.fixture\ndef multiple_docs():\n    return [\n        Doc(\n            id=f\"{id}\",\n            fields={\"id\": id, \"name\": \"test\", \"weight\": 80.0, \"height\": 210},\n            vectors={\"dense\": [id + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n        )\n        for id in range(1, 101)\n    ]\n\n\n@pytest.fixture(scope=\"function\")\ndef test_collection(\n    tmp_path_factory, collection_schema, collection_option\n) -> Collection:\n    \"\"\"\n    Function-scoped fixture: creates and opens a collection.\n    Uses tmp_path_factory to ensure shared temp dir per class.\n    \"\"\"\n    # Create unique temp directory for this test class\n    temp_dir = tmp_path_factory.mktemp(\"zvec\")\n    collection_path = temp_dir / \"test_collection\"\n\n    coll = zvec.create_and_open(\n        path=str(collection_path), schema=collection_schema, option=collection_option\n    )\n\n    assert coll is not None, \"Failed to create and open collection\"\n    assert coll.path == str(collection_path)\n    assert coll.schema.name == collection_schema.name\n    assert list(coll.schema.fields) == list(collection_schema.fields)\n    assert list(coll.schema.vectors) == list(collection_schema.vectors)\n    assert coll.option.read_only == collection_option.read_only\n    assert coll.option.enable_mmap == collection_option.enable_mmap\n\n    try:\n        yield coll\n    finally:\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            
except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n\n@pytest.fixture\ndef collection_with_single_doc(test_collection: Collection, single_doc) -> Collection:\n    # Setup: insert single doc\n    assert test_collection.stats.doc_count == 0\n    result = test_collection.insert(single_doc)\n    assert bool(result)\n    assert result.ok()\n    assert test_collection.stats.doc_count == 1\n\n    yield test_collection\n\n    # Teardown: delete single doc\n    test_collection.delete(single_doc.id)\n    assert test_collection.stats.doc_count == 0\n\n\n@pytest.fixture\ndef collection_with_multiple_docs(\n    test_collection: Collection, multiple_docs\n) -> Collection:\n    # Setup: insert multiple docs\n    assert test_collection.stats.doc_count == 0\n    result = test_collection.insert(multiple_docs)\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok()\n    assert test_collection.stats.doc_count == len(multiple_docs)\n\n    yield test_collection\n\n    # Teardown: delete multiple docs\n    test_collection.delete([doc.id for doc in multiple_docs])\n\n\n# ==================== Tests ====================\n\n\n# ----------------------------\n# Config Test Case\n# ----------------------------\nclass TestConfig:\n    def test_config(self):\n        zvec.init(log_type=LogType.CONSOLE, log_level=LogLevel.ERROR, log_dir=\"./log\")\n\n\n# ----------------------------\n# Collection DDL Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionDDL:\n    def test_collection_stats(self, test_collection: Collection):\n        assert test_collection.stats is not None\n        stats = test_collection.stats\n        assert stats.doc_count == 0\n        assert len(stats.index_completeness) == 2\n        assert stats.index_completeness[\"dense\"] == 1\n        assert stats.index_completeness[\"sparse\"] == 1\n\n\n# ----------------------------\n# 
Collection Index DDL Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionIndexDDL:\n    def test_create_index(self, test_collection: Collection):\n        # before create\n        field_schema = test_collection.schema.field(\"weight\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.FLOAT\n        assert field_schema.name == \"weight\"\n        index_param = field_schema.index_param\n        assert index_param is None\n\n        # create\n        test_collection.create_index(\n            field_name=\"weight\", index_param=InvertIndexParam(), option=IndexOption()\n        )\n        assert test_collection.schema is not None\n        field_schema = test_collection.schema.field(\"weight\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.FLOAT\n        assert field_schema.name == \"weight\"\n\n        index_param = field_schema.index_param\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is False\n        assert index_param.enable_extended_wildcard is False\n\n    def test_drop_index(self, test_collection: Collection):\n        # before drop\n        field_schema = test_collection.schema.field(\"name\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.STRING\n        assert field_schema.name == \"name\"\n        index_param = field_schema.index_param\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is False\n        assert index_param.enable_extended_wildcard is False\n\n        # drop\n        test_collection.drop_index(\"name\")\n        field_schema = test_collection.schema.field(\"name\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.STRING\n        assert field_schema.name == \"name\"\n\n        # without index\n       
 index_param = field_schema.index_param\n        assert index_param is None\n\n    def test_create_index_field_is_not_exist(self, test_collection: Collection):\n        with pytest.raises(Exception) as e:\n            test_collection.create_index(\n                field_name=\"not_exist\",\n                index_param=InvertIndexParam(),\n            )\n\n        index_param = field_schema.index_param\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is False\n        assert index_param.enable_extended_wildcard is False\n\n    def test_drop_index(self, test_collection: Collection):\n        # before drop\n        field_schema = test_collection.schema.field(\"name\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.STRING\n        assert field_schema.name == \"name\"\n        index_param = field_schema.index_param\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is False\n        assert index_param.enable_extended_wildcard is False\n\n        # drop\n        test_collection.drop_index(\"name\")\n        field_schema = test_collection.schema.field(\"name\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.STRING\n        assert field_schema.name == \"name\"\n\n        # without index\n        index_param = field_schema.index_param\n        assert index_param is None\n\n    def test_create_index_field_is_not_exist(self, test_collection: Collection):\n        with pytest.raises(Exception) as e:\n            test_collection.create_index(\n                field_name=\"not_exist\",\n                index_param=InvertIndexParam(),\n            )\n\n\n# ----------------------------\n# Collection Column DDL Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionColumnDDL:\n    def test_create_column(self, test_collection: 
Collection):\n        # before create column\n        field_schema = test_collection.schema.field(\"age\")\n        assert field_schema is None\n\n        # create\n        test_collection.add_column(FieldSchema(\"age\", DataType.INT32, nullable=True))\n\n        field_schema = test_collection.schema.field(\"age\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.INT32\n        assert field_schema.name == \"age\"\n        assert field_schema.index_param is None\n\n    def test_create_column_is_nullable(self, test_collection: Collection):\n        with pytest.raises(ValueError):\n            test_collection.add_column(\n                FieldSchema(\"age\", DataType.INT32, nullable=False)\n            )\n\n    def test_drop_column(self, test_collection: Collection):\n        # before drop column\n        field_schema = test_collection.schema.field(\"id\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.INT64\n        assert field_schema.name == \"id\"\n        index_param = field_schema.index_param\n        assert index_param is not None\n        assert index_param.type == IndexType.INVERT\n\n        # drop\n        test_collection.drop_column(\"id\")\n        field_schema = test_collection.schema.field(\"id\")\n        assert field_schema is None\n\n    def test_alert_column_to_rename(self, test_collection: Collection):\n        # before alert column\n        field_schema = test_collection.schema.field(\"id\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.INT64\n        assert field_schema.name == \"id\"\n        index_param = field_schema.index_param\n        assert index_param is not None\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is True\n        assert index_param.enable_extended_wildcard is False\n\n        # alert rename\n        test_collection.alter_column(\"id\", 
\"doc_id\")\n\n        # validate old column\n        field_schema = test_collection.schema.field(\"id\")\n        assert field_schema is None\n        # validate rename column\n        field_schema = test_collection.schema.field(\"doc_id\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.INT64\n        assert field_schema.name == \"doc_id\"\n        assert field_schema.nullable is False\n        index_param = field_schema.index_param\n        assert index_param is not None\n        assert index_param.type == IndexType.INVERT\n        assert index_param.enable_range_optimization is True\n        assert index_param.enable_extended_wildcard is False\n\n    def test_alert_column_to_modify_schema(self, test_collection: Collection):\n        # before alert column\n        field_schema = test_collection.schema.field(\"id\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.INT64\n        assert field_schema.name == \"id\"\n        index_param = field_schema.index_param\n        assert index_param.type == IndexType.INVERT\n\n        test_collection.alter_column(\n            old_name=\"id\",\n            field_schema=FieldSchema(\"doc_id\", DataType.UINT64, nullable=True),\n        )\n        field_schema = test_collection.schema.field(\"doc_id\")\n        assert field_schema is not None\n        assert field_schema.data_type == DataType.UINT64\n        assert field_schema.name == \"doc_id\"\n\n    def test_column_with_other_dtype(self, test_collection: Collection):\n        # only allow number type\n        test_collection.add_column(FieldSchema(\"age\", DataType.INT32, nullable=True))\n\n        with pytest.raises(ValueError):\n            test_collection.add_column(FieldSchema(\"full_name\", DataType.STRING))\n        with pytest.raises(ValueError):\n            test_collection.drop_column(\"name\")\n        with pytest.raises(ValueError):\n            
test_collection.alter_column(old_name=\"name\", new_name=\"full_name\")\n        with pytest.raises(ValueError):\n            test_collection.alter_column(\n                old_name=\"name\", field_schema=FieldSchema(\"full_name\", DataType.STRING)\n            )\n\n\n# ----------------------------\n# Collection Optimize Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionOptimize:\n    def test_collection_optimize(self, test_collection: Collection):\n        test_collection.optimize(option=OptimizeOption())\n\n\n# ----------------------------\n# Collection Fetch Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionFetch:\n    def test_collection_fetch(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        result = collection_with_single_doc.fetch(ids=[single_doc.id])\n        assert bool(result)\n        assert single_doc.id in result.keys()\n\n        doc = result[single_doc.id]\n        assert doc is not None\n        assert doc.id == single_doc.id\n        assert set(doc.field_names()) == set(single_doc.field_names())\n        for field_name in doc.field_names():\n            if field_name in [\"dense\", \"sparse\"]:\n                continue\n            assert doc.field(field_name) == single_doc.field(field_name)\n\n    def test_collection_fetch_contains_nodata_ids(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        ids = [doc.id for doc in multiple_docs]\n        no_data_key = \"x\"\n        ids_with_no_data = [no_data_key] + ids\n        result = collection_with_multiple_docs.fetch(ids=ids_with_no_data)\n        assert bool(result)\n        assert len(result) == len(ids)\n        assert no_data_key not in result\n\n\n# ----------------------------\n# Collection Insert Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass 
TestCollectionInsert:\n    def test_collection_insert(self, test_collection, single_doc):\n        result = test_collection.insert(single_doc)\n        assert bool(result)\n        assert result.ok()\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_collection_insert_with_nullable_false_field(self, test_collection):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        doc = Doc(\n            id=\"0\",\n            fields={\n                \"id\": 1,\n                \"name\": \"test\",\n            },\n            vectors={\"dense\": [1 + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n        )\n        result = test_collection.insert(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_collection_insert_without_nullable_false_field(self, test_collection):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        # without id, name\n        doc = Doc(\n            id=\"0\",\n            vectors={\"dense\": [1 + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n        )\n        with pytest.raises(ValueError) as e:\n            # ValueError: doc validate failed: field[id] is configured not nullable,\n            # but doc does not contain this field\n            test_collection.insert(doc)\n        assert \"field[id] is configured not nullable\" in str(e.value)\n\n        # without name\n        doc = Doc(\n            id=\"0\",\n            fields={\n                \"id\": 1,\n            },\n            vectors={\"dense\": [1 + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n        )\n        with pytest.raises(ValueError) as e:\n            test_collection.insert(doc)\n        assert \"field[name] is configured not nullable\" in str(e.value)\n\n    def 
test_collection_insert_with_nullable_true_field(self, test_collection):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        doc = Doc(\n            id=\"0\",\n            fields={\n                \"id\": 1,\n                \"name\": \"test\",\n            },\n            vectors={\"dense\": [1 + 0.1] * 128, \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}},\n        )\n        result = test_collection.insert(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        result = test_collection.fetch(ids=[doc.id])\n        assert doc.id in result\n        ret = result[doc.id]\n        assert ret.field(\"id\") == 1\n        assert ret.field(\"name\") == \"test\"\n        assert ret.field(\"weight\") is None\n        assert ret.field(\"height\") is None\n\n    def test_collection_insert_batch(self, test_collection, multiple_docs):\n        result = test_collection.insert(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == len(multiple_docs)\n\n    def test_collection_insert_duplicate(\n        self, test_collection, single_doc, multiple_docs\n    ):\n        test_collection.insert(single_doc)\n        result = test_collection.insert(single_doc)\n        assert bool(result)\n        assert result.code() == StatusCode.ALREADY_EXISTS\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n\n# ----------------------------\n# Collection Update Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionUpdate:\n    def test_empty_collection_update(\n        self, test_collection: Collection, single_doc: Doc\n    ):\n        result = 
test_collection.update(single_doc)\n        assert bool(result)\n        assert result.code() == StatusCode.NOT_FOUND\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    def test_collection_update_with_nullable_false_field(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        # update doc field id\n        doc = Doc(\n            id=single_doc.id,\n            fields={\"id\": single_doc.field(\"id\") + 1},\n        )\n        result = collection_with_single_doc.update(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        # fetch\n        result = collection_with_single_doc.fetch(ids=[doc.id])\n        assert doc.id in result\n        ret = result[doc.id]\n        assert ret.field(\"id\") == doc.field(\"id\")\n        assert ret.field(\"name\") == single_doc.field(\"name\")\n        assert ret.field(\"weight\") == single_doc.field(\"weight\")\n        assert ret.field(\"height\") == single_doc.field(\"height\")\n\n    def test_collection_update_with_nullable_false_field_is_none(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        # update doc field id\n        doc = Doc(\n            id=single_doc.id,\n            fields={\"id\": None},\n        )\n        with pytest.raises(ValueError) as e:\n            # ValueError: doc validate failed: field[id] is configured not nullable,\n            # but doc does not contain this field\n            collection_with_single_doc.update(doc)\n\n        doc = Doc(\n            id=single_doc.id,\n            fields={\"id\": single_doc.field(\"id\") + 1, \"weight\": None},\n        )\n\n   
     result = collection_with_single_doc.update(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        ret = collection_with_single_doc.fetch(ids=[doc.id])\n        assert doc.id in ret\n        ret = ret[doc.id]\n        assert ret.field(\"id\") == doc.field(\"id\")\n        assert ret.field(\"name\") == single_doc.field(\"name\")\n        assert ret.field(\"weight\") is None\n        assert ret.field(\"height\") == single_doc.field(\"height\")\n\n    def test_collection_update_without_nullable_false_field(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        # update doc field weight\n        doc = Doc(\n            id=single_doc.id,\n            fields={\"weight\": single_doc.field(\"weight\") + 1},\n        )\n        result = collection_with_single_doc.update(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        # fetch\n        ret = collection_with_single_doc.fetch(ids=[doc.id])\n        assert doc.id in ret\n        ret = ret[doc.id]\n        assert ret.field(\"id\") == single_doc.field(\"id\")\n        assert ret.field(\"name\") == single_doc.field(\"name\")\n        assert ret.field(\"weight\") == doc.field(\"weight\")\n        assert ret.field(\"height\") == single_doc.field(\"height\")\n\n    def test_collection_update_without_nullable_false_field_set_null(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        # id, name's nullable == False\n        # weight, height's nullable == True\n\n        # update doc field weight is None\n        doc = Doc(\n            id=single_doc.id,\n            fields={\"weight\": None},\n        
)\n        result = collection_with_single_doc.update(doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n        # fetch\n        ret = collection_with_single_doc.fetch(ids=[doc.id])\n        assert doc.id in ret\n        ret = ret[doc.id]\n        assert ret.field(\"id\") == single_doc.field(\"id\")\n        assert ret.field(\"name\") == single_doc.field(\"name\")\n        assert ret.field(\"weight\") is None\n        assert ret.field(\"height\") == single_doc.field(\"height\")\n\n    def test_empty_collection_update_batch(\n        self, test_collection: Collection, multiple_docs\n    ):\n        result = test_collection.update(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.code() == StatusCode.NOT_FOUND\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    def test_collection_update(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        result = collection_with_single_doc.update(single_doc)\n        assert bool(result) == 1\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_collection_update_batch(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        result = collection_with_multiple_docs.update(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n\n        stats = collection_with_multiple_docs.stats\n        assert stats is not None\n        assert stats.doc_count == len(multiple_docs)\n\n\n# ----------------------------\n# Collection Upsert Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass 
TestCollectionUpsert:\n    def test_empty_collection_upsert(self, test_collection: Collection, single_doc):\n        result = test_collection.upsert(single_doc)\n        assert bool(result)\n        assert result.ok()\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_empty_collection_upsert_batch(\n        self, test_collection: Collection, multiple_docs\n    ):\n        result = test_collection.upsert(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n\n        stats = test_collection.stats\n        assert stats is not None\n        assert stats.doc_count == len(multiple_docs)\n\n    def test_collection_upsert(\n        self, collection_with_single_doc: Collection, single_doc, multiple_docs\n    ):\n        # doc is existing\n        # upsert => update\n        result = collection_with_single_doc.upsert(single_doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_collection_upsert_batch(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        # doc is existing\n        # upsert => update\n        result = collection_with_multiple_docs.upsert(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n\n        stats = collection_with_multiple_docs.stats\n        assert stats is not None\n        assert stats.doc_count == len(multiple_docs)\n\n\n# ----------------------------\n# Collection Delete Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionDelete:\n    def test_empty_collection_delete(self, test_collection: Collection, single_doc):\n        result = test_collection.delete(single_doc.id)\n        assert 
bool(result)\n        assert result.code() == StatusCode.NOT_FOUND\n\n    def test_empty_collection_delete_batch(\n        self, test_collection: Collection, multiple_docs\n    ):\n        result = test_collection.delete([doc.id for doc in multiple_docs])\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.code() == StatusCode.NOT_FOUND\n\n    def test_collection_delete(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        result = collection_with_single_doc.delete(single_doc.id)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n        result = collection_with_single_doc.insert(single_doc)\n        assert bool(result)\n        assert result.ok()\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_collection_delete_batch(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        result = collection_with_multiple_docs.delete([doc.id for doc in multiple_docs])\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n        stats = collection_with_multiple_docs.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    def test_collection_delete_by_filter(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        collection_with_single_doc.delete_by_filter(\n            filter=f\"height={single_doc.field('height')}\"\n        )\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n    def test_collection_delete_by_filter_invert_field(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        collection_with_single_doc.delete_by_filter(\n            
filter=f\"id={single_doc.field('id')}\"\n        )\n        stats = collection_with_single_doc.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n\n\n# ----------------------------\n# Collection Query Test Case\n# ----------------------------\n@pytest.mark.usefixtures(\"test_collection\")\nclass TestCollectionQuery:\n    def test_empty_collection_query(self, test_collection: Collection):\n        result = test_collection.query()\n        assert len(result) == 0\n\n    def test_collection_query(self, collection_with_single_doc: Collection, single_doc):\n        result = collection_with_single_doc.query()\n        assert len(result) == 1\n        doc = result[0]\n        assert doc.id == single_doc.id\n        assert \"dense\" not in doc.field_names()\n        assert \"sparse\" not in doc.field_names()\n        field_without_vector = single_doc.field_names()\n        assert set(doc.field_names()) == set(field_without_vector)\n        for name in field_without_vector:\n            assert doc.field(name) == single_doc.field(name)\n\n    def test_collection_query_with_include_vector(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        result = collection_with_single_doc.query(include_vector=True)\n        assert len(result) == 1\n        doc = result[0]\n        assert doc.vector(\"dense\") is not None\n        assert doc.vector(\"sparse\") is not None\n\n    def test_collection_query_with_output_fields(\n        self, collection_with_single_doc: Collection, single_doc\n    ):\n        result = collection_with_single_doc.query(output_fields=[\"id\", \"name\"])\n        assert len(result) == 1\n        doc = result[0]\n        assert doc.id == single_doc.id\n        assert len(doc.field_names()) == 2\n        assert set(doc.field_names()) == {\"id\", \"name\"}\n\n    def test_collection_query_with_topk(\n        self, collection_with_multiple_docs: Collection\n    ):\n        result = 
collection_with_multiple_docs.query()\n        assert len(result) == 10\n\n        result = collection_with_multiple_docs.query(topk=5)\n        assert len(result) == 5\n\n    def test_collection_query_with_range_filter_int_field(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        index = 10\n        idx = multiple_docs[index].id\n\n        result = collection_with_multiple_docs.query(filter=f\"id>{idx}\", topk=100)\n        assert len(result) == len(multiple_docs) - index - 1\n\n        result = collection_with_multiple_docs.query(filter=f\"id>={idx}\", topk=100)\n        assert len(result) == len(multiple_docs) - index\n\n        result = collection_with_multiple_docs.query(filter=f\"id<{idx}\", topk=100)\n        assert len(result) == index\n\n        result = collection_with_multiple_docs.query(filter=f\"id<={idx}\", topk=100)\n        assert len(result) == index + 1\n\n        result = collection_with_multiple_docs.query(filter=f\"id={idx}\", topk=100)\n        assert len(result) == 1\n\n        result = collection_with_multiple_docs.query(filter=f\"id!={idx}\", topk=100)\n        assert len(result) == len(multiple_docs) - 1\n\n        left, right = 10, 90\n        l_id, r_id = multiple_docs[left].id, multiple_docs[right].id\n        result = collection_with_multiple_docs.query(\n            filter=f\"id>{l_id} and id<{r_id}\", topk=100\n        )\n        assert len(result) == right - left - 1\n\n        result = collection_with_multiple_docs.query(\n            filter=f\"id>={l_id} and id<{r_id}\", topk=100\n        )\n        assert len(result) == right - left\n\n        result = collection_with_multiple_docs.query(\n            filter=f\"id>={l_id} and id<={r_id}\", topk=100\n        )\n        assert len(result) == right - left + 1\n\n        result = collection_with_multiple_docs.query(\n            filter=f\"id<{l_id} or id>{r_id}\", topk=100\n        )\n        assert len(result) == len(multiple_docs) - (right - 
left) - 1\n\n        result = collection_with_multiple_docs.query(\n            filter=f\"id<={l_id} or id>{r_id}\", topk=100\n        )\n        assert len(result) == len(multiple_docs) - (right - left)\n\n        result = collection_with_multiple_docs.query(\n            filter=f\"id<={l_id} or id>={r_id}\", topk=100\n        )\n        assert len(result) == len(multiple_docs) - (right - left) + 1\n\n        result = collection_with_multiple_docs.query(filter=\"id in (1)\", topk=100)\n        assert len(result) == 1\n\n    def test_collection_query_with_vector_and_id(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        with pytest.raises(ValueError):\n            collection_with_single_doc.query(\n                VectorQuery(\n                    field_name=\"dense\",\n                    id=single_doc.id,\n                    vector=single_doc.vector(\"dense\"),\n                )\n            )\n\n    def test_collection_query_with_filter_not_in(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        result = collection_with_multiple_docs.query(filter=\"id not in (1)\", topk=100)\n        assert len(result) == len(multiple_docs) - 1\n\n    def test_collection_with_error_query_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        query = VectorQuery(\n            field_name=\"dense\", vector=multiple_docs[0].vector(\"dense\"), param=[1, 2, 3]\n        )\n        with pytest.raises(TypeError):\n            result = collection_with_multiple_docs.query(\n                filter=\"id in (1)\", topk=100, vectors=query\n            )\n\n    def test_collection_query_by_id(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        result = collection_with_multiple_docs.query(\n            VectorQuery(field_name=\"dense\", id=multiple_docs[0].id)\n        )\n        assert len(result) == 10\n\n    def 
test_collection_query_multi_vector_with_same_field(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        with pytest.raises(ValueError):\n            collection_with_multiple_docs.query(\n                [\n                    VectorQuery(\n                        field_name=\"dense\", vector=multiple_docs[0].vector(\"dense\")\n                    ),\n                    VectorQuery(\n                        field_name=\"dense\", vector=multiple_docs[0].vector(\"dense\")\n                    ),\n                ]\n            )\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_by_dense_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_by_sparse_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_by_dense_vector_with_filter(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_by_sparse_vector_with_filter(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_rrf_reranker_by_multi_dense_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_rrf_reranker_by_multi_sparse_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        
pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_rrf_reranker_by_hybrid_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_weighted_reranker_by_multi_dense_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_weighted_reranker_by_multi_sparse_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n\n    @pytest.mark.skip(reason=\"TODO: This test case is pending implementation\")\n    def test_collection_query_with_weighted_reranker_by_hybrid_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs\n    ):\n        pass\n"
  },
  {
    "path": "python/tests/test_collection_hnsw_rabitq.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport platform\nimport sys\n\nimport pytest\nimport math\nimport zvec\n\npytestmark = pytest.mark.skipif(\n    not (sys.platform == \"linux\" and platform.machine() in (\"x86_64\", \"AMD64\")),\n    reason=\"HNSW RaBitQ only supported on Linux x86_64\",\n)\nfrom zvec import (\n    Collection,\n    CollectionOption,\n    DataType,\n    Doc,\n    FieldSchema,\n    HnswRabitqIndexParam,\n    HnswRabitqQueryParam,\n    MetricType,\n    VectorSchema,\n    VectorQuery,\n)\n\n\n# ==================== Fixtures ====================\n\n\n@pytest.fixture(scope=\"session\")\ndef hnsw_rabitq_collection_schema():\n    \"\"\"Create a collection schema with HNSW RaBitQ index.\"\"\"\n    return zvec.CollectionSchema(\n        name=\"test_hnsw_rabitq_collection\",\n        fields=[\n            FieldSchema(\"id\", DataType.INT64, nullable=False),\n            FieldSchema(\"name\", DataType.STRING, nullable=False),\n        ],\n        vectors=[\n            VectorSchema(\n                \"embedding\",\n                DataType.VECTOR_FP32,\n                dimension=128,\n                index_param=HnswRabitqIndexParam(\n                    metric_type=MetricType.L2,\n                    m=16,\n                    ef_construction=200,\n                    total_bits=7,\n                    num_clusters=64,\n                ),\n       
     ),\n        ],\n    )\n\n\n@pytest.fixture(scope=\"session\")\ndef collection_option():\n    \"\"\"Create collection options.\"\"\"\n    return CollectionOption(read_only=False, enable_mmap=True)\n\n\n@pytest.fixture\ndef single_doc():\n    \"\"\"Create a single document for testing.\"\"\"\n    return Doc(\n        id=\"0\",\n        fields={\"id\": 0, \"name\": \"test_doc_0\"},\n        vectors={\"embedding\": [0.1 + i * 0.01 for i in range(128)]},\n    )\n\n\n@pytest.fixture\ndef multiple_docs():\n    \"\"\"Create multiple documents for testing.\"\"\"\n    return [\n        Doc(\n            id=f\"{i}\",\n            fields={\"id\": i, \"name\": f\"test_doc_{i}\"},\n            vectors={\"embedding\": [i * 0.1 + j * 0.01 for j in range(128)]},\n        )\n        for i in range(1, 101)\n    ]\n\n\n@pytest.fixture(scope=\"function\")\ndef hnsw_rabitq_collection(\n    tmp_path_factory, hnsw_rabitq_collection_schema, collection_option\n) -> Collection:\n    \"\"\"\n    Function-scoped fixture: creates and opens a collection with HNSW RaBitQ index.\n    \"\"\"\n    temp_dir = tmp_path_factory.mktemp(\"zvec_hnsw_rabitq\")\n    collection_path = temp_dir / \"test_hnsw_rabitq_collection\"\n\n    coll = zvec.create_and_open(\n        path=str(collection_path),\n        schema=hnsw_rabitq_collection_schema,\n        option=collection_option,\n    )\n\n    assert coll is not None, \"Failed to create and open HNSW RaBitQ collection\"\n    assert coll.path == str(collection_path)\n    assert coll.schema.name == hnsw_rabitq_collection_schema.name\n\n    try:\n        yield coll\n    finally:\n        if hasattr(coll, \"destroy\") and coll is not None:\n            try:\n                coll.destroy()\n            except Exception as e:\n                print(f\"Warning: failed to destroy collection: {e}\")\n\n\n@pytest.fixture\ndef collection_with_single_doc(\n    hnsw_rabitq_collection: Collection, single_doc: Doc\n) -> Collection:\n    \"\"\"Setup: insert single doc 
into collection.\"\"\"\n    assert hnsw_rabitq_collection.stats.doc_count == 0\n    result = hnsw_rabitq_collection.insert(single_doc)\n    assert bool(result)\n    assert result.ok()\n    assert hnsw_rabitq_collection.stats.doc_count == 1\n\n    yield hnsw_rabitq_collection\n\n    # Teardown: delete single doc\n    hnsw_rabitq_collection.delete(single_doc.id)\n    assert hnsw_rabitq_collection.stats.doc_count == 0\n\n\n@pytest.fixture\ndef collection_with_multiple_docs(\n    hnsw_rabitq_collection: Collection, multiple_docs: list[Doc]\n) -> Collection:\n    \"\"\"Setup: insert multiple docs into collection.\"\"\"\n    assert hnsw_rabitq_collection.stats.doc_count == 0\n    result = hnsw_rabitq_collection.insert(multiple_docs)\n    assert len(result) == len(multiple_docs)\n    for item in result:\n        assert item.ok()\n    assert hnsw_rabitq_collection.stats.doc_count == len(multiple_docs)\n\n    yield hnsw_rabitq_collection\n\n    # Teardown: delete multiple docs\n    hnsw_rabitq_collection.delete([doc.id for doc in multiple_docs])\n\n\n# ==================== Tests ====================\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionCreation:\n    \"\"\"Test HNSW RaBitQ collection creation and schema validation.\"\"\"\n\n    def test_collection_creation(\n        self, hnsw_rabitq_collection: Collection, hnsw_rabitq_collection_schema\n    ):\n        \"\"\"Test that collection is created with correct schema.\"\"\"\n        assert hnsw_rabitq_collection is not None\n        assert hnsw_rabitq_collection.schema.name == hnsw_rabitq_collection_schema.name\n        assert len(hnsw_rabitq_collection.schema.fields) == len(\n            hnsw_rabitq_collection_schema.fields\n        )\n        assert len(hnsw_rabitq_collection.schema.vectors) == len(\n            hnsw_rabitq_collection_schema.vectors\n        )\n\n    def test_vector_schema_validation(self, hnsw_rabitq_collection: Collection):\n        \"\"\"Test that vector 
schema has correct HNSW RaBitQ configuration.\"\"\"\n        vector_schema = hnsw_rabitq_collection.schema.vector(\"embedding\")\n        assert vector_schema is not None\n        assert vector_schema.name == \"embedding\"\n        assert vector_schema.data_type == DataType.VECTOR_FP32\n        assert vector_schema.dimension == 128\n\n        index_param = vector_schema.index_param\n        assert index_param is not None\n        assert index_param.metric_type == MetricType.L2\n        assert index_param.m == 16\n        assert index_param.ef_construction == 200\n        assert index_param.total_bits == 7\n        assert index_param.num_clusters == 64\n\n    def test_collection_stats(self, hnsw_rabitq_collection: Collection):\n        \"\"\"Test initial collection statistics.\"\"\"\n        stats = hnsw_rabitq_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 0\n        assert len(stats.index_completeness) == 1\n        assert stats.index_completeness[\"embedding\"] == 1\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionInsert:\n    \"\"\"Test document insertion into HNSW RaBitQ collection.\"\"\"\n\n    def test_insert_single_doc(\n        self, hnsw_rabitq_collection: Collection, single_doc: Doc\n    ):\n        \"\"\"Test inserting a single document.\"\"\"\n        result = hnsw_rabitq_collection.insert(single_doc)\n        assert bool(result)\n        assert result.ok()\n\n        stats = hnsw_rabitq_collection.stats\n        assert stats is not None\n        assert stats.doc_count == 1\n\n    def test_insert_multiple_docs(\n        self, hnsw_rabitq_collection: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test inserting multiple documents.\"\"\"\n        result = hnsw_rabitq_collection.insert(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n\n        stats = hnsw_rabitq_collection.stats\n        
assert stats is not None\n        assert stats.doc_count == len(multiple_docs)\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionFetch:\n    \"\"\"Test document fetching from HNSW RaBitQ collection.\"\"\"\n\n    def test_fetch_single_doc(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        \"\"\"Test fetching a single document by ID.\"\"\"\n        result = collection_with_single_doc.fetch(ids=[single_doc.id])\n        assert bool(result)\n        assert single_doc.id in result.keys()\n\n        doc = result[single_doc.id]\n        assert doc is not None\n        assert doc.id == single_doc.id\n        assert doc.field(\"id\") == single_doc.field(\"id\")\n        assert doc.field(\"name\") == single_doc.field(\"name\")\n\n    def test_fetch_multiple_docs(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test fetching multiple documents by IDs.\"\"\"\n        ids = [doc.id for doc in multiple_docs[:10]]\n        result = collection_with_multiple_docs.fetch(ids=ids)\n        assert bool(result)\n        assert len(result) == len(ids)\n\n        for doc_id in ids:\n            assert doc_id in result\n            doc = result[doc_id]\n            assert doc is not None\n            assert doc.id == doc_id\n\n    def test_fetch_nonexistent_doc(self, collection_with_single_doc: Collection):\n        \"\"\"Test fetching a non-existent document.\"\"\"\n        result = collection_with_single_doc.fetch(ids=[\"nonexistent_id\"])\n        assert len(result) == 0\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionQuery:\n    \"\"\"Test vector search queries on HNSW RaBitQ collection.\"\"\"\n\n    def test_query_by_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying by vector with HNSW RaBitQ index.\"\"\"\n        query_vector = 
multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        result = collection_with_multiple_docs.query(vectors=query, topk=10)\n        assert len(result) > 0\n        assert len(result) <= 10\n\n        # First result should be the query document itself (or very close)\n        first_doc = result[0]\n        assert first_doc is not None\n        assert first_doc.id is not None\n\n    def test_query_by_id(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying by document ID with HNSW RaBitQ index.\"\"\"\n        query = VectorQuery(\n            field_name=\"embedding\",\n            id=multiple_docs[0].id,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        result = collection_with_multiple_docs.query(vectors=query, topk=10)\n        assert len(result) > 0\n        assert len(result) <= 10\n\n    def test_query_with_different_ef_values(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying with different ef parameter values.\"\"\"\n        query_vector = multiple_docs[0].vector(\"embedding\")\n\n        # Test with ef=100\n        query_100 = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=100),\n        )\n        result_100 = collection_with_multiple_docs.query(vectors=query_100, topk=10)\n        assert len(result_100) > 0\n\n        # Test with ef=500\n        query_500 = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=500),\n        )\n        result_500 = collection_with_multiple_docs.query(vectors=query_500, topk=10)\n        assert len(result_500) > 0\n\n    def test_query_with_topk(\n        
self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying with different topk values.\"\"\"\n        query_vector = multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        # Test topk=5\n        result_5 = collection_with_multiple_docs.query(vectors=query, topk=5)\n        assert len(result_5) <= 5\n\n        # Test topk=20\n        result_20 = collection_with_multiple_docs.query(vectors=query, topk=20)\n        assert len(result_20) <= 20\n\n    def test_query_with_filter(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying with filter conditions.\"\"\"\n        query_vector = multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        # Query with id filter\n        result = collection_with_multiple_docs.query(\n            vectors=query, topk=10, filter=\"id < 50\"\n        )\n        assert len(result) > 0\n        for doc in result:\n            assert doc.field(\"id\") < 50\n\n    def test_query_with_output_fields(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying with specific output fields.\"\"\"\n        query_vector = multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        result = collection_with_multiple_docs.query(\n            vectors=query, topk=10, output_fields=[\"id\", \"name\"]\n        )\n        assert len(result) > 0\n\n        first_doc = result[0]\n        assert \"id\" in 
first_doc.field_names()\n        assert \"name\" in first_doc.field_names()\n\n    def test_query_with_include_vector(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test querying with vector data included in results.\"\"\"\n        query_vector = multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n\n        result = collection_with_multiple_docs.query(\n            vectors=query, topk=10, include_vector=True\n        )\n        assert len(result) > 0\n\n        first_doc = result[0]\n        assert first_doc.vector(\"embedding\") is not None\n        assert len(first_doc.vector(\"embedding\")) == 128\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionUpdate:\n    \"\"\"Test document update in HNSW RaBitQ collection.\"\"\"\n\n    def test_update_doc_fields(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        \"\"\"Test updating document fields.\"\"\"\n        updated_doc = Doc(\n            id=single_doc.id,\n            fields={\"id\": single_doc.field(\"id\"), \"name\": \"updated_name\"},\n        )\n\n        result = collection_with_single_doc.update(updated_doc)\n        assert bool(result)\n        assert result.ok()\n\n        # Verify update\n        fetched = collection_with_single_doc.fetch(ids=[single_doc.id])\n        assert single_doc.id in fetched\n        doc = fetched[single_doc.id]\n        assert doc.field(\"name\") == \"updated_name\"\n\n    def test_update_doc_vector(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        \"\"\"Test updating document vector.\"\"\"\n        new_vector = [0.5 + i * 0.01 for i in range(128)]\n        updated_doc = Doc(\n            id=single_doc.id,\n            vectors={\"embedding\": new_vector},\n     
   )\n\n        result = collection_with_single_doc.update(updated_doc)\n        assert bool(result)\n        assert result.ok()\n\n        # Verify update\n        fetched = collection_with_single_doc.fetch(\n            ids=[single_doc.id],\n        )\n        assert single_doc.id in fetched\n        doc = fetched[single_doc.id]\n        assert doc.vector(\"embedding\") is not None\n        embedding = doc.vector(\"embedding\")\n        assert len(embedding) == 128\n        # Verify vector values are approximately equal (float comparison)\n        for i in range(128):\n            assert math.isclose(embedding[i], new_vector[i], rel_tol=1e-5)\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionDelete:\n    \"\"\"Test document deletion from HNSW RaBitQ collection.\"\"\"\n\n    def test_delete_single_doc(\n        self, collection_with_single_doc: Collection, single_doc: Doc\n    ):\n        \"\"\"Test deleting a single document.\"\"\"\n        result = collection_with_single_doc.delete(single_doc.id)\n        assert bool(result)\n        assert result.ok()\n\n        stats = collection_with_single_doc.stats\n        assert stats.doc_count == 0\n\n    def test_delete_multiple_docs(\n        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]\n    ):\n        \"\"\"Test deleting multiple documents.\"\"\"\n        ids_to_delete = [doc.id for doc in multiple_docs[:10]]\n        result = collection_with_multiple_docs.delete(ids_to_delete)\n        assert len(result) == len(ids_to_delete)\n        for item in result:\n            assert item.ok()\n\n        stats = collection_with_multiple_docs.stats\n        assert stats.doc_count == len(multiple_docs) - len(ids_to_delete)\n\n\n@pytest.mark.usefixtures(\"hnsw_rabitq_collection\")\nclass TestHnswRabitqCollectionOptimizeAndReopen:\n    \"\"\"Test collection optimize and reopen functionality.\"\"\"\n\n    def test_optimize_close_reopen_and_query(\n        self,\n  
      tmp_path_factory,\n        hnsw_rabitq_collection_schema,\n        collection_option,\n        multiple_docs: list[Doc],\n    ):\n        \"\"\"Test inserting 100 docs, optimize, close, reopen and query.\"\"\"\n        # Create collection and insert 100 documents\n        temp_dir = tmp_path_factory.mktemp(\"zvec_hnsw_rabitq_optimize\")\n        collection_path = temp_dir / \"test_optimize_collection\"\n\n        coll = zvec.create_and_open(\n            path=str(collection_path),\n            schema=hnsw_rabitq_collection_schema,\n            option=collection_option,\n        )\n\n        assert coll is not None\n        assert coll.stats.doc_count == 0\n\n        # Insert 100 documents\n        result = coll.insert(multiple_docs)\n        assert len(result) == len(multiple_docs)\n        for item in result:\n            assert item.ok()\n        assert coll.stats.doc_count == len(multiple_docs)\n\n        # Call optimize\n        from zvec import OptimizeOption\n\n        coll.optimize(option=OptimizeOption())\n\n        # Verify data is still accessible after optimize\n        query_vector = multiple_docs[0].vector(\"embedding\")\n        query = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            param=HnswRabitqQueryParam(ef=300),\n        )\n        result_before_close = coll.query(vectors=query, topk=10)\n        assert len(result_before_close) > 0\n\n        # Close collection (destroy will close it)\n        collection_path_str = str(collection_path)\n        del coll\n\n        # Reopen collection\n        reopened_coll = zvec.open(path=collection_path_str, option=collection_option)\n        assert reopened_coll is not None\n        assert reopened_coll.stats.doc_count == len(multiple_docs)\n\n        # Execute query on reopened collection\n        query_after_reopen = VectorQuery(\n            field_name=\"embedding\",\n            vector=query_vector,\n            
param=HnswRabitqQueryParam(ef=300),\n        )\n        result_after_reopen = reopened_coll.query(vectors=query_after_reopen, topk=10)\n        assert len(result_after_reopen) > 0\n        assert len(result_after_reopen) <= 10\n\n        # Verify query results are valid\n        first_doc = result_after_reopen[0]\n        assert first_doc is not None\n        assert first_doc.id is not None\n        assert first_doc.field(\"id\") is not None\n        assert first_doc.field(\"name\") is not None\n\n        # Cleanup\n        reopened_coll.destroy()\n"
  },
  {
    "path": "python/tests/test_convert.py",
    "content": "from __future__ import annotations\n\nimport math\n\nimport pytest\nfrom _zvec import _Doc\nfrom zvec.model.convert import convert_to_py_doc, convert_to_cpp_doc\nfrom zvec import Doc, CollectionSchema, DataType, FieldSchema, VectorSchema\n\n\n# ----------------------------\n# Convert Cpp Doc Test Case\n# ----------------------------\nclass TestConvertCppDoc:\n    def test_default(self):\n        doc = Doc(id=\"1\")\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=FieldSchema(\"name\", DataType.STRING),\n        )\n\n        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)\n\n        assert cpp_doc is not None\n        assert cpp_doc.pk() == doc.id\n\n    def test_with_field_notin_schema(self):\n        doc = Doc(id=\"1\", fields={\"name\": \"Tom\"})\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"id\", DataType.UINT64),\n                FieldSchema(\"salary\", DataType.UINT32),\n                FieldSchema(\"age\", DataType.INT32),\n                FieldSchema(\"create_at\", DataType.INT64),\n                FieldSchema(\"author\", DataType.STRING),\n                FieldSchema(\"weight\", DataType.FLOAT),\n            ],\n        )\n        with pytest.raises(ValueError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n    def test_with_scalar_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"id\", DataType.UINT64),\n                FieldSchema(\"salary\", DataType.UINT32),\n                FieldSchema(\"age\", DataType.INT32),\n                FieldSchema(\"create_at\", DataType.INT64),\n                FieldSchema(\"author\", DataType.STRING),\n                FieldSchema(\"weight\", DataType.FLOAT),\n                FieldSchema(\"bmi\", DataType.DOUBLE),\n                FieldSchema(\"is_male\", 
DataType.BOOL),\n            ],\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\n                \"id\": 1,\n                \"salary\": 1000,\n                \"age\": 18,\n                \"create_at\": 1640995200,\n                \"bmi\": 80.0 / 200.0,\n                \"author\": \"Tom\",\n                \"weight\": 80.0,\n                \"is_male\": True,\n            },\n        )\n        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)\n        assert cpp_doc is not None\n        assert cpp_doc.pk() == doc.id\n        assert cpp_doc.get_any(\"id\", DataType.UINT64) == 1\n        assert cpp_doc.get_any(\"salary\", DataType.UINT32) == 1000\n        assert cpp_doc.get_any(\"age\", DataType.INT32) == 18\n        assert cpp_doc.get_any(\"create_at\", DataType.INT64) == 1640995200\n        assert cpp_doc.get_any(\"author\", DataType.STRING) == \"Tom\"\n        assert math.isclose(\n            cpp_doc.get_any(\"weight\", DataType.FLOAT), 80.0, rel_tol=1e-6\n        )\n        assert math.isclose(\n            cpp_doc.get_any(\"bmi\", DataType.DOUBLE), 80.0 / 200.0, rel_tol=1e-6\n        )\n        assert cpp_doc.get_any(\"is_male\", DataType.BOOL) == True\n\n    def test_with_array_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"tags\", DataType.ARRAY_STRING),\n                FieldSchema(\"ids\", DataType.ARRAY_UINT64),\n                FieldSchema(\"marks\", DataType.ARRAY_UINT32),\n                FieldSchema(\"x\", DataType.ARRAY_INT32),\n                FieldSchema(\"y\", DataType.ARRAY_INT64),\n                FieldSchema(\"scores\", DataType.ARRAY_FLOAT),\n                FieldSchema(\"ratios\", DataType.ARRAY_DOUBLE),\n                FieldSchema(\"results\", DataType.ARRAY_BOOL),\n            ],\n        )\n\n        doc = Doc(\n            id=\"1\",\n            fields={\n                \"tags\": [\"tag1\", \"tag2\", 
\"tag3\"],\n                \"ids\": [111111111111, 222222222222, 333333333333],\n                \"marks\": [100, 200, 300],\n                \"x\": [1, 2, 3],\n                \"y\": [100, 200, 300],\n                \"scores\": [1.1, 2.2, 3.3],\n                \"ratios\": [0.1, 0.2, 0.3],\n                \"results\": [True, False, True],\n            },\n        )\n        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)\n\n        assert cpp_doc is not None\n        assert cpp_doc.pk() == doc.id\n        assert cpp_doc.get_any(\"tags\", DataType.ARRAY_STRING) == doc.field(\"tags\")\n        assert cpp_doc.get_any(\"ids\", DataType.ARRAY_UINT64) == doc.field(\"ids\")\n        assert cpp_doc.get_any(\"marks\", DataType.ARRAY_UINT32) == doc.field(\"marks\")\n        assert cpp_doc.get_any(\"x\", DataType.ARRAY_INT32) == doc.field(\"x\")\n        assert cpp_doc.get_any(\"y\", DataType.ARRAY_INT64) == doc.field(\"y\")\n        scores = cpp_doc.get_any(\"scores\", DataType.ARRAY_FLOAT)\n        for i in range(len(doc.field(\"scores\"))):\n            assert math.isclose(scores[i], doc.field(\"scores\")[i], rel_tol=1e-1)\n        ratios = cpp_doc.get_any(\"ratios\", DataType.ARRAY_DOUBLE)\n        for i in range(len(doc.field(\"ratios\"))):\n            assert math.isclose(ratios[i], doc.field(\"ratios\")[i], rel_tol=1e-1)\n        results = cpp_doc.get_any(\"results\", DataType.ARRAY_BOOL)\n        for i in range(len(doc.field(\"results\"))):\n            assert results[i] == doc.field(\"results\")[i]\n\n    def test_with_dense_vector_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            vectors=[\n                VectorSchema(\n                    name=\"embedding\",\n                    data_type=DataType.VECTOR_FP16,\n                    dimension=4,\n                ),\n                VectorSchema(\n                    name=\"image\",\n                    data_type=DataType.VECTOR_FP32,\n               
     dimension=8,\n                ),\n                VectorSchema(\n                    name=\"text\",\n                    data_type=DataType.VECTOR_INT8,\n                    dimension=32,\n                ),\n            ],\n        )\n\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"embedding\": [1.1] * 4,\n                \"image\": [2.2] * 8,\n                \"text\": [4] * 32,\n            },\n        )\n        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)\n        assert cpp_doc is not None\n        assert cpp_doc.pk() == doc.id\n\n        embedding_vector = cpp_doc.get_any(\"embedding\", DataType.VECTOR_FP16)\n        assert len(embedding_vector) == 4\n        for i in range(4):\n            assert math.isclose(\n                embedding_vector[i], doc.vector(\"embedding\")[i], rel_tol=1e-1\n            )\n\n        image_vector = cpp_doc.get_any(\"image\", DataType.VECTOR_FP32)\n        assert len(image_vector) == 8\n        for i in range(8):\n            assert math.isclose(image_vector[i], doc.vector(\"image\")[i], rel_tol=1e-1)\n\n        text_vector = cpp_doc.get_any(\"text\", DataType.VECTOR_INT8)\n        assert len(text_vector) == 32\n        for i in range(32):\n            assert text_vector[i] == doc.vectors[\"text\"][i]\n\n    def test_with_sparse_vector_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            vectors=[\n                VectorSchema(\n                    name=\"author\",\n                    data_type=DataType.SPARSE_VECTOR_FP32,\n                ),\n                VectorSchema(\n                    name=\"content\",\n                    data_type=DataType.SPARSE_VECTOR_FP16,\n                ),\n            ],\n        )\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"author\": {1: 1.1, 2: 2.2, 3: 3.3},\n                \"content\": {4: 4.4, 5: 5.5, 6: 6.6},\n            },\n        )\n\n  
      cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)\n        assert cpp_doc is not None\n        assert cpp_doc.pk() == doc.id\n\n        author_vector = cpp_doc.get_any(\"author\", DataType.SPARSE_VECTOR_FP32)\n        assert isinstance(author_vector, dict)\n        for key, value in doc.vector(\"author\").items():\n            assert math.isclose(author_vector[key], value, rel_tol=1e-1)\n\n        content_vector = cpp_doc.get_any(\"content\", DataType.SPARSE_VECTOR_FP16)\n        assert isinstance(content_vector, dict)\n        for key, value in doc.vector(\"content\").items():\n            assert math.isclose(content_vector[key], value, rel_tol=1e-1)\n\n    def test_with_scalar_fields_error_datatype(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"id\", DataType.UINT64),\n                FieldSchema(\"salary\", DataType.UINT32),\n                FieldSchema(\"age\", DataType.INT32),\n                FieldSchema(\"create_at\", DataType.INT64),\n                FieldSchema(\"author\", DataType.STRING),\n                FieldSchema(\"weight\", DataType.FLOAT),\n                FieldSchema(\"bmi\", DataType.DOUBLE),\n                FieldSchema(\"is_male\", DataType.BOOL),\n            ],\n        )\n        doc = Doc(\n            id=\"1\",\n            fields={\n                \"id\": \"1\",\n            },\n        )\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"salary\": \"1000\"})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"age\": \"18\"})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"create_at\": \"2021-01-01\"})\n        with pytest.raises(TypeError):\n            
convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"author\": 1})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"weight\": \"80.5\"})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"bmi\": \"25.0\"})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"is_male\": \"true\"})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n    def test_with_array_fields_error_datatype(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"tags\", DataType.ARRAY_STRING),\n                FieldSchema(\"ids\", DataType.ARRAY_UINT64),\n                FieldSchema(\"marks\", DataType.ARRAY_UINT32),\n                FieldSchema(\"x\", DataType.ARRAY_INT32),\n                FieldSchema(\"y\", DataType.ARRAY_INT64),\n                FieldSchema(\"scores\", DataType.ARRAY_FLOAT),\n                FieldSchema(\"ratios\", DataType.ARRAY_DOUBLE),\n                FieldSchema(\"results\", DataType.ARRAY_BOOL),\n            ],\n        )\n\n        doc = Doc(id=\"1\", fields={\"tags\": [1, 2, 3]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"ids\": [\"1\", \"2\", \"3\"]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"marks\": [1.1, 2.2, 3.3]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"x\": [1.1, 2.2, 3.3]})\n        with pytest.raises(TypeError):\n    
        convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"y\": [1.1, 2.2, 3.3]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"scores\": [\"1\", \"2\", \"3\"]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"ratios\": [\"1\", \"2\", \"3\"]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", fields={\"results\": [\"1\", \"2\", \"3\"]})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n    def test_with_vector_fields_error_datatype(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            vectors=[\n                VectorSchema(\n                    name=\"embedding\",\n                    data_type=DataType.VECTOR_FP16,\n                    dimension=4,\n                ),\n                VectorSchema(\n                    name=\"image\",\n                    data_type=DataType.VECTOR_FP32,\n                    dimension=8,\n                ),\n                VectorSchema(\n                    name=\"text\",\n                    data_type=DataType.VECTOR_INT8,\n                    dimension=32,\n                ),\n            ],\n        )\n\n        doc = Doc(id=\"1\", vectors={\"image\": [\"1.1\"] * 4})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", vectors={\"text\": [\"1\"] * 4})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(id=\"1\", vectors={\"embedding\": [\"1\"] * 4})\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n    def 
test_with_sparse_vector_error_datatype(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            vectors=[\n                VectorSchema(\n                    name=\"author\",\n                    data_type=DataType.SPARSE_VECTOR_FP32,\n                ),\n                VectorSchema(\n                    name=\"content\",\n                    data_type=DataType.SPARSE_VECTOR_FP16,\n                ),\n            ],\n        )\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"author\": {\"1\": 1.1, \"2\": 2.2, \"3\": 3.3},\n            },\n        )\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"content\": {\"1\": 1.1, \"2\": 2.2, \"3\": 3.3},\n            },\n        )\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"author\": {1: \"1\", 2: \"2\", 3: \"3\"},\n            },\n        )\n        with pytest.raises(TypeError):\n            convert_to_cpp_doc(doc, collection_schema=schema)\n\n\n# ----------------------------\n# Convert Py Doc Test Case\n# ----------------------------\nclass TestConvertPyDoc:\n    def test_default(self):\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        doc.set_score(1.0)\n\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=FieldSchema(\"name\", DataType.STRING),\n        )\n\n        py_doc = convert_to_py_doc(doc, schema)\n        assert py_doc.id == \"1\"\n        assert py_doc.score == 1.0\n\n    def test_with_scalar_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"id\", DataType.UINT64),\n                FieldSchema(\"salary\", 
DataType.UINT32),\n                FieldSchema(\"age\", DataType.INT32),\n                FieldSchema(\"create_at\", DataType.INT64),\n                FieldSchema(\"author\", DataType.STRING),\n                FieldSchema(\"weight\", DataType.FLOAT),\n                FieldSchema(\"bmi\", DataType.DOUBLE),\n                FieldSchema(\"is_male\", DataType.BOOL),\n            ],\n        )\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        doc.set_any(\"id\", schema.field(\"id\")._get_object(), 1111111111111111)\n        doc.set_any(\"salary\", schema.field(\"salary\")._get_object(), 1000)\n        doc.set_any(\"age\", schema.field(\"age\")._get_object(), 18)\n        doc.set_any(\"create_at\", schema.field(\"create_at\")._get_object(), 1640995200)\n        doc.set_any(\"author\", schema.field(\"author\")._get_object(), \"Tom\")\n        doc.set_any(\"weight\", schema.field(\"weight\")._get_object(), 80.0)\n        doc.set_any(\"bmi\", schema.field(\"bmi\")._get_object(), 80.0 / 200.0)\n        doc.set_any(\"is_male\", schema.field(\"is_male\")._get_object(), True)\n\n        py_doc = convert_to_py_doc(doc, schema)\n        assert py_doc.id == \"1\"\n        assert py_doc.field(\"id\") == 1111111111111111\n        assert py_doc.field(\"salary\") == 1000\n        assert py_doc.field(\"age\") == 18\n        assert py_doc.field(\"create_at\") == 1640995200\n        assert py_doc.field(\"author\") == \"Tom\"\n        assert py_doc.field(\"weight\") == 80.0\n        assert py_doc.field(\"bmi\") == 80.0 / 200.0\n        assert py_doc.field(\"is_male\") == True\n\n    def test_with_array_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\"tags\", DataType.ARRAY_STRING),\n                FieldSchema(\"ids\", DataType.ARRAY_UINT64),\n                FieldSchema(\"marks\", DataType.ARRAY_UINT32),\n                FieldSchema(\"x\", DataType.ARRAY_INT32),\n                
FieldSchema(\"y\", DataType.ARRAY_INT64),\n                FieldSchema(\"scores\", DataType.ARRAY_FLOAT),\n                FieldSchema(\"ratios\", DataType.ARRAY_DOUBLE),\n                FieldSchema(\"results\", DataType.ARRAY_BOOL),\n            ],\n        )\n\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        doc.set_any(\n            \"tags\", schema.field(\"tags\")._get_object(), [\"tag1\", \"tag2\", \"tag3\"]\n        )\n        doc.set_any(\n            \"ids\",\n            schema.field(\"ids\")._get_object(),\n            [111111111111, 222222222222, 3333333333333],\n        )\n        doc.set_any(\"marks\", schema.field(\"marks\")._get_object(), [1000, 2000, 3000])\n        doc.set_any(\"x\", schema.field(\"x\")._get_object(), [1, 2, 3])\n        doc.set_any(\"y\", schema.field(\"y\")._get_object(), [100, 200, 300])\n        doc.set_any(\"scores\", schema.field(\"scores\")._get_object(), [0.1, 0.2, 0.3])\n        doc.set_any(\"ratios\", schema.field(\"ratios\")._get_object(), [0.1, 0.2, 0.3])\n        doc.set_any(\n            \"results\", schema.field(\"results\")._get_object(), [True, False, True]\n        )\n\n        py_doc = convert_to_py_doc(doc, schema)\n        assert py_doc.field(\"tags\") == [\"tag1\", \"tag2\", \"tag3\"]\n        assert py_doc.field(\"ids\") == [111111111111, 222222222222, 3333333333333]\n        assert py_doc.field(\"marks\") == [1000, 2000, 3000]\n        assert py_doc.field(\"x\") == [1, 2, 3]\n        assert py_doc.field(\"y\") == [100, 200, 300]\n\n        scores = doc.get_any(\"scores\", DataType.ARRAY_FLOAT)\n        for i in range(len(scores)):\n            assert math.isclose(scores[i], py_doc.field(\"scores\")[i], rel_tol=1e-1)\n        ratios = doc.get_any(\"ratios\", DataType.ARRAY_DOUBLE)\n        for i in range(len(ratios)):\n            assert math.isclose(ratios[i], py_doc.field(\"ratios\")[i], rel_tol=1e-1)\n        results = doc.get_any(\"results\", DataType.ARRAY_BOOL)\n        for i in 
range(len(results)):\n            assert results[i] == py_doc.field(\"results\")[i]\n\n    def test_with_dense_vector_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n            vectors=[\n                VectorSchema(\n                    name=\"embedding\",\n                    data_type=DataType.VECTOR_FP16,\n                    dimension=4,\n                ),\n                VectorSchema(\n                    name=\"image\",\n                    data_type=DataType.VECTOR_FP32,\n                    dimension=8,\n                ),\n                VectorSchema(\n                    name=\"text\",\n                    data_type=DataType.VECTOR_INT8,\n                    dimension=32,\n                ),\n            ],\n        )\n\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        doc.set_any(\"embedding\", schema.vector(\"embedding\")._get_object(), [1.1] * 4)\n        doc.set_any(\"image\", schema.vector(\"image\")._get_object(), [2.2] * 8)\n        doc.set_any(\"text\", schema.vector(\"text\")._get_object(), [4] * 32)\n\n        py_doc = convert_to_py_doc(doc, schema)\n        assert py_doc.id == \"1\"\n\n        embedding_vector = py_doc.vector(\"embedding\")\n        assert len(embedding_vector) == 4\n        for i in range(4):\n            assert math.isclose(\n                py_doc.vector(\"embedding\")[i], embedding_vector[i], rel_tol=1e-1\n            )\n\n        image_vector = py_doc.vector(\"image\")\n        assert len(image_vector) == 8\n        for i in range(8):\n            assert math.isclose(\n                py_doc.vector(\"image\")[i], image_vector[i], rel_tol=1e-1\n            )\n\n        text_vector = py_doc.vector(\"text\")\n        assert len(text_vector) == 32\n        for i in range(32):\n            assert py_doc.vector(\"text\")[i] == text_vector[i]\n\n    def test_with_sparse_vector_fields(self):\n        schema = CollectionSchema(\n            name=\"test_collection\",\n          
  vectors=[\n                VectorSchema(\n                    name=\"author\",\n                    data_type=DataType.SPARSE_VECTOR_FP32,\n                ),\n                VectorSchema(\n                    name=\"content\",\n                    data_type=DataType.SPARSE_VECTOR_FP16,\n                ),\n            ],\n        )\n\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        doc.set_any(\n            \"author\", schema.vector(\"author\")._get_object(), {1: 1.1, 2: 2.2, 3: 3.3}\n        )\n        doc.set_any(\n            \"content\", schema.vector(\"content\")._get_object(), {4: 4.4, 5: 5.5, 6: 6.6}\n        )\n\n        py_doc = convert_to_py_doc(doc, schema)\n        assert py_doc.id == \"1\"\n\n        author_vector = py_doc.vector(\"author\")\n        assert isinstance(author_vector, dict)\n        for key, value in doc.get_any(\"author\", DataType.SPARSE_VECTOR_FP32).items():\n            assert math.isclose(author_vector[key], value, rel_tol=1e-1)\n\n        content_vector = py_doc.vector(\"content\")\n        assert isinstance(content_vector, dict)\n        for key, value in doc.get_any(\"content\", DataType.SPARSE_VECTOR_FP16).items():\n            assert math.isclose(content_vector[key], value, rel_tol=1e-1)\n"
  },
  {
    "path": "python/tests/test_doc.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport math\nimport pytest\n\n\nfrom _zvec import _Doc\nfrom zvec import FieldSchema, VectorSchema, Doc, DataType\n\n\n# ----------------------------\n# PyDoc Test Case\n# ----------------------------\nclass TestPyDoc:\n    def test_default(self):\n        Doc(id=\"1\")\n\n    def test_with_single_vector(self):\n        doc = Doc(id=\"1\", vectors={\"dense\": [1, 2, 3]})\n        assert doc is not None\n        assert doc.id == \"1\"\n        assert doc.vector(\"dense\") == [1, 2, 3]\n\n    def test_with_hybrid_vectors(self):\n        doc = Doc(\n            id=\"1\", vectors={\"dense\": [1, 2, 3], \"sparse\": {1: 1.0, 2: 2.0, 3: 3.0}}\n        )\n        assert doc is not None\n        assert doc.id == \"1\"\n        assert doc.vector(\"dense\") == [1, 2, 3]\n        assert doc.vector(\"sparse\") == {1: 1.0, 2: 2.0, 3: 3.0}\n\n    def test_with_multi_vectors(self):\n        doc = Doc(\n            id=\"1\",\n            vectors={\n                \"image\": [1, 2, 3],\n                \"description\": [4, 5, 6],\n                \"keys\": {1: 1.0, 2: 2.0, 3: 3.0},\n            },\n            fields={\"author\": \"Tom\", \"age\": 19, \"is_male\": True, \"weight\": 60.5},\n        )\n        assert doc is not None\n        assert doc.id == \"1\"\n        assert doc.vector(\"image\") == [1, 2, 3]\n        assert 
doc.vector(\"description\") == [4, 5, 6]\n        assert doc.vector(\"keys\") == {1: 1.0, 2: 2.0, 3: 3.0}\n        assert doc.field(\"author\") == \"Tom\"\n        assert doc.field(\"age\") == 19\n        assert doc.field(\"is_male\") == True\n        assert doc.field(\"weight\") == 60.5\n\n    def test_with_numpy_array(self):\n        import numpy as np\n\n        doc = Doc._from_tuple(\n            (\n                \"1\",\n                0.0,\n                None,\n                {\n                    \"image\": np.array([1, 2, 3]),\n                    \"description\": np.random.random(512),\n                    \"keys\": {1: 1.0, 2: 2.0, 3: 3.0},\n                },\n            )\n        )\n        assert doc is not None\n        assert doc.id == \"1\"\n        assert doc.vector(\"image\") == [1, 2, 3]\n        assert doc.vector(\"keys\") == {1: 1.0, 2: 2.0, 3: 3.0}\n\n\n# ----------------------------\n# CppDoc Test Case\n# ----------------------------\nclass TestCppDoc:\n    def test_default(self):\n        doc = _Doc()\n        assert doc is not None\n\n    def test_doc_set_pk(self):\n        doc = _Doc()\n        doc.set_pk(\"1\")\n        assert doc.pk() == \"1\"\n\n    def test_doc_set_score(self):\n        doc = _Doc()\n        doc.set_score(0.9)\n        assert math.isclose(doc.score(), 0.9, rel_tol=1e-6)\n\n    def test_doc_get_null_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"author\", DataType.STRING, nullable=True)\n        doc.set_any(\"author\", schema._get_object(), None)\n        assert doc.has_field(\"author\")\n        assert doc.get_any(\"author\", schema.data_type) is None\n\n    def test_doc_get_set_has_null_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"author\", DataType.STRING, nullable=False)\n        with pytest.raises(ValueError):\n            doc.set_any(\"author\", schema._get_object(), None)\n\n    def test_doc_get_set_has_string_field(self):\n        doc = _Doc()\n        schema = 
FieldSchema(\"author\", DataType.STRING)\n        doc.set_any(\"author\", schema._get_object(), \"Tom\")\n        assert doc.has_field(\"author\")\n        assert doc.get_any(\"author\", DataType.STRING) == \"Tom\"\n\n    def test_doc_get_set_has_bool_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"is_male\", DataType.BOOL)\n        doc.set_any(\"is_male\", schema._get_object(), True)\n        assert doc.has_field(\"is_male\")\n        assert doc.get_any(\"is_male\", DataType.BOOL) == True\n\n    def test_doc_get_set_has_int32_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"age\", DataType.INT32)\n        doc.set_any(\"age\", schema._get_object(), 19)\n        assert doc.has_field(\"age\")\n        assert doc.get_any(\"age\", DataType.INT32) == 19\n\n    def test_doc_get_set_has_int64_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"id\", DataType.INT64)\n        doc.set_any(\"id\", schema._get_object(), 1111111111111111111)\n        assert doc.has_field(\"id\")\n        assert doc.get_any(\"id\", DataType.INT64) == 1111111111111111111\n\n    def test_doc_get_set_has_float_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"weight\", DataType.FLOAT)\n        doc.set_any(\"weight\", schema._get_object(), 60.5)\n        assert doc.has_field(\"weight\")\n        assert math.isclose(doc.get_any(\"weight\", DataType.FLOAT), 60.5, rel_tol=1e-6)\n\n    def test_doc_get_set_has_double_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"height\", DataType.DOUBLE)\n        doc.set_any(\"height\", schema._get_object(), 1.77777777777)\n        assert doc.has_field(\"height\")\n        assert math.isclose(\n            doc.get_any(\"height\", DataType.DOUBLE), 1.7777777777, rel_tol=1e-9\n        )\n\n    def test_doc_get_set_has_uint32_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"id\", DataType.UINT32)\n        doc.set_any(\"id\", schema._get_object(), 4294967295)\n       
 assert doc.has_field(\"id\")\n        assert doc.get_any(\"id\", DataType.UINT32) == 4294967295\n\n    def test_doc_get_set_has_uint64_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"id\", DataType.UINT64)\n        doc.set_any(\"id\", schema._get_object(), 18446744073709551615)\n        assert doc.has_field(\"id\")\n        assert doc.get_any(\"id\", DataType.UINT64) == 18446744073709551615\n\n    def test_doc_get_set_has_array_string_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"tags\", DataType.ARRAY_STRING)\n        doc.set_any(\"tags\", schema._get_object(), [\"tag1\", \"tag2\", \"tag3\"])\n        assert doc.has_field(\"tags\")\n        assert doc.get_any(\"tags\", DataType.ARRAY_STRING) == [\"tag1\", \"tag2\", \"tag3\"]\n\n    def test_doc_get_set_has_array_int32_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"ids\", DataType.ARRAY_INT32)\n        doc.set_any(\"ids\", schema._get_object(), [1, 2, 3])\n        assert doc.has_field(\"ids\")\n        assert doc.get_any(\"ids\", DataType.ARRAY_INT32) == [1, 2, 3]\n\n    def test_doc_get_set_has_array_int64_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"ids\", DataType.ARRAY_INT64)\n        doc.set_any(\"ids\", schema._get_object(), [1, 2, 3])\n        assert doc.has_field(\"ids\")\n        assert doc.get_any(\"ids\", DataType.ARRAY_INT64) == [1, 2, 3]\n\n    def test_doc_get_set_has_array_float_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"weights\", DataType.ARRAY_FLOAT)\n        doc.set_any(\"weights\", schema._get_object(), [1.0, 2.0, 3.0])\n        assert doc.has_field(\"weights\")\n        assert doc.get_any(\"weights\", DataType.ARRAY_FLOAT) == [1.0, 2.0, 3.0]\n\n    def test_doc_get_set_has_array_double_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"heights\", DataType.ARRAY_DOUBLE)\n        doc.set_any(\"heights\", schema._get_object(), [1.0, 2.0, 3.0])\n        assert 
doc.has_field(\"heights\")\n        assert doc.get_any(\"heights\", DataType.ARRAY_DOUBLE) == [1.0, 2.0, 3.0]\n\n    def test_doc_get_set_has_array_bool_field(self):\n        doc = _Doc()\n        schema = FieldSchema(\"bools\", DataType.ARRAY_BOOL)\n        doc.set_any(\"bools\", schema._get_object(), [True, False, True])\n        assert doc.has_field(\"bools\")\n        assert doc.get_any(\"bools\", DataType.ARRAY_BOOL) == [True, False, True]\n\n    def test_doc_get_set_has_vector_fp16(self):\n        doc = _Doc()\n        schema = VectorSchema(\"image\", DataType.VECTOR_FP16)\n        doc.set_any(\"image\", schema._get_object(), [1.0, 2.0, 3.0])\n        assert doc.has_field(\"image\")\n        image_vector = doc.get_any(\"image\", DataType.VECTOR_FP16)\n        assert image_vector is not None\n        for i in range(len(image_vector)):\n            assert math.isclose(image_vector[i], [1.0, 2.0, 3.0][i], rel_tol=1e-6)\n\n    def test_doc_get_set_has_vector_fp32(self):\n        doc = _Doc()\n        schema = VectorSchema(\"image\", DataType.VECTOR_FP32)\n        doc.set_any(\"image\", schema._get_object(), [1.111111, 2.222222, 3.333333])\n        assert doc.has_field(\"image\")\n        vector = doc.get_any(\"image\", DataType.VECTOR_FP32)\n        assert vector is not None\n        for i in range(len(vector)):\n            assert math.isclose(\n                vector[i], [1.111111, 2.222222, 3.333333][i], rel_tol=1e-6\n            )\n\n    def test_doc_get_set_has_vector_int8(self):\n        doc = _Doc()\n        schema = VectorSchema(\"image\", DataType.VECTOR_INT8)\n        doc.set_any(\"image\", schema._get_object(), [1, 2, 3])\n        assert doc.has_field(\"image\")\n        assert doc.get_any(\"image\", DataType.VECTOR_INT8) == [1, 2, 3]\n\n    def test_doc_get_set_has_sparse_vector_fp32(self):\n        doc = _Doc()\n        sparse = {1: 1.111111, 2: 2.222222, 3: 3.333333}\n        schema = VectorSchema(\"key\", DataType.SPARSE_VECTOR_FP32)\n        
doc.set_any(\"key\", schema._get_object(), sparse)\n        assert doc.has_field(\"key\")\n        vector = doc.get_any(\"key\", DataType.SPARSE_VECTOR_FP32)\n        assert vector is not None\n        assert isinstance(vector, dict)\n        for key, value in sparse.items():\n            assert math.isclose(vector[key], value, rel_tol=1e-6)\n\n    def test_doc_get_set_has_sparse_vector_fp16(self):\n        doc = _Doc()\n        sparse = {1: 1.1, 2: 2.2, 3: 3.3}\n        schema = VectorSchema(\"key\", DataType.SPARSE_VECTOR_FP16)\n        doc.set_any(\"key\", schema._get_object(), sparse)\n        assert doc.has_field(\"key\")\n        vector = doc.get_any(\"key\", DataType.SPARSE_VECTOR_FP16)\n        assert vector is not None\n        assert isinstance(vector, dict)\n        for key, value in sparse.items():\n            assert math.isclose(vector[key], value, rel_tol=1e-1)\n"
  },
  {
    "path": "python/tests/test_embedding.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport os\nfrom http import HTTPStatus\nfrom unittest.mock import MagicMock, patch, Mock\n\nimport numpy as np\nimport pytest\nfrom zvec.extension import (\n    BM25EmbeddingFunction,\n    DefaultLocalDenseEmbedding,\n    DefaultLocalSparseEmbedding,\n    OpenAIDenseEmbedding,\n    QwenDenseEmbedding,\n    QwenSparseEmbedding,\n)\n\n# Environment variable to control integration tests\n# Set ZVEC_RUN_INTEGRATION_TESTS=1 to run real API/model tests\nRUN_INTEGRATION_TESTS = os.environ.get(\"ZVEC_RUN_INTEGRATION_TESTS\", \"0\") == \"1\"\n\n\n# ----------------------------\n# QwenDenseEmbedding Test Case\n# ----------------------------\nclass TestQwenDenseEmbedding:\n    def test_init_with_api_key(self):\n        # Test initialization with explicit API key\n        embedding_func = QwenDenseEmbedding(dimension=128, api_key=\"test_key\")\n        assert embedding_func.dimension == 128\n        assert embedding_func.model == \"text-embedding-v4\"\n        assert embedding_func._api_key == \"test_key\"\n\n    @patch.dict(os.environ, {\"DASHSCOPE_API_KEY\": \"env_key\"})\n    def test_init_with_env_api_key(self):\n        # Test initialization with API key from environment\n        embedding_func = QwenDenseEmbedding(dimension=128)\n        assert embedding_func._api_key == \"env_key\"\n\n    @patch.dict(os.environ, 
{\"DASHSCOPE_API_KEY\": \"\"})\n    def test_init_with_empty_env_api_key(self):\n        # Test initialization with empty API key from environment\n        with pytest.raises(ValueError, match=\"DashScope API key is required\"):\n            QwenDenseEmbedding(dimension=128)\n\n    def test_model_property(self):\n        embedding_func = QwenDenseEmbedding(dimension=128, api_key=\"test_key\")\n        assert embedding_func.model == \"text-embedding-v4\"\n\n        embedding_func = QwenDenseEmbedding(\n            dimension=128, model=\"custom-model\", api_key=\"test_key\"\n        )\n        assert embedding_func.model == \"custom-model\"\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_with_empty_text(self, mock_require_module):\n        # Test embed method with empty text raises ValueError\n        embedding_func = QwenDenseEmbedding(dimension=128, api_key=\"test_key\")\n\n        with pytest.raises(\n            ValueError, match=\"Input text cannot be empty or whitespace only\"\n        ):\n            embedding_func.embed(\"\")\n\n        with pytest.raises(TypeError):\n            embedding_func.embed(None)\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_success(self, mock_require_module):\n        # Test successful embedding\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        mock_response.output = {\"embeddings\": [{\"embedding\": [0.1, 0.2, 0.3]}]}\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenDenseEmbedding(dimension=3, api_key=\"test_key\")\n        # Clear cache to avoid interference\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test text\")\n\n        assert result == [0.1, 0.2, 0.3]\n        
mock_dashscope.TextEmbedding.call.assert_called_once_with(\n            model=\"text-embedding-v4\",\n            input=\"test text\",\n            dimension=3,\n            output_type=\"dense\",\n        )\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_http_error(self, mock_require_module):\n        # Test embedding with HTTP error\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.BAD_REQUEST\n        mock_response.message = \"Bad Request\"\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenDenseEmbedding(dimension=128, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_invalid_response(self, mock_require_module):\n        # Test embedding with invalid response (wrong number of embeddings)\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        mock_response.output = {\"embeddings\": []}\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenDenseEmbedding(dimension=128, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError):\n            embedding_func.embed(\"test text\")\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_embed_success(self):\n        \"\"\"Integration test with real DashScope API.\n\n        To run this test, set environment variable:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            export DASHSCOPE_API_KEY=your-api-key\n        \"\"\"\n        embedding_func = QwenDenseEmbedding(dimension=128)\n        dense = embedding_func(\"test text\")\n        assert len(dense) == 128\n\n\n# ----------------------------\n# QwenSparseEmbedding Test Case\n# ----------------------------\nclass TestQwenSparseEmbedding:\n    \"\"\"Test suite for QwenSparseEmbedding (Qwen sparse embedding via DashScope API).\"\"\"\n\n    def test_init_with_api_key(self):\n        \"\"\"Test initialization with explicit API key.\"\"\"\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        assert embedding_func._dimension == 1024\n        assert embedding_func.model == \"text-embedding-v4\"\n        assert embedding_func._api_key == \"test_key\"\n        # encoding_type defaults to \"query\" via extra_params\n        assert embedding_func.extra_params.get(\"encoding_type\", \"query\") == \"query\"\n\n    def test_init_with_custom_encoding_type(self):\n        \"\"\"Test initialization with custom encoding type.\"\"\"\n        embedding_func = QwenSparseEmbedding(\n            dimension=1024, encoding_type=\"document\", api_key=\"test_key\"\n        )\n        assert embedding_func.extra_params.get(\"encoding_type\") == \"document\"\n\n    @patch.dict(os.environ, {\"DASHSCOPE_API_KEY\": \"env_key\"})\n    def test_init_with_env_api_key(self):\n        \"\"\"Test initialization with API key from environment.\"\"\"\n        embedding_func = QwenSparseEmbedding(dimension=1024)\n        assert embedding_func._api_key == \"env_key\"\n\n    @patch.dict(os.environ, {\"DASHSCOPE_API_KEY\": \"\"})\n    def test_init_without_api_key(self):\n        \"\"\"Test initialization fails without API 
key.\"\"\"\n        with pytest.raises(ValueError, match=\"DashScope API key is required\"):\n            QwenSparseEmbedding(dimension=1024)\n\n    def test_model_property(self):\n        \"\"\"Test model property.\"\"\"\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        assert embedding_func.model == \"text-embedding-v4\"\n\n        embedding_func = QwenSparseEmbedding(\n            dimension=1024, model=\"text-embedding-v3\", api_key=\"test_key\"\n        )\n        assert embedding_func.model == \"text-embedding-v3\"\n\n    def test_encoding_type_property(self):\n        \"\"\"Test encoding_type via extra_params.\"\"\"\n        query_emb = QwenSparseEmbedding(\n            dimension=1024, encoding_type=\"query\", api_key=\"test_key\"\n        )\n        assert query_emb.extra_params.get(\"encoding_type\") == \"query\"\n\n        doc_emb = QwenSparseEmbedding(\n            dimension=1024, encoding_type=\"document\", api_key=\"test_key\"\n        )\n        assert doc_emb.extra_params.get(\"encoding_type\") == \"document\"\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_with_empty_text(self, mock_require_module):\n        \"\"\"Test embed method with empty text raises ValueError.\"\"\"\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n\n        with pytest.raises(\n            ValueError, match=\"Input text cannot be empty or whitespace only\"\n        ):\n            embedding_func.embed(\"\")\n\n        with pytest.raises(\n            ValueError, match=\"Input text cannot be empty or whitespace only\"\n        ):\n            embedding_func.embed(\"   \")\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_with_non_string_input(self, mock_require_module):\n        \"\"\"Test embed method with non-string input raises TypeError.\"\"\"\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n\n  
      with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            embedding_func.embed(123)\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            embedding_func.embed(None)\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_success(self, mock_require_module):\n        \"\"\"Test successful sparse embedding generation.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        # Sparse embedding returns array of {index, value, token} objects\n        mock_response.output = {\n            \"embeddings\": [\n                {\n                    \"sparse_embedding\": [\n                        {\"index\": 10, \"value\": 0.5, \"token\": \"机器\"},\n                        {\"index\": 245, \"value\": 0.8, \"token\": \"学习\"},\n                        {\"index\": 1023, \"value\": 1.2, \"token\": \"算法\"},\n                    ]\n                }\n            ]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        # Clear cache to avoid interference\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test text\")\n\n        # Verify result is a dict\n        assert isinstance(result, dict)\n        # Verify keys are integers\n        assert all(isinstance(k, int) for k in result.keys())\n        # Verify values are floats\n        assert all(isinstance(v, float) for v in result.values())\n        # Verify all values are positive\n        assert all(v > 0 for v in result.values())\n        # Verify sorted by indices\n        keys = list(result.keys())\n        assert keys == sorted(keys)\n        # Verify specific keys\n        assert keys == [10, 245, 1023]\n\n        
mock_dashscope.TextEmbedding.call.assert_called_once_with(\n            model=\"text-embedding-v4\",\n            input=\"test text\",\n            dimension=1024,\n            output_type=\"sparse\",\n            text_type=\"query\",\n        )\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_with_document_encoding_type(self, mock_require_module):\n        \"\"\"Test embedding with document encoding type.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        mock_response.output = {\n            \"embeddings\": [\n                {\n                    \"sparse_embedding\": [\n                        {\"index\": 5, \"value\": 0.3, \"token\": \"文档\"},\n                        {\"index\": 100, \"value\": 0.7, \"token\": \"内容\"},\n                        {\"index\": 500, \"value\": 0.9, \"token\": \"检索\"},\n                    ]\n                }\n            ]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(\n            dimension=1024, encoding_type=\"document\", api_key=\"test_key\"\n        )\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test document\")\n\n        assert isinstance(result, dict)\n        assert list(result.keys()) == [5, 100, 500]\n\n        # Verify text_type parameter is \"document\"\n        call_args = mock_dashscope.TextEmbedding.call.call_args\n        assert call_args[1][\"text_type\"] == \"document\"\n        assert call_args[1][\"output_type\"] == \"sparse\"\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_output_sorted_by_indices(self, mock_require_module):\n        \"\"\"Test that output is always sorted by indices in ascending order.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = 
MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        # Return unsorted indices\n        mock_response.output = {\n            \"embeddings\": [\n                {\n                    \"sparse_embedding\": [\n                        {\"index\": 9999, \"value\": 1.5, \"token\": \"A\"},\n                        {\"index\": 5, \"value\": 2.0, \"token\": \"B\"},\n                        {\"index\": 1234, \"value\": 0.8, \"token\": \"C\"},\n                        {\"index\": 77, \"value\": 3.2, \"token\": \"D\"},\n                        {\"index\": 500, \"value\": 1.1, \"token\": \"E\"},\n                    ]\n                }\n            ]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test sorting\")\n\n        # Verify keys are sorted\n        result_keys = list(result.keys())\n        assert result_keys == sorted(result_keys)\n        # Verify expected sorted order\n        assert result_keys == [5, 77, 500, 1234, 9999]\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_filters_zero_values(self, mock_require_module):\n        \"\"\"Test that zero and negative values are filtered out.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        # Include zero and negative values\n        mock_response.output = {\n            \"embeddings\": [\n                {\n                    \"sparse_embedding\": [\n                        {\"index\": 10, \"value\": 0.5, \"token\": \"正\"},\n                        {\n                            \"index\": 20,\n                            \"value\": 0.0,\n                            \"token\": \"零\",\n                        },  # 
Should be filtered\n                        {\n                            \"index\": 30,\n                            \"value\": -0.3,\n                            \"token\": \"负\",\n                        },  # Should be filtered\n                        {\"index\": 40, \"value\": 0.8, \"token\": \"正\"},\n                        {\n                            \"index\": 50,\n                            \"value\": 0.0,\n                            \"token\": \"零\",\n                        },  # Should be filtered\n                    ]\n                }\n            ]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test filtering\")\n\n        # Only positive values should remain\n        assert list(result.keys()) == [10, 40]\n        assert all(v > 0 for v in result.values())\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_http_error(self, mock_require_module):\n        \"\"\"Test embedding with HTTP error.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.BAD_REQUEST\n        mock_response.message = \"Bad Request\"\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError, match=\"DashScope API error\"):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_invalid_response_no_embeddings(self, mock_require_module):\n        \"\"\"Test embedding with invalid response (no 
embeddings).\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        mock_response.output = {\"embeddings\": []}\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError, match=\"Expected exactly 1 embedding\"):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_invalid_response_not_dict(self, mock_require_module):\n        \"\"\"Test embedding with invalid response (sparse_embedding not list).\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        # sparse_embedding should be list, not dict\n        mock_response.output = {\n            \"embeddings\": [{\"sparse_embedding\": {\"index\": 10, \"value\": 0.5}}]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(\n            ValueError, match=\"'sparse_embedding' field is missing or not a list\"\n        ):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_callable_interface(self, mock_require_module):\n        \"\"\"Test that embedding function is callable.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_response = MagicMock()\n        mock_response.status_code = HTTPStatus.OK\n        mock_response.output = {\n            \"embeddings\": [\n                {\n                    
\"sparse_embedding\": [\n                        {\"index\": 100, \"value\": 1.0, \"token\": \"测试\"},\n                        {\"index\": 200, \"value\": 0.5, \"token\": \"调用\"},\n                    ]\n                }\n            ]\n        }\n        mock_dashscope.TextEmbedding.call.return_value = mock_response\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        # Test calling the function directly\n        result = embedding_func(\"test text\")\n        assert isinstance(result, dict)\n        assert list(result.keys()) == [100, 200]\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_embed_api_connection_error(self, mock_require_module):\n        \"\"\"Test handling of API connection errors.\"\"\"\n        mock_dashscope = MagicMock()\n        mock_dashscope.TextEmbedding.call.side_effect = Exception(\"Connection timeout\")\n        mock_require_module.return_value = mock_dashscope\n\n        embedding_func = QwenSparseEmbedding(dimension=1024, api_key=\"test_key\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(RuntimeError, match=\"Failed to call DashScope API\"):\n            embedding_func.embed(\"test text\")\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_embed_success(self):\n        \"\"\"Integration test with real DashScope API.\n\n        To run this test, set environment variable:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            export DASHSCOPE_API_KEY=your-api-key\n        \"\"\"\n        # Test query embedding\n        query_emb = QwenSparseEmbedding(dimension=1024, encoding_type=\"query\")\n        query_vec = query_emb.embed(\"machine learning\")\n\n        assert isinstance(query_vec, dict)\n        assert len(query_vec) > 0\n        assert all(isinstance(k, int) for k in query_vec.keys())\n        assert all(isinstance(v, float) and v > 0 for v in query_vec.values())\n\n        # Verify sorted output\n        keys = list(query_vec.keys())\n        assert keys == sorted(keys)\n\n        # Test document embedding\n        doc_emb = QwenSparseEmbedding(dimension=1024, encoding_type=\"document\")\n        doc_vec = doc_emb.embed(\"Machine learning is a subset of AI\")\n\n        assert isinstance(doc_vec, dict)\n        assert len(doc_vec) > 0\n\n        # Verify sorted output\n        doc_keys = list(doc_vec.keys())\n        assert doc_keys == sorted(doc_keys)\n\n\n# ----------------------------\n# OpenAIDenseEmbedding Test Case\n# ----------------------------\nclass TestOpenAIDenseEmbedding:\n    def test_init_with_api_key(self):\n        \"\"\"Test initialization with explicit API key.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test-key\")\n        assert embedding_func.dimension == 1536  # Default for text-embedding-3-small\n        assert embedding_func.model == \"text-embedding-3-small\"\n        assert embedding_func._api_key == \"sk-test-key\"\n\n    @patch.dict(os.environ, {\"OPENAI_API_KEY\": \"sk-env-key\"})\n    def test_init_with_env_api_key(self):\n        \"\"\"Test initialization with API key from environment.\"\"\"\n        embedding_func = OpenAIDenseEmbedding()\n        assert 
embedding_func._api_key == \"sk-env-key\"\n\n    @patch.dict(os.environ, {\"OPENAI_API_KEY\": \"\"})\n    def test_init_without_api_key(self):\n        \"\"\"Test initialization fails without API key.\"\"\"\n        with pytest.raises(ValueError, match=\"OpenAI API key is required\"):\n            OpenAIDenseEmbedding()\n\n    def test_init_with_custom_dimension(self):\n        \"\"\"Test initialization with custom dimension.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(\n            model=\"text-embedding-3-large\", dimension=1024, api_key=\"sk-test\"\n        )\n        assert embedding_func.dimension == 1024\n        assert embedding_func.model == \"text-embedding-3-large\"\n\n    def test_init_with_base_url(self):\n        \"\"\"Test initialization with custom base URL.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(\n            api_key=\"sk-test\", base_url=\"https://custom.openai.com/\"\n        )\n        assert embedding_func._base_url == \"https://custom.openai.com/\"\n\n    def test_model_property(self):\n        \"\"\"Test model property.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n        assert embedding_func.model == \"text-embedding-3-small\"\n\n        embedding_func = OpenAIDenseEmbedding(\n            model=\"text-embedding-ada-002\", api_key=\"sk-test\"\n        )\n        assert embedding_func.model == \"text-embedding-ada-002\"\n\n    def test_extra_params(self):\n        \"\"\"Test extra_params property.\"\"\"\n        # Test without extra params\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n        assert embedding_func.extra_params == {}\n\n        # Test with extra params\n        embedding_func = OpenAIDenseEmbedding(\n            api_key=\"sk-test\",\n            encoding_format=\"float\",\n            user=\"test-user\",\n        )\n        assert embedding_func.extra_params == {\n            \"encoding_format\": \"float\",\n            \"user\": \"test-user\",\n        
}\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_with_empty_text(self, mock_require_module):\n        \"\"\"Test embed method with empty text raises ValueError.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n\n        with pytest.raises(\n            ValueError, match=\"Input text cannot be empty or whitespace only\"\n        ):\n            embedding_func.embed(\"\")\n\n        with pytest.raises(\n            ValueError, match=\"Input text cannot be empty or whitespace only\"\n        ):\n            embedding_func.embed(\"   \")\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_with_non_string_input(self, mock_require_module):\n        \"\"\"Test embed method with non-string input raises TypeError.\"\"\"\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            embedding_func.embed(123)\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            embedding_func.embed(None)\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_success(self, mock_require_module):\n        \"\"\"Test successful embedding generation.\"\"\"\n        # Mock OpenAI client\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        # Create mock embedding data\n        fake_embedding = [0.1, 0.2, 0.3]\n        mock_embedding_obj = Mock()\n        mock_embedding_obj.embedding = fake_embedding\n        mock_response.data = [mock_embedding_obj]\n\n        mock_client.embeddings.create.return_value = mock_response\n        mock_openai.OpenAI.return_value = mock_client\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(dimension=3, api_key=\"sk-test\")\n        embedding_func.embed.cache_clear()\n        result = 
embedding_func.embed(\"test text\")\n\n        assert result == [0.1, 0.2, 0.3]\n        mock_client.embeddings.create.assert_called_once_with(\n            model=\"text-embedding-3-small\", input=\"test text\", dimensions=3\n        )\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_with_custom_model(self, mock_require_module):\n        \"\"\"Test embedding with custom model.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        fake_embedding = [0.1] * 1536\n        mock_embedding_obj = Mock()\n        mock_embedding_obj.embedding = fake_embedding\n        mock_response.data = [mock_embedding_obj]\n\n        mock_client.embeddings.create.return_value = mock_response\n        mock_openai.OpenAI.return_value = mock_client\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(\n            model=\"text-embedding-ada-002\", api_key=\"sk-test\"\n        )\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test text\")\n\n        assert len(result) == 1536\n        mock_client.embeddings.create.assert_called_once_with(\n            model=\"text-embedding-ada-002\", input=\"test text\"\n        )\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_api_error(self, mock_require_module):\n        \"\"\"Test handling of API errors.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n\n        # Simulate API error\n        api_error = Mock()\n        api_error.__class__.__name__ = \"APIError\"\n        mock_openai.APIError = type(\"APIError\", (Exception,), {})\n        mock_openai.APIConnectionError = type(\"APIConnectionError\", (Exception,), {})\n\n        mock_client.embeddings.create.side_effect = mock_openai.APIError(\n            \"Rate limit exceeded\"\n        )\n        mock_openai.OpenAI.return_value = mock_client\n        
mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(RuntimeError, match=\"Failed to call OpenAI API\"):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_invalid_response(self, mock_require_module):\n        \"\"\"Test handling of invalid API response.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        # Empty response data\n        mock_response.data = []\n\n        mock_client.embeddings.create.return_value = mock_response\n        mock_openai.OpenAI.return_value = mock_client\n        mock_openai.APIError = type(\"APIError\", (Exception,), {})\n        mock_openai.APIConnectionError = type(\"APIConnectionError\", (Exception,), {})\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError, match=\"no embedding data returned\"):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_dimension_mismatch(self, mock_require_module):\n        \"\"\"Test handling of dimension mismatch.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        # Return embedding with wrong dimension\n        fake_embedding = [0.1] * 512\n        mock_embedding_obj = Mock()\n        mock_embedding_obj.embedding = fake_embedding\n        mock_response.data = [mock_embedding_obj]\n\n        mock_client.embeddings.create.return_value = mock_response\n        mock_openai.OpenAI.return_value = mock_client\n        mock_openai.APIError = type(\"APIError\", (Exception,), {})\n        mock_openai.APIConnectionError = 
type(\"APIConnectionError\", (Exception,), {})\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(dimension=1536, api_key=\"sk-test\")\n        embedding_func.embed.cache_clear()\n\n        with pytest.raises(ValueError, match=\"Dimension mismatch\"):\n            embedding_func.embed(\"test text\")\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_callable(self, mock_require_module):\n        \"\"\"Test that embedding function is callable.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        fake_embedding = [0.1] * 1536\n        mock_embedding_obj = Mock()\n        mock_embedding_obj.embedding = fake_embedding\n        mock_response.data = [mock_embedding_obj]\n\n        mock_client.embeddings.create.return_value = mock_response\n        mock_openai.OpenAI.return_value = mock_client\n        mock_openai.APIError = type(\"APIError\", (Exception,), {})\n        mock_openai.APIConnectionError = type(\"APIConnectionError\", (Exception,), {})\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(api_key=\"sk-test\")\n        embedding_func.embed.cache_clear()\n\n        # Test calling the function directly\n        result = embedding_func(\"test text\")\n        assert isinstance(result, list)\n        assert len(result) == 1536\n\n    @patch(\"zvec.extension.openai_function.require_module\")\n    def test_embed_with_base_url(self, mock_require_module):\n        \"\"\"Test embedding with custom base URL.\"\"\"\n        mock_openai = Mock()\n        mock_client = Mock()\n        mock_response = Mock()\n\n        fake_embedding = [0.1] * 1536\n        mock_embedding_obj = Mock()\n        mock_embedding_obj.embedding = fake_embedding\n        mock_response.data = [mock_embedding_obj]\n\n        mock_client.embeddings.create.return_value = mock_response\n        
mock_openai.OpenAI.return_value = mock_client\n        mock_openai.APIError = type(\"APIError\", (Exception,), {})\n        mock_openai.APIConnectionError = type(\"APIConnectionError\", (Exception,), {})\n        mock_require_module.return_value = mock_openai\n\n        embedding_func = OpenAIDenseEmbedding(\n            api_key=\"sk-test\", base_url=\"https://custom.openai.com/\"\n        )\n        embedding_func.embed.cache_clear()\n        result = embedding_func.embed(\"test text\")\n\n        # Verify client was created with custom base URL\n        mock_openai.OpenAI.assert_called_once_with(\n            api_key=\"sk-test\", base_url=\"https://custom.openai.com/\"\n        )\n        assert len(result) == 1536\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_embed_success(self):\n        \"\"\"Integration test with real OpenAI API.\n\n        To run this test, set environment variable:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            export OPENAI_API_KEY=sk-...\n        \"\"\"\n        embedding_func = OpenAIDenseEmbedding(\n            model=\"text-embedding-v4\",\n            dimension=256,\n            base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",\n        )\n        vector = embedding_func.embed(\"Hello, world!\")\n        assert len(vector) == 256\n        assert isinstance(vector, list)\n        assert all(isinstance(x, float) for x in vector)\n\n\n# ----------------------------\n# DefaultLocalDenseEmbedding Test Case\n# ----------------------------\nclass TestDefaultLocalDenseEmbedding:\n    \"\"\"Test cases for DefaultLocalDenseEmbedding.\"\"\"\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_success(self, mock_require_module):\n        \"\"\"Test successful initialization with mocked model.\"\"\"\n        # Mock sentence_transformers module\n  
      mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_model.device = \"cpu\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        # Initialize embedding function\n        emb_func = DefaultLocalDenseEmbedding()\n\n        # Assertions\n        assert emb_func.dimension == 384\n        assert emb_func.model_name == \"all-MiniLM-L6-v2\"\n        assert emb_func.model_source == \"huggingface\"\n        assert emb_func.device == \"cpu\"\n        mock_st.SentenceTransformer.assert_called_once_with(\n            \"all-MiniLM-L6-v2\", device=None, trust_remote_code=True\n        )\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_with_custom_device(self, mock_require_module):\n        \"\"\"Test initialization with custom device.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_model.device = \"cuda\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding(device=\"cuda\")\n\n        assert emb_func.device == \"cuda\"\n        mock_st.SentenceTransformer.assert_called_once_with(\n            \"all-MiniLM-L6-v2\", device=\"cuda\", trust_remote_code=True\n        )\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_with_modelscope(self, mock_require_module):\n        \"\"\"Test initialization with ModelScope as model source.\"\"\"\n        mock_st = Mock()\n        mock_ms = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_model.device = \"cpu\"\n        mock_st.SentenceTransformer.return_value = mock_model\n\n        def require_module_side_effect(module_name):\n            if module_name == \"sentence_transformers\":\n                return mock_st\n            elif module_name == \"modelscope\":\n                return mock_ms\n            raise ImportError(f\"No module named '{module_name}'\")\n\n        mock_require_module.side_effect = require_module_side_effect\n\n        # Mock snapshot_download at the correct import location\n        with patch(\n            \"modelscope.hub.snapshot_download.snapshot_download\",\n            return_value=\"/path/to/cached/model\",\n        ):\n            emb_func = DefaultLocalDenseEmbedding(model_source=\"modelscope\")\n\n        # Assertions\n        assert emb_func.dimension == 384\n        assert emb_func.model_name == \"iic/nlp_gte_sentence-embedding_chinese-small\"\n        assert emb_func.model_source == \"modelscope\"\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_with_invalid_model_source(self, mock_require_module):\n        \"\"\"Test initialization with invalid model_source raises ValueError.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        with pytest.raises(ValueError, match=\"Invalid model_source\"):\n            
DefaultLocalDenseEmbedding(model_source=\"invalid_source\")\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_success(self, mock_require_module):\n        \"\"\"Test successful embedding generation.\"\"\"\n        # Mock embedding output\n        fake_embedding = np.random.rand(384).astype(np.float32)\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n\n        # Configure encode method\n        mock_model.encode = Mock(return_value=fake_embedding)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n        result = emb_func.embed(\"Hello, world!\")\n\n        # Assertions\n        assert isinstance(result, list)\n        assert len(result) == 384\n        assert all(isinstance(x, float) for x in result)\n        mock_model.encode.assert_called_once_with(\n            \"Hello, world!\",\n            convert_to_numpy=True,\n            normalize_embeddings=True,\n            batch_size=32,\n        )\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_with_normalization(self, mock_require_module):\n        \"\"\"Test embedding with L2 normalization.\"\"\"\n        # Create a normalized vector\n        fake_embedding = np.random.rand(384).astype(np.float32)\n        fake_embedding = fake_embedding / np.linalg.norm(fake_embedding)\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n\n        # Configure encode method\n        mock_model.encode = Mock(return_value=fake_embedding)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding(normalize_embeddings=True)\n        result = 
emb_func.embed(\"Test sentence\")\n\n        # Check if vector is normalized (L2 norm should be close to 1.0)\n        result_array = np.array(result)\n        norm = np.linalg.norm(result_array)\n        assert abs(norm - 1.0) < 1e-5\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_empty_string(self, mock_require_module):\n        \"\"\"Test embedding with empty string raises ValueError.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n\n        with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n            emb_func.embed(\"\")\n\n        with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n            emb_func.embed(\"   \")\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_non_string_input(self, mock_require_module):\n        \"\"\"Test embedding with non-string input raises TypeError.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            emb_func.embed(123)\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            emb_func.embed(None)\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_callable(self, mock_require_module):\n        \"\"\"Test that embedding function is callable.\"\"\"\n        fake_embedding = np.random.rand(384).astype(np.float32)\n\n        mock_st = 
Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n\n        # Configure encode method\n        mock_model.encode = Mock(return_value=fake_embedding)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n\n        # Test calling the function directly\n        result = emb_func(\"Test text\")\n        assert isinstance(result, list)\n        assert len(result) == 384\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_semantic_similarity(self, mock_require_module):\n        \"\"\"Test semantic similarity between similar and different texts.\"\"\"\n        # Create mock embeddings for similar and different texts\n        similar_emb_1 = np.array([1.0, 0.0, 0.0] + [0.0] * 381, dtype=np.float32)\n        similar_emb_2 = np.array([0.9, 0.1, 0.0] + [0.0] * 381, dtype=np.float32)\n        different_emb = np.array([0.0, 0.0, 1.0] + [0.0] * 381, dtype=np.float32)\n\n        # Normalize\n        similar_emb_1 = similar_emb_1 / np.linalg.norm(similar_emb_1)\n        similar_emb_2 = similar_emb_2 / np.linalg.norm(similar_emb_2)\n        different_emb = different_emb / np.linalg.norm(different_emb)\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n\n        # Configure encode method with side_effect for multiple calls\n        mock_model.encode = Mock(\n            side_effect=[similar_emb_1, similar_emb_2, different_emb]\n        )\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n\n        v1 = emb_func.embed(\"The cat sits on the mat\")\n        v2 = emb_func.embed(\"A feline rests on a rug\")\n        v3 = emb_func.embed(\"Python programming\")\n\n        # Calculate 
similarities\n        similarity_high = np.dot(v1, v2)\n        similarity_low = np.dot(v1, v3)\n\n        assert similarity_high > similarity_low\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_model_loading_error(self, mock_require_module):\n        \"\"\"Test handling of model loading failure.\"\"\"\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n        mock_st = Mock()\n        mock_st.SentenceTransformer.side_effect = Exception(\"Model not found\")\n        mock_require_module.return_value = mock_st\n\n        with pytest.raises(\n            ValueError, match=\"Failed to load Sentence Transformer model\"\n        ):\n            DefaultLocalDenseEmbedding()\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_modelscope_import_error(self, mock_require_module):\n        \"\"\"Test handling of ModelScope import error.\"\"\"\n        mock_st = Mock()\n\n        def require_module_side_effect(module_name):\n            if module_name == \"sentence_transformers\":\n                return mock_st\n            elif module_name == \"modelscope\":\n                raise ImportError(\"No module named 'modelscope'\")\n\n        mock_require_module.side_effect = require_module_side_effect\n\n        with pytest.raises(\n            ImportError, match=\"ModelScope support requires the 'modelscope' package\"\n        ):\n            DefaultLocalDenseEmbedding(model_source=\"modelscope\")\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_dimension_mismatch(self, mock_require_module):\n        \"\"\"Test handling of dimension mismatch in embedding output.\"\"\"\n        # Return embedding with wrong dimension\n        fake_embedding = np.random.rand(256).astype(np.float32)\n\n    
    mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n\n        # Configure encode method\n        mock_model.encode = Mock(return_value=fake_embedding)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        emb_func = DefaultLocalDenseEmbedding()\n\n        with pytest.raises(ValueError, match=\"Dimension mismatch\"):\n            emb_func.embed(\"Test text\")\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_embedding_generation(self):\n        \"\"\"Integration test with real model (requires sentence-transformers).\n\n        To run this test, set environment variable:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n\n        Note: First run will download the model (~80MB).\n        \"\"\"\n        emb_func = DefaultLocalDenseEmbedding()\n\n        # Test basic embedding\n        vector = emb_func.embed(\"Hello, world!\")\n        assert len(vector) == 384\n        assert isinstance(vector, list)\n        assert all(isinstance(x, float) for x in vector)\n\n        # Test normalization\n        norm = np.linalg.norm(vector)\n        assert abs(norm - 1.0) < 1e-5\n\n        # Test semantic similarity\n        v1 = emb_func.embed(\"The cat sits on the mat\")\n        v2 = emb_func.embed(\"A feline rests on a rug\")\n        v3 = emb_func.embed(\"Python programming language\")\n\n        similarity_high = np.dot(v1, v2)\n        similarity_low = np.dot(v1, v3)\n        assert similarity_high > similarity_low\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_model_properties(self, mock_require_module):\n        \"\"\"Test model_name and model_source properties.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.get_sentence_embedding_dimension.return_value = 384\n        mock_model.device = \"cpu\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        # Test Hugging Face\n        emb_func_hf = DefaultLocalDenseEmbedding(model_source=\"huggingface\")\n        assert emb_func_hf.model_name == \"all-MiniLM-L6-v2\"\n        assert emb_func_hf.model_source == \"huggingface\"\n\n        # Test ModelScope\n        with patch(\n            \"modelscope.hub.snapshot_download.snapshot_download\",\n            return_value=\"/path/to/model\",\n        ):\n            mock_ms = Mock()\n            mock_require_module.side_effect = (\n                lambda m: mock_st if m == \"sentence_transformers\" else mock_ms\n            )\n            emb_func_ms = DefaultLocalDenseEmbedding(model_source=\"modelscope\")\n            assert (\n                emb_func_ms.model_name == \"iic/nlp_gte_sentence-embedding_chinese-small\"\n            )\n            assert emb_func_ms.model_source == \"modelscope\"\n\n\n# -----------------------------------\n# DefaultLocalSparseEmbedding Test Case\n# -----------------------------------\nclass TestDefaultLocalSparseEmbedding:\n    \"\"\"Test suite for DefaultLocalSparseEmbedding (SPLADE sparse embedding).\n\n    Note:\n        DefaultLocalSparseEmbedding uses naver/splade-cocondenser-ensembledistil\n        instead of naver/splade-v3 because:\n\n        - splade-v3 is a gated model requiring Hugging Face authentication\n        - cocondenser-ensembledistil is publicly accessible\n        - Performance difference is minimal (~2%)\n        - Avoids \"Access to 
model is restricted\" errors\n\n        This allows all users to run tests without authentication setup.\n    \"\"\"\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_success(self, mock_require_module):\n        \"\"\"Test successful initialization.\n\n        Verifies that DefaultLocalSparseEmbedding initializes with the publicly\n        accessible naver/splade-cocondenser-ensembledistil model instead of\n        the gated naver/splade-v3 model.\n        \"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        assert sparse_emb.model_name == \"naver/splade-cocondenser-ensembledistil\"\n        assert sparse_emb.model_source == \"huggingface\"\n        assert sparse_emb.device == \"cpu\"\n        mock_st.SentenceTransformer.assert_called_once_with(\n            \"naver/splade-cocondenser-ensembledistil\",\n            device=None,\n            trust_remote_code=True,\n        )\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_init_with_custom_device(self, mock_require_module):\n        \"\"\"Test initialization with custom device.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cuda\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding(device=\"cuda\")\n\n        assert sparse_emb.device == \"cuda\"\n        mock_st.SentenceTransformer.assert_called_once_with(\n            \"naver/splade-cocondenser-ensembledistil\",\n            device=\"cuda\",\n            trust_remote_code=True,\n        )\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_success(self, mock_require_module):\n        \"\"\"Test successful sparse embedding generation with official API.\"\"\"\n        import numpy as np\n\n        # Clear model cache to ensure fresh mock\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        # Create a mock sparse matrix that simulates scipy.sparse behavior\n        # The code will call: sparse_matrix[0].toarray().flatten()\n        mock_sparse_matrix = Mock()\n\n        # Create a dense array representation with vocab_size=30522\n        vocab_size = 30522\n        dense_array = np.zeros(vocab_size)\n        # Set specific non-zero values at indices [10, 245, 1023, 5678]\n        dense_array[10] = 0.5\n        dense_array[245] = 0.8\n        dense_array[1023] = 1.2\n        dense_array[5678] = 0.3\n\n        # Mock the method chain: sparse_matrix[0].toarray().flatten()\n        mock_row = Mock()\n        mock_dense = Mock()\n        mock_row.toarray.return_value = mock_dense\n        mock_dense.flatten.return_value = dense_array\n        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)\n\n        # Also mock hasattr check for 'toarray'\n        mock_sparse_matrix.toarray = Mock()\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n\n        # Configure mock methods to return sparse matrix\n        # Must set return_value BEFORE hasattr() check in the code\n        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)\n        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = 
DefaultLocalSparseEmbedding()\n        result = sparse_emb.embed(\"machine learning\")\n\n        # Verify result is a dictionary\n        assert isinstance(result, dict)\n        # Verify keys are integers and values are floats\n        assert all(isinstance(k, int) for k in result.keys())\n        assert all(isinstance(v, float) for v in result.values())\n        # Verify all values are positive\n        assert all(v > 0 for v in result.values())\n        # Sparse vectors should have specific dimensions\n        assert len(result) == 4\n\n        # Verify output is sorted by indices (keys)\n        keys = list(result.keys())\n        assert keys == sorted(keys), (\n            \"Sparse vector keys must be sorted in ascending order\"\n        )\n\n        # Verify expected keys\n        assert keys == [10, 245, 1023, 5678]\n\n        # Verify encode_query was called with a list\n        mock_model.encode_query.assert_called_once()\n        call_args = mock_model.encode_query.call_args[0][0]\n        assert isinstance(call_args, list)\n        assert call_args == [\"machine learning\"]\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_empty_input(self, mock_require_module):\n        \"\"\"Test embedding with empty input.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n            sparse_emb.embed(\"\")\n\n        with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n            sparse_emb.embed(\"   \")\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_embed_non_string_input(self, mock_require_module):\n        \"\"\"Test embedding with non-string input.\"\"\"\n        mock_st = Mock()\n        
mock_model = Mock()\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            sparse_emb.embed(123)\n\n        with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n            sparse_emb.embed([\"text\"])\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_callable_interface(self, mock_require_module):\n        \"\"\"Test that DefaultSparseEmbedding is callable.\"\"\"\n        import numpy as np\n\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        # Create a mock sparse matrix\n        mock_sparse_matrix = Mock()\n\n        # Create a dense array representation with vocab_size=30522\n        vocab_size = 30522\n        dense_array = np.zeros(vocab_size)\n        # Set specific non-zero values at indices [100, 200, 300]\n        dense_array[100] = 1.0\n        dense_array[200] = 0.5\n        dense_array[300] = 0.8\n\n        # Mock the method chain: sparse_matrix[0].toarray().flatten()\n        mock_row = Mock()\n        mock_dense = Mock()\n        mock_row.toarray.return_value = mock_dense\n        mock_dense.flatten.return_value = dense_array\n        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)\n\n        # Also mock hasattr check for 'toarray'\n        mock_sparse_matrix.toarray = Mock()\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n\n        # Configure mock methods\n        mock_model.encode_query = 
Mock(return_value=mock_sparse_matrix)\n        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        # Test callable interface\n        result = sparse_emb(\"test input\")\n        assert isinstance(result, dict)\n        assert all(isinstance(k, int) for k in result.keys())\n\n        # Verify sorted output\n        keys = list(result.keys())\n        assert keys == sorted(keys), \"Callable interface must also return sorted keys\"\n        assert keys == [100, 200, 300]\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_model_loading_failure(self, mock_require_module):\n        \"\"\"Test handling of model loading failure.\"\"\"\n        # Clear model cache to ensure the test actually tries to load the model\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        mock_st = Mock()\n        mock_st.SentenceTransformer.side_effect = Exception(\"Model not found\")\n        mock_require_module.return_value = mock_st\n\n        with pytest.raises(\n            ValueError, match=\"Failed to load Sentence Transformer model\"\n        ):\n            DefaultLocalSparseEmbedding()\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_inference_failure(self, mock_require_module):\n        \"\"\"Test handling of inference failure.\"\"\"\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n\n        # Configure mock 
methods to raise RuntimeError\n        mock_model.encode_query = Mock(side_effect=RuntimeError(\"CUDA out of memory\"))\n        mock_model.encode_document = Mock(\n            side_effect=RuntimeError(\"CUDA out of memory\")\n        )\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        with pytest.raises(RuntimeError, match=\"Failed to generate sparse embedding\"):\n            sparse_emb.embed(\"test input\")\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_sparse_vector_properties(self, mock_require_module):\n        \"\"\"Test properties of sparse vectors (sparsity, non-zero values, sorted order).\"\"\"\n        import numpy as np\n\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        # Create a mock sparse matrix that simulates scipy.sparse behavior\n        # The code will call: sparse_matrix[0].toarray().flatten()\n        mock_sparse_matrix = Mock()\n\n        # Create a dense array representation with vocab_size=30522\n        vocab_size = 30522\n        dense_array = np.zeros(vocab_size)\n        # Set specific non-zero values at indices [50, 100, 200, 400, 500]\n        dense_array[50] = 3.0\n        dense_array[100] = 2.0\n        dense_array[200] = 1.5\n        dense_array[400] = 2.5\n        dense_array[500] = 1.8\n\n        # Mock the method chain: sparse_matrix[0].toarray().flatten()\n        mock_row = Mock()\n        mock_dense = Mock()\n        mock_row.toarray.return_value = mock_dense\n        mock_dense.flatten.return_value = dense_array\n        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)\n\n        # Also mock hasattr check for 'toarray'\n        mock_sparse_matrix.toarray = Mock()\n\n 
       mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n\n        # Configure mock methods\n        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)\n        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n        result = sparse_emb.embed(\"test\")\n\n        # Verify sparsity: result should have much fewer dimensions than vocab_size\n        assert len(result) < vocab_size\n        # All values should be positive\n        assert all(v > 0 for v in result.values())\n\n        # Verify keys are sorted in ascending order\n        keys = list(result.keys())\n        assert keys == sorted(keys), \"Sparse vector keys must be sorted\"\n\n        # Verify the specific non-zero indices are present and sorted\n        # Expected order: [50, 100, 200, 400, 500] (sorted)\n        expected_keys = [50, 100, 200, 400, 500]\n        assert keys == expected_keys, f\"Expected {expected_keys}, got {keys}\"\n\n        # First key should be smallest\n        if len(result) > 0:\n            first_key = next(iter(result.keys()))\n            assert first_key == min(result.keys()), \"First key must be the smallest\"\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_output_sorted_by_indices(self, mock_require_module):\n        \"\"\"Test that output dictionary is always sorted by indices (keys) in ascending order.\"\"\"\n        import numpy as np\n\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        # Create sparse output with deliberately out-of-order indices\n        # Non-sequential indices: 9999, 5, 1234, 77, 500\n        
mock_sparse_matrix = Mock()\n\n        # Create a dense array representation with vocab_size=30522\n        vocab_size = 30522\n        dense_array = np.zeros(vocab_size)\n        # Set specific non-zero values at out-of-order indices\n        dense_array[9999] = 1.5\n        dense_array[5] = 2.0\n        dense_array[1234] = 0.8\n        dense_array[77] = 3.2\n        dense_array[500] = 1.1\n\n        # Mock the method chain: sparse_matrix[0].toarray().flatten()\n        mock_row = Mock()\n        mock_dense = Mock()\n        mock_row.toarray.return_value = mock_dense\n        mock_dense.flatten.return_value = dense_array\n        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)\n\n        # Also mock hasattr check for 'toarray'\n        mock_sparse_matrix.toarray = Mock()\n\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n\n        # Configure mock methods\n        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)\n        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)\n\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n        result = sparse_emb.embed(\"test sorting\")\n\n        # Extract keys from result\n        result_keys = list(result.keys())\n\n        # Verify keys are sorted\n        assert result_keys == sorted(result_keys), (\n            f\"Keys must be sorted in ascending order. \"\n            f\"Got: {result_keys}, Expected: {sorted(result_keys)}\"\n        )\n\n        # Verify expected keys are present and in correct order\n        # Expected sorted order: [5, 77, 500, 1234, 9999]\n        expected_sorted_keys = [5, 77, 500, 1234, 9999]\n        assert result_keys == expected_sorted_keys, (\n            f\"All expected keys should be present in sorted order. 
\"\n            f\"Expected: {expected_sorted_keys}, Got: {result_keys}\"\n        )\n\n        # Verify first and last keys\n        assert result_keys[0] == 5, \"First key must be minimum\"\n        assert result_keys[-1] == 9999, \"Last key must be maximum\"\n\n        # Verify iteration order matches sorted order\n        for i, (key, value) in enumerate(result.items()):\n            if i > 0:\n                prev_key = list(result.keys())[i - 1]\n                assert key > prev_key, (\n                    f\"Key at position {i} must be greater than previous key\"\n                )\n\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_device_property(self, mock_require_module):\n        \"\"\"Test device property returns correct device.\"\"\"\n        mock_st = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cuda\"\n        mock_st.SentenceTransformer.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        sparse_emb = DefaultLocalSparseEmbedding(device=\"cuda\")\n        assert sparse_emb.device == \"cuda\"\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_function.require_module\")\n    def test_modelscope_source(self, mock_require_module):\n        \"\"\"Test initialization with ModelScope source.\"\"\"\n        mock_st = Mock()\n        mock_ms = Mock()\n        mock_model = Mock()\n        mock_model.device = \"cpu\"\n        mock_st.SentenceTransformer.return_value = mock_model\n\n        # Mock ModelScope snapshot_download\n        with patch(\n            \"modelscope.hub.snapshot_download.snapshot_download\",\n            return_value=\"/cache/splade-cocondenser\",\n        ):\n            mock_require_module.side_effect = (\n                lambda m: mock_st if m == \"sentence_transformers\" 
else mock_ms\n            )\n\n            sparse_emb = DefaultLocalSparseEmbedding(model_source=\"modelscope\")\n\n            assert sparse_emb.model_name == \"naver/splade-cocondenser-ensembledistil\"\n            assert sparse_emb.model_source == \"modelscope\"\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download\",\n    )\n    def test_integration_real_model(self):\n        \"\"\"Integration test with real SPLADE model (requires model download).\n\n        This test uses naver/splade-cocondenser-ensembledistil instead of\n        naver/splade-v3 because splade-v3 requires Hugging Face authentication.\n        The cocondenser-ensembledistil model is publicly accessible and provides\n        comparable performance.\n\n        To run this test:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            pytest tests/test_embedding.py::TestDefaultSparseEmbedding::test_integration_real_model -v\n\n        Note: First run will download ~100MB model from Hugging Face.\n\n        Alternative models:\n            If you have access to splade-v3, you can create a custom embedding\n            class following the example in DefaultSparseEmbedding docstring.\n        \"\"\"\n        # Clear model cache to ensure fresh load\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        # Test with real input\n        text = \"machine learning and artificial intelligence\"\n        result = sparse_emb.embed(text)\n\n        # Verify result structure\n        assert isinstance(result, dict)\n        assert len(result) > 0\n        assert all(isinstance(k, int) and k >= 0 for k in result.keys())\n        assert all(isinstance(v, float) and v > 0 for v in result.values())\n\n        # 
SPLADE typically produces 100-300 non-zero dimensions\n        assert 50 < len(result) < 500\n\n        # Verify keys are sorted in ascending order\n        keys = list(result.keys())\n        assert keys == sorted(keys), \"Real model output must be sorted by indices\"\n\n        # Test callable interface\n        result2 = sparse_emb(text)\n        assert result == result2\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1\",\n    )\n    def test_integration_multiple_inputs(self):\n        \"\"\"Integration test with multiple different inputs.\"\"\"\n        # Clear model cache\n        from zvec.extension.sentence_transformer_embedding_function import (\n            DefaultLocalSparseEmbedding,\n        )\n\n        DefaultLocalSparseEmbedding.clear_cache()\n\n        sparse_emb = DefaultLocalSparseEmbedding()\n\n        texts = [\n            \"Hello, world!\",\n            \"Machine learning is fascinating\",\n            \"Python programming language\",\n        ]\n\n        results = [sparse_emb.embed(text) for text in texts]\n\n        # All results should be different\n        assert len(results) == 3\n        assert all(isinstance(r, dict) for r in results)\n\n        # Different inputs should produce different sparse vectors\n        assert results[0] != results[1]\n        assert results[1] != results[2]\n\n        # All results must be sorted by indices\n        for i, result in enumerate(results):\n            keys = list(result.keys())\n            assert keys == sorted(keys), f\"Result {i} must have sorted keys\"\n\n\n# ----------------------------\n# BM25EmbeddingFunction Test Case\n# ----------------------------\nclass TestBM25EmbeddingFunction:\n    \"\"\"Test suite for BM25EmbeddingFunction (BM25-based sparse embedding using DashText SDK).\"\"\"\n\n    def test_init_with_built_in_encoder(self):\n        \"\"\"Test successful initialization with built-in encoder 
(no corpus).\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            # Test with default language (Chinese)\n            bm25 = BM25EmbeddingFunction()\n\n            assert bm25.corpus_size == 0\n            assert bm25.encoding_type == \"query\"\n            assert bm25.language == \"zh\"\n            mock_dashtext.SparseVectorEncoder.default.assert_called_once_with(name=\"zh\")\n\n    def test_init_with_custom_encoder(self):\n        \"\"\"Test successful initialization with custom encoder (with corpus).\"\"\"\n        corpus = [\n            \"a cat is a feline and likes to purr\",\n            \"a dog is the human's best friend\",\n            \"a bird is a beautiful animal that can fly\",\n        ]\n\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction(corpus=corpus, b=0.75, k1=1.2)\n\n            assert bm25.corpus_size == 3\n            assert bm25.encoding_type == \"query\"\n            mock_dashtext.SparseVectorEncoder.assert_called_once_with(b=0.75, k1=1.2)\n            mock_encoder.train.assert_called_once_with(corpus)\n\n    def test_init_with_empty_corpus(self):\n        \"\"\"Test initialization with empty corpus raises ValueError.\"\"\"\n        with pytest.raises(ValueError, match=\"Corpus must be a non-empty list\"):\n            BM25EmbeddingFunction(corpus=[])\n\n    def test_init_with_invalid_corpus(self):\n        \"\"\"Test initialization with 
invalid corpus elements.\"\"\"\n        with pytest.raises(ValueError, match=\"All corpus documents must be strings\"):\n            BM25EmbeddingFunction(corpus=[\"text\", 123, \"another\"])\n\n        with pytest.raises(ValueError, match=\"All corpus documents must be strings\"):\n            BM25EmbeddingFunction(corpus=[None, \"text\"])\n\n    def test_init_with_language_parameter(self):\n        \"\"\"Test initialization with different language settings.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            # Test English language\n            bm25_en = BM25EmbeddingFunction(language=\"en\")\n            assert bm25_en.language == \"en\"\n            mock_dashtext.SparseVectorEncoder.default.assert_called_with(name=\"en\")\n\n    def test_init_with_encoding_type(self):\n        \"\"\"Test initialization with different encoding types.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            # Test document encoding type\n            bm25_doc = BM25EmbeddingFunction(encoding_type=\"document\")\n            assert bm25_doc.encoding_type == \"document\"\n\n    def test_init_with_missing_dashtext_library(self):\n        \"\"\"Test initialization fails when dashtext library is not installed.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_require.side_effect = ImportError(\"dashtext package is 
required\")\n\n            with pytest.raises(ImportError, match=\"dashtext package is required\"):\n                BM25EmbeddingFunction()\n\n    def test_embed_with_query_encoding(self):\n        \"\"\"Test successful sparse embedding generation with query encoding.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n\n            # Mock encode_queries to return sparse vector\n            mock_encoder.encode_queries.return_value = {\n                5: 0.89,\n                12: 1.45,\n                23: 0.67,\n                45: 1.12,\n            }\n\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction(encoding_type=\"query\")\n            # Clear LRU cache to ensure fresh call\n            bm25.embed.cache_clear()\n            result = bm25.embed(\"cat purr loud\")\n\n            # Verify result structure\n            assert isinstance(result, dict)\n            assert all(isinstance(k, int) for k in result.keys())\n            assert all(isinstance(v, float) for v in result.values())\n\n            # Verify all values are positive\n            assert all(v > 0 for v in result.values())\n\n            # Verify output is sorted by indices\n            keys = list(result.keys())\n            assert keys == sorted(keys), \"Output must be sorted by indices\"\n\n            # Verify expected keys from mock response\n            assert result == {5: 0.89, 12: 1.45, 23: 0.67, 45: 1.12}\n\n            # Verify encode_queries was called\n            mock_encoder.encode_queries.assert_called_once_with(\"cat purr loud\")\n\n    def test_embed_with_document_encoding(self):\n        \"\"\"Test successful sparse embedding generation with document encoding.\"\"\"\n        with patch(\n            
\"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n\n            # Mock encode_documents to return sparse vector\n            mock_encoder.encode_documents.return_value = {10: 1.5, 20: 2.3}\n\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction(encoding_type=\"document\")\n            bm25.embed.cache_clear()\n            result = bm25.embed(\"document text\")\n\n            assert result == {10: 1.5, 20: 2.3}\n            mock_encoder.encode_documents.assert_called_once_with(\"document text\")\n\n    def test_embed_with_empty_input(self):\n        \"\"\"Test embedding with empty input raises ValueError.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction()\n\n            with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n                bm25.embed(\"\")\n\n            with pytest.raises(ValueError, match=\"Input text cannot be empty\"):\n                bm25.embed(\"   \")\n\n    def test_embed_with_non_string_input(self):\n        \"\"\"Test embedding with non-string input raises TypeError.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction()\n\n            # Test with 
hashable non-string types - should get our custom error message\n            with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n                bm25.embed(123)\n\n            with pytest.raises(TypeError, match=\"Expected 'input' to be str\"):\n                bm25.embed(None)\n\n            # Test with unhashable type (list)\n            # Note: lru_cache raises TypeError(\"unhashable type: 'list'\") before our type check\n            # This is still a valid type error, just caught at a different layer\n            with pytest.raises(TypeError, match=\"unhashable type\"):\n                bm25.embed([\"text\"])\n\n    def test_embed_callable_interface(self):\n        \"\"\"Test that BM25EmbeddingFunction is callable.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_encoder.encode_queries.return_value = {10: 1.5}\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction()\n            bm25.embed.cache_clear()\n\n            # Test callable interface\n            result = bm25(\"test query\")\n            assert isinstance(result, dict)\n            assert 10 in result\n\n    def test_embed_output_sorted_by_indices(self):\n        \"\"\"Test that output is always sorted by indices in ascending order.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n\n            # Mock encode_queries with unsorted indices\n            mock_encoder.encode_queries.return_value = {\n                9999: 1.5,\n                5: 2.0,\n                1234: 0.8,\n                77: 3.2,\n                500: 1.1,\n            }\n\n      
      mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction()\n            bm25.embed.cache_clear()\n            result = bm25.embed(\"test query\")\n\n            # Verify keys are sorted\n            result_keys = list(result.keys())\n            assert result_keys == sorted(result_keys), (\n                f\"Keys must be sorted. Got: {result_keys}, Expected: {sorted(result_keys)}\"\n            )\n\n            # Verify expected sorted order: [5, 77, 500, 1234, 9999]\n            expected_keys = [5, 77, 500, 1234, 9999]\n            assert result_keys == expected_keys\n\n    def test_embed_filters_zero_values(self):\n        \"\"\"Test that zero and negative values are filtered out.\"\"\"\n        with patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n\n            # Mock encode_queries with zero and negative values\n            mock_encoder.encode_queries.return_value = {\n                0: 1.5,  # Positive - should be included\n                1: 0.0,  # Zero - should be filtered\n                2: -0.5,  # Negative - should be filtered\n            }\n\n            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction()\n            bm25.embed.cache_clear()\n            result = bm25.embed(\"test\")\n\n            # Only positive token should be in result\n            assert 0 in result\n            assert 1 not in result  # Zero value filtered\n            assert 2 not in result  # Negative value filtered\n            assert all(v > 0 for v in result.values())\n\n    def test_properties(self):\n        \"\"\"Test property accessors.\"\"\"\n        corpus = [\"doc1\", \"doc2\", \"doc3\"]\n\n        with 
patch(\n            \"zvec.extension.bm25_embedding_function.require_module\"\n        ) as mock_require:\n            mock_dashtext = Mock()\n            mock_encoder = Mock()\n            mock_dashtext.SparseVectorEncoder.return_value = mock_encoder\n            mock_require.return_value = mock_dashtext\n\n            bm25 = BM25EmbeddingFunction(\n                corpus=corpus,\n                encoding_type=\"document\",\n                language=\"en\",\n                b=0.8,\n                k1=1.5,\n                custom_param=\"test\",\n            )\n\n            assert bm25.corpus_size == 3\n            assert bm25.encoding_type == \"document\"\n            assert bm25.language == \"en\"\n            assert bm25.extra_params == {\"custom_param\": \"test\"}\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_dashtext_bm25_embedding(self):\n        \"\"\"Integration test with real DashText library.\n\n        To run this test:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            pip install dashtext\n\n        Note: This test requires the dashtext package to be installed.\n        \"\"\"\n        # Test built-in encoder (Chinese)\n        bm25_zh = BM25EmbeddingFunction(language=\"zh\", encoding_type=\"query\")\n\n        query_zh = \"什么是向量检索服务\"\n        result_zh = bm25_zh.embed(query_zh)\n\n        assert isinstance(result_zh, dict)\n        assert len(result_zh) > 0\n        assert all(isinstance(k, int) for k in result_zh.keys())\n        assert all(isinstance(v, float) and v > 0 for v in result_zh.values())\n\n        # Verify sorted output\n        keys = list(result_zh.keys())\n        assert keys == sorted(keys), \"Real DashText BM25 output must be sorted\"\n\n        # Test custom corpus\n        corpus = [\n            \"The cat sits on the mat\",\n            \"The dog plays in the garden\",\n            
\"Birds fly in the sky\",\n            \"Fish swim in the water\",\n        ]\n\n        bm25_custom = BM25EmbeddingFunction(corpus=corpus, encoding_type=\"query\")\n\n        query_en = \"cat on mat\"\n        result_en = bm25_custom.embed(query_en)\n\n        assert isinstance(result_en, dict)\n        assert len(result_en) > 0\n        assert all(isinstance(k, int) for k in result_en.keys())\n        assert all(isinstance(v, float) and v > 0 for v in result_en.values())\n\n        # Test callable interface\n        result2 = bm25_custom(query_en)\n        assert result_en == result2\n\n        # Verify properties\n        assert bm25_custom.corpus_size == 4\n"
  },
  {
    "path": "python/tests/test_params.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport sys\nimport time\n\n\nimport numpy as np\nimport pytest\nfrom zvec import (\n    AddColumnOption,\n    AlterColumnOption,\n    CollectionOption,\n    FlatIndexParam,\n    HnswIndexParam,\n    IndexOption,\n    InvertIndexParam,\n    IVFIndexParam,\n    OptimizeOption,\n    HnswQueryParam,\n    IVFQueryParam,\n    VectorQuery,\n    IndexType,\n    MetricType,\n    QuantizeType,\n    DataType,\n    VectorSchema,\n)\n\nfrom _zvec.param import _VectorQuery\n\n# ----------------------------\n# Invert Index Param Test Case\n# ----------------------------\n\n\nclass TestInvertIndexParam:\n    def test_default(self):\n        param = InvertIndexParam()\n        assert param.enable_range_optimization is False\n        assert param.enable_extended_wildcard is False\n        assert param.type == IndexType.INVERT\n\n    def test_custom(self):\n        param = InvertIndexParam(\n            enable_range_optimization=True, enable_extended_wildcard=True\n        )\n        assert param.enable_range_optimization is True\n        assert param.enable_extended_wildcard is True\n\n    def test_readonly(self):\n        param = InvertIndexParam()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            
match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            param.enable_range_optimization = False\n            param.enable_extended_wildcard = False\n\n\n# ----------------------------\n# Hnsw Index Param Test Case\n# ----------------------------\n\n\nclass TestHnswIndexParam:\n    def test_default(self):\n        param = HnswIndexParam()\n        assert param.metric_type == MetricType.IP\n        assert param.m == 50\n        assert param.ef_construction == 500\n        assert param.quantize_type == QuantizeType.UNDEFINED\n        assert param.type == IndexType.HNSW\n\n    def test_custom(self):\n        param = HnswIndexParam(\n            metric_type=MetricType.L2,\n            m=10,\n            ef_construction=1000,\n            quantize_type=QuantizeType.FP16,\n        )\n        assert param.metric_type == MetricType.L2\n        assert param.m == 10\n        assert param.ef_construction == 1000\n        assert param.quantize_type == QuantizeType.FP16\n\n    @pytest.mark.parametrize(\n        \"attr\", [\"metric_type\", \"m\", \"ef_construction\", \"quantize_type\"]\n    )\n    def test_readonly_attributes(self, attr):\n        param = HnswIndexParam()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# Flat Index Param Test Case\n# ----------------------------\nclass TestFlatIndexParam:\n    def test_default(self):\n        param = FlatIndexParam()\n        assert param.type == IndexType.FLAT\n        assert param.quantize_type == QuantizeType.UNDEFINED\n        assert param.metric_type == MetricType.IP\n\n    def test_custom(self):\n        param = FlatIndexParam(\n       
     metric_type=MetricType.L2, quantize_type=QuantizeType.INT8\n        )\n        assert param.metric_type == MetricType.L2\n        assert param.quantize_type == QuantizeType.INT8\n\n    @pytest.mark.parametrize(\"attr\", [\"metric_type\", \"quantize_type\"])\n    def test_readonly_attributes(self, attr):\n        param = FlatIndexParam()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# Ivf Index Param Test Case\n# ----------------------------\nclass TestIVFIndexParam:\n    def test_default(self):\n        param = IVFIndexParam()\n        assert param.metric_type == MetricType.IP\n        assert param.n_list == 0\n        assert param.quantize_type == QuantizeType.UNDEFINED\n        assert param.type == IndexType.IVF\n\n    def test_custom(self):\n        param = IVFIndexParam(\n            metric_type=MetricType.L2, n_list=1000, quantize_type=QuantizeType.FP16\n        )\n        assert param.metric_type == MetricType.L2\n        assert param.n_list == 1000\n        assert param.quantize_type == QuantizeType.FP16\n        assert param.type == IndexType.IVF\n\n    @pytest.mark.parametrize(\"attr\", [\"metric_type\", \"n_list\", \"quantize_type\"])\n    def test_readonly_attributes(self, attr):\n        param = IVFIndexParam()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# CollectionOption Test Case\n# 
----------------------------\nclass TestCollectionOption:\n    def test_default(self):\n        option = CollectionOption()\n        assert option is not None\n        assert option.read_only == False\n        assert option.enable_mmap == True\n\n    def test_custom(self):\n        option = CollectionOption(read_only=True, enable_mmap=False)\n        assert option.read_only == True\n        assert option.enable_mmap == False\n\n        option = CollectionOption(read_only=False, enable_mmap=True)\n        assert option.read_only == False\n        assert option.enable_mmap == True\n\n    @pytest.mark.parametrize(\"attr\", [\"read_only\", \"enable_mmap\"])\n    def test_readonly_attributes(self, attr):\n        param = CollectionOption()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# IndexOption Test Case\n# ----------------------------\nclass TestIndexOption:\n    def test_default(self):\n        option = IndexOption()\n        assert option is not None\n        assert option.concurrency == 0\n\n    def test_custom(self):\n        option = IndexOption(concurrency=10)\n        assert option.concurrency == 10\n\n    @pytest.mark.parametrize(\"attr\", [\"concurrency\"])\n    def test_readonly_attributes(self, attr):\n        param = IndexOption()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# AddColumnOption Test 
Case\n# ----------------------------\nclass TestAddColumnOption:\n    def test_default(self):\n        option = AddColumnOption()\n        assert option is not None\n        assert option.concurrency == 0\n\n    def test_custom(self):\n        option = AddColumnOption(concurrency=10)\n        assert option.concurrency == 10\n\n    @pytest.mark.parametrize(\"attr\", [\"concurrency\"])\n    def test_readonly_attributes(self, attr):\n        param = AddColumnOption()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# AlterColumnOption Test Case\n# ----------------------------\nclass TestAlterColumnOption:\n    def test_default(self):\n        option = AlterColumnOption()\n        assert option is not None\n        assert option.concurrency == 0\n\n    def test_custom(self):\n        option = AlterColumnOption(concurrency=10)\n        assert option.concurrency == 10\n\n    @pytest.mark.parametrize(\"attr\", [\"concurrency\"])\n    def test_readonly_attributes(self, attr):\n        param = AlterColumnOption()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# OptimizeOption Test Case\n# ----------------------------\nclass TestOptimizeOption:\n    def test_default(self):\n        option = OptimizeOption()\n        assert option is not None\n        assert option.concurrency == 0\n\n    def test_custom(self):\n        
option = OptimizeOption(concurrency=10)\n        assert option.concurrency == 10\n\n    @pytest.mark.parametrize(\"attr\", [\"concurrency\"])\n    def test_readonly_attributes(self, attr):\n        param = OptimizeOption()\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            setattr(param, attr, getattr(param, attr))\n\n\n# ----------------------------\n# HnswQueryParam Test Case\n# ----------------------------\nclass TestHnswQueryParam:\n    def test_default(self):\n        param = HnswQueryParam()\n        assert param is not None\n        assert param.ef == 300\n        assert param.is_using_refiner == False\n        assert param.radius == 0\n        assert param.is_linear == False\n\n    def test_custom(self):\n        param = HnswQueryParam(ef=10, is_using_refiner=True, radius=30, is_linear=True)\n        assert param.ef == 10\n        assert param.is_using_refiner == True\n        assert param.radius == 30\n        assert param.is_linear == True\n\n    def test_readonly_attributes(self):\n        param = HnswQueryParam()\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            param.ef = 10\n            param.is_using_refiner = True\n            param.radius = 30\n            param.is_linear = True\n\n\n# # ----------------------------\n# # IVFQueryParam Test Case\n# # ----------------------------\n# class TestIVFQueryParam:\n#     def test_default(self):\n#         param = IVFQueryParam()\n#         assert param is not None\n#         assert param.nprobe == 10\n#         assert 
param.is_using_refiner == False\n#         assert param.radius == 0\n#         assert param.is_linear == False\n#         assert param.scale_factor == 10\n#\n#     def test_custom(self):\n#         param = IVFQueryParam(\n#             nprobe=20,\n#             is_using_refiner=True,\n#             radius=30,\n#             is_linear=True,\n#             scale_factor=40\n#         )\n#         assert param.nprobe == 20\n#         assert param.is_using_refiner == True\n#         assert param.radius == 30\n#         assert param.is_linear == True\n#         assert param.scale_factor == 40\n\n\nclass TestVectorQuery:\n    def test_init_with_valid_id(self):\n        vq = VectorQuery(field_name=\"embedding\", id=\"doc123\")\n        assert vq.field_name == \"embedding\"\n        assert vq.id == \"doc123\"\n        assert vq.vector is None\n        assert vq.param is None\n\n    def test_init_with_valid_vector(self):\n        vec = [0.1, 0.2, 0.3]\n        param = HnswQueryParam(ef=300)\n        vq = VectorQuery(field_name=\"embedding\", vector=vec, param=param)\n        assert vq.field_name == \"embedding\"\n        assert vq.vector == vec\n        assert vq.param == param\n\n    def test_init_both_id_and_vector_raises_error(self):\n        with pytest.raises(ValueError):\n            VectorQuery(field_name=\"embedding\", id=\"doc123\", vector=[0.1])._validate()\n\n    def test_init_without_field_name_raises_error(self):\n        with pytest.raises(ValueError):\n            VectorQuery(field_name=None)._validate()\n\n    def test_has_id_returns_true_when_id_set(self):\n        vq = VectorQuery(field_name=\"embedding\", id=\"doc123\")\n        assert vq.has_id()\n\n    def test_has_id_returns_false_when_no_id(self):\n        vq = VectorQuery(field_name=\"embedding\", vector=[0.1])\n        assert not vq.has_id()\n\n    def test_has_vector_returns_true_with_non_empty_vector(self):\n        vq = VectorQuery(field_name=\"embedding\", vector=[0.1])\n        assert 
vq.has_vector()\n\n    def test_validate_fails_on_both_id_and_vector(self):\n        vq = VectorQuery(field_name=\"test\", id=\"doc123\", vector=[0.1])\n        with pytest.raises(ValueError):\n            vq._validate()\n"
  },
  {
    "path": "python/tests/test_query_executor.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Dict, Union\nfrom unittest.mock import MagicMock\n\nimport numpy as np\nimport math\nfrom _zvec.param import _VectorQuery\n\nimport pytest\nfrom zvec.executor.query_executor import (\n    MultiVectorQueryExecutor,\n    NoVectorQueryExecutor,\n    QueryContext,\n    QueryExecutor,\n    QueryExecutorFactory,\n    SingleVectorQueryExecutor,\n    VectorQuery,\n)\nfrom zvec import RrfReRanker, HnswQueryParam, CollectionSchema, VectorSchema, DataType\n\n\n# ----------------------------\n# Mock Vector Schema\n# ----------------------------\nclass MockVectorSchema(VectorSchema):\n    def __init__(self, name=\"test_vector\"):\n        self._name = name\n\n    @property\n    def name(self):\n        return self._name\n\n    def _get_object(self):\n        return MagicMock()\n\n\n# ----------------------------\n# Mock Collection Schema\n# ----------------------------\nclass MockCollectionSchema(CollectionSchema):\n    def __init__(self, vectors=Union[VectorSchema, Dict[str, VectorSchema]]):\n        self._vectors = (\n            [vectors] if not isinstance(vectors, Dict) else list(vectors.values())\n        )\n\n    @property\n    def vectors(self):\n        return self._vectors\n\n\n# ----------------------------\n# VectorQuery Test Case\n# ----------------------------\nclass TestVectorQuery:\n    def 
test_init(self):\n        query = VectorQuery(field_name=\"test_field\")\n        assert query.field_name == \"test_field\"\n        assert query.id is None\n        assert query.vector is None\n        assert query.param is None\n\n        param = HnswQueryParam()\n        query = VectorQuery(\n            field_name=\"test_field\", id=\"test_id\", vector=[1, 2, 3], param=param\n        )\n        assert query.field_name == \"test_field\"\n        assert query.id == \"test_id\"\n        assert query.vector == [1, 2, 3]\n        assert query.param == param\n\n    def test_has_id(self):\n        query = VectorQuery(field_name=\"test_field\")\n        assert not query.has_id()\n\n        query = VectorQuery(field_name=\"test_field\", id=\"test_id\")\n        assert query.has_id()\n\n    def test_has_vector(self):\n        query = VectorQuery(field_name=\"test_field\")\n        assert not query.has_vector()\n\n        query = VectorQuery(field_name=\"test_field\", vector=[])\n        assert not query.has_vector()\n\n        query = VectorQuery(field_name=\"test_field\", vector=[1, 2, 3])\n        assert query.has_vector()\n\n    def test_validate_dense_fp16_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", data_type=DataType.VECTOR_FP16)\n        vec = np.array([1.1, 2.1, 3.1], dtype=np.float16)\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        assert np.array_equal(vec, ret)\n\n    def test_validate_dense_fp32_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", data_type=DataType.VECTOR_FP32)\n        vec = np.array([1.1, 2.1, 3.1], dtype=np.float32)\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        assert np.array_equal(vec, ret)\n\n    def test_validate_dense_fp64_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", 
data_type=DataType.VECTOR_FP64)\n        vec = np.array([1.1, 2.1, 3.1], dtype=np.float64)\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        assert np.array_equal(vec, ret)\n\n    def test_validate_dense_int8_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", data_type=DataType.VECTOR_INT8)\n        vec = np.array([1, 2, 3], dtype=np.int8)\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        assert np.array_equal(vec, ret)\n\n    def test_validate_sparse_fp32_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", data_type=DataType.SPARSE_VECTOR_FP32)\n        vec = {1: 1.1, 2: 2.2, 3: 3.3}\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        for k in vec.keys():\n            assert math.isclose(vec[k], ret[k], abs_tol=1e-6)\n\n    def test_validate_sparse_fp16_convert(self):\n        v = _VectorQuery()\n        schema = VectorSchema(name=\"test\", data_type=DataType.SPARSE_VECTOR_FP16)\n        vec = {1: 1.1, 2: 2.2, 3: 3.3}\n        v.set_vector(schema._get_object(), vec)\n        ret = v.get_vector(schema._get_object())\n        for k in vec.keys():\n            assert math.isclose(np.float16(vec[k]), ret[k], abs_tol=1e-6)\n\n\nclass TestQueryContext:\n    def test_init(self):\n        ctx = QueryContext(topk=10)\n        assert ctx.topk == 10\n        assert ctx.queries == []\n        assert ctx.filter is None\n        assert ctx.reranker is None\n        assert ctx.output_fields is None\n        assert ctx.include_vector is False\n        assert ctx.core_vectors == []\n\n    def test_properties(self):\n        queries = [VectorQuery(field_name=\"test\")]\n        reranker = RrfReRanker()\n        output_fields = [\"field1\", \"field2\"]\n\n        ctx = QueryContext(\n            topk=5,\n            
filter=\"test_filter\",\n            include_vector=True,\n            queries=queries,\n            output_fields=output_fields,\n            reranker=reranker,\n        )\n\n        assert ctx.topk == 5\n        assert ctx.queries == queries\n        assert ctx.filter == \"test_filter\"\n        assert ctx.reranker == reranker\n        assert ctx.output_fields == output_fields\n        assert ctx.include_vector is True\n\n    def test_core_vectors_setter(self):\n        ctx = QueryContext(topk=10)\n        core_vectors = [MagicMock()]\n        ctx.core_vectors = core_vectors\n        assert ctx.core_vectors == core_vectors\n\n\nclass TestNoVectorQueryExecutor:\n    def test_init(self):\n        schema = MockCollectionSchema()\n        executor = NoVectorQueryExecutor(schema)\n        assert isinstance(executor, QueryExecutor)\n\n    def test_do_validate_with_queries(self):\n        schema = MockCollectionSchema()\n        executor = NoVectorQueryExecutor(schema)\n        ctx = QueryContext(topk=10, queries=[VectorQuery(field_name=\"test\")])\n\n        with pytest.raises(\n            ValueError, match=\"Collection does not support query with vector or id\"\n        ):\n            executor._do_validate(ctx)\n\n    def test_do_validate_without_queries(self):\n        schema = MockCollectionSchema()\n        executor = NoVectorQueryExecutor(schema)\n        ctx = QueryContext(topk=10)\n\n        executor._do_validate(ctx)\n\n    def test_do_build(self):\n        schema = MockCollectionSchema()\n        executor = NoVectorQueryExecutor(schema)\n        ctx = QueryContext(topk=5, filter=\"test_filter\")\n\n        result = executor._do_build(ctx, MagicMock())\n        assert len(result) == 1\n        assert result[0].topk == 5\n        assert result[0].filter == \"test_filter\"\n\n\nclass TestSingleVectorQueryExecutor:\n    def test_init(self):\n        schema = MockCollectionSchema()\n        executor = SingleVectorQueryExecutor(schema)\n        assert 
isinstance(executor, NoVectorQueryExecutor)\n\n    def test_do_validate_multiple_queries(self):\n        schema = MockCollectionSchema()\n        executor = SingleVectorQueryExecutor(schema)\n        queries = [VectorQuery(field_name=\"test1\"), VectorQuery(field_name=\"test2\")]\n        ctx = QueryContext(topk=10, queries=queries)\n\n        with pytest.raises(\n            ValueError,\n            match=\"Collection has only one vector field, cannot query with multiple vectors\",\n        ):\n            executor._do_validate(ctx)\n\n    def test_do_build_without_queries(self):\n        schema = MockCollectionSchema()\n        executor = SingleVectorQueryExecutor(schema)\n        ctx = QueryContext(topk=5)\n\n        result = executor._do_build(ctx, MagicMock())\n        assert len(result) == 1\n        assert result[0].topk == 5\n\n\nclass TestMultiVectorQueryExecutor:\n    def test_init(self):\n        schema = MockCollectionSchema()\n        executor = MultiVectorQueryExecutor(schema)\n        assert isinstance(executor, SingleVectorQueryExecutor)\n\n    def test_do_validate_multiple_queries_without_reranker(self):\n        schema = MockCollectionSchema()\n        executor = MultiVectorQueryExecutor(schema)\n        queries = [VectorQuery(field_name=\"test1\"), VectorQuery(field_name=\"test2\")]\n        ctx = QueryContext(topk=10, queries=queries)\n\n        with pytest.raises(\n            ValueError, match=\"Reranker is required for multi-vector query\"\n        ):\n            executor._do_validate(ctx)\n\n    def test_do_validate_multiple_queries_with_reranker(self):\n        schema = MockCollectionSchema()\n        executor = MultiVectorQueryExecutor(schema)\n        queries = [VectorQuery(field_name=\"test1\"), VectorQuery(field_name=\"test2\")]\n        reranker = RrfReRanker()\n        ctx = QueryContext(topk=10, queries=queries, reranker=reranker)\n\n        executor._do_validate(ctx)\n\n\nclass TestQueryExecutorFactory:\n    def 
test_create_no_vectors(self):\n        schema = MockCollectionSchema()\n        executor = QueryExecutorFactory.create(schema)\n        assert isinstance(executor, NoVectorQueryExecutor)\n\n    def test_create_single_vector(self):\n        schema = MockCollectionSchema(vectors=MockVectorSchema())\n        executor = QueryExecutorFactory.create(schema)\n        assert isinstance(executor, SingleVectorQueryExecutor)\n\n    def test_create_multiple_vectors(self):\n        schema = MockCollectionSchema(\n            vectors={\"test1\": MockVectorSchema(), \"test2\": MockVectorSchema()}\n        )\n        executor = QueryExecutorFactory.create(schema)\n        assert isinstance(executor, MultiVectorQueryExecutor)\n"
  },
  {
    "path": "python/tests/test_reranker.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom unittest.mock import patch, MagicMock\nimport pytest\nimport math\nimport os\n\nfrom zvec import Doc, MetricType\nfrom zvec.extension.multi_vector_reranker import (\n    RrfReRanker,\n    WeightedReRanker,\n)\nfrom zvec.extension.sentence_transformer_rerank_function import (\n    DefaultLocalReRanker,\n)\nfrom zvec.extension.qwen_rerank_function import QwenReRanker\n\n# Set ZVEC_RUN_INTEGRATION_TESTS=1 to run real API tests\nRUN_INTEGRATION_TESTS = os.environ.get(\"ZVEC_RUN_INTEGRATION_TESTS\", \"0\") == \"1\"\n\n\n# ----------------------------\n# RrfRanker Test Case\n# ----------------------------\nclass TestRrfReRanker:\n    def test_init(self):\n        reranker = RrfReRanker(topn=5, rerank_field=\"content\", rank_constant=100)\n        assert reranker.topn == 5\n        assert reranker.rerank_field == \"content\"\n        assert reranker.rank_constant == 100\n\n    def test_rrf_score(self):\n        reranker = RrfReRanker(rank_constant=60)\n        # Per the formula 1.0 / (k + rank + 1), where k = 60\n        assert reranker._rrf_score(0) == 1.0 / (60 + 0 + 1)\n        assert reranker._rrf_score(1) == 1.0 / (60 + 1 + 1)\n        assert reranker._rrf_score(10) == 1.0 / (60 + 10 + 1)\n\n    def test_rerank(self):\n        reranker = RrfReRanker(topn=3)\n\n        doc1 = Doc(id=\"1\", score=0.8)\n        doc2 = Doc(id=\"2\", 
score=0.7)\n        doc3 = Doc(id=\"3\", score=0.9)\n        doc4 = Doc(id=\"4\", score=0.6)\n\n        query_results = {\"vector1\": [doc1, doc2, doc3], \"vector2\": [doc3, doc1, doc4]}\n\n        results = reranker.rerank(query_results)\n\n        assert len(results) <= reranker.topn\n\n        for doc in results:\n            assert hasattr(doc, \"score\")\n\n        scores = [doc.score for doc in results]\n        assert scores == sorted(scores, reverse=True)\n\n\n# ----------------------------\n# WeightedRanker Test Case\n# ----------------------------\nclass TestWeightedReRanker:\n    def test_init(self):\n        weights = {\"vector1\": 0.7, \"vector2\": 0.3}\n        reranker = WeightedReRanker(\n            topn=5,\n            rerank_field=\"content\",\n            metric=MetricType.L2,\n            weights=weights,\n        )\n        assert reranker.topn == 5\n        assert reranker.rerank_field == \"content\"\n        assert reranker.metric == MetricType.L2\n        assert reranker.weights == weights\n\n    def test_normalize_score(self):\n        reranker = WeightedReRanker()\n\n        score = reranker._normalize_score(1.0, MetricType.L2)\n        expected = 1.0 - 2 * math.atan(1.0) / math.pi\n        assert score == expected\n\n        score = reranker._normalize_score(1.0, MetricType.IP)\n        expected = 0.5 + math.atan(1.0) / math.pi\n        assert score == expected\n\n        score = reranker._normalize_score(1.0, MetricType.COSINE)\n        expected = 1.0 - 1.0 / 2.0\n        assert score == expected\n\n        with pytest.raises(ValueError, match=\"Unsupported metric type\"):\n            reranker._normalize_score(1.0, \"unsupported_metric\")\n\n    def test_rerank(self):\n        weights = {\"vector1\": 0.7, \"vector2\": 0.3}\n        reranker = WeightedReRanker(topn=3, weights=weights, metric=MetricType.L2)\n\n        doc1 = Doc(id=\"1\", score=0.8)\n        doc2 = Doc(id=\"2\", score=0.7)\n        doc3 = Doc(id=\"3\", score=0.9)\n\n     
   query_results = {\"vector1\": [doc1, doc2], \"vector2\": [doc2, doc3]}\n\n        results = reranker.rerank(query_results)\n\n        assert len(results) <= reranker.topn\n\n        for doc in results:\n            assert hasattr(doc, \"score\")\n\n        scores = [doc.score for doc in results]\n        assert scores == sorted(scores, reverse=True)\n\n\n# ----------------------------\n# QwenReRanker Test Case\n# ----------------------------\nclass TestQwenReRanker:\n    def test_init_without_query(self):\n        with pytest.raises(ValueError, match=\"Query is required for QwenReRanker\"):\n            QwenReRanker(api_key=\"test_key\")\n\n    def test_init_without_api_key(self):\n        with patch.dict(os.environ, {}, clear=True):\n            with pytest.raises(ValueError, match=\"DashScope API key is required\"):\n                QwenReRanker(query=\"test\")\n\n    @patch.dict(os.environ, {\"DASHSCOPE_API_KEY\": \"test_key\"})\n    def test_init_with_env_api_key(self):\n        reranker = QwenReRanker(query=\"test\", rerank_field=\"content\")\n        assert reranker.query == \"test\"\n        assert reranker._api_key == \"test_key\"\n        assert reranker.rerank_field == \"content\"\n\n    def test_init_with_explicit_api_key(self):\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"explicit_key\", rerank_field=\"content\"\n        )\n        assert reranker.query == \"test\"\n        assert reranker._api_key == \"explicit_key\"\n\n    def test_model_property(self):\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n        assert reranker.model == \"gte-rerank-v2\"\n\n        reranker = QwenReRanker(\n            query=\"test\",\n            model=\"custom-model\",\n            api_key=\"test_key\",\n            rerank_field=\"content\",\n        )\n        assert reranker.model == \"custom-model\"\n\n    def test_query_property(self):\n        reranker = 
QwenReRanker(\n            query=\"test query\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n        assert reranker.query == \"test query\"\n\n    def test_topn_property(self):\n        reranker = QwenReRanker(\n            query=\"test\", topn=5, api_key=\"test_key\", rerank_field=\"content\"\n        )\n        assert reranker.topn == 5\n\n    def test_rerank_field_property(self):\n        reranker = QwenReRanker(query=\"test\", api_key=\"test_key\", rerank_field=\"title\")\n        assert reranker.rerank_field == \"title\"\n\n    def test_rerank_empty_results(self):\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n        results = reranker.rerank({})\n        assert results == []\n\n    def test_rerank_no_valid_documents(self):\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n        # Document without the rerank_field\n        query_results = {\"vector1\": [Doc(id=\"1\")]}\n        with pytest.raises(ValueError, match=\"No documents to rerank\"):\n            reranker.rerank(query_results)\n\n    def test_rerank_skip_empty_content(self):\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n        query_results = {\n            \"vector1\": [\n                Doc(id=\"1\", fields={\"content\": \"\"}),\n                Doc(id=\"2\", fields={\"content\": \"   \"}),\n            ]\n        }\n        with pytest.raises(ValueError, match=\"No documents to rerank\"):\n            reranker.rerank(query_results)\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_rerank_success(self, mock_require_module):\n        # Mock dashscope module\n        mock_dashscope = MagicMock()\n        mock_require_module.return_value = mock_dashscope\n\n        # Mock API response\n        mock_response = MagicMock()\n        
mock_response.status_code = 200\n        mock_response.output = {\n            \"results\": [\n                {\"index\": 0, \"relevance_score\": 0.95},\n                {\"index\": 1, \"relevance_score\": 0.85},\n            ]\n        }\n        mock_dashscope.TextReRank.call.return_value = mock_response\n\n        reranker = QwenReRanker(\n            query=\"test query\", topn=2, api_key=\"test_key\", rerank_field=\"content\"\n        )\n\n        query_results = {\n            \"vector1\": [\n                Doc(id=\"1\", fields={\"content\": \"Document 1\"}),\n                Doc(id=\"2\", fields={\"content\": \"Document 2\"}),\n            ]\n        }\n\n        results = reranker.rerank(query_results)\n\n        assert len(results) == 2\n        assert results[0].id == \"1\"\n        assert results[0].score == 0.95\n        assert results[1].id == \"2\"\n        assert results[1].score == 0.85\n\n        # Verify API call\n        mock_dashscope.TextReRank.call.assert_called_once_with(\n            model=\"gte-rerank-v2\",\n            query=\"test query\",\n            documents=[\"Document 1\", \"Document 2\"],\n            top_n=2,\n            return_documents=False,\n        )\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_rerank_deduplicate_documents(self, mock_require_module):\n        # Mock dashscope module\n        mock_dashscope = MagicMock()\n        mock_require_module.return_value = mock_dashscope\n\n        # Mock API response\n        mock_response = MagicMock()\n        mock_response.status_code = 200\n        mock_response.output = {\n            \"results\": [\n                {\"index\": 0, \"relevance_score\": 0.9},\n            ]\n        }\n        mock_dashscope.TextReRank.call.return_value = mock_response\n\n        reranker = QwenReRanker(\n            query=\"test\", topn=5, api_key=\"test_key\", rerank_field=\"content\"\n        )\n\n        # Same document in multiple vector results\n        doc1 
= Doc(id=\"1\", fields={\"content\": \"Document 1\"})\n        query_results = {\"vector1\": [doc1], \"vector2\": [doc1]}\n\n        results = reranker.rerank(query_results)\n\n        # Should only call API with document once\n        call_args = mock_dashscope.TextReRank.call.call_args\n        assert len(call_args[1][\"documents\"]) == 1\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_rerank_api_error(self, mock_require_module):\n        # Mock dashscope module\n        mock_dashscope = MagicMock()\n        mock_require_module.return_value = mock_dashscope\n\n        # Mock API error response\n        mock_response = MagicMock()\n        mock_response.status_code = 400\n        mock_response.message = \"Invalid request\"\n        mock_response.code = \"InvalidParameter\"\n        mock_dashscope.TextReRank.call.return_value = mock_response\n\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n\n        query_results = {\"vector1\": [Doc(id=\"1\", fields={\"content\": \"Document 1\"})]}\n\n        with pytest.raises(ValueError, match=\"DashScope API error\"):\n            reranker.rerank(query_results)\n\n    @patch(\"zvec.extension.qwen_function.require_module\")\n    def test_rerank_runtime_error(self, mock_require_module):\n        # Mock dashscope module that raises exception\n        mock_dashscope = MagicMock()\n        mock_require_module.return_value = mock_dashscope\n        mock_dashscope.TextReRank.call.side_effect = Exception(\"Network error\")\n\n        reranker = QwenReRanker(\n            query=\"test\", api_key=\"test_key\", rerank_field=\"content\"\n        )\n\n        query_results = {\"vector1\": [Doc(id=\"1\", fields={\"content\": \"Document 1\"})]}\n\n        with pytest.raises(RuntimeError, match=\"Failed to call DashScope API\"):\n            reranker.rerank(query_results)\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n      
  reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_qwen_rerank(self):\n        \"\"\"Integration test with real DashScope TextReRank API.\n\n        To run this test, set environment variables:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n            export DASHSCOPE_API_KEY=your-api-key\n        \"\"\"\n        # Create reranker with real API\n        reranker = QwenReRanker(\n            query=\"What is machine learning?\",\n            topn=3,\n            rerank_field=\"content\",\n            model=\"gte-rerank-v2\",\n        )\n\n        # Prepare test documents\n        query_results = {\n            \"vector1\": [\n                Doc(\n                    id=\"1\",\n                    score=0.8,\n                    fields={\n                        \"content\": \"Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data.\"\n                    },\n                ),\n                Doc(\n                    id=\"2\",\n                    score=0.7,\n                    fields={\n                        \"content\": \"The weather is nice today with clear skies and sunshine.\"\n                    },\n                ),\n                Doc(\n                    id=\"3\",\n                    score=0.75,\n                    fields={\n                        \"content\": \"Deep learning is a specialized branch of machine learning using neural networks with multiple layers.\"\n                    },\n                ),\n            ],\n            \"vector2\": [\n                Doc(\n                    id=\"4\",\n                    score=0.6,\n                    fields={\n                        \"content\": \"Python is a popular programming language for data science and machine learning applications.\"\n                    },\n                ),\n                Doc(\n                    id=\"5\",\n                    score=0.65,\n      
              fields={\n                        \"content\": \"A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder.\"\n                    },\n                ),\n            ],\n        }\n\n        # Call real API\n        results = reranker.rerank(query_results)\n\n        # Verify results\n        assert len(results) <= 3, \"Should return at most topn documents\"\n        assert len(results) > 0, \"Should return at least one document\"\n\n        # All results should have valid scores\n        for doc in results:\n            assert hasattr(doc, \"score\"), \"Each document should have a score\"\n            assert isinstance(doc.score, (int, float)), \"Score should be numeric\"\n            assert doc.score > 0, \"Score should be positive\"\n\n        # Verify scores are in descending order\n        scores = [doc.score for doc in results]\n        assert scores == sorted(scores, reverse=True), (\n            \"Results should be sorted by score in descending order\"\n        )\n\n        # Verify relevant documents are ranked higher\n        # Document 1 and 3 are about machine learning, should rank higher than weather/recipe docs\n        result_ids = [doc.id for doc in results]\n\n        # At least one of the ML-related documents should be in top results\n        ml_related_docs = {\"1\", \"3\", \"4\"}\n        assert any(doc_id in ml_related_docs for doc_id in result_ids[:2]), (\n            \"ML-related documents should rank higher\"\n        )\n\n        # Print results for manual verification (useful during development)\n        print(\"\\nReranking results:\")\n        for i, doc in enumerate(results, 1):\n            print(f\"{i}. 
ID={doc.id}, Score={doc.score:.4f}\")\n            if doc.fields:\n                content = doc.field(\"content\")\n                if content:\n                    print(f\"   Content: {content[:80]}...\")\n\n\n# ----------------------------\n# DefaultLocalReRanker Test Case\n# ----------------------------\nclass TestDefaultLocalReRanker:\n    \"\"\"Test cases for DefaultLocalReRanker.\"\"\"\n\n    def test_init_without_query(self):\n        \"\"\"Test initialization fails without query.\"\"\"\n        with pytest.raises(\n            ValueError, match=\"Query is required for DefaultLocalReRanker\"\n        ):\n            DefaultLocalReRanker(rerank_field=\"content\")\n\n    def test_init_with_empty_query(self):\n        \"\"\"Test initialization fails with empty query.\"\"\"\n        with pytest.raises(\n            ValueError, match=\"Query is required for DefaultLocalReRanker\"\n        ):\n            DefaultLocalReRanker(query=\"\", rerank_field=\"content\")\n\n    @patch(\"zvec.extension.sentence_transformer_rerank_function.require_module\")\n    def test_init_success(self, mock_require_module):\n        \"\"\"Test successful initialization with mocked model.\"\"\"\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()  # Cross-encoder has predict method\n        mock_model.device = \"cpu\"\n        mock_st.CrossEncoder.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        reranker = DefaultLocalReRanker(\n            query=\"test query\",\n            topn=5,\n            rerank_field=\"content\",\n            model_name=\"cross-encoder/ms-marco-MiniLM-L6-v2\",\n        )\n\n        assert reranker.query == \"test query\"\n        assert reranker.topn == 5\n        assert reranker.rerank_field == \"content\"\n        assert reranker.model_name == \"cross-encoder/ms-marco-MiniLM-L6-v2\"\n        assert reranker.model_source == 
\"huggingface\"\n        assert reranker.batch_size == 32\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    @patch(\"zvec.extension.sentence_transformer_rerank_function.require_module\")\n    def test_init_with_custom_params(self, mock_require_module):\n        \"\"\"Test initialization with custom parameters.\"\"\"\n        mock_st = MagicMock()\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n        mock_model.device = \"cuda\"\n        mock_st.CrossEncoder.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        reranker = DefaultLocalReRanker(\n            query=\"custom query\",\n            topn=10,\n            rerank_field=\"title\",\n            model_name=\"cross-encoder/ms-marco-MiniLM-L12-v2\",\n            model_source=\"modelscope\",\n            device=\"cuda\",\n            batch_size=64,\n        )\n\n        assert reranker.query == \"custom query\"\n        assert reranker.topn == 10\n        assert reranker.rerank_field == \"title\"\n        assert reranker.model_name == \"cross-encoder/ms-marco-MiniLM-L12-v2\"\n        assert reranker.model_source == \"modelscope\"\n        assert reranker.batch_size == 64\n\n    @patch(\"zvec.extension.sentence_transformer_rerank_function.require_module\")\n    def test_init_invalid_model(self, mock_require_module):\n        \"\"\"Test initialization fails with non-cross-encoder model.\"\"\"\n        # Mock a model without predict method (not a cross-encoder)\n        mock_st = MagicMock()\n        mock_model = MagicMock(spec=[])  # No predict method\n        mock_st.CrossEncoder.return_value = mock_model\n        mock_require_module.return_value = mock_st\n\n        with pytest.raises(ValueError, match=\"does not appear to be a cross-encoder\"):\n            DefaultLocalReRanker(query=\"test\", rerank_field=\"content\")\n\n    def 
test_query_property(self):\n        \"\"\"Test query property.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test query\", rerank_field=\"content\")\n            assert reranker.query == \"test query\"\n\n    def test_topn_property(self):\n        \"\"\"Test topn property.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", topn=15, rerank_field=\"content\"\n            )\n            assert reranker.topn == 15\n\n    def test_rerank_field_property(self):\n        \"\"\"Test rerank_field property.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test\", rerank_field=\"title\")\n            assert reranker.rerank_field == \"title\"\n\n    def test_batch_size_property(self):\n        \"\"\"Test batch_size property.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            
\"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", rerank_field=\"content\", batch_size=128\n            )\n            assert reranker.batch_size == 128\n\n    def test_rerank_empty_results(self):\n        \"\"\"Test rerank with empty query_results.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test\", rerank_field=\"content\")\n            results = reranker.rerank({})\n            assert results == []\n\n    def test_rerank_no_valid_documents(self):\n        \"\"\"Test rerank with documents missing rerank_field.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test\", rerank_field=\"content\")\n\n            # Document without the rerank_field\n            query_results = {\"vector1\": [Doc(id=\"1\")]}\n            with pytest.raises(ValueError, match=\"No documents to rerank\"):\n                reranker.rerank(query_results)\n\n    def test_rerank_skip_empty_content(self):\n        \"\"\"Test rerank skips documents with empty content.\"\"\"\n        mock_model = MagicMock()\n        mock_model.predict = MagicMock()\n\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            
\"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test\", rerank_field=\"content\")\n\n            query_results = {\n                \"vector1\": [\n                    Doc(id=\"1\", fields={\"content\": \"\"}),\n                    Doc(id=\"2\", fields={\"content\": \"   \"}),\n                ]\n            }\n            with pytest.raises(ValueError, match=\"No documents to rerank\"):\n                reranker.rerank(query_results)\n\n    def test_rerank_success(self):\n        \"\"\"Test successful rerank with mocked model.\"\"\"\n        # Mock standard cross-encoder model\n        mock_model = MagicMock()\n\n        # Mock predict method to return scores\n        import numpy as np\n\n        mock_scores = np.array([0.95, 0.85, 0.75])\n        mock_model.predict.return_value = mock_scores\n        mock_model.device = \"cpu\"\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test query\", topn=3, rerank_field=\"content\"\n            )\n\n            query_results = {\n                \"vector1\": [\n                    Doc(id=\"1\", score=0.8, fields={\"content\": \"Document 1\"}),\n                    Doc(id=\"2\", score=0.7, fields={\"content\": \"Document 2\"}),\n                    Doc(id=\"3\", score=0.6, fields={\"content\": \"Document 3\"}),\n                ]\n            }\n\n            results = reranker.rerank(query_results)\n\n            # Verify results\n            assert len(results) == 3\n            assert results[0].id == \"1\"\n            assert results[0].score == 0.95\n            assert results[1].id == 
\"2\"\n            assert results[1].score == 0.85\n            assert results[2].id == \"3\"\n            assert results[2].score == 0.75\n\n            # Verify model.predict was called correctly\n            assert mock_model.predict.called\n            call_args = mock_model.predict.call_args\n            pairs = call_args[0][0]\n            assert len(pairs) == 3\n            assert pairs[0] == [\"test query\", \"Document 1\"]\n            assert pairs[1] == [\"test query\", \"Document 2\"]\n            assert pairs[2] == [\"test query\", \"Document 3\"]\n            assert call_args[1][\"batch_size\"] == 32\n            assert call_args[1][\"show_progress_bar\"] is False\n\n    def test_rerank_with_topn_limit(self):\n        \"\"\"Test rerank respects topn limit.\"\"\"\n        mock_model = MagicMock()\n\n        import numpy as np\n\n        mock_scores = np.array([0.9, 0.8, 0.7, 0.6, 0.5])\n        mock_model.predict.return_value = mock_scores\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", topn=2, rerank_field=\"content\"\n            )\n\n            query_results = {\n                \"vector1\": [\n                    Doc(id=\"1\", fields={\"content\": \"Doc 1\"}),\n                    Doc(id=\"2\", fields={\"content\": \"Doc 2\"}),\n                    Doc(id=\"3\", fields={\"content\": \"Doc 3\"}),\n                    Doc(id=\"4\", fields={\"content\": \"Doc 4\"}),\n                    Doc(id=\"5\", fields={\"content\": \"Doc 5\"}),\n                ]\n            }\n\n            results = reranker.rerank(query_results)\n\n            # Should only return top 2\n            assert len(results) == 2\n            assert results[0].id == 
\"1\"\n            assert results[0].score == 0.9\n            assert results[1].id == \"2\"\n            assert results[1].score == 0.8\n\n    def test_rerank_deduplicate_documents(self):\n        \"\"\"Test rerank deduplicates documents across multiple vectors.\"\"\"\n        mock_model = MagicMock()\n\n        import numpy as np\n\n        mock_scores = np.array([0.95, 0.85])\n        mock_model.predict.return_value = mock_scores\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", topn=5, rerank_field=\"content\"\n            )\n\n            # Same document in multiple vector results\n            doc1 = Doc(id=\"1\", fields={\"content\": \"Document 1\"})\n            doc2 = Doc(id=\"2\", fields={\"content\": \"Document 2\"})\n\n            query_results = {\n                \"vector1\": [doc1, doc2],\n                \"vector2\": [doc1],  # doc1 appears in both\n            }\n\n            results = reranker.rerank(query_results)\n\n            # Should only process each document once\n            assert len(results) == 2\n            assert mock_model.predict.call_count == 1\n\n            call_args = mock_model.predict.call_args\n            pairs = call_args[0][0]\n            assert len(pairs) == 2  # Only 2 unique documents\n\n    def test_rerank_sorting(self):\n        \"\"\"Test rerank sorts documents by score in descending order.\"\"\"\n        mock_model = MagicMock()\n\n        import numpy as np\n\n        # Return scores in non-sorted order\n        mock_scores = np.array([0.6, 0.9, 0.7])\n        mock_model.predict.return_value = mock_scores\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        
mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", topn=3, rerank_field=\"content\"\n            )\n\n            query_results = {\n                \"vector1\": [\n                    Doc(id=\"1\", fields={\"content\": \"Doc 1\"}),\n                    Doc(id=\"2\", fields={\"content\": \"Doc 2\"}),\n                    Doc(id=\"3\", fields={\"content\": \"Doc 3\"}),\n                ]\n            }\n\n            results = reranker.rerank(query_results)\n\n            # Should be sorted by score (descending)\n            assert len(results) == 3\n            assert results[0].id == \"2\"  # score 0.9\n            assert results[0].score == 0.9\n            assert results[1].id == \"3\"  # score 0.7\n            assert results[1].score == 0.7\n            assert results[2].id == \"1\"  # score 0.6\n            assert results[2].score == 0.6\n\n    def test_rerank_model_error(self):\n        \"\"\"Test rerank handles model prediction errors.\"\"\"\n        mock_model = MagicMock()\n\n        # Mock predict to raise exception\n        mock_model.predict.side_effect = Exception(\"Model inference error\")\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(query=\"test\", rerank_field=\"content\")\n\n            query_results = {\"vector1\": [Doc(id=\"1\", fields={\"content\": \"Document 1\"})]}\n\n            with pytest.raises(RuntimeError, match=\"Failed to compute rerank scores\"):\n                reranker.rerank(query_results)\n\n    def 
test_rerank_with_custom_batch_size(self):\n        \"\"\"Test rerank uses custom batch_size.\"\"\"\n        mock_model = MagicMock()\n\n        import numpy as np\n\n        mock_scores = np.array([0.9, 0.8])\n        mock_model.predict.return_value = mock_scores\n\n        # Mock sentence_transformers module\n        mock_st = MagicMock()\n        mock_st.CrossEncoder.return_value = mock_model\n\n        with patch(\n            \"zvec.extension.sentence_transformer_rerank_function.require_module\",\n            return_value=mock_st,\n        ):\n            reranker = DefaultLocalReRanker(\n                query=\"test\", rerank_field=\"content\", batch_size=64\n            )\n\n            query_results = {\n                \"vector1\": [\n                    Doc(id=\"1\", fields={\"content\": \"Doc 1\"}),\n                    Doc(id=\"2\", fields={\"content\": \"Doc 2\"}),\n                ]\n            }\n\n            reranker.rerank(query_results)\n\n            # Verify batch_size is passed to predict\n            call_args = mock_model.predict.call_args\n            assert call_args[1][\"batch_size\"] == 64\n\n    @pytest.mark.skipif(\n        not RUN_INTEGRATION_TESTS,\n        reason=\"Integration test skipped. 
Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.\",\n    )\n    def test_real_sentence_transformer_rerank(self):\n        \"\"\"Integration test with real SentenceTransformer cross-encoder model.\n\n        To run this test, set environment variable:\n            export ZVEC_RUN_INTEGRATION_TESTS=1\n\n        Note: This test requires sentence-transformers package and will\n        download the MS MARCO MiniLM model (~80MB) on first run.\n        \"\"\"\n        # Create reranker with real model (using default lightweight model)\n        reranker = DefaultLocalReRanker(\n            query=\"What is machine learning?\",\n            topn=3,\n            rerank_field=\"content\",\n        )\n\n        # Prepare test documents\n        query_results = {\n            \"vector1\": [\n                Doc(\n                    id=\"1\",\n                    score=0.8,\n                    fields={\n                        \"content\": \"Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data.\"\n                    },\n                ),\n                Doc(\n                    id=\"2\",\n                    score=0.7,\n                    fields={\n                        \"content\": \"The weather is nice today with clear skies and sunshine.\"\n                    },\n                ),\n                Doc(\n                    id=\"3\",\n                    score=0.75,\n                    fields={\n                        \"content\": \"Deep learning is a specialized branch of machine learning using neural networks with multiple layers.\"\n                    },\n                ),\n            ],\n            \"vector2\": [\n                Doc(\n                    id=\"4\",\n                    score=0.6,\n                    fields={\n                        \"content\": \"Python is a popular programming language for data science and machine learning applications.\"\n                    },\n                ),\n   
             Doc(\n                    id=\"5\",\n                    score=0.65,\n                    fields={\n                        \"content\": \"A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder.\"\n                    },\n                ),\n            ],\n        }\n\n        # Call real model\n        results = reranker.rerank(query_results)\n\n        # Verify results\n        assert len(results) <= 3, \"Should return at most topn documents\"\n        assert len(results) > 0, \"Should return at least one document\"\n\n        # All results should have valid scores\n        for doc in results:\n            assert hasattr(doc, \"score\"), \"Each document should have a score\"\n            assert isinstance(doc.score, (int, float)), \"Score should be numeric\"\n\n        # Verify scores are in descending order\n        scores = [doc.score for doc in results]\n        assert scores == sorted(scores, reverse=True), (\n            \"Results should be sorted by score in descending order\"\n        )\n\n        # Verify relevant documents are ranked higher\n        # Documents 1, 3, and 4 are about machine learning, should rank higher\n        result_ids = [doc.id for doc in results]\n\n        # At least one of the ML-related documents should be in top results\n        ml_related_docs = {\"1\", \"3\", \"4\"}\n        assert any(doc_id in ml_related_docs for doc_id in result_ids[:2]), (\n            \"ML-related documents should rank higher\"\n        )\n\n        # Print results for manual verification (useful during development)\n        print(\"\\nSentenceTransformer Reranking results:\")\n        for i, doc in enumerate(results, 1):\n            print(f\"{i}. ID={doc.id}, Score={doc.score:.4f}\")\n            if doc.fields:\n                content = doc.field(\"content\")\n                if content:\n                    print(f\"   Content: {content[:80]}...\")\n"
  },
  {
    "path": "python/tests/test_schema.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport pytest\nfrom zvec import (\n    CollectionSchema,\n    CollectionStats,\n    FieldSchema,\n    VectorSchema,\n    HnswIndexParam,\n    InvertIndexParam,\n    DataType,\n    IndexType,\n    MetricType,\n)\n\n# ----------------------------\n# FieldSchema Test Case\n# ----------------------------\n\n\nclass TestFieldSchema:\n    def test_default(self):\n        field = FieldSchema(\"field\", data_type=DataType.FLOAT)\n        assert field.name == \"field\"\n        assert field.data_type == DataType.FLOAT\n        assert field.nullable is False\n        assert field.index_param is None\n\n    def test_custom(self):\n        field_1 = FieldSchema(\n            name=\"float\",\n            data_type=DataType.FLOAT,\n            nullable=True,\n            index_param=InvertIndexParam(),\n        )\n        assert field_1.name == \"float\"\n        assert field_1.data_type == DataType.FLOAT\n        assert field_1.nullable is True\n        assert field_1.index_param.enable_range_optimization is False\n\n        field_2 = FieldSchema(\n            name=\"str\",\n            data_type=DataType.STRING,\n            nullable=True,\n            index_param=InvertIndexParam(enable_range_optimization=True),\n        )\n        assert field_2.name == \"str\"\n        assert field_2.data_type == DataType.STRING\n        assert 
field_2.nullable is True\n        assert field_2.index_param.enable_range_optimization is True\n\n    def test_readonly(self):\n        field = FieldSchema(\n            name=\"float\",\n            data_type=DataType.FLOAT,\n            nullable=True,\n            index_param=InvertIndexParam(),\n        )\n\n        import sys\n\n        if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            field.index_param = InvertIndexParam(enable_range_optimization=True)\n\n\n# ----------------------------\n# VectorSchema Test Case\n# ----------------------------\nclass TestVectorSchema:\n    def test_default(self):\n        field = VectorSchema(\"vector\", data_type=DataType.VECTOR_FP32, dimension=128)\n        assert field.name == \"vector\"\n        assert field.data_type == DataType.VECTOR_FP32\n        assert field.dimension == 128\n        assert field.index_param is not None\n        assert field.index_param.type == IndexType.FLAT\n        assert field.index_param.metric_type == MetricType.IP\n\n    def test_custom(self):\n        field = VectorSchema(\n            name=\"vector\",\n            data_type=DataType.VECTOR_INT8,\n            dimension=512,\n            index_param=HnswIndexParam(\n                metric_type=MetricType.COSINE, m=15, ef_construction=300\n            ),\n        )\n        assert field.name == \"vector\"\n        assert field.data_type == DataType.VECTOR_INT8\n        assert field.index_param.metric_type == MetricType.COSINE\n        assert field.index_param.m == 15\n        assert field.index_param.ef_construction == 300\n\n    def test_readonly(self):\n        field = VectorSchema(\n            name=\"vector\",\n            dimension=128,\n            data_type=DataType.VECTOR_INT8,\n        )\n\n        import sys\n\n        
if sys.version_info >= (3, 11):\n            match_pattern = r\"(can't set attribute|has no setter|readonly attribute)\"\n        else:\n            match_pattern = r\"can't set attribute\"\n        with pytest.raises(AttributeError, match=match_pattern):\n            field.dimension = 4\n\n\n# ----------------------------\n# CollectionSchema Test Case\n# ----------------------------\nclass TestCollectionSchema:\n    def test_collection_schema_with_single_field(self):\n        collection_schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=FieldSchema(\n                name=\"id\",\n                data_type=DataType.INT64,\n                index_param=InvertIndexParam(),\n                nullable=False,\n            ),\n            vectors=VectorSchema(\n                name=\"vector\",\n                data_type=DataType.VECTOR_INT8,\n                dimension=128,\n                index_param=HnswIndexParam(),\n            ),\n        )\n\n        assert collection_schema is not None\n        assert collection_schema.name == \"test_collection\"\n        assert len(collection_schema.fields) == 1\n        assert len(collection_schema.vectors) == 1\n\n        field = collection_schema.field(\"id\")\n        assert field is not None\n        assert field.name == \"id\"\n        assert field.data_type == DataType.INT64\n        assert not field.nullable\n        assert field.index_param.type == IndexType.INVERT\n        assert not field.index_param.enable_range_optimization\n\n        vector = collection_schema.vector(\"vector\")\n        assert vector is not None\n        assert vector.name == \"vector\"\n        assert vector.data_type == DataType.VECTOR_INT8\n        assert vector.dimension == 128\n        assert vector.index_param.type == IndexType.HNSW\n        assert vector.index_param.m == 50\n        assert vector.index_param.ef_construction == 500\n        assert vector.index_param.metric_type == MetricType.IP\n\n    def 
test_collection_schema_with_multi_fields(self):\n        collection_schema = CollectionSchema(\n            name=\"test_collection\",\n            fields=[\n                FieldSchema(\n                    \"id\",\n                    DataType.INT64,\n                    nullable=False,\n                    index_param=InvertIndexParam(enable_range_optimization=True),\n                ),\n                FieldSchema(\n                    \"name\",\n                    DataType.STRING,\n                    nullable=False,\n                    index_param=InvertIndexParam(),\n                ),\n                FieldSchema(\n                    \"weight\",\n                    DataType.INT32,\n                    nullable=True,\n                ),\n            ],\n            vectors=[\n                VectorSchema(\n                    \"dense\",\n                    DataType.VECTOR_FP32,\n                    dimension=128,\n                    index_param=HnswIndexParam(),\n                ),\n                VectorSchema(\n                    \"sparse\", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()\n                ),\n            ],\n        )\n        assert collection_schema is not None\n        assert collection_schema.name == \"test_collection\"\n        assert len(collection_schema.fields) == 3\n        assert len(collection_schema.vectors) == 2\n\n        field_id = collection_schema.field(\"id\")\n        assert field_id is not None\n        assert field_id.name == \"id\"\n        assert field_id.data_type == DataType.INT64\n        assert not field_id.nullable\n        assert field_id.index_param.type == IndexType.INVERT\n\n        dense = collection_schema.vector(\"dense\")\n        assert dense is not None\n        assert dense.name == \"dense\"\n        assert dense.data_type == DataType.VECTOR_FP32\n        assert dense.dimension == 128\n        assert dense.index_param.type == IndexType.HNSW\n\n        sparse = 
collection_schema.vector(\"sparse\")\n        assert sparse is not None\n        assert sparse.name == \"sparse\"\n        assert sparse.data_type == DataType.SPARSE_VECTOR_FP32\n        assert sparse.dimension == 0\n        assert sparse.index_param.type == IndexType.HNSW\n\n        assert str(collection_schema) is not None\n\n\n# ----------------------------\n# CollectionStats Test Case\n# ----------------------------\nclass TestCollectionStats:\n    \"\"\"\n    The constructor of CollectionStats is not provided.\n    It can only be obtained through collection.stats()\n    \"\"\"\n\n    def test_collection_stats(self):\n        stats = CollectionStats()\n        assert stats is not None\n"
  },
  {
    "path": "python/tests/test_typing.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport pytest\nfrom zvec import (\n    DataType,\n    IndexType,\n    MetricType,\n    QuantizeType,\n    Status,\n    StatusCode,\n)\n\n\n# ----------------------------\n# Enum Test Case\n# ----------------------------\n@pytest.mark.parametrize(\n    \"member, name\",\n    [\n        (DataType.FLOAT, \"FLOAT\"),\n        (IndexType.HNSW, \"HNSW\"),\n        (MetricType.COSINE, \"COSINE\"),\n        (QuantizeType.INT8, \"INT8\"),\n        (StatusCode.OK, \"OK\"),\n    ],\n)\ndef test_enum_names(member, name):\n    assert member.name == name\n\n\n@pytest.mark.parametrize(\n    \"member, value\",\n    [\n        (DataType.FLOAT, 8),\n        (IndexType.HNSW, 1),\n        (MetricType.COSINE, 3),\n        (QuantizeType.INT8, 2),\n        (StatusCode.OK, 0),\n    ],\n)\ndef test_enum_values(member, value):\n    assert member.value == value\n\n\n@pytest.mark.parametrize(\"member\", [\"L2\", \"IP\", \"COSINE\"])\ndef test_metric_type_has_member(member):\n    assert member in MetricType.__members__\n\n\n@pytest.mark.parametrize(\n    \"member\",\n    [\n        \"STRING\",\n        \"BOOL\",\n        \"INT32\",\n        \"INT64\",\n        \"FLOAT\",\n        \"DOUBLE\",\n        \"UINT32\",\n        \"UINT64\",\n        \"VECTOR_FP16\",\n        \"VECTOR_FP32\",\n        \"VECTOR_FP64\",\n        \"VECTOR_INT8\",\n        
\"SPARSE_VECTOR_FP32\",\n        \"SPARSE_VECTOR_FP16\",\n        \"ARRAY_STRING\",\n        \"ARRAY_INT32\",\n        \"ARRAY_INT64\",\n        \"ARRAY_FLOAT\",\n        \"ARRAY_DOUBLE\",\n        \"ARRAY_BOOL\",\n        \"ARRAY_UINT32\",\n        \"ARRAY_UINT64\",\n    ],\n)\ndef test_data_type_has_member(member):\n    assert member in DataType.__members__\n\n\n@pytest.mark.parametrize(\"member\", [\"HNSW\", \"IVF\", \"FLAT\", \"INVERT\"])\ndef test_index_type_has_member(member):\n    assert member in IndexType.__members__\n\n\n@pytest.mark.parametrize(\"member\", [\"FP16\", \"INT8\", \"INT4\", \"UNDEFINED\"])\ndef test_quantize_type_has_member(member):\n    assert member in QuantizeType.__members__\n\n\n@pytest.mark.parametrize(\n    \"member\",\n    [\n        \"OK\",\n        \"UNKNOWN\",\n        \"NOT_FOUND\",\n        \"ALREADY_EXISTS\",\n        \"INVALID_ARGUMENT\",\n        \"PERMISSION_DENIED\",\n        \"FAILED_PRECONDITION\",\n        \"RESOURCE_EXHAUSTED\",\n        \"UNAVAILABLE\",\n        \"INTERNAL_ERROR\",\n        \"NOT_SUPPORTED\",\n    ],\n)\ndef test_status_code_has_member(member):\n    assert member in StatusCode.__members__\n\n\n# ----------------------------\n# Status Test Case\n# ----------------------------\nclass TestStatus:\n    def test_status_code(self):\n        status = Status(StatusCode.OK)\n        assert status.code() == StatusCode.OK\n\n    def test_status_message(self):\n        status = Status(StatusCode.OK, \"OK\")\n        assert status.message() == \"OK\"\n\n        status = Status(StatusCode.NOT_FOUND, \"Not Found\")\n        assert status.message() == \"Not Found\"\n\n    def test_status_ok(self):\n        status = Status(StatusCode.OK)\n        assert status.ok()\n"
  },
  {
    "path": "python/tests/test_util.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom unittest.mock import MagicMock, patch\n\nimport pytest\nfrom zvec import require_module\n\n\n# ----------------------------\n# require_module func Test Case\n# ----------------------------\ndef test_require_module_success():\n    module = require_module(\"os\")\n    assert module is not None\n    assert hasattr(module, \"path\")\n\n\ndef test_require_module_with_submodule_success():\n    module = require_module(\"os.path\")\n    assert module is not None\n    assert hasattr(module, \"join\")\n\n\ndef test_require_module_import_error():\n    with pytest.raises(ImportError) as exc_info:\n        require_module(\"nonexistent_module\")\n\n    exception_msg = str(exc_info.value)\n    assert \"Required package 'nonexistent_module' is not installed.\" in exception_msg\n\n\ndef test_require_module_with_mitigation_import_error():\n    with pytest.raises(ImportError) as exc_info:\n        require_module(\"nonexistent_module.submodule\", mitigation=\"custom_package\")\n\n    exception_msg = str(exc_info.value)\n    assert \"Required package 'custom_package' is not installed.\" in exception_msg\n    assert (\n        \"Module 'nonexistent_module.submodule' is part of 'nonexistent_module'\"\n        in exception_msg\n    )\n    assert \"please pip install 'custom_package'.\" in exception_msg\n\n\ndef 
test_require_module_submodule_import_error():\n    with pytest.raises(ImportError) as exc_info:\n        require_module(\"os.nonexistent_submodule\")\n\n    exception_msg = str(exc_info.value)\n    assert (\n        \"Required package 'os.nonexistent_submodule' is not installed.\" in exception_msg\n    )\n    assert \"Module 'os.nonexistent_submodule' is part of 'os'\" in exception_msg\n    assert \"please pip install 'os'.\" in exception_msg\n\n\n@patch(\"importlib.import_module\")\ndef test_require_module_wraps_original_exception(mock_import_module):\n    original_exception = ImportError(\"Original error\")\n    mock_import_module.side_effect = original_exception\n\n    with pytest.raises(ImportError) as exc_info:\n        require_module(\"some_module\")\n\n    assert exc_info.value.__cause__ is original_exception\n\n\n@patch(\"importlib.import_module\")\ndef test_require_module_calls_importlib(mock_import_module):\n    mock_module = MagicMock()\n    mock_import_module.return_value = mock_module\n\n    result = require_module(\"test_module\")\n\n    mock_import_module.assert_called_once_with(\"test_module\")\n    assert result is mock_module\n"
  },
  {
    "path": "python/zvec/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nfrom __future__ import annotations\n\nimport sys\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from importlib.metadata import PackageNotFoundError\n\n\n# ==============================\n# Public API — grouped by category\n# ==============================\n\nfrom . import model as model\n\n# —— Extensions ——\nfrom .extension import (\n    BM25EmbeddingFunction,\n    DefaultLocalDenseEmbedding,\n    DefaultLocalReRanker,\n    DefaultLocalSparseEmbedding,\n    DenseEmbeddingFunction,\n    OpenAIDenseEmbedding,\n    OpenAIFunctionBase,\n    QwenDenseEmbedding,\n    QwenFunctionBase,\n    QwenReRanker,\n    QwenSparseEmbedding,\n    ReRanker,\n    RrfReRanker,\n    SentenceTransformerFunctionBase,\n    SparseEmbeddingFunction,\n    WeightedReRanker,\n)\n\n# —— Typing ——\nfrom .model import param as param\nfrom .model import schema as schema\n\n# —— Core data structures ——\nfrom .model.collection import Collection\nfrom .model.doc import Doc\n\n# —— Query & index parameters ——\nfrom .model.param import (\n    AddColumnOption,\n    AlterColumnOption,\n    CollectionOption,\n    FlatIndexParam,\n    HnswIndexParam,\n    HnswQueryParam,\n    HnswRabitqIndexParam,\n    HnswRabitqQueryParam,\n    IndexOption,\n    InvertIndexParam,\n    IVFIndexParam,\n    IVFQueryParam,\n    OptimizeOption,\n)\nfrom .model.param.vector_query import VectorQuery\n\n# 
—— Schema & field definitions ——\nfrom .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema\n\n# —— tools ——\nfrom .tool import require_module\nfrom .typing import (\n    DataType,\n    IndexType,\n    MetricType,\n    QuantizeType,\n    Status,\n    StatusCode,\n)\nfrom .typing.enum import LogLevel, LogType\n\n# —— lifecycle ——\nfrom .zvec import create_and_open, init, open\n\n# ==============================\n# Public interface declaration\n# ==============================\n__all__ = [\n    # Zvec functions\n    \"create_and_open\",\n    \"init\",\n    \"open\",\n    # Core classes\n    \"Collection\",\n    \"Doc\",\n    # Schema\n    \"CollectionSchema\",\n    \"FieldSchema\",\n    \"VectorSchema\",\n    \"CollectionStats\",\n    # Parameters\n    \"VectorQuery\",\n    \"InvertIndexParam\",\n    \"HnswIndexParam\",\n    \"HnswRabitqIndexParam\",\n    \"HnswRabitqQueryParam\",\n    \"FlatIndexParam\",\n    \"IVFIndexParam\",\n    \"CollectionOption\",\n    \"IndexOption\",\n    \"OptimizeOption\",\n    \"AddColumnOption\",\n    \"AlterColumnOption\",\n    \"HnswQueryParam\",\n    \"IVFQueryParam\",\n    # Extensions\n    \"DenseEmbeddingFunction\",\n    \"SparseEmbeddingFunction\",\n    \"QwenFunctionBase\",\n    \"OpenAIFunctionBase\",\n    \"SentenceTransformerFunctionBase\",\n    \"ReRanker\",\n    \"DefaultLocalDenseEmbedding\",\n    \"DefaultLocalSparseEmbedding\",\n    \"BM25EmbeddingFunction\",\n    \"OpenAIDenseEmbedding\",\n    \"QwenDenseEmbedding\",\n    \"QwenSparseEmbedding\",\n    \"RrfReRanker\",\n    \"WeightedReRanker\",\n    \"DefaultLocalReRanker\",\n    \"QwenReRanker\",\n    # Typing\n    \"DataType\",\n    \"MetricType\",\n    \"QuantizeType\",\n    \"IndexType\",\n    \"LogLevel\",\n    \"LogType\",\n    \"Status\",\n    \"StatusCode\",\n    # Tools\n    \"require_module\",\n]\n\n# ==============================\n# Version handling\n# ==============================\n__version__: str\n\ntry:\n    from 
importlib.metadata import version\nexcept ImportError:\n    from importlib_metadata import version  # Python < 3.8\n\ntry:\n    __version__ = version(\"zvec\")\nexcept Exception:\n    __version__ = \"unknown\"\n"
  },
  {
    "path": "python/zvec/__init__.pyi",
    "content": "\"\"\"\nZvec core module\n\"\"\"\n\nfrom __future__ import annotations\n\nimport collections\n\nfrom . import typing\nfrom .extension import ReRanker, RrfReRanker, WeightedReRanker\nfrom .extension.embedding import DenseEmbeddingFunction\nfrom .model import param, schema\nfrom .model.collection import Collection\nfrom .model.doc import Doc\nfrom .model.param import (\n    AddColumnOption,\n    AlterColumnOption,\n    CollectionOption,\n    FlatIndexParam,\n    HnswIndexParam,\n    HnswQueryParam,\n    IndexOption,\n    InvertIndexParam,\n    IVFIndexParam,\n    IVFQueryParam,\n    OptimizeOption,\n)\nfrom .model.param.vector_query import VectorQuery\nfrom .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema\nfrom .tool import require_module\nfrom .typing import (\n    DataType,\n    IndexType,\n    MetricType,\n    QuantizeType,\n    Status,\n    StatusCode,\n)\nfrom .typing.enum import LogLevel, LogType\nfrom .zvec import create_and_open, init, open\n\n__all__: list = [\n    \"AddColumnOption\",\n    \"AlterColumnOption\",\n    \"Collection\",\n    \"CollectionOption\",\n    \"CollectionSchema\",\n    \"CollectionStats\",\n    \"DataType\",\n    \"DenseEmbeddingFunction\",\n    \"DenseEmbeddingFunction\",\n    \"Doc\",\n    \"FieldSchema\",\n    \"FlatIndexParam\",\n    \"HnswIndexParam\",\n    \"HnswQueryParam\",\n    \"IVFIndexParam\",\n    \"IVFQueryParam\",\n    \"IndexOption\",\n    \"IndexType\",\n    \"InvertIndexParam\",\n    \"LogLevel\",\n    \"LogType\",\n    \"MetricType\",\n    \"OptimizeOption\",\n    \"QuantizeType\",\n    \"ReRanker\",\n    \"ReRanker\",\n    \"RrfReRanker\",\n    \"Status\",\n    \"StatusCode\",\n    \"VectorQuery\",\n    \"VectorSchema\",\n    \"WeightedReRanker\",\n    \"create_and_open\",\n    \"init\",\n    \"open\",\n    \"require_module\",\n]\n\nclass _Collection:\n    @staticmethod\n    def CreateAndOpen(\n        arg0: str, arg1: schema._CollectionSchema, arg2: 
param.CollectionOption\n    ) -> _Collection: ...\n    @staticmethod\n    def Open(arg0: str, arg1: param.CollectionOption) -> _Collection: ...\n    def AddColumn(\n        self,\n        arg0: schema._FieldSchema,\n        arg1: str,\n        arg2: param.AddColumnOption,\n    ) -> None: ...\n    def AlterColumn(\n        self,\n        arg0: str,\n        arg1: str,\n        arg2: schema._FieldSchema,\n        arg3: param.AlterColumnOption,\n    ) -> None: ...\n    def CreateIndex(\n        self, arg0: str, arg1: param.IndexParam, arg2: param.IndexOption\n    ) -> None: ...\n    def Delete(self, arg0: collections.abc.Sequence[str]) -> list[typing.Status]: ...\n    def DeleteByFilter(self, arg0: str) -> None: ...\n    def Destroy(self) -> None: ...\n    def DropColumn(self, arg0: str) -> None: ...\n    def DropIndex(self, arg0: str) -> None: ...\n    def Fetch(self, arg0: collections.abc.Sequence[str]) -> dict[str, _Doc]: ...\n    def Flush(self) -> None: ...\n    def GroupByQuery(self, arg0: ...) 
-> list[...]: ...\n    def Insert(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...\n    def Optimize(self, arg0: param.OptimizeOption) -> None: ...\n    def Options(self) -> param.CollectionOption: ...\n    def Path(self) -> str: ...\n    def Query(self, arg0: param._VectorQuery) -> list[_Doc]: ...\n    def Schema(self) -> schema._CollectionSchema: ...\n    def Stats(self) -> schema.CollectionStats: ...\n    def Update(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...\n    def Upsert(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...\n    def __getstate__(self) -> tuple: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n\nclass _Doc:\n    def __getstate__(self) -> bytes: ...\n    def __init__(self) -> None: ...\n    def __setstate__(self, arg0: bytes) -> None: ...\n    def field_names(self) -> list[str]: ...\n    def get_any(self, arg0: str, arg1: typing.DataType) -> typing.Any: ...\n    def has_field(self, arg0: str) -> bool: ...\n    def pk(self) -> str: ...\n    def score(self) -> float: ...\n    def set_any(self, arg0: str, arg1: typing.DataType, arg2: typing.Any) -> bool: ...\n    def set_pk(self, arg0: str) -> None: ...\n    def set_score(self, arg0: typing.SupportsFloat) -> None: ...\n\nclass _DocOp:\n    \"\"\"\n    Members:\n\n      INSERT\n\n      UPDATE\n\n      DELETE\n\n      UPSERT\n    \"\"\"\n\n    DELETE: typing.ClassVar[_DocOp]  # value = <_DocOp.DELETE: 3>\n    INSERT: typing.ClassVar[_DocOp]  # value = <_DocOp.INSERT: 0>\n    UPDATE: typing.ClassVar[_DocOp]  # value = <_DocOp.UPDATE: 2>\n    UPSERT: typing.ClassVar[_DocOp]  # value = <_DocOp.UPSERT: 1>\n    __members__: typing.ClassVar[\n        dict[str, _DocOp]\n    ]  # value = {'INSERT': <_DocOp.INSERT: 0>, 'UPDATE': <_DocOp.UPDATE: 2>, 'DELETE': <_DocOp.DELETE: 3>, 'UPSERT': <_DocOp.UPSERT: 1>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> 
int: ...\n    def __index__(self) -> int: ...\n    def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n"
  },
  {
    "path": "python/zvec/common/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom .constants import DenseVectorType, SparseVectorType, VectorType\n\n__all__ = [\"DenseVectorType\", \"SparseVectorType\", \"VectorType\"]\n"
  },
  {
    "path": "python/zvec/common/constants.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Optional, TypeVar, Union\n\nimport numpy as np\n\n# VectorType: DenseVectorType | SparseVectorType\nDenseVectorType = Union[list[float], list[int], np.ndarray]\nSparseVectorType = dict[int, float]\nVectorType = Optional[Union[DenseVectorType, SparseVectorType]]\n\n# Embeddable: Text | Image | Audio\nTEXT = str\nIMAGE = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array\nAUDIO = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array\n\nEmbeddable = Optional[Union[TEXT, IMAGE, AUDIO]]\n\n# Multimodal Embeddable\nMD = TypeVar(\"MD\", bound=Embeddable, contravariant=True)\n"
  },
  {
    "path": "python/zvec/executor/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom .query_executor import (\n    QueryContext,\n    QueryExecutor,\n    QueryExecutorFactory,\n)\n\n__all__ = [\n    \"QueryContext\",\n    \"QueryExecutor\",\n    \"QueryExecutorFactory\",\n]\n"
  },
  {
    "path": "python/zvec/executor/query_executor.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport os\nfrom abc import ABC, abstractmethod\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom typing import Optional, Union, final\n\nimport numpy as np\nfrom _zvec import _Collection\nfrom _zvec.param import _VectorQuery\n\nfrom ..extension import ReRanker, RrfReRanker, WeightedReRanker\nfrom ..model.convert import convert_to_py_doc\nfrom ..model.doc import Doc\nfrom ..model.param.vector_query import VectorQuery\nfrom ..model.schema import CollectionSchema\nfrom ..typing import DataType\n\n__all__ = [\n    \"QueryContext\",\n    \"QueryExecutor\",\n    \"QueryExecutorFactory\",\n]\n\nDTYPE_MAP = {\n    DataType.VECTOR_FP16.value: np.float16,\n    DataType.VECTOR_FP32.value: np.float32,\n    DataType.VECTOR_FP64.value: np.float64,\n    DataType.VECTOR_INT8.value: np.int8,\n}\n\n\ndef convert_to_numpy(vec: Union[list, np.ndarray], dtype: np.dtype) -> np.ndarray:\n    if isinstance(vec, np.ndarray):\n        if vec.dtype == dtype and vec.ndim == 1:\n            return vec\n        return np.asarray(vec, dtype=dtype).flatten()\n\n    try:\n        arr = np.asarray(vec, dtype=dtype)\n        if arr.ndim != 1:\n            arr = arr.flatten()\n        return arr\n    except (ValueError, TypeError) as e:\n        raise TypeError(\n            f\"Cannot convert input to 1D numpy array with 
dtype={dtype}: {type(vec)}\"\n        ) from e\n\n\nclass QueryContext:\n    def __init__(\n        self,\n        topk: int,\n        filter: Optional[str] = None,\n        include_vector: bool = False,\n        queries: Optional[list[VectorQuery]] = None,\n        output_fields: Optional[list[str]] = None,\n        reranker: Optional[ReRanker] = None,\n    ):\n        # query param\n        self._filter = filter\n        self._queries = queries or []\n        self._topk = topk\n        self._include_vector = include_vector\n        self._output_fields = output_fields\n\n        # reranker\n        self._reranker = reranker\n\n        # core vectors\n        self._core_vectors = []\n\n    @property\n    def topk(self):\n        return self._topk\n\n    @property\n    def queries(self):\n        return self._queries\n\n    @property\n    def filter(self):\n        return self._filter\n\n    @property\n    def reranker(self):\n        return self._reranker\n\n    @property\n    def output_fields(self):\n        return self._output_fields\n\n    @property\n    def include_vector(self):\n        return self._include_vector\n\n    @property\n    def core_vectors(self):\n        return self._core_vectors\n\n    @core_vectors.setter\n    def core_vectors(self, core_vectors: list[_VectorQuery]):\n        self._core_vectors = core_vectors\n\n\nclass QueryExecutor(ABC):\n    def __init__(self, schema: CollectionSchema):\n        self._schema = schema\n        self._concurrency = max(1, int(os.getenv(\"ZVEC_QUERY_CONCURRENCY\", \"1\")))\n\n    @abstractmethod\n    def _do_validate(self, ctx: QueryContext) -> None:\n        pass\n\n    @abstractmethod\n    def _do_build(\n        self, ctx: QueryContext, collection: _Collection\n    ) -> list[_VectorQuery]:\n        pass\n\n    def _do_build_query_wo_vector(self, ctx: QueryContext) -> _VectorQuery:\n        core_vector = _VectorQuery()\n        core_vector.topk = ctx.topk\n        core_vector.include_vector = 
ctx.include_vector\n        if ctx.filter:\n            core_vector.filter = ctx.filter\n        if ctx.output_fields:\n            core_vector.output_fields = ctx.output_fields\n        return core_vector\n\n    def _do_build_query_with_vector(\n        self, ctx: QueryContext, query: VectorQuery, collection: _Collection\n    ) -> _VectorQuery:\n        core_vector = self._do_build_query_wo_vector(ctx)\n        core_vector.field_name = query.field_name\n        if query.param:\n            core_vector.query_params = query.param\n\n        vector_schema = (\n            self._schema.vector(query.field_name) if query else self._schema.vectors[0]\n        )\n\n        if vector_schema is None:\n            raise ValueError(\"No vector field found\")\n\n        # set output_fields\n        core_vector.output_fields = ctx.output_fields\n\n        # set vector\n        if query.has_vector():\n            vec_data = query.vector\n        else:\n            fetched = collection.Fetch([query.id])\n            doc = next(iter(fetched.values()))\n            if not doc:\n                return core_vector\n            vec_data = doc.get_any(vector_schema.name, vector_schema.data_type)\n\n        target_dtype = DTYPE_MAP.get(vector_schema.data_type.value)\n        core_vector.set_vector(\n            vector_schema._get_object(),\n            convert_to_numpy(vec_data, target_dtype) if target_dtype else vec_data,\n        )\n        return core_vector\n\n    def _do_execute(\n        self, vectors: list[_VectorQuery], collection: _Collection\n    ) -> dict[str, list[Doc]]:\n        query_cnt = len(vectors)\n        if query_cnt == 0:\n            raise ValueError(\"No query to execute\")\n\n        if len(vectors) == 1 or self._concurrency == 1:\n            results = {}\n            for query in vectors:\n                docs = collection.Query(query)\n                results[query.field_name] = [\n                    convert_to_py_doc(doc, self._schema) for doc in docs\n     
           ]\n            return results\n\n        results = {}\n        with ThreadPoolExecutor(max_workers=self._concurrency) as executor:\n            future_to_query = {\n                executor.submit(collection.Query, query): query.field_name\n                for query in vectors\n            }\n\n            for future in as_completed(future_to_query):\n                field_name = future_to_query[future]\n                try:\n                    docs = future.result()\n                    results[field_name] = [\n                        convert_to_py_doc(doc, self._schema) for doc in docs\n                    ]\n                except Exception as e:\n                    raise e\n        return results\n\n    def _do_merge_rerank_results(\n        self, ctx: QueryContext, docs_map: dict[str, list[Doc]]\n    ) -> list[Doc]:\n        query_result_cnt = len(docs_map) if docs_map else 0\n        if query_result_cnt == 0:\n            raise ValueError(\"Query results is none and dost not to rerank\")\n        if query_result_cnt == 1:\n            if not ctx.reranker or isinstance(\n                ctx.reranker, (RrfReRanker, WeightedReRanker)\n            ):\n                return next(iter(docs_map.values()))\n            return ctx.reranker.rerank(docs_map)\n        return ctx.reranker.rerank(docs_map)\n\n    @final\n    def execute(self, ctx: QueryContext, collection: _Collection) -> list[Doc]:\n        # 1. validate query\n        self._do_validate(ctx)\n        # 2. build query vector\n        query_vectors = self._do_build(ctx, collection)\n        if not query_vectors:\n            raise ValueError(\"No query to execute\")\n        # 3. execute query\n        docs = self._do_execute(query_vectors, collection)\n        # 4. 
merge and rerank result\n        return self._do_merge_rerank_results(ctx, docs)\n\n\nclass NoVectorQueryExecutor(QueryExecutor):\n    def __init__(self, schema: CollectionSchema):\n        super().__init__(schema)\n\n    def _do_validate(self, ctx: QueryContext) -> None:\n        if len(ctx.queries) > 0:\n            raise ValueError(\"Collection does not support query with vector or id\")\n\n    def _do_build(\n        self, ctx: QueryContext, _collection: _Collection\n    ) -> list[_VectorQuery]:\n        return [self._do_build_query_wo_vector(ctx)]\n\n\nclass SingleVectorQueryExecutor(NoVectorQueryExecutor):\n    def __init__(self, schema: CollectionSchema) -> None:\n        super().__init__(schema)\n\n    def _do_validate(self, ctx: QueryContext) -> None:\n        if len(ctx.queries) > 1:\n            raise ValueError(\n                \"Collection has only one vector field, cannot query with multiple vectors\"\n            )\n        for query in ctx.queries:\n            query._validate()\n\n    def _do_build(\n        self, ctx: QueryContext, collection: _Collection\n    ) -> list[_VectorQuery]:\n        if len(ctx.queries) == 0:\n            return [self._do_build_query_wo_vector(ctx)]\n        vectors = []\n        for query in ctx.queries:\n            vectors.append(self._do_build_query_with_vector(ctx, query, collection))\n        return vectors\n\n\nclass MultiVectorQueryExecutor(SingleVectorQueryExecutor):\n    def __init__(self, schema: CollectionSchema) -> None:\n        super().__init__(schema)\n\n    def _do_validate(self, ctx: QueryContext) -> None:\n        if len(ctx.queries) > 1 and ctx.reranker is None:\n            raise ValueError(\"Reranker is required for multi-vector query\")\n        seen_fields = set()\n        for query in ctx.queries:\n            query._validate()\n            field = query.field_name\n            if field in seen_fields:\n                raise ValueError(f\"Query field name '{field}' appears more than once\")\n    
        seen_fields.add(field)\n\n    def _do_execute(\n        self, vectors: list[_VectorQuery], collection: _Collection\n    ) -> dict[str, list[Doc]]:\n        return super()._do_execute(vectors, collection)\n\n\nclass QueryExecutorFactory:\n    @staticmethod\n    def create(schema: CollectionSchema) -> QueryExecutor:\n        vectors = schema.vectors\n        if len(vectors) == 0:\n            return NoVectorQueryExecutor(schema)\n        if len(vectors) == 1:\n            return SingleVectorQueryExecutor(schema)\n        return MultiVectorQueryExecutor(schema)\n"
  },
  {
    "path": "python/zvec/extension/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom .bm25_embedding_function import BM25EmbeddingFunction\nfrom .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction\nfrom .http_embedding_function import HTTPDenseEmbedding\nfrom .jina_embedding_function import JinaDenseEmbedding\nfrom .jina_function import JinaFunctionBase\nfrom .multi_vector_reranker import RrfReRanker, WeightedReRanker\nfrom .openai_embedding_function import OpenAIDenseEmbedding\nfrom .openai_function import OpenAIFunctionBase\nfrom .qwen_embedding_function import QwenDenseEmbedding, QwenSparseEmbedding\nfrom .qwen_function import QwenFunctionBase\nfrom .qwen_rerank_function import QwenReRanker\nfrom .rerank_function import RerankFunction as ReRanker\nfrom .sentence_transformer_embedding_function import (\n    DefaultLocalDenseEmbedding,\n    DefaultLocalSparseEmbedding,\n)\nfrom .sentence_transformer_function import SentenceTransformerFunctionBase\nfrom .sentence_transformer_rerank_function import DefaultLocalReRanker\n\n__all__ = [\n    \"BM25EmbeddingFunction\",\n    \"DefaultLocalDenseEmbedding\",\n    \"DefaultLocalReRanker\",\n    \"DefaultLocalSparseEmbedding\",\n    \"DenseEmbeddingFunction\",\n    \"HTTPDenseEmbedding\",\n    \"JinaDenseEmbedding\",\n    \"JinaFunctionBase\",\n    \"OpenAIDenseEmbedding\",\n    \"OpenAIFunctionBase\",\n    \"QwenDenseEmbedding\",\n   
 \"QwenFunctionBase\",\n    \"QwenReRanker\",\n    \"QwenSparseEmbedding\",\n    \"ReRanker\",\n    \"RrfReRanker\",\n    \"SentenceTransformerFunctionBase\",\n    \"SparseEmbeddingFunction\",\n    \"WeightedReRanker\",\n]\n"
  },
  {
    "path": "python/zvec/extension/bm25_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom functools import lru_cache\nfrom typing import Literal, Optional\n\nfrom ..common.constants import TEXT, SparseVectorType\nfrom ..tool import require_module\nfrom .embedding_function import SparseEmbeddingFunction\n\n\nclass BM25EmbeddingFunction(SparseEmbeddingFunction[TEXT]):\n    \"\"\"BM25-based sparse embedding function using DashText SDK.\n\n    This class provides text-to-sparse-vector embedding capabilities using\n    the DashText library with BM25 algorithm. BM25 (Best Matching 25) is a\n    probabilistic retrieval function used for lexical search and document\n    ranking based on term frequency and inverse document frequency.\n\n    BM25 generates sparse vectors where each dimension corresponds to a term in\n    the vocabulary, and the value represents the BM25 score for that term. 
It's\n    particularly effective for:\n\n    - Lexical search and keyword matching\n    - Document ranking and information retrieval\n    - Combining with dense embeddings for hybrid search\n    - Traditional IR tasks where exact term matching is important\n\n    This implementation uses DashText's SparseVectorEncoder, which provides\n    efficient BM25 computation for Chinese and English text using either a\n    built-in encoder or custom corpus training.\n\n    Args:\n        corpus (Optional[list[str]], optional): List of documents to train the\n            BM25 encoder. If provided, creates a custom encoder trained on this\n            corpus for better domain-specific accuracy. If ``None``, uses the\n            built-in encoder. Defaults to ``None``.\n        encoding_type (Literal[\"query\", \"document\"], optional): Encoding mode\n            for text processing. Use ``\"query\"`` for search queries (default) and\n            ``\"document\"`` for document indexing. This distinction optimizes the\n            BM25 scoring for asymmetric retrieval tasks. Defaults to ``\"query\"``.\n        language (Literal[\"zh\", \"en\"], optional): Language for built-in encoder.\n            Only used when corpus is None. ``\"zh\"`` for Chinese (trained on Chinese\n            Wikipedia), ``\"en\"`` for English. Defaults to ``\"zh\"``.\n        b (float, optional): Document length normalization parameter for BM25.\n            Range [0, 1]. 0 means no normalization, 1 means full normalization.\n            Only used with custom corpus. Defaults to ``0.75``.\n        k1 (float, optional): Term frequency saturation parameter for BM25.\n            Higher values give more weight to term frequency. Only used with\n            custom corpus. 
Defaults to ``1.2``.\n        **kwargs: Additional parameters for DashText encoder customization.\n\n    Attributes:\n        corpus_size (int): Number of documents in the training corpus (0 if using built-in encoder).\n        encoding_type (str): The encoding type being used (\"query\" or \"document\").\n        language (str): The language of the built-in encoder (\"zh\" or \"en\").\n\n    Raises:\n        ValueError: If corpus is provided but empty or contains non-string elements.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If DashText encoder initialization or training fails.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``dashtext`` package: ``pip install dashtext``\n        - Two encoder options available:\n\n          1. **Built-in encoder** (no corpus needed): Pre-trained models for\n             Chinese (zh) and English (en), good generalization, works out-of-the-box\n          2. **Custom encoder** (corpus required): Better accuracy for domain-specific\n             terminology, requires training on your full corpus with BM25 parameters\n\n        - Encoding types:\n\n          * ``encoding_type=\"query\"``: Optimized for search queries (shorter text)\n          * ``encoding_type=\"document\"``: Optimized for document indexing (longer text)\n\n        - BM25 parameters (b, k1) only apply to custom encoder training\n        - Output is sorted by indices (vocabulary term IDs) for consistency\n        - Results are cached (LRU cache, maxsize=10) to reduce computation\n        - No API key or network connectivity required (local computation)\n\n    Examples:\n        >>> # Option 1: Using built-in encoder for Chinese (no corpus needed)\n        >>> from zvec.extension import BM25EmbeddingFunction\n        >>>\n        >>> # For query encoding (Chinese)\n        >>> bm25_query_zh = BM25EmbeddingFunction(language=\"zh\", encoding_type=\"query\")\n        >>> query_vec = 
bm25_query_zh.embed(\"什么是机器学习\")\n        >>> isinstance(query_vec, dict)\n        True\n        >>> # query_vec: {1169440797: 0.29, 2045788977: 0.70, ...}\n\n        >>> # For document encoding (Chinese)\n        >>> bm25_doc_zh = BM25EmbeddingFunction(language=\"zh\", encoding_type=\"document\")\n        >>> doc_vec = bm25_doc_zh.embed(\"机器学习是人工智能的一个重要分支...\")\n        >>> isinstance(doc_vec, dict)\n        True\n\n        >>> # Using built-in encoder for English\n        >>> bm25_query_en = BM25EmbeddingFunction(language=\"en\", encoding_type=\"query\")\n        >>> query_vec_en = bm25_query_en.embed(\"what is vector search service\")\n        >>> isinstance(query_vec_en, dict)\n        True\n\n        >>> # Option 2: Using custom corpus for domain-specific accuracy\n        >>> corpus = [\n        ...     \"机器学习是人工智能的一个重要分支\",\n        ...     \"深度学习使用多层神经网络进行特征提取\",\n        ...     \"自然语言处理技术用于理解和生成人类语言\"\n        ... ]\n        >>> bm25_custom = BM25EmbeddingFunction(\n        ...     corpus=corpus,\n        ...     encoding_type=\"query\",\n        ...     b=0.75,\n        ...     k1=1.2\n        ... )\n        >>> custom_vec = bm25_custom.embed(\"机器学习算法\")\n        >>> isinstance(custom_vec, dict)\n        True\n\n        >>> # Hybrid search: combining with dense embeddings\n        >>> from zvec.extension import DefaultLocalDenseEmbedding\n        >>> dense_emb = DefaultLocalDenseEmbedding()\n        >>> bm25_emb = BM25EmbeddingFunction(language=\"zh\", encoding_type=\"query\")\n        >>>\n        >>> query = \"machine learning algorithms\"\n        >>> dense_vec = dense_emb.embed(query)  # Semantic similarity\n        >>> sparse_vec = bm25_emb.embed(query)  # Lexical matching\n        >>> # Combine scores for hybrid retrieval\n\n        >>> # Callable interface\n        >>> sparse_vec = bm25_query_zh(\"information retrieval\")\n        >>> isinstance(sparse_vec, dict)\n        True\n\n        >>> # Error handling\n        >>> try:\n        ...     
bm25_query_zh.embed(\"\")  # Empty query\n        ... except ValueError as e:\n        ...     print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n    See Also:\n        - ``SparseEmbeddingFunction``: Base class for sparse embeddings\n        - ``DefaultLocalSparseEmbedding``: SPLADE-based sparse embedding\n        - ``QwenSparseEmbedding``: API-based sparse embedding using Qwen\n        - ``DefaultLocalDenseEmbedding``: Dense embedding for semantic search\n\n    References:\n        - DashText Documentation: https://help.aliyun.com/zh/document_detail/2546039.html\n        - DashText PyPI: https://pypi.org/project/dashtext/\n        - BM25 Algorithm: Robertson & Zaragoza (2009)\n    \"\"\"\n\n    def __init__(\n        self,\n        corpus: Optional[list[str]] = None,\n        encoding_type: Literal[\"query\", \"document\"] = \"query\",\n        language: Literal[\"zh\", \"en\"] = \"zh\",\n        b: float = 0.75,\n        k1: float = 1.2,\n        **kwargs,\n    ):\n        \"\"\"Initialize the BM25 embedding function.\n\n        Args:\n            corpus (Optional[list[str]]): Optional corpus for training custom encoder.\n                If None, uses built-in encoder. Defaults to None.\n            encoding_type (Literal[\"query\", \"document\"]): Text encoding mode.\n                Use \"query\" for search queries, \"document\" for indexing.\n                Defaults to \"query\".\n            language (Literal[\"zh\", \"en\"]): Language for built-in encoder.\n                \"zh\" for Chinese, \"en\" for English. Defaults to \"zh\".\n            b (float): Document length normalization for BM25 [0, 1].\n                Only used with custom corpus. Defaults to 0.75.\n            k1 (float): Term frequency saturation for BM25.\n                Only used with custom corpus. 
Defaults to 1.2.\n            **kwargs: Additional DashText encoder parameters.\n\n        Raises:\n            ValueError: If corpus is provided but empty or invalid.\n            ImportError: If dashtext package is not installed.\n            RuntimeError: If encoder initialization or training fails.\n        \"\"\"\n        # Validate corpus if provided\n        if corpus is not None:\n            if not corpus or not isinstance(corpus, list):\n                raise ValueError(\"Corpus must be a non-empty list of strings\")\n\n            if not all(isinstance(doc, str) for doc in corpus):\n                raise ValueError(\"All corpus documents must be strings\")\n\n        # Import dashtext\n        self._dashtext = require_module(\"dashtext\")\n\n        self._corpus = corpus\n        self._encoding_type = encoding_type\n        self._language = language\n        self._b = b\n        self._k1 = k1\n        self._extra_params = kwargs\n\n        # Initialize the BM25 encoder\n        self._build_encoder()\n\n    def _build_encoder(self):\n        \"\"\"Build the BM25 sparse vector encoder.\n\n        Creates either a built-in encoder (pre-trained) or a custom encoder\n        trained on the provided corpus.\n\n        Raises:\n            RuntimeError: If encoder initialization or training fails.\n            ImportError: If dashtext package is not installed.\n        \"\"\"\n        try:\n            if self._corpus is None:\n                # Use built-in encoder (pre-trained on Wikipedia)\n                # language: 'zh' for Chinese, 'en' for English\n                self._encoder = self._dashtext.SparseVectorEncoder.default(\n                    name=self._language\n                )\n            else:\n                # Create custom encoder with BM25 parameters\n                self._encoder = self._dashtext.SparseVectorEncoder(\n                    b=self._b, k1=self._k1, **self._extra_params\n                )\n\n                # Train encoder with 
the corpus\n                self._encoder.train(self._corpus)\n\n        except ImportError as e:\n            raise ImportError(\n                \"dashtext package is required for BM25EmbeddingFunction. \"\n                \"Install it with: pip install dashtext\"\n            ) from e\n        except Exception as e:\n            if isinstance(e, (ValueError, RuntimeError)):\n                raise\n            raise RuntimeError(f\"Failed to build BM25 encoder: {e!s}\") from e\n\n    @property\n    def corpus_size(self) -> int:\n        \"\"\"int: Number of documents in the training corpus (0 if using built-in encoder).\"\"\"\n        return len(self._corpus) if self._corpus is not None else 0\n\n    @property\n    def encoding_type(self) -> str:\n        \"\"\"str: The encoding type being used (\"query\" or \"document\").\"\"\"\n        return self._encoding_type\n\n    @property\n    def language(self) -> str:\n        \"\"\"str: The language of the built-in encoder (\"zh\" or \"en\").\"\"\"\n        return self._language\n\n    @property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for DashText encoder customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: TEXT) -> SparseVectorType:\n        \"\"\"Make the embedding function callable.\n\n        Args:\n            input (TEXT): Input text to embed.\n\n        Returns:\n            SparseVectorType: Sparse vector as dictionary.\n        \"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=10)\n    def embed(self, input: TEXT) -> SparseVectorType:\n        \"\"\"Generate BM25 sparse embedding for the input text.\n\n        This method computes BM25 scores for the input text using DashText's\n        SparseVectorEncoder. 
The encoding behavior depends on the encoding_type:\n\n        - ``encoding_type=\"query\"``: Uses ``encode_queries()`` for search queries\n        - ``encoding_type=\"document\"``: Uses ``encode_documents()`` for documents\n\n        The result is a sparse vector where keys are term indices in the\n        vocabulary and values are BM25 scores.\n\n        Args:\n            input (TEXT): Input text string to embed. Must be non-empty after\n                stripping whitespace.\n\n        Returns:\n            SparseVectorType: A dictionary mapping vocabulary term index to BM25 score.\n                Only non-zero scores are included. The dictionary is sorted by indices\n                (keys) in ascending order for consistent output.\n                Example: ``{1169440797: 0.29, 2045788977: 0.70, ...}``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty or whitespace-only.\n            RuntimeError: If BM25 encoding fails.\n\n        Examples:\n            >>> bm25 = BM25EmbeddingFunction(language=\"zh\", encoding_type=\"query\")\n            >>> sparse_vec = bm25.embed(\"query text\")\n            >>> isinstance(sparse_vec, dict)\n            True\n            >>> all(isinstance(k, int) and isinstance(v, float) for k, v in sparse_vec.items())\n            True\n\n            >>> # Verify sorted output\n            >>> keys = list(sparse_vec.keys())\n            >>> keys == sorted(keys)\n            True\n\n            >>> # Error: empty input\n            >>> bm25.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> bm25.embed(123)\n            TypeError: Expected 'input' to be str, got int\n\n        Note:\n            - BM25 scores are relative to the vocabulary statistics\n            - Output dictionary is always sorted by indices for consistency\n            - Terms not in the vocabulary will have 
zero scores (not included)\n            - This method is cached (maxsize=10) for performance\n            - DashText automatically handles Chinese/English text segmentation\n        \"\"\"\n        if not isinstance(input, str):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        try:\n            # Encode based on encoding_type\n            if self._encoding_type == \"query\":\n                sparse_vector = self._encoder.encode_queries(input)\n            else:  # encoding_type == \"document\"\n                sparse_vector = self._encoder.encode_documents(input)\n\n            # DashText returns dict with int/long keys and float values\n            # Convert to standard format: {int: float}\n            sparse_dict: dict[int, float] = {}\n            for key, value in sparse_vector.items():\n                try:\n                    idx = int(key)\n                    val = float(value)\n                    if val > 0:\n                        sparse_dict[idx] = val\n                except (ValueError, TypeError):\n                    # Skip invalid entries\n                    continue\n\n            # Sort by indices (keys) to ensure consistent ordering\n            return dict(sorted(sparse_dict.items()))\n\n        except Exception as e:\n            if isinstance(e, (TypeError, ValueError)):\n                raise\n            raise RuntimeError(f\"Failed to generate BM25 embedding: {e!s}\") from e\n"
  },
  {
    "path": "python/zvec/extension/embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom abc import abstractmethod\nfrom typing import Protocol, runtime_checkable\n\nfrom ..common.constants import MD, DenseVectorType, SparseVectorType\n\n\n@runtime_checkable\nclass DenseEmbeddingFunction(Protocol[MD]):\n    \"\"\"Protocol for dense vector embedding functions.\n\n    Dense embedding functions map multimodal input (text, image, or audio) to\n    fixed-length real-valued vectors. This is a Protocol class that defines\n    the interface - implementations should provide their own initialization\n    and properties.\n\n    Type Parameters:\n        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).\n\n    Note:\n        - This is a Protocol class - it only defines the ``embed()`` interface.\n        - Implementations are free to define their own ``__init__``, properties,\n          and additional methods as needed.\n        - The ``embed()`` method is the only required interface.\n\n    Examples:\n        >>> # Custom text embedding implementation\n        >>> class MyTextEmbedding:\n        ...     def __init__(self, dimension: int, model_name: str):\n        ...         self.dimension = dimension\n        ...         self.model = load_model(model_name)\n        ...\n        ...     def embed(self, input: str) -> list[float]:\n        ...         
return self.model.encode(input).tolist()\n\n        >>> # Custom image embedding implementation\n        >>> class MyImageEmbedding:\n        ...     def __init__(self, dimension: int = 512):\n        ...         self.dimension = dimension\n        ...         self.model = load_image_model()\n        ...\n        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> list[float]:\n        ...         if isinstance(input, str):\n        ...             image = load_image_from_path(input)\n        ...         else:\n        ...             image = input\n        ...         return self.model.extract_features(image).tolist()\n\n        >>> # Using built-in implementations\n        >>> from zvec.extension import QwenDenseEmbedding\n        >>> text_emb = QwenDenseEmbedding(dimension=768, api_key=\"sk-xxx\")\n        >>> vector = text_emb.embed(\"Hello world\")\n    \"\"\"\n\n    @abstractmethod\n    def embed(self, input: MD) -> DenseVectorType:\n        \"\"\"Generate a dense embedding vector for the input data.\n\n        Args:\n            input (MD): Multimodal input data to embed. 
Can be:\n                - TEXT (str): Text string\n                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array\n                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array\n\n        Returns:\n            DenseVectorType: A dense vector representing the embedding.\n                Can be list[float], list[int], or np.ndarray.\n                Length should match the implementation's dimension.\n        \"\"\"\n        ...\n\n\n@runtime_checkable\nclass SparseEmbeddingFunction(Protocol[MD]):\n    \"\"\"Protocol for sparse vector embedding functions.\n\n    Sparse embedding functions map multimodal input (text, image, or audio) to\n    a dictionary of {index: weight}, where only non-zero dimensions are stored.\n    You can inherit this class to create custom sparse embedding functions.\n\n    Type Parameters:\n        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).\n\n    Note:\n        Subclasses must implement the ``embed()`` method.\n\n    Examples:\n        >>> # Using built-in text sparse embedding (e.g., BM25, TF-IDF)\n        >>> sparse_emb = SomeSparseEmbedding()\n        >>> vector = sparse_emb.embed(\"Hello world\")\n        >>> # Returns: {0: 0.5, 42: 1.2, 100: 0.8}\n\n        >>> # Custom BM25 sparse embedding function\n        >>> class MyBM25Embedding(SparseEmbeddingFunction):\n        ...     def __init__(self, vocab_size: int = 10000):\n        ...         self.vocab_size = vocab_size\n        ...         self.tokenizer = MyTokenizer()\n        ...\n        ...     def embed(self, input: str) -> dict[int, float]:\n        ...         tokens = self.tokenizer.tokenize(input)\n        ...         sparse_vector = {}\n        ...         for token_id, weight in self._calculate_bm25(tokens):\n        ...             if weight > 0:\n        ...                 sparse_vector[token_id] = weight\n        ...         return sparse_vector\n        ...\n        ...     def _calculate_bm25(self, tokens):\n        ...         # BM25 calculation logic\n        ...         pass\n\n        >>> # Custom sparse image feature extractor\n        >>> class MySparseImageEmbedding(SparseEmbeddingFunction):\n        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> dict[int, float]:\n        ...         image = self._load_image(input)\n        ...         features = self._extract_sparse_features(image)\n        ...         return {idx: val for idx, val in enumerate(features) if val != 0}\n    \"\"\"\n\n    @abstractmethod\n    def embed(self, input: MD) -> SparseVectorType:\n        \"\"\"Generate a sparse embedding for the input data.\n\n        Args:\n            input (MD): Multimodal input data to embed. Can be:\n                - TEXT (str): Text string\n                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array\n                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array\n\n        Returns:\n            SparseVectorType: Mapping from dimension index to non-zero weight.\n                Only dimensions with non-zero values are included.\n        \"\"\"\n        ...\n"
  },
  {
    "path": "python/zvec/extension/http_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport json\nimport os\nimport urllib.request\nfrom functools import lru_cache\nfrom typing import Optional\n\nfrom ..common.constants import TEXT, DenseVectorType\nfrom .embedding_function import DenseEmbeddingFunction\n\n\nclass HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]):\n    \"\"\"Dense text embedding function using any OpenAI-compatible HTTP endpoint.\n\n    This class calls any server that implements the ``/v1/embeddings`` API\n    (LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard\n    library — no extra dependencies are required.\n\n    The embedding dimension is detected automatically from the first server\n    response.\n\n    Args:\n        base_url (str, optional): Base URL of the embedding server.\n            Defaults to ``\"http://localhost:1234\"`` (LM Studio).\n            Common values:\n\n            - ``\"http://localhost:1234\"``  — LM Studio\n            - ``\"http://localhost:11434\"`` — Ollama\n        model (str, optional): Model identifier as expected by the server.\n            Defaults to ``\"text-embedding-nomic-embed-text-v1.5@f16\"``.\n        api_key (Optional[str], optional): Bearer token for authenticated\n            endpoints.  Falls back to the ``OPENAI_API_KEY`` environment\n            variable.  
Leave as ``None`` for local servers that do not\n            require authentication.\n        timeout (int, optional): HTTP request timeout in seconds.\n            Defaults to 30.\n\n    Attributes:\n        dimension (int): Embedding vector dimensionality (auto-detected).\n\n    Raises:\n        TypeError: If ``embed()`` receives a non-string input.\n        ValueError: If input is empty/whitespace-only or the server returns\n            an unexpected response format.\n        RuntimeError: If the HTTP request fails or the server is unreachable.\n\n    Examples:\n        >>> from zvec.extension import HTTPDenseEmbedding\n        >>>\n        >>> # LM Studio (default)\n        >>> emb = HTTPDenseEmbedding()\n        >>> vector = emb.embed(\"Hello, world!\")\n        >>> len(vector)\n        768\n        >>>\n        >>> # Ollama\n        >>> emb = HTTPDenseEmbedding(\n        ...     base_url=\"http://localhost:11434\",\n        ...     model=\"nomic-embed-text\",\n        ... )\n        >>> vector = emb.embed(\"Semantic search with local models\")\n\n    See Also:\n        - ``DenseEmbeddingFunction``: Protocol for dense embeddings.\n        - ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API.\n    \"\"\"\n\n    ENDPOINT = \"/v1/embeddings\"\n\n    def __init__(\n        self,\n        base_url: str = \"http://localhost:1234\",\n        model: str = \"text-embedding-nomic-embed-text-v1.5@f16\",\n        api_key: Optional[str] = None,\n        timeout: int = 30,\n    ) -> None:\n        self._base_url = base_url.rstrip(\"/\")\n        self._model = model\n        self._api_key = api_key or os.environ.get(\"OPENAI_API_KEY\", \"\")\n        self._timeout = timeout\n        self._dimension: Optional[int] = None\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: Embedding vector dimensionality (auto-detected on first call).\"\"\"\n        if self._dimension is None:\n            self._dimension = len(self.embed(\"dimension probe\"))\n     
   return self._dimension\n\n    def __call__(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=256)\n    def embed(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Generate a dense embedding vector for the input text.\n\n        Results are cached (LRU, up to 256 entries) so repeated strings\n        do not trigger extra HTTP requests.\n\n        Args:\n            input (TEXT): Input text string to embed.  Must be non-empty\n                after stripping whitespace.\n\n        Returns:\n            DenseVectorType: A list of floats representing the embedding.\n\n        Raises:\n            TypeError: If *input* is not a string.\n            ValueError: If *input* is empty/whitespace-only or the server\n                returns an unexpected response format.\n            RuntimeError: If the HTTP request fails.\n        \"\"\"\n        if not isinstance(input, TEXT):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        url = self._base_url + self.ENDPOINT\n        payload = json.dumps({\"model\": self._model, \"input\": input}).encode()\n\n        headers: dict[str, str] = {\"Content-Type\": \"application/json\"}\n        if self._api_key:\n            headers[\"Authorization\"] = f\"Bearer {self._api_key}\"\n\n        req = urllib.request.Request(url, data=payload, headers=headers, method=\"POST\")\n        try:\n            with urllib.request.urlopen(req, timeout=self._timeout) as resp:\n                body = json.loads(resp.read())\n        except urllib.error.HTTPError as exc:\n            raise RuntimeError(\n                f\"Embedding server returned HTTP {exc.code}: {exc.read().decode()}\"\n            ) from exc\n        except OSError as exc:\n            
raise RuntimeError(\n                f\"Could not reach embedding server at {url}: {exc}\"\n            ) from exc\n\n        try:\n            vector: list[float] = body[\"data\"][0][\"embedding\"]\n        except (KeyError, IndexError) as exc:\n            raise ValueError(\n                f\"Unexpected response format from embedding server: {body}\"\n            ) from exc\n\n        return vector\n"
  },
  {
    "path": "python/zvec/extension/jina_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom functools import lru_cache\nfrom typing import Optional\n\nfrom ..common.constants import TEXT, DenseVectorType\nfrom .embedding_function import DenseEmbeddingFunction\nfrom .jina_function import JinaFunctionBase\n\n\nclass JinaDenseEmbedding(JinaFunctionBase, DenseEmbeddingFunction[TEXT]):\n    \"\"\"Dense text embedding function using Jina AI API.\n\n    This class provides text-to-vector embedding capabilities using Jina AI's\n    embedding models. It inherits from ``DenseEmbeddingFunction`` and implements\n    dense text embedding via the Jina Embeddings API (OpenAI-compatible).\n\n    Jina Embeddings v5 models support task-specific embedding through the\n    ``task`` parameter, which optimizes the embedding for different use cases\n    such as retrieval, text matching, or classification. They also support\n    Matryoshka Representation Learning, allowing flexible output dimensions.\n\n    Args:\n        model (str, optional): Jina embedding model identifier.\n            Defaults to ``\"jina-embeddings-v5-text-nano\"``. 
Available models:\n            - ``\"jina-embeddings-v5-text-nano\"``: 768 dims, 239M params, 8K context\n            - ``\"jina-embeddings-v5-text-small\"``: 1024 dims, 677M params, 32K context\n        dimension (Optional[int], optional): Desired output embedding dimension.\n            If ``None``, uses model's default dimension. Supports Matryoshka\n            dimensions: 32, 64, 128, 256, 512, 768 (nano) / 1024 (small).\n            Defaults to ``None``.\n        api_key (Optional[str], optional): Jina API authentication key.\n            If ``None``, reads from ``JINA_API_KEY`` environment variable.\n            Obtain your key from: https://jina.ai/api-dashboard\n        task (Optional[str], optional): Task type to optimize embeddings for.\n            Defaults to ``None``. Valid values:\n            - ``\"retrieval.query\"``: For search queries\n            - ``\"retrieval.passage\"``: For documents/passages to be searched\n            - ``\"text-matching\"``: For symmetric text similarity\n            - ``\"classification\"``: For text classification\n            - ``\"separation\"``: For clustering/separation tasks\n\n    Attributes:\n        dimension (int): The embedding vector dimension.\n        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.\n        model (str): The Jina model name being used.\n        task (Optional[str]): The task type for embedding optimization.\n\n    Raises:\n        ValueError: If API key is not provided and not found in environment,\n            if task is not a valid task type, or if API returns an error response.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If network error or Jina service error occurs.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``openai`` package: ``pip install openai``\n        - Jina API is OpenAI-compatible, so it uses the ``openai`` Python client\n        - Embedding results are cached (LRU 
cache, maxsize=10) to reduce API calls\n        - For retrieval tasks, use ``\"retrieval.query\"`` for queries and\n          ``\"retrieval.passage\"`` for documents\n        - API usage requires a Jina API key from https://jina.ai/api-dashboard\n\n    Examples:\n        >>> # Basic usage with default model\n        >>> from zvec.extension import JinaDenseEmbedding\n        >>> import os\n        >>> os.environ[\"JINA_API_KEY\"] = \"jina_...\"\n        >>>\n        >>> emb_func = JinaDenseEmbedding()\n        >>> vector = emb_func.embed(\"Hello, world!\")\n        >>> len(vector)\n        768\n\n        >>> # Retrieval use case: embed queries and documents differently\n        >>> query_emb = JinaDenseEmbedding(task=\"retrieval.query\")\n        >>> doc_emb = JinaDenseEmbedding(task=\"retrieval.passage\")\n        >>>\n        >>> query_vector = query_emb.embed(\"What is machine learning?\")\n        >>> doc_vector = doc_emb.embed(\"Machine learning is a subset of AI...\")\n\n        >>> # Using larger model with custom dimension (Matryoshka)\n        >>> emb_func = JinaDenseEmbedding(\n        ...     model=\"jina-embeddings-v5-text-small\",\n        ...     dimension=256,\n        ...     api_key=\"jina_...\",\n        ...     task=\"text-matching\",\n        ... )\n        >>> vector = emb_func.embed(\"Semantic similarity comparison\")\n        >>> len(vector)\n        256\n\n        >>> # Using with zvec collection\n        >>> import zvec\n        >>> emb_func = JinaDenseEmbedding(task=\"retrieval.passage\")\n        >>> schema = zvec.CollectionSchema(\n        ...     name=\"docs\",\n        ...     vectors=zvec.VectorSchema(\n        ...         \"embedding\", zvec.DataType.VECTOR_FP32, emb_func.dimension\n        ...     ),\n        ... 
)\n        >>> collection = zvec.create_and_open(path=\"./my_docs\", schema=schema)\n\n    See Also:\n        - ``DenseEmbeddingFunction``: Base class for dense embeddings\n        - ``OpenAIDenseEmbedding``: Alternative using OpenAI API\n        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API\n        - ``DefaultLocalDenseEmbedding``: Local model without API calls\n    \"\"\"\n\n    def __init__(\n        self,\n        model: str = \"jina-embeddings-v5-text-nano\",\n        dimension: Optional[int] = None,\n        api_key: Optional[str] = None,\n        task: Optional[str] = None,\n        **kwargs,\n    ):\n        \"\"\"Initialize the Jina dense embedding function.\n\n        Args:\n            model (str): Jina model name. Defaults to \"jina-embeddings-v5-text-nano\".\n            dimension (Optional[int]): Target embedding dimension or None for default.\n            api_key (Optional[str]): API key or None to use environment variable.\n            task (Optional[str]): Task type for embedding optimization or None.\n            **kwargs: Additional parameters for API calls.\n\n        Raises:\n            ValueError: If API key is not provided and not in environment,\n                or if task is not a valid task type.\n        \"\"\"\n        # Initialize base class for API connection\n        JinaFunctionBase.__init__(self, model=model, api_key=api_key, task=task)\n\n        # Store dimension configuration\n        self._custom_dimension = dimension\n\n        # Determine actual dimension\n        if dimension is None:\n            self._dimension = self._MODEL_DIMENSIONS.get(model, 768)\n        else:\n            self._dimension = dimension\n\n        # Store extra attributes\n        self._extra_params = kwargs\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: The expected dimensionality of the embedding vector.\"\"\"\n        return self._dimension\n\n    @property\n    def extra_params(self) -> dict:\n        
\"\"\"dict: Extra parameters for model-specific customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=10)\n    def embed(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Generate dense embedding vector for the input text.\n\n        This method calls the Jina Embeddings API to convert input text\n        into a dense vector representation. Results are cached to improve\n        performance for repeated inputs.\n\n        Args:\n            input (TEXT): Input text string to embed. Must be non-empty after\n                stripping whitespace. Maximum length depends on model:\n                8192 tokens for v5-nano, 32768 tokens for v5-small.\n\n        Returns:\n            DenseVectorType: A list of floats representing the embedding vector.\n                Length equals ``self.dimension``. Example:\n                ``[0.123, -0.456, 0.789, ...]``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty/whitespace-only, or if the API returns\n                an error or malformed response.\n            RuntimeError: If network connectivity issues or Jina service\n                errors occur.\n\n        Examples:\n            >>> emb = JinaDenseEmbedding(task=\"retrieval.query\")\n            >>> vector = emb.embed(\"What is deep learning?\")\n            >>> len(vector)\n            768\n            >>> isinstance(vector[0], float)\n            True\n\n            >>> # Error: empty input\n            >>> emb.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> emb.embed(123)\n            TypeError: Expected 'input' to be str, got int\n\n        Note:\n            - This method is cached (maxsize=10). 
Identical inputs return cached results.\n            - The cache is based on exact string match (case-sensitive).\n            - Task type affects embedding optimization but not caching behavior.\n        \"\"\"\n        if not isinstance(input, TEXT):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        # Call API\n        embedding_vector = self._call_text_embedding_api(\n            input=input,\n            dimension=self._custom_dimension,\n        )\n\n        # Verify dimension\n        if len(embedding_vector) != self.dimension:\n            raise ValueError(\n                f\"Dimension mismatch: expected {self.dimension}, \"\n                f\"got {len(embedding_vector)}\"\n            )\n\n        return embedding_vector\n"
  },
  {
    "path": "python/zvec/extension/jina_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport os\nfrom typing import ClassVar, Optional\n\nfrom ..common.constants import TEXT\nfrom ..tool import require_module\n\n\nclass JinaFunctionBase:\n    \"\"\"Base class for Jina AI functions.\n\n    This base class provides common functionality for calling Jina AI APIs\n    and handling responses. It supports embeddings (dense) operations via\n    the OpenAI-compatible Jina Embeddings API.\n\n    This class is not meant to be used directly. 
Use concrete implementations:\n    - ``JinaDenseEmbedding`` for dense embeddings\n\n    Args:\n        model (str): Jina embedding model identifier.\n        api_key (Optional[str]): Jina API authentication key.\n        task (Optional[str]): Task type for the embedding model.\n\n    Note:\n        - This is an internal base class for code reuse across Jina features\n        - Subclasses should inherit from appropriate Protocol\n        - Provides unified API connection and response handling\n        - Jina API is OpenAI-compatible, using the ``openai`` Python client\n    \"\"\"\n\n    _BASE_URL: ClassVar[str] = \"https://api.jina.ai/v1\"\n\n    # Model default dimensions\n    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {\n        \"jina-embeddings-v5-text-nano\": 768,\n        \"jina-embeddings-v5-text-small\": 1024,\n    }\n\n    # Model max tokens\n    _MODEL_MAX_TOKENS: ClassVar[dict[str, int]] = {\n        \"jina-embeddings-v5-text-nano\": 8192,\n        \"jina-embeddings-v5-text-small\": 32768,\n    }\n\n    # Valid task types\n    _VALID_TASKS: ClassVar[tuple[str, ...]] = (\n        \"retrieval.query\",\n        \"retrieval.passage\",\n        \"text-matching\",\n        \"classification\",\n        \"separation\",\n    )\n\n    def __init__(\n        self,\n        model: str,\n        api_key: Optional[str] = None,\n        task: Optional[str] = None,\n    ):\n        \"\"\"Initialize the base Jina functionality.\n\n        Args:\n            model (str): Jina model name.\n            api_key (Optional[str]): API key or None to use environment variable.\n            task (Optional[str]): Task type for the embedding model.\n                Valid values: \"retrieval.query\", \"retrieval.passage\",\n                \"text-matching\", \"classification\", \"separation\".\n\n        Raises:\n            ValueError: If API key is not provided and not in environment,\n                or if task is not a valid task type.\n        \"\"\"\n        self._model = 
model\n        self._api_key = api_key or os.environ.get(\"JINA_API_KEY\")\n        self._task = task\n\n        if not self._api_key:\n            raise ValueError(\n                \"Jina API key is required. Please provide 'api_key' parameter \"\n                \"or set the 'JINA_API_KEY' environment variable. \"\n                \"Get your key from: https://jina.ai/api-dashboard\"\n            )\n\n        if task is not None and task not in self._VALID_TASKS:\n            raise ValueError(\n                f\"Invalid task '{task}'. Valid tasks: {', '.join(self._VALID_TASKS)}\"\n            )\n\n    @property\n    def model(self) -> str:\n        \"\"\"str: The Jina model name currently in use.\"\"\"\n        return self._model\n\n    @property\n    def task(self) -> Optional[str]:\n        \"\"\"Optional[str]: The task type for the embedding model.\"\"\"\n        return self._task\n\n    def _get_client(self):\n        \"\"\"Get OpenAI-compatible client instance configured for Jina API.\n\n        Returns:\n            OpenAI: Configured OpenAI client pointing to Jina API.\n\n        Raises:\n            ImportError: If openai package is not installed.\n        \"\"\"\n        openai = require_module(\"openai\")\n        return openai.OpenAI(api_key=self._api_key, base_url=self._BASE_URL)\n\n    def _call_text_embedding_api(\n        self,\n        input: TEXT,\n        dimension: Optional[int] = None,\n    ) -> list:\n        \"\"\"Call Jina Embeddings API.\n\n        Args:\n            input (TEXT): Input text to embed.\n            dimension (Optional[int]): Target dimension for Matryoshka embeddings.\n\n        Returns:\n            list: Embedding vector as list of floats.\n\n        Raises:\n            RuntimeError: If API call fails.\n            ValueError: If API returns error response.\n        \"\"\"\n        try:\n            client = self._get_client()\n\n            # Prepare embedding parameters\n            params = {\"model\": self.model, 
\"input\": input}\n\n            # Add dimension parameter for Matryoshka support\n            if dimension is not None:\n                params[\"dimensions\"] = dimension\n\n            # Add task parameter via extra_body\n            if self._task is not None:\n                params[\"extra_body\"] = {\"task\": self._task}\n\n            # Call Jina API (OpenAI-compatible)\n            response = client.embeddings.create(**params)\n\n        except Exception as e:\n            # Check if it's an OpenAI API error\n            openai = require_module(\"openai\")\n            if isinstance(e, (openai.APIError, openai.APIConnectionError)):\n                raise RuntimeError(f\"Failed to call Jina API: {e!s}\") from e\n            raise RuntimeError(f\"Unexpected error during API call: {e!s}\") from e\n\n        # Extract embedding from response\n        try:\n            if not response.data:\n                raise ValueError(\"Invalid API response: no embedding data returned\")\n\n            embedding_vector = response.data[0].embedding\n\n            if not isinstance(embedding_vector, list):\n                raise ValueError(\n                    \"Invalid API response: embedding is not a list of numbers\"\n                )\n\n            return embedding_vector\n\n        except (AttributeError, IndexError, TypeError) as e:\n            raise ValueError(f\"Failed to parse API response: {e!s}\") from e\n"
  },
  {
    "path": "python/zvec/extension/multi_vector_reranker.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport heapq\nimport math\nfrom collections import defaultdict\nfrom typing import Optional\n\nfrom ..model.doc import Doc\nfrom ..typing import MetricType\nfrom .rerank_function import RerankFunction\n\n\nclass RrfReRanker(RerankFunction):\n    \"\"\"Re-ranker using Reciprocal Rank Fusion (RRF) for multi-vector search.\n\n    RRF combines results from multiple vector queries without requiring relevance scores.\n    It assigns higher weight to documents that appear early in multiple result lists.\n\n    The RRF score for a document at rank ``r`` is: ``1 / (k + r + 1)``,\n    where ``k`` is the rank constant.\n\n    Note:\n        This re-ranker is specifically designed for multi-vector scenarios where\n        query results from multiple vector fields need to be combined.\n\n    Args:\n        topn (int, optional): Number of top documents to return. Defaults to 10.\n        rerank_field (Optional[str], optional): Ignored by RRF. Defaults to None.\n        rank_constant (int, optional): Smoothing constant ``k`` in RRF formula.\n            Larger values reduce the impact of early ranks. 
Defaults to 60.\n    \"\"\"\n\n    def __init__(\n        self,\n        topn: int = 10,\n        rerank_field: Optional[str] = None,\n        rank_constant: int = 60,\n    ):\n        super().__init__(topn=topn, rerank_field=rerank_field)\n        self._rank_constant = rank_constant\n\n    @property\n    def rank_constant(self) -> int:\n        return self._rank_constant\n\n    def _rrf_score(self, rank: int) -> float:\n        return 1.0 / (self._rank_constant + rank + 1)\n\n    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:\n        \"\"\"Apply Reciprocal Rank Fusion to combine multiple query results.\n\n        Args:\n            query_results (dict[str, list[Doc]]): Results from one or more vector queries.\n\n        Returns:\n            list[Doc]: Re-ranked documents with RRF scores in the ``score`` field.\n        \"\"\"\n        rrf_scores: dict[str, float] = defaultdict(float)\n        id_to_doc: dict[str, Doc] = {}\n\n        for _, query_result in query_results.items():\n            for rank, doc in enumerate(query_result):\n                doc_id = doc.id\n                rrf_score = self._rrf_score(rank)\n                rrf_scores[doc_id] += rrf_score\n                if doc_id not in id_to_doc:\n                    id_to_doc[doc_id] = doc\n\n        top_docs = heapq.nlargest(self.topn, rrf_scores.items(), key=lambda x: x[1])\n        results: list[Doc] = []\n        for doc_id, rrf_score in top_docs:\n            doc = id_to_doc[doc_id]\n            new_doc = doc._replace(score=rrf_score)\n            results.append(new_doc)\n        return results\n\n\nclass WeightedReRanker(RerankFunction):\n    \"\"\"Re-ranker that combines scores from multiple vector fields using weights.\n\n    Each vector field's relevance score is normalized based on its metric type,\n    then scaled by a user-provided weight. 
Final scores are summed across fields.\n\n    Note:\n        This re-ranker is specifically designed for multi-vector scenarios where\n        query results from multiple vector fields need to be combined with\n        configurable weights.\n\n    Args:\n        topn (int, optional): Number of top documents to return. Defaults to 10.\n        rerank_field (Optional[str], optional): Ignored. Defaults to None.\n        metric (MetricType, optional): Distance metric used for score normalization.\n            Defaults to ``MetricType.L2``.\n        weights (Optional[dict[str, float]], optional): Weight per vector field.\n            Fields not listed use weight 1.0. Defaults to None.\n\n    Note:\n        Supported metrics: L2, IP, COSINE. Scores are normalized to [0, 1].\n    \"\"\"\n\n    def __init__(\n        self,\n        topn: int = 10,\n        rerank_field: Optional[str] = None,\n        metric: MetricType = MetricType.L2,\n        weights: Optional[dict[str, float]] = None,\n    ):\n        super().__init__(topn=topn, rerank_field=rerank_field)\n        self._weights = weights or {}\n        self._metric = metric\n\n    @property\n    def weights(self) -> dict[str, float]:\n        \"\"\"dict[str, float]: Weight mapping for vector fields.\"\"\"\n        return self._weights\n\n    @property\n    def metric(self) -> MetricType:\n        \"\"\"MetricType: Distance metric used for score normalization.\"\"\"\n        return self._metric\n\n    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:\n        \"\"\"Combine scores from multiple vector fields using weighted sum.\n\n        Args:\n            query_results (dict[str, list[Doc]]): Results per vector field.\n\n        Returns:\n            list[Doc]: Re-ranked documents with combined scores in ``score`` field.\n        \"\"\"\n        weighted_scores: dict[str, float] = defaultdict(float)\n        id_to_doc: dict[str, Doc] = {}\n\n        for vector_name, query_result in 
query_results.items():\n            for _, doc in enumerate(query_result):\n                doc_id = doc.id\n                weighted_score = self._normalize_score(\n                    doc.score, self.metric\n                ) * self.weights.get(vector_name, 1.0)\n                weighted_scores[doc_id] += weighted_score\n                if doc_id not in id_to_doc:\n                    id_to_doc[doc_id] = doc\n\n        top_docs = heapq.nlargest(\n            self.topn, weighted_scores.items(), key=lambda x: x[1]\n        )\n        results: list[Doc] = []\n        for doc_id, weighted_score in top_docs:\n            doc = id_to_doc[doc_id]\n            new_doc = doc._replace(score=weighted_score)\n            results.append(new_doc)\n        return results\n\n    def _normalize_score(self, score: float, metric: MetricType) -> float:\n        if metric == MetricType.L2:\n            return 1.0 - 2 * math.atan(score) / math.pi\n        if metric == MetricType.IP:\n            return 0.5 + math.atan(score) / math.pi\n        if metric == MetricType.COSINE:\n            return 1.0 - score / 2.0\n        raise ValueError(\"Unsupported metric type\")\n"
  },
  {
    "path": "python/zvec/extension/openai_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom functools import lru_cache\nfrom typing import Optional\n\nfrom ..common.constants import TEXT, DenseVectorType\nfrom .embedding_function import DenseEmbeddingFunction\nfrom .openai_function import OpenAIFunctionBase\n\n\nclass OpenAIDenseEmbedding(OpenAIFunctionBase, DenseEmbeddingFunction[TEXT]):\n    \"\"\"Dense text embedding function using OpenAI API.\n\n    This class provides text-to-vector embedding capabilities using OpenAI's\n    embedding models. It inherits from ``DenseEmbeddingFunction`` and implements\n    dense text embedding via the OpenAI API.\n\n    The implementation supports various OpenAI embedding models with different\n    dimensions and includes automatic result caching for improved performance.\n\n    Args:\n        model (str, optional): OpenAI embedding model identifier.\n            Defaults to ``\"text-embedding-3-small\"``. Common options:\n            - ``\"text-embedding-3-small\"``: 1536 dims, cost-efficient, good performance\n            - ``\"text-embedding-3-large\"``: 3072 dims, highest quality\n            - ``\"text-embedding-ada-002\"``: 1536 dims, legacy model\n        dimension (Optional[int], optional): Desired output embedding dimension.\n            If ``None``, uses model's default dimension. 
For text-embedding-3 models,\n            you can specify custom dimensions (e.g., 256, 512, 1024, 1536).\n            Defaults to ``None``.\n        api_key (Optional[str], optional): OpenAI API authentication key.\n            If ``None``, reads from ``OPENAI_API_KEY`` environment variable.\n            Obtain your key from: https://platform.openai.com/api-keys\n        base_url (Optional[str], optional): Custom API base URL for OpenAI-compatible\n            services. Defaults to ``None`` (uses official OpenAI endpoint).\n\n    Attributes:\n        dimension (int): The embedding vector dimension.\n        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.\n        model (str): The OpenAI model name being used.\n\n    Raises:\n        ValueError: If API key is not provided and not found in environment,\n            or if API returns an error response.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If network error or OpenAI service error occurs.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``openai`` package: ``pip install openai``\n        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls\n        - Network connectivity to OpenAI API endpoints is required\n        - API usage incurs costs based on your OpenAI subscription plan\n        - Rate limits apply based on your OpenAI account tier\n\n    Examples:\n        >>> # Basic usage with default model\n        >>> from zvec.extension import OpenAIDenseEmbedding\n        >>> import os\n        >>> os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n        >>>\n        >>> emb_func = OpenAIDenseEmbedding()\n        >>> vector = emb_func.embed(\"Hello, world!\")\n        >>> len(vector)\n        1536\n\n        >>> # Using specific model with custom dimension\n        >>> emb_func = OpenAIDenseEmbedding(\n        ...     model=\"text-embedding-3-large\",\n        ...     
dimension=1024,\n        ...     api_key=\"sk-...\"\n        ... )\n        >>> vector = emb_func.embed(\"Machine learning is fascinating\")\n        >>> len(vector)\n        1024\n\n        >>> # Using with custom base URL (e.g., Azure OpenAI)\n        >>> emb_func = OpenAIDenseEmbedding(\n        ...     model=\"text-embedding-ada-002\",\n        ...     api_key=\"your-azure-key\",\n        ...     base_url=\"https://your-resource.openai.azure.com/\"\n        ... )\n        >>> vector = emb_func(\"Natural language processing\")\n        >>> isinstance(vector, list)\n        True\n\n        >>> # Batch processing with caching benefit\n        >>> texts = [\"First text\", \"Second text\", \"First text\"]\n        >>> vectors = [emb_func.embed(text) for text in texts]\n        >>> # Third call uses cached result for \"First text\"\n\n        >>> # Error handling\n        >>> try:\n        ...     emb_func.embed(\"\")  # Empty string\n        ... except ValueError as e:\n        ...     print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n    See Also:\n        - ``DenseEmbeddingFunction``: Base class for dense embeddings\n        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API\n        - ``DefaultDenseEmbedding``: Local model without API calls\n        - ``SparseEmbeddingFunction``: Base class for sparse embeddings\n    \"\"\"\n\n    def __init__(\n        self,\n        model: str = \"text-embedding-3-small\",\n        dimension: Optional[int] = None,\n        api_key: Optional[str] = None,\n        base_url: Optional[str] = None,\n        **kwargs,\n    ):\n        \"\"\"Initialize the OpenAI dense embedding function.\n\n        Args:\n            model (str): OpenAI model name. 
Defaults to \"text-embedding-3-small\".\n            dimension (Optional[int]): Target embedding dimension or None for default.\n            api_key (Optional[str]): API key or None to use environment variable.\n            base_url (Optional[str]): Custom API base URL or None for default.\n            **kwargs: Additional parameters for API calls. Examples:\n                - ``encoding_format`` (str): Format of embeddings, \"float\" or \"base64\".\n                - ``user`` (str): User identifier for tracking.\n\n        Raises:\n            ValueError: If API key is not provided and not in environment.\n        \"\"\"\n        # Initialize base class for API connection\n        OpenAIFunctionBase.__init__(\n            self, model=model, api_key=api_key, base_url=base_url\n        )\n\n        # Store dimension configuration\n        self._custom_dimension = dimension\n\n        # Determine actual dimension\n        if dimension is None:\n            # Use model default dimension\n            self._dimension = self._MODEL_DIMENSIONS.get(model, 1536)\n        else:\n            self._dimension = dimension\n\n        # Store dense-specific attributes\n        self._extra_params = kwargs\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: The expected dimensionality of the embedding vector.\"\"\"\n        return self._dimension\n\n    @property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for model-specific customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=10)\n    def embed(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Generate dense embedding vector for the input text.\n\n        This method calls the OpenAI Embeddings API to convert input text\n        into a dense vector representation. 
Results are cached to improve\n        performance for repeated inputs.\n\n        Args:\n            input (TEXT): Input text string to embed. Must be non-empty after\n                stripping whitespace. Maximum length is 8191 tokens for most models.\n\n        Returns:\n            DenseVectorType: A list of floats representing the embedding vector.\n                Length equals ``self.dimension``. Example:\n                ``[0.123, -0.456, 0.789, ...]``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty/whitespace-only, or if the API returns\n                an error or malformed response.\n            RuntimeError: If network connectivity issues or OpenAI service\n                errors occur.\n\n        Examples:\n            >>> emb = OpenAIDenseEmbedding()\n            >>> vector = emb.embed(\"Natural language processing\")\n            >>> len(vector)\n            1536\n            >>> isinstance(vector[0], float)\n            True\n\n            >>> # Error: empty input\n            >>> emb.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> emb.embed(123)\n            TypeError: Expected 'input' to be str, got int\n\n        Note:\n            - This method is cached (maxsize=10). 
Identical inputs return cached results.\n            - The cache is based on exact string match (case-sensitive).\n            - Consider pre-processing text (lowercasing, normalization) for better caching.\n        \"\"\"\n        if not isinstance(input, TEXT):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        # Call API\n        embedding_vector = self._call_text_embedding_api(\n            input=input,\n            dimension=self._custom_dimension,\n        )\n\n        # Verify dimension\n        if len(embedding_vector) != self.dimension:\n            raise ValueError(\n                f\"Dimension mismatch: expected {self.dimension}, \"\n                f\"got {len(embedding_vector)}\"\n            )\n\n        return embedding_vector\n"
  },
  {
    "path": "python/zvec/extension/openai_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport os\nfrom typing import ClassVar, Optional\n\nfrom ..common.constants import TEXT\nfrom ..tool import require_module\n\n\nclass OpenAIFunctionBase:\n    \"\"\"Base class for OpenAI functions.\n\n    This base class provides common functionality for calling OpenAI APIs\n    and handling responses. It supports embeddings (dense) operations.\n\n    This class is not meant to be used directly. 
Use concrete implementations:\n    - ``OpenAIDenseEmbedding`` for dense embeddings\n\n    Args:\n        model (str): OpenAI model identifier.\n        api_key (Optional[str]): OpenAI API authentication key.\n        base_url (Optional[str]): Custom API base URL.\n\n    Note:\n        - This is an internal base class for code reuse across OpenAI features\n        - Subclasses should inherit from appropriate Protocol\n        - Provides unified API connection and response handling\n    \"\"\"\n\n    # Model default dimensions\n    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {\n        \"text-embedding-3-small\": 1536,\n        \"text-embedding-3-large\": 3072,\n        \"text-embedding-ada-002\": 1536,\n    }\n\n    def __init__(\n        self,\n        model: str,\n        api_key: Optional[str] = None,\n        base_url: Optional[str] = None,\n    ):\n        \"\"\"Initialize the base OpenAI functionality.\n\n        Args:\n            model (str): OpenAI model name.\n            api_key (Optional[str]): API key or None to use environment variable.\n            base_url (Optional[str]): Custom API base URL or None for default.\n\n        Raises:\n            ValueError: If API key is not provided and not in environment.\n        \"\"\"\n        self._model = model\n        self._api_key = api_key or os.environ.get(\"OPENAI_API_KEY\")\n        self._base_url = base_url\n\n        if not self._api_key:\n            raise ValueError(\n                \"OpenAI API key is required. 
Please provide 'api_key' parameter \"\n                \"or set the 'OPENAI_API_KEY' environment variable.\"\n            )\n\n    @property\n    def model(self) -> str:\n        \"\"\"str: The OpenAI model name currently in use.\"\"\"\n        return self._model\n\n    def _get_client(self):\n        \"\"\"Get OpenAI client instance.\n\n        Returns:\n            OpenAI: Configured OpenAI client.\n\n        Raises:\n            ImportError: If openai package is not installed.\n        \"\"\"\n        openai = require_module(\"openai\")\n\n        if self._base_url:\n            return openai.OpenAI(api_key=self._api_key, base_url=self._base_url)\n        return openai.OpenAI(api_key=self._api_key)\n\n    def _call_text_embedding_api(\n        self,\n        input: TEXT,\n        dimension: Optional[int] = None,\n    ) -> list:\n        \"\"\"Call OpenAI Embeddings API.\n\n        Args:\n            input (TEXT): Input text to embed.\n            dimension (Optional[int]): Target dimension (for models that support it).\n\n        Returns:\n            list: Embedding vector as list of floats.\n\n        Raises:\n            RuntimeError: If API call fails.\n            ValueError: If API returns error response.\n        \"\"\"\n        try:\n            client = self._get_client()\n\n            # Prepare embedding parameters\n            params = {\"model\": self.model, \"input\": input}\n\n            # Add dimension parameter for models that support it\n            if dimension is not None:\n                params[\"dimensions\"] = dimension\n\n            # Call OpenAI API\n            response = client.embeddings.create(**params)\n\n        except Exception as e:\n            # Check if it's an OpenAI API error\n            openai = require_module(\"openai\")\n            if isinstance(e, (openai.APIError, openai.APIConnectionError)):\n                raise RuntimeError(f\"Failed to call OpenAI API: {e!s}\") from e\n            raise 
RuntimeError(f\"Unexpected error during API call: {e!s}\") from e\n\n        # Extract embedding from response\n        try:\n            if not response.data:\n                raise ValueError(\"Invalid API response: no embedding data returned\")\n\n            embedding_vector = response.data[0].embedding\n\n            if not isinstance(embedding_vector, list):\n                raise ValueError(\n                    \"Invalid API response: embedding is not a list of numbers\"\n                )\n\n            return embedding_vector\n\n        except (AttributeError, IndexError, TypeError) as e:\n            raise ValueError(f\"Failed to parse API response: {e!s}\") from e\n"
  },
  {
    "path": "python/zvec/extension/qwen_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom functools import lru_cache\nfrom typing import Optional\n\nfrom ..common.constants import TEXT, DenseVectorType, SparseVectorType\nfrom .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction\nfrom .qwen_function import QwenFunctionBase\n\n\nclass QwenDenseEmbedding(QwenFunctionBase, DenseEmbeddingFunction[TEXT]):\n    \"\"\"Dense text embedding function using Qwen (DashScope) API.\n\n    This class provides text-to-vector embedding capabilities using Alibaba Cloud's\n    DashScope service and Qwen embedding models. It inherits from\n    ``DenseEmbeddingFunction`` and implements dense text embedding.\n\n    The implementation supports various Qwen embedding models with configurable\n    dimensions and includes automatic result caching for improved performance.\n\n    Args:\n        dimension (int): Desired output embedding dimension. Common values:\n            - 512: Balanced performance and accuracy\n            - 1024: Higher accuracy, larger storage\n            - 1536: Maximum accuracy for supported models\n        model (str, optional): DashScope embedding model identifier.\n            Defaults to ``\"text-embedding-v4\"``. 
Other options include:\n            - ``\"text-embedding-v3\"``\n            - ``\"text-embedding-v2\"``\n            - ``\"text-embedding-v1\"``\n        api_key (Optional[str], optional): DashScope API authentication key.\n            If ``None``, reads from ``DASHSCOPE_API_KEY`` environment variable.\n            Obtain your key from: https://dashscope.console.aliyun.com/\n        **kwargs: Additional DashScope API parameters. Supported options:\n            - ``text_type`` (str): Specifies the text role in retrieval tasks.\n              Options: ``\"query\"`` (search query) or ``\"document\"`` (indexed content).\n              This parameter optimizes embeddings for asymmetric search scenarios.\n\n            Reference: https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api\n\n    Attributes:\n        dimension (int): The embedding vector dimension.\n        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.\n        model (str): The DashScope model name being used.\n\n    Raises:\n        ValueError: If API key is not provided and not found in environment,\n            or if API returns an error response.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If network error or DashScope service error occurs.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``dashscope`` package: ``pip install dashscope``\n        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls\n        - Network connectivity to DashScope API endpoints is required\n        - API usage may incur costs based on your DashScope subscription plan\n\n        **Parameter Guidelines:**\n\n        - Use ``text_type=\"query\"`` for search queries and ``text_type=\"document\"``\n          for indexed content to optimize asymmetric retrieval tasks.\n        - For detailed API specifications and parameter usage, refer to:\n          
https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api\n\n    Examples:\n        >>> # Basic usage with default model\n        >>> from zvec.extension import QwenDenseEmbedding\n        >>> import os\n        >>> os.environ[\"DASHSCOPE_API_KEY\"] = \"your-api-key\"\n        >>>\n        >>> emb_func = QwenDenseEmbedding(dimension=1024)\n        >>> vector = emb_func.embed(\"Hello, world!\")\n        >>> len(vector)\n        1024\n\n        >>> # Using specific model with explicit API key\n        >>> emb_func = QwenDenseEmbedding(\n        ...     dimension=512,\n        ...     model=\"text-embedding-v3\",\n        ...     api_key=\"sk-xxxxx\"\n        ... )\n        >>> vector = emb_func(\"Machine learning is fascinating\")\n        >>> isinstance(vector, list)\n        True\n\n        >>> # Using with custom parameters (text_type)\n        >>> # For search queries - optimize for query-document matching\n        >>> emb_func = QwenDenseEmbedding(\n        ...     dimension=1024,\n        ...     text_type=\"query\"\n        ... )\n        >>> query_vector = emb_func.embed(\"What is machine learning?\")\n        >>>\n        >>> # For document embeddings - optimize for being matched by queries\n        >>> doc_emb_func = QwenDenseEmbedding(\n        ...     dimension=1024,\n        ...     text_type=\"document\"\n        ... )\n        >>> doc_vector = doc_emb_func.embed(\n        ...     \"Machine learning is a subset of artificial intelligence...\"\n        ... )\n\n        >>> # Batch processing with caching benefit\n        >>> texts = [\"First text\", \"Second text\", \"First text\"]\n        >>> vectors = [emb_func.embed(text) for text in texts]\n        >>> # Third call uses cached result for \"First text\"\n\n        >>> # Error handling\n        >>> try:\n        ...     emb_func.embed(\"\")  # Empty string\n        ... except ValueError as e:\n        ...     
print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n    See Also:\n        - ``DenseEmbeddingFunction``: Base class for dense embeddings\n        - ``SparseEmbeddingFunction``: Base class for sparse embeddings\n    \"\"\"\n\n    def __init__(\n        self,\n        dimension: int,\n        model: str = \"text-embedding-v4\",\n        api_key: Optional[str] = None,\n        **kwargs,\n    ):\n        \"\"\"Initialize the Qwen dense embedding function.\n\n        Args:\n            dimension (int): Target embedding dimension.\n            model (str): DashScope model name. Defaults to \"text-embedding-v4\".\n            api_key (Optional[str]): API key or None to use environment variable.\n            **kwargs: Additional DashScope API parameters. Supported options:\n                - ``text_type`` (str): Text role in asymmetric retrieval.\n                  * ``\"query\"``: Optimize for search queries (short, question-like).\n                  * ``\"document\"``: Optimize for indexed documents (longer content).\n                  Using appropriate text_type improves retrieval accuracy by\n                  optimizing the embedding space for query-document matching.\n\n                For detailed API documentation, see:\n                https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api\n\n        Raises:\n            ValueError: If API key is not provided and not in environment.\n        \"\"\"\n        # Initialize base class for API connection\n        QwenFunctionBase.__init__(self, model=model, api_key=api_key)\n\n        # Store dense-specific attributes\n        self._dimension = dimension\n        self._extra_params = kwargs\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: The expected dimensionality of the embedding vector.\"\"\"\n        return self._dimension\n\n    @property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for model-specific 
customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=10)\n    def embed(self, input: TEXT) -> DenseVectorType:\n        \"\"\"Generate dense embedding vector for the input text.\n\n        This method calls the DashScope TextEmbedding API to convert input text\n        into a dense vector representation. Results are cached to improve\n        performance for repeated inputs.\n\n        Args:\n            input (TEXT): Input text string to embed. Must be non-empty after\n                stripping whitespace. Maximum length depends on the model used\n                (typically 2048-8192 tokens).\n\n        Returns:\n            DenseVectorType: A list of floats representing the embedding vector.\n                Length equals ``self.dimension``. Example:\n                ``[0.123, -0.456, 0.789, ...]``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty/whitespace-only, or if the API returns\n                an error or malformed response.\n            RuntimeError: If network connectivity issues or DashScope service\n                errors occur.\n\n        Examples:\n            >>> emb = QwenDenseEmbedding(dimension=1024)\n            >>> vector = emb.embed(\"Natural language processing\")\n            >>> len(vector)\n            1024\n            >>> isinstance(vector[0], float)\n            True\n\n            >>> # Error: empty input\n            >>> emb.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> emb.embed(123)\n            TypeError: Expected 'input' to be str, got int\n\n        Note:\n            - This method is cached (maxsize=10). 
Identical inputs return cached results.\n            - The cache is based on exact string match (case-sensitive).\n            - Consider pre-processing text (lowercasing, normalization) for better caching.\n        \"\"\"\n        if not isinstance(input, TEXT):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        # Call API with dense output type\n        output = self._call_text_embedding_api(\n            input=input,\n            dimension=self.dimension,\n            output_type=\"dense\",\n            text_type=self.extra_params.get(\"text_type\"),\n        )\n\n        embeddings = output.get(\"embeddings\")\n        if not isinstance(embeddings, list):\n            raise ValueError(\n                \"Invalid API response: 'embeddings' field is missing or not a list\"\n            )\n\n        if len(embeddings) != 1:\n            raise ValueError(\n                f\"Expected exactly 1 embedding in response, got {len(embeddings)}\"\n            )\n\n        first_emb = embeddings[0]\n        if not isinstance(first_emb, dict):\n            raise ValueError(\"Invalid API response: embedding item is not a dictionary\")\n\n        embedding_vector = first_emb.get(\"embedding\")\n        if not isinstance(embedding_vector, list):\n            raise ValueError(\n                \"Invalid API response: 'embedding' field is missing or not a list\"\n            )\n\n        if len(embedding_vector) != self.dimension:\n            raise ValueError(\n                f\"Dimension mismatch: expected {self.dimension}, \"\n                f\"got {len(embedding_vector)}\"\n            )\n\n        return list(embedding_vector)\n\n\nclass QwenSparseEmbedding(QwenFunctionBase, SparseEmbeddingFunction[TEXT]):\n    \"\"\"Sparse text embedding function using Qwen (DashScope) API.\n\n    This 
class provides text-to-sparse-vector embedding capabilities using\n    Alibaba Cloud's DashScope service and Qwen embedding models. It generates\n    sparse keyword-weighted vectors suitable for lexical matching and BM25-style\n    retrieval scenarios.\n\n    Sparse embeddings are particularly useful for:\n    - Keyword-based search and exact matching\n    - Hybrid retrieval (combining with dense embeddings)\n    - Interpretable search results (weights show term importance)\n\n    Args:\n        dimension (int): Desired output embedding dimension. Common values:\n            - 512: Balanced performance and accuracy\n            - 1024: Higher accuracy, larger storage\n            - 1536: Maximum accuracy for supported models\n        model (str, optional): DashScope embedding model identifier.\n            Defaults to ``\"text-embedding-v4\"``. Other options include:\n            - ``\"text-embedding-v3\"``\n            - ``\"text-embedding-v2\"``\n        api_key (Optional[str], optional): DashScope API authentication key.\n            If ``None``, reads from ``DASHSCOPE_API_KEY`` environment variable.\n            Obtain your key from: https://dashscope.console.aliyun.com/\n        **kwargs: Additional DashScope API parameters. 
Supported options:\n            - ``encoding_type`` (Literal[\"query\", \"document\"]): Encoding type.\n              * ``\"query\"``: Optimize for search queries (default).\n              * ``\"document\"``: Optimize for indexed documents.\n              This distinction is important for asymmetric retrieval tasks.\n\n    Attributes:\n        model (str): The DashScope model name being used.\n        extra_params (dict): Extra DashScope parameters given at construction,\n            such as ``encoding_type`` (\"query\" or \"document\").\n\n    Raises:\n        ValueError: If API key is not provided and not found in environment,\n            or if API returns an error response.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If network error or DashScope service error occurs.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``dashscope`` package: ``pip install dashscope``\n        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls\n        - Network connectivity to DashScope API endpoints is required\n        - API usage may incur costs based on your DashScope subscription plan\n        - Sparse vectors have only non-zero dimensions stored as dict\n        - Output is sorted by indices (keys) in ascending order\n\n        **Parameter Guidelines:**\n\n        - Use ``encoding_type=\"query\"`` for search queries and\n          ``encoding_type=\"document\"`` for indexed content to optimize\n          asymmetric retrieval tasks.\n        - For detailed API specifications, refer to:\n          https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api\n\n    Examples:\n        >>> # Basic usage for query embedding\n        >>> from zvec.extension import QwenSparseEmbedding\n        >>> import os\n        >>> os.environ[\"DASHSCOPE_API_KEY\"] = \"your-api-key\"\n        >>>\n        >>> query_emb = QwenSparseEmbedding(dimension=1024, encoding_type=\"query\")\n        >>> query_vec = query_emb.embed(\"machine learning\")\n        >>> 
type(query_vec)\n        <class 'dict'>\n        >>> len(query_vec)  # Only non-zero dimensions\n        156\n\n        >>> # Document embedding\n        >>> doc_emb = QwenSparseEmbedding(dimension=1024, encoding_type=\"document\")\n        >>> doc_vec = doc_emb.embed(\"Machine learning is a subset of AI\")\n        >>> isinstance(doc_vec, dict)\n        True\n\n        >>> # Asymmetric retrieval example\n        >>> query_vec = query_emb.embed(\"what causes aging fast\")\n        >>> doc_vec = doc_emb.embed(\n        ...     \"UV-A light causes tanning, skin aging, and cataracts...\"\n        ... )\n        >>>\n        >>> # Calculate similarity (dot product for sparse vectors)\n        >>> similarity = sum(\n        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)\n        ...     for k in set(query_vec) | set(doc_vec)\n        ... )\n\n        >>> # Output is sorted by indices\n        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)\n        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]\n\n        >>> # Hybrid retrieval (combining dense + sparse)\n        >>> from zvec.extension import QwenDenseEmbedding\n        >>> dense_emb = QwenDenseEmbedding(dimension=1024)\n        >>> sparse_emb = QwenSparseEmbedding(dimension=1024)\n        >>>\n        >>> query = \"deep learning neural networks\"\n        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]\n        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}\n\n        >>> # Error handling\n        >>> try:\n        ...     sparse_emb.embed(\"\")  # Empty string\n        ... except ValueError as e:\n        ...     
print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n    See Also:\n        - ``SparseEmbeddingFunction``: Base class for sparse embeddings\n        - ``QwenDenseEmbedding``: Dense embedding using Qwen API\n        - ``DefaultSparseEmbedding``: Sparse embedding with SPLADE model\n    \"\"\"\n\n    def __init__(\n        self,\n        dimension: int,\n        model: str = \"text-embedding-v4\",\n        api_key: Optional[str] = None,\n        **kwargs,\n    ):\n        \"\"\"Initialize the Qwen sparse embedding function.\n\n        Args:\n            dimension (int): Target embedding dimension.\n            model (str): DashScope model name. Defaults to \"text-embedding-v4\".\n            api_key (Optional[str]): API key or None to use environment variable.\n            **kwargs: Additional DashScope API parameters. Supported options:\n                - ``encoding_type`` (Literal[\"query\", \"document\"]): Encoding type.\n                  * ``\"query\"``: Optimize for search queries (default).\n                  * ``\"document\"``: Optimize for indexed documents.\n                  This distinction is important for asymmetric retrieval tasks.\n\n        Raises:\n            ValueError: If API key is not provided and not in environment.\n        \"\"\"\n        # Initialize base class for API connection\n        QwenFunctionBase.__init__(self, model=model, api_key=api_key)\n\n        self._dimension = dimension\n        self._extra_params = kwargs\n\n    @property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for model-specific customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: TEXT) -> SparseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    @lru_cache(maxsize=10)\n    def embed(self, input: TEXT) -> SparseVectorType:\n        \"\"\"Generate sparse embedding vector for the input text.\n\n        This 
method calls the DashScope TextEmbedding API with sparse output type\n        to convert input text into a sparse vector representation. The result is\n        a dictionary where keys are dimension indices and values are importance\n        weights (only non-zero values included).\n\n        The embedding is optimized based on the ``encoding_type`` specified during\n        initialization: \"query\" for search queries or \"document\" for indexed content.\n\n        Args:\n            input (TEXT): Input text string to embed. Must be non-empty after\n                stripping whitespace. Maximum length depends on the model used\n                (typically 2048-8192 tokens).\n\n        Returns:\n            SparseVectorType: A dictionary mapping dimension index to weight.\n                Only non-zero dimensions are included. The dictionary is sorted\n                by indices (keys) in ascending order for consistent output.\n                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty/whitespace-only, or if the API returns\n                an error or malformed response.\n            RuntimeError: If network connectivity issues or DashScope service\n                errors occur.\n\n        Examples:\n            >>> emb = QwenSparseEmbedding(dimension=1024, encoding_type=\"query\")\n            >>> sparse_vec = emb.embed(\"machine learning\")\n            >>> isinstance(sparse_vec, dict)\n            True\n            >>>\n            >>> # Verify sorted output\n            >>> keys = list(sparse_vec.keys())\n            >>> keys == sorted(keys)\n            True\n\n            >>> # Error: empty input\n            >>> emb.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> emb.embed(123)\n            TypeError: Expected 'input' to be str, got 
int\n\n        Note:\n            - This method is cached (maxsize=10). Identical inputs return cached results.\n            - The cache is based on exact string match (case-sensitive).\n            - Output dictionary is always sorted by indices for consistency.\n        \"\"\"\n        if not isinstance(input, TEXT):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        # Call API with sparse output type\n        output = self._call_text_embedding_api(\n            input=input,\n            dimension=self._dimension,\n            output_type=\"sparse\",\n            text_type=self.extra_params.get(\"encoding_type\", \"query\"),\n        )\n\n        embeddings = output.get(\"embeddings\")\n        if not isinstance(embeddings, list):\n            raise ValueError(\n                \"Invalid API response: 'embeddings' field is missing or not a list\"\n            )\n\n        if len(embeddings) != 1:\n            raise ValueError(\n                f\"Expected exactly 1 embedding in response, got {len(embeddings)}\"\n            )\n\n        first_emb = embeddings[0]\n        if not isinstance(first_emb, dict):\n            raise ValueError(\"Invalid API response: embedding item is not a dictionary\")\n\n        sparse_embedding = first_emb.get(\"sparse_embedding\")\n        if not isinstance(sparse_embedding, list):\n            raise ValueError(\n                \"Invalid API response: 'sparse_embedding' field is missing or not a list\"\n            )\n\n        # Parse sparse embedding: convert array of {index, value, token} to dict\n        sparse_dict = {}\n        for item in sparse_embedding:\n            if not isinstance(item, dict):\n                raise ValueError(\n                    \"Invalid API response: sparse_embedding item is not a dictionary\"\n                
)\n\n            index = item.get(\"index\")\n            value = item.get(\"value\")\n\n            if index is None or value is None:\n                raise ValueError(\n                    \"Invalid API response: sparse_embedding item missing 'index' or 'value'\"\n                )\n\n            # Convert to int and float, filter positive values\n            idx = int(index)\n            val = float(value)\n            if val > 0:\n                sparse_dict[idx] = val\n\n        # Sort by indices (keys) to ensure consistent ordering\n        return dict(sorted(sparse_dict.items()))\n"
  },
  {
    "path": "python/zvec/extension/qwen_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport os\nfrom http import HTTPStatus\nfrom typing import Optional\n\nfrom ..common.constants import TEXT\nfrom ..tool import require_module\n\n\nclass QwenFunctionBase:\n    \"\"\"Base class for Qwen (DashScope) functions.\n\n    This base class provides common functionality for calling DashScope APIs\n    and handling responses. It supports embeddings (dense and sparse) and\n    re-ranking operations.\n\n    This class is not meant to be used directly. 
Use concrete implementations:\n    - ``QwenDenseEmbedding`` for dense embeddings\n    - ``QwenSparseEmbedding`` for sparse embeddings\n    - ``QwenReRanker`` for semantic re-ranking\n\n    Args:\n        model (str): DashScope model identifier.\n        api_key (Optional[str]): DashScope API authentication key.\n\n    Note:\n        - This is an internal base class for code reuse across Qwen features\n        - Subclasses should inherit from appropriate Protocol/ABC\n        - Provides unified API connection and response handling\n    \"\"\"\n\n    def __init__(\n        self,\n        model: str,\n        api_key: Optional[str] = None,\n    ):\n        \"\"\"Initialize the base Qwen embedding functionality.\n\n        Args:\n            model (str): DashScope model name.\n            api_key (Optional[str]): API key or None to use environment variable.\n\n        Raises:\n            ValueError: If API key is not provided and not in environment.\n        \"\"\"\n        self._model = model\n        self._api_key = api_key or os.environ.get(\"DASHSCOPE_API_KEY\")\n        if not self._api_key:\n            raise ValueError(\n                \"DashScope API key is required. 
Please provide 'api_key' parameter \"\n                \"or set the 'DASHSCOPE_API_KEY' environment variable.\"\n            )\n\n    @property\n    def model(self) -> str:\n        \"\"\"str: The DashScope embedding model name currently in use.\"\"\"\n        return self._model\n\n    def _get_connection(self):\n        \"\"\"Establish connection to DashScope API.\n\n        Returns:\n            module: The dashscope module with API key configured.\n\n        Raises:\n            ImportError: If dashscope package is not installed.\n        \"\"\"\n        dashscope = require_module(\"dashscope\")\n        dashscope.api_key = self._api_key\n        return dashscope\n\n    def _call_text_embedding_api(\n        self,\n        input: TEXT,\n        dimension: int,\n        output_type: str,\n        text_type: Optional[str] = None,\n    ) -> dict:\n        \"\"\"Call DashScope TextEmbedding API.\n\n        Args:\n            input (TEXT): Input text to embed.\n            dimension (int): Target embedding dimension.\n            output_type (str): Output type (\"dense\" or \"sparse\").\n            text_type (Optional[str]): Text type (\"query\" or \"document\").\n\n        Returns:\n            dict: API response output field.\n\n        Raises:\n            RuntimeError: If API call fails.\n            ValueError: If API returns error response.\n        \"\"\"\n        try:\n            # Prepare API call parameters\n            call_params = {\n                \"model\": self.model,\n                \"input\": input,\n                \"dimension\": dimension,\n                \"output_type\": output_type,\n            }\n\n            # Add optional text_type parameter if provided\n            if text_type is not None:\n                call_params[\"text_type\"] = text_type\n\n            resp = self._get_connection().TextEmbedding.call(**call_params)\n        except Exception as e:\n            raise RuntimeError(f\"Failed to call DashScope API: {e!s}\") from 
e\n\n        if resp.status_code != HTTPStatus.OK:\n            error_msg = getattr(resp, \"message\", \"Unknown error\")\n            error_code = getattr(resp, \"code\", \"N/A\")\n            raise ValueError(\n                f\"DashScope API error: [Code={error_code}, \"\n                f\"Status={resp.status_code}] {error_msg}\"\n            )\n\n        output = getattr(resp, \"output\", None)\n        if not isinstance(output, dict):\n            raise ValueError(\n                \"Invalid API response: missing or malformed 'output' field\"\n            )\n\n        return output\n\n    def _call_rerank_api(\n        self,\n        query: str,\n        documents: list[str],\n        top_n: int,\n    ) -> dict:\n        \"\"\"Call DashScope TextReRank API.\n\n        Args:\n            query (str): Query text for semantic matching.\n            documents (list[str]): List of document texts to re-rank.\n            top_n (int): Maximum number of documents to return.\n\n        Returns:\n            dict: API response output field containing re-ranked results.\n\n        Raises:\n            RuntimeError: If API call fails.\n            ValueError: If API returns error response.\n        \"\"\"\n        try:\n            resp = self._get_connection().TextReRank.call(\n                model=self.model,\n                query=query,\n                documents=documents,\n                top_n=top_n,\n                return_documents=False,\n            )\n        except Exception as e:\n            raise RuntimeError(f\"Failed to call DashScope API: {e!s}\") from e\n\n        if resp.status_code != HTTPStatus.OK:\n            error_msg = getattr(resp, \"message\", \"Unknown error\")\n            error_code = getattr(resp, \"code\", \"N/A\")\n            raise ValueError(\n                f\"DashScope API error: [Code={error_code}, \"\n                f\"Status={resp.status_code}] {error_msg}\"\n            )\n\n        output = getattr(resp, \"output\", None)\n 
       if not isinstance(output, dict):\n            raise ValueError(\n                \"Invalid API response: missing or malformed 'output' field\"\n            )\n\n        return output\n"
  },
  {
    "path": "python/zvec/extension/qwen_rerank_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Optional\n\nfrom ..model.doc import Doc\nfrom .qwen_function import QwenFunctionBase\nfrom .rerank_function import RerankFunction\n\n\nclass QwenReRanker(QwenFunctionBase, RerankFunction):\n    \"\"\"Re-ranker using Qwen (DashScope) cross-encoder API for semantic re-ranking.\n\n    This re-ranker leverages DashScope's TextReRank service to perform\n    cross-encoder style re-ranking. It sends query and document pairs to the\n    API and receives relevance scores based on deep semantic understanding.\n\n    The re-ranker is suitable for single-vector or multi-vector search scenarios\n    where semantic relevance to a specific query is required.\n\n    Args:\n        query (str): Query text for semantic re-ranking. 
**Required**.\n        topn (int, optional): Maximum number of documents to return after re-ranking.\n            Defaults to 10.\n        rerank_field (str): Document field name to use as re-ranking input text.\n            **Required** (e.g., \"content\", \"title\", \"body\").\n        model (str, optional): DashScope re-ranking model identifier.\n            Defaults to ``\"gte-rerank-v2\"``.\n        api_key (Optional[str], optional): DashScope API authentication key.\n            If not provided, reads from ``DASHSCOPE_API_KEY`` environment variable.\n\n    Raises:\n        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,\n            or API key is not available.\n\n    Note:\n        - Requires ``dashscope`` Python package installed\n        - Documents without valid content in ``rerank_field`` are skipped\n        - API rate limits and quotas apply per DashScope subscription\n\n    Example:\n        >>> reranker = QwenReRanker(\n        ...     query=\"machine learning algorithms\",\n        ...     topn=5,\n        ...     rerank_field=\"content\",\n        ...     model=\"gte-rerank-v2\",\n        ...     api_key=\"your-api-key\"\n        ... )\n        >>> # Use in collection.query(reranker=reranker)\n    \"\"\"\n\n    def __init__(\n        self,\n        query: Optional[str] = None,\n        topn: int = 10,\n        rerank_field: Optional[str] = None,\n        model: str = \"gte-rerank-v2\",\n        api_key: Optional[str] = None,\n    ):\n        \"\"\"Initialize QwenReRanker with query and configuration.\n\n        Args:\n            query (Optional[str]): Query text for semantic matching. 
Required.\n            topn (int): Number of top results to return.\n            rerank_field (Optional[str]): Document field for re-ranking input.\n            model (str): DashScope model name.\n            api_key (Optional[str]): API key or None to use environment variable.\n\n        Raises:\n            ValueError: If query is empty or API key is unavailable.\n        \"\"\"\n        QwenFunctionBase.__init__(self, model=model, api_key=api_key)\n        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)\n\n        if not query:\n            raise ValueError(\"Query is required for QwenReRanker\")\n        self._query = query\n\n    @property\n    def query(self) -> str:\n        \"\"\"str: Query text used for semantic re-ranking.\"\"\"\n        return self._query\n\n    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:\n        \"\"\"Re-rank documents using Qwen's TextReRank API.\n\n        Sends document texts to DashScope TextReRank service along with the query.\n        Returns documents sorted by relevance scores from the cross-encoder model.\n\n        Args:\n            query_results (dict[str, list[Doc]]): Mapping from vector field names\n                to lists of retrieved documents. 
Documents from all fields are\n                deduplicated and re-ranked together.\n\n        Returns:\n            list[Doc]: Re-ranked documents (up to ``topn``) with updated ``score``\n                fields containing relevance scores from the API.\n\n        Raises:\n            ValueError: If no valid documents are found or the API returns\n                an error or malformed response.\n            RuntimeError: If the request to the DashScope API fails.\n\n        Note:\n            - Duplicate documents (same ID) across fields are processed once\n            - Documents with empty/missing ``rerank_field`` content are skipped\n            - Returned scores are relevance scores from the cross-encoder model\n        \"\"\"\n        if not query_results:\n            return []\n\n        # Collect and deduplicate documents\n        id_to_doc: dict[str, Doc] = {}\n        doc_ids: list[str] = []\n        contents: list[str] = []\n\n        for _, query_result in query_results.items():\n            for doc in query_result:\n                doc_id = doc.id\n                if doc_id in id_to_doc:\n                    continue\n\n                # Extract text content from specified field\n                field_value = doc.field(self.rerank_field)\n                rank_content = str(field_value).strip() if field_value else \"\"\n                if not rank_content:\n                    continue\n\n                id_to_doc[doc_id] = doc\n                doc_ids.append(doc_id)\n                contents.append(rank_content)\n\n        if not contents:\n            raise ValueError(\"No documents to rerank\")\n\n        # Call DashScope TextReRank API\n        output = self._call_rerank_api(\n            query=self.query,\n            documents=contents,\n            top_n=self.topn,\n        )\n\n        # Build result list with updated scores\n        results: list[Doc] = []\n        for item in output[\"results\"]:\n            idx = item[\"index\"]\n            doc_id = doc_ids[idx]\n            doc = id_to_doc[doc_id]\n            new_doc = 
doc._replace(score=item[\"relevance_score\"])\n            results.append(new_doc)\n\n        return results\n"
  },
  {
    "path": "python/zvec/extension/rerank_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom abc import ABC, abstractmethod\nfrom typing import Optional\n\nfrom ..model.doc import Doc\n\n\nclass RerankFunction(ABC):\n    \"\"\"Abstract base class for re-ranking search results.\n\n    Re-rankers refine the output of one or more vector queries by applying\n    a secondary scoring strategy. They are used in the ``query()`` method of\n    ``Collection`` via the ``reranker`` parameter.\n\n    Args:\n        topn (int, optional): Number of top documents to return after re-ranking.\n            Defaults to 10.\n        rerank_field (Optional[str], optional): Field name used as input for\n            re-ranking (e.g., document title or body). 
Defaults to None.\n\n    Note:\n        Subclasses must implement the ``rerank()`` method.\n    \"\"\"\n\n    def __init__(\n        self,\n        topn: int = 10,\n        rerank_field: Optional[str] = None,\n    ):\n        self._topn = topn\n        self._rerank_field = rerank_field\n\n    @property\n    def topn(self) -> int:\n        \"\"\"int: Number of top documents to return after re-ranking.\"\"\"\n        return self._topn\n\n    @property\n    def rerank_field(self) -> Optional[str]:\n        \"\"\"Optional[str]: Field name used as re-ranking input.\"\"\"\n        return self._rerank_field\n\n    @abstractmethod\n    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:\n        \"\"\"Re-rank documents from one or more vector queries.\n\n        Args:\n            query_results (dict[str, list[Doc]]): Mapping from vector field name\n                to list of retrieved documents (sorted by relevance).\n\n        Returns:\n            list[Doc]: Re-ranked list of documents (length ≤ ``topn``),\n                with updated ``score`` fields.\n        \"\"\"\n        ...\n"
  },
  {
    "path": "python/zvec/extension/sentence_transformer_embedding_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import ClassVar, Literal, Optional\n\nimport numpy as np\n\nfrom ..common.constants import TEXT, DenseVectorType, SparseVectorType\nfrom .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction\nfrom .sentence_transformer_function import SentenceTransformerFunctionBase\n\n\nclass DefaultLocalDenseEmbedding(\n    SentenceTransformerFunctionBase, DenseEmbeddingFunction[TEXT]\n):\n    \"\"\"Default local dense embedding using all-MiniLM-L6-v2 model.\n\n    This is the default implementation for dense text embedding that uses the\n    ``all-MiniLM-L6-v2`` model from Hugging Face by default. This model provides\n    a good balance between speed and quality for general-purpose text embedding.\n\n    The class provides text-to-vector dense embedding capabilities using the\n    sentence-transformers library. It supports models from Hugging Face Hub and\n    ModelScope, runs locally without API calls, and supports CPU/GPU acceleration.\n\n    The model produces 384-dimensional embeddings and is optimized for semantic\n    similarity tasks. 
It runs locally without requiring API keys.\n\n    Args:\n        model_source (Literal[\"huggingface\", \"modelscope\"], optional): Model source.\n            - ``\"huggingface\"``: Use Hugging Face Hub (default, for international users)\n            - ``\"modelscope\"``: Use ModelScope (recommended for users in China)\n            Defaults to ``\"huggingface\"``.\n        device (Optional[str], optional): Device to run the model on.\n            Options: ``\"cpu\"``, ``\"cuda\"``, ``\"mps\"`` (for Apple Silicon), or ``None``\n            for automatic detection. Defaults to ``None``.\n        normalize_embeddings (bool, optional): Whether to normalize embeddings to\n            unit length (L2 normalization). Useful for cosine similarity.\n            Defaults to ``True``.\n        batch_size (int, optional): Batch size for encoding. Defaults to ``32``.\n        **kwargs: Additional parameters for future extension.\n\n    Attributes:\n        dimension (int): Always 384 for both models.\n        model_name (str): \"all-MiniLM-L6-v2\" (HF) or \"iic/nlp_gte_sentence-embedding_chinese-small\" (MS).\n        model_source (str): The model source being used.\n        device (str): The device the model is running on.\n\n    Raises:\n        ValueError: If the model cannot be loaded or input is invalid.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If model inference fails.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``sentence-transformers`` package:\n          ``pip install sentence-transformers``\n        - For ModelScope, also requires: ``pip install modelscope``\n        - First run downloads the model (~50-80MB) from chosen source\n        - Hugging Face cache: ``~/.cache/torch/sentence_transformers/``\n        - ModelScope cache: ``~/.cache/modelscope/hub/``\n        - No API keys or network required after initial download\n        - Inference speed: ~1000 sentences/sec on CPU, ~10000 on 
GPU\n\n        **For users in China:**\n\n        If you encounter Hugging Face access issues, use ModelScope instead:\n\n        .. code-block:: python\n\n            # Recommended for users in China\n            emb = DefaultLocalDenseEmbedding(model_source=\"modelscope\")\n\n        Alternatively, use Hugging Face mirror:\n\n        .. code-block:: bash\n\n            export HF_ENDPOINT=https://hf-mirror.com\n            # Then use default Hugging Face mode\n\n    Examples:\n        >>> # Basic usage with Hugging Face (default)\n        >>> from zvec.extension import DefaultLocalDenseEmbedding\n        >>>\n        >>> emb_func = DefaultLocalDenseEmbedding()\n        >>> vector = emb_func.embed(\"Hello, world!\")\n        >>> len(vector)\n        384\n        >>> isinstance(vector, list)\n        True\n\n        >>> # Recommended for users in China (uses ModelScope)\n        >>> emb_func = DefaultLocalDenseEmbedding(model_source=\"modelscope\")\n        >>> vector = emb_func.embed(\"你好，世界！\")  # Works well with Chinese text\n        >>> len(vector)\n        384\n\n        >>> # Alternative for China users: Use Hugging Face mirror\n        >>> import os\n        >>> os.environ[\"HF_ENDPOINT\"] = \"https://hf-mirror.com\"\n        >>> emb_func = DefaultLocalDenseEmbedding()  # Uses HF mirror\n        >>> vector = emb_func.embed(\"Hello, world!\")\n\n        >>> # Using GPU for faster inference\n        >>> emb_func = DefaultLocalDenseEmbedding(device=\"cuda\")\n        >>> vector = emb_func(\"Machine learning is fascinating\")\n        >>> # Normalized vector has unit length\n        >>> import numpy as np\n        >>> np.linalg.norm(vector)\n        1.0\n\n        >>> # Batch processing\n        >>> texts = [\"First text\", \"Second text\", \"Third text\"]\n        >>> vectors = [emb_func.embed(text) for text in texts]\n        >>> len(vectors)\n        3\n        >>> all(len(v) == 384 for v in vectors)\n        True\n\n        >>> # Semantic similarity\n        
>>> v1 = emb_func.embed(\"The cat sits on the mat\")\n        >>> v2 = emb_func.embed(\"A feline rests on a rug\")\n        >>> v3 = emb_func.embed(\"Python programming\")\n        >>> similarity_high = np.dot(v1, v2)  # Similar sentences\n        >>> similarity_low = np.dot(v1, v3)   # Different topics\n        >>> similarity_high > similarity_low\n        True\n\n        >>> # Error handling\n        >>> try:\n        ...     emb_func.embed(\"\")  # Empty string\n        ... except ValueError as e:\n        ...     print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n    See Also:\n        - ``DenseEmbeddingFunction``: Base class for dense embeddings\n        - ``DefaultLocalSparseEmbedding``: Sparse embedding with SPLADE\n        - ``QwenDenseEmbedding``: Alternative using Qwen API\n    \"\"\"\n\n    def __init__(\n        self,\n        model_source: Literal[\"huggingface\", \"modelscope\"] = \"huggingface\",\n        device: Optional[str] = None,\n        normalize_embeddings: bool = True,\n        batch_size: int = 32,\n        **kwargs,\n    ):\n        \"\"\"Initialize with all-MiniLM-L6-v2 model.\n\n        Args:\n            model_source (Literal[\"huggingface\", \"modelscope\"]): Model source.\n                Defaults to \"huggingface\".\n            device (Optional[str]): Target device (\"cpu\", \"cuda\", \"mps\", or None).\n                Defaults to None (automatic detection).\n            normalize_embeddings (bool): Whether to L2-normalize output vectors.\n                Defaults to True.\n            batch_size (int): Batch size for encoding. 
Defaults to 32.\n            **kwargs: Additional parameters for future extension.\n\n        Raises:\n            ImportError: If sentence-transformers or modelscope is not installed.\n            ValueError: If model cannot be loaded.\n        \"\"\"\n        # Use different models based on source\n        if model_source == \"modelscope\":\n            # Use Chinese-optimized model for ModelScope (better for Chinese text)\n            model_name = \"iic/nlp_gte_sentence-embedding_chinese-small\"\n        else:\n            model_name = \"all-MiniLM-L6-v2\"\n\n        # Initialize base class for model loading\n        SentenceTransformerFunctionBase.__init__(\n            self, model_name=model_name, model_source=model_source, device=device\n        )\n\n        self._normalize_embeddings = normalize_embeddings\n        self._batch_size = batch_size\n\n        # Load model and get dimension\n        model = self._get_model()\n        self._dimension = model.get_sentence_embedding_dimension()\n\n        # Store extra parameters\n        self._extra_params = kwargs\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: The expected dimensionality of the embedding vector.\"\"\"\n        return self._dimension\n\n    @property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for model-specific customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: str) -> DenseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    def embed(self, input: str) -> DenseVectorType:\n        \"\"\"Generate dense embedding vector for the input text.\n\n        This method uses the Sentence Transformer model to convert input text\n        into a dense vector representation. The model runs locally without\n        requiring API calls.\n\n        Args:\n            input (str): Input text string to embed. 
Must be non-empty after\n                stripping whitespace. Maximum length depends on the model used\n                (typically 128-512 tokens for most models).\n\n        Returns:\n            DenseVectorType: A list of floats representing the embedding vector.\n                Length equals ``self.dimension``. If ``normalize_embeddings=True``,\n                the vector has unit length. Example:\n                ``[0.123, -0.456, 0.789, ...]``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty or whitespace-only.\n            RuntimeError: If model inference fails.\n\n        Examples:\n            >>> emb = DefaultLocalDenseEmbedding()\n            >>> vector = emb.embed(\"Natural language processing\")\n            >>> len(vector)\n            384\n            >>> isinstance(vector[0], float)\n            True\n\n            >>> # Normalized vectors have unit length\n            >>> import numpy as np\n            >>> emb = DefaultLocalDenseEmbedding(normalize_embeddings=True)\n            >>> vector = emb.embed(\"Test sentence\")\n            >>> np.linalg.norm(vector)\n            1.0\n\n            >>> # Error: empty input\n            >>> emb.embed(\"   \")\n            ValueError: Input text cannot be empty or whitespace only\n\n            >>> # Error: non-string input\n            >>> emb.embed(123)\n            TypeError: Expected 'input' to be str, got int\n\n            >>> # Semantic similarity example\n            >>> v1 = emb.embed(\"The cat sits on the mat\")\n            >>> v2 = emb.embed(\"A feline rests on a rug\")\n            >>> similarity = np.dot(v1, v2)  # High similarity due to semantic meaning\n            >>> similarity > 0.7\n            True\n\n        Note:\n            - First call may be slower due to model loading\n            - Subsequent calls are much faster as the model stays in memory\n            - For batch processing, consider encoding multiple texts 
together\n              (though this method handles single texts only)\n            - GPU acceleration provides 5-10x speedup over CPU\n        \"\"\"\n        if not isinstance(input, str):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        try:\n            model = self._get_model()\n            embedding = model.encode(\n                input,\n                convert_to_numpy=True,\n                normalize_embeddings=self._normalize_embeddings,\n                batch_size=self._batch_size,\n            )\n\n            # Convert numpy array to list\n            if isinstance(embedding, np.ndarray):\n                embedding_list = embedding.tolist()\n            else:\n                embedding_list = list(embedding)\n\n            # Validate dimension\n            if len(embedding_list) != self.dimension:\n                raise ValueError(\n                    f\"Dimension mismatch: expected {self.dimension}, \"\n                    f\"got {len(embedding_list)}\"\n                )\n\n            return embedding_list\n\n        except Exception as e:\n            if isinstance(e, (TypeError, ValueError)):\n                raise\n            raise RuntimeError(f\"Failed to generate embedding: {e!s}\") from e\n\n\nclass DefaultLocalSparseEmbedding(\n    SentenceTransformerFunctionBase, SparseEmbeddingFunction[TEXT]\n):\n    \"\"\"Default local sparse embedding using SPLADE model.\n\n    This class provides sparse vector embedding using the SPLADE (SParse Lexical\n    AnD Expansion) model. SPLADE generates sparse, interpretable representations\n    where each dimension corresponds to a vocabulary term with learned importance\n    weights. 
It's ideal for lexical matching, BM25-style retrieval, and hybrid\n    search scenarios.\n\n    The default model is ``naver/splade-cocondenser-ensembledistil``, which is\n    publicly available without authentication. It produces sparse vectors with\n    thousands of dimensions but only hundreds of non-zero values, making them\n    efficient for storage and retrieval while maintaining strong lexical matching.\n\n    **Model Caching:**\n\n    This class uses class-level caching to share the SPLADE model across all instances\n    with the same configuration (model_source, device). This significantly reduces\n    memory usage when creating multiple instances for different encoding types\n    (query vs document).\n\n    **Cache Management:**\n\n    The class provides methods to manage the model cache:\n\n    - ``clear_cache()``: Clear all cached models to free memory\n    - ``get_cache_info()``: Get information about cached models\n    - ``remove_from_cache(model_source, device)``: Remove a specific model from cache\n\n    .. note::\n        **Why not use splade-v3?**\n\n        The newer ``naver/splade-v3`` model is gated (requires access approval).\n        We use ``naver/splade-cocondenser-ensembledistil`` instead.\n\n        **To use splade-v3 (if you have access):**\n\n        1. Request access at https://huggingface.co/naver/splade-v3\n        2. Get your Hugging Face token from https://huggingface.co/settings/tokens\n        3. Set environment variable:\n\n           .. code-block:: bash\n\n               export HF_TOKEN=\"your_huggingface_token\"\n\n        4. Or login programmatically:\n\n           .. code-block:: python\n\n               from huggingface_hub import login\n               login(token=\"your_huggingface_token\")\n\n        5. 
To use a custom SPLADE model, you can subclass this class and override\n           the model_name in ``__init__``, or create your own implementation\n           inheriting from ``SentenceTransformerFunctionBase`` and\n           ``SparseEmbeddingFunction``.\n\n    Args:\n        model_source (Literal[\"huggingface\", \"modelscope\"], optional): Model source.\n            Defaults to ``\"huggingface\"``. ModelScope support may vary for SPLADE models.\n        device (Optional[str], optional): Device to run the model on.\n            Options: ``\"cpu\"``, ``\"cuda\"``, ``\"mps\"`` (for Apple Silicon), or ``None``\n            for automatic detection. Defaults to ``None``.\n        encoding_type (Literal[\"query\", \"document\"], optional): Encoding type.\n            - ``\"query\"``: Optimize for search queries (default)\n            - ``\"document\"``: Optimize for indexed documents\n        **kwargs: Additional parameters (currently unused, for future extension).\n\n    Attributes:\n        model_name (str): Model identifier.\n        model_source (str): The model source being used.\n        device (str): The device the model is running on.\n\n    Raises:\n        ValueError: If the model cannot be loaded or input is invalid.\n        TypeError: If input to ``embed()`` is not a string.\n        RuntimeError: If model inference fails.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires the ``sentence-transformers`` package:\n          ``pip install sentence-transformers``\n        - First run downloads the model (~100MB) from Hugging Face\n        - Cache location: ``~/.cache/torch/sentence_transformers/``\n        - No API keys or authentication required\n        - Sparse vectors have ~30k dimensions but only ~100-200 non-zero values\n        - Best combined with dense embeddings for hybrid retrieval\n\n        **SPLADE vs Dense Embeddings:**\n\n        - **Dense**: Continuous semantic vectors, good for semantic similarity\n        - 
**Sparse**: Lexical keyword-based, interpretable, good for exact matching\n        - **Hybrid**: Combine both for best retrieval performance\n\n    Examples:\n        >>> # Memory-efficient: both instances share the same model (~200MB)\n        >>> from zvec.extension import DefaultLocalSparseEmbedding\n        >>>\n        >>> # Query embedding\n        >>> query_emb = DefaultLocalSparseEmbedding(encoding_type=\"query\")\n        >>> query_vec = query_emb.embed(\"machine learning algorithms\")\n        >>> type(query_vec)\n        <class 'dict'>\n        >>> len(query_vec)  # Only non-zero dimensions\n        156\n\n        >>> # Document embedding (shares model with query_emb)\n        >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type=\"document\")\n        >>> doc_vec = doc_emb.embed(\"Machine learning is a subset of AI\")\n        >>> # Total memory: ~200MB (not 400MB) thanks to model caching\n\n        >>> # Asymmetric retrieval example\n        >>> query_vec = query_emb.embed(\"what causes aging fast\")\n        >>> doc_vec = doc_emb.embed(\n        ...     \"UV-A light causes tanning, skin aging, and cataracts...\"\n        ... )\n        >>>\n        >>> # Calculate similarity (dot product for sparse vectors)\n        >>> similarity = sum(\n        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)\n        ...     for k in set(query_vec) | set(doc_vec)\n        ... 
)\n\n        >>> # Batch processing\n        >>> queries = [\"query 1\", \"query 2\", \"query 3\"]\n        >>> query_vecs = [query_emb.embed(q) for q in queries]\n        >>>\n        >>> documents = [\"doc 1\", \"doc 2\", \"doc 3\"]\n        >>> doc_vecs = [doc_emb.embed(d) for d in documents]\n\n        >>> # Inspecting sparse dimensions (output is sorted by indices)\n        >>> query_vec = query_emb.embed(\"machine learning\")\n        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)\n        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]\n        >>>\n        >>> # Sort by weight to find most important terms\n        >>> sorted_by_weight = sorted(query_vec.items(), key=lambda x: x[1], reverse=True)\n        >>> top_5 = sorted_by_weight[:5]  # Top 5 most important terms\n        >>> top_5\n        [(1023, 1.45), (245, 1.23), (8901, 0.98), (5678, 0.87), (12034, 0.76)]\n\n        >>> # Using GPU for faster inference\n        >>> sparse_emb = DefaultLocalSparseEmbedding(device=\"cuda\")\n        >>> vector = sparse_emb.embed(\"natural language processing\")\n\n        >>> # Hybrid retrieval example (combining dense + sparse)\n        >>> from zvec.extension import DefaultDenseEmbedding\n        >>> dense_emb = DefaultDenseEmbedding()\n        >>> sparse_emb = DefaultLocalSparseEmbedding()\n        >>>\n        >>> query = \"deep learning neural networks\"\n        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]\n        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}\n\n        >>> # Error handling\n        >>> try:\n        ...     sparse_emb.embed(\"\")  # Empty string\n        ... except ValueError as e:\n        ...     
print(f\"Error: {e}\")\n        Error: Input text cannot be empty or whitespace only\n\n        >>> # Cache management\n        >>> # Check cache status\n        >>> info = DefaultLocalSparseEmbedding.get_cache_info()\n        >>> print(f\"Cached models: {info['cached_models']}\")\n        Cached models: 1\n        >>>\n        >>> # Clear cache to free memory\n        >>> DefaultLocalSparseEmbedding.clear_cache()\n        >>> info = DefaultLocalSparseEmbedding.get_cache_info()\n        >>> print(f\"Cached models: {info['cached_models']}\")\n        Cached models: 0\n        >>>\n        >>> # Remove specific model from cache\n        >>> query_emb = DefaultLocalSparseEmbedding()  # Creates CPU model\n        >>> cuda_emb = DefaultLocalSparseEmbedding(device=\"cuda\")  # Creates CUDA model\n        >>> info = DefaultLocalSparseEmbedding.get_cache_info()\n        >>> print(f\"Cached models: {info['cached_models']}\")\n        Cached models: 2\n        >>>\n        >>> # Remove only CPU model\n        >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device=None)\n        >>> print(f\"Removed: {removed}\")\n        Removed: True\n        >>> info = DefaultLocalSparseEmbedding.get_cache_info()\n        >>> print(f\"Cached models: {info['cached_models']}\")\n        Cached models: 1\n\n    See Also:\n        - ``SparseEmbeddingFunction``: Base class for sparse embeddings\n        - ``DefaultLocalDenseEmbedding``: Dense embedding with all-MiniLM-L6-v2\n        - ``QwenDenseEmbedding``: Alternative using Qwen API\n\n    References:\n        - SPLADE Paper: https://arxiv.org/abs/2109.10086\n        - Model: https://huggingface.co/naver/splade-cocondenser-ensembledistil\n    \"\"\"\n\n    # Class-level model cache: {(model_name, model_source, device): model}\n    # Shared across all DefaultLocalSparseEmbedding instances to save memory\n    _model_cache: ClassVar[dict] = {}\n\n    @classmethod\n    def clear_cache(cls) -> None:\n        \"\"\"Clear all cached SPLADE models from 
memory.\n\n        This is useful for:\n        - Freeing memory when models are no longer needed\n        - Forcing a fresh model reload\n        - Testing and debugging\n\n        Examples:\n            >>> # Clear cache to free memory\n            >>> DefaultLocalSparseEmbedding.clear_cache()\n\n            >>> # Or in tests to ensure fresh model loading\n            >>> def test_something():\n            ...     DefaultLocalSparseEmbedding.clear_cache()\n            ...     emb = DefaultLocalSparseEmbedding()\n            ...     # Test with fresh model\n        \"\"\"\n        cls._model_cache.clear()\n\n    @classmethod\n    def get_cache_info(cls) -> dict:\n        \"\"\"Get information about currently cached models.\n\n        Returns:\n            dict: Dictionary with cache statistics:\n                - cached_models (int): Number of cached model instances\n                - cache_keys (list): List of cache keys (model_name, model_source, device)\n\n        Examples:\n            >>> info = DefaultLocalSparseEmbedding.get_cache_info()\n            >>> print(f\"Cached models: {info['cached_models']}\")\n            Cached models: 2\n            >>> print(f\"Cache keys: {info['cache_keys']}\")\n            Cache keys: [('naver/splade-cocondenser-ensembledistil', 'huggingface', None),\n                        ('naver/splade-cocondenser-ensembledistil', 'huggingface', 'cuda')]\n        \"\"\"\n        return {\n            \"cached_models\": len(cls._model_cache),\n            \"cache_keys\": list(cls._model_cache.keys()),\n        }\n\n    @classmethod\n    def remove_from_cache(\n        cls, model_source: str = \"huggingface\", device: Optional[str] = None\n    ) -> bool:\n        \"\"\"Remove a specific model from cache.\n\n        Args:\n            model_source (str): Model source (\"huggingface\" or \"modelscope\").\n                Defaults to \"huggingface\".\n            device (Optional[str]): Device identifier. 
Defaults to None.\n\n        Returns:\n            bool: True if model was found and removed, False otherwise.\n\n        Examples:\n            >>> # Remove CPU model from cache\n            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache()\n            >>> print(f\"Removed: {removed}\")\n            Removed: True\n\n            >>> # Remove CUDA model from cache\n            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device=\"cuda\")\n            >>> print(f\"Removed: {removed}\")\n            Removed: True\n        \"\"\"\n        model_name = \"naver/splade-cocondenser-ensembledistil\"\n        cache_key = (model_name, model_source, device)\n\n        if cache_key in cls._model_cache:\n            del cls._model_cache[cache_key]\n            return True\n        return False\n\n    def __init__(\n        self,\n        model_source: Literal[\"huggingface\", \"modelscope\"] = \"huggingface\",\n        device: Optional[str] = None,\n        encoding_type: Literal[\"query\", \"document\"] = \"query\",\n        **kwargs,\n    ):\n        \"\"\"Initialize with SPLADE model.\n\n        Args:\n            model_source (Literal[\"huggingface\", \"modelscope\"]): Model source.\n                Defaults to \"huggingface\".\n            device (Optional[str]): Target device (\"cpu\", \"cuda\", \"mps\", or None).\n                Defaults to None (automatic detection).\n            encoding_type (Literal[\"query\", \"document\"]): Encoding type for embeddings.\n                - \"query\": Optimize for search queries (default)\n                - \"document\": Optimize for indexed documents\n                This distinction is important for asymmetric retrieval tasks.\n            **kwargs: Additional parameters (reserved for future use).\n\n        Raises:\n            ImportError: If sentence-transformers is not installed.\n            ValueError: If model cannot be loaded.\n\n        Note:\n            Multiple instances with the same (model_source, device) 
configuration\n            will share the same underlying model to save memory. Different\n            instances can use different encoding_type settings while sharing\n            the model.\n\n            **Model Selection:**\n\n            Uses ``naver/splade-cocondenser-ensembledistil`` instead of the newer\n            ``naver/splade-v3`` because splade-v3 is a gated model requiring\n            Hugging Face authentication. The cocondenser-ensembledistil variant:\n\n            - Does not require authentication or API tokens\n            - Is immediately available for all users\n            - Provides comparable retrieval performance (~2% difference)\n            - Avoids \"Access to model is restricted\" errors\n\n            If you need splade-v3 and have obtained access, you can subclass\n            this class and override the model_name parameter.\n\n        Examples:\n            >>> # Both instances share the same model (saves memory)\n            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type=\"query\")\n            >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type=\"document\")\n            >>> # Only one model is loaded in memory\n        \"\"\"\n        # Use publicly available SPLADE model (no gated access required)\n        # Note: naver/splade-v3 requires authentication, so we use the\n        # cocondenser-ensembledistil variant which is publicly accessible\n        model_name = \"naver/splade-cocondenser-ensembledistil\"\n\n        # Initialize base class for model loading\n        SentenceTransformerFunctionBase.__init__(\n            self, model_name=model_name, model_source=model_source, device=device\n        )\n\n        self._encoding_type = encoding_type\n        self._extra_params = kwargs\n\n        # Create cache key for this model configuration\n        self._cache_key = (model_name, model_source, device)\n\n        # Load model to ensure it's available (will use cache if exists)\n        self._get_model()\n\n    
@property\n    def extra_params(self) -> dict:\n        \"\"\"dict: Extra parameters for model-specific customization.\"\"\"\n        return self._extra_params\n\n    def __call__(self, input: str) -> SparseVectorType:\n        \"\"\"Make the embedding function callable.\"\"\"\n        return self.embed(input)\n\n    def embed(self, input: str) -> SparseVectorType:\n        \"\"\"Generate sparse embedding vector for the input text.\n\n        This method uses the SPLADE model to convert input text into a sparse\n        vector representation. The result is a dictionary where keys are dimension\n        indices and values are importance weights (only non-zero values included).\n\n        The embedding is optimized based on the ``encoding_type`` specified during\n        initialization: \"query\" for search queries or \"document\" for indexed content.\n\n        Args:\n            input (str): Input text string to embed. Must be non-empty after\n                stripping whitespace.\n\n        Returns:\n            SparseVectorType: A dictionary mapping dimension index to weight.\n                Only non-zero dimensions are included. 
The dictionary is sorted\n                by indices (keys) in ascending order for consistent output.\n                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``\n\n        Raises:\n            TypeError: If ``input`` is not a string.\n            ValueError: If input is empty or whitespace-only.\n            RuntimeError: If model inference fails.\n\n        Examples:\n            >>> # Query embedding\n            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type=\"query\")\n            >>> query_vec = query_emb.embed(\"machine learning\")\n            >>> isinstance(query_vec, dict)\n            True\n\n        Note:\n            - First call may be slower due to model loading\n            - Subsequent calls are much faster as the model stays in memory\n            - GPU acceleration provides significant speedup\n            - Sparse vectors are memory-efficient (only store non-zero values)\n        \"\"\"\n        if not isinstance(input, str):\n            raise TypeError(f\"Expected 'input' to be str, got {type(input).__name__}\")\n\n        input = input.strip()\n        if not input:\n            raise ValueError(\"Input text cannot be empty or whitespace only\")\n\n        try:\n            model = self._get_model()\n\n            # Use appropriate encoding method based on type\n            if self._encoding_type == \"document\" and hasattr(model, \"encode_document\"):\n                # Use document encoding\n                sparse_matrix = model.encode_document([input])\n            elif hasattr(model, \"encode_query\"):\n                # Use query encoding (default)\n                sparse_matrix = model.encode_query([input])\n            else:\n                # Fallback: manual implementation for older sentence-transformers\n                return self._manual_sparse_encode(input)\n\n            # Convert sparse matrix to dictionary\n            # SPLADE returns shape [1, vocab_size] for single input\n\n            # Check if it's 
a sparse matrix (duck typing - has toarray method)\n            if hasattr(sparse_matrix, \"toarray\"):\n                # Sparse matrix (CSR/CSC/etc.) - convert to dense array\n                sparse_array = sparse_matrix[0].toarray().flatten()\n                sparse_dict = {\n                    int(idx): float(val)\n                    for idx, val in enumerate(sparse_array)\n                    if val > 0\n                }\n            else:\n                # Dense array format (numpy array or similar)\n                if isinstance(sparse_matrix, np.ndarray):\n                    sparse_array = sparse_matrix[0]\n                else:\n                    sparse_array = sparse_matrix\n\n                sparse_dict = {\n                    int(idx): float(val)\n                    for idx, val in enumerate(sparse_array)\n                    if val > 0\n                }\n\n            # Sort by indices (keys) to ensure consistent ordering\n            return dict(sorted(sparse_dict.items()))\n\n        except Exception as e:\n            if isinstance(e, (TypeError, ValueError)):\n                raise\n            raise RuntimeError(f\"Failed to generate sparse embedding: {e!s}\") from e\n\n    def _manual_sparse_encode(self, input: str) -> SparseVectorType:\n        \"\"\"Fallback manual SPLADE encoding for older sentence-transformers.\n\n        Args:\n            input (str): Input text to encode.\n\n        Returns:\n            SparseVectorType: Sparse vector as dictionary.\n        \"\"\"\n        import torch\n\n        model = self._get_model()\n\n        # Tokenize input\n        features = model.tokenize([input])\n\n        # Move to correct device\n        features = {k: v.to(model.device) for k, v in features.items()}\n\n        # Forward pass with no gradient\n        with torch.no_grad():\n            embeddings = model.forward(features)\n\n            # Get logits from model output\n            # SPLADE models typically output 
'token_embeddings'\n            if isinstance(embeddings, dict) and \"token_embeddings\" in embeddings:\n                logits = embeddings[\"token_embeddings\"][0]  # First batch item\n            elif hasattr(embeddings, \"token_embeddings\"):\n                logits = embeddings.token_embeddings[0]\n            # Fallback: try to get first value\n            elif isinstance(embeddings, dict):\n                logits = next(iter(embeddings.values()))[0]\n            else:\n                logits = embeddings[0]\n\n            # Apply SPLADE activation: log(1 + relu(x))\n            relu_log = torch.log(1 + torch.relu(logits))\n\n            # Max pooling over token dimension (reduce to vocab size)\n            if relu_log.dim() > 1:\n                sparse_vec, _ = torch.max(relu_log, dim=0)\n            else:\n                sparse_vec = relu_log\n\n            # Convert to sparse dictionary (only non-zero values)\n            sparse_vec_np = sparse_vec.cpu().numpy()\n            sparse_dict = {\n                int(idx): float(val) for idx, val in enumerate(sparse_vec_np) if val > 0\n            }\n\n            # Sort by indices (keys) to ensure consistent ordering\n            return dict(sorted(sparse_dict.items()))\n\n    def _get_model(self):\n        \"\"\"Load or retrieve the SPLADE model from class-level cache.\n\n        Returns:\n            SentenceTransformer: The loaded SPLADE model instance.\n\n        Raises:\n            ImportError: If required packages are not installed.\n            ValueError: If model cannot be loaded.\n\n        Note:\n            Models are cached at class level and shared across all instances\n            with the same (model_name, model_source, device) configuration.\n            This allows memory-efficient usage when creating multiple instances\n            with different encoding_type settings.\n        \"\"\"\n        # Check class-level cache first\n        if self._cache_key in self._model_cache:\n            
return self._model_cache[self._cache_key]\n\n        # Use parent class method to load model\n        model = super()._get_model()\n\n        # Cache the model at class level\n        self._model_cache[self._cache_key] = model\n\n        return model\n"
  },
  {
    "path": "python/zvec/extension/sentence_transformer_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Literal, Optional\n\nfrom ..tool import require_module\n\n\nclass SentenceTransformerFunctionBase:\n    \"\"\"Base class for Sentence Transformer functions (both dense and sparse).\n\n    This base class provides common functionality for loading and managing\n    sentence-transformers models from Hugging Face or ModelScope. It supports\n    both dense models (e.g., all-MiniLM-L6-v2) and sparse models (e.g., SPLADE).\n\n    This class is not meant to be used directly. 
Use concrete implementations:\n    - ``SentenceTransformerEmbeddingFunction`` for dense embeddings\n    - ``SentenceTransformerSparseEmbeddingFunction`` for sparse embeddings\n    - ``DefaultDenseEmbedding`` for default dense embeddings\n    - ``DefaultSparseEmbedding`` for default sparse embeddings\n\n    Args:\n        model_name (str): Model identifier or local path.\n        model_source (Literal[\"huggingface\", \"modelscope\"]): Model source.\n        device (Optional[str]): Device to run the model on.\n\n    Note:\n        - This is an internal base class for code reuse\n        - Subclasses should inherit from appropriate Protocol (Dense/Sparse)\n        - Provides model loading and management functionality\n    \"\"\"\n\n    def __init__(\n        self,\n        model_name: str,\n        model_source: Literal[\"huggingface\", \"modelscope\"] = \"huggingface\",\n        device: Optional[str] = None,\n    ):\n        \"\"\"Initialize the base Sentence Transformer functionality.\n\n        Args:\n            model_name (str): Model identifier or local path.\n            model_source (Literal[\"huggingface\", \"modelscope\"]): Model source.\n            device (Optional[str]): Device to run the model on.\n\n        Raises:\n            ValueError: If model_source is invalid.\n        \"\"\"\n        # Validate model_source\n        if model_source not in (\"huggingface\", \"modelscope\"):\n            raise ValueError(\n                f\"Invalid model_source: '{model_source}'. 
\"\n                \"Must be 'huggingface' or 'modelscope'.\"\n            )\n\n        self._model_name = model_name\n        self._model_source = model_source\n        self._device = device\n        self._model = None\n\n    @property\n    def model_name(self) -> str:\n        \"\"\"str: The Sentence Transformer model name currently in use.\"\"\"\n        return self._model_name\n\n    @property\n    def model_source(self) -> str:\n        \"\"\"str: The model source being used (\"huggingface\" or \"modelscope\").\"\"\"\n        return self._model_source\n\n    @property\n    def device(self) -> str:\n        \"\"\"str: The device the model is running on.\"\"\"\n        model = self._get_model()\n        if model is not None:\n            return str(model.device)\n        return self._device or \"cpu\"\n\n    def _get_model(self):\n        \"\"\"Load or retrieve the Sentence Transformer model.\n\n        Returns:\n            SentenceTransformer or SparseEncoder: The loaded model instance.\n\n        Raises:\n            ImportError: If required packages are not installed.\n            ValueError: If model cannot be loaded.\n        \"\"\"\n        # Return cached model if exists\n        if self._model is not None:\n            return self._model\n\n        # Load model\n        try:\n            sentence_transformers = require_module(\"sentence_transformers\")\n\n            if self._model_source == \"modelscope\":\n                # Load from ModelScope\n                require_module(\"modelscope\")\n                from modelscope.hub.snapshot_download import snapshot_download\n\n                # Download model to cache\n                model_dir = snapshot_download(self._model_name)\n\n                # Load from local path\n                self._model = sentence_transformers.SentenceTransformer(\n                    model_dir, device=self._device, trust_remote_code=True\n                )\n            else:\n                # Load from Hugging Face 
(default)\n                self._model = sentence_transformers.SentenceTransformer(\n                    self._model_name, device=self._device, trust_remote_code=True\n                )\n\n            return self._model\n\n        except ImportError as e:\n            if \"modelscope\" in str(e) and self._model_source == \"modelscope\":\n                raise ImportError(\n                    \"ModelScope support requires the 'modelscope' package. \"\n                    \"Please install it with: pip install modelscope\"\n                ) from e\n            raise\n        except Exception as e:\n            raise ValueError(\n                f\"Failed to load Sentence Transformer model '{self._model_name}' \"\n                f\"from {self._model_source}: {e!s}\"\n            ) from e\n\n    def _is_sparse_model(self) -> bool:\n        \"\"\"Check if the loaded model is a sparse encoder (e.g., SPLADE).\n\n        Returns:\n            bool: True if model supports sparse encoding.\n        \"\"\"\n        model = self._get_model()\n        # Check if model has sparse encoding methods\n        return hasattr(model, \"encode_query\") or hasattr(model, \"encode_document\")\n"
  },
  {
    "path": "python/zvec/extension/sentence_transformer_rerank_function.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Literal, Optional\n\nfrom ..model.doc import Doc\nfrom ..tool import require_module\nfrom .rerank_function import RerankFunction\nfrom .sentence_transformer_function import SentenceTransformerFunctionBase\n\n\nclass DefaultLocalReRanker(SentenceTransformerFunctionBase, RerankFunction):\n    \"\"\"Re-ranker using Sentence Transformer cross-encoder models for semantic re-ranking.\n\n    This re-ranker leverages pre-trained cross-encoder models to perform deep semantic\n    re-ranking of search results. It runs locally without API calls, supports GPU\n    acceleration, and works with models from Hugging Face or ModelScope.\n\n    Cross-encoder models evaluate query-document pairs jointly, providing more\n    accurate relevance scores than bi-encoder (embedding-based) similarity.\n\n    Args:\n        query (str): Query text for semantic re-ranking. **Required**.\n        topn (int, optional): Maximum number of documents to return after re-ranking.\n            Defaults to 10.\n        rerank_field (Optional[str], optional): Document field name to use as\n            re-ranking input text. 
**Required** (e.g., \"content\", \"title\", \"body\").\n        model_name (str, optional): Cross-encoder model identifier or local path.\n            Defaults to ``\"cross-encoder/ms-marco-MiniLM-L6-v2\"`` (MS MARCO MiniLM).\n            Common options:\n            - ``\"cross-encoder/ms-marco-MiniLM-L6-v2\"``: Lightweight, fast (~80MB, recommended)\n            - ``\"cross-encoder/ms-marco-MiniLM-L12-v2\"``: Better accuracy (~120MB)\n            - ``\"BAAI/bge-reranker-base\"``: BGE Reranker Base (~280MB)\n            - ``\"BAAI/bge-reranker-large\"``: BGE Reranker Large (highest quality, ~560MB)\n        model_source (Literal[\"huggingface\", \"modelscope\"], optional): Model source.\n            Defaults to ``\"huggingface\"``.\n            - ``\"huggingface\"``: Load from Hugging Face Hub\n            - ``\"modelscope\"``: Load from ModelScope (recommended for users in China)\n        device (Optional[str], optional): Device to run the model on.\n            Options: ``\"cpu\"``, ``\"cuda\"``, ``\"mps\"`` (for Apple Silicon), or ``None``\n            for automatic detection. Defaults to ``None``.\n        batch_size (int, optional): Batch size for processing query-document pairs.\n            Larger values speed up processing but use more memory. 
Defaults to ``32``.\n\n    Attributes:\n        query (str): The query text used for re-ranking.\n        topn (int): Maximum number of documents to return.\n        rerank_field (Optional[str]): Field name used for re-ranking input.\n        model_name (str): The cross-encoder model being used.\n        model_source (str): The model source (\"huggingface\" or \"modelscope\").\n        device (str): The device the model is running on.\n\n    Raises:\n        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,\n            or model cannot be loaded.\n        TypeError: If input types are invalid.\n        RuntimeError: If model inference fails.\n\n    Note:\n        - Requires Python 3.10, 3.11, or 3.12\n        - Requires ``sentence-transformers`` package: ``pip install sentence-transformers``\n        - For ModelScope support, also requires: ``pip install modelscope``\n        - First run downloads the model (~80-560MB depending on model) from chosen source\n        - No API keys or network required after initial download\n        - Cross-encoders are slower than bi-encoders but more accurate\n        - GPU acceleration provides significant speedup (5-10x)\n\n        **MS MARCO MiniLM-L6-v2 Model (Default):**\n\n        The default model ``cross-encoder/ms-marco-MiniLM-L6-v2`` is a lightweight and\n        efficient cross-encoder trained on MS MARCO dataset. It provides:\n\n        - Fast inference speed (suitable for real-time applications)\n        - Small model size (~80MB, quick to download)\n        - Good balance between speed and accuracy\n        - Trained on 500K+ query-document pairs\n        - Public availability without authentication\n\n        **For users in China:**\n\n        If you encounter Hugging Face access issues, use ModelScope instead:\n\n        .. 
code-block:: python\n\n            # Recommended for users in China\n            reranker = SentenceTransformerReRanker(\n                query=\"机器学习算法\",\n                rerank_field=\"content\",\n                model_source=\"modelscope\"\n            )\n\n        Alternatively, use Hugging Face mirror:\n\n        .. code-block:: bash\n\n            export HF_ENDPOINT=https://hf-mirror.com\n\n    Examples:\n        >>> # Basic usage with default MS MARCO MiniLM model\n        >>> from zvec.extension import SentenceTransformerReRanker\n        >>>\n        >>> reranker = SentenceTransformerReRanker(\n        ...     query=\"machine learning algorithms\",\n        ...     topn=5,\n        ...     rerank_field=\"content\"\n        ... )\n        >>>\n        >>> # Use in collection.query()\n        >>> results = collection.query(\n        ...     data={\"vector_field\": query_vector},\n        ...     reranker=reranker,\n        ...     topk=20\n        ... )\n\n        >>> # Using ModelScope for users in China\n        >>> reranker = SentenceTransformerReRanker(\n        ...     query=\"深度学习\",\n        ...     topn=10,\n        ...     rerank_field=\"content\",\n        ...     model_source=\"modelscope\"\n        ... )\n\n        >>> # Using larger model for better quality\n        >>> reranker = SentenceTransformerReRanker(\n        ...     query=\"neural networks\",\n        ...     topn=5,\n        ...     rerank_field=\"content\",\n        ...     model_name=\"BAAI/bge-reranker-large\",\n        ...     device=\"cuda\",\n        ...     batch_size=64\n        ... )\n\n        >>> # Direct rerank call (for testing)\n        >>> query_results = {\n        ...     \"vector1\": [\n        ...         Doc(id=\"1\", score=0.9, fields={\"content\": \"Machine learning is...\"}),\n        ...         Doc(id=\"2\", score=0.8, fields={\"content\": \"Deep learning is...\"}),\n        ...     ]\n        ... 
}\n        >>> reranked = reranker.rerank(query_results)\n        >>> for doc in reranked:\n        ...     print(f\"ID: {doc.id}, Score: {doc.score:.4f}\")\n        ID: 2, Score: 0.9234\n        ID: 1, Score: 0.8567\n\n    See Also:\n        - ``RerankFunction``: Abstract base class for re-rankers\n        - ``QwenReRanker``: Re-ranker using Qwen API\n        - ``RrfReRanker``: Multi-vector re-ranker using RRF\n        - ``WeightedReRanker``: Multi-vector re-ranker using weighted scores\n\n    References:\n        - MS MARCO Cross-Encoder: https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2\n        - BGE Reranker: https://huggingface.co/BAAI/bge-reranker-base\n        - Cross-Encoder vs Bi-Encoder: https://www.sbert.net/examples/applications/cross-encoder/README.html\n    \"\"\"\n\n    def __init__(\n        self,\n        query: Optional[str] = None,\n        topn: int = 10,\n        rerank_field: Optional[str] = None,\n        model_name: str = \"cross-encoder/ms-marco-MiniLM-L6-v2\",\n        model_source: Literal[\"huggingface\", \"modelscope\"] = \"huggingface\",\n        device: Optional[str] = None,\n        batch_size: int = 32,\n    ):\n        \"\"\"Initialize SentenceTransformerReRanker with query and configuration.\n\n        Args:\n            query (Optional[str]): Query text for semantic matching. 
Required.\n            topn (int): Number of top results to return.\n            rerank_field (Optional[str]): Document field for re-ranking input.\n            model_name (str): Cross-encoder model identifier.\n            model_source (Literal[\"huggingface\", \"modelscope\"]): Model source.\n            device (Optional[str]): Target device (\"cpu\", \"cuda\", \"mps\", or None).\n            batch_size (int): Batch size for processing query-document pairs.\n\n        Raises:\n            ValueError: If query is empty or model cannot be loaded.\n        \"\"\"\n        # Initialize base class for model loading\n        SentenceTransformerFunctionBase.__init__(\n            self, model_name=model_name, model_source=model_source, device=device\n        )\n\n        # Initialize rerank function\n        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)\n\n        # Validate query\n        if not query:\n            raise ValueError(\"Query is required for DefaultLocalReRanker\")\n        self._query = query\n        self._batch_size = batch_size\n\n        # Load and validate cross-encoder model\n        model = self._get_model()\n        if not hasattr(model, \"predict\"):\n            raise ValueError(\n                f\"Model '{model_name}' does not appear to be a cross-encoder model. 
\"\n                \"Cross-encoder models should have a 'predict' method.\"\n            )\n        self._model = model\n\n    def _get_model(self):\n        \"\"\"Load or retrieve the CrossEncoder model.\n\n        This overrides the base class method to load CrossEncoder instead of\n        SentenceTransformer, as reranking requires cross-encoder models.\n\n        Returns:\n            CrossEncoder: The loaded cross-encoder model instance.\n\n        Raises:\n            ImportError: If required packages are not installed.\n            ValueError: If model cannot be loaded.\n        \"\"\"\n        # Return cached model if exists\n        if self._model is not None:\n            return self._model\n\n        # Load cross-encoder model\n        try:\n            sentence_transformers = require_module(\"sentence_transformers\")\n\n            if self._model_source == \"modelscope\":\n                # Load from ModelScope\n                require_module(\"modelscope\")\n                from modelscope.hub.snapshot_download import snapshot_download\n\n                # Download model to cache\n                model_dir = snapshot_download(self._model_name)\n\n                # Load CrossEncoder from local path\n                model = sentence_transformers.CrossEncoder(\n                    model_dir, device=self._device\n                )\n            else:\n                # Load CrossEncoder from Hugging Face (default)\n                model = sentence_transformers.CrossEncoder(\n                    self._model_name, device=self._device\n                )\n\n            return model\n\n        except ImportError as e:\n            if \"modelscope\" in str(e) and self._model_source == \"modelscope\":\n                raise ImportError(\n                    \"ModelScope support requires the 'modelscope' package. 
\"\n                    \"Please install it with: pip install modelscope\"\n                ) from e\n            raise\n        except Exception as e:\n            raise ValueError(\n                f\"Failed to load CrossEncoder model '{self._model_name}' \"\n                f\"from {self._model_source}: {e!s}\"\n            ) from e\n\n    @property\n    def query(self) -> str:\n        \"\"\"str: Query text used for semantic re-ranking.\"\"\"\n        return self._query\n\n    @property\n    def batch_size(self) -> int:\n        \"\"\"int: Batch size for processing query-document pairs.\"\"\"\n        return self._batch_size\n\n    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:\n        \"\"\"Re-rank documents using Sentence Transformer cross-encoder model.\n\n        Evaluates each query-document pair using the cross-encoder model to compute\n        relevance scores. Documents are then sorted by these scores and the top-k\n        results are returned.\n\n        Args:\n            query_results (dict[str, list[Doc]]): Mapping from vector field names\n                to lists of retrieved documents. Documents from all fields are\n                deduplicated and re-ranked together.\n\n        Returns:\n            list[Doc]: Re-ranked documents (up to ``topn``) with updated ``score``\n                fields containing relevance scores from the cross-encoder model.\n\n        Raises:\n            ValueError: If no valid documents are found or model inference fails.\n\n        Note:\n            - Duplicate documents (same ID) across fields are processed once\n            - Documents with empty/missing ``rerank_field`` content are skipped\n            - Returned scores are logits from the cross-encoder model\n            - Higher scores indicate higher relevance\n            - Processing time is O(n) where n is the number of documents\n\n        Examples:\n            >>> reranker = SentenceTransformerReRanker(\n            ...     
query=\"machine learning\",\n            ...     topn=3,\n            ...     rerank_field=\"content\"\n            ... )\n            >>> query_results = {\n            ...     \"vector1\": [\n            ...         Doc(id=\"1\", score=0.9, fields={\"content\": \"ML basics\"}),\n            ...         Doc(id=\"2\", score=0.8, fields={\"content\": \"DL tutorial\"}),\n            ...     ]\n            ... }\n            >>> reranked = reranker.rerank(query_results)\n            >>> len(reranked) <= 3\n            True\n        \"\"\"\n        if not query_results:\n            return []\n\n        # Collect and deduplicate documents\n        id_to_doc: dict[str, Doc] = {}\n        doc_ids: list[str] = []\n        contents: list[str] = []\n\n        for _, query_result in query_results.items():\n            for doc in query_result:\n                doc_id = doc.id\n                if doc_id in id_to_doc:\n                    continue\n\n                # Extract text content from specified field\n                field_value = doc.field(self.rerank_field)\n                rank_content = str(field_value).strip() if field_value else \"\"\n                if not rank_content:\n                    continue\n\n                id_to_doc[doc_id] = doc\n                doc_ids.append(doc_id)\n                contents.append(rank_content)\n\n        if not contents:\n            raise ValueError(\"No documents to rerank\")\n\n        try:\n            # Use standard cross-encoder predict method\n            pairs = [[self.query, content] for content in contents]\n            scores = self._model.predict(\n                pairs,\n                batch_size=self.batch_size,\n                show_progress_bar=False,\n                convert_to_numpy=True,\n            )\n\n            # Convert to float list if needed\n            if hasattr(scores, \"tolist\"):\n                scores = scores.tolist()\n            else:\n                scores = [float(s) for s in 
scores]\n\n        except Exception as e:\n            raise RuntimeError(f\"Failed to compute rerank scores: {e!s}\") from e\n\n        # Create scored documents\n        scored_docs = [\n            (doc_ids[i], id_to_doc[doc_ids[i]], scores[i]) for i in range(len(doc_ids))\n        ]\n\n        # Sort by score (descending) and take top-k\n        scored_docs.sort(key=lambda x: x[2], reverse=True)\n        top_scored_docs = scored_docs[: self.topn]\n\n        # Build result list with updated scores\n        results: list[Doc] = []\n        for _, doc, score in top_scored_docs:\n            new_doc = doc._replace(score=score)\n            results.append(new_doc)\n\n        return results\n"
  },
  {
    "path": "python/zvec/model/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom .collection import Collection\nfrom .doc import Doc\nfrom .param.vector_query import VectorQuery\nfrom .schema.collection_schema import CollectionSchema\nfrom .schema.field_schema import FieldSchema\n\n__all__ = [\"Collection\", \"CollectionSchema\", \"Doc\", \"FieldSchema\", \"VectorQuery\"]\n"
  },
  {
    "path": "python/zvec/model/collection.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Optional, Union, overload\n\nfrom _zvec import _Collection\n\nfrom ..executor import QueryContext, QueryExecutorFactory\nfrom ..extension import ReRanker\nfrom ..typing import Status\nfrom .convert import convert_to_cpp_doc, convert_to_py_doc\nfrom .doc import Doc\nfrom .param import (\n    AddColumnOption,\n    AlterColumnOption,\n    CollectionOption,\n    FlatIndexParam,\n    HnswIndexParam,\n    IndexOption,\n    InvertIndexParam,\n    IVFIndexParam,\n    OptimizeOption,\n)\nfrom .param.vector_query import VectorQuery\nfrom .schema import CollectionSchema, CollectionStats, FieldSchema\n\n__all__ = [\"Collection\"]\n\n_VECTOR_INDEX_TYPES = (HnswIndexParam, IVFIndexParam, FlatIndexParam)\n\n\nclass Collection:\n    \"\"\"Represents an opened collection in Zvec.\n\n    A `Collection` provides methods for data definition (DDL), data manipulation (DML),\n    and querying (DQL). 
It is obtained via `create_and_open()` or `open()`.\n\n    This class is not meant to be instantiated directly; use factory functions instead.\n    \"\"\"\n\n    def __init__(self, obj: _Collection):\n        self._obj = obj\n        self._schema = None\n        self._querier = None\n\n    @classmethod\n    def _from_core(cls, core_collection: _Collection) -> Collection:\n        if not core_collection:\n            raise ValueError(\"Collection is None\")\n        inst = cls.__new__(cls)\n        inst._obj = core_collection\n        schema = CollectionSchema._from_core(core_collection.Schema())\n        inst._schema = schema\n        inst._querier = QueryExecutorFactory.create(schema)\n        return inst\n\n    @property\n    def path(self) -> str:\n        \"\"\"str: The filesystem path of the collection.\"\"\"\n        return self._obj.Path()\n\n    @property\n    def option(self) -> CollectionOption:\n        \"\"\"CollectionOption: The options used to open the collection.\"\"\"\n        return self._obj.Options()\n\n    @property\n    def schema(self) -> CollectionSchema:\n        \"\"\"CollectionSchema: The schema defining the structure of the collection.\"\"\"\n        return self._schema\n\n    @property\n    def stats(self) -> CollectionStats:\n        \"\"\"CollectionStats: Runtime statistics about the collection (e.g., doc count, size).\"\"\"\n        return self._obj.Stats()\n\n    # ========== Collection DDL Methods ==========\n    def destroy(self) -> None:\n        \"\"\"Permanently delete the collection from disk.\n\n        Warning:\n            This operation is irreversible. 
All data will be lost.\n        \"\"\"\n        self._obj.Destroy()\n\n    def flush(self) -> None:\n        \"\"\"Force all pending writes to disk.\n\n        Ensures durability of recent inserts/updates.\n        \"\"\"\n        self._obj.Flush()\n\n    # ========== Index DDL Methods ==========\n    def create_index(\n        self,\n        field_name: str,\n        index_param: Union[\n            HnswIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam\n        ],\n        option: IndexOption = IndexOption(),\n    ) -> None:\n        \"\"\"Create an index on a field.\n\n        Vector index types (HNSW, IVF, FLAT) can only be applied to vector fields.\n        Inverted index (`InvertIndexParam`) is for scalar fields.\n\n        Args:\n            field_name (str): Name of the field to index.\n            index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam]):\n                Index configuration.\n            option (Optional[IndexOption], optional): Index creation options.\n                Defaults to ``IndexOption()``.\n\n        Raises:\n            ValueError: If a vector index is applied to a non-vector field.\n        \"\"\"\n        if index_param in _VECTOR_INDEX_TYPES and not self.schema.vector(field_name):\n            supported_types = \", \".join(cls.__name__ for cls in _VECTOR_INDEX_TYPES)\n            raise ValueError(\n                f\"Cannot apply vector index to non-vector field '{field_name}'. 
\"\n                f\"The field must be of vector type to use index types like {supported_types}.\"\n            )\n        self._obj.CreateIndex(field_name, index_param, option)\n        self._schema = CollectionSchema._from_core(self._obj.Schema())\n\n    def drop_index(self, field_name: str) -> None:\n        \"\"\"Remove the index from a field.\n\n        Args:\n            field_name (str): Name of the indexed field.\n        \"\"\"\n        self._obj.DropIndex(field_name)\n        self._schema = CollectionSchema._from_core(self._obj.Schema())\n\n    def optimize(self, option: OptimizeOption = OptimizeOption()) -> None:\n        \"\"\"Optimize the collection (e.g., merge segments, rebuild index).\n\n        Args:\n            option (Optional[OptimizeOption], optional): Optimization options.\n                Defaults to ``OptimizeOption()``.\n        \"\"\"\n        self._obj.Optimize(option)\n\n    # ========== COLUMN DDL Methods ==========\n    def add_column(\n        self,\n        field_schema: FieldSchema,\n        expression: str = \"\",\n        option: AddColumnOption = AddColumnOption(),\n    ) -> None:\n        \"\"\"Add a new column to the collection.\n\n        The column is populated using the provided expression (e.g., SQL-like formula).\n\n        Args:\n            field_schema (FieldSchema): Schema definition for the new column.\n            expression (str): Expression to compute values for existing documents.\n            option (Optional[AddColumnOption], optional): Options for the operation.\n                Defaults to ``AddColumnOption()``.\n        \"\"\"\n        self._obj.AddColumn(field_schema._get_object(), expression, option)\n        self._schema = CollectionSchema._from_core(self._obj.Schema())\n\n    def drop_column(self, field_name: str) -> None:\n        \"\"\"Remove a column from the collection.\n\n        Args:\n            field_name (str): Name of the column to drop.\n        \"\"\"\n        
self._obj.DropColumn(field_name)\n        self._schema = CollectionSchema._from_core(self._obj.Schema())\n\n    def alter_column(\n        self,\n        old_name: str,\n        new_name: Optional[str] = None,\n        field_schema: Optional[FieldSchema] = None,\n        option: AlterColumnOption = AlterColumnOption(),\n    ) -> None:\n        \"\"\"Rename a column or update its schema.\n\n        This method supports two atomic operations:\n          1. Rename only (when `field_schema` is None).\n          2. Modify schema only (when `new_name` is None or empty string).\n\n        Args:\n            old_name (str): The current name of the column to be altered.\n            new_name (Optional[str]): The new name for the column.\n                - If provided and non-empty, the column will be renamed.\n                - If `None` or empty string, no rename occurs.\n            field_schema (Optional[FieldSchema]): The new schema definition.\n                - If provided, the column's type, dimension, or other properties will be updated.\n                - If `None`, only renaming (if requested) is performed.\n            option (AlterColumnOption, optional): Options controlling the alteration behavior.\n                Defaults to ``AlterColumnOption()``.\n\n        **Limitation**: This operation **only supports scalar numeric columns**, 
such as:\n        - `DOUBLE`, `FLOAT`,\n        - `INT32`, `INT64`, `UINT32`, `UINT64`\n\n        Note:\n            - Schema modification may trigger data migration or index rebuild.\n\n        Examples:\n            >>> # Rename column only\n            >>> collection.alter_column(old_name=\"id\", new_name=\"doc_id\")\n\n            >>> # Modify schema only\n            >>> new_schema = FieldSchema(name=\"doc_id\", dtype=DataType.INT64)\n            >>> collection.alter_column(\"id\", field_schema=new_schema)\n        \"\"\"\n        self._obj.AlterColumn(\n            old_name,\n            new_name or \"\",\n            field_schema._get_object() if field_schema else None,\n            option,\n        )\n        self._schema = CollectionSchema._from_core(self._obj.Schema())\n\n    # ========== Collection DML Methods ==========\n    @overload\n    def insert(self, docs: Doc) -> Status:\n        pass\n\n    @overload\n    def insert(self, docs: list[Doc]) -> list[Status]:\n        pass\n\n    def insert(self, docs: Union[Doc, list[Doc]]) -> Union[Status, list[Status]]:\n        \"\"\"Insert new documents into the collection.\n\n        Documents must have unique IDs and conform to the schema.\n\n        Args:\n            docs (Union[Doc, list[Doc]]): One or more documents to insert.\n\n        Returns:\n            Union[Status, list[Status]]: If a single Doc was given, returns its Status;\n            if a list was given, returns a list of Status objects.\n        \"\"\"\n        is_single = isinstance(docs, Doc)\n        doc_list = [docs] if is_single else docs\n        results = self._obj.Insert(\n            [convert_to_cpp_doc(doc, self.schema) for doc in doc_list]\n        )\n        return results[0] if is_single else results\n\n    @overload\n    def upsert(self, docs: Doc) -> Status:\n        pass\n\n    @overload\n    def upsert(self, docs: list[Doc]) -> list[Status]:\n        pass\n\n    def upsert(self, docs: Union[Doc, list[Doc]]) -> 
Union[Status, list[Status]]:\n        \"\"\"Insert new documents or update existing ones by ID.\n\n        Args:\n            docs (Union[Doc, list[Doc]]): Documents to upsert.\n\n        Returns:\n            Union[Status, list[Status]]: If a single Doc was given, returns its Status;\n            if a list was given, returns a list of Status objects.\n        \"\"\"\n        is_single = isinstance(docs, Doc)\n        doc_list = [docs] if is_single else docs\n        results = self._obj.Upsert(\n            [convert_to_cpp_doc(doc, self.schema) for doc in doc_list]\n        )\n        return results[0] if is_single else results\n\n    @overload\n    def update(self, docs: Doc) -> Status:\n        pass\n\n    @overload\n    def update(self, docs: list[Doc]) -> list[Status]:\n        pass\n\n    def update(self, docs: Union[Doc, list[Doc]]) -> Union[Status, list[Status]]:\n        \"\"\"Update existing documents by ID.\n\n        Only specified fields are updated; others remain unchanged.\n\n        Args:\n            docs (Union[Doc, list[Doc]]): Documents containing updated fields.\n\n        Returns:\n            Union[Status, list[Status]]: If a single Doc was given, returns its Status;\n            if a list was given, returns a list of Status objects.\n        \"\"\"\n        is_single = isinstance(docs, Doc)\n        doc_list = [docs] if is_single else docs\n        results = self._obj.Update(\n            [convert_to_cpp_doc(doc, self.schema) for doc in doc_list]\n        )\n        return results[0] if is_single else results\n\n    @overload\n    def delete(self, ids: str) -> Status:\n        pass\n\n    @overload\n    def delete(self, ids: list[str]) -> list[Status]:\n        pass\n\n    def delete(self, ids: Union[str, list[str]]) -> Union[Status, list[Status]]:\n        \"\"\"Delete documents by ID.\n\n        Args:\n            ids (Union[str, list[str]]): One or more document IDs to delete.\n\n        Returns:\n            Union[Status, list[Status]]: 
If a single id was given, returns its Status;\n            if a list was given, returns a list of Status objects.\n        \"\"\"\n        is_single = isinstance(ids, str)\n        id_list = [ids] if isinstance(ids, str) else ids\n        results = self._obj.Delete(id_list)\n        return results[0] if is_single else results\n\n    def delete_by_filter(self, filter: str) -> None:\n        \"\"\"Delete documents matching a filter expression.\n\n        Args:\n            filter (str): Boolean expression (e.g., ``\"age > 30\"``).\n        \"\"\"\n        self._obj.DeleteByFilter(filter)\n\n    # ========== Collection DQL-fetch Methods ==========\n    def fetch(self, ids: Union[str, list[str]]) -> dict[str, Doc]:\n        \"\"\"Retrieve documents by ID.\n\n        Args:\n            ids (Union[str, list[str]]): Document IDs to fetch.\n\n        Returns:\n            dict[str, Doc]: Mapping from ID to document. Missing IDs are omitted.\n        \"\"\"\n        ids = [ids] if isinstance(ids, str) else ids\n        docs = self._obj.Fetch(ids)\n        return {\n            doc_id: py_doc\n            for doc_id, core_doc in docs.items()\n            if (py_doc := convert_to_py_doc(core_doc, self.schema)) is not None\n        }\n\n    # ========== Collection DQL-Query Methods ==========\n\n    def query(\n        self,\n        vectors: Optional[Union[VectorQuery, list[VectorQuery]]] = None,\n        *,\n        topk: int = 10,\n        filter: Optional[str] = None,\n        include_vector: bool = False,\n        output_fields: Optional[list[str]] = None,\n        reranker: Optional[ReRanker] = None,\n    ) -> list[Doc]:\n        \"\"\"Perform vector similarity search with optional filtering and re-ranking.\n\n        At least one `VectorQuery` must be provided.\n\n        Args:\n            vectors (Optional[Union[VectorQuery, list[VectorQuery]]], optional):\n                One or more vector queries. 
Defaults to None.\n            topk (int, optional): Number of nearest neighbors to return.\n                Defaults to 10.\n            filter (Optional[str], optional): Boolean expression to pre-filter candidates.\n                Defaults to None.\n            include_vector (bool, optional): Whether to include vector data in results.\n                Defaults to False.\n            output_fields (Optional[list[str]], optional): Scalar fields to include.\n                If None, all fields are returned. Defaults to None.\n            reranker (Optional[ReRanker], optional): Re-ranker to refine results.\n                Defaults to None.\n\n        Returns:\n            list[Doc]: Top-k matching documents, sorted by relevance score.\n\n        Examples:\n            >>> from zvec import VectorQuery\n            >>> results = collection.query(\n            ...     vectors=VectorQuery(\"embedding\", vector=[0.1, 0.2]),\n            ...     topk=5,\n            ...     filter=\"category == 'tech'\",\n            ...     output_fields=[\"title\", \"url\"]\n            ... )\n        \"\"\"\n        ctx = QueryContext(\n            topk=topk,\n            filter=filter,\n            queries=[vectors] if isinstance(vectors, VectorQuery) else vectors,\n            include_vector=include_vector,\n            output_fields=output_fields,\n            reranker=reranker,\n        )\n        return self._querier.execute(ctx, self._obj)\n"
  },
  {
    "path": "python/zvec/model/convert.py",
    "content": "# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom _zvec import _Doc\n\nfrom .doc import Doc\nfrom .schema import CollectionSchema\n\n\ndef convert_to_cpp_doc(doc: Doc, collection_schema: CollectionSchema) -> _Doc:\n    if not doc or not collection_schema:\n        return None\n\n    _doc = _Doc()\n\n    # set pk\n    _doc.set_pk(doc.id)\n\n    # set scalar fields\n    for k, v in doc.fields.items():\n        field_schema = collection_schema.field(k)\n        if not field_schema:\n            raise ValueError(\n                f\"schema validate failed: {k} not found in collection schema\"\n            )\n        _doc.set_any(k, field_schema._get_object(), v)\n\n    # set vector fields\n    for k, v in doc.vectors.items():\n        vector_schema = collection_schema.vector(k)\n        if not vector_schema:\n            raise ValueError(\n                f\"schema validate failed: {k} not found in collection schema\"\n            )\n        _doc.set_any(k, vector_schema._get_object(), v)\n    return _doc\n\n\ndef convert_to_py_doc(doc: _Doc, collection_schema: CollectionSchema) -> Doc:\n    if not doc or not collection_schema:\n        return None\n\n    data_tuple = doc.get_all(collection_schema._get_object())\n    return Doc._from_tuple(data_tuple)\n"
  },
  {
    "path": "python/zvec/model/doc.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport json\nfrom typing import Any, Optional\n\nfrom ..common import VectorType\n\n__all__ = [\n    \"Doc\",\n]\n\n\nclass Doc:\n    \"\"\"Represents a retrieved document with optional metadata, fields, and vectors.\n\n    This immutable data class encapsulates the result of a search or retrieval\n    operation. It includes the document ID, relevance score (if applicable),\n    scalar fields, and vector embeddings.\n\n    During initialization, any `numpy.ndarray` in `vectors` is automatically\n    converted to a plain Python list for JSON serialization and immutability.\n\n    Attributes:\n        id (str): Unique identifier of the document.\n        score (Optional[float], optional): Relevance score from search.\n            Defaults to None.\n        vectors (Optional[dict[str, VectorType]], optional): Named vector\n            embeddings associated with the document. Values are converted to\n            lists if originally `np.ndarray`. Defaults to None.\n        fields (Optional[dict[str, Any]], optional): Scalar metadata fields\n            (e.g., title, timestamp). Defaults to None.\n\n    Examples:\n        >>> import numpy as np\n        >>> import zvec\n        >>> doc = zvec.Doc(\n        ...     id=\"doc1\",\n        ...     score=0.95,\n        ...     
vectors={\"emb\": np.array([0.1, 0.2, 0.3])},\n        ...     fields={\"title\": \"Hello World\"}\n        ... )\n        >>> print(doc.vector(\"emb\"))\n        [0.1, 0.2, 0.3]\n        >>> print(doc.has_field(\"title\"))\n        True\n    \"\"\"\n\n    __slots__ = (\"id\", \"score\", \"vectors\", \"fields\")\n\n    def __init__(\n        self,\n        id: str,\n        score: Optional[float] = None,\n        vectors: Optional[dict[str, VectorType]] = None,\n        fields: Optional[dict[str, Any]] = None,\n    ):\n        self.id = id\n        self.score = score\n        self.vectors = vectors or {}\n        self.fields = fields or {}\n\n    def has_field(self, name: str) -> bool:\n        \"\"\"Check if the document contains a scalar field with the given name.\n\n        Args:\n            name (str): Name of the field to check.\n\n        Returns:\n            bool: True if the field exists, False otherwise.\n        \"\"\"\n        return name in self.fields\n\n    def has_vector(self, name: str) -> bool:\n        \"\"\"Check if the document contains a vector with the given name.\n\n        Args:\n            name (str): Name of the vector to check.\n\n        Returns:\n            bool: True if the vector exists, False otherwise.\n        \"\"\"\n        return name in self.vectors\n\n    def vector(self, name: str):\n        \"\"\"Get a vector by name.\n\n        Args:\n            name (str): Name of the vector.\n\n        Returns:\n            Any: The vector (as a list) if it exists, otherwise None.\n        \"\"\"\n        return self.vectors and self.vectors.get(name)\n\n    def field(self, name: str):\n        \"\"\"Get a scalar field by name.\n\n        Args:\n            name (str): Name of the field.\n\n        Returns:\n            Any: The field value if it exists, otherwise None.\n        \"\"\"\n        return self.fields and self.fields.get(name)\n\n    def vector_names(self) -> list[str]:\n        \"\"\"Get the list of all vector names in 
this document.\n\n        Returns:\n            list[str]: A list of vector field names. Empty if no vectors.\n        \"\"\"\n        return [] if not self.vectors else list(self.vectors.keys())\n\n    def field_names(self) -> list[str]:\n        \"\"\"Get the list of all scalar field names in this document.\n\n        Returns:\n            list[str]: A list of field names. Empty if no fields.\n        \"\"\"\n        return [] if not self.fields else list(self.fields.keys())\n\n    def __repr__(self) -> str:\n        try:\n            schema = {\n                \"id\": self.id,\n                \"score\": self.score,\n                \"fields\": self.fields,\n                \"vectors\": self.vectors,\n            }\n            return json.dumps(schema, indent=2, ensure_ascii=False)\n        except Exception as e:\n            return f\"<Doc error during repr: {e}>\"\n\n    def _replace(self, **changes):\n        new_tuple = (\n            changes.get(\"id\", self.id),\n            changes.get(\"score\", self.score),\n            changes.get(\"fields\", self.fields.copy() if self.fields else None),\n            changes.get(\"vectors\", self.vectors.copy() if self.vectors else None),\n        )\n        return type(self)._from_tuple(new_tuple)\n\n    @classmethod\n    def _from_tuple(\n        cls, data_tuple: tuple[str, float, dict[str, Any], dict[str, VectorType]]\n    ):\n        obj = object.__new__(cls)\n        obj.id = data_tuple[0]\n        obj.score = data_tuple[1]\n        obj.fields = data_tuple[2] or {}\n\n        vectors = data_tuple[3]\n        if vectors is not None:\n            obj.vectors = {\n                name: (vec.tolist() if hasattr(vec, \"tolist\") else vec)\n                for name, vec in vectors.items()\n            }\n        else:\n            obj.vectors = {}\n        return obj\n"
  },
  {
    "path": "python/zvec/model/param/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom _zvec.param import (\n    AddColumnOption,\n    AlterColumnOption,\n    CollectionOption,\n    FlatIndexParam,\n    HnswIndexParam,\n    HnswQueryParam,\n    HnswRabitqIndexParam,\n    HnswRabitqQueryParam,\n    IndexOption,\n    InvertIndexParam,\n    IVFIndexParam,\n    IVFQueryParam,\n    OptimizeOption,\n)\n\n__all__ = [\n    \"AddColumnOption\",\n    \"AlterColumnOption\",\n    \"CollectionOption\",\n    \"FlatIndexParam\",\n    \"HnswIndexParam\",\n    \"HnswQueryParam\",\n    \"HnswRabitqIndexParam\",\n    \"HnswRabitqQueryParam\",\n    \"IVFIndexParam\",\n    \"IVFQueryParam\",\n    \"IndexOption\",\n    \"InvertIndexParam\",\n    \"OptimizeOption\",\n]\n"
  },
  {
    "path": "python/zvec/model/param/__init__.pyi",
    "content": "\"\"\"\nThis module contains the params of Zvec\n\"\"\"\n\nfrom __future__ import annotations\n\nimport collections\nimport typing\n\nimport _zvec.typing\n\n__all__: list[str] = [\n    \"AddColumnOption\",\n    \"AlterColumnOption\",\n    \"CollectionOption\",\n    \"FlatIndexParam\",\n    \"HnswIndexParam\",\n    \"HnswQueryParam\",\n    \"HnswRabitqIndexParam\",\n    \"HnswRabitqQueryParam\",\n    \"IVFIndexParam\",\n    \"IVFQueryParam\",\n    \"IndexOption\",\n    \"IndexParam\",\n    \"InvertIndexParam\",\n    \"OptimizeOption\",\n    \"QueryParam\",\n    \"SegmentOption\",\n    \"VectorIndexParam\",\n]\n\nclass AddColumnOption:\n    \"\"\"\n\n    Options for adding a new column to a collection.\n\n    Attributes:\n        concurrency (int): Number of threads to use when backfilling data\n            for the new column. If 0, auto-detect is used. Default is 0.\n\n    Examples:\n        >>> opt = AddColumnOption(concurrency=1)\n        >>> print(opt.concurrency)\n        1\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:\n        \"\"\"\n        Constructs an AddColumnOption instance.\n\n        Args:\n            concurrency (int, optional): Number of threads for data backfill.\n                0 means auto-detect. 
Defaults to 0.\n        \"\"\"\n\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def concurrency(self) -> int:\n        \"\"\"\n        int: Number of threads used when adding a column (0 = auto).\n        \"\"\"\n\nclass AlterColumnOption:\n    \"\"\"\n\n    Options for altering an existing column (e.g., changing index settings).\n\n    Attributes:\n        concurrency (int): Number of threads to use during the alteration process.\n            If 0, the system will choose an optimal value automatically.\n            Default is 0.\n\n    Examples:\n        >>> opt = AlterColumnOption(concurrency=1)\n        >>> print(opt.concurrency)\n        1\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:\n        \"\"\"\n        Constructs an AlterColumnOption instance.\n\n        Args:\n            concurrency (int, optional): Number of threads for column alteration.\n                0 means auto-detect. 
Defaults to 0.\n        \"\"\"\n\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def concurrency(self) -> int:\n        \"\"\"\n        int: Number of threads used when altering a column (0 = auto).\n        \"\"\"\n\nclass CollectionOption:\n    \"\"\"\n\n    Options for opening or creating a collection.\n\n    Attributes:\n        read_only (bool): Whether the collection is opened in read-only mode.\n            Default is False.\n        enable_mmap (bool): Whether to use memory-mapped I/O for data files.\n            Default is True.\n\n    Examples:\n        >>> opt = CollectionOption(read_only=True, enable_mmap=False)\n        >>> print(opt.read_only)\n        True\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, read_only: bool = False, enable_mmap: bool = True) -> None:\n        \"\"\"\n        Constructs a CollectionOption instance.\n\n        Args:\n            read_only (bool, optional): Open collection in read-only mode.\n                Defaults to False.\n            enable_mmap (bool, optional): Enable memory-mapped I/O.\n                Defaults to True.\n        \"\"\"\n\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def enable_mmap(self) -> bool: ...\n    @property\n    def read_only(self) -> bool: ...\n\nclass FlatIndexParam(VectorIndexParam):\n    \"\"\"\n\n    Parameters for configuring a flat (brute-force) index.\n\n    A flat index performs exact nearest neighbor search by comparing the query\n    vector against all vectors in the collection. It is simple, accurate, and\n    suitable for small to medium datasets or as a baseline.\n\n    Attributes:\n        metric_type (MetricType): Distance metric used for similarity computation.\n            Default is ``MetricType.IP`` (inner product).\n        quantize_type (QuantizeType): Optional quantization type for vector\n            compression (e.g., FP16, INT8). 
Use ``QuantizeType.UNDEFINED`` to\n            disable quantization. Default is ``QuantizeType.UNDEFINED``.\n\n    Examples:\n        >>> from zvec.typing import MetricType, QuantizeType\n        >>> params = FlatIndexParam(\n        ...     metric_type=MetricType.L2,\n        ...     quantize_type=QuantizeType.FP16\n        ... )\n        >>> print(params)\n        {'metric_type': 'L2', 'quantize_type': 'FP16'}\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        metric_type: _zvec.typing.MetricType = ...,\n        quantize_type: _zvec.typing.QuantizeType = ...,\n    ) -> None:\n        \"\"\"\n        Constructs a FlatIndexParam instance.\n\n        Args:\n            metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.\n            quantize_type (QuantizeType, optional): Vector quantization type.\n                Defaults to QuantizeType.UNDEFINED (no quantization).\n        \"\"\"\n\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\nclass HnswIndexParam(VectorIndexParam):\n    \"\"\"\n\n    Parameters for configuring an HNSW (Hierarchical Navigable Small World) index.\n\n    HNSW is a graph-based approximate nearest neighbor search index. This class\n    encapsulates its construction hyperparameters.\n\n    Attributes:\n        metric_type (MetricType): Distance metric used for similarity computation.\n            Default is ``MetricType.IP`` (inner product).\n        m (int): Number of bi-directional links created for every new element\n            during construction. Higher values improve accuracy but increase\n            memory usage and construction time. Default is 50.\n        ef_construction (int): Size of the dynamic candidate list for nearest\n            neighbors during index construction. 
Larger values yield better\n            graph quality at the cost of slower build time. Default is 500.\n        quantize_type (QuantizeType): Optional quantization type for vector\n            compression (e.g., FP16, INT8). Use ``QuantizeType.UNDEFINED`` to\n            disable quantization. Default is ``QuantizeType.UNDEFINED``.\n\n    Examples:\n        >>> from zvec.typing import MetricType, QuantizeType\n        >>> params = HnswIndexParam(\n        ...     metric_type=MetricType.COSINE,\n        ...     m=16,\n        ...     ef_construction=200,\n        ...     quantize_type=QuantizeType.INT8\n        ... )\n        >>> print(params)\n        {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'}\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        metric_type: _zvec.typing.MetricType = ...,\n        m: typing.SupportsInt = 50,\n        ef_construction: typing.SupportsInt = 500,\n        quantize_type: _zvec.typing.QuantizeType = ...,\n    ) -> None: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\n    @property\n    def ef_construction(self) -> int:\n        \"\"\"\n        int: Candidate list size during index construction.\n        \"\"\"\n\n    @property\n    def m(self) -> int:\n        \"\"\"\n        int: Maximum number of neighbors per node in upper layers.\n        \"\"\"\n\nclass HnswQueryParam(QueryParam):\n    \"\"\"\n\n    Query parameters for HNSW (Hierarchical Navigable Small World) index.\n\n    Controls the trade-off between search speed and accuracy via the `ef` parameter.\n\n    Attributes:\n        type (IndexType): Always ``IndexType.HNSW``.\n        ef (int): Size of the dynamic candidate list during search.\n            Larger values improve recall but slow down search.\n            Default is 300.\n        radius (float): Search radius for 
range queries. Default is 0.0.\n        is_linear (bool): Force linear search. Default is False.\n        is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n\n    Examples:\n        >>> params = HnswQueryParam(ef=300)\n        >>> print(params.ef)\n        300\n        >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)\n        {\"type\":\"HNSW\", \"ef\":300}\n    \"\"\"\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        ef: typing.SupportsInt = 300,\n        radius: typing.SupportsFloat = 0.0,\n        is_linear: bool = False,\n        is_using_refiner: bool = False,\n    ) -> None:\n        \"\"\"\n        Constructs an HnswQueryParam instance.\n\n        Args:\n            ef (int, optional): Search-time candidate list size.\n                Higher values improve accuracy. Defaults to 300.\n            radius (float, optional): Search radius for range queries. Default is 0.0.\n            is_linear (bool, optional): Force linear search. Default is False.\n            is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n        \"\"\"\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def ef(self) -> int:\n        \"\"\"\n        int: Size of the dynamic candidate list during HNSW search.\n        \"\"\"\n\nclass HnswRabitqIndexParam(VectorIndexParam):\n    \"\"\"\n\n    Parameters for configuring an HNSW (Hierarchical Navigable Small World) index with RabitQ quantization.\n\n    HNSW is a graph-based approximate nearest neighbor search index. RabitQ is a\n    quantization method that provides high compression with minimal accuracy loss.\n\n    Attributes:\n        metric_type (MetricType): Distance metric used for similarity computation.\n            Default is ``MetricType.IP`` (inner product).\n        total_bits (int): Total bits for RabitQ quantization. 
Default is 7.\n        num_clusters (int): Number of clusters for RabitQ. Default is 16.\n        m (int): Number of bi-directional links created for every new element\n            during construction. Higher values improve accuracy but increase\n            memory usage and construction time. Default is 50.\n        ef_construction (int): Size of the dynamic candidate list for nearest\n            neighbors during index construction. Larger values yield better\n            graph quality at the cost of slower build time. Default is 500.\n        sample_count (int): Sample count for RabitQ training. Default is 0.\n\n    Examples:\n        >>> from zvec.typing import MetricType\n        >>> params = HnswRabitqIndexParam(\n        ...     metric_type=MetricType.COSINE,\n        ...     total_bits=8,\n        ...     num_clusters=256,\n        ...     m=16,\n        ...     ef_construction=200,\n        ...     sample_count=10000\n        ... )\n        >>> print(params)\n        {'metric_type': 'COSINE', 'total_bits': 8, 'num_clusters': 256, 'm': 16, 'ef_construction': 200, 'sample_count': 10000}\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        metric_type: _zvec.typing.MetricType = ...,\n        total_bits: typing.SupportsInt = 7,\n        num_clusters: typing.SupportsInt = 16,\n        m: typing.SupportsInt = 50,\n        ef_construction: typing.SupportsInt = 500,\n        sample_count: typing.SupportsInt = 0,\n    ) -> None: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\n    @property\n    def ef_construction(self) -> int:\n        \"\"\"\n        int: Candidate list size during index construction.\n        \"\"\"\n\n    @property\n    def m(self) -> int:\n        \"\"\"\n        int: Maximum number of neighbors per node.\n        \"\"\"\n\n    @property\n   
 def total_bits(self) -> int:\n        \"\"\"\n        int: Total bits for RabitQ quantization.\n        \"\"\"\n\n    @property\n    def num_clusters(self) -> int:\n        \"\"\"\n        int: Number of clusters for RabitQ.\n        \"\"\"\n\n    @property\n    def sample_count(self) -> int:\n        \"\"\"\n        int: Sample count for RabitQ training.\n        \"\"\"\n\nclass HnswRabitqQueryParam(QueryParam):\n    \"\"\"\n\n    Query parameters for HNSW index with RabitQ quantization.\n\n    Controls the trade-off between search speed and accuracy via the `ef` parameter.\n\n    Attributes:\n        type (IndexType): Always ``IndexType.HNSW_RABITQ``.\n        ef (int): Size of the dynamic candidate list during search.\n            Larger values improve recall but slow down search.\n            Default is 300.\n        radius (float): Search radius for range queries. Default is 0.0.\n        is_linear (bool): Force linear search. Default is False.\n        is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n\n    Examples:\n        >>> params = HnswRabitqQueryParam(ef=300)\n        >>> print(params.ef)\n        300\n    \"\"\"\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        ef: typing.SupportsInt = 300,\n        radius: typing.SupportsFloat = 0.0,\n        is_linear: bool = False,\n        is_using_refiner: bool = False,\n    ) -> None:\n        \"\"\"\n        Constructs an HnswRabitqQueryParam instance.\n\n        Args:\n            ef (int, optional): Search-time candidate list size.\n                Higher values improve accuracy. Defaults to 300.\n            radius (float, optional): Search radius for range queries. Default is 0.0.\n            is_linear (bool, optional): Force linear search. Default is False.\n            is_using_refiner (bool, optional): Whether to use refiner for the query. 
Default is False.\n        \"\"\"\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def ef(self) -> int:\n        \"\"\"\n        int: Size of the dynamic candidate list during HNSW search.\n        \"\"\"\n\nclass IVFIndexParam(VectorIndexParam):\n    \"\"\"\n\n    Parameters for configuring an IVF (Inverted File Index) index.\n\n    IVF partitions the vector space into clusters (inverted lists). At query time,\n    only a subset of clusters is searched, providing a trade-off between speed\n    and accuracy.\n\n    Attributes:\n        metric_type (MetricType): Distance metric used for similarity computation.\n            Default is ``MetricType.IP`` (inner product).\n        n_list (int): Number of clusters (inverted lists) to partition the dataset into.\n            If set to 0, the system will auto-select a reasonable value based on data size.\n            Default is 0 (auto).\n        n_iters (int): Number of iterations for k-means clustering during index training.\n            Higher values yield more stable centroids. Default is 10.\n        use_soar (bool): Whether to enable SOAR (Scalable Optimized Adaptive Routing)\n            for improved IVF search performance. Default is False.\n        quantize_type (QuantizeType): Optional quantization type for vector\n            compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``.\n\n    Examples:\n        >>> from zvec.typing import MetricType, QuantizeType\n        >>> params = IVFIndexParam(\n        ...     metric_type=MetricType.COSINE,\n        ...     n_list=100,\n        ...     n_iters=15,\n        ...     use_soar=True,\n        ...     quantize_type=QuantizeType.INT8\n        ... 
)\n        >>> print(params.n_list)\n        100\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(\n        self,\n        metric_type: _zvec.typing.MetricType = ...,\n        n_list: typing.SupportsInt = 0,\n        n_iters: typing.SupportsInt = 10,\n        use_soar: bool = False,\n        quantize_type: _zvec.typing.QuantizeType = ...,\n    ) -> None:\n        \"\"\"\n        Constructs an IVFIndexParam instance.\n\n        Args:\n            metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.\n            n_list (int, optional): Number of inverted lists (clusters). Set to 0 for auto.\n                Defaults to 0.\n            n_iters (int, optional): Number of k-means iterations during training.\n                Defaults to 10.\n            use_soar (bool, optional): Enable SOAR optimization. Defaults to False.\n            quantize_type (QuantizeType, optional): Vector quantization type.\n                Defaults to QuantizeType.UNDEFINED.\n        \"\"\"\n\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\n    @property\n    def n_iters(self) -> int:\n        \"\"\"\n        int: Number of k-means iterations during training.\n        \"\"\"\n\n    @property\n    def n_list(self) -> int:\n        \"\"\"\n        int: Number of inverted lists (0 = auto).\n        \"\"\"\n\n    @property\n    def use_soar(self) -> bool:\n        \"\"\"\n        bool: Whether SOAR optimization is enabled.\n        \"\"\"\n\nclass IVFQueryParam(QueryParam):\n    \"\"\"\n\n    Query parameters for IVF (Inverted File Index) index.\n\n    Controls how many inverted lists (`nprobe`) to visit during search.\n\n    Attributes:\n        type (IndexType): Always ``IndexType.IVF``.\n        nprobe (int): Number of closest clusters (inverted lists) to search.\n            Higher values 
improve recall but increase latency.\n            Default is 10.\n        radius (float): Search radius for range queries. Default is 0.0.\n        is_linear (bool): Force linear search. Default is False.\n\n    Examples:\n        >>> params = IVFQueryParam(nprobe=20)\n        >>> print(params.nprobe)\n        20\n    \"\"\"\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, nprobe: typing.SupportsInt = 10) -> None:\n        \"\"\"\n        Constructs an IVFQueryParam instance.\n\n        Args:\n            nprobe (int, optional): Number of inverted lists to probe during search.\n                Higher values improve accuracy. Defaults to 10.\n        \"\"\"\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def nprobe(self) -> int:\n        \"\"\"\n        int: Number of inverted lists to search during IVF query.\n        \"\"\"\n\nclass IndexOption:\n    \"\"\"\n\n    Options for creating an index.\n\n    Attributes:\n        concurrency (int): Number of threads to use during index creation.\n            If 0, the system will choose an optimal value automatically.\n            Default is 0.\n\n    Examples:\n        >>> opt = IndexOption(concurrency=4)\n        >>> print(opt.concurrency)\n        4\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:\n        \"\"\"\n        Constructs an IndexOption instance.\n\n        Args:\n            concurrency (int, optional): Number of concurrent threads.\n                0 means auto-detect. 
Defaults to 0.\n        \"\"\"\n\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def concurrency(self) -> int:\n        \"\"\"\n        int: Number of threads used for index creation (0 = auto).\n        \"\"\"\n\nclass IndexParam:\n    \"\"\"\n\n    Base class for all index parameter configurations.\n\n    This abstract base class defines the common interface for index types.\n    It should not be instantiated directly; use derived classes instead.\n\n    Attributes:\n        type (IndexType): The type of the index (e.g., HNSW, FLAT, INVERT).\n    \"\"\"\n\n    __hash__: typing.ClassVar[None] = None\n\n    def __eq__(self, arg0: typing.Any) -> bool: ...\n    def __getstate__(self) -> tuple: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def clone(self) -> IndexParam: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\n    @property\n    def type(self) -> _zvec.typing.IndexType:\n        \"\"\"\n        IndexType: The type of the index.\n        \"\"\"\n\nclass InvertIndexParam(IndexParam):\n    \"\"\"\n\n    Parameters for configuring an invert index.\n\n    This class controls whether range query\n    optimization is enabled for invert index structures.\n\n    Attributes:\n        type (IndexType): Always `IndexType.INVERTED`.\n        enable_range_optimization (bool): Whether range optimization is enabled.\n        enable_extended_wildcard (bool): Whether extended wildcard (suffix and infix) search is enabled.\n\n    Examples:\n        >>> params = InvertIndexParam(enable_range_optimization=True, enable_extended_wildcard=False)\n        >>> print(params.enable_range_optimization)\n        True\n        >>> print(params.enable_extended_wildcard)\n        False\n        >>> config = params.to_dict()\n        >>> print(config)\n        {'enable_range_optimization': True, 'enable_extended_wildcard': False}\n    \"\"\"\n    def __getstate__(self) -> tuple: 
...\n    def __init__(\n        self,\n        enable_range_optimization: bool = False,\n        enable_extended_wildcard: bool = False,\n    ) -> None:\n        \"\"\"\n        Constructs an InvertIndexParam instance.\n\n        Args:\n            enable_range_optimization (bool, optional): If True, enables range query\n                optimization for the invert index. Defaults to False.\n            enable_extended_wildcard (bool, optional): If True, enables extended wildcard\n                search including suffix and infix patterns. Defaults to False.\n        \"\"\"\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n    @property\n    def enable_extended_wildcard(self) -> bool:\n        \"\"\"\n        bool: Whether extended wildcard (suffix and infix) search is enabled.\n        Note: Prefix search is always enabled regardless of this setting.\n        \"\"\"\n    @property\n    def enable_range_optimization(self) -> bool:\n        \"\"\"\n        bool: Whether range optimization is enabled for this inverted index.\n        \"\"\"\n\nclass OptimizeOption:\n    \"\"\"\n\n    Options for optimizing a collection (e.g., merging segments).\n\n    Attributes:\n        concurrency (int): Number of threads to use during optimization.\n            If 0, the system will choose an optimal value automatically.\n            Default is 0.\n\n    Examples:\n        >>> opt = OptimizeOption(concurrency=2)\n        >>> print(opt.concurrency)\n        2\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:\n        \"\"\"\n        Constructs an OptimizeOption instance.\n\n        Args:\n            concurrency (int, optional): Number of concurrent threads.\n                0 means auto-detect. 
Defaults to 0.\n        \"\"\"\n\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def concurrency(self) -> int:\n        \"\"\"\n        int: Number of threads used for optimization (0 = auto).\n        \"\"\"\n\nclass QueryParam:\n    \"\"\"\n\n    Base class for all query parameter configurations.\n\n    This abstract base class defines common query settings such as search radius\n    and whether to force linear (brute-force) search. It should not be instantiated\n    directly; use derived classes like `HnswQueryParam` or `IVFQueryParam`.\n\n    Attributes:\n        type (IndexType): The index type this query is configured for.\n        radius (float): Search radius for range queries. Used in combination with\n            top-k to filter results. Default is 0.0 (disabled).\n        is_linear (bool): If True, forces brute-force linear search instead of\n            using the index. Useful for debugging or small datasets. Default is False.\n        is_using_refiner (bool, optional): Whether to use refiner for the query. 
Default is False.\n    \"\"\"\n    def __getstate__(self) -> tuple: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def is_linear(self) -> bool:\n        \"\"\"\n        bool: Whether to bypass the index and use brute-force linear search.\n        \"\"\"\n    @property\n    def is_using_refiner(self) -> bool:\n        \"\"\"\n        bool: Whether to use refiner for the query.\n        \"\"\"\n    @property\n    def radius(self) -> float:\n        \"\"\"\n        float: Search radius for range queries.\n        \"\"\"\n    @property\n    def type(self) -> _zvec.typing.IndexType:\n        \"\"\"\n        IndexType: The type of index this query targets.\n        \"\"\"\n\nclass SegmentOption:\n    \"\"\"\n\n    Options for segment-level operations.\n\n    Currently, this class mirrors CollectionOption and is used internally.\n    It supports read-only mode, memory mapping, and buffer configuration.\n\n    Note:\n        This class is primarily for internal use. 
Most users should use\n        CollectionOption instead.\n\n    Examples:\n        >>> opt = SegmentOption()\n        >>> print(opt.enable_mmap)\n        True\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __init__(self) -> None:\n        \"\"\"\n        Constructs a SegmentOption with default settings.\n        \"\"\"\n\n    def __repr__(self) -> str: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    @property\n    def enable_mmap(self) -> bool:\n        \"\"\"\n        bool: Whether memory-mapped I/O is enabled.\n        \"\"\"\n\n    @property\n    def max_buffer_size(self) -> int:\n        \"\"\"\n        int: Maximum buffer size in bytes (internal use).\n        \"\"\"\n\n    @property\n    def read_only(self) -> bool:\n        \"\"\"\n        bool: Whether the segment is read-only.\n        \"\"\"\n\nclass VectorIndexParam(IndexParam):\n    \"\"\"\n\n    Base class for vector index parameter configurations.\n\n    Encapsulates common settings for all vector index types.\n\n    Attributes:\n        type (IndexType): The specific vector index type (e.g., HNSW, FLAT).\n        metric_type (MetricType): Distance metric used for similarity search.\n        quantize_type (QuantizeType): Optional vector quantization type.\n    \"\"\"\n\n    def __getstate__(self) -> tuple: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def to_dict(self) -> dict:\n        \"\"\"\n        Convert to dictionary with all fields\n        \"\"\"\n\n    @property\n    def metric_type(self) -> _zvec.typing.MetricType:\n        \"\"\"\n        MetricType: Distance metric (e.g., IP, COSINE, L2).\n        \"\"\"\n\n    @property\n    def quantize_type(self) -> _zvec.typing.QuantizeType:\n        \"\"\"\n        QuantizeType: Vector quantization type (e.g., FP16, INT8).\n        \"\"\"\n\nclass _VectorQuery:\n    field_name: str\n    filter: str\n    include_vector: bool\n    query_params: QueryParam\n\n    def __getstate__(self) -> tuple: ...\n    
def __init__(self) -> None: ...\n    def __setstate__(self, arg0: tuple) -> None: ...\n    def set_vector(self, arg0: ..., arg1: typing.Any) -> None: ...\n    @property\n    def output_fields(self) -> list[str] | None: ...\n    @output_fields.setter\n    def output_fields(self, arg0: collections.abc.Sequence[str] | None) -> None: ...\n    @property\n    def topk(self) -> int: ...\n    @topk.setter\n    def topk(self, arg0: typing.SupportsInt) -> None: ...\n"
  },
  {
    "path": "python/zvec/model/param/vector_query.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom typing import Optional, Union\n\nfrom ...common import VectorType\nfrom . import HnswQueryParam, IVFQueryParam\n\n__all__ = [\"VectorQuery\"]\n\n\n@dataclass(frozen=True)\nclass VectorQuery:\n    \"\"\"Represents a vector search query for a specific field in a collection.\n\n    A `VectorQuery` can be constructed using either a document ID (to look up\n    its vector) or an explicit vector. It may optionally include index-specific\n    query parameters to control search behavior (e.g., `ef` for HNSW, `nprobe` for IVF).\n\n    Exactly one of `id` or `vector` should be provided. If both are given,\n    behavior is implementation-defined (typically `id` takes precedence).\n\n    Attributes:\n        field_name (str): Name of the vector field to query.\n        id (Optional[str], optional): Document ID to fetch vector from. Default is None.\n        vector (VectorType, optional): Explicit query vector. Default is None.\n        param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional):\n            Index-specific query parameters. Default is None.\n\n    Examples:\n        >>> import zvec\n        >>> # Query by ID\n        >>> q1 = zvec.VectorQuery(field_name=\"embedding\", id=\"doc123\")\n        >>> # Query by vector\n        >>> q2 = zvec.VectorQuery(\n        ...     
field_name=\"embedding\",\n        ...     vector=[0.1, 0.2, 0.3],\n        ...     param=HnswQueryParam(ef=300)\n        ... )\n    \"\"\"\n\n    field_name: str\n    id: Optional[str] = None\n    vector: VectorType = None\n    param: Optional[Union[HnswQueryParam, IVFQueryParam]] = None\n\n    def has_id(self) -> bool:\n        \"\"\"Check if the query is based on a document ID.\n\n        Returns:\n            bool: True if `id` is set, False otherwise.\n        \"\"\"\n        return self.id is not None\n\n    def has_vector(self) -> bool:\n        \"\"\"Check if the query contains an explicit vector.\n\n        Returns:\n            bool: True if `vector` is non-empty, False otherwise.\n        \"\"\"\n        return self.vector is not None and len(self.vector) > 0\n\n    def _validate(self) -> None:\n        if self.field_name is None:\n            raise ValueError(\"Field name cannot be empty\")\n        if self.id and self.vector:\n            raise ValueError(\"Cannot provide both id and vector\")\n"
  },
  {
    "path": "python/zvec/model/schema/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom _zvec.schema import CollectionStats\n\nfrom .collection_schema import CollectionSchema\nfrom .field_schema import FieldSchema, VectorSchema\n\n__all__ = [\"CollectionSchema\", \"CollectionStats\", \"FieldSchema\", \"VectorSchema\"]\n"
  },
  {
    "path": "python/zvec/model/schema/__init__.pyi",
    "content": "\"\"\"\nThis module contains the schema of Zvec\n\"\"\"\n\nfrom __future__ import annotations\n\nimport collections.abc\nimport typing\n\nimport _zvec.param\nimport _zvec.typing\n\nfrom .collection_schema import CollectionSchema\nfrom .field_schema import FieldSchema, VectorSchema\n\n__all__: list[str] = [\n    \"CollectionSchema\",\n    \"CollectionStats\",\n    \"FieldSchema\",\n    \"VectorSchema\",\n]\n\nclass CollectionStats:\n    def __init__(self) -> None: ...\n    def __repr__(self) -> str: ...\n    @property\n    def doc_count(self) -> int: ...\n    @property\n    def index_completeness(self) -> dict[str, float]: ...\n\nclass _CollectionSchema:\n    __hash__: typing.ClassVar[None] = None\n\n    def __eq__(self, arg0: _CollectionSchema) -> bool: ...\n    def __init__(\n        self, name: str, fields: collections.abc.Sequence[_FieldSchema]\n    ) -> None:\n        \"\"\"\n        Construct with name and list of fields\n        \"\"\"\n\n    def __ne__(self, arg0: _CollectionSchema) -> bool: ...\n    def fields(self) -> list[_FieldSchema]:\n        \"\"\"\n        Return list of all field schemas.\n        \"\"\"\n\n    def forward_fields(self) -> list[_FieldSchema]:\n        \"\"\"\n        Return list of forward-indexed fields.\n        \"\"\"\n\n    def get_field(self, field_name: str) -> _FieldSchema:\n        \"\"\"\n        Get field by name (const pointer), returns None if not found.\n        \"\"\"\n\n    def get_forward_field(self, field_name: str) -> _FieldSchema:\n        \"\"\"\n        Get forward field (used for filtering).\n        \"\"\"\n\n    def get_vector_field(self, field_name: str) -> _FieldSchema:\n        \"\"\"\n        Get vector field by name.\n        \"\"\"\n\n    def has_field(self, field_name: str) -> bool:\n        \"\"\"\n        Check if a field exists.\n        \"\"\"\n\n    def vector_fields(self) -> list[_FieldSchema]:\n        \"\"\"\n        Return list of vector fields.\n        \"\"\"\n\n    
@property\n    def name(self) -> str: ...\n\nclass _FieldSchema:\n    __hash__: typing.ClassVar[None] = None\n\n    def __eq__(self, arg0: _FieldSchema) -> bool: ...\n    def __init__(\n        self,\n        name: str,\n        data_type: _zvec.typing.DataType,\n        nullable: bool = False,\n        dimension: typing.SupportsInt = 0,\n        index_param: _zvec.param.IndexParam = None,\n    ) -> None: ...\n    def __ne__(self, arg0: _FieldSchema) -> bool: ...\n    @property\n    def data_type(self) -> _zvec.typing.DataType: ...\n    @property\n    def dimension(self) -> int: ...\n    @property\n    def index_param(self) -> typing.Any: ...\n    @property\n    def index_type(self) -> _zvec.typing.IndexType: ...\n    @property\n    def is_dense_vector(self) -> bool: ...\n    @property\n    def is_sparse_vector(self) -> bool: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def nullable(self) -> bool: ...\n"
  },
  {
    "path": "python/zvec/model/schema/collection_schema.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport json\nfrom typing import Optional, Union\n\nfrom _zvec.schema import _CollectionSchema, _FieldSchema\n\nfrom .field_schema import FieldSchema, VectorSchema\n\n__all__ = [\n    \"CollectionSchema\",\n]\n\n\nclass CollectionSchema:\n    \"\"\"Defines the structure of a collection in Zvec.\n\n    A collection schema specifies the name of the collection and its fields,\n    including both scalar fields (e.g., int, string) and vector fields.\n    Field names must be unique across both scalar and vector fields.\n\n    Args:\n        name (str): Name of the collection.\n        fields (Optional[Union[FieldSchema, list[FieldSchema]]], optional):\n            One or more scalar field definitions. Defaults to None.\n        vectors (Optional[Union[VectorSchema, list[VectorSchema]]], optional):\n            One or more vector field definitions. Defaults to None.\n\n    Raises:\n        TypeError: If `fields` or `vectors` are of unsupported types.\n        ValueError: If any field or vector name is duplicated.\n\n    Examples:\n        >>> from zvec import FieldSchema, VectorSchema, DataType, IndexType\n        >>> id_field = FieldSchema(\"id\", DataType.INT64, is_primary=True)\n        >>> emb_field = VectorSchema(\"embedding\", dim=128, data_type=DataType.VECTOR_FP32)\n        >>> schema = CollectionSchema(\n        ...   
  name=\"my_collection\",\n        ...     fields=id_field,\n        ...     vectors=emb_field\n        ... )\n        >>> print(schema.name)\n        my_collection\n    \"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        fields: Optional[Union[FieldSchema, list[FieldSchema]]] = None,\n        vectors: Optional[Union[VectorSchema, list[VectorSchema]]] = None,\n    ):\n        if name is None or not isinstance(name, str):\n            raise ValueError(\n                f\"schema validate failed: collection name must be str, got {type(name).__name__}\"\n            )\n\n        # handle fields\n        _fields_name: list[str] = []\n        _fields_list: list[_FieldSchema] = []\n\n        self._check_fields(fields, _fields_name, _fields_list)\n        self._check_vectors(vectors, _fields_name, _fields_list)\n\n        # init\n        self._cpp_obj = _CollectionSchema(\n            name=name,\n            fields=_fields_list,\n        )\n\n    def _check_fields(\n        self,\n        fields: Optional[Union[FieldSchema, list[FieldSchema]]],\n        _fields_name: list[str],\n        _fields_list: list[_FieldSchema],\n    ) -> None:\n        field_items = []\n\n        if isinstance(fields, FieldSchema):\n            field_items = [fields]\n        elif isinstance(fields, list):\n            field_items = fields\n        elif fields is None:\n            field_items = []\n        else:\n            raise TypeError(\n                f\"schema validate failed: invalid 'fields' type, expected FieldSchema or list[FieldSchema], \"\n                f\"got {type(fields).__name__}\"\n            )\n\n        for idx, field in enumerate(field_items):\n            if not isinstance(field, FieldSchema):\n                raise TypeError(\n                    f\"schema validate failed: invalid field type in 'fields' list, expected FieldSchema, \"\n                    f\"got {type(field).__name__} at index {idx}\"\n                )\n\n            if field.name 
in _fields_name:\n                raise ValueError(\n                    f\"schema validate failed: duplicate field name '{field.name}': field names must be unique\"\n                )\n            _fields_name.append(field.name)\n            _fields_list.append(field._get_object())\n\n    def _check_vectors(\n        self,\n        vectors: Optional[Union[VectorSchema, list[VectorSchema]]],\n        _fields_name: list[str],\n        _fields_list: list[_FieldSchema],\n    ) -> None:\n        # handle vector\n        if isinstance(vectors, VectorSchema):\n            vectors_items = [vectors]\n        elif isinstance(vectors, list):\n            vectors_items = vectors\n        elif vectors is None:\n            vectors_items = []\n        else:\n            raise TypeError(\n                f\"schema validate failed: invalid 'vectors' type, expected VectorSchema or list[VectorSchema], \"\n                f\"got {type(vectors).__name__}\"\n            )\n\n        for idx, vector in enumerate(vectors_items):\n            if not isinstance(vector, VectorSchema):\n                raise TypeError(\n                    f\"schema validate failed: invalid vector type in 'vectors' list, expected VectorSchema, \"\n                    f\"got {type(vector).__name__} at index {idx}\"\n                )\n\n            if vector.name in _fields_name:\n                raise ValueError(\n                    f\"schema validate failed: duplicate vector name '{vector.name}', vector names must be unique \"\n                    f\"(conflicts with existing field or vector)\"\n                )\n            _fields_name.append(vector.name)\n            _fields_list.append(vector._get_object())\n\n    @classmethod\n    def _from_core(cls, core_collection_schema: _CollectionSchema):\n        inst = cls.__new__(cls)\n        if not core_collection_schema:\n            raise ValueError(\"schema validate failed: schema is null\")\n        inst._cpp_obj = core_collection_schema\n        return 
inst\n\n    @property\n    def name(self) -> str:\n        \"\"\"str: The name of the collection.\"\"\"\n        return self._cpp_obj.name\n\n    def field(self, name: str) -> Optional[FieldSchema]:\n        \"\"\"Retrieve a scalar field by name.\n\n        Args:\n            name (str): Name of the field.\n\n        Returns:\n            Optional[FieldSchema]: The field if found, otherwise None.\n        \"\"\"\n        _field = self._cpp_obj.get_forward_field(name)\n        return FieldSchema._from_core(_field) if _field else None\n\n    def vector(self, name: str) -> Optional[VectorSchema]:\n        \"\"\"Retrieve a vector field by name.\n\n        Args:\n            name (str): Name of the vector field.\n\n        Returns:\n            Optional[VectorSchema]: The vector field if found, otherwise None.\n        \"\"\"\n        _field = self._cpp_obj.get_vector_field(name)\n        return VectorSchema._from_core(_field) if _field else None\n\n    @property\n    def fields(self) -> list[FieldSchema]:\n        \"\"\"list[FieldSchema]: All scalar (non-vector) fields in the schema.\"\"\"\n        _fields = self._cpp_obj.forward_fields()\n        return [FieldSchema._from_core(_field) for _field in _fields]\n\n    @property\n    def vectors(self) -> list[VectorSchema]:\n        \"\"\"list[VectorSchema]: All vector fields in the schema.\"\"\"\n        _vectors = self._cpp_obj.vector_fields()\n        return [VectorSchema._from_core(_vector) for _vector in _vectors]\n\n    def _get_object(self) -> _CollectionSchema:\n        return self._cpp_obj\n\n    def __repr__(self) -> str:\n        try:\n            schema = {\n                \"name\": self.name,\n                \"fields\": {field.name: field.__dict__() for field in self.fields},\n                \"vectors\": {vector.name: vector.__dict__() for vector in self.vectors},\n            }\n            return json.dumps(schema, indent=2, ensure_ascii=False)\n        except Exception as e:\n            return 
f\"<CollectionSchema error during repr: {e}>\"\n\n    def __str__(self) -> str:\n        return self.__repr__()\n"
  },
  {
    "path": "python/zvec/model/schema/field_schema.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport json\nfrom typing import Any, Optional, Union\n\nfrom _zvec.schema import _FieldSchema\n\nfrom zvec.model.param import (\n    FlatIndexParam,\n    HnswIndexParam,\n    InvertIndexParam,\n    IVFIndexParam,\n)\nfrom zvec.typing import DataType\n\n__all__ = [\n    \"FieldSchema\",\n    \"VectorSchema\",\n]\n\nSUPPORT_VECTOR_DATA_TYPE = [\n    DataType.VECTOR_FP16,\n    DataType.VECTOR_FP32,\n    DataType.VECTOR_FP64,\n    DataType.VECTOR_INT8,\n    DataType.SPARSE_VECTOR_FP16,\n    DataType.SPARSE_VECTOR_FP32,\n]\n\nSUPPORT_SCALAR_DATA_TYPE = [\n    DataType.INT32,\n    DataType.INT64,\n    DataType.UINT32,\n    DataType.UINT64,\n    DataType.FLOAT,\n    DataType.DOUBLE,\n    DataType.STRING,\n    DataType.BOOL,\n    DataType.ARRAY_INT32,\n    DataType.ARRAY_INT64,\n    DataType.ARRAY_UINT32,\n    DataType.ARRAY_UINT64,\n    DataType.ARRAY_FLOAT,\n    DataType.ARRAY_DOUBLE,\n    DataType.ARRAY_STRING,\n    DataType.ARRAY_BOOL,\n]\n\n\nclass FieldSchema:\n    \"\"\"Represents a scalar (non-vector) field in a collection schema.\n\n    A `FieldSchema` defines the name, data type, nullability, and optional\n    inverted index configuration for a regular field (e.g., ID, timestamp, category).\n\n    Args:\n        name (str): Name of the field. 
Must be unique within the collection.\n        data_type (DataType): Data type of the field (e.g., INT64, STRING).\n        nullable (bool, optional): Whether the field can contain null values.\n            Defaults to False.\n        index_param (Optional[InvertIndexParam], optional): Inverted index\n            parameters for this field. Only applicable to fields that support\n            indexing (e.g., scalar fields used in filtering). Defaults to None.\n\n    Examples:\n        >>> from zvec.typing import DataType\n        >>> from zvec.model.param import InvertIndexParam\n        >>> id_field = FieldSchema(\n        ...     name=\"id\",\n        ...     data_type=DataType.INT64,\n        ...     nullable=False,\n        ...     index_param=InvertIndexParam(enable_range_optimization=True)\n        ... )\n    \"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        data_type: DataType,\n        nullable: bool = False,\n        index_param: Optional[InvertIndexParam] = None,\n    ):\n        if name is None or not isinstance(name, str):\n            raise ValueError(\n                f\"schema validate failed: field name must be str, got {type(name).__name__}\"\n            )\n\n        if data_type not in SUPPORT_SCALAR_DATA_TYPE:\n            raise ValueError(\n                f\"schema validate failed: scalar_field's data_type must be one of \"\n                f\"{', '.join(str(dt) for dt in SUPPORT_SCALAR_DATA_TYPE)}, \"\n                f\"but field[{name}]'s data_type is {data_type}\"\n            )\n\n        self._cpp_obj = _FieldSchema(\n            name=name,\n            data_type=data_type,\n            dimension=0,\n            nullable=nullable,\n            index_param=index_param,\n        )\n\n    @classmethod\n    def _from_core(cls, core_field_schema: _FieldSchema):\n        if core_field_schema is None:\n            raise ValueError(\"schema validate failed: field schema is None\")\n        inst = cls.__new__(cls)\n        
inst._cpp_obj = core_field_schema\n        return inst\n\n    def _get_object(self) -> _FieldSchema:\n        return self._cpp_obj\n\n    @property\n    def name(self) -> str:\n        \"\"\"str: The name of the field.\"\"\"\n        return self._cpp_obj.name\n\n    @property\n    def data_type(self) -> DataType:\n        \"\"\"DataType: The data type of the field (e.g., INT64, STRING).\"\"\"\n        return self._cpp_obj.data_type\n\n    @property\n    def nullable(self) -> bool:\n        \"\"\"bool: Whether the field allows null values.\"\"\"\n        return self._cpp_obj.nullable\n\n    @property\n    def index_param(self) -> Optional[InvertIndexParam]:\n        \"\"\"Optional[InvertIndexParam]: Inverted index configuration, if any.\"\"\"\n        return self._cpp_obj.index_param\n\n    def __dict__(self) -> dict[str, Any]:\n        return {\n            \"name\": self.name,\n            \"data_type\": (\n                self.data_type.name\n                if hasattr(self.data_type, \"name\")\n                else str(self.data_type)\n            ),\n            \"nullable\": self.nullable,\n            \"index_param\": (\n                self.index_param.to_dict() if self.index_param is not None else None\n            ),\n        }\n\n    def __repr__(self) -> str:\n        try:\n            schema = self.__dict__()\n            return json.dumps(schema, indent=2, ensure_ascii=False)\n        except Exception as e:\n            return f\"<FieldSchema error during repr: {e}>\"\n\n    def __str__(self) -> str:\n        return self.__repr__()\n\n    def __eq__(self, other: object) -> bool:\n        if not isinstance(other, FieldSchema):\n            return False\n        return self._cpp_obj == other._cpp_obj\n\n    def __hash__(self) -> int:\n        return hash((self.name, self.data_type, self.nullable))\n\n\nclass VectorSchema:\n    \"\"\"Represents a vector field in a collection schema.\n\n    A `VectorSchema` defines the name, data type, dimensionality, and 
index\n    configuration for a vector field used in similarity search.\n\n    Args:\n        name (str): Name of the vector field. Must be unique within the collection.\n        data_type (DataType): Vector data type (e.g., VECTOR_FP32, VECTOR_INT8).\n        dimension (int, optional): Dimensionality of the vector. Must be > 0 for dense vectors;\n            sparse vectors may keep the default ``0``. ``None`` is rejected with ``ValueError``.\n        index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam], optional):\n            Index configuration for this vector field. Defaults to\n            ``FlatIndexParam()``.\n\n    Examples:\n        >>> from zvec.typing import DataType\n        >>> from zvec.model.param import HnswIndexParam\n        >>> emb_field = VectorSchema(\n        ...     name=\"embedding\",\n        ...     data_type=DataType.VECTOR_FP32,\n        ...     dimension=128,\n        ...     index_param=HnswIndexParam(ef_construction=200, m=16)\n        ... )\n    \"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        data_type: DataType,\n        dimension: Optional[int] = 0,\n        index_param: Optional[\n            Union[HnswIndexParam, FlatIndexParam, IVFIndexParam]\n        ] = None,\n    ):\n        if name is None or not isinstance(name, str):\n            raise ValueError(\n                f\"schema validate failed: field name must be str, got {type(name).__name__}\"\n            )\n\n        if not isinstance(dimension, int) or dimension < 0:\n            raise ValueError(\"schema validate failed: vector's dimension must be >= 0\")\n\n        if data_type not in SUPPORT_VECTOR_DATA_TYPE:\n            raise ValueError(\n                f\"schema validate failed: vector's data_type must be one of \"\n                f\"{', '.join(str(dt) for dt in SUPPORT_VECTOR_DATA_TYPE)}, \"\n                f\"but field[{name}]'s data_type is {data_type}\"\n            )\n\n        if index_param is None:\n            index_param = FlatIndexParam()\n\n        self._cpp_obj = 
_FieldSchema(\n            name=name,\n            data_type=data_type,\n            dimension=dimension,\n            nullable=False,\n            index_param=index_param,\n        )\n\n    @classmethod\n    def _from_core(cls, core_field_schema: _FieldSchema):\n        inst = cls.__new__(cls)\n        inst._cpp_obj = core_field_schema\n        return inst\n\n    def _get_object(self) -> _FieldSchema:\n        return self._cpp_obj\n\n    @property\n    def name(self) -> str:\n        \"\"\"str: The name of the vector field.\"\"\"\n        return self._cpp_obj.name\n\n    @property\n    def data_type(self) -> DataType:\n        \"\"\"DataType: The vector data type (e.g., VECTOR_FP32).\"\"\"\n        return self._cpp_obj.data_type\n\n    @property\n    def dimension(self) -> int:\n        \"\"\"int: The dimensionality of the vector.\"\"\"\n        return self._cpp_obj.dimension\n\n    @property\n    def index_param(self) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]:\n        \"\"\"Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]: Index configuration for the vector.\"\"\"\n        return self._cpp_obj.index_param\n\n    def __dict__(self) -> dict[str, Any]:\n        return {\n            \"name\": self.name,\n            \"data_type\": (\n                self.data_type.name\n                if hasattr(self.data_type, \"name\")\n                else str(self.data_type)\n            ),\n            \"dimension\": self.dimension,\n            \"index_param\": (\n                self.index_param.to_dict() if self.index_param is not None else None\n            ),\n        }\n\n    def __repr__(self) -> str:\n        try:\n            schema = self.__dict__()\n            return json.dumps(schema, indent=2, ensure_ascii=False)\n        except Exception as e:\n            return f\"<FieldSchema error during repr: {e}>\"\n\n    def __str__(self) -> str:\n        return self.__repr__()\n\n    def __eq__(self, other: object) -> bool:\n        if not 
isinstance(other, VectorSchema):\n            return False\n        return self._cpp_obj == other._cpp_obj\n\n    def __hash__(self) -> int:\n        return hash((self.name, self.data_type, self.dimension))\n"
  },
  {
    "path": "python/zvec/py.typed",
    "content": ""
  },
  {
    "path": "python/zvec/tool/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom .util import require_module\n\n__all__ = [\"require_module\"]\n"
  },
  {
    "path": "python/zvec/tool/util.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nimport importlib\nfrom typing import Any, Optional\n\n\ndef require_module(module: str, mitigation: Optional[str] = None) -> Any:\n    \"\"\"Import a Python module and raise a user-friendly error if it is not available.\n\n    This utility helps provide actionable error messages when optional dependencies\n    are missing. It attempts to import the given module and, on failure, suggests\n    a `pip install` command based on either the module name or an optional\n    mitigation package name.\n\n    Args:\n        module (str): The full module name to import (e.g., ``\"numpy\"``, ``\"pandas.io.parquet\"``).\n        mitigation (Optional[str], optional): The package name to suggest for installation\n            if the import fails. 
If not provided, the top-level package of `module`\n            will be used (e.g., ``\"pandas\"`` for ``\"pandas.io.parquet\"``).\n\n    Returns:\n        Any: The imported module object.\n\n    Raises:\n        ImportError: If the module cannot be imported, with a clear installation hint.\n\n    Examples:\n        >>> import zvec\n        >>> np = zvec.require_module(\"numpy\")\n        >>> pq = zvec.require_module(\"pyarrow.parquet\", mitigation=\"pyarrow\")\n\n    Note:\n        This function is intended for lazy-loading optional dependencies\n        with helpful error messages, not for core dependencies.\n    \"\"\"\n    try:\n        return importlib.import_module(module)\n    except ImportError as e:\n        package = mitigation or module\n        msg = f\"Required package '{package}' is not installed. \"\n        if \".\" in module:\n            top_level = module.split(\".\", maxsplit=1)[0]\n            msg += f\"Module '{module}' is part of '{top_level}', \"\n            if mitigation:\n                msg += f\"please pip install '{mitigation}'.\"\n            else:\n                msg += f\"please pip install '{top_level}'.\"\n        else:\n            msg += f\"Please pip install '{package}'.\"\n        raise ImportError(msg) from e\n"
  },
  {
    "path": "python/zvec/typing/__init__.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom _zvec.typing import (\n    DataType,\n    IndexType,\n    MetricType,\n    QuantizeType,\n    Status,\n    StatusCode,\n)\n\n__all__ = [\n    \"DataType\",\n    \"IndexType\",\n    \"MetricType\",\n    \"QuantizeType\",\n    \"Status\",\n    \"StatusCode\",\n]\n"
  },
  {
    "path": "python/zvec/typing/__init__.pyi",
    "content": "\"\"\"\nThis module contains the basic data types of Zvec\n\"\"\"\n\nfrom __future__ import annotations\n\nimport typing\n\n__all__: list[str] = [\n    \"DataType\",\n    \"IndexType\",\n    \"MetricType\",\n    \"QuantizeType\",\n    \"Status\",\n    \"StatusCode\",\n]\n\nclass DataType:\n    \"\"\"\n\n    Enumeration of supported data types in Zvec.\n\n    Includes scalar types, dense/sparse vector types, and array types.\n\n    Examples:\n        >>> import zvec\n        >>> print(zvec.DataType.FLOAT)\n        DataType.FLOAT\n        >>> print(zvec.DataType.VECTOR_FP32)\n        DataType.VECTOR_FP32\n\n\n    Members:\n\n      STRING\n\n      BOOL\n\n      INT32\n\n      INT64\n\n      FLOAT\n\n      DOUBLE\n\n      UINT32\n\n      UINT64\n\n      VECTOR_FP16\n\n      VECTOR_FP32\n\n      VECTOR_FP64\n\n      VECTOR_INT8\n\n      SPARSE_VECTOR_FP32\n\n      SPARSE_VECTOR_FP16\n\n      ARRAY_STRING\n\n      ARRAY_INT32\n\n      ARRAY_INT64\n\n      ARRAY_FLOAT\n\n      ARRAY_DOUBLE\n\n      ARRAY_BOOL\n\n      ARRAY_UINT32\n\n      ARRAY_UINT64\n    \"\"\"\n\n    ARRAY_BOOL: typing.ClassVar[DataType]  # value = <DataType.ARRAY_BOOL: 42>\n    ARRAY_DOUBLE: typing.ClassVar[DataType]  # value = <DataType.ARRAY_DOUBLE: 48>\n    ARRAY_FLOAT: typing.ClassVar[DataType]  # value = <DataType.ARRAY_FLOAT: 47>\n    ARRAY_INT32: typing.ClassVar[DataType]  # value = <DataType.ARRAY_INT32: 43>\n    ARRAY_INT64: typing.ClassVar[DataType]  # value = <DataType.ARRAY_INT64: 44>\n    ARRAY_STRING: typing.ClassVar[DataType]  # value = <DataType.ARRAY_STRING: 41>\n    ARRAY_UINT32: typing.ClassVar[DataType]  # value = <DataType.ARRAY_UINT32: 45>\n    ARRAY_UINT64: typing.ClassVar[DataType]  # value = <DataType.ARRAY_UINT64: 46>\n    BOOL: typing.ClassVar[DataType]  # value = <DataType.BOOL: 3>\n    DOUBLE: typing.ClassVar[DataType]  # value = <DataType.DOUBLE: 9>\n    FLOAT: typing.ClassVar[DataType]  # value = <DataType.FLOAT: 8>\n    INT32: typing.ClassVar[DataType]  
# value = <DataType.INT32: 4>\n    INT64: typing.ClassVar[DataType]  # value = <DataType.INT64: 5>\n    SPARSE_VECTOR_FP16: typing.ClassVar[\n        DataType\n    ]  # value = <DataType.SPARSE_VECTOR_FP16: 30>\n    SPARSE_VECTOR_FP32: typing.ClassVar[\n        DataType\n    ]  # value = <DataType.SPARSE_VECTOR_FP32: 31>\n    STRING: typing.ClassVar[DataType]  # value = <DataType.STRING: 2>\n    UINT32: typing.ClassVar[DataType]  # value = <DataType.UINT32: 6>\n    UINT64: typing.ClassVar[DataType]  # value = <DataType.UINT64: 7>\n    VECTOR_FP16: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP16: 22>\n    VECTOR_FP32: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP32: 23>\n    VECTOR_FP64: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP64: 24>\n    VECTOR_INT8: typing.ClassVar[DataType]  # value = <DataType.VECTOR_INT8: 26>\n    __members__: typing.ClassVar[\n        dict[str, DataType]\n    ]  # value = {'STRING': <DataType.STRING: 2>, 'BOOL': <DataType.BOOL: 3>, 'INT32': <DataType.INT32: 4>, 'INT64': <DataType.INT64: 5>, 'FLOAT': <DataType.FLOAT: 8>, 'DOUBLE': <DataType.DOUBLE: 9>, 'UINT32': <DataType.UINT32: 6>, 'UINT64': <DataType.UINT64: 7>, 'VECTOR_FP16': <DataType.VECTOR_FP16: 22>, 'VECTOR_FP32': <DataType.VECTOR_FP32: 23>, 'VECTOR_FP64': <DataType.VECTOR_FP64: 24>, 'VECTOR_INT8': <DataType.VECTOR_INT8: 26>, 'SPARSE_VECTOR_FP32': <DataType.SPARSE_VECTOR_FP32: 31>, 'SPARSE_VECTOR_FP16': <DataType.SPARSE_VECTOR_FP16: 30>, 'ARRAY_STRING': <DataType.ARRAY_STRING: 41>, 'ARRAY_INT32': <DataType.ARRAY_INT32: 43>, 'ARRAY_INT64': <DataType.ARRAY_INT64: 44>, 'ARRAY_FLOAT': <DataType.ARRAY_FLOAT: 47>, 'ARRAY_DOUBLE': <DataType.ARRAY_DOUBLE: 48>, 'ARRAY_BOOL': <DataType.ARRAY_BOOL: 42>, 'ARRAY_UINT32': <DataType.ARRAY_UINT32: 45>, 'ARRAY_UINT64': <DataType.ARRAY_UINT64: 46>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> int: ...\n    def __index__(self) -> int: ...\n 
   def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n\nclass IndexType:\n    \"\"\"\n\n    Enumeration of supported index types in Zvec.\n\n    Examples:\n        >>> import zvec\n        >>> print(zvec.IndexType.HNSW)\n        IndexType.HNSW\n\n\n    Members:\n\n      UNDEFINED\n\n      HNSW\n\n      IVF\n\n      FLAT\n\n      INVERT\n    \"\"\"\n\n    FLAT: typing.ClassVar[IndexType]  # value = <IndexType.FLAT: 4>\n    HNSW: typing.ClassVar[IndexType]  # value = <IndexType.HNSW: 1>\n    INVERT: typing.ClassVar[IndexType]  # value = <IndexType.INVERT: 10>\n    IVF: typing.ClassVar[IndexType]  # value = <IndexType.IVF: 3>\n    UNDEFINED: typing.ClassVar[IndexType]  # value = <IndexType.UNDEFINED: 0>\n    __members__: typing.ClassVar[\n        dict[str, IndexType]\n    ]  # value = {'UNDEFINED': <IndexType.UNDEFINED: 0>, 'HNSW': <IndexType.HNSW: 1>, 'IVF': <IndexType.IVF: 3>, 'FLAT': <IndexType.FLAT: 4>, 'INVERT': <IndexType.INVERT: 10>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> int: ...\n    def __index__(self) -> int: ...\n    def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n\nclass MetricType:\n    \"\"\"\n\n    Enumeration of supported distance/similarity metrics.\n\n    - COSINE: Cosine similarity.\n    - IP: Inner product (dot 
product).\n    - L2: Euclidean distance (L2 norm).\n\n    Examples:\n        >>> import zvec\n        >>> print(zvec.MetricType.COSINE)\n        MetricType.COSINE\n\n\n    Members:\n\n      COSINE\n\n      IP\n\n      L2\n    \"\"\"\n\n    COSINE: typing.ClassVar[MetricType]  # value = <MetricType.COSINE: 3>\n    IP: typing.ClassVar[MetricType]  # value = <MetricType.IP: 2>\n    L2: typing.ClassVar[MetricType]  # value = <MetricType.L2: 1>\n    __members__: typing.ClassVar[\n        dict[str, MetricType]\n    ]  # value = {'COSINE': <MetricType.COSINE: 3>, 'IP': <MetricType.IP: 2>, 'L2': <MetricType.L2: 1>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> int: ...\n    def __index__(self) -> int: ...\n    def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n\nclass QuantizeType:\n    \"\"\"\n\n    Enumeration of supported quantization types for vector compression.\n\n    Examples:\n        >>> import zvec\n        >>> print(zvec.QuantizeType.INT8)\n        QuantizeType.INT8\n\n\n    Members:\n\n      UNDEFINED\n\n      FP16\n\n      INT8\n\n      INT4\n    \"\"\"\n\n    FP16: typing.ClassVar[QuantizeType]  # value = <QuantizeType.FP16: 1>\n    INT4: typing.ClassVar[QuantizeType]  # value = <QuantizeType.INT4: 3>\n    INT8: typing.ClassVar[QuantizeType]  # value = <QuantizeType.INT8: 2>\n    UNDEFINED: typing.ClassVar[QuantizeType]  # value = <QuantizeType.UNDEFINED: 0>\n    __members__: typing.ClassVar[\n        dict[str, QuantizeType]\n    ]  # value = {'UNDEFINED': <QuantizeType.UNDEFINED: 0>, 'FP16': <QuantizeType.FP16: 1>, 'INT8': <QuantizeType.INT8: 2>, 'INT4': 
<QuantizeType.INT4: 3>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> int: ...\n    def __index__(self) -> int: ...\n    def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n\nclass Status:\n    \"\"\"\n\n    Represents the outcome of a Zvec operation.\n\n    A `Status` object is either OK (success) or carries an error code and message.\n\n    Examples:\n        >>> from zvec.typing import Status, StatusCode\n        >>> s = Status()\n        >>> print(s.ok())\n        True\n        >>> s = Status(StatusCode.INVALID_ARGUMENT, \"Field not found\")\n        >>> print(s.code() == StatusCode.INVALID_ARGUMENT)\n        True\n        >>> print(s.message())\n        Field not found\n    \"\"\"\n\n    __hash__: typing.ClassVar[None] = None\n\n    @staticmethod\n    def AlreadyExists(message: str) -> Status: ...\n    @staticmethod\n    def InternalError(message: str) -> Status: ...\n    @staticmethod\n    def InvalidArgument(message: str) -> Status: ...\n    @staticmethod\n    def NotFound(message: str) -> Status: ...\n    @staticmethod\n    def OK() -> Status:\n        \"\"\"\n        Create an OK status.\n        \"\"\"\n\n    @staticmethod\n    def PermissionDenied(message: str) -> Status: ...\n    def __eq__(self, arg0: Status) -> bool: ...\n    @typing.overload\n    def __init__(self) -> None: ...\n    @typing.overload\n    def __init__(self, code: StatusCode, message: str = \"\") -> None:\n        \"\"\"\n        Construct a status with the given code and optional message.\n\n        Args:\n            code (StatusCode): The status code.\n            message (str, 
optional): Error message. Defaults to empty string.\n        \"\"\"\n\n    def __ne__(self, arg0: Status) -> bool: ...\n    def __repr__(self) -> str: ...\n    def code(self) -> StatusCode:\n        \"\"\"\n        StatusCode: Returns the status code.\n        \"\"\"\n\n    def message(self) -> str:\n        \"\"\"\n        str: Returns the error message (may be empty).\n        \"\"\"\n\n    def ok(self) -> bool:\n        \"\"\"\n        bool: Returns True if the status is OK.\n        \"\"\"\n\nclass StatusCode:\n    \"\"\"\n\n    Enumeration of possible status codes for Zvec operations.\n\n    Used by the `Status` class to indicate success or failure reason.\n\n\n    Members:\n\n      OK\n\n      NOT_FOUND\n\n      ALREADY_EXISTS\n\n      INVALID_ARGUMENT\n\n      PERMISSION_DENIED\n\n      FAILED_PRECONDITION\n\n      RESOURCE_EXHAUSTED\n\n      UNAVAILABLE\n\n      INTERNAL_ERROR\n\n      NOT_SUPPORTED\n\n      UNKNOWN\n    \"\"\"\n\n    ALREADY_EXISTS: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.ALREADY_EXISTS: 2>\n    FAILED_PRECONDITION: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.FAILED_PRECONDITION: 5>\n    INTERNAL_ERROR: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.INTERNAL_ERROR: 8>\n    INVALID_ARGUMENT: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.INVALID_ARGUMENT: 3>\n    NOT_FOUND: typing.ClassVar[StatusCode]  # value = <StatusCode.NOT_FOUND: 1>\n    NOT_SUPPORTED: typing.ClassVar[StatusCode]  # value = <StatusCode.NOT_SUPPORTED: 9>\n    OK: typing.ClassVar[StatusCode]  # value = <StatusCode.OK: 0>\n    PERMISSION_DENIED: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.PERMISSION_DENIED: 4>\n    RESOURCE_EXHAUSTED: typing.ClassVar[\n        StatusCode\n    ]  # value = <StatusCode.RESOURCE_EXHAUSTED: 6>\n    UNAVAILABLE: typing.ClassVar[StatusCode]  # value = <StatusCode.UNAVAILABLE: 7>\n    UNKNOWN: typing.ClassVar[StatusCode]  # value = 
<StatusCode.UNKNOWN: 10>\n    __members__: typing.ClassVar[\n        dict[str, StatusCode]\n    ]  # value = {'OK': <StatusCode.OK: 0>, 'NOT_FOUND': <StatusCode.NOT_FOUND: 1>, 'ALREADY_EXISTS': <StatusCode.ALREADY_EXISTS: 2>, 'INVALID_ARGUMENT': <StatusCode.INVALID_ARGUMENT: 3>, 'PERMISSION_DENIED': <StatusCode.PERMISSION_DENIED: 4>, 'FAILED_PRECONDITION': <StatusCode.FAILED_PRECONDITION: 5>, 'RESOURCE_EXHAUSTED': <StatusCode.RESOURCE_EXHAUSTED: 6>, 'UNAVAILABLE': <StatusCode.UNAVAILABLE: 7>, 'INTERNAL_ERROR': <StatusCode.INTERNAL_ERROR: 8>, 'NOT_SUPPORTED': <StatusCode.NOT_SUPPORTED: 9>, 'UNKNOWN': <StatusCode.UNKNOWN: 10>}\n\n    def __eq__(self, other: typing.Any) -> bool: ...\n    def __getstate__(self) -> int: ...\n    def __hash__(self) -> int: ...\n    def __index__(self) -> int: ...\n    def __init__(self, value: typing.SupportsInt) -> None: ...\n    def __int__(self) -> int: ...\n    def __ne__(self, other: typing.Any) -> bool: ...\n    def __repr__(self) -> str: ...\n    def __setstate__(self, state: typing.SupportsInt) -> None: ...\n    def __str__(self) -> str: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def value(self) -> int: ...\n"
  },
  {
    "path": "python/zvec/typing/enum.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom enum import IntEnum\n\n__all__ = [\"LogLevel\", \"LogType\"]\n\n\nclass LogLevel(IntEnum):\n    \"\"\"Enumeration of logging severity levels, ordered from lowest to highest priority.\n\n    Used to control verbosity and filtering of log messages. Higher numeric values\n    indicate more severe conditions.\n\n    Note:\n        ``WARNING`` is an alias for ``WARN`` to match Python's built-in :mod:`logging`\n        module convention.\n\n    Attributes:\n        DEBUG (int): Detailed information, typically of interest only when diagnosing problems.\n        INFO (int): Confirmation that things are working as expected.\n        WARN (int): An indication that something unexpected happened, or indicative of\n            potential future problems. 
(Alias: ``WARNING``)\n        WARNING (int): Same as ``WARN``.\n        ERROR (int): Due to a more serious problem, the software has not been able\n            to perform some function.\n        FATAL (int): A serious error, indicating that the program itself may be unable\n            to continue running.\n    \"\"\"\n\n    DEBUG = 0\n    INFO = 1\n    WARN = 2\n    WARNING = 2\n    ERROR = 3\n    FATAL = 4\n\n\nclass LogType(IntEnum):\n    \"\"\"Enumeration of supported log output destinations.\n\n    Specifies where log messages should be written.\n\n    Attributes:\n        CONSOLE (int): Output logs to standard output/error (e.g., terminal or IDE console).\n        FILE (int): Write logs to a persistent file on disk.\n    \"\"\"\n\n    CONSOLE = 0\n    FILE = 1\n"
  },
  {
    "path": "python/zvec/zvec.py",
    "content": "# Copyright 2025-present the zvec project\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom __future__ import annotations\n\nfrom typing import Optional\n\nfrom _zvec import Initialize, _Collection\n\nfrom .model import Collection\nfrom .model.param import CollectionOption\nfrom .model.schema import CollectionSchema\n\n__all__ = [\"create_and_open\", \"init\", \"open\"]\n\nfrom .typing.enum import LogLevel, LogType\n\n\ndef init(\n    *,\n    log_type: Optional[LogType] = LogType.CONSOLE,\n    log_level: Optional[LogLevel] = LogLevel.WARN,\n    log_dir: Optional[str] = \"./logs\",\n    log_basename: Optional[str] = \"zvec.log\",\n    log_file_size: Optional[int] = 2048,\n    log_overdue_days: Optional[int] = 7,\n    query_threads: Optional[int] = None,\n    optimize_threads: Optional[int] = None,\n    invert_to_forward_scan_ratio: Optional[float] = None,\n    brute_force_by_keys_ratio: Optional[float] = None,\n    memory_limit_mb: Optional[int] = None,\n) -> None:\n    \"\"\"Initialize Zvec with configuration options.\n\n    This function must be called before any other operation.\n    It can only be called once — subsequent calls raise a ``RuntimeError``.\n\n    Parameters set to ``None`` are **omitted** from the configuration and\n    fall back to Zvec's internal defaults, which may be derived from the runtime\n    environment (e.g., cgroup CPU/memory limits). 
Explicitly provided values\n    always override defaults.\n\n    Args:\n        log_type (Optional[LogType], optional): Logger destination.\n            - ``LogType.CONSOLE`` (default if omitted or set to this)\n            - ``LogType.FILE``\n            - If ``None``, uses internal default (currently ``CONSOLE``).\n        log_level (Optional[LogLevel], optional): Minimum log severity.\n            Default: ``LogLevel.WARN``.\n            Accepted values: ``DEBUG``, ``INFO``, ``WARN``, ``ERROR``, ``FATAL``.\n            If ``None``, uses internal default (``WARN``).\n        log_dir (Optional[str], optional):\n            Directory for log files (only used when ``log_type=FILE``).\n            Parent directories are **not** created automatically.\n            Default: ``\"./logs\"``.\n            If ``None``, internal default is used.\n        log_basename (Optional[str], optional):\n            Base name for rotated log files (e.g., ``zvec.log.1``, ``zvec.log.2``).\n            Default: ``\"zvec.log\"``.\n        log_file_size (Optional[int], optional):\n            Max size per log file in **MB** before rotation.\n            Default: ``2048`` MB (2 GB).\n        log_overdue_days (Optional[int], optional):\n            Days to retain rotated log files before deletion.\n            Default: ``7`` days.\n        query_threads (Optional[int], optional):\n            Number of threads for query execution.\n            If ``None`` (default), inferred from available CPU cores (via cgroup).\n            Must be ≥ 1 if provided.\n        optimize_threads (Optional[int], optional):\n            Threads for background tasks (e.g., compaction, indexing).\n            If ``None``, defaults to same as ``query_threads`` or CPU count.\n        invert_to_forward_scan_ratio (Optional[float], optional):\n            Threshold to switch from inverted index to full forward scan.\n            Range: [0.0, 1.0]. 
Higher → more aggressive index skipping.\n            Default: ``0.9`` (if omitted).\n        brute_force_by_keys_ratio (Optional[float], optional):\n            Threshold to use brute-force key lookup over index.\n            Lower → prefer index; higher → prefer brute-force.\n            Range: [0.0, 1.0]. Default: ``0.1``.\n        memory_limit_mb (Optional[int], optional):\n            Soft memory cap in MB. Zvec may throttle or fail operations\n            approaching this limit.\n            If ``None``, inferred from cgroup memory limit * 0.8 (e.g., in Docker).\n            Must be > 0 if provided.\n\n    Raises:\n        RuntimeError: If Zvec is already initialized.\n        ValueError: On invalid values (e.g., negative thread count, log level out of range).\n        TypeError: If a value has incorrect type (e.g., string for ``query_threads``).\n\n    Note:\n        - All ``None`` arguments are **excluded** from the configuration payload,\n          allowing the core library to apply environment-aware defaults.\n        - This design ensures container-friendliness: in Kubernetes/Docker,\n          omitting ``memory_limit_mb`` and thread counts lets Zvec auto-adapt.\n\n    Examples:\n        Initialize with defaults (log to console, auto-detect resources):\n        >>> import zvec\n        >>> zvec.init()\n\n        Customize logging to file with rotation:\n        >>> zvec.init(\n        ...     log_type=LogType.FILE,\n        ...     log_dir=\"/var/log/zvec\",\n        ...     log_file_size=1024,\n        ...     log_overdue_days=30\n        ... )\n\n        Limit resources explicitly:\n        >>> zvec.init(\n        ...     memory_limit_mb=2048,\n        ...     query_threads=4,\n        ...     optimize_threads=2\n        ... )\n\n        Fine-tune query heuristics:\n        >>> zvec.init(\n        ...     invert_to_forward_scan_ratio=0.95,\n        ...     brute_force_by_keys_ratio=0.05\n        ... 
)\n    \"\"\"\n    # Build config dict, skipping None values\n    config_dict = {}\n    if log_type is not None:\n        if not isinstance(log_type, LogType):\n            raise TypeError(\"log_type must be LogType\")\n        config_dict[\"log_type\"] = log_type.name\n    if log_level is not None:\n        if not isinstance(log_level, LogLevel):\n            raise TypeError(\"log_level must be LogLevel\")\n        config_dict[\"log_level\"] = log_level.name\n    if log_dir is not None:\n        config_dict[\"log_dir\"] = log_dir\n    if log_basename is not None:\n        config_dict[\"log_basename\"] = log_basename\n    if log_file_size is not None:\n        config_dict[\"log_file_size\"] = log_file_size\n    if log_overdue_days is not None:\n        config_dict[\"log_overdue_days\"] = log_overdue_days\n    if query_threads is not None:\n        config_dict[\"query_threads\"] = query_threads\n    if optimize_threads is not None:\n        config_dict[\"optimize_threads\"] = optimize_threads\n    if invert_to_forward_scan_ratio is not None:\n        config_dict[\"invert_to_forward_scan_ratio\"] = invert_to_forward_scan_ratio\n    if brute_force_by_keys_ratio is not None:\n        config_dict[\"brute_force_by_keys_ratio\"] = brute_force_by_keys_ratio\n    if memory_limit_mb is not None:\n        config_dict[\"memory_limit_mb\"] = memory_limit_mb\n\n    Initialize(config_dict)\n\n\ndef create_and_open(\n    path: str,\n    schema: CollectionSchema,\n    option: Optional[CollectionOption] = None,\n) -> Collection:\n    \"\"\"Create a new collection and open it for use.\n\n    If a collection already exists at the given path, it may raise an error\n    depending on the underlying implementation.\n\n    Args:\n        path (str): Path or name of the collection to create.\n        schema (CollectionSchema): Schema defining the structure of the collection.\n        option (CollectionOption): Configuration options\n            for opening the collection. 
Defaults to a default-constructed\n            ``CollectionOption()`` if not provided.\n\n    Returns:\n        Collection: An opened collection instance ready for operations.\n\n    Examples:\n        >>> import zvec\n        >>> schema = zvec.CollectionSchema(\n        ...     name=\"my_collection\",\n        ...     fields=[zvec.FieldSchema(\"id\", zvec.DataType.INT64, nullable=True)]\n        ... )\n        >>> coll = zvec.create_and_open(\"./my_collection\", schema)\n    \"\"\"\n    if not isinstance(path, str):\n        raise TypeError(\"path must be a string\")\n    if not isinstance(schema, CollectionSchema):\n        raise TypeError(\"schema must be a CollectionSchema\")\n\n    option = option or CollectionOption()\n    if not isinstance(option, CollectionOption):\n        raise TypeError(\"option must be a CollectionOption\")\n\n    _collection = _Collection.CreateAndOpen(path, schema._get_object(), option)\n    return Collection._from_core(_collection)\n\n\ndef open(path: str, option: CollectionOption = CollectionOption()) -> Collection:\n    \"\"\"Open an existing collection from disk.\n\n    The collection must have been previously created with ``create_and_open``.\n\n    Args:\n        path (str): Path or name of the existing collection.\n        option (CollectionOption): Configuration options\n            for opening the collection. Defaults to a default-constructed\n            ``CollectionOption()`` if not provided.\n\n    Returns:\n        Collection: An opened collection instance.\n\n    Examples:\n        >>> import zvec\n        >>> coll = zvec.open(\"./my_collection\")\n    \"\"\"\n    _collection = _Collection.Open(path, option)\n    return Collection._from_core(_collection)\n"
  },
  {
    "path": "scripts/README.md",
    "content": ""
  },
  {
    "path": "scripts/build_android.sh",
    "content": "#!/bin/bash\nset -e\nCURRENT_DIR=$(pwd)\n\nABI=${1:-\"arm64-v8a\"}\nAPI_LEVEL=${2:-21}\nBUILD_TYPE=${3:-\"Release\"}\n\n# step1: use host env to compile protoc\necho \"step1: building protoc for host...\"\nHOST_BUILD_DIR=\"build_host\"\nmkdir -p $HOST_BUILD_DIR\ncd $HOST_BUILD_DIR\n\ncmake -DCMAKE_BUILD_TYPE=\"$BUILD_TYPE\" ..\nmake -j protoc\nPROTOC_EXECUTABLE=$CURRENT_DIR/$HOST_BUILD_DIR/bin/protoc\ncd $CURRENT_DIR\n\necho \"step1: Done!!!\"\n\n# step2: cross build zvec based on android ndk\necho \"step2: building zvec for android...\"\n\n# reset thirdparty directory\ngit submodule foreach --recursive 'git stash --include-untracked'\n\nexport ANDROID_SDK_ROOT=$HOME/Library/Android/sdk\nexport ANDROID_HOME=$ANDROID_SDK_ROOT\nexport ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/28.2.13676358\nexport CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake\n\nexport PATH=$PATH:$ANDROID_SDK_ROOT/cmdline-tools/latest/bin\nexport PATH=$PATH:$ANDROID_SDK_ROOT/platform-tools\nexport PATH=$PATH:$ANDROID_NDK_HOME\n\nif [ -z \"$ANDROID_NDK_HOME\" ]; then\n    echo \"error: ANDROID_NDK_HOME env not set\"\n    echo \"please install NDK and set env variable ANDROID_NDK_HOME\"\n    exit 1\nfi\n\nBUILD_DIR=\"build_android_${ABI}\"\nmkdir -p $BUILD_DIR\ncd $BUILD_DIR\n\necho \"configure CMake...\"\ncmake \\\n    -DANDROID_NDK=\"$ANDROID_NDK_HOME\" \\\n    -DCMAKE_TOOLCHAIN_FILE=\"$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake\" \\\n    -DANDROID_ABI=\"$ABI\" \\\n    -DANDROID_NATIVE_API_LEVEL=\"$API_LEVEL\" \\\n    -DANDROID_STL=\"c++_static\" \\\n    -DCMAKE_BUILD_TYPE=\"$BUILD_TYPE\" \\\n    -DBUILD_PYTHON_BINDINGS=OFF \\\n    -DBUILD_TOOLS=OFF \\\n    -DCMAKE_INSTALL_PREFIX=\"./install\" \\\n    -DGLOBAL_CC_PROTOBUF_PROTOC=$PROTOC_EXECUTABLE \\\n    ../\n\necho \"building...\"\nCORE_COUNT=$(sysctl -n hw.ncpu)\nmake -j$CORE_COUNT\n\necho \"step2: Done!!!\""
  },
  {
    "path": "scripts/gcov.sh",
    "content": "#!/bin/bash\n\nproject_name=proxima-zvec\ngcov_tool=gcov\nzip_html=false\noutput_name=html\nkeep_info=false\n\nscript_dir=$(cd \"$(dirname \"$0\")\"; pwd)\nsource_base=$(dirname \"$script_dir\")\nfilter_list=\"'*/tests/*' '*/thirdparty/*' '*/deps/*' '*/proto/*' '*/external/*' '*/sqlengine/antlr/gen/*'\"\n\nwhile getopts t:p:o:zk option; do\n  case \"$option\" in\n  t)\n    gcov_tool=$OPTARG;;\n  p)\n    project_name=$OPTARG;;\n  o)\n    output_name=$OPTARG;;\n  z)\n    zip_html=true;;\n  k)\n    keep_info=true;;\n  esac\ndone\n\n# Process sources\nlcov -c -b \"$source_base\" -d . -o $project_name.lcov.info --gcov-tool=$gcov_tool --no-external || exit 1\neval $(echo lcov -r $project_name.lcov.info -o $project_name-filtered.lcov.info $filter_list) || exit 1\n\n# Gather HTML files\ngenhtml -t \"$project_name\" -o $output_name $project_name-filtered.lcov.info || exit 1\nif [ \"$keep_info\" = false ]; then\n  rm -rf *.lcov.info\nfi\n\n# Zip HTML files\nif $zip_html ; then\n  zip -r $output_name.zip $output_name/\nfi\n"
  },
  {
    "path": "src/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\n# Retrieve version from git repository\ngit_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})\n\n# Add repository\ncc_directory(ailego)\ncc_directory(turbo)\ncc_directory(core)\ncc_directory(db)\nif(BUILD_PYTHON_BINDINGS)\n    cc_directory(binding)\nendif()\n"
  },
  {
    "path": "src/ailego/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nfind_package(Threads REQUIRED)\n\nif(UNIX AND NOT APPLE AND NOT ANDROID)\n    find_library(LIB_RT NAMES rt)\nelse()\n    set(LIB_RT \"\")\nendif()\n\ngit_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR})\nfile(GLOB_RECURSE ALL_SRCS *.cc *.c *.h)\n\nset(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})\n\nif(UNIX AND NOT APPLE)\n    list(APPEND EXTRA_LIBS ${LIB_RT})\nendif()\n\nif(NOT ANDROID AND AUTO_DETECT_ARCH)\n    if(CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|i686|i386|x64\")\n        setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)\n        message(STATUS \"best compiler march, sse: \" ${MATH_MARCH_FLAG_SSE} \", avx2: \" ${MATH_MARCH_FLAG_AVX2} \", avx512: \" ${MATH_MARCH_FLAG_AVX512} \", avx512fp16: \" ${MATH_MARCH_FLAG_AVX512FP16})\n\n        file(GLOB_RECURSE MATH_FILES_SSE\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_sse.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_sse.c\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_sse.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_sse.c\n            )\n\n        file(GLOB_RECURSE MATH_FILES_AVX2\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx2.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx2.c\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx2.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx2.c\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx.c\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx.c\n        )\n\n        file(GLOB_RECURSE MATH_FILES_AVX512\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.cc\n            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.c\n            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.cc\n            
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.c\n        )\n\n        file(GLOB_RECURSE MATH_FILES_AVX512FP16\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.c\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.c\n        )\n\n        foreach(MATH_FILE ${MATH_FILES_SSE})\n            set_source_files_properties(\n                ${MATH_FILE}\n                PROPERTIES\n                COMPILE_FLAGS \"${MATH_MARCH_FLAG_SSE}\"\n            )\n        endforeach()\n\n        foreach(MATH_FILE ${MATH_FILES_AVX2})\n            set_source_files_properties(\n                ${MATH_FILE}\n                PROPERTIES\n                COMPILE_FLAGS \"${MATH_MARCH_FLAG_AVX2}\"\n            )\n        endforeach()\n\n        foreach(MATH_FILE ${MATH_FILES_AVX512})\n            set_source_files_properties(\n                ${MATH_FILE}\n                PROPERTIES\n                COMPILE_FLAGS \"${MATH_MARCH_FLAG_AVX512}\"\n            )\n        endforeach()\n\n        foreach(MATH_FILE ${MATH_FILES_AVX512FP16})\n        set_source_files_properties(\n            ${MATH_FILE}\n            PROPERTIES\n            COMPILE_FLAGS \"${MATH_MARCH_FLAG_AVX512FP16}\"\n        )\n    endforeach()\n    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES \"aarch64|arm64|ARM64\")\n      # set(CMAKE_CXX_FLAGS \"-march=armv8-a\")\n      # set(CMAKE_C_FLAGS \"-march=armv8-a\")\n      set(MATH_MARCH_FLAG_NEON \"-march=armv8-a\")\n\n      file(GLOB_RECURSE MATH_FILES_NEON\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c\n          
${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.c\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.cc\n          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.c\n      )\n\n      foreach(MATH_FILE ${MATH_FILES_NEON})\n          set_source_files_properties(\n              ${MATH_FILE}\n              PROPERTIES\n              COMPILE_FLAGS \"${MATH_MARCH_FLAG_NEON}\"\n          )\n      endforeach()\n    endif()\nendif()\n\ncc_library(\n    NAME zvec_ailego STATIC STRICT PACKED\n    SRCS    ${ALL_SRCS}\n    LIBS    ${EXTRA_LIBS}\n            Arrow::arrow_static\n            Arrow::parquet_static\n    VERSION \"${GIT_SRCS_VER}\"\n)\n"
  },
  {
    "path": "src/ailego/algorithm/binary_quantizer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"binary_quantizer.h\"\n#include <algorithm>\n#include <cmath>\n#include <cstring>\n#include <iostream>\n#include <numeric>\n#include <ailego/math/normalizer.h>\n\nnamespace zvec {\nnamespace ailego {\n\n//! Feed the training data\nbool BinaryQuantizer::feed(const float *vec, size_t dim) {\n  for (size_t i = 0; i < dim; ++i) {\n    data_.emplace_back(vec[i]);\n  }\n  return true;\n}\n\n//! Train the quantizer\nbool BinaryQuantizer::train(void) {\n  return true;\n}\n\n//! Quantize data: encode the float input to uint32_t output\nvoid BinaryQuantizer::encode(const float *in, size_t dim, uint32_t *out) const {\n  for (size_t i = 0; i < dim; i += 32) {\n    size_t remain = i + 32 <= dim ? 32 : dim - i;\n    uint32_t data = 0;\n    uint32_t mask = 1;\n\n    for (size_t j = 0; j < remain; j++) {\n      if (in[i + j] >= threshold_) {\n        data |= mask;\n      }\n\n      mask <<= 1;\n    }\n\n    *out = data;\n    out++;\n  }\n}\n\n//! De-quantize data: decode the input uint32_t to float output\n//!   bit value 1 will be mapped to 1.0\n//!   
bit value 0 will be mapped to -1.0\nvoid BinaryQuantizer::decode(const uint32_t *in, size_t dim, float *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    uint8_t bit = (in[i >> 5] >> (i & 31)) & 0x01;\n\n    if (bit == 1) {\n      out[i] = 1.0f;\n    } else {\n      out[i] = -1.0f;\n    }\n\n    // std::cout << \"dim: \" << i << \", value: \" << (size_t)bit << std::endl;\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/algorithm/binary_quantizer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <vector>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Binary Quantization Algorithm\n */\nclass BinaryQuantizer {\n public:\n  //! Constructor\n  BinaryQuantizer(void) {}\n\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Quantize data: encode the float input to uint32_t output\n  void encode(const float *in, size_t dim, uint32_t *out) const;\n\n  //! De-quantize data: decode the input uint32_t to float output\n  void decode(const uint32_t *in, size_t dim, float *out) const;\n\n  //! Get encoded elements in type of uint32_t\n  static size_t EncodedSizeInBinary32(size_t dim) {\n    return (dim + 31) / 32;\n  }\n\n  //! Set quantization threshold\n  void set_threshold(float threshold) {\n    threshold_ = threshold;\n  }\n\n  //! Get quantization threshold\n  float threshold(void) const {\n    return threshold_;\n  }\n\n private:\n  //! Disable them\n  BinaryQuantizer(const BinaryQuantizer &) = delete;\n  BinaryQuantizer &operator=(const BinaryQuantizer &) = delete;\n\n private:\n  //! Members\n  std::vector<float> data_{};\n  float threshold_{0.0f};\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/algorithm/integer_quantizer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"integer_quantizer.h\"\n#include <algorithm>\n#include <cmath>\n#include <cstring>\n#include <numeric>\n#include <ailego/math/normalizer.h>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n//! Make smooth the distribution to eliminate zero in hist\nstatic inline void MakeSmooth(std::vector<float> &dist) {\n  constexpr float epsilon = std::numeric_limits<float>::epsilon();\n\n  // L1 Normalize first\n  float norm = 1.0f;\n  Normalizer<float>::L1(dist.data(), dist.size(), &norm);\n\n  size_t zero_count = std::count_if(dist.begin(), dist.end(), [](float val) {\n    return (std::abs(val) < std::numeric_limits<float>::epsilon());\n  });\n  size_t nonzero_count = dist.size() - zero_count;\n\n  // Double check\n  if (nonzero_count == 0 || zero_count == 0) {\n    return;\n  }\n\n  float y = epsilon * zero_count / static_cast<float>(nonzero_count);\n  for (auto &it : dist) {\n    if (std::abs(it) < epsilon) {\n      it += epsilon;\n    } else {\n      it -= y;\n    }\n  }  // end of for\n}\n\n//! 
Compute the Entropy of distribution p/q by  Kullback-Leibler Divergence\nstatic inline double ComputeKlDivergence(const std::vector<float> &p,\n                                         const std::vector<float> &q) {\n  if (p.size() != q.size() || p.size() == 0) {\n    return std::numeric_limits<float>::max();\n  }\n\n  double v = 0.0f;\n  for (size_t i = 0; i != p.size(); ++i) {\n    if (p[i] == 0 || q[i] == 0) {\n      return std::numeric_limits<double>::max();\n    }\n    v += p[i] * std::log(static_cast<double>(p[i]) / static_cast<double>(q[i]));\n  }\n  return v;\n}\n\n//! Expand the quantization distribution to origin distribution in\n//! [-threshold, threshold]\nstatic inline void ExpandCandidateDistribution(\n    const std::vector<uint32_t> &distribution,\n    const std::vector<float> &quantized_distribution, size_t threshold,\n    std::vector<float> *expand_distribution) {\n  expand_distribution->resize(threshold * 2, 0);\n  float merged_cnt = static_cast<float>(expand_distribution->size()) /\n                     quantized_distribution.size();\n  size_t left_boundary = distribution.size() / 2 - threshold;\n\n  for (size_t i = 0; i < quantized_distribution.size(); ++i) {\n    float start = i * merged_cnt;\n    float end = start + merged_cnt;\n    const size_t start_ceil = static_cast<size_t>(std::ceil(start));\n    const size_t end_floor = static_cast<size_t>(std::floor(end));\n    float left_ratio = static_cast<float>(start_ceil) - start;\n    float right_ratio = end - static_cast<float>(end_floor);\n    float nonzero_count = 0;\n\n    //! Count the non-zeros bins, if the histogram bin is partially included,\n    //! 
non-zero bins is also partially counted\n    if (left_ratio > 0 && left_boundary + start_ceil > 0) {\n      if (distribution[left_boundary + start_ceil - 1] != 0) {\n        nonzero_count += left_ratio;\n      }\n    }\n    if (right_ratio > 0 && left_boundary + end_floor < distribution.size()) {\n      if (distribution[left_boundary + end_floor] != 0) {\n        nonzero_count += right_ratio;\n      }\n    }\n    for (size_t j = start_ceil; j < end_floor; j++) {\n      nonzero_count += distribution[left_boundary + j] != 0;\n    }\n    if (nonzero_count == 0) {\n      continue;\n    }\n\n    //! expand the quantized value\n    float value = quantized_distribution[i] / nonzero_count;\n    if (left_ratio > 0 && start_ceil > 0) {\n      (*expand_distribution)[start_ceil - 1] += value * left_ratio;\n    }\n    if (right_ratio > 0 && end_floor < expand_distribution->size()) {\n      (*expand_distribution)[end_floor] += value * right_ratio;\n    }\n    for (size_t j = start_ceil; j < end_floor; j++) {\n      if (distribution[left_boundary + j] != 0) {\n        (*expand_distribution)[j] = value;\n      }\n    }  // end of for\n  }  // end of for\n}\n\n/*! 
Compute quantization threshold bins\n *  Implement Int8 Quantization Algorithm ref:\n *  http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf\n */\nstatic inline size_t ComputeThreshold(const std::vector<uint32_t> &hist,\n                                      const size_t target_bins) {\n  std::vector<float> P_distribution(hist.size());\n  size_t zero_point_index = hist.size() / 2;\n\n  size_t start_bin = target_bins / 2;\n  size_t end_bin = hist.size() / 2;\n  size_t negative_outliers_count = 0;\n  size_t positive_outliers_count = 0;\n  double min_divergence = std::numeric_limits<double>::max();\n  size_t target_threshold = end_bin;\n\n  for (size_t threshold = start_bin; threshold <= end_bin; ++threshold) {\n    negative_outliers_count += hist[zero_point_index - threshold];\n    positive_outliers_count += hist[zero_point_index + threshold - 1];\n  }\n\n  //! for each zero-axised quantization range: [-threshold, threshold], search\n  //! the best solution\n  for (size_t threshold = start_bin; threshold <= end_bin; ++threshold) {\n    P_distribution.resize(threshold * 2);\n    auto p_hist = &hist[zero_point_index - threshold];\n    for (size_t i = 0; i != P_distribution.size(); ++i) {\n      P_distribution[i] = static_cast<float>(p_hist[i]);\n    }\n\n    negative_outliers_count -= hist[zero_point_index - threshold];\n    positive_outliers_count -= hist[zero_point_index + threshold - 1];\n    P_distribution[0] += negative_outliers_count;\n    P_distribution[P_distribution.size() - 1] += positive_outliers_count;\n\n    //! 
Quantize the bins in range [-threshold, threshold] to target_bins\n    std::vector<float> Q_distribution(target_bins, 0);\n    float merged_cnt = static_cast<float>(threshold * 2) / target_bins;\n    size_t left_boundary = zero_point_index - threshold;\n    for (size_t i = 0; i < target_bins; ++i) {\n      float start = i * merged_cnt;\n      float end = start + merged_cnt;\n      const size_t start_ceil = static_cast<size_t>(std::ceil(start));\n      const size_t end_floor = static_cast<size_t>(std::floor(end));\n      if (left_boundary + start_ceil > 0) {\n        Q_distribution[i] +=\n            ((float)start_ceil - start) * hist[left_boundary + start_ceil - 1];\n      }\n      if (left_boundary + end_floor < hist.size()) {\n        Q_distribution[i] +=\n            (end - (float)end_floor) * hist[left_boundary + end_floor];\n      }\n\n      for (size_t j = start_ceil; j < end_floor; j++) {\n        Q_distribution[i] += hist[left_boundary + j];\n      }\n    }\n    std::vector<float> Q_expand_distribution;\n    ExpandCandidateDistribution(hist, Q_distribution, threshold,\n                                &Q_expand_distribution);\n\n    //! Compute Kullback-Leibler Divergence, normalize the smooth the data\n    //! first. 
Ref: http://hanj.cs.illinois.edu/cs412/bk3/KL-divergence.pdf\n    MakeSmooth(P_distribution);\n    MakeSmooth(Q_expand_distribution);\n    double divergence =\n        ComputeKlDivergence(P_distribution, Q_expand_distribution);\n\n    if (divergence < min_divergence) {\n      min_divergence = divergence;\n      target_threshold = threshold;\n    }\n  }\n  return target_threshold;\n}\n\n// Quantize the value in range\ntemplate <int RANGE_MIN, int RANGE_MAX>\nstatic inline float QuantizeValue(float val, float scale, float bias) {\n  val = (val + bias) * scale;\n\n  if (val > RANGE_MAX) {\n    val = RANGE_MAX;\n  } else if (val < RANGE_MIN) {\n    val = RANGE_MIN;\n  }\n  return val;\n}\n\n// Init the historgram params\n#define INIT_HISTOGRAM()                                                      \\\n  {                                                                           \\\n    if (histogram_bins_ == 0) {                                               \\\n      size_t range = non_bias_                                                \\\n                         ? 
std::max(std::abs(MIN_VALUE), std::abs(MAX_VALUE)) \\\n                         : (MAX_VALUE - MIN_VALUE);                           \\\n      histogram_bins_ = std::max<size_t>(4096u, range * 8);                   \\\n    }                                                                         \\\n    histogram_.resize((histogram_bins_ + 1) >> 1 << 1);                       \\\n    if (non_bias_) {                                                          \\\n      bias_ = 0.0f;                                                           \\\n      auto val = std::max(std::abs(max_), std::abs(min_));                    \\\n      left_boundary_ = -val;                                                  \\\n      hist_interval_ = (val * 2) / static_cast<float>(histogram_.size());     \\\n    } else {                                                                  \\\n      bias_ = -static_cast<float>(min_ + (max_ - min_) * 0.5);                \\\n      left_boundary_ = min_;                                                  \\\n      hist_interval_ = (max_ - min_) / static_cast<float>(histogram_.size()); \\\n    }                                                                         \\\n  }\n\n// Feed vector and update the historgram\n#define UPDATE_HISTOGRAM(vec, dim)                                            \\\n  {                                                                           \\\n    if (max_ < min_) {                                                        \\\n      return false;                                                           \\\n    }                                                                         \\\n    if (histogram_.size() == 0) {                                             \\\n      INIT_HISTOGRAM()                                                        \\\n    }                                                                         \\\n    for (size_t i = 0; i < dim; ++i) {                                        \\\n      
ssize_t index = 0;                                                      \\\n      if (hist_interval_ > 0.0) {                                             \\\n        index =                                                               \\\n            static_cast<ssize_t>((vec[i] - left_boundary_) / hist_interval_); \\\n      }                                                                       \\\n      if (index < 0) {                                                        \\\n        index = 0;                                                            \\\n      } else if ((size_t)index >= histogram_.size()) {                        \\\n        index = histogram_.size() - 1;                                        \\\n      }                                                                       \\\n      ailego_assert_with((size_t)index < histogram_.size(), \"Invalid index\"); \\\n      histogram_[index] += 1;                                                 \\\n    }                                                                         \\\n    return true;                                                              \\\n  }\n\n// Train the quantizer\n#define TRAIN_QUANTIZER()                                                \\\n  {                                                                      \\\n    auto sum = std::accumulate(histogram_.begin(), histogram_.end(), 0); \\\n    if (sum == 0) {                                                      \\\n      return false;                                                      \\\n    }                                                                    \\\n    size_t target_bins =                                                 \\\n        ailego_align(static_cast<size_t>(MAX_VALUE - MIN_VALUE), 2);     \\\n    auto threshold_bins = ComputeThreshold(histogram_, target_bins);     \\\n    auto threshold =                                                     \\\n        (static_cast<float>(threshold_bins) + 0.5f) * 
hist_interval_;    \\\n    scale_ = target_bins / 2 / threshold;                                \\\n    if (!non_bias_) {                                                    \\\n      bias_ += (MAX_VALUE + MIN_VALUE) * 0.5f / scale_;                  \\\n    }                                                                    \\\n    scale_reciprocal_ = 1 / scale_;                                      \\\n    return true;                                                         \\\n  }\n\n// Feed the INT16 quantizer\nbool EntropyInt16Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the INT16 quantizer\nbool EntropyInt16Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to INT16\nvoid EntropyInt16Quantizer::encode(const float *in, size_t dim,\n                                   int16_t *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = static_cast<int16_t>(\n        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));\n  }\n}\n\n// Decode from INT16\nvoid EntropyInt16Quantizer::decode(const int16_t *in, size_t dim,\n                                   float *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = in[i] * this->scale_reciprocal() - this->bias();\n  }\n}\n\n// Feed the UINT16 quantizer\nbool EntropyUInt16Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the UINT16 quantizer\nbool EntropyUInt16Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to UINT16\nvoid EntropyUInt16Quantizer::encode(const float *in, size_t dim,\n                                    uint16_t *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = static_cast_from_float_to_uint16(\n        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));\n  }\n}\n\n// Decode from UINT16\nvoid EntropyUInt16Quantizer::decode(const uint16_t *in, size_t dim,\n                                    float *out) const {\n  for 
(size_t i = 0; i < dim; ++i) {\n    out[i] = in[i] * this->scale_reciprocal() - this->bias();\n  }\n}\n\n// Feed the INT8 quantizer\nbool EntropyInt8Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the INT8 quantizer\nbool EntropyInt8Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to INT8\nvoid EntropyInt8Quantizer::encode(const float *in, size_t dim,\n                                  int8_t *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = static_cast<int8_t>(\n        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));\n  }\n}\n\n// Decode from INT8\nvoid EntropyInt8Quantizer::decode(const int8_t *in, size_t dim,\n                                  float *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = in[i] * this->scale_reciprocal() - this->bias();\n  }\n}\n\n// Feed the UINT8 quantizer\nbool EntropyUInt8Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the UINT8 quantizer\nbool EntropyUInt8Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to UINT8\nvoid EntropyUInt8Quantizer::encode(const float *in, size_t dim,\n                                   uint8_t *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = static_cast_from_float_to_uint8(\n        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));\n  }\n}\n\n// Decode from UINT8\nvoid EntropyUInt8Quantizer::decode(const uint8_t *in, size_t dim,\n                                   float *out) const {\n  for (size_t i = 0; i < dim; ++i) {\n    out[i] = in[i] * this->scale_reciprocal() - this->bias();\n  }\n}\n\n// Feed the INT4 quantizer\nbool EntropyInt4Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the INT4 quantizer\nbool EntropyInt4Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to INT4\nvoid EntropyInt4Quantizer::encode(const float *in, size_t dim,\n    
                              uint8_t *out) const {\n  ailego_assert_with(dim % 2 == 0, \"Dimension must be aligned with 2\");\n\n  for (size_t i = 0; i < dim; i += 2) {\n    float lo = QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_);\n    float hi = QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i + 1], scale_, bias_);\n    out[i / 2] = (static_cast_from_float_to_uint8(std::round(hi)) << 4) |\n                 (static_cast_from_float_to_uint8(std::round(lo)) & 0xF);\n  }\n}\n\n// Decode from INT4\nvoid EntropyInt4Quantizer::decode(const uint8_t *in, size_t dim,\n                                  float *out) const {\n  ailego_assert_with(dim % 2 == 0, \"Dimension must be aligned with 2\");\n\n  size_t size = dim / 2;\n  for (size_t i = 0; i < size; i += 1) {\n    uint8_t v = in[i];\n    int8_t lo = (static_cast<int8_t>(v << 4) >> 4);\n    int8_t hi = (static_cast<int8_t>(v & 0xf0) >> 4);\n    out[2 * i] = lo * this->scale_reciprocal() - this->bias();\n    out[2 * i + 1] = hi * this->scale_reciprocal() - this->bias();\n  }\n}\n\n// Feed the UINT4 quantizer\nbool EntropyUInt4Quantizer::feed(const float *vec, size_t dim) {\n  UPDATE_HISTOGRAM(vec, dim)\n}\n\n// Train the UINT4 quantizer\nbool EntropyUInt4Quantizer::train(void) {\n  TRAIN_QUANTIZER()\n}\n\n// Encode to UINT4\nvoid EntropyUInt4Quantizer::encode(const float *in, size_t dim,\n                                   uint8_t *out) const {\n  ailego_assert_with(dim % 2 == 0, \"Dimension must be aligned with 2\");\n\n  for (size_t i = 0; i < dim; i += 2) {\n    float lo = QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_);\n    float hi = QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i + 1], scale_, bias_);\n    out[i / 2] = (static_cast_from_float_to_uint8(std::round(hi)) << 4) |\n                 (static_cast_from_float_to_uint8(std::round(lo)) & 0xF);\n  }\n}\n\n// Decode from UINT4\nvoid EntropyUInt4Quantizer::decode(const uint8_t *in, size_t dim,\n                                   float *out) const {\n  
ailego_assert_with(dim % 2 == 0, \"Dimension must be aligned with 2\");\n\n  size_t size = dim / 2;\n  for (size_t i = 0; i < size; i += 1) {\n    uint8_t v = in[i];\n    out[2 * i] = (v & 0xf) * this->scale_reciprocal() - this->bias();\n    out[2 * i + 1] = (v >> 4) * this->scale_reciprocal() - this->bias();\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/algorithm/integer_quantizer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <limits>\n#include <vector>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Entropy-based Integer Quantization Algorithm\n */\ntemplate <typename T, int RANGE_MIN, int RANGE_MAX>\nclass EntropyIntegerQuantizer {\n public:\n  //! Primitive Built-in Types to store the quantized data\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Constants\n  constexpr static int MIN_VALUE = RANGE_MIN;\n  constexpr static int MAX_VALUE = RANGE_MAX;\n\n  // Check supporting type\n  static_assert(std::is_integral<T>::value, \"ValueType must be integral\");\n\n  // Check template values\n  static_assert(RANGE_MIN < RANGE_MAX, \"Invalid value range\");\n\n  //! Constructor\n  EntropyIntegerQuantizer(void) {}\n\n  //! Set histogram bins in train\n  void set_histogram_bins(size_t bins) {\n    if (bins > (RANGE_MAX - RANGE_MIN)) {\n      histogram_bins_ = bins;\n    }\n  }\n\n  //! Set quantization params scale\n  void set_scale(float val) {\n    if (val > 0.0f) {\n      scale_ = val;\n      scale_reciprocal_ = 1 / scale_;\n    }\n  }\n\n  //! Set quantization params bias\n  void set_bias(float val) {\n    bias_ = val;\n  }\n\n  //! Set quantization params max\n  void set_max(float val) {\n    max_ = val;\n  }\n\n  //! 
Set quantization params min\n  void set_min(float val) {\n    min_ = val;\n  }\n\n  //! Set quantization params non bias\n  void set_non_bias(bool val) {\n    non_bias_ = val;\n  }\n\n  //! Get histogram bins in train\n  size_t histogram_bins(void) const {\n    return histogram_bins_;\n  }\n\n  //! Get quantization params scale\n  float scale(void) const {\n    return scale_;\n  }\n\n  //! Get quantization params bias\n  float bias(void) const {\n    return bias_;\n  }\n\n  //! Get quantization params max\n  float max(void) const {\n    return max_;\n  }\n\n  //! Get quantization params min\n  float min(void) const {\n    return min_;\n  }\n\n  //! Get quantization params non bias\n  bool non_bias(void) const {\n    return non_bias_;\n  }\n\n  //! Retrieve the scale reciprocal for decoding\n  float scale_reciprocal(void) const {\n    return scale_reciprocal_;\n  }\n\n protected:\n  //! Disable them\n  EntropyIntegerQuantizer(const EntropyIntegerQuantizer &) = delete;\n  EntropyIntegerQuantizer &operator=(const EntropyIntegerQuantizer &) = delete;\n\n  //! Members\n  size_t histogram_bins_{0};\n  float hist_interval_{1.0f};\n  float max_{std::numeric_limits<float>::min()};\n  float min_{std::numeric_limits<float>::max()};\n  float bias_{0.0f};\n  float scale_{0.0f};\n  float scale_reciprocal_{0.0f};\n  float left_boundary_{0.0f};\n  bool non_bias_{false};\n  std::vector<uint32_t> histogram_{};\n};\n\n/*! INT16 Quantizer\n */\nclass EntropyInt16Quantizer\n    : public EntropyIntegerQuantizer<int16_t, -32767, 32767> {\n public:\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to int16\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from int16\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n/*! 
UINT16 Quantizer\n */\nclass EntropyUInt16Quantizer\n    : public EntropyIntegerQuantizer<uint16_t, 0, 65535> {\n public:\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to uint16\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from uint16\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n/*! INT8 Quantizer\n */\nclass EntropyInt8Quantizer : public EntropyIntegerQuantizer<int8_t, -127, 127> {\n public:\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to int8\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from int8\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n/*! UINT8 Quantizer\n */\nclass EntropyUInt8Quantizer : public EntropyIntegerQuantizer<uint8_t, 0, 255> {\n public:\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to uint8\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from uint8\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n/*! INT4 Quantizer\n */\nclass EntropyInt4Quantizer : public EntropyIntegerQuantizer<uint8_t, -8, 7> {\n public:\n  //! Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to int4\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from int4\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n/*! UINT4 Quantizer\n */\nclass EntropyUInt4Quantizer : public EntropyIntegerQuantizer<uint8_t, 0, 15> {\n public:\n  //! 
Feed the training data\n  bool feed(const float *vec, size_t dim);\n\n  //! Train the quantizer\n  bool train(void);\n\n  //! Encode float vector to uint4\n  void encode(const float *in, size_t dim, ValueType *out) const;\n\n  //! Decode to float vector from uint4\n  void decode(const ValueType *in, size_t dim, float *out) const;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/algorithm/kmeans.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cmath>\n#include <numeric>\n#include <random>\n#include <ailego/container/vector_array.h>\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/hamming_distance_matrix.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <ailego/utility/matrix_helper.h>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"lloyd_cluster.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! K-MC2 Centroids Generator\n */\ntemplate <typename T, typename TPool>\nclass Kmc2CentroidsGenerator {\n public:\n  //! Type of values\n  using OwnerType = typename std::decay<T>::type;\n  using ContainerType = typename OwnerType::ContainerType;\n  using ContextType = typename OwnerType::ContextType;\n  using ValueType = typename OwnerType::ValueType;\n  using StoreType = typename OwnerType::StoreType;\n  using ThreadPoolType = TPool;\n\n  //! constexpr variables\n  constexpr static size_t BatchCount = OwnerType::BatchCount;\n\n  //! 
Generate centroids\n  void operator()(OwnerType *owner, ThreadPoolType &pool) const {\n    if (chain_length_ == 0) {\n      this->init_centroids_random(owner);\n    } else if (!assumption_free_) {\n      this->init_centroids_kmc2(owner, pool);\n    } else {\n      this->init_centroids_afkmc2(owner, pool);\n    }\n  }\n\n  //! Retrieve the markov chain length\n  size_t chain_length(void) const {\n    return chain_length_;\n  }\n\n  //! Set the mutable markov chain length\n  void set_chain_length(size_t len) {\n    chain_length_ = len;\n  }\n\n  //! Retrieve assumption free option\n  bool assumption_free(void) const {\n    return assumption_free_;\n  }\n\n  //! Set the assumption free option\n  void set_assumption_free(bool val) {\n    assumption_free_ = val;\n  }\n\n protected:\n  //! Initialize centroids randomly\n  void init_centroids_random(OwnerType *owner) const {\n    RandomSelectBenches(owner->feature_cache(), owner->feature_matrix(),\n                        owner->k_value(), owner->mutable_centroids());\n  }\n\n  //! 
Initialize centroids with K-MC2\n  void init_centroids_kmc2(OwnerType *owner, ThreadPoolType &pool) const {\n    const auto &matrix = owner->feature_matrix();\n    const auto &cache = owner->feature_cache();\n    auto *centroids = owner->mutable_centroids();\n\n    std::mt19937 mt((std::random_device())());\n    std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n    ContainerType benches(cache.dimension());\n    std::vector<float> scores;\n\n    // Sample first center uniformly\n    RandomSelectBenches(cache, matrix, 1, centroids);\n\n    // Make a thread group\n    auto group = pool.make_group();\n\n    for (size_t i = 1, k = owner->k_value(); i < k; ++i) {\n      RandomSelectBenches(cache, matrix, chain_length_, &benches);\n\n      // Update bench scores\n      scores.resize(benches.count());\n      for (size_t j = 0; j != scores.size(); ++j) {\n        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateBenchScores,\n                                   centroids, benches[j], &scores[j]));\n      }\n      group->wait_finish();\n\n      //! Select the better centroid randomly\n      float x = scores[0];\n      size_t xj = 0;\n      for (size_t j = 1; j != scores.size(); ++j) {\n        float y = scores[j];\n\n        if (x == 0.0f || x * dist(mt) < y) {\n          x = y;\n          xj = j;\n        }\n      }\n      centroids->append(benches[xj], benches.dimension());\n    }  // end of for\n  }\n\n  //! 
Initialize centroids with AFK-MC2 (assumption-free K-MC2)\n  void init_centroids_afkmc2(OwnerType *owner, ThreadPoolType &pool) const {\n    const auto &matrix = owner->feature_matrix();\n    const auto &cache = owner->feature_cache();\n\n    // Probability\n    std::vector<float> probs(matrix.count() + cache.count());\n\n    // Sample first center uniformly\n    RandomSelectBenches(cache, matrix, 1, owner->mutable_centroids());\n\n    // Make a thread group\n    auto group = pool.make_group();\n    if (!matrix.empty()) {\n      size_t n = matrix.count() / BatchCount;\n      size_t c = std::max<size_t>(n / pool.count() / 2u, 1u);\n      size_t m = n / c * c;\n\n      for (size_t i = 0; i != m; i += c) {\n        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateMatrixScores,\n                                   owner, i, i + c, &probs[0]));\n      }\n      for (size_t i = m; i != n; i += 1) {\n        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateMatrixScores,\n                                   owner, i, i + 1, &probs[0]));\n      }\n    }\n    if (!cache.empty()) {\n      group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateCacheScores,\n                                 owner, &probs[matrix.count()]));\n    }\n    group->wait_finish();\n\n    // Update probabilities\n    double p_sum = std::accumulate(probs.begin(), probs.end(), 0.0);\n    for (auto it = probs.begin(); it != probs.end(); ++it) {\n      *it = static_cast<float>((*it / p_sum + 1.0 / probs.size()) * 0.5);\n    }\n\n    std::mt19937 mt((std::random_device())());\n    std::uniform_real_distribution<float> dist(0.0, 1.0);\n    ContainerType benches(cache.dimension());\n    std::vector<float> scores;\n    std::vector<float> bench_probs;\n\n    for (size_t i = 1; i < owner->k_value(); ++i) {\n      RandomSelectBenches(cache, matrix, chain_length_, probs, &benches,\n                          &bench_probs);\n\n      // Update bench scores\n      scores.resize(benches.count());\n      for (size_t j = 0; j != 
scores.size(); ++j) {\n        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateBenchScores,\n                                   owner->mutable_centroids(), benches[j],\n                                   &scores[j]));\n      }\n      group->wait_finish();\n\n      // Update scores with probabilities\n      for (size_t j = 0; j != scores.size(); ++j) {\n        scores[j] /= bench_probs[j];\n      }\n\n      //! Select the better centroid randomly\n      float x = scores[0];\n      size_t xj = 0;\n      for (size_t j = 1; j != scores.size(); ++j) {\n        float y = scores[j];\n\n        if (x == 0.0f || x * dist(mt) < y) {\n          x = y;\n          xj = j;\n        }\n      }\n      owner->mutable_centroids()->append(benches[xj], benches.dimension());\n    }  // end of for\n  }\n\n  //! Update matrix score\n  static void UpdateMatrixScores(const OwnerType *owner, size_t first,\n                                 size_t last, float *out) {\n    const auto &matrix = owner->feature_matrix();\n    const auto *bench = owner->centroids().data();\n\n    for (size_t i = first * BatchCount; i != last * BatchCount;\n         i += BatchCount) {\n      ContextType::template BatchDistance<1>(matrix[i], bench,\n                                             matrix.dimension(), &out[i]);\n    }\n  }\n\n  //! Update cache score\n  static void UpdateCacheScores(const OwnerType *owner, float *out) {\n    const auto &cache = owner->feature_cache();\n    const auto *bench = owner->centroids().data();\n\n    for (size_t i = 0, n = cache.count(); i != n; ++i) {\n      ContextType::Distance(bench, cache[i], cache.dimension(), &out[i]);\n    }\n  }\n\n  //! 
Update bench score\n  static void UpdateBenchScores(const ContainerType *benches,\n                                const StoreType *feat, float *out) {\n    float min_score = std::numeric_limits<float>::max();\n\n    for (size_t i = 0, c = benches->count(); i != c; ++i) {\n      float new_score;\n      ContextType::Distance(benches->at(i), feat, benches->dimension(),\n                            &new_score);\n\n      if (new_score < min_score) {\n        min_score = new_score;\n      }\n    }\n    *out = min_score;\n  }\n\n  //! Select k benches randomly\n  static void RandomSelectBenches(const ContainerType &cache,\n                                  const ContainerType &matrix, size_t k,\n                                  ContainerType *benches) {\n    ContainerType rows(cache.dimension());\n    size_t m = matrix.count();\n    size_t n = m + cache.count();\n    std::mt19937 mt((std::random_device())());\n\n    rows.resize(BatchCount);\n    benches->reset(cache.dimension());\n    benches->reserve(k);\n\n    for (size_t i = 0; k > 0 && i < n; ++i) {\n      if (mt() % (n - i) >= k) {\n        continue;\n      }\n      // Selected a feature\n      if (i < m) {\n        ContextType::MatrixReverseTranspose(matrix[i / BatchCount * BatchCount],\n                                            matrix.dimension(), rows.data());\n        benches->append(rows[i & (BatchCount - 1u)], matrix.dimension());\n      } else {\n        benches->append(cache[i - m], cache.dimension());\n      }\n      --k;\n    }  // end of for\n  }\n\n  //! 
Select k benches randomly\n  static void RandomSelectBenches(const ContainerType &cache,\n                                  const ContainerType &matrix, size_t k,\n                                  const std::vector<float> &probs,\n                                  ContainerType *benches,\n                                  std::vector<float> *bench_probs) {\n    std::mt19937 mt((std::random_device())());\n    std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n    // Sample features\n    KeyValueHeap<size_t, double, std::greater<double>> samples(k);\n    for (size_t i = 0; i < probs.size(); ++i) {\n      samples.emplace(i, std::pow(dist(mt), 1.0 / probs[i]));\n    }\n\n    ContainerType rows(cache.dimension());\n    size_t matrix_count = matrix.count();\n\n    rows.resize(BatchCount);\n    benches->reset(cache.dimension());\n    benches->reserve(k);\n    bench_probs->clear();\n    bench_probs->reserve(k);\n\n    for (const auto &it : samples) {\n      // Selected a feature\n      if (it.first < matrix_count) {\n        ContextType::MatrixReverseTranspose(\n            matrix[it.first / BatchCount * BatchCount], matrix.dimension(),\n            rows.data());\n        benches->append(rows[it.first & (BatchCount - 1u)], matrix.dimension());\n      } else {\n        benches->append(cache[it.first - matrix_count], cache.dimension());\n      }\n      bench_probs->push_back(probs[it.first]);\n    }\n  }\n\n private:\n  size_t chain_length_{32};\n  bool assumption_free_{false};\n};\n\n/*! Numerical K-Means Context\n */\ntemplate <typename T, size_t BATCH_COUNT = 32u>\nclass NumericalKmeansContext {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = BATCH_COUNT;\n\n  //! Type of values\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(IsSignedArithmetic<ValueType>::value,\n                \"ValueType must be signed arithmetic\");\n\n  /*! 
K-Means Context Cluster\n   */\n  class Cluster {\n   public:\n    //! Constructor\n    Cluster(size_t dim) : accum_(dim, 0.0) {}\n\n    //! Constructor\n    Cluster(const Cluster &rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}\n\n    //! Constructor\n    Cluster(Cluster &&rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}\n\n    //! Assignment\n    Cluster &operator=(const Cluster &rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = rhs.accum_;\n      return *this;\n    }\n\n    //! Assignment\n    Cluster &operator=(Cluster &&rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = std::move(rhs.accum_);\n      return *this;\n    }\n\n    //! Append a vector\n    void append(const ValueType *vec, size_t dim, float dist) {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      mutex_.lock();\n      cost_ += dist;\n      count_ += 1;\n\n      for (size_t i = 0; i != dim; ++i) {\n        accum_[i] += vec[i];\n      }\n      mutex_.unlock();\n    }\n\n    //! Retrieve the centroid of vectors\n    void centroid(ValueType *out, size_t dim) const {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      for (size_t i = 0; i != dim; ++i) {\n        out[i] = count_ == 0 ? FloatCast<ValueType>(NAN)\n                             : FloatCast<ValueType>(accum_[i] / count_);\n      }\n    }\n\n    //! Retrieve squared error\n    double cost(void) const {\n      return cost_;\n    }\n\n    //! Retrieve feature count\n    size_t count(void) const {\n      return count_;\n    }\n\n   protected:\n    //! Convert float type to another type\n    template <typename U>\n    static auto FloatCast(const double val) ->\n        typename std::enable_if<!std::is_integral<U>::value, U>::type {\n      return static_cast<U>(val);\n    }\n\n    //! 
Convert float type to another type\n    template <typename U>\n    static auto FloatCast(const double val) ->\n        typename std::enable_if<std::is_integral<U>::value, U>::type {\n      return static_cast<U>(std::round(val));\n    }\n\n   private:\n    SpinMutex mutex_{};\n    double cost_{0.0};\n    size_t count_{0u};\n    std::vector<double> accum_{};\n  };\n\n  //! operator []\n  const Cluster &operator[](size_t i) const {\n    return clusters_[i];\n  }\n\n  //! operator []\n  Cluster &operator[](size_t i) {\n    return clusters_[i];\n  }\n\n  //! Clear the context\n  void clear(void) {\n    clusters_.clear();\n  }\n\n  //! Reset the context\n  void reset(size_t k_value, size_t dim) {\n    clusters_.clear();\n    clusters_.resize(k_value, dim);\n  }\n\n  //! Retrieve context of clusters\n  const std::vector<Cluster> &clusters(void) const {\n    return clusters_;\n  }\n\n  //! Compute the distance between matrix and query (batch)\n  template <size_t N>\n  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,\n                            float *out) {\n    SquaredEuclideanDistanceMatrix<ValueType, BatchCount, N>::Compute(m, q, dim,\n                                                                      out);\n  }\n\n  //! Compute the distance between matrix and query (single)\n  static void Distance(const ValueType *m, const ValueType *q, size_t dim,\n                       float *out) {\n    SquaredEuclideanDistanceMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);\n  }\n\n  //! Transpose a matrix\n  template <typename U>\n  static auto MatrixTranspose(const U *src, size_t dim, T *dst) ->\n      typename std::enable_if<sizeof(U) >= 2>::type {\n    MatrixHelper::Transpose<U, BatchCount>(src, dim, dst);\n  }\n\n  //! 
Transpose a matrix\n  template <typename U>\n  static auto MatrixTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) == 1>::type {\n    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 2, dst);\n  }\n\n  //! Reverse transpose a matrix\n  template <typename U>\n  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) >= 2>::type {\n    MatrixHelper::ReverseTranspose<U, BatchCount>(src, dim, dst);\n  }\n\n  //! Reverse transpose a matrix\n  template <typename U>\n  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) == 1>::type {\n    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 2, dst);\n  }\n\n  //! Compute Norm2\n  template <typename ValueType, typename = typename std::enable_if<\n                                    IsFloatingPoint<ValueType>::value>::type>\n  static void Norm2(ValueType *data, size_t dim, float *norm) {\n    Normalizer<ValueType>::L2(data, dim, norm);\n  }\n\n  //! Compute Norm2, for non-float do nothing\n  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {\n    *norm = 0.0f;\n  }\n\n private:\n  //! Members\n  std::vector<Cluster> clusters_{};\n};\n\n/*! Nibble K-Means Context (INT4)\n */\ntemplate <typename T, size_t BATCH_COUNT = 32u>\nclass NibbleKmeansContext {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = BATCH_COUNT;\n\n  //! Type of values\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::make_unsigned<ValueType>::type;\n\n  // Check supporting type\n  static_assert(std::is_same<ValueType, int32_t>::value ||\n                    std::is_same<ValueType, int64_t>::value,\n                \"ValueType must be int32_t or int64_t\");\n\n  /*! K-Means Context Cluster\n   */\n  class Cluster {\n   public:\n    //! 
Constructor\n    Cluster(size_t dim) : accum_(dim, 0.0) {}\n\n    //! Constructor\n    Cluster(const Cluster &rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}\n\n    //! Constructor\n    Cluster(Cluster &&rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}\n\n    //! Assignment\n    Cluster &operator=(const Cluster &rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = rhs.accum_;\n      return *this;\n    }\n\n    //! Assignment\n    Cluster &operator=(Cluster &&rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = std::move(rhs.accum_);\n      return *this;\n    }\n\n    //! Append a vector\n    void append(const StoreType *vec, size_t dim, float dist) {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      mutex_.lock();\n      cost_ += dist;\n      count_ += 1;\n\n      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);\n      dim = (dim >> 1) << 1;\n      for (size_t i = 0; i != dim; i += 2) {\n        uint8_t val = arr[i >> 1];\n        accum_[i] += ((int8_t)(val << 4) >> 4);\n        accum_[i + 1] += ((int8_t)(val) >> 4);\n      }\n      mutex_.unlock();\n    }\n\n    //! Retrieve the centroid of vectors\n    void centroid(StoreType *out, size_t dim) const {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      uint8_t *arr = reinterpret_cast<uint8_t *>(out);\n      dim = (dim >> 1) << 1;\n      for (size_t i = 0; i != dim; i += 2) {\n        int lo =\n            count_ == 0 ? 0 : static_cast<int>(std::round(accum_[i] / count_));\n        int hi = count_ == 0\n                     ? 0\n                     : static_cast<int>(std::round(accum_[i + 1] / count_));\n        arr[i >> 1] = (uint8_t)((hi << 4) & 0xf0) | (uint8_t)(lo & 0xf);\n      }\n    }\n\n    //! Retrieve squared error\n    double cost(void) const {\n      return cost_;\n    }\n\n    //! 
Retrieve feature count\n    size_t count(void) const {\n      return count_;\n    }\n\n   private:\n    SpinMutex mutex_{};\n    double cost_{0.0};\n    size_t count_{0u};\n    std::vector<double> accum_{};\n  };\n\n  //! operator []\n  const Cluster &operator[](size_t i) const {\n    return clusters_[i];\n  }\n\n  //! operator []\n  Cluster &operator[](size_t i) {\n    return clusters_[i];\n  }\n\n  //! Clear the context\n  void clear(void) {\n    clusters_.clear();\n  }\n\n  //! Reset the context\n  void reset(size_t k_value, size_t dim) {\n    clusters_.clear();\n    clusters_.resize(k_value, dim);\n  }\n\n  //! Retrieve context of clusters\n  const std::vector<Cluster> &clusters(void) const {\n    return clusters_;\n  }\n\n  //! Compute the distance between matrix and query (batch)\n  template <size_t N>\n  static void BatchDistance(const StoreType *m, const StoreType *q, size_t dim,\n                            float *out) {\n    SquaredEuclideanDistanceMatrix<uint8_t, BatchCount, N>::Compute(\n        reinterpret_cast<const uint8_t *>(m),\n        reinterpret_cast<const uint8_t *>(q), dim, out);\n  }\n\n  //! Compute the distance between matrix and query (single)\n  static void Distance(const StoreType *m, const StoreType *q, size_t dim,\n                       float *out) {\n    SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n        reinterpret_cast<const uint8_t *>(m),\n        reinterpret_cast<const uint8_t *>(q), dim, out);\n  }\n\n  //! Transpose a matrix\n  static void MatrixTranspose(const StoreType *src, size_t dim,\n                              StoreType *dst) {\n    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 3, dst);\n  }\n\n  //! Reverse transpose a matrix\n  static void MatrixReverseTranspose(const StoreType *src, size_t dim,\n                                     StoreType *dst) {\n    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 3, dst);\n  }\n\n  //! 
Compute and do norm2\n  static void Norm2(StoreType * /*data*/, size_t /*dim*/, float *norm) {\n    *norm = 0;\n  }\n\n private:\n  //! Members\n  std::vector<Cluster> clusters_{};\n};\n\n/*! Binary K-Means Context\n */\ntemplate <typename T, size_t BATCH_COUNT = 32u>\nclass BinaryKmeansContext {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = BATCH_COUNT;\n\n  //! Type of values\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(std::is_same<ValueType, uint32_t>::value ||\n                    std::is_same<ValueType, uint64_t>::value,\n                \"ValueType must be uint32_t or uint64_t\");\n\n  /*! K-Means Context Cluster\n   */\n  class Cluster {\n   public:\n    //! Constructor\n    Cluster(size_t dim) : accum_(dim, 0) {}\n\n    //! Constructor\n    Cluster(const Cluster &rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}\n\n    //! Constructor\n    Cluster(Cluster &&rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}\n\n    //! Assignment\n    Cluster &operator=(const Cluster &rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = rhs.accum_;\n      return *this;\n    }\n\n    //! Assignment\n    Cluster &operator=(Cluster &&rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = std::move(rhs.accum_);\n      return *this;\n    }\n\n    //! Append a vector\n    void append(const ValueType *vec, size_t dim, float dist) {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      mutex_.lock();\n      cost_ += dist;\n      count_ += 1;\n\n      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);\n      for (size_t i = 0; i != dim; ++i) {\n        if (arr[i >> 3] & (1u << (i & 7))) {\n          accum_[i] += 1;\n        }\n      }\n      mutex_.unlock();\n    }\n\n    //! 
Retrieve the centroid of vectors\n    void centroid(ValueType *out, size_t dim) const {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      uint8_t *arr = reinterpret_cast<uint8_t *>(out);\n      size_t half = count_ >> 1;\n      for (size_t i = 0; i != dim; ++i) {\n        if (accum_[i] > half) {\n          arr[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));\n        } else {\n          arr[i >> 3] &= ~static_cast<uint8_t>(1 << (i & 0x7));\n        }\n      }\n    }\n\n    //! Retrieve squared error\n    double cost(void) const {\n      return cost_;\n    }\n\n    //! Retrieve feature count\n    size_t count(void) const {\n      return count_;\n    }\n\n   private:\n    SpinMutex mutex_{};\n    double cost_{0.0};\n    size_t count_{0u};\n    std::vector<uint32_t> accum_{};\n  };\n\n  //! operator []\n  const Cluster &operator[](size_t i) const {\n    return clusters_[i];\n  }\n\n  //! operator []\n  Cluster &operator[](size_t i) {\n    return clusters_[i];\n  }\n\n  //! Clear the context\n  void clear(void) {\n    clusters_.clear();\n  }\n\n  //! Reset the context\n  void reset(size_t k_value, size_t dim) {\n    clusters_.clear();\n    clusters_.resize(k_value, dim);\n  }\n\n  //! Retrieve context of clusters\n  const std::vector<Cluster> &clusters(void) const {\n    return clusters_;\n  }\n\n  //! Compute the distance between matrix and query (batch)\n  template <size_t N>\n  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,\n                            float *out) {\n    HammingDistanceMatrix<ValueType, BatchCount, N>::Compute(m, q, dim, out);\n  }\n\n  //! Compute the distance between matrix and query (single)\n  static void Distance(const ValueType *m, const ValueType *q, size_t dim,\n                       float *out) {\n    HammingDistanceMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);\n  }\n\n  //! 
Transpose a matrix\n  static void MatrixTranspose(const ValueType *src, size_t dim, T *dst) {\n    MatrixHelper::Transpose<ValueType, BatchCount>(\n        src, (dim >> 3) / sizeof(ValueType), dst);\n  }\n\n  //! Reverse transpose a matrix\n  static void MatrixReverseTranspose(const ValueType *src, size_t dim, T *dst) {\n    MatrixHelper::ReverseTranspose<ValueType, BatchCount>(\n        src, (dim >> 3) / sizeof(ValueType), dst);\n  }\n\n  //! Compute Norm2\n  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {\n    *norm = 0;\n  }\n\n private:\n  //! Members\n  std::vector<Cluster> clusters_{};\n};\n\n/*! Numerical InnerProduct K-Means Context\n */\ntemplate <typename T, size_t BATCH_COUNT = 32u>\nclass NumericalInnerProductKmeansContext {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = BATCH_COUNT;\n\n  //! Type of values\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(IsSignedArithmetic<ValueType>::value,\n                \"ValueType must be signed arithmetic\");\n\n  /*! K-Means Context Cluster\n   */\n  class Cluster {\n   public:\n    //! Constructor\n    Cluster(size_t dim) : accum_(dim, 0.0) {}\n\n    //! Constructor\n    Cluster(const Cluster &rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}\n\n    //! Constructor\n    Cluster(Cluster &&rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}\n\n    //! Assignment\n    Cluster &operator=(const Cluster &rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = rhs.accum_;\n      return *this;\n    }\n\n    //! Assignment\n    Cluster &operator=(Cluster &&rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = std::move(rhs.accum_);\n      return *this;\n    }\n\n    //! 
Append a vector\n    void append(const ValueType *vec, size_t dim, float dist) {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      mutex_.lock();\n      cost_ += dist;\n      count_ += 1;\n\n      for (size_t i = 0; i != dim; ++i) {\n        accum_[i] += vec[i];\n      }\n      mutex_.unlock();\n    }\n\n    //! Retrieve the centroid of vectors\n    void centroid(ValueType *out, size_t dim) const {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      for (size_t i = 0; i != dim; ++i) {\n        out[i] = count_ == 0 ? FloatCast<ValueType>(NAN)\n                             : FloatCast<ValueType>(accum_[i] / count_);\n      }\n    }\n\n    //! Retrieve squared error\n    double cost(void) const {\n      return cost_;\n    }\n\n    //! Retrieve feature count\n    size_t count(void) const {\n      return count_;\n    }\n\n   protected:\n    //! Convert float type to another type\n    template <typename U>\n    static auto FloatCast(const double val) ->\n        typename std::enable_if<!std::is_integral<U>::value, U>::type {\n      return static_cast<U>(val);\n    }\n\n    //! Convert float type to another type\n    template <typename U>\n    static auto FloatCast(const double val) ->\n        typename std::enable_if<std::is_integral<U>::value, U>::type {\n      return static_cast<U>(std::round(val));\n    }\n\n   private:\n    SpinMutex mutex_{};\n    double cost_{0.0};\n    size_t count_{0u};\n    std::vector<double> accum_{};\n  };\n\n  //! operator []\n  const Cluster &operator[](size_t i) const {\n    return clusters_[i];\n  }\n\n  //! operator []\n  Cluster &operator[](size_t i) {\n    return clusters_[i];\n  }\n\n  //! Clear the context\n  void clear(void) {\n    clusters_.clear();\n  }\n\n  //! Reset the context\n  void reset(size_t k_value, size_t dim) {\n    clusters_.clear();\n    clusters_.resize(k_value, dim);\n  }\n\n  //! 
Retrieve context of clusters\n  const std::vector<Cluster> &clusters(void) const {\n    return clusters_;\n  }\n\n  //! Compute the distance between matrix and query (batch)\n  template <size_t N>\n  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,\n                            float *out) {\n    MinusInnerProductMatrix<ValueType, BatchCount, N>::Compute(m, q, dim, out);\n  }\n\n  //! Compute the distance between matrix and query (single)\n  static void Distance(const ValueType *m, const ValueType *q, size_t dim,\n                       float *out) {\n    MinusInnerProductMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);\n  }\n\n  //! Transpose a matrix\n  template <typename U>\n  static auto MatrixTranspose(const U *src, size_t dim, T *dst) ->\n      typename std::enable_if<sizeof(U) >= 2>::type {\n    MatrixHelper::Transpose<U, BatchCount>(src, dim, dst);\n  }\n\n  //! Transpose a matrix\n  template <typename U>\n  static auto MatrixTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) == 1>::type {\n    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 2, dst);\n  }\n\n  //! Reverse transpose a matrix\n  template <typename U>\n  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) >= 2>::type {\n    MatrixHelper::ReverseTranspose<U, BatchCount>(src, dim, dst);\n  }\n\n  //! Reverse transpose a matrix\n  template <typename U>\n  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->\n      typename std::enable_if<sizeof(U) == 1>::type {\n    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 2, dst);\n  }\n\n  //! Compute Norm2\n  template <typename ValueType, typename = typename std::enable_if<\n                                    IsFloatingPoint<ValueType>::value>::type>\n  static void Norm2(ValueType *data, size_t dim, float *norm) {\n    Normalizer<ValueType>::L2(data, dim, norm);\n  }\n\n  //! 
Compute Norm2, for non-float do nothing\n  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {\n    *norm = 0.0f;\n  }\n\n private:\n  //! Members\n  std::vector<Cluster> clusters_{};\n};\n\n/*! Nibble InnerProduct K-Means Context (INT4)\n */\ntemplate <typename T, size_t BATCH_COUNT = 32u>\nclass NibbleInnerProductKmeansContext {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = BATCH_COUNT;\n\n  //! Type of values\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::make_unsigned<ValueType>::type;\n\n  // Check supporting type\n  static_assert(std::is_same<ValueType, int32_t>::value ||\n                    std::is_same<ValueType, int64_t>::value,\n                \"ValueType must be int32_t or int64_t\");\n\n  /*! K-Means Context Cluster\n   */\n  class Cluster {\n   public:\n    //! Constructor\n    Cluster(size_t dim) : accum_(dim, 0.0) {}\n\n    //! Constructor\n    Cluster(const Cluster &rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}\n\n    //! Constructor\n    Cluster(Cluster &&rhs)\n        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}\n\n    //! Assignment\n    Cluster &operator=(const Cluster &rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = rhs.accum_;\n      return *this;\n    }\n\n    //! Assignment\n    Cluster &operator=(Cluster &&rhs) {\n      cost_ = rhs.cost_;\n      count_ = rhs.count_;\n      accum_ = std::move(rhs.accum_);\n      return *this;\n    }\n\n    //! 
Append a vector\n    void append(const StoreType *vec, size_t dim, float dist) {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      mutex_.lock();\n      cost_ += dist;\n      count_ += 1;\n\n      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);\n      dim = (dim >> 1) << 1;\n      for (size_t i = 0; i != dim; i += 2) {\n        uint8_t val = arr[i >> 1];\n        accum_[i] += ((int8_t)(val << 4) >> 4);\n        accum_[i + 1] += ((int8_t)(val) >> 4);\n      }\n      mutex_.unlock();\n    }\n\n    //! Retrieve the centroid of vectors\n    void centroid(StoreType *out, size_t dim) const {\n      ailego_check_with(dim == accum_.size(), \"Unmatched dimension\");\n\n      uint8_t *arr = reinterpret_cast<uint8_t *>(out);\n      dim = (dim >> 1) << 1;\n      for (size_t i = 0; i != dim; i += 2) {\n        int lo =\n            count_ == 0 ? 0 : static_cast<int>(std::round(accum_[i] / count_));\n        int hi = count_ == 0\n                     ? 0\n                     : static_cast<int>(std::round(accum_[i + 1] / count_));\n        arr[i >> 1] = (uint8_t)((hi << 4) & 0xf0) | (uint8_t)(lo & 0xf);\n      }\n    }\n\n    //! Retrieve squared error\n    double cost(void) const {\n      return cost_;\n    }\n\n    //! Retrieve feature count\n    size_t count(void) const {\n      return count_;\n    }\n\n   private:\n    SpinMutex mutex_{};\n    double cost_{0.0};\n    size_t count_{0u};\n    std::vector<double> accum_{};\n  };\n\n  //! operator []\n  const Cluster &operator[](size_t i) const {\n    return clusters_[i];\n  }\n\n  //! operator []\n  Cluster &operator[](size_t i) {\n    return clusters_[i];\n  }\n\n  //! Clear the context\n  void clear(void) {\n    clusters_.clear();\n  }\n\n  //! Reset the context\n  void reset(size_t k_value, size_t dim) {\n    clusters_.clear();\n    clusters_.resize(k_value, dim);\n  }\n\n  //! 
Retrieve context of clusters\n  const std::vector<Cluster> &clusters(void) const {\n    return clusters_;\n  }\n\n  //! Compute the distance between matrix and query (batch)\n  template <size_t N>\n  static void BatchDistance(const StoreType *m, const StoreType *q, size_t dim,\n                            float *out) {\n    MinusInnerProductMatrix<uint8_t, BatchCount, N>::Compute(\n        reinterpret_cast<const uint8_t *>(m),\n        reinterpret_cast<const uint8_t *>(q), dim, out);\n  }\n\n  //! Compute the distance between matrix and query (single)\n  static void Distance(const StoreType *m, const StoreType *q, size_t dim,\n                       float *out) {\n    MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(\n        reinterpret_cast<const uint8_t *>(m),\n        reinterpret_cast<const uint8_t *>(q), dim, out);\n  }\n\n  //! Transpose a matrix\n  static void MatrixTranspose(const StoreType *src, size_t dim,\n                              StoreType *dst) {\n    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 3, dst);\n  }\n\n  //! Reverse transpose a matrix\n  static void MatrixReverseTranspose(const StoreType *src, size_t dim,\n                                     StoreType *dst) {\n    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 3, dst);\n  }\n\n  //! Compute Norm2\n  static void Norm2(StoreType * /*data*/, size_t /*dim*/, float *norm) {\n    *norm = 0;\n  }\n\n private:\n  //! Members\n  std::vector<Cluster> clusters_{};\n};\n\n/*! Numerical K-Means cluster algorithm\n */\ntemplate <typename T, typename TPool,\n          typename TContext = NumericalKmeansContext<T>>\nusing NumericalKmeans =\n    LloydCluster<T, TPool, TContext, NumericalVectorArray<T>>;\n\n/*! Nibble K-Means cluster algorithm\n */\ntemplate <typename T, typename TPool,\n          typename TContext = NibbleKmeansContext<T>>\nusing NibbleKmeans = LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;\n\n/*! 
Binary K-Means cluster algorithm\n */\ntemplate <typename T, typename TPool,\n          typename TContext = BinaryKmeansContext<T>>\nusing BinaryKmeans = LloydCluster<T, TPool, TContext, BinaryVectorArray<T>>;\n\n/*! Numerical InnerProduct K-Means cluster algorithm\n */\ntemplate <typename T, typename TPool,\n          typename TContext = NumericalInnerProductKmeansContext<T>>\nusing NumericalInnerProductKmeans =\n    LloydCluster<T, TPool, TContext, NumericalVectorArray<T>>;\n\n/*! Nibble InnerProduct K-Means cluster algorithm\n */\ntemplate <typename T, typename TPool,\n          typename TContext = NibbleInnerProductKmeansContext<T>>\nusing NibbleInnerProductKmeans =\n    LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/algorithm/lloyd_cluster.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <array>\n#include <random>\n#include <ailego/parallel/lock.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Random Centroids Generator\n */\ntemplate <typename T, typename TPool>\nstruct RandomCentroidsGenerator {\n  //! Type of values\n  using OwnerType = typename std::decay<T>::type;\n  using ContainerType = typename OwnerType::ContainerType;\n  using ContextType = typename OwnerType::ContextType;\n  using ThreadPoolType = TPool;\n\n  //! constexpr variables\n  constexpr static size_t BatchCount = OwnerType::BatchCount;\n\n  //! 
Generate centroids\n  void operator()(OwnerType *owner, ThreadPoolType &) const {\n    const auto &matrix = owner->feature_matrix();\n    const auto &cache = owner->feature_cache();\n    auto *centroids = owner->mutable_centroids();\n\n    ContainerType rows(cache.dimension());\n    size_t m = matrix.count();\n    size_t n = m + cache.count();\n    size_t k = owner->k_value();\n    std::mt19937 mt((std::random_device())());\n\n    rows.resize(BatchCount);\n    centroids->reset(cache.dimension());\n    centroids->reserve(k);\n\n    for (size_t i = 0; k > 0 && i < n; ++i) {\n      if (mt() % (n - i) >= k) {\n        continue;\n      }\n      // Selected a feature\n      if (i < m) {\n        ContextType::MatrixReverseTranspose(matrix[i / BatchCount * BatchCount],\n                                            matrix.dimension(), rows.data());\n        centroids->append(rows[i & (BatchCount - 1u)], matrix.dimension());\n      } else {\n        centroids->append(cache[i - m], cache.dimension());\n      }\n      --k;\n    }\n  }\n};\n\n/*! Lloyd's algorithm cluster\n */\ntemplate <typename T, typename TPool, typename TContext, typename TContainer>\nclass LloydCluster {\n public:\n  //! constexpr variables\n  constexpr static size_t BatchCount = TContext::BatchCount;\n\n  //! Type of values\n  using ThreadPoolType = TPool;\n  using ContainerType = TContainer;\n  using ContextType = TContext;\n  using ValueType = typename TContext::ValueType;\n  using StoreType = typename TContext::StoreType;\n\n  //! Constructor\n  LloydCluster(size_t k, size_t dim)\n      : k_value_(k),\n        feature_cache_(dim),\n        feature_matrix_(dim),\n        centroids_matrix_(dim),\n        centroids_(dim) {}\n\n  //! Constructor\n  LloydCluster(size_t k, size_t dim, bool spherical)\n      : k_value_(k),\n        feature_cache_(dim),\n        feature_matrix_(dim),\n        centroids_matrix_(dim),\n        centroids_(dim),\n        spherical_{spherical} {}\n\n  //! 
Constructor\n  LloydCluster(void) {}\n\n  //! Destructor\n  ~LloydCluster(void) {}\n\n  //! Append a feature\n  void append(const StoreType *arr, size_t dim) {\n    feature_cache_.append(arr, dim);\n\n    if (feature_cache_.count() == BatchCount) {\n      size_t pos = feature_matrix_.count();\n      feature_matrix_.resize(pos + BatchCount);\n      ContextType::MatrixTranspose(feature_cache_.data(), dim,\n                                   feature_matrix_[pos]);\n      feature_cache_.clear();\n    }\n  }\n\n  //! Reset cluster\n  void reset(size_t k, size_t dim) {\n    k_value_ = k;\n    feature_cache_.reset(dim);\n    feature_matrix_.reset(dim);\n    centroids_.reset(dim);\n    centroids_matrix_.reset(dim);\n    context_.clear();\n  }\n\n  //! Reset cluster\n  void reset(size_t k, size_t dim, bool spherical) {\n    k_value_ = k;\n    feature_cache_.reset(dim);\n    feature_matrix_.reset(dim);\n    centroids_.reset(dim);\n    centroids_matrix_.reset(dim);\n    context_.clear();\n    spherical_ = spherical;\n  }\n\n  //! Initialize centroids\n  template <typename G = RandomCentroidsGenerator<LloydCluster, ThreadPoolType>>\n  void init_centroids(ThreadPoolType &pool, const G &g = G()) {\n    g(this, pool);\n  }\n\n  //! 
Cluster one time\n  template <typename ThreadPoolType>\n  bool cluster_once(ThreadPoolType &pool, double *cost) {\n    if (centroids_.empty()) {\n      RandomCentroidsGenerator<LloydCluster, ThreadPoolType> g;\n      this->init_centroids(pool, g);\n    }\n    if (centroids_.count() != k_value_) {\n      return false;\n    }\n    context_.reset(centroids_.count(), centroids_.dimension());\n\n    size_t count = centroids_.count() / BatchCount * BatchCount;\n    centroids_matrix_.resize(count);\n    for (size_t i = 0; i != count; i += BatchCount) {\n      ContextType::MatrixTranspose(centroids_[i], centroids_.dimension(),\n                                   centroids_matrix_[i]);\n    }\n    size_t remain = static_cast<uint32_t>(centroids_.count() - count);\n    if (remain > 0) {\n      centroids_matrix_.append(centroids_[count], centroids_.dimension(),\n                               remain);\n    }\n\n    // Using thread pool\n    auto group = pool.make_group();\n    if (!feature_matrix_.empty()) {\n      size_t n = feature_matrix_.count() / BatchCount;\n      size_t c = std::max<size_t>(n / pool.count() / 2u, 1u);\n      size_t m = n / c * c;\n\n      for (size_t i = 0; i != m; i += c) {\n        group->submit(Closure::New(this, &LloydCluster::cluster_matrix_features,\n                                   i, i + c));\n      }\n      for (size_t i = m; i != n; i += 1) {\n        group->submit(Closure::New(this, &LloydCluster::cluster_matrix_features,\n                                   i, i + 1));\n      }\n    }\n    if (!feature_cache_.empty()) {\n      group->submit(Closure::New(this, &LloydCluster::cluster_cache_features));\n    }\n    group->wait_finish();\n\n    *cost = 0.0;\n    for (size_t i = 0, n = centroids_.count(); i != n; ++i) {\n      const auto &item = context_[i];\n      item.centroid(centroids_[i], centroids_.dimension());\n      *cost += item.cost();\n    }\n\n    if (spherical_) {\n      for (size_t i = 0, n = centroids_.count(); i != n; ++i) {\n   
     float norm;\n        ContextType::Norm2(centroids_[i], centroids_.dimension(), &norm);\n      }\n    }\n\n    return true;\n  }\n\n  //! Retrieve the centroids\n  ContainerType *mutable_centroids(void) {\n    return &centroids_;\n  }\n\n  //! Retrieve the centroids\n  const ContainerType &centroids(void) const {\n    return centroids_;\n  }\n\n  //! Retrieve the K value\n  size_t k_value(void) const {\n    return k_value_;\n  }\n\n  //! Retrieve context\n  const ContextType &context(void) const {\n    return context_;\n  }\n\n  //! Retrieve the feature cache\n  const ContainerType &feature_cache(void) const {\n    return feature_cache_;\n  }\n\n  //! Retrieve the feature matrix\n  const ContainerType &feature_matrix(void) const {\n    return feature_matrix_;\n  }\n\n  //! Reserve the feature matrix\n  void feature_matrix_reserve(size_t count) {\n    feature_matrix_.reserve(count);\n  }\n\n protected:\n  //! Cluster the cache features\n  void cluster_cache_features(void) {\n    std::array<float, BatchCount> scores;\n\n    for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {\n      size_t count = centroids_matrix_.count() / BatchCount * BatchCount;\n      const StoreType *feature = feature_cache_[i];\n      float nearest_score = std::numeric_limits<float>::max();\n      size_t nearest_index = 0;\n\n      for (size_t j = 0; j != count; j += BatchCount) {\n        ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,\n                                               centroids_matrix_.dimension(),\n                                               scores.data());\n\n        for (size_t k = 0; k < BatchCount; ++k) {\n          if (scores[k] < nearest_score) {\n            nearest_score = scores[k];\n            nearest_index = j + k;\n          }\n        }\n      }  // end of for\n\n      for (size_t j = count, total = centroids_matrix_.count(); j != total;\n           ++j) {\n        ContextType::Distance(centroids_matrix_[j], feature,\n        
                      centroids_matrix_.dimension(), scores.data());\n\n        if (scores[0] < nearest_score) {\n          nearest_score = scores[0];\n          nearest_index = j;\n        }\n      }\n      context_[nearest_index].append(feature, feature_cache_.dimension(),\n                                     nearest_score);\n    }  // end of for\n  }\n\n  //! Cluster the matrix features\n  void cluster_matrix_features(size_t first, size_t last) {\n    std::array<float, BatchCount * BatchCount> scores;\n    ContainerType rows(centroids_matrix_.dimension());\n\n    auto comp = [](float i, float j) {\n      if (std::isnan(i)) return false;\n      if (std::isnan(j)) return true;\n\n      return i < j;\n    };\n\n    std::array<float, BatchCount> nearest_scores;\n    std::array<size_t, BatchCount> nearest_indexes;\n\n    rows.resize(BatchCount);\n    for (size_t i = first * BatchCount; i != last * BatchCount;\n         i += BatchCount) {\n      size_t count = centroids_matrix_.count() / BatchCount * BatchCount;\n      const StoreType *block = feature_matrix_[i];\n\n      std::fill(nearest_indexes.data(), nearest_indexes.data() + BatchCount, 0);\n      std::fill(nearest_scores.data(), nearest_scores.data() + BatchCount,\n                std::numeric_limits<float>::max());\n\n      for (size_t j = 0; j != count; j += BatchCount) {\n        ContextType::template BatchDistance<BatchCount>(\n            centroids_matrix_[j], block, centroids_matrix_.dimension(),\n            scores.data());\n\n        for (size_t k = 0; k < BatchCount; ++k) {\n          const float *start = &scores[k * BatchCount];\n          const float *result =\n              std::min_element(start, start + BatchCount, comp);\n          if (*result < nearest_scores[k]) {\n            nearest_scores[k] = *result;\n            nearest_indexes[k] = j + (result - start);\n          }\n        }\n      }  // end of for\n\n      for (size_t j = count, total = centroids_matrix_.count(); j != total;\n         
  ++j) {\n        ContextType::template BatchDistance<1>(block, centroids_matrix_[j],\n                                               centroids_matrix_.dimension(),\n                                               scores.data());\n\n        for (size_t k = 0; k < BatchCount; ++k) {\n          float score = scores[k];\n          if (score < nearest_scores[k]) {\n            nearest_scores[k] = score;\n            nearest_indexes[k] = j;\n          }\n        }\n      }  // end of for\n\n      ContextType::MatrixReverseTranspose(block, feature_matrix_.dimension(),\n                                          rows.data());\n      for (size_t k = 0; k < BatchCount; ++k) {\n        context_[nearest_indexes[k]].append(\n            rows[k], feature_matrix_.dimension(), nearest_scores[k]);\n      }\n    }  // end of for\n  }\n\n private:\n  //! Members\n  size_t k_value_{0u};\n  ContainerType feature_cache_{};\n  ContainerType feature_matrix_{};\n  ContainerType centroids_matrix_{};\n  ContainerType centroids_{};\n  ContextType context_{};\n  bool spherical_{false};\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/buffer/buffer_manager.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <atomic>\n#include <mutex>\n#include <ailego/pattern/defer.h>\n#include <arrow/io/api.h>\n#include <parquet/arrow/reader.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/logger/logger.h>\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wunused-parameter\"\n#pragma clang diagnostic ignored \"-Wshadow\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-parameter\"\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#endif\n\n#include <arrow/api.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n\n\nnamespace zvec {\n\n\nnamespace ailego {\n\n\nnamespace {\n\n\nstruct IDHash {\n  size_t operator()(const BufferID &buffer_id) const {\n    size_t hash = std::hash<int>{}(static_cast<int>(buffer_id.type));\n    hash = hash ^ (std::hash<uint64_t>{}(buffer_id.file_id));\n    if (buffer_id.type == BufferID::TYPE::PARQUET) {\n      hash = hash * 31 + std::hash<int>{}(buffer_id.parquet().column);\n      hash = hash * 31 + std::hash<int>{}(buffer_id.parquet().row_group);\n    } else if (buffer_id.type == BufferID::TYPE::VECTOR) {\n      hash = hash * 31 + 
std::hash<uint32_t>{}(buffer_id.vector().offset);\n    }\n    return hash;\n  }\n};\n\n\nstruct IDEqual {\n  bool operator()(const BufferID &a, const BufferID &b) const {\n    if (a.type != b.type) {\n      return false;\n    }\n    if (a.file_name != b.file_name) {\n      return false;\n    }\n    if (a.file_id != b.file_id) {\n      return false;\n    }\n    if (a.mtime != b.mtime) {\n      return false;\n    }\n    if (a.type == BufferID::TYPE::PARQUET) {\n      return a.parquet().column == b.parquet().column &&\n             a.parquet().row_group == b.parquet().row_group;\n    } else if (a.type == BufferID::TYPE::VECTOR) {\n      return a.vector().offset == b.vector().offset;\n    } else {\n      return false;\n    }\n  }\n};\n\n\n}  // namespace\n\n\nstruct BufferManager::BufferContext {\n  BufferContext(const BufferID &id, BufferPool *p) : id(id), pool(p) {};\n  BufferContext(const BufferContext &) = delete;\n  BufferContext(BufferContext &&) = delete;\n  BufferContext &operator=(const BufferContext &) = delete;\n  BufferContext &operator=(BufferContext &&) = delete;\n\n\n  ~BufferContext() {\n    if (vector) {\n      free(vector);\n    }\n  }\n\n\n  typedef std::unique_ptr<BufferManager::BufferContext> Pointer;\n\n\n  enum State : uint32_t {\n    IDLE = 0,      // Empty and not held by any users, not in LRU\n    RESERVED = 1,  // Pinned by a user but no data yet, not in LRU\n    IN_USE = 2,    // Pinned by a user and data is present, not in LRU\n    CACHED = 3,    // Data is present but not held by any users, in LRU\n    ERROR = 4      // Something went wrong, not in LRU\n  };\n\n\n  // Identifier for the buffer\n  BufferID id;\n\n  // Current state\n  State state{IDLE};\n\n  // The size of the buffer\n  uint32_t size{0};\n\n  // Handle of the file backing this buffer\n  File file;\n\n  // The number of external references to this buffer (via pin/unpin)\n  std::atomic<uint32_t> refs_buf{0};\n\n  // The number of external references to this context (via 
BufferHandle)\n  std::atomic<uint32_t> refs_context{0};\n\n  BufferPool *pool{nullptr};\n\n  // A shared pointer to the buffers allocated for arrow parquet data\n  std::shared_ptr<arrow::ChunkedArray> arrow{nullptr};\n\n  // Guard original arrow buffers to prevent premature deletion\n  std::vector<std::shared_ptr<arrow::Buffer>> arrow_refs{};\n\n  // A pointer to the buffer allocated for vector data\n  void *vector{nullptr};\n\n  // Doubly linked LRU list\n  BufferContext *next{nullptr};\n  BufferContext *prev{nullptr};\n\n\n  // Return a string representation of the status\n  const std::string status_string() const;\n\n  // Populate the buffer with parquet data\n  arrow::Status read_arrow_parquet();\n\n  // Populate the buffer with vector data\n  bool read_vector();\n};\n\n\nconst std::string BufferManager::BufferContext::status_string() const {\n  std::string msg{id.to_string() + \": \"};\n  switch (state) {\n    case State::IDLE: {\n      msg += \"Idle\";\n      break;\n    }\n    case State::RESERVED: {\n      msg += \"Reserved\";\n      break;\n    }\n    case State::IN_USE: {\n      msg += \"In use\";\n      break;\n    }\n    case State::CACHED: {\n      msg += \"Cached\";\n      break;\n    }\n    case State::ERROR: {\n      msg += \"Error\";\n      break;\n    }\n  }\n  return msg;\n}\n\n\narrow::Status BufferManager::BufferContext::read_arrow_parquet() {\n  // TODO: file handler and memory pool can be optimized\n  arrow::MemoryPool *mem_pool = arrow::default_memory_pool();\n\n  // Open file\n  std::shared_ptr<arrow::io::RandomAccessFile> input;\n  const auto &file_name = id.file_name;\n  ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name));\n\n  // Open reader\n  std::unique_ptr<parquet::arrow::FileReader> reader;\n  ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool));\n\n  // Perform read\n  int row_group = id.parquet().row_group;\n  int column = id.parquet().column;\n  auto s = 
reader->RowGroup(row_group)->Column(column)->Read(&arrow);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to read parquet file[%s]\", file_name.c_str());\n    arrow = nullptr;\n    return s;\n  }\n\n  // Compute the memory usage and hijack Arrow's buffers with our implementation\n  for (auto &array : arrow->chunks()) {\n    auto &buffers = array->data()->buffers;\n    for (size_t buf_idx = 0; buf_idx < buffers.size(); ++buf_idx) {\n      if (buffers[buf_idx] == nullptr) {\n        continue;\n      }\n      // Keep references to original buffers to prevent premature deletion\n      arrow_refs.emplace_back(buffers[buf_idx]);\n      size += buffers[buf_idx]->capacity();\n      // Create hijacked buffer with custom deleter that notifies us when Arrow\n      // is finished with the buffer\n      std::shared_ptr<arrow::Buffer> hijacked_buffer(\n          buffers[buf_idx].get(), BufferManager::ArrowBufferDeleter(this));\n      buffers[buf_idx] = hijacked_buffer;\n    }\n  }\n\n  return arrow::Status::OK();\n}\n\n\nbool BufferManager::BufferContext::read_vector() {\n  const auto &file_name = id.file_name;\n  if (!file.is_valid()) {\n    if (!File::IsExist(file_name)) {\n      LOG_ERROR(\"File[%s] does not exist\", file_name.c_str());\n      return false;\n    }\n    if (!File::IsRegular(file_name)) {\n      LOG_ERROR(\"[%s] is not a regular file\", file_name.c_str());\n      return false;\n    }\n    if (!file.open(file_name.c_str(), true, false)) {\n      LOG_ERROR(\"Failed to open file[%s]\", file_name.c_str());\n      return false;\n    }\n  }\n  AILEGO_DEFER([this] { file.close(); });\n  uint32_t len = id.vector().length;\n  auto ret = posix_memalign((void **)&vector, 64, len);  // 64-byte alignment\n  if (ret != 0 || vector == nullptr) {\n    LOG_ERROR(\"Failed to allocate buffer for file[%s]\", file_name.c_str());\n    return false;\n  }\n  uint32_t offset = id.vector().offset;\n  if (file.read(offset, vector, len) != len) {\n    LOG_ERROR(\"Failed to read file[%s]\", 
file_name.c_str());\n    free(vector);\n    vector = nullptr;\n    return false;\n  }\n  size = len;\n  return true;\n}\n\n\n// Thread-safe buffer pool implementation.\n//\n// BufferContext states:\n// 1. Must exist in the lookup (hash) table.\n// 2. LRU list presence:\n//    - In LRU: holds memory but not pinned by any users\n//    - Not in LRU: either holds memory pinned by users, or doesn't hold memory\n// 3. External references: when an external user acquires a context and pins the\n//    memory, that context is removed from LRU list; when they unpins the\n//    memory, that context is moved to LRU list if it was the last reference.\n//\n// Any operation on the hash table is protected by mutex_table_.\n// Any change to context state and LRU list is protected by mutex_context_.\n//\nclass BufferManager::BufferPool {\n public:\n  explicit BufferPool(uint64_t limit) : limit_(limit) {\n    sentinel_.next = &sentinel_;\n    sentinel_.prev = &sentinel_;\n  }\n\n\n  BufferContext *acquire_locked(BufferID &id) {\n    std::lock_guard<std::mutex> lock(mutex_context_);\n    if (auto iter = table_.find(id); iter != table_.end()) {\n      return iter->second.get();\n    }\n    auto [iter, _] =\n        table_.emplace(id, std::make_unique<BufferContext>(id, this));\n    return iter->second.get();\n  }\n\n\n  void try_release_context_locked(BufferContext *context) {\n    if (context->refs_context.load() != 0) {\n      return;\n    }\n    std::lock_guard<std::mutex> lock(mutex_table_);\n    if (context->refs_context.load() != 0) {\n      return;\n    }\n    if (context->state == BufferContext::State::IDLE) {\n      table_.erase(context->id);\n    }\n  }\n\n\n  void pin_locked(BufferContext *ctx) {\n    std::lock_guard<std::mutex> lock(mutex_context_);\n    if (ctx->state == BufferContext::State::IDLE) {\n      return pin_at_IDLE(ctx);\n    }\n    if (ctx->state == BufferContext::State::IN_USE) {\n      return pin_at_IN_USE(ctx);\n    }\n    if (ctx->state == 
BufferContext::State::CACHED) {\n      return pin_at_CACHED(ctx);\n    }\n    if (ctx->state == BufferContext::State::ERROR) {\n      return;\n    }\n  }\n\n\n  bool unpin_locked(BufferContext *ctx) {\n    uint32_t prev_refs = ctx->refs_buf.fetch_sub(1);\n    if (prev_refs > 1) {\n      return false;\n    }\n    std::lock_guard<std::mutex> lock(mutex_context_);\n    if (ctx->refs_buf.load() == 0 &&\n        ctx->state != BufferContext::State::CACHED) {\n      ctx->state = BufferContext::State::CACHED;\n      LRU_insert(ctx);\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n\n  void LRU_insert_locked(BufferContext *context) {\n    std::lock_guard<std::mutex> lock(mutex_context_);\n    LRU_insert(context);\n  }\n\n\n  void LRU_remove_locked(BufferContext *context) {\n    std::lock_guard<std::mutex> lock(mutex_context_);\n    LRU_remove(context);\n  }\n\n\n  uint64_t usage() const {\n    return usage_;\n  }\n\n\n private:\n  void pin_at_IDLE(BufferContext *ctx) {\n    ctx->state = BufferContext::State::RESERVED;\n\n    while (usage_ >= limit_) {\n      // The tail of LRU list is the least recently used context\n      BufferContext *victim = sentinel_.prev;\n      if (victim == &sentinel_) {  // No victim could be found\n        ctx->state = BufferContext::State::ERROR;\n        return;\n      }\n      if (victim->state == BufferContext::State::ERROR) {\n        LRU_remove(victim);\n        try_release_context_locked(ctx);\n        continue;\n      }\n      if (victim->id.type == BufferID::TYPE::PARQUET) {\n        victim->arrow_refs.clear();\n      } else {\n        free(victim->vector);\n        victim->vector = nullptr;\n      }\n      victim->state = BufferContext::State::IDLE;\n      LRU_remove(victim);\n      try_release_context_locked(ctx);\n      usage_ -= victim->size;\n    }\n\n    if (ctx->id.type == BufferID::TYPE::PARQUET) {\n      if (ctx->read_arrow_parquet().ok()) {\n        ctx->state = BufferContext::State::IN_USE;\n        
ctx->refs_buf.fetch_add(ctx->arrow_refs.size());\n        usage_ += ctx->size;\n      } else {\n        LOG_ERROR(\"Failed to read to %s\", ctx->id.to_string().c_str());\n        ctx->state = BufferContext::State::ERROR;\n      }\n    } else {\n      if (ctx->read_vector()) {\n        ctx->state = BufferContext::State::IN_USE;\n        ctx->refs_buf.fetch_add(1);\n        usage_ += ctx->size;\n      } else {\n        LOG_ERROR(\"Failed to read to %s\", ctx->id.to_string().c_str());\n        ctx->state = BufferContext::State::ERROR;\n      }\n    }\n  }\n\n\n  void pin_at_IN_USE(BufferContext *ctx) {\n    if (ctx->id.type == BufferID::TYPE::PARQUET) {\n      ctx->refs_buf.fetch_add(ctx->arrow_refs.size());\n    } else {\n      ctx->refs_buf.fetch_add(1);\n    }\n  }\n\n\n  void pin_at_CACHED(BufferContext *ctx) {\n    if (ctx->id.type == BufferID::TYPE::PARQUET) {\n      ctx->refs_buf.fetch_add(ctx->arrow_refs.size());\n    } else {\n      ctx->refs_buf.fetch_add(1);\n    }\n    LRU_remove(ctx);\n    ctx->state = BufferContext::State::IN_USE;\n  }\n\n\n  void LRU_insert(BufferContext *context) {\n    if (context->refs_buf > 0) {\n      return;  // Already pinned, should not be evicted\n    }\n    if (context->next != nullptr || context->prev != nullptr) {\n      return;\n    }\n    // Insert the context to the head of LRU list\n    context->next = sentinel_.next;\n    context->prev = &sentinel_;\n    sentinel_.next = context;\n    context->next->prev = context;\n    inactive_ += context->size;\n  }\n\n\n  void LRU_remove(BufferContext *context) {\n    if (context->next == nullptr) {\n      return;  // Not in LRU list\n    }\n    context->next->prev = context->prev;\n    context->prev->next = context->next;\n    context->next = nullptr;\n    context->prev = nullptr;\n    inactive_ -= context->size;\n  }\n\n private:\n  using Table =\n      std::unordered_map<BufferID, BufferContext::Pointer, IDHash, IDEqual>;\n\n  uint64_t limit_;\n  std::atomic<uint64_t> 
usage_{0};\n  std::atomic<uint64_t> inactive_{0};\n\n  Table table_{};\n  std::mutex mutex_table_{};\n  BufferContext sentinel_{BufferID{}, this};  // LRU list sentinel\n  std::mutex mutex_context_{};\n};\n\n\nBufferManager::ArrowBufferDeleter::ArrowBufferDeleter(BufferContext *c)\n    : context(c) {}\n\n\nvoid BufferManager::ArrowBufferDeleter::operator()(arrow::Buffer *) {\n  context->pool->unpin_locked(context);\n}\n\n\nBufferHandle::BufferHandle(BufferContext *context) : context_(context) {\n  if (context_ != nullptr) {\n    pool_ = context_->pool;\n    context_->refs_context.fetch_add(1);\n  }\n}\n\n\nBufferHandle::~BufferHandle() {\n  if (context_ != nullptr) {\n    uint32_t prev_refs = context_->refs_context.fetch_sub(1);\n    if (prev_refs > 1) {\n      return;\n    }\n    if (context_->state == BufferContext::State::IDLE) {\n      pool_->try_release_context_locked(context_);\n    }\n  }\n}\n\n\nstd::shared_ptr<arrow::ChunkedArray> BufferHandle::pin_parquet_data() {\n  pool_->pin_locked(context_);\n  return context_->arrow;\n}\n\n\nvoid *BufferHandle::pin_vector_data() {\n  if (!context_) {\n    return nullptr;\n  }\n  pool_->pin_locked(context_);\n  return context_->vector;\n}\n\n\nbool BufferHandle::unpin_vector_data() {\n  if (!context_) {\n    return true;\n  }\n  return pool_->unpin_locked(context_);\n}\n\n\nuint32_t BufferHandle::references() const {\n  return context_->refs_buf.load();\n}\n\n\nuint32_t BufferHandle::size() const {\n  return context_->size;\n}\n\n\nvoid BufferManager::init(uint64_t limit, uint32_t num_shards) {\n  pools_.clear();\n  uint64_t limit_per_shard = ailego_align(limit / num_shards, 4096);\n  for (uint32_t i = 0; i < num_shards; ++i) {\n    auto pool = new BufferPool(limit_per_shard);\n    pools_.push_back(pool);\n  }\n  LOG_INFO(\n      \"BufferManager initialized with [%u] buffer pools, [%zu] bytes memory \"\n      \"limit per pool, total memory limit [%zu] bytes\",\n      num_shards, (size_t)limit_per_shard, 
(size_t)limit);\n}\n\n\nBufferHandle BufferManager::acquire(BufferID &buffer_id) {\n  static IDHash id_hash{};\n  auto hash_val = id_hash(buffer_id);\n  auto ctx = pools_[hash_val % pools_.size()]->acquire_locked(buffer_id);\n  return BufferHandle(ctx);\n}\n\n\nstd::unique_ptr<BufferHandle> BufferManager::acquire_ptr(BufferID &buffer_id) {\n  static IDHash id_hash{};\n  auto hash_val = id_hash(buffer_id);\n  auto ctx = pools_[hash_val % pools_.size()]->acquire_locked(buffer_id);\n  return std::make_unique<BufferHandle>(ctx);\n}\n\n\nuint64_t BufferManager::total_size_in_bytes() const {\n  uint64_t total_usage = 0;\n  for (auto pool : pools_) {\n    total_usage += pool->usage();\n  }\n  return total_usage;\n}\n\n\nBufferManager::~BufferManager() {\n  for (auto pool : pools_) {\n    delete pool;\n  }\n}\n\n\n}  // namespace ailego\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/buffer/buffer_pool.cc",
    "content": "#include <zvec/ailego/buffer/buffer_pool.h>\n#include <zvec/core/framework/index_logger.h>\n\nnamespace zvec {\nnamespace ailego {\n\nint LRUCache::init(size_t block_size) {\n  block_size_ = block_size;\n  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {\n    queues_.push_back(ConcurrentQueue(block_size));\n  }\n  return 0;\n}\n\nbool LRUCache::evict_single_block(BlockType &item) {\n  bool found = false;\n  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {\n    found = queues_[i].try_dequeue(item);\n    if (found) {\n      break;\n    }\n  }\n  return found;\n}\n\nbool LRUCache::add_single_block(const LPMap *lp_map, const BlockType &block,\n                                int block_type) {\n  bool ok = queues_[block_type].enqueue(block);\n  if (!ok) {\n    LOG_ERROR(\"enqueue failed.\");\n    return false;\n  }\n  evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed);\n  if (evict_queue_insertions_ % block_size_ == 0) {\n    this->clear_dead_node(lp_map);\n  }\n  return true;\n}\n\nvoid LRUCache::clear_dead_node(const LPMap *lp_map) {\n  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {\n    size_t clear_size = block_size_ * 2;\n    if (queues_[i].size_approx() < clear_size * 4) {\n      continue;\n    }\n    size_t clear_count = 0;\n    ConcurrentQueue tmp(block_size_);\n    BlockType item;\n    while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) {\n      if (!lp_map->isDeadBlock(item)) {\n        if (!tmp.enqueue(item)) {\n          LOG_ERROR(\"enqueue failed.\");\n        }\n      }\n    }\n    while (tmp.try_dequeue(item)) {\n      if (!lp_map->isDeadBlock(item)) {\n        if (!queues_[i].enqueue(item)) {\n          LOG_ERROR(\"enqueue failed.\");\n        }\n      }\n    }\n  }\n}\n\nvoid LPMap::init(size_t entry_num) {\n  if (entries_) {\n    delete[] entries_;\n  }\n  entry_num_ = entry_num;\n  entries_ = new Entry[entry_num_];\n  for (size_t i = 0; i < entry_num_; i++) {\n    
entries_[i].ref_count.store(std::numeric_limits<int>::min());\n    entries_[i].load_count.store(0);\n    entries_[i].buffer = nullptr;\n  }\n  cache_.init(entry_num * 4);\n}\n\nchar *LPMap::acquire_block(block_id_t block_id, bool lru_mode) {\n  assert(block_id < entry_num_);\n  Entry &entry = entries_[block_id];\n  if (!lru_mode) {\n    return entry.buffer;\n  }\n  while (true) {\n    int current_count = entry.ref_count.load(std::memory_order_acquire);\n    if (current_count < 0) {\n      return nullptr;\n    }\n    if (entry.ref_count.compare_exchange_weak(current_count, current_count + 1,\n                                              std::memory_order_acq_rel,\n                                              std::memory_order_acquire)) {\n      if (current_count == 0) {\n        entry.load_count.fetch_add(1, std::memory_order_relaxed);\n      }\n      return entry.buffer;\n    }\n  }\n}\n\nvoid LPMap::release_block(block_id_t block_id) {\n  assert(block_id < entry_num_);\n  Entry &entry = entries_[block_id];\n\n  if (entry.ref_count.fetch_sub(1, std::memory_order_release) == 1) {\n    std::atomic_thread_fence(std::memory_order_acquire);\n    LRUCache::BlockType block;\n    block.first = block_id;\n    block.second = entry.load_count.load();\n    cache_.add_single_block(this, block, 0);\n  }\n}\n\nchar *LPMap::evict_block(block_id_t block_id) {\n  assert(block_id < entry_num_);\n  Entry &entry = entries_[block_id];\n  int expected = 0;\n  if (entry.ref_count.compare_exchange_strong(\n          expected, std::numeric_limits<int>::min())) {\n    char *buffer = entry.buffer;\n    entry.buffer = nullptr;\n    return buffer;\n  } else {\n    return nullptr;\n  }\n}\n\nchar *LPMap::set_block_acquired(block_id_t block_id, char *buffer) {\n  assert(block_id < entry_num_);\n  Entry &entry = entries_[block_id];\n  while (true) {\n    int current_count = entry.ref_count.load(std::memory_order_relaxed);\n    if (current_count >= 0) {\n      if 
(entry.ref_count.compare_exchange_weak(\n              current_count, current_count + 1, std::memory_order_acq_rel,\n              std::memory_order_acquire)) {\n        return entry.buffer;\n      }\n    } else {\n      if (entry.ref_count.compare_exchange_weak(current_count, 1,\n                                                std::memory_order_acq_rel,\n                                                std::memory_order_acquire)) {\n        entry.buffer = buffer;\n        entry.load_count.fetch_add(1, std::memory_order_relaxed);\n        return entry.buffer;\n      }\n    }\n  }\n}\n\nvoid LPMap::recycle(moodycamel::ConcurrentQueue<char *> &free_buffers) {\n  LRUCache::BlockType block;\n  do {\n    bool ok = cache_.evict_single_block(block);\n    if (!ok) {\n      return;\n    }\n  } while (isDeadBlock(block));\n  char *buffer = evict_block(block.first);\n  if (buffer) {\n    if (!free_buffers.enqueue(buffer)) {\n      LOG_ERROR(\"recycle buffer enqueue failed.\");\n      ailego_free(buffer);\n    }\n  }\n}\n\nVecBufferPool::VecBufferPool(const std::string &filename) {\n  fd_ = open(filename.c_str(), O_RDONLY);\n  if (fd_ < 0) {\n    throw std::runtime_error(\"Failed to open file: \" + filename);\n  }\n  struct stat st;\n  if (fstat(fd_, &st) < 0) {\n    ::close(fd_);\n    throw std::runtime_error(\"Failed to stat file: \" + filename);\n  }\n  file_size_ = st.st_size;\n}\n\nint VecBufferPool::init(size_t pool_capacity, size_t block_size,\n                        size_t segment_count) {\n  if (block_size == 0) {\n    LOG_ERROR(\"block_size must not be 0\");\n    return -1;\n  }\n  pool_capacity_ = pool_capacity;\n  size_t buffer_num = pool_capacity_ / block_size + 10;\n  size_t block_num = segment_count + 10;\n  lp_map_.init(block_num);\n  mutex_vec_.reserve(block_num);\n  for (int i = 0; i < block_num; i++) {\n    mutex_vec_.emplace_back(std::make_unique<std::mutex>());\n  }\n  for (size_t i = 0; i < buffer_num; i++) {\n    char *buffer = (char 
*)ailego_malloc(block_size);\n    if (buffer != nullptr) {\n      if (!free_buffers_.enqueue(buffer)) {\n        LOG_ERROR(\"recycle buffer enqueue failed.\");\n        ailego_free(buffer);\n        return -1;\n      }\n    } else {\n      LOG_ERROR(\"aligned_alloc %zu(size: %zu) failed\", i, block_size);\n      return -1;\n    }\n  }\n  LOG_DEBUG(\"Buffer pool num: %zu, entry num: %zu\", buffer_num,\n            lp_map_.entry_num());\n  no_lru_mode_ = false;\n  if (lp_map_.entry_num() <= buffer_num) {\n    no_lru_mode_ = true;\n  }\n  return 0;\n}\n\nVecBufferPoolHandle VecBufferPool::get_handle() {\n  return VecBufferPoolHandle(*this);\n}\n\nchar *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset,\n                                    size_t size, int retry) {\n  char *buffer = lp_map_.acquire_block(block_id, !no_lru_mode());\n  if (buffer) {\n    return buffer;\n  }\n  std::lock_guard<std::mutex> lock(*mutex_vec_[block_id]);\n  buffer = lp_map_.acquire_block(block_id, !no_lru_mode());\n  if (buffer) {\n    return buffer;\n  }\n  {\n    bool found = free_buffers_.try_dequeue(buffer);\n    if (!found && !no_lru_mode_) {\n      for (int i = 0; i < retry; i++) {\n        lp_map_.recycle(free_buffers_);\n        found = free_buffers_.try_dequeue(buffer);\n        if (found) {\n          break;\n        }\n      }\n    }\n    if (!found) {\n      LOG_ERROR(\"Buffer pool failed to get free buffer\");\n      return nullptr;\n    }\n  }\n\n  ssize_t read_bytes = pread(fd_, buffer, size, offset);\n  if (read_bytes != static_cast<ssize_t>(size)) {\n    LOG_ERROR(\"Buffer pool failed to read file at offset: %zu\", offset);\n    free_buffers_.enqueue(buffer);\n    return nullptr;\n  }\n  return lp_map_.set_block_acquired(block_id, buffer);\n}\n\nint VecBufferPool::get_meta(size_t offset, size_t length, char *buffer) {\n  ssize_t read_bytes = pread(fd_, buffer, length, offset);\n  if (read_bytes != static_cast<ssize_t>(length)) {\n    LOG_ERROR(\"Buffer pool 
failed to read file at offset: %zu\", offset);\n    return -1;\n  }\n  return 0;\n}\n\nchar *VecBufferPoolHandle::get_block(size_t offset, size_t size,\n                                     size_t block_id) {\n  char *buffer = pool_.acquire_buffer(block_id, offset, size, 5);\n  return buffer;\n}\n\nint VecBufferPoolHandle::get_meta(size_t offset, size_t length, char *buffer) {\n  return pool_.get_meta(offset, length, buffer);\n}\n\nvoid VecBufferPoolHandle::release_one(block_id_t block_id) {\n  if (!pool_.no_lru_mode()) {\n    pool_.lp_map_.release_block(block_id);\n  }\n}\n\nvoid VecBufferPoolHandle::acquire_one(block_id_t block_id) {\n  if (!pool_.no_lru_mode()) {\n    pool_.lp_map_.acquire_block(block_id, true);\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/container/bitmap.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"bitmap.h\"\n\nnamespace zvec {\nnamespace ailego {\n\nsize_t Bitset::BitwiseAndCardinality(const Bitset &lhs, const Bitset &rhs) {\n  return BitsetHelper::BitwiseAndCardinality(\n      lhs.array_.data(), rhs.array_.data(),\n      std::min(lhs.array_.size(), rhs.array_.size()));\n}\n\nsize_t Bitset::BitwiseAndnotCardinality(const Bitset &lhs, const Bitset &rhs) {\n  size_t lsize = lhs.array_.size();\n  size_t rsize = rhs.array_.size();\n\n  if (lsize > rsize) {\n    return (\n        BitsetHelper::BitwiseAndnotCardinality(lhs.array_.data(),\n                                               rhs.array_.data(), rsize) +\n        BitsetHelper::Cardinality(lhs.array_.data() + rsize, lsize - rsize));\n  }\n  return BitsetHelper::BitwiseAndnotCardinality(lhs.array_.data(),\n                                                rhs.array_.data(), lsize);\n}\n\nsize_t Bitset::BitwiseXorCardinality(const Bitset &lhs, const Bitset &rhs) {\n  size_t lsize = lhs.array_.size();\n  size_t rsize = rhs.array_.size();\n\n  if (lsize < rsize) {\n    return (\n        BitsetHelper::BitwiseXorCardinality(lhs.array_.data(),\n                                            rhs.array_.data(), lsize) +\n        BitsetHelper::Cardinality(rhs.array_.data() + lsize, rsize - lsize));\n  } else if (lsize > rsize) {\n    return (\n        
BitsetHelper::BitwiseXorCardinality(lhs.array_.data(),\n                                            rhs.array_.data(), rsize) +\n        BitsetHelper::Cardinality(lhs.array_.data() + rsize, lsize - rsize));\n  }\n  return BitsetHelper::BitwiseXorCardinality(lhs.array_.data(),\n                                             rhs.array_.data(), lsize);\n}\n\nsize_t Bitset::BitwiseOrCardinality(const Bitset &lhs, const Bitset &rhs) {\n  size_t lsize = lhs.array_.size();\n  size_t rsize = rhs.array_.size();\n\n  if (lsize < rsize) {\n    return (\n        BitsetHelper::BitwiseOrCardinality(lhs.array_.data(), rhs.array_.data(),\n                                           lsize) +\n        BitsetHelper::Cardinality(rhs.array_.data() + lsize, rsize - lsize));\n  } else if (lsize > rsize) {\n    return (\n        BitsetHelper::BitwiseOrCardinality(lhs.array_.data(), rhs.array_.data(),\n                                           rsize) +\n        BitsetHelper::Cardinality(lhs.array_.data() + rsize, lsize - rsize));\n  }\n  return BitsetHelper::BitwiseOrCardinality(lhs.array_.data(),\n                                            rhs.array_.data(), lsize);\n}\n\nvoid Bitmap::clear(void) {\n  for (std::vector<Bucket *>::iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    delete (*iter);\n  }\n  array_.clear();\n}\n\nvoid Bitmap::copy(const Bitmap &rhs) {\n  this->clear();\n\n  for (std::vector<Bucket *>::const_iterator iter = rhs.array_.begin();\n       iter != rhs.array_.end(); ++iter) {\n    Bucket *bucket = NULL;\n    if (*iter) {\n      bucket = new Bucket(*(*iter));\n    }\n    array_.push_back(bucket);\n  }\n}\n\nvoid Bitmap::shrink_to_fit(void) {\n  size_t shrink_count = 0;\n  std::vector<Bucket *>::reverse_iterator iter;\n\n  for (iter = array_.rbegin(); iter != array_.rend(); ++iter) {\n    if (*iter) {\n      if (!(*iter)->test_none()) {\n        break;\n      }\n      delete (*iter);\n      *iter = NULL;\n    }\n    ++shrink_count;\n  }\n  for 
(; iter != array_.rend(); ++iter) {\n    if ((*iter) && (*iter)->test_none()) {\n      delete (*iter);\n      *iter = NULL;\n    }\n  }\n  if (shrink_count != 0) {\n    array_.resize(array_.size() - shrink_count);\n  }\n}\n\nbool Bitmap::test(size_t num) const {\n  // High 16 bits\n  size_t offset = num >> 16;\n\n  if (offset < array_.size()) {\n    const Bucket *bucket = array_[offset];\n    if (bucket) {\n      // Low 16 bits\n      return bucket->test(static_cast<uint16_t>(num));\n    }\n  }\n  return false;\n}\n\nvoid Bitmap::set(size_t num) {\n  // High 16 bits\n  size_t offset = num >> 16;\n  if (offset >= array_.size()) {\n    array_.resize(offset + 1, NULL);\n  }\n\n  Bucket *&bucket = array_[offset];\n  if (!bucket) {\n    bucket = new Bucket;\n  }\n  // Low 16 bits\n  bucket->set(static_cast<uint16_t>(num));\n}\n\nvoid Bitmap::reset(size_t num) {\n  // High 16 bits\n  size_t offset = num >> 16;\n  if (offset >= array_.size()) {\n    array_.resize(offset + 1, NULL);\n  }\n\n  if (offset < array_.size()) {\n    Bucket *bucket = array_[offset];\n    if (bucket) {\n      // Low 16 bits\n      bucket->reset(static_cast<uint16_t>(num));\n    }\n  }\n}\n\nvoid Bitmap::flip(size_t num) {\n  // High 16 bits\n  uint16_t offset = (uint16_t)(num >> 16);\n  if (offset >= array_.size()) {\n    array_.resize(offset + 1, NULL);\n  }\n\n  Bucket *&bucket = array_[offset];\n  if (!bucket) {\n    bucket = new Bucket;\n  }\n  // Low 16 bits\n  bucket->flip(static_cast<uint16_t>(num));\n}\n\nvoid Bitmap::bitwise_and(const Bitmap &rhs) {\n  size_t overlap = std::min(array_.size(), rhs.array_.size());\n\n  for (size_t i = 0; i < overlap; ++i) {\n    Bucket *&dst = array_[i];\n\n    if (dst) {\n      const Bucket *src = rhs.array_[i];\n      if (src) {\n        dst->bitwise_and(*src);\n      } else {\n        delete dst;\n        dst = NULL;\n      }\n    }\n  }\n  for (size_t i = overlap; i < array_.size(); ++i) {\n    Bucket *&dst = array_[i];\n    delete dst;\n    dst = 
NULL;\n  }\n}\n\nvoid Bitmap::bitwise_andnot(const Bitmap &rhs) {\n  size_t overlap = std::min(array_.size(), rhs.array_.size());\n\n  for (size_t i = 0; i < overlap; ++i) {\n    Bucket *&dst = array_[i];\n\n    if (dst) {\n      const Bucket *src = rhs.array_[i];\n      if (src) {\n        dst->bitwise_andnot(*src);\n      }\n    }\n  }\n}\n\nvoid Bitmap::bitwise_or(const Bitmap &rhs) {\n  size_t overlap = std::min(array_.size(), rhs.array_.size());\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *src = rhs.array_[i];\n\n    if (src) {\n      Bucket *&dst = array_[i];\n\n      if (dst) {\n        dst->bitwise_or(*src);\n      } else {\n        dst = new Bucket(*src);\n      }\n    }\n  }\n  for (size_t i = overlap; i < rhs.array_.size(); ++i) {\n    const Bucket *src = rhs.array_[i];\n    Bucket *bucket = NULL;\n\n    if (src) {\n      bucket = new Bucket(*src);\n    }\n    array_.push_back(bucket);\n  }\n}\n\nvoid Bitmap::bitwise_xor(const Bitmap &rhs) {\n  size_t overlap = std::min(array_.size(), rhs.array_.size());\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *src = rhs.array_[i];\n\n    if (src) {\n      Bucket *&dst = array_[i];\n\n      if (dst) {\n        dst->bitwise_xor(*src);\n      } else {\n        dst = new Bucket(*src);\n      }\n    }\n  }\n  for (size_t i = overlap; i < rhs.array_.size(); ++i) {\n    const Bucket *src = rhs.array_[i];\n    Bucket *bucket = NULL;\n\n    if (src) {\n      bucket = new Bucket(*src);\n    }\n    array_.push_back(bucket);\n  }\n}\n\nvoid Bitmap::bitwise_not(void) {\n  for (std::vector<Bucket *>::iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    Bucket *&bucket = *iter;\n    if (!bucket) {\n      bucket = new Bucket;\n    }\n    bucket->bitwise_not();\n  }\n}\n\nbool Bitmap::test_all(void) const {\n  if (array_.empty()) {\n    return false;\n  }\n  for (std::vector<Bucket *>::const_iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    if 
(!(*iter) || !(*iter)->test_all()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nbool Bitmap::test_any(void) const {\n  for (std::vector<Bucket *>::const_iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    if (*iter && (*iter)->test_any()) {\n      return true;\n    }\n  }\n  return false;\n}\n\nbool Bitmap::test_none(void) const {\n  for (std::vector<Bucket *>::const_iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    if (*iter && !(*iter)->test_none()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nsize_t Bitmap::cardinality(void) const {\n  size_t result = 0;\n  for (std::vector<Bucket *>::const_iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    if (*iter) {\n      result += (*iter)->cardinality();\n    }\n  }\n  return result;\n}\n\nvoid Bitmap::extract(size_t base, std::vector<size_t> *out) const {\n  for (std::vector<Bucket *>::const_iterator iter = array_.begin();\n       iter != array_.end(); ++iter) {\n    if (*iter) {\n      (*iter)->extract(base, out);\n    }\n    base += Bucket::MAX_SIZE;\n  }\n}\n\nsize_t Bitmap::BitwiseAndCardinality(const Bitmap &lhs, const Bitmap &rhs) {\n  size_t overlap = std::min(lhs.array_.size(), rhs.array_.size());\n  size_t dist = 0;\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *l = lhs.array_[i];\n    const Bucket *r = rhs.array_[i];\n\n    if (l && r) {\n      dist += Bucket::BitwiseAndCardinality(*l, *r);\n    }\n  }\n  return dist;\n}\n\nsize_t Bitmap::BitwiseAndnotCardinality(const Bitmap &lhs, const Bitmap &rhs) {\n  size_t overlap = std::min(lhs.array_.size(), rhs.array_.size());\n  size_t dist = 0;\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *l = lhs.array_[i];\n    if (l) {\n      const Bucket *r = rhs.array_[i];\n      if (r) {\n        dist += Bucket::BitwiseAndnotCardinality(*l, *r);\n      } else {\n        dist += l->cardinality();\n      }\n    }\n  }\n  for (size_t i = overlap; i 
< lhs.array_.size(); ++i) {\n    const Bucket *l = lhs.array_[i];\n    if (l) {\n      dist += l->cardinality();\n    }\n  }\n  return dist;\n}\n\nsize_t Bitmap::BitwiseXorCardinality(const Bitmap &lhs, const Bitmap &rhs) {\n  size_t overlap = std::min(lhs.array_.size(), rhs.array_.size());\n  size_t dist = 0;\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *l = lhs.array_[i];\n    const Bucket *r = rhs.array_[i];\n\n    if (l && r) {\n      dist += Bucket::BitwiseXorCardinality(*l, *r);\n    } else if (l) {\n      dist += l->cardinality();\n    } else if (r) {\n      dist += r->cardinality();\n    }\n  }\n  for (size_t i = overlap; i < lhs.array_.size(); ++i) {\n    const Bucket *l = lhs.array_[i];\n    if (l) {\n      dist += l->cardinality();\n    }\n  }\n  for (size_t i = overlap; i < rhs.array_.size(); ++i) {\n    const Bucket *r = rhs.array_[i];\n    if (r) {\n      dist += r->cardinality();\n    }\n  }\n  return dist;\n}\n\nsize_t Bitmap::BitwiseOrCardinality(const Bitmap &lhs, const Bitmap &rhs) {\n  size_t overlap = std::min(lhs.array_.size(), rhs.array_.size());\n  size_t dist = 0;\n\n  for (size_t i = 0; i < overlap; ++i) {\n    const Bucket *l = lhs.array_[i];\n    const Bucket *r = rhs.array_[i];\n\n    if (l && r) {\n      dist += Bucket::BitwiseOrCardinality(*l, *r);\n    } else if (l) {\n      dist += l->cardinality();\n    } else if (r) {\n      dist += r->cardinality();\n    }\n  }\n  for (size_t i = overlap; i < lhs.array_.size(); ++i) {\n    const Bucket *l = lhs.array_[i];\n    if (l) {\n      dist += l->cardinality();\n    }\n  }\n  for (size_t i = overlap; i < rhs.array_.size(); ++i) {\n    const Bucket *r = rhs.array_[i];\n    if (r) {\n      dist += r->cardinality();\n    }\n  }\n  return dist;\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/container/bitmap.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <vector>\n#include <ailego/utility/bitset_helper.h>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Fixed Bitset\n */\ntemplate <size_t N, typename = typename std::enable_if<N % 32 == 0>::type>\nclass FixedBitset {\n public:\n  enum { MAX_SIZE = N };\n\n  //! Constructor\n  FixedBitset(void) {\n    memset(array_, 0, sizeof(array_));\n  }\n\n  //! Constructor\n  FixedBitset(const FixedBitset &rhs) {\n    memcpy(array_, rhs.array_, sizeof(array_));\n  }\n\n  //! Destructor\n  ~FixedBitset(void) {}\n\n  //! Assignment\n  FixedBitset &operator=(const FixedBitset &rhs) {\n    memcpy(array_, rhs.array_, sizeof(array_));\n    return *this;\n  }\n\n  //! Retrieve data pointer\n  uint32_t *data(void) {\n    return reinterpret_cast<uint32_t *>(array_);\n  }\n\n  //! Retrieve data pointer\n  const uint32_t *data(void) const {\n    return reinterpret_cast<const uint32_t *>(array_);\n  }\n\n  //! Retrieve count of bits in set\n  constexpr size_t size(void) const {\n    return MAX_SIZE;\n  }\n\n  // ！Clear the bitset\n  void clear(void) {\n    memset(array_, 0, sizeof(array_));\n  }\n\n  //! 
Test a bit in bitset\n  bool test(size_t num) const {\n    ailego_assert_with(N > num, \"overflow argument\");\n    return ((array_[num >> 5] & (1u << (num & 0x1f))) != 0);\n  }\n\n  //! Set a bit in bitset\n  void set(size_t num) {\n    ailego_assert_with(N > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] |= mask;\n  }\n\n  //! Clear a bit in bitset\n  void reset(size_t num) {\n    ailego_assert_with(N > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] &= ~mask;\n  }\n\n  //! Toggle a bit in bitset\n  void flip(size_t num) {\n    ailego_assert_with(N > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] ^= mask;\n  }\n\n  //! Perform binary AND\n  void bitwise_and(const FixedBitset &rhs) {\n    BitsetHelper::BitwiseAnd(array_, rhs.array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Perform binary AND NOT\n  void bitwise_andnot(const FixedBitset &rhs) {\n    BitsetHelper::BitwiseAndnot(array_, rhs.array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Perform binary OR\n  void bitwise_or(const FixedBitset &rhs) {\n    BitsetHelper::BitwiseOr(array_, rhs.array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Perform binary XOR\n  void bitwise_xor(const FixedBitset &rhs) {\n    BitsetHelper::BitwiseXor(array_, rhs.array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Perform binary NOT\n  void bitwise_not(void) {\n    BitsetHelper::BitwiseNot(array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Check if all bits are set to true\n  bool test_all(void) const {\n    return BitsetHelper::TestAll(array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Check if any bits are set to true\n  bool test_any(void) const {\n    return BitsetHelper::TestAny(array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Check if none of the bits are set to true\n  bool test_none(void) const {\n    return BitsetHelper::TestNone(array_, ((N + 0x1f) >> 5));\n  }\n\n  //! 
Compute the cardinality of a bitset\n  size_t cardinality(void) const {\n    return BitsetHelper::Cardinality(array_, ((N + 0x1f) >> 5));\n  }\n\n  //! Extract the bitset to an array\n  void extract(size_t base, std::vector<size_t> *out) const {\n    const uint32_t *iter = array_;\n    const uint32_t *last = array_ + ((N + 0x1f) >> 5);\n\n    for (; iter != last; ++iter) {\n      uint32_t w = *iter;\n\n      while (w != 0) {\n        uint32_t c = ailego_ctz32(w);\n        w &= ~(1u << c);\n        out->push_back(base + c);\n      }\n      base += 32u;\n    }\n  }\n\n  //! Extract the bitset to an array\n  void extract(std::vector<size_t> *out) const {\n    this->extract(0, out);\n  }\n\n  //! Compute the AND cardinality between two bitsets\n  static size_t BitwiseAndCardinality(const FixedBitset &lhs,\n                                      const FixedBitset &rhs) {\n    return BitsetHelper::BitwiseAndCardinality(lhs.array_, rhs.array_,\n                                               ((N + 0x1f) >> 5));\n  }\n\n  //! Compute the ANDNOT cardinality between two bitsets\n  static size_t BitwiseAndnotCardinality(const FixedBitset &lhs,\n                                         const FixedBitset &rhs) {\n    return BitsetHelper::BitwiseAndnotCardinality(lhs.array_, rhs.array_,\n                                                  ((N + 0x1f) >> 5));\n  }\n\n  //! Compute the XOR cardinality between two bitsets\n  static size_t BitwiseXorCardinality(const FixedBitset &lhs,\n                                      const FixedBitset &rhs) {\n    return BitsetHelper::BitwiseXorCardinality(lhs.array_, rhs.array_,\n                                               ((N + 0x1f) >> 5));\n  }\n\n  //! 
Compute the OR cardinality between two bitsets\n  static size_t BitwiseOrCardinality(const FixedBitset &lhs,\n                                     const FixedBitset &rhs) {\n    return BitsetHelper::BitwiseOrCardinality(lhs.array_, rhs.array_,\n                                              ((N + 0x1f) >> 5));\n  }\n\n  //! Convert an array pointer to bitset pointer\n  static FixedBitset *Cast(uint32_t *arr) {\n    return reinterpret_cast<FixedBitset<N> *>(arr);\n  }\n\n  //! Convert an array pointer to bitset pointer\n  static const FixedBitset *Cast(const uint32_t *arr) {\n    return reinterpret_cast<const FixedBitset<N> *>(arr);\n  }\n\n  //! Convert an array pointer to bitset pointer\n  static FixedBitset *Cast(uint64_t *arr) {\n    return reinterpret_cast<FixedBitset<N> *>(arr);\n  }\n\n  //! Convert an array pointer to bitset pointer\n  static const FixedBitset *Cast(const uint64_t *arr) {\n    return reinterpret_cast<const FixedBitset<N> *>(arr);\n  }\n\n private:\n  uint32_t array_[(N + 0x1f) >> 5];\n};\n\n/*! Fixed Bitset (Special)\n */\ntemplate <>\nclass FixedBitset<0> {\n public:\n  enum { MAX_SIZE = 0 };\n\n  //! Retrieve max size of bitset\n  constexpr size_t size(void) const {\n    return MAX_SIZE;\n  }\n};\n\n/*! Bitset\n */\nclass Bitset {\n public:\n  //! Constructor\n  Bitset(void) : array_() {}\n\n  //! Constructor\n  Bitset(size_t bits) : array_((bits + 0x1f) >> 5) {}\n\n  //! Constructor\n  Bitset(const Bitset &rhs) : array_(rhs.array_) {}\n\n  //! Constructor\n  Bitset(Bitset &&rhs) : array_(std::move(rhs.array_)) {}\n\n  //! Destructor\n  ~Bitset(void) {}\n\n  //! Assignment\n  Bitset &operator=(const Bitset &rhs) {\n    array_ = rhs.array_;\n    return *this;\n  }\n\n  //! Assignment\n  Bitset &operator=(Bitset &&rhs) {\n    array_ = std::move(rhs.array_);\n    return *this;\n  }\n\n  //! Retrieve data pointer\n  uint32_t *data(void) {\n    return array_.data();\n  }\n\n  //! 
Retrieve data pointer\n  const uint32_t *data(void) const {\n    return array_.data();\n  }\n\n  //! Retrieve count of bits in set\n  size_t size(void) const {\n    return (array_.size() << 5);\n  }\n\n  //! Resize the bitset\n  void resize(size_t bits) {\n    array_.resize((bits + 0x1f) >> 5);\n  }\n\n  //! Clear the bitset\n  void clear(void) {\n    array_.clear();\n  }\n\n  //! Test a bit in bitset\n  bool test(size_t num) const {\n    ailego_assert_with(this->size() > num, \"overflow argument\");\n    return ((array_[num >> 5] & (1u << (num & 0x1f))) != 0);\n  }\n\n  //! Set a bit in bitset\n  void set(size_t num) {\n    ailego_assert_with(this->size() > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] |= mask;\n  }\n\n  //! Clear a bit in bitset\n  void reset(size_t num) {\n    ailego_assert_with(this->size() > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] &= ~mask;\n  }\n\n  //! Toggle a bit in bitset\n  void flip(size_t num) {\n    ailego_assert_with(this->size() > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] ^= mask;\n  }\n\n  //! Perform binary AND\n  void bitwise_and(const Bitset &rhs) {\n    BitsetHelper::BitwiseAnd(array_.data(), rhs.array_.data(),\n                             std::min(array_.size(), rhs.array_.size()));\n  }\n\n  //! Perform binary AND NOT\n  void bitwise_andnot(const Bitset &rhs) {\n    BitsetHelper::BitwiseAndnot(array_.data(), rhs.array_.data(),\n                                std::min(array_.size(), rhs.array_.size()));\n  }\n\n  //! Perform binary OR\n  void bitwise_or(const Bitset &rhs) {\n    BitsetHelper::BitwiseOr(array_.data(), rhs.array_.data(),\n                            std::min(array_.size(), rhs.array_.size()));\n  }\n\n  //! 
Perform binary XOR\n  void bitwise_xor(const Bitset &rhs) {\n    BitsetHelper::BitwiseXor(array_.data(), rhs.array_.data(),\n                             std::min(array_.size(), rhs.array_.size()));\n  }\n\n  //! Perform binary NOT\n  void bitwise_not(void) {\n    BitsetHelper::BitwiseNot(array_.data(), array_.size());\n  }\n\n  //! Check if all bits are set to true\n  bool test_all(void) const {\n    return BitsetHelper::TestAll(array_.data(), array_.size());\n  }\n\n  //! Check if any bits are set to true\n  bool test_any(void) const {\n    return BitsetHelper::TestAny(array_.data(), array_.size());\n  }\n\n  //! Check if none of the bits are set to true\n  bool test_none(void) const {\n    return BitsetHelper::TestNone(array_.data(), array_.size());\n  }\n\n  //! Compute the cardinality of a bitset\n  size_t cardinality(void) const {\n    return BitsetHelper::Cardinality(array_.data(), array_.size());\n  }\n\n  //! Extract the bitset to an array\n  void extract(size_t base, std::vector<size_t> *out) const {\n    const uint32_t *iter = array_.data();\n    const uint32_t *last = array_.data() + array_.size();\n\n    for (; iter != last; ++iter) {\n      uint32_t w = *iter;\n\n      while (w != 0) {\n        uint32_t c = ailego_ctz32(w);\n        w &= ~(1u << c);\n        out->push_back(base + c);\n      }\n      base += 32u;\n    }\n  }\n\n  //! Extract the bitset to an array\n  void extract(std::vector<size_t> *out) const {\n    this->extract(0, out);\n  }\n\n  //! Compute the AND cardinality between two bitsets\n  static size_t BitwiseAndCardinality(const Bitset &lhs, const Bitset &rhs);\n\n  //! Compute the ANDNOT cardinality between two bitsets\n  static size_t BitwiseAndnotCardinality(const Bitset &lhs, const Bitset &rhs);\n\n  //! Compute the XOR cardinality between two bitsets\n  static size_t BitwiseXorCardinality(const Bitset &lhs, const Bitset &rhs);\n\n  //! 
Compute the OR cardinality between two bitsets\n  static size_t BitwiseOrCardinality(const Bitset &lhs, const Bitset &rhs);\n\n private:\n  std::vector<uint32_t> array_;\n};\n\n/*! Bitmap\n */\nclass Bitmap {\n public:\n  typedef FixedBitset<65536u> Bucket;\n\n  //! Constructor\n  Bitmap(void) : array_() {}\n\n  //! Constructor\n  Bitmap(const Bitmap &rhs) {\n    this->copy(rhs);\n  }\n\n  //! Destructor\n  ~Bitmap(void) {\n    this->clear();\n  }\n\n  //! Assignment\n  Bitmap &operator=(const Bitmap &rhs) {\n    this->copy(rhs);\n    return *this;\n  }\n\n  //! Retrieve bucket size of bitmap\n  size_t bucket_size(void) const {\n    return array_.size();\n  }\n\n  //! Clear the bitmap\n  void clear(void);\n\n  //! Remove the none buckets\n  void shrink_to_fit(void);\n\n  //! Test a bit in bitmap\n  bool test(size_t num) const;\n\n  //! Set a bit in bitmap\n  void set(size_t num);\n\n  //! Reset a bit in bitmap\n  void reset(size_t num);\n\n  //! Toggle a bit in bitmap\n  void flip(size_t num);\n\n  //! Perform binary AND\n  void bitwise_and(const Bitmap &rhs);\n\n  //! Perform binary AND NOT\n  void bitwise_andnot(const Bitmap &rhs);\n\n  //! Perform binary OR\n  void bitwise_or(const Bitmap &rhs);\n\n  //! Perform binary XOR\n  void bitwise_xor(const Bitmap &rhs);\n\n  //! Perform binary NOT (It will expand the whole map)\n  void bitwise_not(void);\n\n  //! Check if all bits are set to true\n  bool test_all(void) const;\n\n  //! Check if any bits are set to true\n  bool test_any(void) const;\n\n  //! Check if none of the bits are set to true\n  bool test_none(void) const;\n\n  //! Compute the cardinality of a bitmap\n  size_t cardinality(void) const;\n\n  //! Extract the bitmap to an array\n  void extract(size_t base, std::vector<size_t> *out) const;\n\n  //! Extract the bitmap to an array\n  void extract(std::vector<size_t> *out) const {\n    this->extract(0, out);\n  }\n\n  //! 
Compute the AND cardinality between two bitmaps\n  static size_t BitwiseAndCardinality(const Bitmap &lhs, const Bitmap &rhs);\n\n  //! Compute the ANDNOT cardinality between two bitmaps\n  static size_t BitwiseAndnotCardinality(const Bitmap &lhs, const Bitmap &rhs);\n\n  //! Compute the XOR cardinality between two bitmaps\n  static size_t BitwiseXorCardinality(const Bitmap &lhs, const Bitmap &rhs);\n\n  //! Compute the OR cardinality between two bitmaps\n  static size_t BitwiseOrCardinality(const Bitmap &lhs, const Bitmap &rhs);\n\n protected:\n  //! Copy the content from another bitmap\n  void copy(const Bitmap &rhs);\n\n private:\n  std::vector<Bucket *> array_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/container/bloom_filter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cmath>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Bloom Filter Calculator\n */\nstruct BloomFilterCalculator {\n  /**\n   *  \\brief          Calculate probability of false positives\n   *  \\param n        Number of items in the filter\n   *  \\param m        Number of bits in the filter\n   *  \\param k        Number of hash functions\n   *  \\return         Probability of false positives\n   */\n  static double Probability(size_t n, size_t m, size_t k) {\n    return std::pow(1.0 - std::exp(-((double)k / (double)m * (double)n)), k);\n  }\n\n  /**\n   *  \\brief          Calculate number of items in the filter\n   *  \\param m        Number of bits in the filter\n   *  \\param k        Number of hash functions\n   *  \\param p        Probability of false positives\n   *  \\return         Number of items in the filter\n   */\n  static size_t NumberOfItems(size_t m, size_t k, double p) {\n    return (size_t)std::ceil(\n        -((double)m / (double)k *\n          std::log(1.0 - std::exp(std::log(p) / (double)k))));\n  }\n\n  /**\n   *  \\brief          Calculate number of bits in the filter\n   *  \\param n        Number of items in the filter\n   *  \\param p        Probability of false positives\n   *  \\return         
Number of bits in the filter\n   */\n  static size_t NumberOfBits(size_t n, double p) {\n    return (size_t)std::ceil((double)n * std::log(p) /\n                             std::log(1.0 / std::pow(2.0, std::log(2.0))));\n  }\n\n  /**\n   *  \\brief          Calculate number of bits in the filter\n   *  \\param n        Number of items in the filter\n   *  \\param k        Number of hash functions\n   *  \\param p        Probability of false positives\n   *  \\return         Number of bits in the filter\n   */\n  static size_t NumberOfBits(size_t n, size_t k, double p) {\n    return (size_t)std::ceil(-((double)k * (double)n /\n                               std::log(1.0 - std::pow(p, 1.0 / (double)k))));\n  }\n\n  /**\n   *  \\brief          Calculate number of bytes in the filter\n   *  \\param n        Number of items in the filter\n   *  \\param p        Probability of false positives\n   *  \\return         Number of bytes in the filter\n   */\n  static size_t NumberOfBytes(size_t n, double p) {\n    return ((NumberOfBits(n, p) + 7) >> 3);\n  }\n\n  /**\n   *  \\brief          Calculate number of bytes in the filter\n   *  \\param n        Number of items in the filter\n   *  \\param k        Number of hash functions\n   *  \\param p        Probability of false positives\n   *  \\return         Number of bytes in the filter\n   */\n  static size_t NumberOfBytes(size_t n, size_t k, double p) {\n    return ((NumberOfBits(n, k, p) + 7) >> 3);\n  }\n\n  /**\n   *  \\brief          Calculate number of hash functions\n   *  \\param n        Number of items in the filter\n   *  \\param m        Number of bits in the filter\n   *  \\return         Number of hash functions\n   */\n  static size_t NumberOfHash(size_t n, size_t m) {\n    return (size_t)std::round((double)m / (double)n * std::log(2.0));\n  }\n};\n\n/*! Bloom Filter\n */\ntemplate <size_t K>\nclass BloomFilter {\n public:\n  //! Constructor\n  BloomFilter(void) {}\n\n  //! 
Constructor\n  BloomFilter(size_t n, double p) {\n    if (n > 0 && p > 0.0 && p < 1.0) {\n      capacity_ = n;\n      bits_count_ = BloomFilterCalculator::NumberOfBits(n, K, p);\n      bits_count_ = ((bits_count_ + 31) >> 5) << 5;\n      probability_ = BloomFilterCalculator::Probability(n, bits_count_, K);\n      bitset_ = new uint32_t[bits_count_ >> 5];\n      memset(bitset_, 0, (bits_count_ >> 3));\n    }\n  }\n\n  //! Constructor\n  BloomFilter(BloomFilter &&rhs)\n      : bitset_(rhs.bitset_),\n        bits_count_(rhs.bits_count_),\n        capacity_(rhs.capacity_),\n        count_(rhs.count_),\n        probability_(rhs.probability_) {\n    rhs.bitset_ = nullptr;\n    rhs.bits_count_ = 0u;\n    rhs.capacity_ = 0u;\n    rhs.count_ = 0u;\n    rhs.probability_ = 0u;\n  }\n\n  //! Destructor\n  ~BloomFilter(void) {\n    delete[] bitset_;\n  }\n\n  //! Test if the filter is valid\n  bool is_valid(void) const {\n    return (bitset_ != nullptr);\n  }\n\n  //! Reset the bloom filter\n  bool reset(size_t n, double p) {\n    if (n <= 0 || p <= 0.0 || p >= 1.0) {\n      return false;\n    }\n    delete[] bitset_;\n    capacity_ = n;\n    count_ = 0u;\n    bits_count_ = BloomFilterCalculator::NumberOfBits(n, K, p);\n    bits_count_ = ((bits_count_ + 31) >> 5) << 5;\n    probability_ = BloomFilterCalculator::Probability(n, bits_count_, K);\n    bitset_ = new (std::nothrow) uint32_t[bits_count_ >> 5];\n    if (!bitset_) {\n      return false;\n    }\n    memset(bitset_, 0, (bits_count_ >> 3));\n    return true;\n  }\n\n  //! Clear the bloom filter\n  void clear(void) {\n    if (bitset_) {\n      memset(bitset_, 0, (bits_count_ >> 3));\n      count_ = 0u;\n    }\n  }\n\n  //! Insert an item into bloom filter\n  template <typename... TArgs,\n            typename = typename std::enable_if<\n                Conjunction<std::is_integral<TArgs>...>::value &&\n                sizeof...(TArgs) == K>::type>\n  bool insert(TArgs... 
vals) {\n    if (count_ >= capacity_) {\n      return false;\n    }\n    this->set_bits(vals...);\n    ++count_;\n    return true;\n  }\n\n  //! Force insert an item into bloom filter\n  template <typename... TArgs,\n            typename = typename std::enable_if<\n                Conjunction<std::is_integral<TArgs>...>::value &&\n                sizeof...(TArgs) == K>::type>\n  void force_insert(TArgs... vals) {\n    this->set_bits(vals...);\n    ++count_;\n  }\n\n  //! Test if an item is in bloom filter\n  template <typename... TArgs,\n            typename = typename std::enable_if<\n                Conjunction<std::is_integral<TArgs>...>::value &&\n                sizeof...(TArgs) == K>::type>\n  bool has(TArgs... vals) const {\n    return this->test_bits(vals...);\n  }\n\n  //! Retrieve count of bits in bloom filter\n  size_t bits_count(void) const {\n    return bits_count_;\n  }\n\n  //! Retrieve capacity of bloom filter\n  size_t capacity(void) const {\n    return capacity_;\n  }\n\n  //! Retrieve count of items in bloom filter\n  size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve probability of false positives\n  double probability(void) const {\n    return probability_;\n  }\n\n protected:\n  //! Disable them\n  BloomFilter(const BloomFilter &) = delete;\n  BloomFilter &operator=(const BloomFilter &) = delete;\n\n  //! Set bits in bloom filter\n  template <typename TArg>\n  void set_bits(TArg val) {\n    size_t num = static_cast<size_t>(val) % bits_count_;\n    bitset_[num >> 5] |= (1u << (num & 0x1f));\n  }\n\n  //! Set bits in bloom filter\n  template <typename TArg, typename... TArgs>\n  void set_bits(TArg val, TArgs... vals) {\n    this->set_bits(val);\n    this->set_bits(vals...);\n  }\n\n  //! Test bits in bloom filter\n  template <typename TArg>\n  bool test_bits(TArg val) const {\n    size_t num = static_cast<size_t>(val) % bits_count_;\n    return ((bitset_[num >> 5] & (1u << (num & 0x1f))) != 0);\n  }\n\n  //! 
Test bits in bloom filter\n  template <typename TArg, typename... TArgs>\n  bool test_bits(TArg val, TArgs... vals) const {\n    if (!this->test_bits(val)) {\n      return false;\n    }\n    return this->test_bits(vals...);\n  }\n\n private:\n  uint32_t *bitset_{nullptr};\n  size_t bits_count_{0u};\n  size_t capacity_{0u};\n  size_t count_{0u};\n  double probability_{0.0};\n};\n\n/*! Bloom Filter (Special)\n */\ntemplate <>\nstruct BloomFilter<0> {};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/container/params.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstring>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/logger/logger.h>\n\n//! Global environ variable\nextern char **environ;\n\nnamespace zvec {\nnamespace ailego {\n\nstatic void ParseFromJsonObject(const ailego::JsonObject &obj, Params *params) {\n  for (ailego::JsonObject::const_iterator it = obj.begin(); it != obj.end();\n       ++it) {\n    const ailego::JsonValue &val = it->value();\n\n    if (val.is_boolean()) {\n      params->set(it->key().as_stl_string(), val.as_bool());\n    } else if (val.is_integer()) {\n      params->set(it->key().as_stl_string(),\n                  static_cast<int64_t>(val.as_integer()));\n    } else if (val.is_float()) {\n      params->set(it->key().as_stl_string(), val.as_float());\n    } else if (val.is_string()) {\n      params->set(it->key().as_stl_string(),\n                  val.as_string().decode().as_stl_string());\n    } else if (val.is_object()) {\n      Params subparams;\n      ParseFromJsonObject(val.as_object(), &subparams);\n      params->set(it->key().as_stl_string(), std::move(subparams));\n    }\n  }\n}\n\nbool Params::ParseFromBuffer(const std::string &buf, Params *params) {\n  ailego::JsonValue val;\n  ailego::JsonParser parser;\n\n  parser.set_comment(true);\n  parser.set_simple(true);\n  parser.set_squote(true);\n  
parser.set_unstrict(false);\n  if (!parser.parse(buf.c_str(), &val)) {\n    return false;\n  }\n\n  if (!val.is_object()) {\n    return false;\n  }\n  ParseFromJsonObject(val.as_object(), params);\n  return true;\n}\n\nvoid Params::ParseFromEnvironment(Params *params) {\n  // Dump all environ string\n  for (size_t i = 0; environ[i]; ++i) {\n    const char *env = environ[i];\n    const char *p = std::strchr(env, '=');\n    if (p) {\n      params->set(std::string(env, p - env), std::string(p + 1));\n    }\n  }\n}\n\nstatic void SerializeToJsonObject(const Params &params,\n                                  ailego::JsonObject *obj) {\n  for (const auto &it : params.hypercube().cubes()) {\n    const ailego::Cube &cube = it.second;\n    const char *key = it.first.c_str();\n\n    if (cube.compatible<std::string>()) {\n      const auto &val = cube.unsafe_cast<std::string>();\n      ailego::JsonString str(val.data(), val.size());\n      obj->set(key, ailego::JsonValue(str.encode()));\n    } else if (cube.compatible<unsigned long long int>()) {\n      obj->set(key,\n               ailego::JsonValue(cube.unsafe_cast<unsigned long long int>()));\n    } else if (cube.compatible<long long int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long long int>()));\n    } else if (cube.compatible<unsigned long int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned long int>()));\n    } else if (cube.compatible<long int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long int>()));\n    } else if (cube.compatible<unsigned int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned int>()));\n    } else if (cube.compatible<int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<int>()));\n    } else if (cube.compatible<unsigned short int>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned short int>()));\n    } else if (cube.compatible<short int>()) {\n      obj->set(key, 
ailego::JsonValue(cube.unsafe_cast<short int>()));\n    } else if (cube.compatible<unsigned char>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned char>()));\n    } else if (cube.compatible<char>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<char>()));\n    } else if (cube.compatible<signed char>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<signed char>()));\n    } else if (cube.compatible<bool>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<bool>()));\n    } else if (cube.compatible<float>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<float>()));\n    } else if (cube.compatible<double>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<double>()));\n    } else if (cube.compatible<long double>()) {\n      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long double>()));\n    } else if (cube.compatible<Params>()) {\n      ailego::JsonObject subobj;\n      SerializeToJsonObject(cube.unsafe_cast<Params>(), &subobj);\n      obj->set(key, ailego::JsonValue(subobj));\n    } else {\n      LOG_WARN(\"Unsupported serializing \\'%s\\' <%s>.\", key, cube.type().name());\n    }\n  }\n}\n\nvoid Params::SerializeToBuffer(const Params &params, std::string *buf) {\n  if (buf != nullptr) {\n    ailego::JsonObject obj;\n    SerializeToJsonObject(params, &obj);\n    buf->assign(ailego::JsonValue(obj).as_json_string().as_stl_string());\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/container/reservoir.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <random>\n#include <vector>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Sampling Reservoir\n */\ntemplate <typename T, typename Allocator = std::allocator<T>>\nclass Reservoir {\n public:\n  //! Constructor\n  Reservoir(size_t cnt)\n      : samples_(cnt), total_(0), mt_(std::random_device()()), pool_() {\n    pool_.reserve(samples_);\n  }\n\n  //! Constructor\n  Reservoir(const Reservoir &rhs)\n      : samples_(rhs.samples_),\n        total_(rhs.total_),\n        mt_(std::random_device()()),\n        pool_(rhs.pool_) {}\n\n  //! Constructor\n  Reservoir(Reservoir &&rhs)\n      : samples_(rhs.samples_),\n        total_(rhs.total_),\n        mt_(std::random_device()()),\n        pool_(std::move(rhs.pool_)) {}\n\n  //! Destructor\n  ~Reservoir(void) {}\n\n  //! Assignment\n  Reservoir &operator=(const Reservoir &rhs) {\n    samples_ = rhs.samples_;\n    total_ = rhs.total_;\n    pool_ = rhs.pool_;\n    return *this;\n  }\n\n  //! Assignment\n  Reservoir &operator=(Reservoir &&rhs) {\n    samples_ = rhs.samples_;\n    total_ = rhs.total_;\n    pool_ = std::move(rhs.pool_);\n    return *this;\n  }\n\n  //! Retrieve pool of reservoir\n  std::vector<T, Allocator> *mutable_pool(void) {\n    return &pool_;\n  }\n\n  //! 
Retrieve pool of reservoir\n  const std::vector<T, Allocator> &pool(void) const {\n    return pool_;\n  }\n\n  //! Retrieve count of samples\n  size_t samples(void) const {\n    return samples_;\n  }\n\n  //! Retrieve total count of filling\n  size_t total(void) const {\n    return total_;\n  }\n\n  //! Reset the reservoir\n  void reset(void) {\n    total_ = 0;\n    pool_.clear();\n    pool_.reserve(samples_);\n  }\n\n  //! Fill the reservoir\n  void fill(const T &item) {\n    if (samples_ > 0) {\n      if (pool_.size() >= samples_) {\n        std::uniform_int_distribution<size_t> dt(0, total_);\n        size_t i = dt(mt_);\n\n        if (i < samples_) {\n          pool_[i] = item;\n        }\n      } else {\n        pool_.push_back(item);\n      }\n    }\n    ++total_;\n  }\n\n  //! Fill the reservoir\n  void fill(T &&item) {\n    if (samples_ > 0) {\n      if (pool_.size() >= samples_) {\n        std::uniform_int_distribution<size_t> dt(0, total_);\n        size_t i = dt(mt_);\n\n        if (i < samples_) {\n          pool_[i] = std::move(item);\n        }\n      } else {\n        pool_.push_back(std::move(item));\n      }\n    }\n    ++total_;\n  }\n\n private:\n  //! Disable them\n  Reservoir(void) = delete;\n\n  //! Members\n  size_t samples_;\n  size_t total_;\n  std::mt19937 mt_;\n  std::vector<T, Allocator> pool_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/container/vector_array.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Numerical Vector Array\n */\ntemplate <typename T,\n          typename =\n              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>\nclass NumericalVectorArray {\n public:\n  //! Type of value\n  using ValueType = typename NumericalVector<T>::ValueType;\n\n  //! Constructor\n  NumericalVectorArray(void) {}\n\n  //! Constructor\n  explicit NumericalVectorArray(size_t dim) : dimension_(dim) {}\n\n  //! Constructor\n  NumericalVectorArray(const NumericalVectorArray &rhs)\n      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}\n\n  //! Constructor\n  NumericalVectorArray(NumericalVectorArray &&rhs)\n      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}\n\n  //! Assignment\n  NumericalVectorArray &operator=(const NumericalVectorArray &rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = rhs.buffer_;\n    return *this;\n  }\n\n  //! Assignment\n  NumericalVectorArray &operator=(NumericalVectorArray &&rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = std::move(rhs.buffer_);\n    return *this;\n  }\n\n  //! Overloaded operator []\n  ValueType *operator[](size_t i) {\n    return (reinterpret_cast<ValueType *>(&buffer_[0]) + i * dimension_);\n  }\n\n  //! 
Overloaded operator []\n  const ValueType *operator[](size_t i) const {\n    return (reinterpret_cast<const ValueType *>(buffer_.data()) +\n            i * dimension_);\n  }\n\n  //! Append a vector\n  void append(const ValueType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec),\n                   dim * sizeof(ValueType));\n  }\n\n  //! Append vectors\n  void append(const ValueType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec),\n                   cnt * dim * sizeof(ValueType));\n  }\n\n  //! Append a vector\n  void append(const NumericalVector<ValueType> &vec) {\n    this->append(vec.data(), vec.dimension());\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const ValueType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = dim * sizeof(ValueType);\n    buffer_.replace(index * element_size, element_size,\n                    reinterpret_cast<const char *>(vec), element_size);\n  }\n\n  //! Replace vectors\n  void replace(size_t index, const ValueType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = dim * sizeof(ValueType);\n    size_t total = element_size * cnt;\n    buffer_.replace(index * element_size, total,\n                    reinterpret_cast<const char *>(vec), total);\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const NumericalVector<ValueType> &vec) {\n    this->replace(index, vec.data(), vec.dimension());\n  }\n\n  //! 
Request a change in capacity\n  void reserve(size_t n) {\n    buffer_.reserve(n * dimension_ * sizeof(ValueType));\n  }\n\n  //! Resize the array to a length of n elements\n  void resize(size_t n) {\n    buffer_.resize(n * dimension_ * sizeof(ValueType));\n  }\n\n  //! Clear the vector array\n  void clear(void) {\n    buffer_.clear();\n  }\n\n  //! Reset the vector array\n  void reset(size_t dim) {\n    dimension_ = dim;\n    buffer_.clear();\n  }\n\n  //! Requests the removal of unused capacity.\n  void shrink_to_fit(void) {\n    buffer_.shrink_to_fit();\n  }\n\n  //! Retrieve pointer of data\n  ValueType *data(void) {\n    return reinterpret_cast<ValueType *>(&buffer_[0]);\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *data(void) const {\n    return reinterpret_cast<const ValueType *>(buffer_.data());\n  }\n\n  //! Retrieve pointer of data\n  ValueType *at(size_t i) {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return (reinterpret_cast<ValueType *>(&buffer_[0]) + i * dimension_);\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *at(size_t i) const {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return (reinterpret_cast<const ValueType *>(buffer_.data()) +\n            i * dimension_);\n  }\n\n  //! Test if the array is empty\n  bool empty(void) const {\n    return buffer_.empty();\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const {\n    return (dimension_ > 0 ? buffer_.size() / (dimension_ * sizeof(ValueType))\n                           : 0u);\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return dimension_;\n  }\n\n  //! Retrieve size of array in bytes\n  size_t bytes(void) const {\n    return buffer_.size();\n  }\n\n private:\n  size_t dimension_{0u};\n  std::string buffer_{};\n};\n\n/*! 
Nibble Vector Array\n */\ntemplate <typename T,\n          typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass NibbleVectorArray {\n public:\n  //! Type of value\n  using ValueType = typename NibbleVector<T>::ValueType;\n  using StoreType = typename NibbleVector<T>::StoreType;\n\n  //! Constructor\n  NibbleVectorArray(void) {}\n\n  //! Constructor\n  explicit NibbleVectorArray(size_t dim)\n      : dimension_((dim + (sizeof(ValueType) << 1) - 1) /\n                       (sizeof(ValueType) << 1) * sizeof(ValueType)\n                   << 1) {}\n\n  //! Constructor\n  NibbleVectorArray(const NibbleVectorArray &rhs)\n      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}\n\n  //! Constructor\n  NibbleVectorArray(NibbleVectorArray &&rhs)\n      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}\n\n  //! Assignment\n  NibbleVectorArray &operator=(const NibbleVectorArray &rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = rhs.buffer_;\n    return *this;\n  }\n\n  //! Assignment\n  NibbleVectorArray &operator=(NibbleVectorArray &&rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = std::move(rhs.buffer_);\n    return *this;\n  }\n\n  //! Overloaded operator []\n  StoreType *operator[](size_t i) {\n    return reinterpret_cast<StoreType *>(&buffer_[0] + i * (dimension_ >> 1));\n  }\n\n  //! Overloaded operator []\n  const StoreType *operator[](size_t i) const {\n    return reinterpret_cast<const StoreType *>(&buffer_[0] +\n                                               i * (dimension_ >> 1));\n  }\n\n  //! Append a vector\n  void append(const StoreType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec), dim >> 1);\n  }\n\n  //! 
Append vectors\n  void append(const StoreType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec), cnt * (dim >> 1));\n  }\n\n  //! Append a vector\n  void append(const NibbleVector<ValueType> &vec) {\n    this->append(vec.data(), vec.dimension());\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const StoreType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = (dim >> 1);\n    buffer_.replace(index * element_size, element_size,\n                    reinterpret_cast<const char *>(vec), element_size);\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const StoreType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = (dim >> 1);\n    size_t total = element_size * cnt;\n    buffer_.replace(index * element_size, total,\n                    reinterpret_cast<const char *>(vec), total);\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const NibbleVector<ValueType> &vec) {\n    this->replace(index, vec.data(), vec.dimension());\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t n) {\n    buffer_.reserve(n * (dimension_ >> 1));\n  }\n\n  //! Resize the array to a length of n elements\n  void resize(size_t n) {\n    buffer_.resize(n * (dimension_ >> 1));\n  }\n\n  //! Clear the vector array\n  void clear(void) {\n    buffer_.clear();\n  }\n\n  //! Reset the vector array\n  void reset(size_t dim) {\n    dimension_ = (dim + (sizeof(ValueType) << 1) - 1) /\n                     (sizeof(ValueType) << 1) * sizeof(ValueType)\n                 << 1;\n    buffer_.clear();\n  }\n\n  //! 
Requests the removal of unused capacity.\n  void shrink_to_fit(void) {\n    buffer_.shrink_to_fit();\n  }\n\n  //! Retrieve pointer of data\n  StoreType *data(void) {\n    return reinterpret_cast<StoreType *>(&buffer_[0]);\n  }\n\n  //! Retrieve pointer of data\n  const StoreType *data(void) const {\n    return reinterpret_cast<const StoreType *>(buffer_.data());\n  }\n\n  //! Retrieve pointer of data\n  StoreType *at(size_t i) {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return reinterpret_cast<StoreType *>(&buffer_[0] + i * (dimension_ >> 1));\n  }\n\n  //! Retrieve pointer of data\n  const StoreType *at(size_t i) const {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return reinterpret_cast<const StoreType *>(buffer_.data() +\n                                               i * (dimension_ >> 1));\n  }\n\n  //! Test if the array is empty\n  bool empty(void) const {\n    return buffer_.empty();\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const {\n    return (dimension_ > 1 ? buffer_.size() / (dimension_ >> 1) : 0u);\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return dimension_;\n  }\n\n  //! Retrieve size of array in bytes\n  size_t bytes(void) const {\n    return buffer_.size();\n  }\n\n private:\n  size_t dimension_{0u};\n  std::string buffer_{};\n};\n\n/*! Binary Vector Array\n */\ntemplate <typename T,\n          typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass BinaryVectorArray {\n public:\n  //! Type of value\n  using ValueType = typename BinaryVector<T>::ValueType;\n\n  //! Constructor\n  BinaryVectorArray(void) {}\n\n  //! Constructor\n  explicit BinaryVectorArray(size_t dim)\n      : dimension_((dim + (sizeof(ValueType) << 3) - 1) /\n                   (sizeof(ValueType) << 3) * (sizeof(ValueType) << 3)) {}\n\n  //! 
Constructor\n  BinaryVectorArray(const BinaryVectorArray &rhs)\n      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}\n\n  //! Constructor\n  BinaryVectorArray(BinaryVectorArray &&rhs)\n      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}\n\n  //! Assignment\n  BinaryVectorArray &operator=(const BinaryVectorArray &rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = rhs.buffer_;\n    return *this;\n  }\n\n  //! Assignment\n  BinaryVectorArray &operator=(BinaryVectorArray &&rhs) {\n    dimension_ = rhs.dimension_;\n    buffer_ = std::move(rhs.buffer_);\n    return *this;\n  }\n\n  //! Overloaded operator []\n  ValueType *operator[](size_t i) {\n    return reinterpret_cast<ValueType *>(&buffer_[0] + i * (dimension_ >> 3));\n  }\n\n  //! Overloaded operator []\n  const ValueType *operator[](size_t i) const {\n    return reinterpret_cast<const ValueType *>(buffer_.data() +\n                                               i * (dimension_ >> 3));\n  }\n\n  //! Append a vector\n  void append(const ValueType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec), (dim >> 3));\n  }\n\n  //! Append vectors\n  void append(const ValueType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    buffer_.append(reinterpret_cast<const char *>(vec), cnt * (dim >> 3));\n  }\n\n  //! Append a vector\n  void append(const BinaryVector<ValueType> &vec) {\n    this->append(vec.data(), vec.dimension());\n  }\n\n  //! 
Replace a vector\n  void replace(size_t index, const ValueType *vec, size_t dim) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = (dim >> 3);\n    buffer_.replace(index * element_size, element_size,\n                    reinterpret_cast<const char *>(vec), element_size);\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const ValueType *vec, size_t dim, size_t cnt) {\n    if (ailego_unlikely(dim != dimension_)) {\n      throw std::length_error(\"Unmatched dimension\");\n    }\n    size_t element_size = (dim >> 3);\n    size_t total = element_size * cnt;\n    buffer_.replace(index * element_size, total,\n                    reinterpret_cast<const char *>(vec), total);\n  }\n\n  //! Replace a vector\n  void replace(size_t index, const BinaryVector<ValueType> &vec) {\n    this->replace(index, vec.data(), vec.dimension());\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t n) {\n    buffer_.reserve(n * (dimension_ >> 3));\n  }\n\n  //! Resize the array to a length of n elements\n  void resize(size_t n) {\n    buffer_.resize(n * (dimension_ >> 3));\n  }\n\n  //! Clear the vector array\n  void clear(void) {\n    buffer_.clear();\n  }\n\n  //! Reset the vector array\n  void reset(size_t dim) {\n    dimension_ = (dim + (sizeof(ValueType) << 3) - 1) /\n                 (sizeof(ValueType) << 3) * (sizeof(ValueType) << 3);\n    buffer_.clear();\n  }\n\n  //! Requests the removal of unused capacity.\n  void shrink_to_fit(void) {\n    buffer_.shrink_to_fit();\n  }\n\n  //! Retrieve pointer of data\n  ValueType *data(void) {\n    return reinterpret_cast<ValueType *>(&buffer_[0]);\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *data(void) const {\n    return reinterpret_cast<const ValueType *>(buffer_.data());\n  }\n\n  //! 
Retrieve pointer of data\n  ValueType *at(size_t i) {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return reinterpret_cast<ValueType *>(&buffer_[0] + i * (dimension_ >> 3));\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *at(size_t i) const {\n    if (ailego_unlikely(i >= this->count())) {\n      throw std::out_of_range(\"Index overflow\");\n    }\n    return reinterpret_cast<const ValueType *>(buffer_.data() +\n                                               i * (dimension_ >> 3));\n  }\n\n  //! Test if the array is empty\n  bool empty(void) const {\n    return buffer_.empty();\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const {\n    return (dimension_ > 0 ? buffer_.size() / (dimension_ >> 3) : 0u);\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return dimension_;\n  }\n\n  //! Retrieve size of array in bytes\n  size_t bytes(void) const {\n    return buffer_.size();\n  }\n\n private:\n  size_t dimension_{0u};\n  std::string buffer_{};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/encoding/json/mod_json.c",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <float.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <zvec/ailego/encoding/json/mod_json.h>\n\n#ifndef MOD_JSON_TOKEN_DEFOPTS\n#define MOD_JSON_TOKEN_DEFOPTS 0 /* default options of token */\n#endif\n#ifndef MOD_JSON_TOKEN_DEFOBJDEP\n#define MOD_JSON_TOKEN_DEFOBJDEP 64 /* default objects depth of token */\n#endif\n#ifndef MOD_JSON_TOKEN_DEFARRDEP\n#define MOD_JSON_TOKEN_DEFARRDEP 64 /* default arrays depth of token */\n#endif\n#ifndef MOD_JSON_STRING_DEFSIZE\n#define MOD_JSON_STRING_DEFSIZE 32 /* default started size of string */\n#endif\n#ifndef MOD_JSON_ARRAY_DEFSIZE\n#define MOD_JSON_ARRAY_DEFSIZE 32 /* default started size of array */\n#endif\n#ifndef MOD_JSON_OBJECT_DEFSIZE\n#define MOD_JSON_OBJECT_DEFSIZE 32 /* default started size of object */\n#endif\n\n#ifndef mod_json_malloc\n#define mod_json_malloc malloc\n#endif\n#ifndef mod_json_free\n#define mod_json_free free\n#endif\n\n#ifdef __GNUC__\n#define mod_json_likely(x) __builtin_expect(!!(x), 1)\n#define mod_json_unlikely(x) __builtin_expect(!!(x), 0)\n#else\n#define mod_json_likely(x) (x)\n#define mod_json_unlikely(x) (x)\n#endif\n\n#define mod_json_minus_if_ne_zero(COND) \\\n  if (mod_json_unlikely((COND) != 0)) return (-1)\n\n#define mod_json_minus_if_false(COND) \\\n  if (mod_json_unlikely(!(COND))) return (-1)\n\n#define 
mod_json_null_if_ne_zero(COND) \\\n  if (mod_json_unlikely((COND) != 0)) return (NULL)\n\n#define mod_json_null_if_false(COND) \\\n  if (mod_json_unlikely(!(COND))) return (NULL)\n\n#if defined(_MSC_VER)\n#pragma warning(disable : 4200)\n#define strtoull _strtoui64\n#define snprintf(buf, size, format, ...) \\\n  _snprintf_s(buf, size, _TRUNCATE, format, ##__VA_ARGS__)\n#endif\n#define mod_json_utils_snprintf snprintf\n#define mod_json_utils_strtoi strtoull\n#define mod_json_utils_strtof strtod\n#define mod_json_utils_strlen strlen\n\n/*! JSON Token\n */\nstruct mod_json_token {\n  mod_json_state_t state;\n  mod_json_error_t error;\n  mod_json_cchar_t *context;\n  mod_json_size_t options;\n  mod_json_size_t object_max_depth;\n  mod_json_size_t array_max_depth;\n  mod_json_size_t object_depth;\n  mod_json_size_t array_depth;\n  mod_json_event_t event_code;\n  mod_json_event_proc event_proc;\n  mod_json_void_t *param;\n  mod_json_char_t tags[0];\n};\n\ntypedef struct mod_json_parser mod_json_parser_t;\n\n/*! 
JSON Parser\n */\nstruct mod_json_parser {\n  mod_json_string_t *key;\n  mod_json_value_t *val_null;\n  mod_json_value_t *val_true;\n  mod_json_value_t *val_false;\n  mod_json_value_t *val_zero;\n  mod_json_value_t *val_zerof;\n  mod_json_value_t *val_empty;\n  mod_json_value_t *vals[0];\n};\n\nstatic inline mod_json_size_t mod_json_utils_clp2(mod_json_size_t n) {\n  n = n - 1;\n  n = n | (n >> 1);\n  n = n | (n >> 2);\n  n = n | (n >> 4);\n  n = n | (n >> 8);\n  n = n | (n >> 16);\n  return (n + 1);\n}\n\nstatic inline mod_json_size_t mod_json_utils_itostr(mod_json_char_t *buf,\n                                                    mod_json_integer_t val) {\n  mod_json_char_t *pos, *first, *last;\n\n  pos = buf;\n  if (val < 0) {\n    *pos++ = '-';\n    val = -val;\n  }\n\n  /* save pointer to first digit */\n  first = pos;\n\n  do {\n    /* convert to ASCII and store */\n    *pos++ = (mod_json_char_t)(val % 10 + '0');\n\n    /* next digit */\n    val /= 10;\n\n  } while (val > 0);\n\n  *pos = '\\0';\n\n  /* save pointer to last digit */\n  last = pos - 1;\n\n  /* reverse digit string */\n  while (first < last) {\n    mod_json_char_t temp = *first;\n    *first++ = *last;\n    *last-- = temp;\n  }\n  return (mod_json_size_t)(pos - buf);\n}\n\nstatic inline mod_json_float_t mod_json_utils_pow10(int n) {\n  /* 1e-308...1e308: 617 * 8 bytes = 4936 bytes */\n  static const mod_json_float_t etab[] = {\n      1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,\n      1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,\n      1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282,\n      1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273,\n      1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264,\n      1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255,\n      1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246,\n      1e-245, 1e-244, 
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237,\n      1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228,\n      1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219,\n      1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210,\n      1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,\n      1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192,\n      1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183,\n      1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174,\n      1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165,\n      1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156,\n      1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147,\n      1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138,\n      1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129,\n      1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,\n      1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,\n      1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,\n      1e-101, 1e-100, 1e-99,  1e-98,  1e-97,  1e-96,  1e-95,  1e-94,  1e-93,\n      1e-92,  1e-91,  1e-90,  1e-89,  1e-88,  1e-87,  1e-86,  1e-85,  1e-84,\n      1e-83,  1e-82,  1e-81,  1e-80,  1e-79,  1e-78,  1e-77,  1e-76,  1e-75,\n      1e-74,  1e-73,  1e-72,  1e-71,  1e-70,  1e-69,  1e-68,  1e-67,  1e-66,\n      1e-65,  1e-64,  1e-63,  1e-62,  1e-61,  1e-60,  1e-59,  1e-58,  1e-57,\n      1e-56,  1e-55,  1e-54,  1e-53,  1e-52,  1e-51,  1e-50,  1e-49,  1e-48,\n      1e-47,  1e-46,  1e-45,  1e-44,  1e-43,  1e-42,  1e-41,  1e-40,  1e-39,\n      1e-38,  1e-37,  1e-36,  1e-35,  1e-34,  1e-33,  1e-32,  1e-31,  1e-30,\n      1e-29,  1e-28,  1e-27,  1e-26,  1e-25,  1e-24,  1e-23,  1e-22,  1e-21,\n      1e-20,  1e-19,  1e-18,  1e-17,  1e-16,  1e-15,  
1e-14,  1e-13,  1e-12,\n      1e-11,  1e-10,  1e-9,   1e-8,   1e-7,   1e-6,   1e-5,   1e-4,   1e-3,\n      1e-2,   1e-1,   1e+0,   1e+1,   1e+2,   1e+3,   1e+4,   1e+5,   1e+6,\n      1e+7,   1e+8,   1e+9,   1e+10,  1e+11,  1e+12,  1e+13,  1e+14,  1e+15,\n      1e+16,  1e+17,  1e+18,  1e+19,  1e+20,  1e+21,  1e+22,  1e+23,  1e+24,\n      1e+25,  1e+26,  1e+27,  1e+28,  1e+29,  1e+30,  1e+31,  1e+32,  1e+33,\n      1e+34,  1e+35,  1e+36,  1e+37,  1e+38,  1e+39,  1e+40,  1e+41,  1e+42,\n      1e+43,  1e+44,  1e+45,  1e+46,  1e+47,  1e+48,  1e+49,  1e+50,  1e+51,\n      1e+52,  1e+53,  1e+54,  1e+55,  1e+56,  1e+57,  1e+58,  1e+59,  1e+60,\n      1e+61,  1e+62,  1e+63,  1e+64,  1e+65,  1e+66,  1e+67,  1e+68,  1e+69,\n      1e+70,  1e+71,  1e+72,  1e+73,  1e+74,  1e+75,  1e+76,  1e+77,  1e+78,\n      1e+79,  1e+80,  1e+81,  1e+82,  1e+83,  1e+84,  1e+85,  1e+86,  1e+87,\n      1e+88,  1e+89,  1e+90,  1e+91,  1e+92,  1e+93,  1e+94,  1e+95,  1e+96,\n      1e+97,  1e+98,  1e+99,  1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105,\n      1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114,\n      1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123,\n      1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132,\n      1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141,\n      1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150,\n      1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159,\n      1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168,\n      1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177,\n      1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186,\n      1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195,\n      1e+196, 1e+197, 1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204,\n      1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213,\n      1e+214, 
1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222,\n      1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231,\n      1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240,\n      1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249,\n      1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258,\n      1e+259, 1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267,\n      1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276,\n      1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285,\n      1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294,\n      1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303,\n      1e+304, 1e+305, 1e+306, 1e+307, 1e+308};\n  return (n < -308 ? 0.0 : etab[n + 308]);\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strskpb(mod_json_cchar_t *cstr) {\n  static const mod_json_char_t blanks[256] = {\n      0, 0, 0, 0, 0, 0, 0, 0, 0, '\\t', '\\n', '\\v', '\\f', '\\r', 0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    ' ',  0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,\n      
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0};\n\n  while (*(blanks + *cstr)) {\n    ++cstr;\n  }\n  return cstr;\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strskpc1(\n    mod_json_cchar_t *cstr) {\n  mod_json_char_t c;\n\n  while ((c = *cstr++) != '\\0') {\n    if (c == '\\r' || c == '\\n') {\n      return mod_json_utils_strskpb(cstr);\n    }\n  }\n  return (cstr - 1);\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strskpc2(\n    mod_json_cchar_t *cstr) {\n  mod_json_char_t c;\n\n  while ((c = *cstr++) != '\\0') {\n    /* asterisk, slash */\n    if (c == '*' && *cstr == '/') {\n      return mod_json_utils_strskpb(cstr + 1);\n    }\n  }\n  return (cstr - 1);\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strskp(mod_json_cchar_t *cstr) {\n  cstr = mod_json_utils_strskpb(cstr);\n\n  /* treat it as comments? */\n  while (*cstr == '/') {\n    mod_json_char_t c = *(cstr + 1); /* second char */\n\n    if (c == '/') {\n      /* two slashes */\n      cstr = mod_json_utils_strskpc1(cstr + 2);\n    } else if (c == '*') {\n      /* slash, asterisk */\n      cstr = mod_json_utils_strskpc2(cstr + 2);\n    } else {\n      /* invalid format */\n      break;\n    }\n  }\n  return cstr;\n}\n\nstatic inline int mod_json_utils_char2hex(mod_json_char_t ch) {\n  static const mod_json_char_t char2hex[256] = {\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0,  1,  2,  3,  4,  5,\n      6,  7,  8,  9,  16, 16, 16, 16, 16, 16, 16, 10, 11, 12, 13, 14, 15, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,\n      16, 16, 16, 16};\n  return *(char2hex + ch);\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strfquo(mod_json_cchar_t *cstr,\n                                                       mod_json_char_t quo) {\n  mod_json_char_t c;\n\n  for (c = *cstr; c != quo; c = *(++cstr)) {\n    if ((mod_json_uchar_t)c <= 0x1f) {\n      return NULL;\n    }\n\n    if (c != '\\\\') {\n      continue;\n    }\n\n    /* next char */\n    switch (*(++cstr)) {\n      case '\\\"':\n      case '/':\n      case 'b':\n      case 'f':\n      case '\\\\':\n      case 'n':\n      case 'r':\n      case 't':\n        /* ignore next char */\n        break;\n\n      case 'u':\n        if (mod_json_utils_char2hex(*(cstr + 1)) > 15) {\n          return NULL;\n        }\n        if (mod_json_utils_char2hex(*(cstr + 2)) > 15) {\n          return NULL;\n        }\n        if (mod_json_utils_char2hex(*(cstr + 3)) > 15) {\n          return NULL;\n        }\n        if (mod_json_utils_char2hex(*(cstr + 4)) > 15) {\n          return NULL;\n        }\n        cstr += 4;\n        break;\n\n      default:\n        /* invalid */\n        return NULL;\n    }\n  }\n  /* found it */\n  return cstr;\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strfquo2(mod_json_cchar_t *cstr,\n                                                        mod_json_char_t quo) {\n  mod_json_char_t c;\n\n  for (c = *cstr; c; c = *(++cstr)) {\n    if (c == quo) {\n      /* found it */\n      return cstr;\n    }\n\n    if (c == '\\\\') {\n      /* ignore next char */\n      if 
(*(++cstr) == '\\0') {\n        break;\n      }\n    }\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strfsep(mod_json_cchar_t *cstr) {\n  mod_json_char_t c;\n\n  while ((c = *cstr++) != '\\0') {\n    switch (c) {\n      case ':':\n      case ' ':\n      case '\\t':\n      case '\\r':\n      case '\\n':\n      case '\\f':\n      case '\\v':\n        return (cstr - 1);\n    }\n  }\n  return (cstr - 1);\n}\n\nstatic inline mod_json_cchar_t *mod_json_utils_strfsep2(\n    mod_json_cchar_t *cstr) {\n  mod_json_char_t c;\n\n  while ((c = *cstr++) != '\\0') {\n    switch (c) {\n      case ':':\n      case ' ':\n      case '\\t':\n      case '\\r':\n      case '\\n':\n      case '\\f':\n      case '\\v':\n        return (cstr - 1);\n\n      case '/':\n        if (*cstr == '/' || *cstr == '*') {\n          return (cstr - 1);\n        }\n    }\n  }\n  return (cstr - 1);\n}\n\nstatic inline mod_json_char_t *mod_json_utils_uni2utf8(mod_json_char_t *buf,\n                                                       mod_json_size_t size,\n                                                       mod_json_uchar_t high,\n                                                       mod_json_uchar_t low) {\n  /* convert to UTF-8 */\n  if (high >= 0x8) {\n    /* 0800 - FFFF | 1110xxxx 10xxxxxx 10xxxxxx */\n    if (size >= 3) {\n      *buf++ = (mod_json_char_t)(0xE0 | (high >> 4));\n      *buf++ = (mod_json_char_t)(0x80 | ((high & 0xF) << 2) | (low >> 6));\n      *buf++ = (mod_json_char_t)(0x80 | (low & 0x3F));\n      return buf;\n    }\n  } else if (high > 0 || low >= 0x80) {\n    /* 0080 - 07FF | 110xxxxx 10xxxxxx */\n    if (size >= 2) {\n      *buf++ = (mod_json_char_t)(0xC0 | (high << 2) | (low >> 6));\n      *buf++ = (mod_json_char_t)(0x80 | (low & 0x3F));\n      return buf;\n    }\n  } else {\n    /* 0000 - 007F | 0xxxxxxx */\n    if (size >= 1) {\n      *buf++ = (mod_json_char_t)(low);\n      return buf;\n    }\n  }\n  return (mod_json_char_t 
*)0;\n}\n\nmod_json_value_t *mod_json_value_set_null(void) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_null;\n  val->data.c_int = 0;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_object(mod_json_object_t *obj) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_object;\n  val->data.c_obj = obj ? mod_json_object_grab(obj) : NULL;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_array(mod_json_array_t *arr) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_array;\n  val->data.c_arr = arr ? mod_json_array_grab(arr) : NULL;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_string(mod_json_string_t *str) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_string;\n  val->data.c_str = str ? 
mod_json_string_grab(str) : NULL;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_buffer(mod_json_cchar_t *buf,\n                                            mod_json_size_t len) {\n  mod_json_value_t *val;\n  mod_json_string_t *str;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  /* create a string */\n  str = mod_json_string_set(buf, len);\n  if (mod_json_unlikely(!str)) {\n    mod_json_free(val);\n    return NULL;\n  }\n\n  val->refer = 1;\n  val->type = mod_json_type_string;\n  val->data.c_str = str;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_integer(mod_json_integer_t num) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_integer;\n  val->data.c_int = num;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_float(mod_json_float_t dbl) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_float;\n  val->data.c_float = dbl;\n  return val;\n}\n\nmod_json_value_t *mod_json_value_set_boolean(mod_json_boolean_t bol) {\n  mod_json_value_t *val;\n\n  /* create a value */\n  val = (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));\n  mod_json_null_if_false(val);\n\n  val->refer = 1;\n  val->type = mod_json_type_boolean;\n  val->data.c_bool = bol ? 
MOD_JSON_TRUE : MOD_JSON_FALSE;\n  return val;\n}\n\nstatic inline void mod_json_value_clear(mod_json_value_t *val) {\n  switch (val->type) {\n    case mod_json_type_object:\n      mod_json_object_unset(val->data.c_obj);\n      break;\n\n    case mod_json_type_array:\n      mod_json_array_unset(val->data.c_arr);\n      break;\n\n    case mod_json_type_string:\n      mod_json_string_unset(val->data.c_str);\n      break;\n\n    default:\n      break;\n  }\n}\n\nvoid mod_json_value_assign_null(mod_json_value_t *val) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_null;\n    val->data.c_int = 0;\n  }\n}\n\nvoid mod_json_value_assign_object(mod_json_value_t *val,\n                                  mod_json_object_t *obj) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_object;\n    val->data.c_obj = obj ? mod_json_object_grab(obj) : NULL;\n  }\n}\n\nvoid mod_json_value_assign_array(mod_json_value_t *val, mod_json_array_t *arr) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_array;\n    val->data.c_arr = arr ? mod_json_array_grab(arr) : NULL;\n  }\n}\n\nvoid mod_json_value_assign_string(mod_json_value_t *val,\n                                  mod_json_string_t *str) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_string;\n    val->data.c_str = str ? 
mod_json_string_grab(str) : NULL;\n  }\n}\n\nvoid mod_json_value_assign_integer(mod_json_value_t *val,\n                                   mod_json_integer_t num) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_integer;\n    val->data.c_int = num;\n  }\n}\n\nvoid mod_json_value_assign_float(mod_json_value_t *val, mod_json_float_t dbl) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_float;\n    val->data.c_float = dbl;\n  }\n}\n\nvoid mod_json_value_assign_boolean(mod_json_value_t *val,\n                                   mod_json_boolean_t bol) {\n  if (val) {\n    mod_json_value_clear(val);\n    val->type = mod_json_type_boolean;\n    val->data.c_bool = bol ? MOD_JSON_TRUE : MOD_JSON_FALSE;\n  }\n}\n\nvoid mod_json_value_assign(mod_json_value_t *dst, mod_json_value_t *src) {\n  if (!dst || dst == src) {\n    return;\n  }\n\n  if (!src) {\n    /* treat as JSON null */\n    mod_json_value_assign_null(dst);\n    return;\n  }\n\n  switch (src->type) {\n    case mod_json_type_boolean:\n      mod_json_value_assign_boolean(dst, src->data.c_bool);\n      break;\n\n    case mod_json_type_integer:\n      mod_json_value_assign_integer(dst, src->data.c_int);\n      break;\n\n    case mod_json_type_float:\n      mod_json_value_assign_float(dst, src->data.c_float);\n      break;\n\n    case mod_json_type_string:\n      mod_json_value_assign_string(dst, src->data.c_str);\n      break;\n\n    case mod_json_type_array:\n      mod_json_value_assign_array(dst, src->data.c_arr);\n      break;\n\n    case mod_json_type_object:\n      mod_json_value_assign_object(dst, src->data.c_obj);\n      break;\n\n    default:\n      mod_json_value_assign_null(dst);\n      break;\n  }\n}\n\nstatic inline int mod_json_value_merge_array(mod_json_value_t *val,\n                                             mod_json_array_t *arr) {\n  if (val->type != mod_json_type_array || !val->data.c_arr) {\n    mod_json_value_assign_array(val, arr);\n    
return 0;\n  }\n\n  if (arr) {\n    if (mod_json_array_is_shared(val->data.c_arr)) {\n      mod_json_array_put(val->data.c_arr);\n      val->data.c_arr = mod_json_array_clone(val->data.c_arr);\n    }\n    return mod_json_array_merge(val->data.c_arr, arr);\n  }\n  return 0;\n}\n\nstatic inline int mod_json_value_merge_object(mod_json_value_t *val,\n                                              mod_json_object_t *obj) {\n  if (val->type != mod_json_type_object || !val->data.c_obj) {\n    mod_json_value_assign_object(val, obj);\n    return 0;\n  }\n\n  if (obj) {\n    if (mod_json_object_is_shared(val->data.c_obj)) {\n      mod_json_object_put(val->data.c_obj);\n      val->data.c_obj = mod_json_object_clone(val->data.c_obj);\n    }\n    return mod_json_object_merge(val->data.c_obj, obj);\n  }\n  return 0;\n}\n\nint mod_json_value_merge(mod_json_value_t *dst, mod_json_value_t *src) {\n  mod_json_minus_if_false(dst && dst != src);\n\n  if (!src) {\n    mod_json_value_assign_null(dst);\n    return 0;\n  }\n\n  switch (src->type) {\n    case mod_json_type_boolean:\n      mod_json_value_assign_boolean(dst, src->data.c_bool);\n      break;\n\n    case mod_json_type_integer:\n      mod_json_value_assign_integer(dst, src->data.c_int);\n      break;\n\n    case mod_json_type_float:\n      mod_json_value_assign_float(dst, src->data.c_float);\n      break;\n\n    case mod_json_type_string:\n      mod_json_value_assign_string(dst, src->data.c_str);\n      break;\n\n    case mod_json_type_array:\n      return mod_json_value_merge_array(dst, src->data.c_arr);\n\n    case mod_json_type_object:\n      return mod_json_value_merge_object(dst, src->data.c_obj);\n\n    default:\n      mod_json_value_assign_null(dst);\n      break;\n  }\n  return 0;\n}\n\nmod_json_object_t *mod_json_value_object(mod_json_value_t *val) {\n  if (val && val->type == mod_json_type_object) {\n    return (val->data.c_obj);\n  }\n  return NULL;\n}\n\nmod_json_array_t *mod_json_value_array(mod_json_value_t *val) 
{\n  if (val && val->type == mod_json_type_array) {\n    return (val->data.c_arr);\n  }\n  return NULL;\n}\n\nmod_json_string_t *mod_json_value_string(mod_json_value_t *val) {\n  if (val && val->type == mod_json_type_string) {\n    return (val->data.c_str);\n  }\n  return NULL;\n}\n\nmod_json_cchar_t *mod_json_value_cstring(mod_json_value_t *val) {\n  if (val && val->type == mod_json_type_string) {\n    return mod_json_string_cstr(val->data.c_str);\n  }\n  return NULL;\n}\n\nmod_json_float_t mod_json_value_float(mod_json_value_t *val) {\n  if (val) {\n    switch (val->type) {\n      case mod_json_type_boolean:\n        return (val->data.c_bool ? 1.0 : 0.0);\n\n      case mod_json_type_integer:\n        return (mod_json_float_t)(val->data.c_int);\n\n      case mod_json_type_float:\n        return (val->data.c_float);\n\n      case mod_json_type_string:\n        return mod_json_string_float(val->data.c_str);\n\n      default:\n        break;\n    }\n  }\n  return (0.0);\n}\n\nmod_json_boolean_t mod_json_value_boolean(mod_json_value_t *val) {\n  if (val) {\n    switch (val->type) {\n      case mod_json_type_null:\n        return MOD_JSON_FALSE;\n\n      case mod_json_type_object:\n        return (mod_json_object_count(val->data.c_obj) != 0);\n\n      case mod_json_type_array:\n        return (mod_json_array_count(val->data.c_arr) != 0);\n\n      case mod_json_type_string:\n        return (mod_json_string_length(val->data.c_str) != 0);\n\n      case mod_json_type_integer:\n        return (val->data.c_int != 0);\n\n      case mod_json_type_float:\n        return (val->data.c_float != 0);\n\n      case mod_json_type_boolean:\n        return (val->data.c_bool);\n\n      default:\n        break;\n    }\n  }\n  return MOD_JSON_FALSE;\n}\n\nmod_json_integer_t mod_json_value_integer(mod_json_value_t *val) {\n  if (val) {\n    switch (val->type) {\n      case mod_json_type_boolean:\n        return (val->data.c_bool ? 
1 : 0);\n\n      case mod_json_type_integer:\n        return (val->data.c_int);\n\n      case mod_json_type_float:\n        return (mod_json_integer_t)(val->data.c_float);\n\n      case mod_json_type_string:\n        return mod_json_string_integer(val->data.c_str);\n\n      default:\n        break;\n    }\n  }\n  return (0);\n}\n\nmod_json_value_t *mod_json_value_clone(mod_json_value_t *val) {\n  if (val) {\n    switch (val->type) {\n      case mod_json_type_null:\n        return mod_json_value_set_null();\n\n      case mod_json_type_object:\n        return mod_json_value_set_object(val->data.c_obj);\n\n      case mod_json_type_array:\n        return mod_json_value_set_array(val->data.c_arr);\n\n      case mod_json_type_string:\n        return mod_json_value_set_string(val->data.c_str);\n\n      case mod_json_type_integer:\n        return mod_json_value_set_integer(val->data.c_int);\n\n      case mod_json_type_float:\n        return mod_json_value_set_float(val->data.c_float);\n\n      case mod_json_type_boolean:\n        return mod_json_value_set_boolean(val->data.c_bool);\n\n      default:\n        break;\n    }\n  }\n  return NULL;\n}\n\nstatic inline mod_json_boolean_t mod_json_value_is_equal_float(\n    mod_json_float_t lhs, mod_json_float_t rhs) {\n  mod_json_float_t diff = lhs - rhs;\n  return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n}\n\nmod_json_boolean_t mod_json_value_is_equal(mod_json_value_t *lhs,\n                                           mod_json_value_t *rhs) {\n  if (lhs == rhs) {\n    /* The same pointer */\n    return MOD_JSON_TRUE;\n  }\n\n  if (lhs && rhs && lhs->type == rhs->type) {\n    switch (lhs->type) {\n      case mod_json_type_null:\n        return MOD_JSON_TRUE;\n\n      case mod_json_type_object:\n        return mod_json_object_is_equal(lhs->data.c_obj, rhs->data.c_obj);\n\n      case mod_json_type_array:\n        return mod_json_array_is_equal(lhs->data.c_arr, rhs->data.c_arr);\n\n      case mod_json_type_string:\n        
return (mod_json_string_compare(lhs->data.c_str, rhs->data.c_str) == 0);\n\n      case mod_json_type_integer:\n        return (lhs->data.c_int == rhs->data.c_int);\n\n      case mod_json_type_float:\n        return mod_json_value_is_equal_float(lhs->data.c_float,\n                                             rhs->data.c_float);\n\n      case mod_json_type_boolean:\n        return ((!lhs->data.c_bool) == (!rhs->data.c_bool));\n\n      default:\n        break;\n    }\n  }\n  return MOD_JSON_FALSE;\n}\n\nvoid mod_json_value_unset(mod_json_value_t *val) {\n  if (val && mod_json_value_put(val) <= 0) {\n    mod_json_value_clear(val);\n    mod_json_free(val);\n  }\n}\n\nstatic inline int mod_json_string_expand(mod_json_string_t *str,\n                                         mod_json_size_t size) {\n  mod_json_char_t *cstr;\n  mod_json_size_t len;\n\n  size = mod_json_utils_clp2(size);\n  if (size < MOD_JSON_STRING_DEFSIZE) {\n    size = MOD_JSON_STRING_DEFSIZE;\n  }\n  mod_json_minus_if_false(size > str->size);\n\n  cstr = (mod_json_char_t *)mod_json_malloc(size * sizeof(mod_json_char_t));\n  mod_json_minus_if_false(cstr);\n\n  len = (mod_json_size_t)(str->last - str->first);\n  if (len != 0) {\n    memcpy(cstr, str->first, len + 1);\n  } else {\n    *cstr = '\\0'; /* terminal character */\n  }\n  mod_json_free(str->first);\n  str->first = cstr;\n  str->last = cstr + len;\n  str->size = size;\n\n  /* success */\n  return 0;\n}\n\nint mod_json_string_reserve(mod_json_string_t *str, mod_json_size_t n) {\n  mod_json_minus_if_false(str);\n\n  if (str->size >= n + 1) {\n    /* needn't grow */\n    return 0;\n  }\n  return mod_json_string_expand(str, n + 1);\n}\n\nstatic inline mod_json_string_t *mod_json_string_malloc(mod_json_size_t size) {\n  mod_json_string_t *str;\n  mod_json_char_t *buf;\n\n  buf = (mod_json_char_t *)mod_json_malloc(size * sizeof(mod_json_char_t));\n  mod_json_null_if_false(buf);\n\n  str = (mod_json_string_t 
*)mod_json_malloc(sizeof(mod_json_string_t));\n  if (mod_json_unlikely(!str)) {\n    mod_json_free(buf);\n    return NULL;\n  }\n\n  str->refer = 1;\n  str->size = size;\n  str->first = buf;\n  str->last = buf;\n  *buf = '\\0';\n  return str;\n}\n\nint mod_json_string_assign(mod_json_string_t *str, mod_json_cchar_t *cstr,\n                           mod_json_size_t len) {\n  mod_json_string_reset(str);\n  mod_json_minus_if_ne_zero(mod_json_string_reserve(str, len));\n\n  if (cstr && len) {\n    memcpy(str->first, cstr, len);\n  }\n  str->last = str->first + len;\n  *(str->last) = '\\0';\n\n  /* success */\n  return 0;\n}\n\nstatic inline mod_json_string_t *mod_json_string_set_empty(void) {\n  return mod_json_string_malloc(MOD_JSON_STRING_DEFSIZE);\n}\n\nstatic inline mod_json_string_t *mod_json_string_set_cstr(\n    mod_json_cchar_t *cstr, mod_json_size_t len) {\n  mod_json_string_t *str;\n\n  str = mod_json_string_malloc(mod_json_utils_clp2(len + 1));\n  mod_json_null_if_false(str);\n\n  str->last = str->first + len;\n  memcpy(str->first, cstr, len);\n  *(str->last) = '\\0';\n  return str;\n}\n\nmod_json_string_t *mod_json_string_set(mod_json_cchar_t *cstr,\n                                       mod_json_size_t len) {\n  return ((cstr && len) ? 
mod_json_string_set_cstr(cstr, len)\n                        : mod_json_string_set_empty());\n}\n\nvoid mod_json_string_unset(mod_json_string_t *str) {\n  if (str && mod_json_string_put(str) <= 0) {\n    mod_json_free(str->first);\n    mod_json_free(str);\n  }\n}\n\nvoid mod_json_string_reset(mod_json_string_t *str) {\n  if (str) {\n    str->last = str->first;\n    *(str->first) = '\\0';\n  }\n}\n\nstatic inline int mod_json_string_add_char(mod_json_string_t *str,\n                                           mod_json_char_t ch) {\n  mod_json_size_t need;\n\n  need = (mod_json_size_t)(str->last - str->first) + 2;\n  if (need > str->size) {\n    mod_json_minus_if_ne_zero(mod_json_string_expand(str, need));\n  }\n\n  /* append to string */\n  *(str->last++) = ch;\n  *(str->last) = '\\0';\n\n  /* success */\n  return 0;\n}\n\nstatic inline int mod_json_string_add_cstr(mod_json_string_t *str,\n                                           mod_json_cchar_t *cstr,\n                                           mod_json_size_t len) {\n  if (cstr && len) {\n    mod_json_size_t need;\n\n    need = len + (mod_json_size_t)(str->last - str->first) + 1;\n    if (need > str->size) {\n      mod_json_minus_if_ne_zero(mod_json_string_expand(str, need));\n    }\n\n    /* append to string */\n    memcpy(str->last, cstr, len);\n    str->last += len;\n    *(str->last) = '\\0';\n  }\n\n  /* success */\n  return 0;\n}\n\nstatic inline int mod_json_string_add_jstr(mod_json_string_t *str,\n                                           mod_json_string_t *val) {\n  return mod_json_string_add_cstr(str, val->first,\n                                  (mod_json_size_t)(val->last - val->first));\n}\n\nint mod_json_string_add(mod_json_string_t *str, mod_json_string_t *val) {\n  return mod_json_string_add_jstr(str, val);\n}\n\nint mod_json_string_append(mod_json_string_t *str, mod_json_cchar_t *cstr,\n                           mod_json_size_t len) {\n  return mod_json_string_add_cstr(str, cstr, 
len);\n}\n\nmod_json_size_t mod_json_string_hash(mod_json_string_t *str) {\n  mod_json_size_t hash = 1;\n\n  if (str) {\n    mod_json_cchar_t *iter = str->first;\n    mod_json_cchar_t *last = str->last;\n\n    for (; iter != last; ++iter) {\n      mod_json_size_t c = (mod_json_size_t)(*iter);\n      hash = hash * 131 + c;\n    }\n  }\n  return hash;\n}\n\nint mod_json_string_compare(mod_json_string_t *str1, mod_json_string_t *str2) {\n  mod_json_size_t len1 = 0, len2 = 0;\n\n  if (str1 == str2) {\n    /* The same pointer */\n    return 0;\n  }\n\n  if (str1) {\n    len1 = (mod_json_size_t)(str1->last - str1->first);\n    if (str2) {\n      len2 = (mod_json_size_t)(str2->last - str2->first);\n      if (len1 == len2) {\n        return memcmp(str1->first, str2->first, len1);\n      }\n    }\n  } else {\n    /* The first string is null, and the second string it not null. */\n    len2 = (mod_json_size_t)(str2->last - str2->first);\n  }\n  return (int)(len1 - len2);\n}\n\nmod_json_integer_t mod_json_string_integer(mod_json_string_t *str) {\n  return (str ? (mod_json_integer_t)mod_json_utils_strtoi(str->first, NULL, 0)\n              : 0);\n}\n\nmod_json_float_t mod_json_string_float(mod_json_string_t *str) {\n  return (str ? 
mod_json_utils_strtof(str->first, NULL) : 0.0);\n}\n\nstatic inline int mod_json_string_flat(mod_json_string_t *dst,\n                                       mod_json_string_t *src) {\n  static mod_json_cchar_t *flattab[32] = {\n      \"\\\\u0000\", \"\\\\u0001\", \"\\\\u0002\", \"\\\\u0003\", \"\\\\u0004\", \"\\\\u0005\",\n      \"\\\\u0006\", \"\\\\u0007\", \"\\\\b\",     \"\\\\t\",     \"\\\\n\",     \"\\\\u000b\",\n      \"\\\\f\",     \"\\\\r\",     \"\\\\u000e\", \"\\\\u000f\", \"\\\\u0010\", \"\\\\u0011\",\n      \"\\\\u0012\", \"\\\\u0013\", \"\\\\u0014\", \"\\\\u0015\", \"\\\\u0016\", \"\\\\u0017\",\n      \"\\\\u0018\", \"\\\\u0019\", \"\\\\u001a\", \"\\\\u001b\", \"\\\\u001c\", \"\\\\u001d\",\n      \"\\\\u001e\", \"\\\\u001f\"};\n\n  /* length of items in flat table */\n  static const mod_json_uchar_t flatlen[32] = {6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2,\n                                               6, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6,\n                                               6, 6, 6, 6, 6, 6, 6, 6, 6, 6};\n\n  mod_json_cchar_t *first = src->first;\n  mod_json_cchar_t *iter = src->first;\n  mod_json_cchar_t *last = src->last;\n\n  /* the whole string */\n  for (; iter != last; ++iter) {\n    int c = *iter;\n\n    if ((mod_json_uchar_t)c <= 0x1f) {\n      if (iter > first) {\n        mod_json_minus_if_ne_zero(mod_json_string_add_cstr(\n            dst, first, (mod_json_size_t)(iter - first)));\n      }\n      mod_json_minus_if_ne_zero(\n          mod_json_string_add_cstr(dst, flattab[c], flatlen[c]));\n\n      /* skip current character */\n      first = iter + 1;\n    } else if (c == '\\\"' || c == '\\\\') {\n      if (iter > first) {\n        mod_json_minus_if_ne_zero(mod_json_string_add_cstr(\n            dst, first, (mod_json_size_t)(iter - first)));\n      }\n      mod_json_minus_if_ne_zero(mod_json_string_add_char(dst, '\\\\'));\n\n      /* don't skip current character */\n      first = iter;\n    }\n  }\n\n  if (iter > first) {\n    
mod_json_minus_if_ne_zero(\n        mod_json_string_add_cstr(dst, first, (mod_json_size_t)(iter - first)));\n  }\n\n  /* success */\n  return 0;\n}\n\nstatic inline int mod_json_string_unflat(mod_json_string_t *dst,\n                                         mod_json_string_t *src) {\n  enum {\n    state_normal,\n    state_rev_slash,\n    state_digit_1,\n    state_digit_2,\n    state_digit_3,\n    state_digit_4\n  } state;\n\n  mod_json_char_t *pbuf = dst->first;\n  mod_json_char_t *pend = dst->first + dst->size;\n  mod_json_cchar_t *iter = src->first;\n  mod_json_cchar_t *last = src->last;\n  mod_json_uchar_t high = 0;\n  mod_json_uchar_t low = 0;\n\n  /* the whole string */\n  for (state = state_normal; iter != last; ++iter) {\n    int c = *iter;\n\n    switch (state) {\n      case state_normal:\n        if (c != '\\\\') {\n          mod_json_minus_if_false(pbuf < pend);\n          *pbuf++ = (mod_json_char_t)c;\n        } else {\n          /* '\\\\' in process */\n          state = state_rev_slash;\n        }\n        break;\n\n      case state_rev_slash:\n        mod_json_minus_if_false(pbuf < pend);\n\n        switch (c) {\n          case '\\\"':\n            state = state_normal;\n            *pbuf++ = '\\\"';\n            break;\n          case '/':\n            state = state_normal;\n            *pbuf++ = '/';\n            break;\n          case 'b':\n            state = state_normal;\n            *pbuf++ = '\\b';\n            break;\n          case 'f':\n            state = state_normal;\n            *pbuf++ = '\\f';\n            break;\n          case '\\\\':\n            state = state_normal;\n            *pbuf++ = '\\\\';\n            break;\n          case 'n':\n            state = state_normal;\n            *pbuf++ = '\\n';\n            break;\n          case 'r':\n            state = state_normal;\n            *pbuf++ = '\\r';\n            break;\n          case 't':\n            state = state_normal;\n            *pbuf++ = '\\t';\n            break;\n 
         case 'u':\n            state = state_digit_1;\n            break;\n          default:\n            return -1;\n        }\n        break;\n\n      case state_digit_1:\n        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {\n          /* invalid character */\n          return -1;\n        }\n        high = (mod_json_uchar_t)(c << 4);\n        state = state_digit_2;\n        break;\n\n      case state_digit_2:\n        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {\n          /* invalid character */\n          return -1;\n        }\n        high |= (mod_json_uchar_t)c;\n        state = state_digit_3;\n        break;\n\n      case state_digit_3:\n        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {\n          /* invalid character */\n          return -1;\n        }\n        low = (mod_json_uchar_t)(c << 4);\n        state = state_digit_4;\n        break;\n\n      case state_digit_4:\n        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {\n          /* invalid character */\n          return -1;\n        }\n        low |= (mod_json_uchar_t)c;\n\n        /* decode as a UTF-8 string */\n        pbuf = mod_json_utils_uni2utf8(pbuf, (mod_json_size_t)(pend - pbuf),\n                                       high, low);\n        if (!pbuf) {\n          /* lack of buffer */\n          return -1;\n        }\n        state = state_normal;\n        break;\n    }\n  }\n\n  if (state != state_normal) {\n    /* uncompleted state */\n    return -1;\n  }\n  mod_json_minus_if_false(pbuf < pend);\n\n  /* update the last pointer */\n  *(dst->last = pbuf) = '\\0';\n\n  /* success */\n  return 0;\n}\n\nmod_json_string_t *mod_json_string_encode(mod_json_string_t *src) {\n  mod_json_string_t *dst;\n  mod_json_null_if_false(src);\n\n  dst = mod_json_string_malloc(\n      mod_json_utils_clp2((mod_json_size_t)(src->last - src->first) + 1));\n  mod_json_null_if_false(dst);\n\n  if (mod_json_unlikely(mod_json_string_flat(dst, src) 
!= 0)) {\n    mod_json_string_unset(dst);\n    return NULL;\n  }\n  return dst;\n}\n\nmod_json_string_t *mod_json_string_decode(mod_json_string_t *src) {\n  mod_json_string_t *dst;\n  mod_json_null_if_false(src);\n\n  dst = mod_json_string_malloc(\n      mod_json_utils_clp2((mod_json_size_t)(src->last - src->first) + 1));\n  mod_json_null_if_false(dst);\n\n  if (mod_json_unlikely(mod_json_string_unflat(dst, src) != 0)) {\n    mod_json_string_unset(dst);\n    return NULL;\n  }\n  return dst;\n}\n\nmod_json_array_t *mod_json_array_set(mod_json_size_t size) {\n  mod_json_array_t *arr;\n  mod_json_value_t **buf;\n\n  size = (size ? mod_json_utils_clp2(size) : MOD_JSON_ARRAY_DEFSIZE);\n  buf = (mod_json_value_t **)mod_json_malloc(size * sizeof(mod_json_value_t *));\n  mod_json_null_if_false(buf);\n\n  /* create an array */\n  arr = (mod_json_array_t *)mod_json_malloc(sizeof(mod_json_array_t));\n  if (mod_json_unlikely(!arr)) {\n    mod_json_free(buf);\n    return NULL;\n  }\n\n  arr->refer = 1;\n  arr->size = size;\n  arr->first = buf;\n  arr->last = buf;\n  return arr;\n}\n\nmod_json_array_t *mod_json_array_clone(mod_json_array_t *arr) {\n  mod_json_array_t *arr2 = NULL;\n\n  if (arr) {\n    arr2 = mod_json_array_set((mod_json_size_t)(arr->last - arr->first));\n    if (arr2) {\n      mod_json_value_t **iter = arr->first;\n\n      /* clone items */\n      for (; iter != arr->last; ++iter) {\n        *arr2->last++ = *iter ? 
mod_json_value_grab(*iter) : NULL;\n      }\n    }\n  }\n  return arr2;\n}\n\nmod_json_boolean_t mod_json_array_is_equal(mod_json_array_t *lhs,\n                                           mod_json_array_t *rhs) {\n  mod_json_value_t **itl, **itr;\n\n  if (lhs == rhs) {\n    return MOD_JSON_TRUE;\n  }\n\n  if (!lhs || !rhs || ((lhs->last - lhs->first) != (rhs->last - rhs->first))) {\n    return MOD_JSON_FALSE;\n  }\n\n  /* compare items */\n  for (itl = lhs->first, itr = rhs->first; itl != lhs->last; ++itl, ++itr) {\n    if (!mod_json_value_is_equal(*itl, *itr)) {\n      return MOD_JSON_FALSE;\n    }\n  }\n  return MOD_JSON_TRUE;\n}\n\nvoid mod_json_array_unset(mod_json_array_t *arr) {\n  if (arr && mod_json_array_put(arr) <= 0) {\n    mod_json_value_t **iter = arr->first;\n\n    for (; iter != arr->last; ++iter) {\n      mod_json_value_unset(*iter);\n    }\n    mod_json_free(arr->first);\n    mod_json_free(arr);\n  }\n}\n\nvoid mod_json_array_reset(mod_json_array_t *arr) {\n  if (arr) {\n    mod_json_value_t **iter = arr->first;\n\n    for (; iter != arr->last; ++iter) {\n      mod_json_value_unset(*iter);\n    }\n    arr->last = arr->first;\n  }\n}\n\nstatic inline void mod_json_array_migrate(mod_json_array_t *arr,\n                                          mod_json_value_t **buf,\n                                          mod_json_size_t size) {\n  mod_json_size_t count = (mod_json_size_t)(arr->last - arr->first);\n  if (count > 0) {\n    memcpy(buf, arr->first, count * sizeof(mod_json_value_t *));\n  }\n  mod_json_free(arr->first);\n\n  arr->first = buf;\n  arr->last = buf + count;\n  arr->size = size;\n}\n\nstatic inline int mod_json_array_expand(mod_json_array_t *arr,\n                                        mod_json_size_t n) {\n  mod_json_size_t size;\n  mod_json_value_t **vals;\n\n  size = mod_json_utils_clp2(n);\n  if (size < MOD_JSON_ARRAY_DEFSIZE) {\n    size = MOD_JSON_ARRAY_DEFSIZE;\n  }\n  mod_json_minus_if_false(size > arr->size);\n\n  vals =\n      
(mod_json_value_t **)mod_json_malloc(size * sizeof(mod_json_value_t *));\n  mod_json_minus_if_false(vals);\n\n  /* use new buffer */\n  mod_json_array_migrate(arr, vals, size);\n\n  /* success */\n  return 0;\n}\n\nint mod_json_array_reserve(mod_json_array_t *arr, mod_json_size_t n) {\n  mod_json_minus_if_false(arr);\n\n  if (arr->size >= n) {\n    /* needn't grow */\n    return 0;\n  }\n  return mod_json_array_expand(arr, n);\n}\n\nvoid mod_json_array_reverse(mod_json_array_t *arr) {\n  if (arr) {\n    mod_json_value_t **first = arr->first;\n    mod_json_value_t **last = arr->last - 1;\n\n    while (first < last) {\n      mod_json_value_t *temp = *first;\n      *first++ = *last;\n      *last-- = temp;\n    }\n  }\n}\n\nint mod_json_array_push(mod_json_array_t *arr, mod_json_value_t *val) {\n  mod_json_size_t count;\n  mod_json_minus_if_false(arr);\n\n  count = (mod_json_size_t)(arr->last - arr->first);\n  if (count >= arr->size) {\n    mod_json_minus_if_ne_zero(mod_json_array_expand(arr, count + 1));\n  }\n\n  *arr->last++ = val ? 
mod_json_value_grab(val) : NULL;\n  return 0;\n}\n\nvoid mod_json_array_pop(mod_json_array_t *arr) {\n  if (arr && arr->first != arr->last) {\n    mod_json_value_unset(*(--arr->last));\n  }\n}\n\nvoid mod_json_array_shift(mod_json_array_t *arr) {\n  if (arr && arr->first != arr->last) {\n    mod_json_value_t **it = arr->first;\n    mod_json_value_t **last = --arr->last;\n\n    mod_json_value_unset(*it++);\n    for (; it <= last; ++it) {\n      *(it - 1) = *it;\n    }\n  }\n}\n\nmod_json_value_t *mod_json_array_at(mod_json_array_t *arr, mod_json_size_t id) {\n  if (arr && ((arr->first + id) < arr->last)) {\n    return (arr->first[id]);\n  }\n  return NULL;\n}\n\nint mod_json_array_merge(mod_json_array_t *dst, mod_json_array_t *src) {\n  long count, len1, len2;\n\n  mod_json_minus_if_false(dst && src && dst != src);\n\n  /* update length of array */\n  len1 = (mod_json_size_t)(src->last - src->first);\n  len2 = (mod_json_size_t)(dst->last - dst->first);\n  mod_json_minus_if_false(len1 >= 0 && len2 >= 0);\n\n  /* append empty values */\n  count = len1 - len2;\n  for (; count > 0; --count) {\n    mod_json_array_push(dst, NULL);\n  }\n\n  /* It must be assigned again. */\n  len2 = (mod_json_size_t)(dst->last - dst->first);\n  count = (len1 < len2 ? len1 : len2);\n\n  while ((count--) > 0) {\n    mod_json_value_t **iter1 = src->first + count;\n    mod_json_value_t **iter2 = dst->first + count;\n\n    if (!(*iter2)) {\n      *iter2 = *iter1 ? 
mod_json_value_grab(*iter1) : NULL;\n      continue;\n    }\n\n    if (mod_json_value_is_shared(*iter2)) {\n      mod_json_value_put(*iter2);\n      *iter2 = mod_json_value_clone(*iter2);\n    }\n    mod_json_value_merge(*iter2, *iter1);\n  }\n\n  /* success */\n  return 0;\n}\n\nint mod_json_array_resize(mod_json_array_t *arr, mod_json_size_t n,\n                          mod_json_value_t *val) {\n  mod_json_size_t orig;\n\n  /* check input */\n  mod_json_minus_if_false(arr);\n\n  /* original count of array */\n  orig = (mod_json_size_t)(arr->last - arr->first);\n\n  if (orig < n) {\n    mod_json_value_t **iter;\n\n    if (arr->size < n) {\n      mod_json_minus_if_ne_zero(mod_json_array_expand(arr, n));\n    }\n\n    iter = arr->last;\n    arr->last = arr->first + n;\n\n    /* grab the first one, but get the others */\n    *iter++ = val = val ? mod_json_value_grab(val) : NULL;\n    for (; iter != arr->last; ++iter) {\n      *iter = val ? mod_json_value_get(val) : NULL;\n    }\n  } else if (orig > n) {\n    mod_json_value_t **iter = arr->first + n;\n\n    for (; iter != arr->last; ++iter) {\n      mod_json_value_unset(*iter);\n      *iter = NULL;\n    }\n    arr->last = arr->first + n;\n  }\n\n  /* success */\n  return 0;\n}\n\nstatic inline void mod_json_pair_init(mod_json_pair_t *pair,\n                                      mod_json_string_t *key,\n                                      mod_json_value_t *val) {\n  pair->key = mod_json_string_grab(key);\n  pair->val = val ? mod_json_value_grab(val) : NULL;\n}\n\nstatic inline void mod_json_pair_cleanup(mod_json_pair_t *pair) {\n  mod_json_string_unset(pair->key);\n  mod_json_value_unset(pair->val);\n  pair->key = NULL;\n  pair->val = NULL;\n}\n\nmod_json_object_t *mod_json_object_set(mod_json_size_t size) {\n  mod_json_object_t *obj;\n  mod_json_pair_t *buf;\n\n  size = (size ? 
mod_json_utils_clp2(size) : MOD_JSON_OBJECT_DEFSIZE);\n  buf = (mod_json_pair_t *)mod_json_malloc(size * sizeof(mod_json_pair_t));\n  mod_json_null_if_false(buf);\n\n  /* create a object */\n  obj = (mod_json_object_t *)mod_json_malloc(sizeof(mod_json_object_t));\n  if (mod_json_unlikely(!obj)) {\n    mod_json_free(buf);\n    return NULL;\n  }\n\n  obj->refer = 1;\n  obj->size = size;\n  obj->first = buf;\n  obj->last = buf;\n  return obj;\n}\n\nvoid mod_json_object_unset(mod_json_object_t *obj) {\n  if (obj && mod_json_object_put(obj) <= 0) {\n    mod_json_pair_t *iter = obj->first;\n\n    for (; iter != obj->last; ++iter) {\n      mod_json_pair_cleanup(iter);\n    }\n    mod_json_free(obj->first);\n    mod_json_free(obj);\n  }\n}\n\nvoid mod_json_object_reset(mod_json_object_t *obj) {\n  if (obj) {\n    mod_json_pair_t *iter = obj->first;\n\n    for (; iter != obj->last; ++iter) {\n      mod_json_pair_cleanup(iter);\n    }\n    obj->last = obj->first;\n  }\n}\n\nstatic inline void mod_json_object_migrate(mod_json_object_t *obj,\n                                           mod_json_pair_t *buf,\n                                           mod_json_size_t size) {\n  mod_json_size_t count = (mod_json_size_t)(obj->last - obj->first);\n  if (count > 0) {\n    memcpy(buf, obj->first, count * sizeof(mod_json_pair_t));\n  }\n  mod_json_free(obj->first);\n\n  obj->first = buf;\n  obj->last = buf + count;\n  obj->size = size;\n}\n\nstatic inline int mod_json_object_expand(mod_json_object_t *obj,\n                                         mod_json_size_t n) {\n  mod_json_size_t size;\n  mod_json_pair_t *buf;\n\n  size = mod_json_utils_clp2(n);\n  if (size < MOD_JSON_OBJECT_DEFSIZE) {\n    size = MOD_JSON_OBJECT_DEFSIZE;\n  }\n  mod_json_minus_if_false(size > obj->size);\n\n  buf = (mod_json_pair_t *)mod_json_malloc(size * sizeof(mod_json_pair_t));\n  mod_json_minus_if_false(buf);\n\n  /* use new buffer */\n  mod_json_object_migrate(obj, buf, size);\n\n  /* success */\n  return 
0;\n}\n\nstatic inline mod_json_pair_t *mod_json_object_find_pair(mod_json_object_t *obj,\n                                                         mod_json_string_t *key,\n                                                         mod_json_size_t *out) {\n  mod_json_pair_t *first = obj->first;\n  mod_json_pair_t *last = obj->last;\n\n  while (first < last) {\n    mod_json_pair_t *middle = first + ((last - first) >> 2);\n    int diff = mod_json_string_compare(middle->key, key);\n\n    if (diff < 0) {\n      first = middle + 1;\n    } else if (diff > 0) {\n      last = middle;\n    } else /*if (diff == 0)*/\n    {\n      *out = (mod_json_size_t)(middle - obj->first);\n      return middle;\n    }\n  }\n  *out = (mod_json_size_t)(first - obj->first);\n  return NULL;\n}\n\nmod_json_pair_t *mod_json_object_insert_force(mod_json_object_t *obj,\n                                              mod_json_size_t npos,\n                                              mod_json_string_t *key,\n                                              mod_json_value_t *val) {\n  mod_json_pair_t *iter, *pos;\n  mod_json_size_t count;\n\n  count = (mod_json_size_t)(obj->last - obj->first);\n  if (count >= obj->size) {\n    mod_json_null_if_ne_zero(mod_json_object_expand(obj, count + 1));\n  }\n\n  pos = obj->first + npos;\n  iter = obj->last++;\n  for (; iter != pos; --iter) {\n    mod_json_pair_t *prev = iter - 1;\n    iter->key = prev->key;\n    iter->val = prev->val;\n  }\n  mod_json_pair_init(pos, key, val);\n  return pos;\n}\n\nmod_json_pair_t *mod_json_object_insert(mod_json_object_t *obj,\n                                        mod_json_string_t *key,\n                                        mod_json_value_t *val) {\n  mod_json_size_t npos;\n  mod_json_null_if_false(obj && key);\n\n  if (mod_json_object_find_pair(obj, key, &npos)) {\n    /* One in object */\n    return NULL;\n  }\n  return mod_json_object_insert_force(obj, npos, key, val);\n}\n\nmod_json_pair_t 
*mod_json_object_assign(mod_json_object_t *obj,\n                                        mod_json_string_t *key,\n                                        mod_json_value_t *val) {\n  mod_json_pair_t *elem = NULL;\n\n  if (obj && key) {\n    mod_json_size_t npos;\n\n    elem = mod_json_object_find_pair(obj, key, &npos);\n    if (elem) {\n      if (!elem->val) {\n        elem->val = val ? mod_json_value_grab(val) : NULL;\n      } else {\n        /* overwrite the old value */\n        mod_json_value_assign(elem->val, val);\n      }\n    } else {\n      /* insert a new one */\n      elem = mod_json_object_insert_force(obj, npos, key, val);\n    }\n  }\n  return elem;\n}\n\nmod_json_pair_t *mod_json_object_touch(mod_json_object_t *obj,\n                                       mod_json_cchar_t *key) {\n  mod_json_pair_t *elem = NULL;\n\n  if (obj && key) {\n    mod_json_string_t str;\n    mod_json_size_t npos;\n\n    str.first = (mod_json_char_t *)key;\n    str.last = str.first + mod_json_utils_strlen(key);\n\n    elem = mod_json_object_find_pair(obj, &str, &npos);\n    if (!elem) {\n      mod_json_string_t *jkey;\n\n      /* insert a new one */\n      jkey =\n          mod_json_string_set(key, (mod_json_size_t)mod_json_utils_strlen(key));\n      elem = mod_json_object_insert_force(obj, npos, jkey, NULL);\n      mod_json_string_unset(jkey);\n    }\n  }\n  return elem;\n}\n\nmod_json_object_t *mod_json_object_clone(mod_json_object_t *obj) {\n  mod_json_object_t *obj2 = NULL;\n\n  if (obj) {\n    obj2 = mod_json_object_set((mod_json_size_t)(obj->last - obj->first));\n    if (obj2) {\n      mod_json_pair_t *iter = obj->first;\n\n      /* clone items */\n      for (; iter != obj->last; ++iter) {\n        mod_json_pair_init(obj2->last++, iter->key, iter->val);\n      }\n    }\n  }\n  return obj2;\n}\n\nmod_json_boolean_t mod_json_object_is_equal(mod_json_object_t *lhs,\n                                            mod_json_object_t *rhs) {\n  mod_json_pair_t *itl, *itr;\n\n  if 
(lhs == rhs) {\n    /* The same pointer */\n    return MOD_JSON_TRUE;\n  }\n\n  if (!lhs || !rhs || ((lhs->last - lhs->first) != (rhs->last - rhs->first))) {\n    return MOD_JSON_FALSE;\n  }\n\n  /* compare items */\n  for (itl = lhs->first, itr = rhs->first; itl != lhs->last; ++itl, ++itr) {\n    if ((mod_json_string_compare(itl->key, itr->key) != 0) ||\n        (!mod_json_value_is_equal(itl->val, itr->val))) {\n      return MOD_JSON_FALSE;\n    }\n  }\n  return MOD_JSON_TRUE;\n}\n\nvoid mod_json_object_erase(mod_json_object_t *obj, mod_json_cchar_t *key) {\n  if (obj && key) {\n    mod_json_string_t str;\n    mod_json_pair_t *iter;\n    mod_json_size_t npos;\n\n    str.first = (mod_json_char_t *)key;\n    str.last = str.first + mod_json_utils_strlen(key);\n\n    iter = mod_json_object_find_pair(obj, &str, &npos);\n    if (iter) {\n      mod_json_pair_cleanup(iter++);\n\n      for (; iter != obj->last; ++iter) {\n        mod_json_pair_t *prev = iter - 1;\n        prev->key = iter->key;\n        prev->val = iter->val;\n      }\n      --obj->last;\n    }\n  }\n}\n\nmod_json_value_t *mod_json_object_at(mod_json_object_t *obj,\n                                     mod_json_cchar_t *key) {\n  if (obj && key) {\n    mod_json_string_t str;\n    mod_json_pair_t *elem;\n    mod_json_size_t npos;\n\n    str.first = (mod_json_char_t *)key;\n    str.last = str.first + mod_json_utils_strlen(key);\n\n    elem = mod_json_object_find_pair(obj, &str, &npos);\n    if (elem) {\n      return (elem->val);\n    }\n  }\n  return NULL;\n}\n\nmod_json_pair_t *mod_json_object_find(mod_json_object_t *obj,\n                                      mod_json_cchar_t *key) {\n  if (obj && key) {\n    mod_json_string_t str;\n    mod_json_size_t npos;\n\n    str.first = (mod_json_char_t *)key;\n    str.last = str.first + mod_json_utils_strlen(key);\n\n    return mod_json_object_find_pair(obj, &str, &npos);\n  }\n  return NULL;\n}\n\nint mod_json_object_merge(mod_json_object_t *dst, mod_json_object_t 
*src) {\n  mod_json_pair_t *iter;\n\n  mod_json_minus_if_false(dst && src && dst != src);\n\n  for (iter = src->first; iter != src->last; ++iter) {\n    mod_json_pair_t *elem;\n    mod_json_size_t npos;\n\n    elem = mod_json_object_find_pair(dst, iter->key, &npos);\n    if (!elem) {\n      /* insert a new one; report a failed insert instead of silently\n       * dropping the pair */\n      if (!mod_json_object_insert_force(dst, npos, iter->key, iter->val)) {\n        return -1;\n      }\n      continue;\n    }\n\n    if (!elem->val) {\n      elem->val = iter->val ? mod_json_value_grab(iter->val) : NULL;\n      continue;\n    }\n\n    if (mod_json_value_is_shared(elem->val)) {\n      /* Clone before releasing the shared value, so that a failed clone\n       * leaves dst unchanged and is reported to the caller */\n      mod_json_value_t *copy = mod_json_value_clone(elem->val);\n      if (!copy) {\n        return -1;\n      }\n      mod_json_value_put(elem->val);\n      elem->val = copy;\n    }\n    mod_json_value_merge(elem->val, iter->val);\n  }\n  return 0;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_strskp(mod_json_token_t *tok,\n                                                      mod_json_cchar_t *cstr) {\n  if ((tok->options & MOD_JSON_COMMENT) == 0) {\n    return mod_json_utils_strskpb(cstr);\n  }\n  return mod_json_utils_strskp(cstr);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_strfquo(mod_json_token_t *tok,\n                                                       mod_json_cchar_t *cstr,\n                                                       mod_json_char_t quo) {\n  if ((tok->options & MOD_JSON_UNSTRICT) == 0) {\n    return mod_json_utils_strfquo(cstr, quo);\n  }\n  return mod_json_utils_strfquo2(cstr, quo);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_strfsep(mod_json_token_t *tok,\n                                                       mod_json_cchar_t *cstr) {\n  if ((tok->options & MOD_JSON_COMMENT) == 0) {\n    return mod_json_utils_strfsep(cstr);\n  }\n  return mod_json_utils_strfsep2(cstr);\n}\n\nmod_json_token_t *mod_json_token_create(mod_json_option_t *opt) {\n  mod_json_token_t *tok;\n  mod_json_size_t opts = MOD_JSON_TOKEN_DEFOPTS;\n  mod_json_size_t mobj = MOD_JSON_TOKEN_DEFOBJDEP;\n  mod_json_size_t marr = MOD_JSON_TOKEN_DEFARRDEP;\n\n  if 
(opt) {\n    opts = opt->options;\n\n    if (opt->object_depth > 0) {\n      mobj = opt->object_depth;\n    }\n    if (opt->array_depth > 0) {\n      marr = opt->array_depth;\n    }\n  }\n\n  tok = (mod_json_token_t *)mod_json_malloc(\n      (mobj + marr) * sizeof(mod_json_char_t) + sizeof(mod_json_token_t));\n  mod_json_null_if_false(tok);\n\n  memset(tok, 0, sizeof(mod_json_token_t));\n  tok->state = mod_json_state_null;\n  tok->error = mod_json_error_null;\n  tok->options = opts;\n  tok->object_max_depth = mobj;\n  tok->array_max_depth = marr;\n  return tok;\n}\n\nvoid mod_json_token_destroy(mod_json_token_t *tok) {\n  mod_json_free(tok);\n}\n\nstatic inline void mod_json_token_set_tag(mod_json_token_t *tok,\n                                          mod_json_char_t tag) {\n  mod_json_size_t depth = tok->object_depth + tok->array_depth;\n  if (depth != 0) {\n    tok->tags[depth - 1] = tag;\n  }\n}\n\nstatic inline mod_json_char_t mod_json_token_tag(mod_json_token_t *tok) {\n  mod_json_size_t depth = tok->object_depth + tok->array_depth;\n\n  /* type of current depth */\n  return (depth ? 
tok->tags[depth - 1] : (mod_json_char_t)-1);\n}\n\nmod_json_error_t mod_json_token_error(mod_json_token_t *tok) {\n  return (tok->error);\n}\n\nmod_json_cchar_t *mod_json_token_context(mod_json_token_t *tok) {\n  return (tok->context);\n}\n\nmod_json_state_t mod_json_token_state(mod_json_token_t *tok) {\n  return (tok->state);\n}\n\nmod_json_size_t mod_json_token_object_depth(mod_json_token_t *tok) {\n  return (tok->object_depth);\n}\n\nmod_json_size_t mod_json_token_array_depth(mod_json_token_t *tok) {\n  return (tok->array_depth);\n}\n\nmod_json_size_t mod_json_token_depth(mod_json_token_t *tok) {\n  return (tok->object_depth + tok->array_depth);\n}\n\nmod_json_size_t mod_json_token_max_object_depth(mod_json_token_t *tok) {\n  return (tok->object_max_depth);\n}\n\nmod_json_size_t mod_json_token_max_array_depth(mod_json_token_t *tok) {\n  return (tok->array_max_depth);\n}\n\nmod_json_size_t mod_json_token_max_depth(mod_json_token_t *tok) {\n  return (tok->object_max_depth + tok->array_max_depth);\n}\n\nmod_json_void_t *mod_json_token_param(mod_json_token_t *tok) {\n  return (tok->param);\n}\n\nvoid mod_json_token_set_param(mod_json_token_t *tok, mod_json_void_t *param) {\n  tok->param = param;\n}\n\nvoid mod_json_token_set_event(mod_json_token_t *tok, mod_json_event_proc proc) {\n  tok->event_proc = proc;\n}\n\nmod_json_event_t mod_json_token_event(mod_json_token_t *tok) {\n  return (tok->event_code);\n}\n\nstatic inline int mod_json_token_invoke_field(mod_json_token_t *tok,\n                                              mod_json_cchar_t *val,\n                                              mod_json_size_t len) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_field;\n    return invoke(tok, (mod_json_void_t *)val, len);\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_object(mod_json_token_t *tok) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = 
mod_json_event_object;\n    return invoke(tok, NULL, 0);\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_array(mod_json_token_t *tok) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_array;\n    return invoke(tok, NULL, 0);\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_null(mod_json_token_t *tok) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_null;\n    return invoke(tok, NULL, 0);\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_boolean(mod_json_token_t *tok,\n                                                mod_json_boolean_t val) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_boolean;\n    return invoke(tok, &val, sizeof(val));\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_integer(mod_json_token_t *tok,\n                                                mod_json_integer_t val) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_integer;\n    return invoke(tok, &val, sizeof(val));\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_float(mod_json_token_t *tok,\n                                              mod_json_float_t val) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_float;\n    return invoke(tok, &val, sizeof(val));\n  }\n  return 0;\n}\n\nstatic inline int mod_json_token_invoke_string(mod_json_token_t *tok,\n                                               mod_json_cchar_t *val,\n                                               mod_json_size_t len) {\n  mod_json_event_proc invoke = tok->event_proc;\n  if (invoke) {\n    tok->event_code = mod_json_event_string;\n    return invoke(tok, (mod_json_void_t *)val, len);\n  }\n  return 0;\n}\n\nstatic inline mod_json_cchar_t 
*mod_json_token_start(mod_json_token_t *tok,\n                                                     mod_json_cchar_t *cstr) {\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case '{':\n      tok->state = mod_json_state_object_start;\n      return (cstr + 1);\n\n    case '[':\n      tok->state = mod_json_state_array_start;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_empty;\n      tok->context = cstr;\n      break;\n\n    default:\n      tok->error = mod_json_error_start;\n      tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_null(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  mod_json_char_t c1 = *(cstr + 1);\n  mod_json_char_t c2 = *(cstr + 2);\n  mod_json_char_t c3 = *(cstr + 3);\n\n  if ((c1 != 'u' && c1 != 'U') || (c2 != 'l' && c2 != 'L') ||\n      (c3 != 'l' && c3 != 'L')) {\n    tok->error = mod_json_error_value;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  if (mod_json_token_invoke_null(tok) != 0) {\n    tok->error = mod_json_error_break;\n    tok->context = cstr;\n    return NULL;\n  }\n  return (cstr + 4);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_true(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  mod_json_char_t c1 = *(cstr + 1);\n  mod_json_char_t c2 = *(cstr + 2);\n  mod_json_char_t c3 = *(cstr + 3);\n\n  if ((c1 != 'r' && c1 != 'R') || (c2 != 'u' && c2 != 'U') ||\n      (c3 != 'e' && c3 != 'E')) {\n    tok->error = mod_json_error_value;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  if (mod_json_token_invoke_boolean(tok, MOD_JSON_TRUE) != 0) {\n    tok->error = mod_json_error_break;\n    tok->context = cstr;\n    return NULL;\n  }\n  return (cstr + 4);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_false(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  mod_json_char_t c1 = *(cstr + 1);\n  mod_json_char_t c2 = *(cstr + 2);\n  mod_json_char_t c3 = *(cstr + 3);\n  
mod_json_char_t c4 = *(cstr + 4);\n\n  if ((c1 != 'a' && c1 != 'A') || (c2 != 'l' && c2 != 'L') ||\n      (c3 != 's' && c3 != 'S') || (c4 != 'e' && c4 != 'E')) {\n    tok->error = mod_json_error_value;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  if (mod_json_token_invoke_boolean(tok, MOD_JSON_FALSE) != 0) {\n    tok->error = mod_json_error_break;\n    tok->context = cstr;\n    return NULL;\n  }\n  return (cstr + 5);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_infinity(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  mod_json_char_t c1 = *(cstr + 1);\n  mod_json_char_t c2 = *(cstr + 2);\n\n  if ((c1 != 'n' && c1 != 'N') || (c2 != 'f' && c2 != 'F')) {\n    tok->error = mod_json_error_value;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  if (mod_json_token_invoke_float(tok, MOD_JSON_INFINITY) != 0) {\n    tok->error = mod_json_error_break;\n    tok->context = cstr;\n    return NULL;\n  }\n  return (cstr + 3);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_string(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr, mod_json_char_t quo) {\n  mod_json_cchar_t *cstr2 = mod_json_token_strfquo(tok, ++cstr, quo);\n  if (!cstr2) {\n    tok->error = mod_json_error_quote;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  if (mod_json_token_invoke_string(tok, cstr,\n                                   (mod_json_size_t)(cstr2 - cstr)) != 0) {\n    tok->error = mod_json_error_break;\n    tok->context = cstr;\n    return NULL;\n  }\n  return (cstr2 + 1);\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_value_number(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  enum { number_integer, number_float } num_type = number_integer;\n\n  mod_json_float_t dbl = 0.0;\n  uint32_t dig = 0;\n  uint64_t u64 = 0;\n  int32_t minus = 0;\n  int32_t exp_frac = 0, exp = 0;\n\n  /* Parse minus */\n  minus = *cstr;\n  if (minus == '-' || minus == '+') {\n    ++cstr;\n  }\n\n  /* The first digit */\n  if ((dig = (uint32_t)(*cstr - 
'0')) > 9) {\n    return NULL;\n  }\n\n  /* Save the first digit */\n  u64 = dig;\n\n  /* Parse as 64bit integer */\n  if (minus != '-') {\n    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {\n      if (u64 >= 1844674407370955161uLL) {\n        /* 2^64 - 1 = 18446744073709551615 */\n        if (u64 != 1844674407370955161uLL || dig > 5) {\n          dbl = (mod_json_float_t)u64 * 10 + dig;\n          num_type = number_float;\n          break;\n        }\n      }\n      u64 = u64 * 10 + dig;\n    }\n  } else {\n    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {\n      /* 2^63 = 9223372036854775808 */\n      if (u64 >= 922337203685477580uLL) {\n        if (u64 != 922337203685477580uLL || dig > 8) {\n          dbl = (mod_json_float_t)u64 * 10 + dig;\n          num_type = number_float;\n          break;\n        }\n      }\n      u64 = u64 * 10 + dig;\n    }\n  }\n\n  /* Force double for big integer */\n  if (num_type == number_float) {\n    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {\n      if (dbl >= 1E307) {\n        /* Number too big to store in double */\n        return NULL;\n      }\n      dbl = dbl * 10 + dig;\n    }\n  }\n\n  /* Parse frac = decimal-point 1*DIGIT */\n  if (*cstr == '.') {\n    if (num_type != number_float) {\n      dbl = (mod_json_float_t)u64;\n      num_type = number_float;\n    }\n\n    if ((dig = (uint32_t)(*(++cstr) - '0')) > 9) {\n      /* At least one digit in fraction part */\n      return NULL;\n    }\n\n    dbl = dbl * 10 + dig;\n    --exp_frac;\n\n    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {\n      if (exp_frac > -16) {\n        dbl = dbl * 10 + dig;\n        --exp_frac;\n      }\n    }\n  }\n\n  /* Parse exp = e [ minus / plus ] 1*DIGIT */\n  if (*cstr == 'e' || *cstr == 'E') {\n    int32_t exp_minus = 0;\n\n    if (num_type != number_float) {\n      dbl = (mod_json_float_t)u64;\n      num_type = number_float;\n    }\n\n    exp_minus = *(++cstr);\n    if (exp_minus == '-' || exp_minus == '+') {\n      ++cstr;\n 
   }\n\n    /* The first number char after 'e/E' */\n    if ((dig = (uint32_t)(*cstr - '0')) > 9) {\n      return NULL;\n    }\n    exp = (int32_t)dig;\n\n    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {\n      exp = exp * 10 + (int32_t)dig;\n      if (exp > 308) {\n        /* Number too big to store in double */\n        return NULL;\n      }\n    }\n\n    if (exp_minus == '-') {\n      exp = -exp;\n    }\n  }\n\n  /* Finish parsing, call event according to the type of number. */\n  if (num_type == number_float) {\n    dbl *= mod_json_utils_pow10(exp + exp_frac);\n    if (minus == '-') {\n      dbl = -dbl;\n    }\n    if (mod_json_token_invoke_float(tok, dbl) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n  } else {\n    if (minus == '-') {\n      u64 = (uint64_t)(-(int64_t)u64);\n    }\n    if (mod_json_token_invoke_integer(tok, (mod_json_integer_t)u64) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n  }\n  return cstr;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_array_start(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  if (tok->array_depth < tok->array_max_depth) {\n    /* callback */\n    if (mod_json_token_invoke_array(tok) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    /* increase depth */\n    ++tok->array_depth;\n\n    /* push current tag */\n    mod_json_token_set_tag(tok, '[');\n\n    cstr = mod_json_token_strskp(tok, cstr);\n    switch (*cstr) {\n      case '[':\n        tok->state = mod_json_state_array_start;\n        return (cstr + 1);\n\n      case ']':\n        tok->state = mod_json_state_array_finish;\n        return (cstr + 1);\n\n      case '\\0':\n        tok->error = mod_json_error_trunc;\n        tok->context = cstr;\n        break;\n\n      default:\n        tok->state = mod_json_state_array_half;\n        return (cstr);\n    }\n  } else 
{\n    tok->error = mod_json_error_depth;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_array_half(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case ',':\n      tok->state = mod_json_state_array_half;\n      return (cstr + 1);\n\n    case '[':\n      tok->state = mod_json_state_array_start;\n      return (cstr + 1);\n\n    case ']':\n      tok->state = mod_json_state_array_finish;\n      return (cstr + 1);\n\n    case '{':\n      tok->state = mod_json_state_object_start;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_trunc;\n      tok->context = cstr;\n      return NULL;\n\n    /* value in array */\n    case 't':\n    case 'T':\n      cstr = mod_json_token_value_true(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case 'f':\n    case 'F':\n      cstr = mod_json_token_value_false(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case 'n':\n    case 'N':\n      cstr = mod_json_token_value_null(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '0':\n    case '1':\n    case '2':\n    case '3':\n    case '4':\n    case '5':\n    case '6':\n    case '7':\n    case '8':\n    case '9':\n    case '+':\n    case '-':\n      cstr = mod_json_token_value_number(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '\\\"':\n      cstr = mod_json_token_value_string(tok, cstr, '\\\"');\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '\\'':\n      if (tok->options & MOD_JSON_SQUOTE) {\n        cstr = mod_json_token_value_string(tok, cstr, '\\'');\n        if (!cstr) {\n          return NULL;\n        }\n        break;\n      }\n      /* FALLTHRU */\n\n    default:\n      tok->error = mod_json_error_value;\n      tok->context = cstr;\n      return 
NULL;\n  }\n\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case ',':\n      tok->state = mod_json_state_array_half;\n      return (cstr + 1);\n\n    case ']':\n      tok->state = mod_json_state_array_finish;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_trunc;\n      tok->context = cstr;\n      break;\n\n    default:\n      tok->error = mod_json_error_value;\n      tok->context = cstr;\n      break;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_array_finish(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  if (tok->array_depth) {\n    /* decrease depth */\n    --tok->array_depth;\n\n    /* callback */\n    if (mod_json_token_invoke_array(tok) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    cstr = mod_json_token_strskp(tok, cstr);\n    switch (*cstr) {\n      case ']':\n        tok->state = mod_json_state_array_finish;\n        return (cstr + 1);\n\n      case '}':\n        tok->state = mod_json_state_object_finish;\n        return (cstr + 1);\n\n      case '\\0':\n        if (tok->object_depth || tok->array_depth) {\n          tok->error = mod_json_error_trunc;\n          tok->context = cstr;\n        } else {\n          tok->state = mod_json_state_finish;\n        }\n        break;\n\n      case ',':\n        if (tok->object_depth || tok->array_depth) {\n          mod_json_char_t tag = mod_json_token_tag(tok);\n\n          if (tag == '{') {\n            tok->state = mod_json_state_object_half1;\n            return (cstr + 1);\n          } else if (tag == '[') {\n            tok->state = mod_json_state_array_half;\n            return (cstr + 1);\n          }\n        }\n        /* FALLTHRU */\n\n      default:\n        tok->error = mod_json_error_array;\n        tok->context = cstr;\n    }\n  } else {\n    tok->error = mod_json_error_depth;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline 
mod_json_cchar_t *mod_json_token_object_start(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  if (tok->object_depth < tok->object_max_depth) {\n    /* callback */\n    if (mod_json_token_invoke_object(tok) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    /* increase depth */\n    ++tok->object_depth;\n\n    /* push current tag */\n    mod_json_token_set_tag(tok, '{');\n\n    cstr = mod_json_token_strskp(tok, cstr);\n    switch (*cstr) {\n      case '}':\n        tok->state = mod_json_state_object_finish;\n        return (cstr + 1);\n\n      case '\\0':\n        tok->error = mod_json_error_trunc;\n        tok->context = cstr;\n        break;\n\n      default:\n        tok->state = mod_json_state_object_half1;\n        return (cstr);\n    }\n  } else {\n    tok->error = mod_json_error_depth;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_object_quotekey(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr, mod_json_char_t quo) {\n  mod_json_cchar_t *cstr2 = mod_json_token_strfquo(tok, ++cstr, quo);\n  if (cstr2) {\n    /* callback */\n    if (mod_json_token_invoke_field(tok, cstr,\n                                    (mod_json_size_t)(cstr2 - cstr)) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    cstr2 = mod_json_token_strskp(tok, ++cstr2);\n    switch (*cstr2) {\n      case ':':\n        tok->state = mod_json_state_object_half2;\n        return (cstr2 + 1);\n\n      case '\\0':\n        tok->error = mod_json_error_trunc;\n        tok->context = cstr;\n        break;\n\n      default:\n        tok->error = mod_json_error_key;\n        tok->context = cstr2;\n        break;\n    }\n  } else {\n    tok->error = mod_json_error_quote;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_object_simplekey(\n    mod_json_token_t *tok, 
mod_json_cchar_t *cstr) {\n  mod_json_cchar_t *cstr2 = mod_json_token_strfsep(tok, cstr);\n  if (cstr2 != cstr) {\n    /* callback */\n    if (mod_json_token_invoke_field(tok, cstr,\n                                    (mod_json_size_t)(cstr2 - cstr)) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    cstr2 = mod_json_token_strskp(tok, cstr2);\n    switch (*cstr2) {\n      case ':':\n        tok->state = mod_json_state_object_half2;\n        return (cstr2 + 1);\n\n      case '\\0':\n        tok->error = mod_json_error_trunc;\n        tok->context = cstr;\n        break;\n\n      default:\n        tok->error = mod_json_error_key;\n        tok->context = cstr2;\n        break;\n    }\n  } else {\n    tok->error = mod_json_error_key;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_object_half1(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case ',':\n      tok->state = mod_json_state_object_half1;\n      return (cstr + 1);\n\n    case '}':\n      tok->state = mod_json_state_object_finish;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_trunc;\n      tok->context = cstr;\n      break;\n\n    case '\\\"':\n      /* The key with double quotes */\n      return mod_json_token_object_quotekey(tok, cstr, '\\\"');\n\n    case '\\'':\n      if (tok->options & MOD_JSON_SQUOTE) {\n        /* The key with single quotes */\n        return mod_json_token_object_quotekey(tok, cstr, '\\'');\n      }\n      /* FALLTHRU */\n\n    default:\n      /* support simple format? 
*/\n      if (tok->options & MOD_JSON_SIMPLE) {\n        return mod_json_token_object_simplekey(tok, cstr);\n      } else {\n        tok->error = mod_json_error_quote;\n        tok->context = cstr;\n      }\n      break;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_object_half2(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case '{':\n      tok->state = mod_json_state_object_start;\n      return (cstr + 1);\n\n    case '[':\n      tok->state = mod_json_state_array_start;\n      return (cstr + 1);\n\n    case ',':\n      tok->state = mod_json_state_object_half1;\n      return (cstr + 1);\n\n    case '}':\n      tok->state = mod_json_state_object_finish;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_trunc;\n      tok->context = cstr;\n      return NULL;\n\n    case 't':\n    case 'T':\n      cstr = mod_json_token_value_true(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case 'f':\n    case 'F':\n      cstr = mod_json_token_value_false(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case 'i':\n    case 'I':\n      cstr = mod_json_token_value_infinity(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case 'n':\n    case 'N':\n      cstr = mod_json_token_value_null(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '0':\n    case '1':\n    case '2':\n    case '3':\n    case '4':\n    case '5':\n    case '6':\n    case '7':\n    case '8':\n    case '9':\n    case '+':\n    case '-':\n      cstr = mod_json_token_value_number(tok, cstr);\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '\\\"':\n      cstr = mod_json_token_value_string(tok, cstr, '\\\"');\n      if (!cstr) {\n        return NULL;\n      }\n      break;\n\n    case '\\'':\n      if (tok->options & 
MOD_JSON_SQUOTE) {\n        cstr = mod_json_token_value_string(tok, cstr, '\\'');\n        if (!cstr) {\n          return NULL;\n        }\n        break;\n      }\n      /* FALLTHRU */\n\n    default:\n      tok->error = mod_json_error_value;\n      tok->context = cstr;\n      return NULL;\n  }\n\n  cstr = mod_json_token_strskp(tok, cstr);\n  switch (*cstr) {\n    case ',':\n      tok->state = mod_json_state_object_half1;\n      return (cstr + 1);\n\n    case '}':\n      tok->state = mod_json_state_object_finish;\n      return (cstr + 1);\n\n    case '\\0':\n      tok->error = mod_json_error_trunc;\n      tok->context = cstr;\n      break;\n\n    default:\n      tok->error = mod_json_error_value;\n      tok->context = cstr;\n      break;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_object_finish(\n    mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  if (tok->object_depth) {\n    /* decrease depth */\n    --tok->object_depth;\n\n    /* callback */\n    if (mod_json_token_invoke_object(tok) != 0) {\n      tok->error = mod_json_error_break;\n      tok->context = cstr;\n      return NULL;\n    }\n\n    cstr = mod_json_token_strskp(tok, cstr);\n    switch (*cstr) {\n      case '}':\n        tok->state = mod_json_state_object_finish;\n        return (cstr + 1);\n\n      case ']':\n        tok->state = mod_json_state_array_finish;\n        return (cstr + 1);\n\n      case '\\0':\n        if (tok->object_depth || tok->array_depth) {\n          tok->error = mod_json_error_trunc;\n          tok->context = cstr;\n        } else {\n          tok->state = mod_json_state_finish;\n        }\n        break;\n\n      case ',':\n        if (tok->object_depth || tok->array_depth) {\n          mod_json_char_t tag = mod_json_token_tag(tok);\n\n          if (tag == '{') {\n            tok->state = mod_json_state_object_half1;\n            return (cstr + 1);\n          } else if (tag == '[') {\n            tok->state = mod_json_state_array_half;\n         
   return (cstr + 1);\n          }\n        }\n        /* FALLTHRU */\n\n      default:\n        tok->error = mod_json_error_object;\n        tok->context = cstr;\n    }\n  } else {\n    tok->error = mod_json_error_depth;\n    tok->context = cstr;\n  }\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_null(mod_json_token_t *tok,\n                                                    mod_json_cchar_t *cstr) {\n  if (!cstr || *cstr == '\\0') {\n    tok->error = mod_json_error_invalid;\n    tok->context = cstr;\n    return NULL;\n  }\n\n  tok->state = mod_json_state_start;\n  return cstr;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_finish(mod_json_token_t *tok,\n                                                      mod_json_cchar_t *cstr) {\n  tok->error = mod_json_error_null;\n  (void)cstr;\n  return NULL;\n}\n\nstatic inline mod_json_cchar_t *mod_json_token_default(mod_json_token_t *tok,\n                                                       mod_json_cchar_t *cstr) {\n  tok->error = mod_json_error_state;\n  tok->context = cstr;\n  return NULL;\n}\n\nint mod_json_token_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr) {\n  while (cstr) {\n    switch (tok->state) {\n      case mod_json_state_start:\n        cstr = mod_json_token_start(tok, cstr);\n        break;\n\n      case mod_json_state_array_start:\n        cstr = mod_json_token_array_start(tok, cstr);\n        break;\n\n      case mod_json_state_array_half:\n        cstr = mod_json_token_array_half(tok, cstr);\n        break;\n\n      case mod_json_state_array_finish:\n        cstr = mod_json_token_array_finish(tok, cstr);\n        break;\n\n      case mod_json_state_object_start:\n        cstr = mod_json_token_object_start(tok, cstr);\n        break;\n\n      case mod_json_state_object_half1:\n        cstr = mod_json_token_object_half1(tok, cstr);\n        break;\n\n      case mod_json_state_object_half2:\n        cstr = mod_json_token_object_half2(tok, cstr);\n        break;\n\n   
   case mod_json_state_object_finish:\n        cstr = mod_json_token_object_finish(tok, cstr);\n        break;\n\n      case mod_json_state_null:\n        cstr = mod_json_token_null(tok, cstr);\n        break;\n\n      case mod_json_state_finish:\n        cstr = mod_json_token_finish(tok, cstr);\n        break;\n\n      default:\n        cstr = mod_json_token_default(tok, cstr);\n        break;\n    }\n  }\n  return (tok->error == mod_json_error_null ? 0 : -1);\n}\n\nstatic inline int mod_json_parser_insert(mod_json_parser_t *par,\n                                         mod_json_size_t depth,\n                                         mod_json_value_t *val) {\n  if (depth > 0) {\n    mod_json_value_t *cur = par->vals[depth - 1];\n\n    switch (cur->type) {\n      case mod_json_type_object:\n        return (mod_json_object_insert(cur->data.c_obj, par->key, val) ? 0\n                                                                       : -1);\n\n      case mod_json_type_array:\n        return mod_json_array_push(cur->data.c_arr, val);\n\n      default:\n        break;\n    }\n  }\n  return -1;\n}\n\nstatic inline int mod_json_parser_insert_object(mod_json_parser_t *par,\n                                                mod_json_size_t depth) {\n  mod_json_object_t *obj;\n  mod_json_value_t *jval;\n\n  obj = mod_json_object_set_default();\n  mod_json_minus_if_false(obj);\n\n  jval = mod_json_value_set_object(obj);\n  mod_json_object_unset(obj);\n  mod_json_minus_if_false(jval);\n\n  if (depth > 0) {\n    int ret = mod_json_parser_insert(par, depth, jval);\n    if (ret == 0) {\n      par->vals[depth] = jval;\n    }\n    mod_json_value_unset(jval);\n    return ret;\n  } else {\n    /* It's the root, save the pointer. Don't unset it. 
*/\n    par->vals[0] = jval;\n  }\n  return 0;\n}\n\nstatic inline int mod_json_parser_insert_array(mod_json_parser_t *par,\n                                               mod_json_size_t depth) {\n  mod_json_array_t *arr;\n  mod_json_value_t *jval;\n\n  arr = mod_json_array_set_default();\n  mod_json_minus_if_false(arr);\n\n  jval = mod_json_value_set_array(arr);\n  mod_json_array_unset(arr);\n  mod_json_minus_if_false(jval);\n\n  if (depth > 0) {\n    int ret = mod_json_parser_insert(par, depth, jval);\n    if (ret == 0) {\n      par->vals[depth] = jval;\n    }\n    mod_json_value_unset(jval);\n    return ret;\n  } else {\n    /* It's the root, save the pointer. Don't unset it. */\n    par->vals[0] = jval;\n  }\n  return 0;\n}\n\nstatic inline void mod_json_token_set_parser(mod_json_token_t *tok,\n                                             mod_json_parser_t *par) {\n  mod_json_token_set_param(tok, par);\n}\n\nstatic inline mod_json_parser_t *mod_json_token_parser(mod_json_token_t *tok) {\n  return (mod_json_parser_t *)mod_json_token_param(tok);\n}\n\nstatic inline int mod_json_parser_event_field(mod_json_token_t *tok,\n                                              mod_json_cchar_t *val,\n                                              mod_json_size_t len) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  /* unset previous one */\n  mod_json_string_unset(parser->key);\n\n  parser->key = mod_json_string_set(val, len);\n  return (parser->key ? 
0 : -1);\n}\n\nstatic inline int mod_json_parser_event_array(mod_json_token_t *tok) {\n  switch (mod_json_token_state(tok)) {\n    case mod_json_state_array_finish:\n      /* continue */\n      return 0;\n\n    case mod_json_state_array_start:\n      return mod_json_parser_insert_array(mod_json_token_parser(tok),\n                                          mod_json_token_depth(tok));\n\n    default:\n      break;\n  }\n  return -1;\n}\n\nstatic inline int mod_json_parser_event_object(mod_json_token_t *tok) {\n  switch (mod_json_token_state(tok)) {\n    case mod_json_state_object_finish:\n      /* continue */\n      return 0;\n\n    case mod_json_state_object_start:\n      return mod_json_parser_insert_object(mod_json_token_parser(tok),\n                                           mod_json_token_depth(tok));\n\n    default:\n      break;\n  }\n  return -1;\n}\n\nstatic inline int mod_json_parser_event_null(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_null) {\n    parser->val_null = mod_json_value_set_null();\n    mod_json_minus_if_false(parser->val_null);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_null);\n}\n\nstatic inline int mod_json_parser_event_true(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_true) {\n    parser->val_true = mod_json_value_set_boolean(MOD_JSON_TRUE);\n    mod_json_minus_if_false(parser->val_true);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_true);\n}\n\nstatic inline int mod_json_parser_event_false(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_false) {\n    parser->val_false = 
mod_json_value_set_boolean(MOD_JSON_FALSE);\n    mod_json_minus_if_false(parser->val_false);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_false);\n}\n\nstatic inline int mod_json_parser_event_boolean(mod_json_token_t *tok,\n                                                mod_json_boolean_t val) {\n  if (!val) {\n    return mod_json_parser_event_false(tok);\n  }\n  return mod_json_parser_event_true(tok);\n}\n\nstatic inline int mod_json_parser_event_zero(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_zero) {\n    parser->val_zero = mod_json_value_set_integer(0);\n    mod_json_minus_if_false(parser->val_zero);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_zero);\n}\n\nstatic inline int mod_json_parser_event_integer(mod_json_token_t *tok,\n                                                mod_json_integer_t val) {\n  int ret = -1;\n\n  if (val != 0) {\n    mod_json_value_t *jval;\n\n    jval = mod_json_value_set_integer(val);\n    if (jval) {\n      ret = mod_json_parser_insert(mod_json_token_parser(tok),\n                                   mod_json_token_depth(tok), jval);\n      mod_json_value_unset(jval);\n    }\n  } else {\n    /* zero event */\n    ret = mod_json_parser_event_zero(tok);\n  }\n  return ret;\n}\n\nstatic inline int mod_json_parser_event_zerof(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_zerof) {\n    parser->val_zerof = mod_json_value_set_float(0.0);\n    mod_json_minus_if_false(parser->val_zerof);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_zerof);\n}\n\nstatic inline int mod_json_parser_event_float(mod_json_token_t *tok,\n                    
                          mod_json_float_t val) {\n  int ret = -1;\n\n  if (val != 0.0) {\n    mod_json_value_t *jval;\n\n    jval = mod_json_value_set_float(val);\n    if (jval) {\n      ret = mod_json_parser_insert(mod_json_token_parser(tok),\n                                   mod_json_token_depth(tok), jval);\n      mod_json_value_unset(jval);\n    }\n  } else {\n    /* zero event */\n    ret = mod_json_parser_event_zerof(tok);\n  }\n  return ret;\n}\n\nstatic inline int mod_json_parser_event_empty(mod_json_token_t *tok) {\n  mod_json_parser_t *parser;\n\n  /* get information */\n  parser = mod_json_token_parser(tok);\n\n  if (!parser->val_empty) {\n    mod_json_string_t *str;\n\n    str = mod_json_string_set(\"\", 0);\n    mod_json_minus_if_false(str);\n\n    parser->val_empty = mod_json_value_set_string(str);\n    mod_json_string_unset(str);\n    mod_json_minus_if_false(parser->val_empty);\n  }\n  return mod_json_parser_insert(parser, mod_json_token_depth(tok),\n                                parser->val_empty);\n}\n\nstatic inline int mod_json_parser_event_string(mod_json_token_t *tok,\n                                               mod_json_cchar_t *val,\n                                               mod_json_size_t len) {\n  int ret = -1;\n\n  if (len > 0) {\n    mod_json_string_t *str;\n    mod_json_value_t *jval;\n\n    str = mod_json_string_set(val, len);\n    if (str) {\n      jval = mod_json_value_set_string(str);\n    } else {\n      jval = NULL;\n    }\n    mod_json_string_unset(str);\n\n    if (jval) {\n      ret = mod_json_parser_insert(mod_json_token_parser(tok),\n                                   mod_json_token_depth(tok), jval);\n      mod_json_value_unset(jval);\n    }\n  } else {\n    /* empty event */\n    ret = mod_json_parser_event_empty(tok);\n  }\n  return ret;\n}\n\nstatic int mod_json_parser_event(mod_json_token_t *tok, mod_json_void_t *val,\n                                 mod_json_size_t len) {\n  switch (tok->event_code) {\n    
case mod_json_event_field:\n      return mod_json_parser_event_field(tok, (mod_json_cchar_t *)val, len);\n\n    case mod_json_event_object:\n      return mod_json_parser_event_object(tok);\n\n    case mod_json_event_array:\n      return mod_json_parser_event_array(tok);\n\n    case mod_json_event_null:\n      return mod_json_parser_event_null(tok);\n\n    case mod_json_event_boolean:\n      return mod_json_parser_event_boolean(tok, *(mod_json_boolean_t *)val);\n\n    case mod_json_event_integer:\n      return mod_json_parser_event_integer(tok, *(mod_json_integer_t *)val);\n\n    case mod_json_event_float:\n      return mod_json_parser_event_float(tok, *(mod_json_float_t *)val);\n\n    case mod_json_event_string:\n      return mod_json_parser_event_string(tok, (mod_json_cchar_t *)val, len);\n\n    default:\n      break;\n  }\n  return -1;\n}\n\nstatic inline mod_json_parser_t *mod_json_parser_create(mod_json_size_t depth) {\n  mod_json_parser_t *parser;\n  mod_json_null_if_false(depth > 0);\n\n  parser = (mod_json_parser_t *)mod_json_malloc(\n      depth * sizeof(mod_json_value_t *) + sizeof(mod_json_parser_t));\n  mod_json_null_if_false(parser);\n\n  memset(parser, 0, sizeof(mod_json_parser_t));\n  parser->vals[0] = NULL;\n  return parser;\n}\n\nstatic inline void mod_json_parser_destroy(mod_json_parser_t *par) {\n  mod_json_value_unset(par->val_null);\n  mod_json_value_unset(par->val_true);\n  mod_json_value_unset(par->val_false);\n  mod_json_value_unset(par->val_zero);\n  mod_json_value_unset(par->val_zerof);\n  mod_json_value_unset(par->val_empty);\n  mod_json_string_unset(par->key);\n  mod_json_free(par);\n}\n\nmod_json_value_t *mod_json_parse(mod_json_token_t *tok,\n                                 mod_json_cchar_t *cstr) {\n  mod_json_parser_t *parser;\n  mod_json_value_t *root;\n  mod_json_null_if_false(tok && cstr && *cstr);\n\n  parser = mod_json_parser_create(mod_json_token_max_depth(tok));\n  mod_json_null_if_false(parser);\n\n  
mod_json_token_set_parser(tok, parser);\n  mod_json_token_set_event(tok, mod_json_parser_event);\n\n  if (mod_json_token_parse(tok, cstr) == 0) {\n    root = parser->vals[0];\n  } else {\n    /* error occur */\n    root = NULL;\n    mod_json_value_unset(parser->vals[0]);\n  }\n\n  /* clean up */\n  mod_json_parser_destroy(parser);\n\n  /* success? */\n  return root;\n}\n\nmod_json_value_t *mod_json_parse_simply(mod_json_cchar_t *cstr,\n                                        mod_json_size_t opts) {\n  mod_json_value_t *val;\n  mod_json_token_t *tok;\n  mod_json_option_t opt;\n\n  opt.options = opts;\n  opt.object_depth = 0; /* Use default object depth */\n  opt.array_depth = 0;  /* Use default array depth */\n\n  tok = mod_json_token_create(&opt);\n  mod_json_null_if_false(tok);\n\n  val = mod_json_parse(tok, cstr);\n  mod_json_token_destroy(tok);\n\n  /* value of root */\n  return val;\n}\n\nstatic inline int mod_json_dump_null(mod_json_string_t *str) {\n  return mod_json_string_add_cstr(str, \"null\", 4);\n}\n\nstatic inline int mod_json_dump_boolean(mod_json_string_t *str,\n                                        mod_json_boolean_t bol) {\n  if (!bol) {\n    return mod_json_string_add_cstr(str, \"false\", 5);\n  }\n  return mod_json_string_add_cstr(str, \"true\", 4);\n}\n\nstatic inline int mod_json_dump_integer(mod_json_string_t *str,\n                                        mod_json_integer_t num) {\n  mod_json_char_t buf[32];\n\n  return mod_json_string_add_cstr(str, buf, mod_json_utils_itostr(buf, num));\n}\n\nstatic inline int mod_json_dump_float(mod_json_string_t *str,\n                                      mod_json_float_t dbl) {\n  mod_json_char_t buf[32];\n\n  return mod_json_string_add_cstr(\n      str, buf,\n      (mod_json_size_t)mod_json_utils_snprintf(buf, sizeof(buf), \"%g\", dbl));\n}\n\nstatic inline int mod_json_dump_string(mod_json_string_t *str,\n                                       mod_json_string_t *val) {\n  
mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '\\\"'));\n\n  if (val) {\n    mod_json_minus_if_ne_zero(mod_json_string_add_jstr(str, val));\n  }\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '\\\"'));\n  return 0;\n}\n\nstatic inline int mod_json_dump_value(mod_json_string_t *str,\n                                      mod_json_value_t *val);\n\nstatic inline int mod_json_dump_array(mod_json_string_t *str,\n                                      mod_json_array_t *arr) {\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '['));\n\n  if (arr) {\n    mod_json_value_t **iter = arr->first;\n\n    for (; iter != arr->last; ++iter) {\n      mod_json_minus_if_ne_zero(mod_json_dump_value(str, *iter));\n      if (iter + 1 != arr->last) {\n        mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ','));\n      }\n    }\n  }\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ']'));\n  return 0;\n}\n\nstatic inline int mod_json_dump_key(mod_json_string_t *str,\n                                    mod_json_string_t *key) {\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '\\\"'));\n  mod_json_minus_if_ne_zero(mod_json_string_add_jstr(str, key));\n  mod_json_minus_if_ne_zero(mod_json_string_add_cstr(str, \"\\\":\", 2));\n  return 0;\n}\n\nstatic inline int mod_json_dump_object(mod_json_string_t *str,\n                                       mod_json_object_t *obj) {\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '{'));\n\n  if (obj) {\n    mod_json_pair_t *iter = obj->first;\n\n    for (; iter != obj->last; ++iter) {\n      mod_json_minus_if_ne_zero(mod_json_dump_key(str, iter->key));\n      mod_json_minus_if_ne_zero(mod_json_dump_value(str, iter->val));\n\n      if (iter + 1 != obj->last) {\n        mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ','));\n      }\n    }\n  }\n  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '}'));\n  return 0;\n}\n\nstatic inline int 
mod_json_dump_value(mod_json_string_t *str,\n                                      mod_json_value_t *val) {\n  if (val) {\n    switch (val->type) {\n      case mod_json_type_null:\n        return mod_json_dump_null(str);\n\n      case mod_json_type_boolean:\n        return mod_json_dump_boolean(str, val->data.c_bool);\n\n      case mod_json_type_integer:\n        return mod_json_dump_integer(str, val->data.c_int);\n\n      case mod_json_type_float:\n        return mod_json_dump_float(str, val->data.c_float);\n\n      case mod_json_type_string:\n        return mod_json_dump_string(str, val->data.c_str);\n\n      case mod_json_type_array:\n        return mod_json_dump_array(str, val->data.c_arr);\n\n      case mod_json_type_object:\n        return mod_json_dump_object(str, val->data.c_obj);\n\n      default:\n        return -1;\n    }\n  }\n  return mod_json_dump_null(str);\n}\n\nmod_json_string_t *mod_json_dump(mod_json_value_t *val) {\n  mod_json_string_t *str = mod_json_string_set(\"\", 0);\n  mod_json_null_if_false(str);\n\n  if (mod_json_unlikely(mod_json_dump_value(str, val) != 0)) {\n    /* error occur */\n    mod_json_string_unset(str);\n    return NULL;\n  }\n  return str;\n}\n"
  },
  {
    "path": "src/ailego/hash/crc32c.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/internal/platform.h>\n\n#if !defined(__SSE4_2__) && !defined(__ARM_FEATURE_CRC32)\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o32[256] = {\n    0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C,\n    0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,\n    0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C,\n    0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,\n    0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC,\n    0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,\n    0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512,\n    0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,\n    0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD,\n    0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,\n    0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, 0xB3109EBF,\n    0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,\n    0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F,\n    0xED03A29B, 0x1F682198, 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,\n    0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F,\n    0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,\n    0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E,\n    0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,\n    0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E,\n    0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,\n    0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE,\n    0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,\n    0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, 0x082F63B7, 0xFA44E0B4,\n    0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,\n    0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B,\n    
0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,\n    0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5,\n    0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,\n    0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975,\n    0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,\n    0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905,\n    0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,\n    0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8,\n    0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,\n    0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8,\n    0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,\n    0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78,\n    0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,\n    0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6,\n    0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,\n    0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69,\n    0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,\n    0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o40[256] = {\n    0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB,\n    0x69CF5132, 0x7A6DC945, 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21,\n    0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6,\n    0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,\n    0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92,\n    0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B,\n    0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, 0xE29F20BA, 0xF13DB8CD,\n    0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,\n    0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28,\n    0x298143B1, 0x3A23DBC6, 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2,\n    0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, 0xFF17C604, 0xECB55E73,\n    0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,\n    0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17,\n    0x0BCC548E, 0x186ECCF9, 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C,\n    0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, 0x5DC6F43D, 0x4E646C4A,\n    0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,\n    0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD,\n    0xE9537434, 0xFAF1EC43, 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27,\n    0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, 0xBF59D487, 0xACFB4CF0,\n    0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,\n    0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94,\n    0x4B82460D, 0x5820DE7A, 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260,\n    0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, 0x66D73941, 0x7575A136,\n    0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,\n    0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3,\n    
0xADC95A4A, 0xBE6BC23D, 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059,\n    0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, 0x844819FB, 0x97EA818C,\n    0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,\n    0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8,\n    0x70938B71, 0x63311306, 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3,\n    0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, 0x26992BC2, 0x353BB3B5,\n    0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,\n    0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556,\n    0x6D1B6DCF, 0x7EB9F5B8, 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC,\n    0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, 0x3B11CD7C, 0x28B3550B,\n    0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,\n    0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F,\n    0xCFCA5FF6, 0xDC68C781, 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766,\n    0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, 0xE64B1C47, 0xF5E98430,\n    0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,\n    0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5,\n    0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F,\n    0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o48[256] = {\n    0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664,\n    0xD1B1F617, 0x74F06469, 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6,\n    0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, 0x70A27D8A, 0xD5E3EFF4,\n    0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,\n    0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B,\n    0x9942B558, 0x3C032726, 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67,\n    0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, 0xD915C5D1, 0x7C5457AF,\n    0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,\n    0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA,\n    0x40577089, 0xE516E2F7, 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828,\n    0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, 0xC76580D9, 0x622412A7,\n    0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,\n    0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878,\n    0x2E85480B, 0x8BC4DA75, 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20,\n    0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, 0x8F96C396, 0x2AD751E8,\n    0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,\n    0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9,\n    0xF7908DDA, 0x52D11FA4, 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B,\n    0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, 0x56830647, 0xF3C29439,\n    0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,\n    0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6,\n    0xBF63CE95, 0x1A225CEB, 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730,\n    0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, 0xB3764986, 0x1637DBF8,\n    0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,\n    0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD,\n    
0x2A34FCDE, 0x8F756EA0, 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F,\n    0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, 0x6A638C57, 0xCF221E29,\n    0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,\n    0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6,\n    0x83834485, 0x26C2D6FB, 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE,\n    0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, 0x2290CF18, 0x87D15D66,\n    0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,\n    0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE,\n    0x9DF3018D, 0x38B293F3, 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C,\n    0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, 0x3CE08A10, 0x99A1186E,\n    0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,\n    0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1,\n    0xD50042C2, 0x7041D0BC, 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD,\n    0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, 0x9557324B, 0x3016A035,\n    0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,\n    0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760,\n    0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2,\n    0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8};\n\n/**\n * The following CRC lookup table was generated automagically\n * using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o56[256] = {\n    0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B,\n    0xC4451272, 0x1900B8CA, 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF,\n    0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, 0xE964B13D, 0x34211B85,\n    0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,\n    0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990,\n    0xDB65C0A9, 0x06206A11, 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2,\n    0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, 0x2161776D, 0xFC24DDD5,\n    0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,\n    0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD,\n    0xFA04B7C4, 0x27411D7C, 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69,\n    0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, 0xABA65FE7, 0x76E3F55F,\n    0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,\n    0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A,\n    0x99A72E73, 0x44E284CB, 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3,\n    0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, 0xB4868D3C, 0x69C32784,\n    0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,\n    0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027,\n    0xB8C6591E, 0x6583F3A6, 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3,\n    0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, 0x95E7FA51, 0x48A250E9,\n    0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,\n    0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC,\n    0xA7E68BC5, 0x7AA3217D, 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006,\n    0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, 0xA4E4AAD9, 0x79A10061,\n    0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,\n    0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349,\n    
0x7F816A70, 0xA2C4C0C8, 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD,\n    0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, 0x8585DDB4, 0x58C0770C,\n    0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,\n    0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519,\n    0xB784AC20, 0x6AC10698, 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0,\n    0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, 0x9AA50F6F, 0x47E0A5D7,\n    0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,\n    0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93,\n    0x3D4384AA, 0xE0062E12, 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07,\n    0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, 0x106227E5, 0xCD278D5D,\n    0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,\n    0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48,\n    0x22635671, 0xFF26FCC9, 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A,\n    0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, 0xD867E1B5, 0x05224B0D,\n    0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,\n    0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825,\n    0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1,\n    0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o64[256] = {\n    0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C,\n    0x906761E8, 0xA8760E44, 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65,\n    0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, 0x8F2261D3, 0xB7330E7F,\n    0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,\n    0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E,\n    0xDA220BAA, 0xE2336406, 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3,\n    0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, 0xDECFBEC6, 0xE6DED16A,\n    0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,\n    0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598,\n    0x04EDB56C, 0x3CFCDAC0, 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1,\n    0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, 0x37516AAE, 0x0F400502,\n    0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,\n    0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023,\n    0x625100D7, 0x5A406F7B, 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89,\n    0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, 0x7D1400EC, 0x45056F40,\n    0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,\n    0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5,\n    0xBC9EBE11, 0x848FD1BD, 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C,\n    0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, 0xA3DBBE2A, 0x9BCAD186,\n    0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,\n    0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7,\n    0xF6DBD453, 0xCECABBFF, 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8,\n    0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, 0xABC5DECD, 0x93D4B161,\n    0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,\n    0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593,\n    
0x71E7D567, 0x49F6BACB, 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA,\n    0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, 0x750A600B, 0x4D1B0FA7,\n    0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,\n    0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86,\n    0x200A0A72, 0x181B65DE, 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C,\n    0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, 0x3F4F0A49, 0x075E65E5,\n    0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,\n    0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE,\n    0xC994DE1A, 0xF185B1B6, 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497,\n    0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, 0xD6D1DE21, 0xEEC0B18D,\n    0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,\n    0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC,\n    0x83D1B458, 0xBBC0DBF4, 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51,\n    0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, 0x873C0134, 0xBF2D6E98,\n    0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,\n    0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A,\n    0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013,\n    0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o72[256] = {\n    0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E,\n    0x697997B4, 0x8649FCAD, 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5,\n    0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, 0xC00C303E, 0x2F3C5B27,\n    0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,\n    0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F,\n    0xC973BF95, 0x2643D48C, 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57,\n    0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, 0xE5F20E92, 0x0AC2658B,\n    0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,\n    0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD,\n    0x2C81B107, 0xC3B1DA1E, 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576,\n    0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, 0x0E045BEB, 0xE13430F2,\n    0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,\n    0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A,\n    0x077BD440, 0xE84BBF59, 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F,\n    0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, 0xAE0E73CA, 0x413E18D3,\n    0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,\n    0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108,\n    0xE289DAD2, 0x0DB9B1CB, 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3,\n    0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, 0x4BFC7D58, 0xA4CC1641,\n    0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,\n    0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929,\n    0x4283F2F3, 0xADB399EA, 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C,\n    0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, 0x7C0EAFC9, 0x933EC4D0,\n    0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,\n    0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86,\n    
0xB57D105C, 0x5A4D7B45, 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D,\n    0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, 0x99FCA15B, 0x76CCCA42,\n    0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,\n    0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A,\n    0x90832EF0, 0x7FB345E9, 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF,\n    0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, 0x39F6897A, 0xD6C6E263,\n    0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,\n    0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053,\n    0x7B757B89, 0x94451090, 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8,\n    0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, 0xD200DC03, 0x3D30B71A,\n    0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,\n    0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872,\n    0xDB7F53A8, 0x344F38B1, 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A,\n    0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, 0xF7FEE2AF, 0x18CE89B6,\n    0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,\n    0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0,\n    0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B,\n    0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o80[256] = {\n    0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919,\n    0x75E69C41, 0x1DE5B089, 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B,\n    0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, 0x9C5BFAA6, 0xF458D66E,\n    0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,\n    0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC,\n    0xA7909BB4, 0xCF93B77C, 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5,\n    0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, 0x73767EEE, 0x1B755226,\n    0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,\n    0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002,\n    0xD4E6E55A, 0xBCE5C992, 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110,\n    0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, 0x7AB7077A, 0x12B42BB2,\n    0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,\n    0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330,\n    0x417C6668, 0x297F4AA0, 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884,\n    0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, 0xA8C1008F, 0xC0C22C47,\n    0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,\n    0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE,\n    0x320A1886, 0x5A09344E, 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC,\n    0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, 0xDBB77E61, 0xB3B452A9,\n    0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,\n    0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B,\n    0xE07C1F73, 0x887F33BB, 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC,\n    0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, 0xBB43F3A7, 0xD340DF6F,\n    0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,\n    0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B,\n    
0x1CD36813, 0x74D044DB, 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59,\n    0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, 0xC8358D49, 0xA036A181,\n    0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,\n    0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903,\n    0xF3FEEC5B, 0x9BFDC093, 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7,\n    0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, 0x1A438ABC, 0x7240A674,\n    0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,\n    0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097,\n    0xFA3F95CF, 0x923CB907, 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185,\n    0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, 0x1382F328, 0x7B81DFE0,\n    0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,\n    0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762,\n    0x2849923A, 0x404ABEF2, 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B,\n    0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, 0xFCAF7760, 0x94AC5BA8,\n    0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,\n    0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C,\n    0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E,\n    0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F};\n\n/**\n *  The following CRC lookup table was generated automagically\n *  using the following model parameters:\n *\n *  Generator Polynomial = ................. 0x1EDC6F41\n *  Generator Polynomial Length = .......... 32 bits\n *  Reflected Bits = ....................... TRUE\n *  Table Generation Offset = .............. 32 bits\n *  Number of Slices = ..................... 8 slices\n *  Slice Lengths = ........................ 
8 8 8 8 8 8 8 8\n */\nstatic uint32_t crc_tableil8_o88[256] = {\n    0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A,\n    0xB3657823, 0xFA590504, 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3,\n    0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, 0x847609B4, 0xCD4A7493,\n    0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,\n    0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224,\n    0x7528754D, 0x3C14086A, 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0,\n    0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, 0x4F3B6143, 0x06071C64,\n    0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,\n    0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367,\n    0x3A13140E, 0x732F6929, 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E,\n    0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, 0x1A00CB32, 0x533CB615,\n    0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,\n    0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2,\n    0xEB5EB7CB, 0xA262CAEC, 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF,\n    0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, 0xDC4DC65C, 0x9571BB7B,\n    0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,\n    0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1,\n    0xA465D688, 0xED59ABAF, 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18,\n    0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, 0x9376A71F, 0xDA4ADA38,\n    0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,\n    0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F,\n    0x6228DBE6, 0x2B14A6C1, 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D,\n    0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, 0x763A92BE, 0x3F06EF99,\n    0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,\n    0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A,\n    
0x0312E7F3, 0x4A2E9AD4, 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63,\n    0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, 0x3901F3FD, 0x703D8EDA,\n    0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,\n    0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D,\n    0xC85F8F04, 0x8163F223, 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20,\n    0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, 0xFF4CFE93, 0xB67083B4,\n    0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,\n    0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C,\n    0x9D642575, 0xD4585852, 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5,\n    0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, 0xAA7754E2, 0xE34B29C5,\n    0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,\n    0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72,\n    0x5B29281B, 0x1215553C, 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6,\n    0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, 0x613A3C15, 0x28064132,\n    0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,\n    0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31,\n    0x14124958, 0x5D2E347F, 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8,\n    0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5};\n\n/**\n *  Implementations adapted from Intel's Slicing By 8 Sourceforge Project\n *  http://sourceforge.net/projects/slicing-by-8/\n *  http://www.evanjones.ca/crc32c.html\n */\nstatic inline uint32_t crc32c_slicing8(const void *data, size_t len,\n                                       uint32_t crc) {\n  const uint8_t *p_buf = (const uint8_t *)data;\n\n  /* Handle leading misaligned bytes */\n  size_t init_bytes =\n      (sizeof(int32_t) - (intptr_t)p_buf) & (sizeof(int32_t) - 1);\n  if (len < init_bytes) {\n    init_bytes = len;\n  }\n  for (size_t li = 0; li < init_bytes; li++) {\n    crc = crc_tableil8_o32[(crc ^ *p_buf++) & 
0x000000FF] ^ (crc >> 8);\n  }\n\n  len -= init_bytes;\n  size_t running_length = len & ~(sizeof(uint64_t) - 1);\n  size_t end_bytes = len - running_length;\n\n  for (size_t li = 0; li < running_length / 8; li++) {\n    uint32_t term1, term2;\n\n    crc ^= *(uint32_t *)p_buf;\n    p_buf += 4;\n    term1 = crc_tableil8_o88[crc & 0x000000FF] ^\n            crc_tableil8_o80[(crc >> 8) & 0x000000FF];\n    term2 = crc >> 16;\n    crc = term1 ^ crc_tableil8_o72[term2 & 0x000000FF] ^\n          crc_tableil8_o64[(term2 >> 8) & 0x000000FF];\n    term1 = crc_tableil8_o56[(*(uint32_t *)p_buf) & 0x000000FF] ^\n            crc_tableil8_o48[((*(uint32_t *)p_buf) >> 8) & 0x000000FF];\n\n    term2 = (*(uint32_t *)p_buf) >> 16;\n    crc = crc ^ term1 ^ crc_tableil8_o40[term2 & 0x000000FF] ^\n          crc_tableil8_o32[(term2 >> 8) & 0x000000FF];\n    p_buf += 4;\n  }\n\n  for (size_t li = 0; li < end_bytes; li++) {\n    crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);\n  }\n  return crc;\n}\n#endif  // !__SSE4_2__\n\n#if defined(__SSE4_2__)\n#if defined(AILEGO_M64)\nstatic inline uint32_t crc32c_sse42(const void *data, size_t len,\n                                    uint32_t crc) {\n  const uint8_t *first = (const uint8_t *)data;\n  const uint8_t *last = first + ((len >> 3) << 3);\n\n  for (; first != last; first += 8) {\n    crc = (uint32_t)_mm_crc32_u64(crc, *(uint64_t *)first);\n  }\n  switch (((uint8_t *)data + len) - last) {\n    case 1:\n      crc = _mm_crc32_u8(crc, *last);\n      break;\n    case 2:\n      crc = _mm_crc32_u16(crc, *(uint16_t *)last);\n      break;\n    case 3:\n      crc = _mm_crc32_u16(crc, *(uint16_t *)last);\n      crc = _mm_crc32_u8(crc, *(last + 2));\n      break;\n    case 4:\n      crc = _mm_crc32_u32(crc, *(uint32_t *)last);\n      break;\n    case 5:\n      crc = _mm_crc32_u32(crc, *(uint32_t *)last);\n      crc = _mm_crc32_u8(crc, *(last + 4));\n      break;\n    case 6:\n      crc = _mm_crc32_u32(crc, *(uint32_t *)last);\n     
 crc = _mm_crc32_u16(crc, *(uint16_t *)(last + 4));\n      break;\n    case 7:\n      crc = _mm_crc32_u32(crc, *(uint32_t *)last);\n      crc = _mm_crc32_u16(crc, *(uint16_t *)(last + 4));\n      crc = _mm_crc32_u8(crc, *(last + 6));\n      break;\n  }\n  return crc;\n}\n#else\nstatic inline uint32_t crc32c_sse42(const void *data, size_t len,\n                                    uint32_t crc) {\n  const uint8_t *first = (const uint8_t *)data;\n  const uint8_t *last = first + ((len >> 2) << 2);\n\n  for (; first != last; first += 4) {\n    crc = _mm_crc32_u32(crc, *(uint32_t *)first);\n  }\n  switch (((uint8_t *)data + len) - last) {\n    case 1:\n      crc = _mm_crc32_u8(crc, *last);\n      break;\n    case 2:\n      crc = _mm_crc32_u16(crc, *(uint16_t *)last);\n      break;\n    case 3:\n      crc = _mm_crc32_u16(crc, *(uint16_t *)last);\n      crc = _mm_crc32_u8(crc, *(last + 2));\n      break;\n  }\n  return crc;\n}\n#endif  // AILEGO_M64\n#endif  // __SSE4_2__\n\n#if defined(__ARM_FEATURE_CRC32)\nstatic inline uint32_t crc32c_neon(const void *data, size_t len, uint32_t crc) {\n  const uint8_t *first = (const uint8_t *)data;\n  const uint8_t *last = first + ((len >> 3) << 3);\n\n  for (; first != last; first += 8) {\n    crc = __crc32cd(crc, *(uint64_t *)first);\n  }\n  switch (((uint8_t *)data + len) - last) {\n    case 1:\n      crc = __crc32cb(crc, *last);\n      break;\n    case 2:\n      crc = __crc32ch(crc, *(uint16_t *)last);\n      break;\n    case 3:\n      crc = __crc32ch(crc, *(uint16_t *)last);\n      crc = __crc32cb(crc, *(last + 2));\n      break;\n    case 4:\n      crc = __crc32cw(crc, *(uint32_t *)last);\n      break;\n    case 5:\n      crc = __crc32cw(crc, *(uint32_t *)last);\n      crc = __crc32cb(crc, *(last + 4));\n      break;\n    case 6:\n      crc = __crc32cw(crc, *(uint32_t *)last);\n      crc = __crc32ch(crc, *(uint16_t *)(last + 4));\n      break;\n    case 7:\n      crc = __crc32cw(crc, *(uint32_t *)last);\n      crc = 
__crc32ch(crc, *(uint16_t *)(last + 4));\n      crc = __crc32cb(crc, *(last + 6));\n      break;\n  }\n  return crc;\n}\n#endif  // __ARM_FEATURE_CRC32\n\nnamespace zvec {\nnamespace ailego {\n\nuint32_t Crc32c::Hash(const void *data, size_t len, uint32_t crc) {\n#if defined(__SSE4_2__)\n  return crc32c_sse42(data, len, crc);\n#elif defined(__ARM_FEATURE_CRC32)\n  return crc32c_neon(data, len, crc);\n#else\n  return crc32c_slicing8(data, len, crc);\n#endif\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/internal/cpu_features.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"cpu_features.h\"\n#include <cstddef>\n\n#if !defined(_MSC_VER) && !defined(__ARM_ARCH)\n#include <cpuid.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\nnamespace internal {\n\n//\n// REFER: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/\n//        tree/arch/x86/include/asm/cpufeatures.h\n//        https://software.intel.com/sites/default/files/managed/c5/15/\n//        architecture-instruction-set-extensions-programming-reference.pdf\n//\n\nCpuFeatures::CpuFlags CpuFeatures::flags_;\n\n#if defined(_MSC_VER)\nCpuFeatures::CpuFlags::CpuFlags(void)\n    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {\n  int l1[4] = {0, 0, 0, 0};\n  int l7[4] = {0, 0, 0, 0};\n\n  __cpuidex(l1, 1, 0);\n  __cpuidex(l7, 7, 0);\n  L1_ECX = l1[2];\n  L1_EDX = l1[3];\n  L7_EBX = l7[1];\n  L7_ECX = l7[2];\n  L7_EDX = l7[3];\n}\n#elif !defined(__ARM_ARCH)\nCpuFeatures::CpuFlags::CpuFlags(void)\n    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {\n  uint32_t eax, ebx, ecx, edx;\n\n  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {\n    L1_ECX = ecx;\n    L1_EDX = edx;\n  }\n  if (__get_cpuid_max(0, NULL) >= 7) {\n    __cpuid_count(7, 0, eax, ebx, ecx, edx);\n    L7_EBX = ebx;\n    L7_ECX = ecx;\n    L7_EDX = edx;\n  }\n}\n#else\nCpuFeatures::CpuFlags::CpuFlags(void)\n    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), 
L7_EDX(0) {}\n#endif\n\n//! 16-bit FP conversions\nbool CpuFeatures::F16C(void) {\n  return !!(flags_.L1_ECX & (1u << 29));\n}\n\n//! Multimedia Extensions\nbool CpuFeatures::MMX(void) {\n  return !!(flags_.L1_EDX & (1u << 23));\n}\n\n//! Streaming SIMD Extensions\nbool CpuFeatures::SSE(void) {\n  return !!(flags_.L1_EDX & (1u << 25));\n}\n\n//! Streaming SIMD Extensions 2\nbool CpuFeatures::SSE2(void) {\n  return !!(flags_.L1_EDX & (1u << 26));\n}\n\n//! Streaming SIMD Extensions 3\nbool CpuFeatures::SSE3(void) {\n  return !!(flags_.L1_ECX & (1u << 0));\n}\n\n//! Supplemental Streaming SIMD Extensions 3\nbool CpuFeatures::SSSE3(void) {\n  return !!(flags_.L1_ECX & (1u << 9));\n}\n\n//! Streaming SIMD Extensions 4.1\nbool CpuFeatures::SSE4_1(void) {\n  return !!(flags_.L1_ECX & (1u << 19));\n}\n\n//! Streaming SIMD Extensions 4.2\nbool CpuFeatures::SSE4_2(void) {\n  return !!(flags_.L1_ECX & (1u << 20));\n}\n\n//! Advanced Vector Extensions\nbool CpuFeatures::AVX(void) {\n  return !!(flags_.L1_ECX & (1u << 28));\n}\n\n//! Advanced Vector Extensions 2\nbool CpuFeatures::AVX2(void) {\n  return !!(flags_.L7_EBX & (1u << 5));\n}\n\n//! AVX-512 Foundation\nbool CpuFeatures::AVX512F(void) {\n  return !!(flags_.L7_EBX & (1u << 16));\n}\n\n//! AVX-512 DQ (Double/Quad granular) Instructions\nbool CpuFeatures::AVX512DQ(void) {\n  return !!(flags_.L7_EBX & (1u << 17));\n}\n\n//! AVX-512 Prefetch\nbool CpuFeatures::AVX512PF(void) {\n  return !!(flags_.L7_EBX & (1u << 26));\n}\n\n//! AVX-512 Exponential and Reciprocal\nbool CpuFeatures::AVX512ER(void) {\n  return !!(flags_.L7_EBX & (1u << 27));\n}\n\n//! AVX-512 Conflict Detection\nbool CpuFeatures::AVX512CD(void) {\n  return !!(flags_.L7_EBX & (1u << 28));\n}\n\n//! AVX-512 BW (Byte/Word granular) Instructions\nbool CpuFeatures::AVX512BW(void) {\n  return !!(flags_.L7_EBX & (1u << 30));\n}\n\n//! 
AVX-512 VL (128/256 Vector Length) Extensions\nbool CpuFeatures::AVX512VL(void) {\n  return !!(flags_.L7_EBX & (1u << 31));\n}\n\n//! AVX-512 Integer Fused Multiply-Add instructions\nbool CpuFeatures::AVX512_IFMA(void) {\n  return !!(flags_.L7_EBX & (1u << 21));\n}\n\n//! AVX512 Vector Bit Manipulation instructions\nbool CpuFeatures::AVX512_VBMI(void) {\n  return !!(flags_.L7_ECX & (1u << 1));\n}\n\n//! Additional AVX512 Vector Bit Manipulation Instructions\nbool CpuFeatures::AVX512_VBMI2(void) {\n  return !!(flags_.L7_ECX & (1u << 6));\n}\n\n//! Vector Neural Network Instructions\nbool CpuFeatures::AVX512_VNNI(void) {\n  return !!(flags_.L7_ECX & (1u << 11));\n}\n\n//! Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions\nbool CpuFeatures::AVX512_BITALG(void) {\n  return !!(flags_.L7_ECX & (1u << 12));\n}\n\n//! POPCNT for vectors of DW/QW\nbool CpuFeatures::AVX512_VPOPCNTDQ(void) {\n  return !!(flags_.L7_ECX & (1u << 14));\n}\n\n//! AVX-512 Neural Network Instructions\nbool CpuFeatures::AVX512_4VNNIW(void) {\n  return !!(flags_.L7_EDX & (1u << 2));\n}\n\n//! AVX-512 Multiply Accumulation Single precision\nbool CpuFeatures::AVX512_4FMAPS(void) {\n  return !!(flags_.L7_EDX & (1u << 3));\n}\n\n//! AVX-512 FP16 instructions\nbool CpuFeatures::AVX512_FP16(void) {\n  return !!(flags_.L7_EDX & (1u << 23));\n}\n\n//! CMPXCHG8 instruction\nbool CpuFeatures::CX8(void) {\n  return !!(flags_.L1_EDX & (1u << 8));\n}\n\n//! CMPXCHG16B instruction\nbool CpuFeatures::CX16(void) {\n  return !!(flags_.L1_ECX & (1u << 13));\n}\n\n//! PCLMULQDQ instruction\nbool CpuFeatures::PCLMULQDQ(void) {\n  return !!(flags_.L1_ECX & (1u << 1));\n}\n\n//! Carry-Less Multiplication Double Quadword\nbool CpuFeatures::VPCLMULQDQ(void) {\n  return !!(flags_.L7_ECX & (1u << 10));\n}\n\n//! CMOV instructions (plus FCMOVcc, FCOMI with FPU)\nbool CpuFeatures::CMOV(void) {\n  return !!(flags_.L1_EDX & (1u << 15));\n}\n\n//! 
MOVBE instruction\nbool CpuFeatures::MOVBE(void) {\n  return !!(flags_.L1_ECX & (1u << 22));\n}\n\n//! Enhanced REP MOVSB/STOSB instructions\nbool CpuFeatures::ERMS(void) {\n  return !!(flags_.L7_EBX & (1u << 9));\n}\n\n//! POPCNT instruction\nbool CpuFeatures::POPCNT(void) {\n  return !!(flags_.L1_ECX & (1u << 23));\n}\n\n//! XSAVE/XRSTOR/XSETBV/XGETBV instructions\nbool CpuFeatures::XSAVE(void) {\n  return !!(flags_.L1_ECX & (1u << 26));\n}\n\n//! Fused multiply-add\nbool CpuFeatures::FMA(void) {\n  return !!(flags_.L1_ECX & (1u << 12));\n}\n\n//! ADCX and ADOX instructions\nbool CpuFeatures::ADX(void) {\n  return !!(flags_.L7_EBX & (1u << 19));\n}\n\n//! Galois Field New Instructions\nbool CpuFeatures::GFNI(void) {\n  return !!(flags_.L7_ECX & (1u << 8));\n}\n\n//! AES instructions\nbool CpuFeatures::AES(void) {\n  return !!(flags_.L1_ECX & (1u << 25));\n}\n\n//! Vector AES\nbool CpuFeatures::VAES(void) {\n  return !!(flags_.L7_ECX & (1u << 9));\n}\n\n//! RDSEED instruction\nbool CpuFeatures::RDSEED(void) {\n  return !!(flags_.L7_EBX & (1u << 18));\n}\n\n//! RDRAND instruction\nbool CpuFeatures::RDRAND(void) {\n  return !!(flags_.L1_ECX & (1u << 30));\n}\n\n//! SHA1/SHA256 Instruction Extensions\nbool CpuFeatures::SHA(void) {\n  return !!(flags_.L7_EBX & (1u << 29));\n}\n\n//! 1st group bit manipulation extensions\nbool CpuFeatures::BMI1(void) {\n  return !!(flags_.L7_EBX & (1u << 3));\n}\n\n//! 2nd group bit manipulation extensions\nbool CpuFeatures::BMI2(void) {\n  return !!(flags_.L7_EBX & (1u << 8));\n}\n\n//! CLFLUSH instruction\nbool CpuFeatures::CLFLUSH(void) {\n  return !!(flags_.L1_EDX & (1u << 19));\n}\n\n//! CLFLUSHOPT instruction\nbool CpuFeatures::CLFLUSHOPT(void) {\n  return !!(flags_.L7_EBX & (1u << 23));\n}\n\n//! CLWB instruction\nbool CpuFeatures::CLWB(void) {\n  return !!(flags_.L7_EBX & (1u << 24));\n}\n\n//! RDPID instruction\nbool CpuFeatures::RDPID(void) {\n  return !!(flags_.L7_ECX & (1u << 22));\n}\n\n//! 
Onboard FPU\nbool CpuFeatures::FPU(void) {\n  return !!(flags_.L1_EDX & (1u << 0));\n}\n\n//! Hyper-Threading\nbool CpuFeatures::HT(void) {\n  return !!(flags_.L1_EDX & (1u << 28));\n}\n\n//! Hardware virtualization\nbool CpuFeatures::VMX(void) {\n  return !!(flags_.L1_ECX & (1u << 5));\n}\n\n// ！Running on a hypervisor\nbool CpuFeatures::HYPERVISOR(void) {\n  return !!(flags_.L1_ECX & (1u << 31));\n}\n\nconst char *CpuFeatures::Intrinsics(void) {\n  return \"\"\n#if defined(__ARM_NEON)\n         \"Neon\"\n#if defined(__ARM_FEATURE_CRC32)\n         \"+CRC\"\n#endif\n#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) || \\\n    defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n         \"+FP16\"\n#endif\n#elif defined(__AVX512F__)\n         \"AVX512F\"\n#if defined(__AVX512VL__)\n         \"+AVX512VL\"\n#endif\n#if defined(__AVX512BW__)\n         \"+AVX512BW\"\n#endif\n#if defined(__AVX512DQ__)\n         \"+AVX512DQ\"\n#endif\n#if defined(__AVX512CD__)\n         \"+AVX512CD\"\n#endif\n#if defined(__AVX512ER__)\n         \"+AVX512ER\"\n#endif\n#if defined(__AVX512PF__)\n         \"+AVX512PF\"\n#endif\n#if defined(__AVX512IFMA__)\n         \"+AVX512IFMA\"\n#endif\n#if defined(__AVX512VBMI__)\n         \"+AVX512VBMI\"\n#endif\n#if defined(__AVX512VBMI2__)\n         \"+AVX512VBMI2\"\n#endif\n#if defined(__AVX512VNNI__)\n         \"+AVX512VNNI\"\n#endif\n#if defined(__AVX512BITALG__)\n         \"+AVX512BITALG\"\n#endif\n#if defined(__AVX512VPOPCNTDQ__)\n         \"+AVX512VPOPCNTDQ\"\n#endif\n#if defined(__AVX512FP16__)\n         \"+AVX512FP16\"\n#endif\n#elif defined(__AVX2__)\n         \"AVX2\"\n#elif defined(__AVX__)\n         \"AVX\"\n#elif defined(__SSE4_2__)\n         \"SSE4.2\"\n#elif defined(__SSE4_1__)\n         \"SSE4.1\"\n#elif defined(__SSSE3__)\n         \"SSSE3\"\n#elif defined(__SSE3__)\n         \"SSE3\"\n#elif defined(__SSE2__)\n         \"SSE2\"\n#elif defined(__SSE__)\n         \"SSE\"\n#elif defined(__MMX__)\n         \"MMX\"\n#endif\n#if 
defined(__FMA__)\n         \"+FMA\"\n#endif\n#if defined(__BMI2__)\n         \"+BMI2\"\n#elif defined(__BMI__)\n         \"+BMI\"\n#endif\n#if defined(__F16C__)\n         \"+F16C\"\n#endif\n      ;\n}\n\nCpuFeatures::StaticFlags CpuFeatures::static_flags_;\n}  // namespace internal\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/internal/cpu_features.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\nnamespace zvec {\nnamespace ailego {\nnamespace internal {\n\n/*! Cpu Features\n */\nclass CpuFeatures {\n public:\n  //! 16-bit FP conversions\n  static bool F16C(void);\n\n  //! Multimedia Extensions\n  static bool MMX(void);\n\n  //! Streaming SIMD Extensions\n  static bool SSE(void);\n\n  //! Streaming SIMD Extensions 2\n  static bool SSE2(void);\n\n  //! Streaming SIMD Extensions 3\n  static bool SSE3(void);\n\n  //! Supplemental Streaming SIMD Extensions 3\n  static bool SSSE3(void);\n\n  //! Streaming SIMD Extensions 4.1\n  static bool SSE4_1(void);\n\n  //! Streaming SIMD Extensions 4.2\n  static bool SSE4_2(void);\n\n  //! Advanced Vector Extensions\n  static bool AVX(void);\n\n  //! Advanced Vector Extensions 2\n  static bool AVX2(void);\n\n  //! AVX-512 Foundation\n  static bool AVX512F(void);\n\n  //! AVX-512 DQ (Double/Quad granular) Instructions\n  static bool AVX512DQ(void);\n\n  //! AVX-512 Prefetch\n  static bool AVX512PF(void);\n\n  //! AVX-512 Exponential and Reciprocal\n  static bool AVX512ER(void);\n\n  //! AVX-512 Conflict Detection\n  static bool AVX512CD(void);\n\n  //! AVX-512 BW (Byte/Word granular) Instructions\n  static bool AVX512BW(void);\n\n  //! AVX-512 VL (128/256 Vector Length) Extensions\n  static bool AVX512VL(void);\n\n  //! 
AVX-512 Integer Fused Multiply-Add instructions\n  static bool AVX512_IFMA(void);\n\n  //! AVX512 Vector Bit Manipulation instructions\n  static bool AVX512_VBMI(void);\n\n  //! Additional AVX512 Vector Bit Manipulation Instructions\n  static bool AVX512_VBMI2(void);\n\n  //! Vector Neural Network Instructions\n  static bool AVX512_VNNI(void);\n\n  //! Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions\n  static bool AVX512_BITALG(void);\n\n  //! POPCNT for vectors of DW/QW\n  static bool AVX512_VPOPCNTDQ(void);\n\n  //! AVX-512 Neural Network Instructions\n  static bool AVX512_4VNNIW(void);\n\n  //! AVX-512 Multiply Accumulation Single precision\n  static bool AVX512_4FMAPS(void);\n\n  //! AVX-512 FP16 instructions\n  static bool AVX512_FP16(void);\n\n  //! CMPXCHG8 instruction\n  static bool CX8(void);\n\n  //! CMPXCHG16B instruction\n  static bool CX16(void);\n\n  //! PCLMULQDQ instruction\n  static bool PCLMULQDQ(void);\n\n  //! Carry-Less Multiplication Double Quadword\n  static bool VPCLMULQDQ(void);\n\n  //! CMOV instructions (plus FCMOVcc, FCOMI with FPU)\n  static bool CMOV(void);\n\n  //! MOVBE instruction\n  static bool MOVBE(void);\n\n  //! Enhanced REP MOVSB/STOSB instructions\n  static bool ERMS(void);\n\n  //! POPCNT instruction\n  static bool POPCNT(void);\n\n  //! XSAVE/XRSTOR/XSETBV/XGETBV instructions\n  static bool XSAVE(void);\n\n  //! Fused multiply-add\n  static bool FMA(void);\n\n  //! ADCX and ADOX instructions\n  static bool ADX(void);\n\n  //! Galois Field New Instructions\n  static bool GFNI(void);\n\n  //! AES instructions\n  static bool AES(void);\n\n  //! Vector AES\n  static bool VAES(void);\n\n  //! RDSEED instruction\n  static bool RDSEED(void);\n\n  //! RDRAND instruction\n  static bool RDRAND(void);\n\n  //! SHA1/SHA256 Instruction Extensions\n  static bool SHA(void);\n\n  //! 1st group bit manipulation extensions\n  static bool BMI1(void);\n\n  //! 2nd group bit manipulation extensions\n  static bool BMI2(void);\n\n  //! 
CLFLUSH instruction\n  static bool CLFLUSH(void);\n\n  //! CLFLUSHOPT instruction\n  static bool CLFLUSHOPT(void);\n\n  //! CLWB instruction\n  static bool CLWB(void);\n\n  //! RDPID instruction\n  static bool RDPID(void);\n\n  //! Onboard FPU\n  static bool FPU(void);\n\n  //! Hyper-Threading\n  static bool HT(void);\n\n  //! Hardware virtualization\n  static bool VMX(void);\n\n  // ！Running on a hypervisor\n  static bool HYPERVISOR(void);\n\n  //! Intrinsics of compiling\n  static const char *Intrinsics(void);\n\n private:\n  struct CpuFlags {\n    //! Constructor\n    CpuFlags(void);\n\n    //! Members\n    uint32_t L1_ECX;\n    uint32_t L1_EDX;\n    uint32_t L7_EBX;\n    uint32_t L7_ECX;\n    uint32_t L7_EDX;\n  };\n\n  //! Static Members\n  static CpuFlags flags_;\n\n public:\n  struct StaticFlags {\n    //! 16-bit FP conversions\n    bool F16C = CpuFeatures::F16C();\n\n    //! Multimedia Extensions\n    bool MMX = CpuFeatures::MMX();\n\n    //! Streaming SIMD Extensions\n    bool SSE = CpuFeatures::SSE();\n\n    //! Streaming SIMD Extensions 2\n    bool SSE2 = CpuFeatures::SSE2();\n\n    //! Streaming SIMD Extensions 3\n    bool SSE3 = CpuFeatures::SSE3();\n\n    //! Supplemental Streaming SIMD Extensions 3\n    bool SSSE3 = CpuFeatures::SSSE3();\n\n    //! Streaming SIMD Extensions 4.1\n    bool SSE4_1 = CpuFeatures::SSE4_1();\n\n    //! Streaming SIMD Extensions 4.2\n    bool SSE4_2 = CpuFeatures::SSE4_2();\n\n    //! Advanced Vector Extensions\n    bool AVX = CpuFeatures::AVX();\n\n    //! Advanced Vector Extensions 2\n    bool AVX2 = CpuFeatures::AVX2();\n\n    //! AVX-512 Foundation\n    bool AVX512F = CpuFeatures::AVX512F();\n\n    //! AVX-512 DQ (Double/Quad granular) Instructions\n    bool AVX512DQ = CpuFeatures::AVX512DQ();\n\n    //! AVX-512 Prefetch\n    bool AVX512PF = CpuFeatures::AVX512PF();\n\n    //! AVX-512 Exponential and Reciprocal\n    bool AVX512ER = CpuFeatures::AVX512ER();\n\n    //! 
AVX-512 Conflict Detection\n    bool AVX512CD = CpuFeatures::AVX512CD();\n\n    //! AVX-512 BW (Byte/Word granular) Instructions\n    bool AVX512BW = CpuFeatures::AVX512BW();\n\n    //! AVX-512 VL (128/256 Vector Length) Extensions\n    bool AVX512VL = CpuFeatures::AVX512VL();\n\n    //! AVX-512 Integer Fused Multiply-Add instructions\n    bool AVX512_IFMA = CpuFeatures::AVX512_IFMA();\n\n    //! AVX512 Vector Bit Manipulation instructions\n    bool AVX512_VBMI = CpuFeatures::AVX512_VBMI();\n\n    //! Additional AVX512 Vector Bit Manipulation Instructions\n    bool AVX512_VBMI2 = CpuFeatures::AVX512_VBMI2();\n\n    //! Vector Neural Network Instructions\n    bool AVX512_VNNI = CpuFeatures::AVX512_VNNI();\n\n    //! Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions\n    bool AVX512_BITALG = CpuFeatures::AVX512_BITALG();\n\n    //! POPCNT for vectors of DW/QW\n    bool AVX512_VPOPCNTDQ = CpuFeatures::AVX512_VPOPCNTDQ();\n\n    //! AVX-512 Neural Network Instructions\n    bool AVX512_4VNNIW = CpuFeatures::AVX512_4VNNIW();\n\n    //! AVX-512 Multiply Accumulation Single precision\n    bool AVX512_4FMAPS = CpuFeatures::AVX512_4FMAPS();\n\n    //! AVX-512 FP16 instructions\n    bool AVX512_FP16 = CpuFeatures::AVX512_FP16();\n\n    //! CMPXCHG8 instruction\n    bool CX8 = CpuFeatures::CX8();\n\n    //! CMPXCHG16B instruction\n    bool CX16 = CpuFeatures::CX16();\n\n    //! PCLMULQDQ instruction\n    bool PCLMULQDQ = CpuFeatures::PCLMULQDQ();\n\n    //! Carry-Less Multiplication Double Quadword\n    bool VPCLMULQDQ = CpuFeatures::VPCLMULQDQ();\n\n    //! CMOV instructions (plus FCMOVcc, FCOMI with FPU)\n    bool CMOV = CpuFeatures::CMOV();\n\n    //! MOVBE instruction\n    bool MOVBE = CpuFeatures::MOVBE();\n\n    //! Enhanced REP MOVSB/STOSB instructions\n    bool ERMS = CpuFeatures::ERMS();\n\n    //! POPCNT instruction\n    bool POPCNT = CpuFeatures::POPCNT();\n\n    //! XSAVE/XRSTOR/XSETBV/XGETBV instructions\n    bool XSAVE = CpuFeatures::XSAVE();\n\n    //! 
Fused multiply-add\n    bool FMA = CpuFeatures::FMA();\n\n    //! ADCX and ADOX instructions\n    bool ADX = CpuFeatures::ADX();\n\n    //! Galois Field New Instructions\n    bool GFNI = CpuFeatures::GFNI();\n\n    //! AES instructions\n    bool AES = CpuFeatures::AES();\n\n    //! Vector AES\n    bool VAES = CpuFeatures::VAES();\n\n    //! RDSEED instruction\n    bool RDSEED = CpuFeatures::RDSEED();\n\n    //! RDRAND instruction\n    bool RDRAND = CpuFeatures::RDRAND();\n\n    //! SHA1/SHA256 Instruction Extensions\n    bool SHA = CpuFeatures::SHA();\n\n    //! 1st group bit manipulation extensions\n    bool BMI1 = CpuFeatures::BMI1();\n\n    //! 2nd group bit manipulation extensions\n    bool BMI2 = CpuFeatures::BMI2();\n\n    //! CLFLUSH instruction\n    bool CLFLUSH = CpuFeatures::CLFLUSH();\n\n    //! CLFLUSHOPT instruction\n    bool CLFLUSHOPT = CpuFeatures::CLFLUSHOPT();\n\n    //! CLWB instruction\n    bool CLWB = CpuFeatures::CLWB();\n\n    //! RDPID instruction\n    bool RDPID = CpuFeatures::RDPID();\n\n    //! Onboard FPU\n    bool FPU = CpuFeatures::FPU();\n\n    //! Hyper-Threading\n    bool HT = CpuFeatures::HT();\n\n    //! Hardware virtualization\n    bool VMX = CpuFeatures::VMX();\n\n    // ！Running on a hypervisor\n    bool HYPERVISOR = CpuFeatures::HYPERVISOR();\n  };\n  static StaticFlags static_flags_;\n};\n\n}  // namespace internal\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/io/file.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/io/file.h>\n#if !defined(_WIN64) && !defined(_WIN32)\n#include <sys/mman.h>\n#include <sys/stat.h>\n#include <dirent.h>\n#include <errno.h>\n#include <fcntl.h>\n#include <string.h>\n#include <unistd.h>\n#else\n#include <Windows.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n#if !defined(_WIN64) && !defined(_WIN32)\n\n//! Open a file, retrying while the call is interrupted by a signal\nstatic inline int OpenSafely(const char *path, int flags) {\n  int fd = open(path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);\n  while (fd == -1 && errno == EINTR) {\n    fd = open(path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);\n  }\n  return fd;\n}\n\n//! Close a file descriptor exactly once (errors are ignored)\nstatic inline void CloseSafely(int fd) {\n  // Do not retry on EINTR: Linux releases the descriptor even when close() is\n  // interrupted, so a second call could close a descriptor that another\n  // thread has just obtained.\n  (void)close(fd);\n}\n\n//! read() retried while interrupted by a signal\nstatic inline ssize_t ReadSafely(int fd, void *buf, size_t count) {\n  ssize_t ret = read(fd, buf, count);\n  while (ret == -1 && errno == EINTR) {\n    ret = read(fd, buf, count);\n  }\n  return ret;\n}\n\n//! pread() retried while interrupted by a signal\nstatic inline ssize_t PreadSafely(int fd, void *buf, size_t count,\n                                  ssize_t offset) {\n  ssize_t ret = pread(fd, buf, count, offset);\n  while (ret == -1 && errno == EINTR) {\n    ret = pread(fd, buf, count, offset);\n  }\n  return ret;\n}\n\n//! write() retried while interrupted by a signal\nstatic inline ssize_t WriteSafely(int fd, const void *buf, size_t count) {\n  ssize_t ret = write(fd, buf, count);\n  while (ret == -1 && errno == EINTR) {\n    ret = write(fd, buf, count);\n  }\n  return ret;\n}\n\n//! pwrite() retried while interrupted by a signal\nstatic inline ssize_t PwriteSafely(int fd, const void *buf, size_t count,\n                                   ssize_t offset) {\n  ssize_t ret = pwrite(fd, buf, count, offset);\n  while (ret == -1 && errno == EINTR) {\n    ret = pwrite(fd, buf, count, offset);\n  }\n  return ret;\n}\n\n//! Read until `count` bytes are stored, or EOF / an error stops the loop\nstatic inline size_t ReadAll(int fd, void *buf, size_t count) {\n  size_t rdlen = 0;\n  while (rdlen < count) {\n    ssize_t ret = ReadSafely(fd, (char *)buf + rdlen, count - rdlen);\n    if (ret <= 0) {\n      break;\n    }\n    rdlen += ret;\n  }\n  return rdlen;\n}\n\n//! Positional variant of ReadAll, starting at `offset`\nstatic inline size_t PreadAll(int fd, void *buf, size_t count, ssize_t offset) {\n  size_t rdlen = 0;\n  while (rdlen < count) {\n    ssize_t ret =\n        PreadSafely(fd, (char *)buf + rdlen, count - rdlen, offset + rdlen);\n    if (ret <= 0) {\n      break;\n    }\n    rdlen += ret;\n  }\n  return rdlen;\n}\n\n//! Write until `count` bytes are written, or an error stops the loop\nstatic inline size_t WriteAll(int fd, const void *buf, size_t count) {\n  size_t wrlen = 0;\n  while (wrlen < count) {\n    ssize_t ret = WriteSafely(fd, (const char *)buf + wrlen, count - wrlen);\n    if (ret <= 0) {\n      break;\n    }\n    wrlen += ret;\n  }\n  return wrlen;\n}\n\n//! Positional variant of WriteAll, starting at `offset`\nstatic inline size_t PwriteAll(int fd, const void *buf, size_t count,\n                               ssize_t offset) {\n  size_t wrlen = 0;\n  while (wrlen < count) {\n    ssize_t ret = PwriteSafely(fd, (const char *)buf + wrlen, count - wrlen,\n                               offset + wrlen);\n    if (ret <= 0) {\n      break;\n    }\n    wrlen += ret;\n  }\n  return wrlen;\n}\n\n//! Create (or open) a local file and resize it to `len` bytes\nbool File::create(const char *path, size_t len, bool direct) {\n  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);\n\n  // Try opening or creating a file\n  int flags = O_RDWR | O_CREAT;\n#ifdef O_DIRECT\n  if (direct) {\n    flags |= O_DIRECT;\n  }\n#else\n  (void)direct;\n#endif\n\n  int fd = OpenSafely(path, flags);\n  ailego_false_if_lt_zero(fd);\n\n#ifdef F_NOCACHE\n  // Direct IO canonical solution for Mac OSX\n  if (direct) {\n    // Do not leak the descriptor when caching cannot be disabled\n    ailego_do_if_ne_zero(fcntl(fd, F_NOCACHE, 1)) {\n      CloseSafely(fd);\n      return false;\n    }\n  }\n#endif\n\n  // Truncate the file to the specified size\n  ailego_do_if_ne_zero(ftruncate(fd, len)) {\n    CloseSafely(fd);\n    return false;\n  }\n\n  read_only_ = false;\n  native_handle_ = fd;\n  return true;\n}\n\n//! Open an existing local file\nbool File::open(const char *path, bool rdonly, bool direct) {\n  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);\n\n  // Try opening the file\n  int flags = rdonly ? O_RDONLY : O_RDWR;\n#ifdef O_DIRECT\n  if (direct) {\n    flags |= O_DIRECT;\n  }\n#else\n  (void)direct;\n#endif\n\n  int fd = OpenSafely(path, flags);\n  ailego_false_if_lt_zero(fd);\n\n#ifdef F_NOCACHE\n  // Direct IO canonical solution for Mac OSX\n  if (direct) {\n    // Do not leak the descriptor when caching cannot be disabled\n    ailego_do_if_ne_zero(fcntl(fd, F_NOCACHE, 1)) {\n      CloseSafely(fd);\n      return false;\n    }\n  }\n#endif\n\n  read_only_ = rdonly;\n  native_handle_ = fd;\n  return true;\n}\n\n//! Close the file if it is open\nvoid File::close(void) {\n  ailego_return_if_false(native_handle_ != File::InvalidHandle);\n  CloseSafely(native_handle_);\n  native_handle_ = File::InvalidHandle;\n}\n\n//! Move the file pointer back to the beginning of the file\nvoid File::reset(void) {\n  ailego_return_if_false(native_handle_ != File::InvalidHandle);\n  lseek(native_handle_, 0, SEEK_SET);\n}\n\n//! Write at the current position, in chunks of at most 1 GiB\nsize_t File::write(const void *data, size_t len) {\n  const size_t block_size = 0x40000000u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    size_t wrlen =\n        WriteAll(native_handle_, (const uint8_t *)data + total, block_size);\n    if (wrlen != block_size) {\n      return (total + wrlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    total += WriteAll(native_handle_, (const uint8_t *)data + total, len);\n  }\n  return total;\n}\n\n//! Write at offset `off`, in chunks of at most 1 GiB\nsize_t File::write(ssize_t off, const void *data, size_t len) {\n  const size_t block_size = 0x40000000u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    size_t wrlen = PwriteAll(native_handle_, (const uint8_t *)data + total,\n                             block_size, off + total);\n    if (wrlen != block_size) {\n      return (total + wrlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    total += PwriteAll(native_handle_, (const uint8_t *)data + total, len,\n                       off + total);\n  }\n  return total;\n}\n\n//! Read from the current position, in chunks of at most 1 GiB\nsize_t File::read(void *buf, size_t len) {\n  const size_t block_size = 0x40000000u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    size_t rdlen = ReadAll(native_handle_, (uint8_t *)buf + total, block_size);\n    if (rdlen != block_size) {\n      return (total + rdlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    total += ReadAll(native_handle_, (uint8_t *)buf + total, len);\n  }\n  return total;\n}\n\n//! Read from offset `off`, in chunks of at most 1 GiB\nsize_t File::read(ssize_t off, void *buf, size_t len) {\n  const size_t block_size = 0x40000000u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    size_t rdlen = PreadAll(native_handle_, (uint8_t *)buf + total, block_size,\n                            off + total);\n    if (rdlen != block_size) {\n      return (total + rdlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    total += PreadAll(native_handle_, (uint8_t *)buf + total, len, off + total);\n  }\n  return total;\n}\n\n//! Synchronize the file with fsync()\nbool File::flush(void) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n  return (fsync(native_handle_) == 0);\n}\n\n//! Move the file pointer\nbool File::seek(ssize_t off, Origin origin) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n  ailego_false_if_false(lseek(native_handle_, off, (int)origin) != (off_t)-1);\n  return true;\n}\n\n//! Resize the file to `len` bytes\nbool File::truncate(size_t len) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n  ailego_false_if_ne_zero(ftruncate(native_handle_, (off_t)len));\n  return true;\n}\n\n//! Size of the file in bytes (zero on failure)\nsize_t File::size(void) const {\n  struct stat fs;\n  ailego_zero_if_false(native_handle_ != File::InvalidHandle &&\n                       fstat(native_handle_, &fs) == 0);\n  return (fs.st_size);\n}\n\n//! Current position of the file pointer (zero on failure)\nssize_t File::offset(void) const {\n  off_t off;\n  ailego_zero_if_false(native_handle_ != File::InvalidHandle &&\n                       (off = lseek(native_handle_, 0, SEEK_CUR)) != -1);\n  return off;\n}\n\n//! Map a region of a file into memory (nullptr on failure)\nvoid *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {\n  int prot =\n      ((opts & File::MMAP_READONLY) ? PROT_READ : PROT_READ | PROT_WRITE);\n  int flags = (opts & File::MMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;\n\n#if defined(MAP_POPULATE)\n  // MAP_POPULATE belongs to the mapping flags, not to the protection bits\n  if (opts & File::MMAP_POPULATE) {\n    flags |= MAP_POPULATE;\n  }\n#endif\n#if defined(MAP_HUGETLB)\n  if (opts & File::MMAP_HUGE_PAGE) {\n    flags |= MAP_HUGETLB;\n  }\n#endif\n  void *addr = mmap(nullptr, len, prot, flags, handle, off);\n  ailego_null_if_false(addr != MAP_FAILED);\n\n  if (opts & File::MMAP_LOCKED) {\n    mlock(addr, len);\n  }\n  if (opts & File::MMAP_WARMUP) {\n    File::MemoryWarmup(addr, len);\n  }\n  return addr;\n}\n\n#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)\n#define MAP_ANONYMOUS MAP_ANON\n#endif\n\n//! Map an anonymous region into memory (nullptr on failure)\nvoid *File::MemoryMap(size_t len, int opts) {\n#if defined(MAP_ANONYMOUS)\n  int prot =\n      ((opts & File::MMAP_READONLY) ? PROT_READ : PROT_READ | PROT_WRITE);\n  int flags = (opts & File::MMAP_SHARED) ? MAP_SHARED | MAP_ANONYMOUS\n                                         : MAP_PRIVATE | MAP_ANONYMOUS;\n\n#if defined(MAP_POPULATE)\n  // MAP_POPULATE belongs to the mapping flags, not to the protection bits\n  if (opts & File::MMAP_POPULATE) {\n    flags |= MAP_POPULATE;\n  }\n#endif\n#if defined(MAP_HUGETLB)\n  if (opts & File::MMAP_HUGE_PAGE) {\n    flags |= MAP_HUGETLB;\n  }\n#endif\n  void *addr = mmap(nullptr, len, prot, flags, -1, 0);\n  ailego_null_if_false(addr != MAP_FAILED);\n  return addr;\n#else\n  (void)len;\n  (void)opts;\n  return nullptr;\n#endif  // MAP_ANONYMOUS\n}\n\n//! Remap a memory region, optionally to the fixed address `newptr`\n//! NOTE(review): on Linux/NetBSD a failure is reported with mremap()'s own\n//! MAP_FAILED value, while the unsupported path returns nullptr.\nvoid *File::MemoryRemap(void *oldptr, size_t oldsize, void *newptr,\n                        size_t newsize) {\n#if defined(__linux) || defined(__linux__)\n  // MREMAP_FIXED is only valid in combination with MREMAP_MAYMOVE\n  return newptr ? mremap(oldptr, oldsize, newsize,\n                         MREMAP_MAYMOVE | MREMAP_FIXED, newptr)\n                : mremap(oldptr, oldsize, newsize, MREMAP_MAYMOVE);\n#elif defined(__NetBSD__)\n  return newptr ? mremap(oldptr, oldsize, newptr, newsize, MAP_FIXED)\n                : mremap(oldptr, oldsize, nullptr, newsize, 0);\n#else\n  (void)oldptr;\n  (void)oldsize;\n  (void)newptr;\n  (void)newsize;\n  errno = ENOTSUP;\n  return nullptr;\n#endif\n}\n\n//! Unmap a memory region\nvoid File::MemoryUnmap(void *addr, size_t len) {\n  ailego_return_if_false(addr);\n  munmap(addr, len);\n}\n\n//! Schedule a flush of a mapped region (msync with MS_ASYNC)\nbool File::MemoryFlush(void *addr, size_t len) {\n  ailego_false_if_false(addr);\n  return (msync(addr, len, MS_ASYNC) == 0);\n}\n\n//! Lock a memory region\nbool File::MemoryLock(void *addr, size_t len) {\n  ailego_false_if_false(addr && len);\n  return (mlock(addr, len) == 0);\n}\n\n//! Unlock a memory region\nbool File::MemoryUnlock(void *addr, size_t len) {\n  ailego_false_if_false(addr && len);\n  return (munlock(addr, len) == 0);\n}\n\n#else\n\n//! Create a local file\nbool File::create(const char *path, size_t len, bool direct) {\n  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);\n\n  // Try opening or creating the file\n  HANDLE file_handle =\n      CreateFileA(path, GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, nullptr,\n                  CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);\n  ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);\n\n  // Truncate the file to the specified size\n  LARGE_INTEGER file_size;\n  file_size.QuadPart = len;\n  ailego_do_if_false(\n      SetFilePointerEx(file_handle, file_size, nullptr, FILE_BEGIN) &&\n      SetEndOfFile(file_handle)) {\n    CloseHandle(file_handle);\n    return false;\n  }\n\n  if (!direct) {\n    // Reset the file pointer\n    SetFilePointer(file_handle, 0, nullptr, FILE_BEGIN);\n  } else {\n    // Close and reopen file\n    CloseHandle(file_handle);\n    file_handle = CreateFileA(\n        path, GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, nullptr,\n        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING, nullptr);\n    ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);\n  }\n\n  read_only_ = false;\n  native_handle_ = file_handle;\n  return true;\n}\n\n//! Open a local file\nbool File::open(const char *path, bool rdonly, bool direct) {\n  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);\n\n  // Try opening the file\n  DWORD flags = FILE_ATTRIBUTE_NORMAL;\n  if (direct) {\n    flags |= FILE_FLAG_NO_BUFFERING;\n  }\n  HANDLE file_handle =\n      CreateFileA(path, (rdonly ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE),\n                  FILE_SHARE_READ, nullptr, OPEN_EXISTING, flags, nullptr);\n  ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);\n\n  read_only_ = rdonly;\n  native_handle_ = file_handle;\n  return true;\n}\n\nvoid File::close(void) {\n  ailego_return_if_false(native_handle_ != File::InvalidHandle);\n  CloseHandle(native_handle_);\n  native_handle_ = File::InvalidHandle;\n}\n\nvoid File::reset(void) {\n  ailego_return_if_false(native_handle_ != File::InvalidHandle);\n  SetFilePointer(native_handle_, 0, nullptr, FILE_BEGIN);\n}\n\nsize_t File::write(const void *data, size_t len) {\n  const DWORD block_size = 0x40000000u;\n  DWORD wrlen = 0u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    if (!WriteFile(native_handle_, (const uint8_t *)data + total, block_size,\n                   &wrlen, nullptr)) {\n      return total;\n    }\n    if (wrlen != block_size) {\n      return (total + wrlen);\n    }\n    total += block_size;\n  }\n  if (len > 0 && WriteFile(native_handle_, (const uint8_t *)data + total,\n                           (DWORD)len, &wrlen, nullptr)) {\n    total += wrlen;\n  }\n  return total;\n}\n\nsize_t File::write(ssize_t off, const void *data, size_t len) {\n  const DWORD block_size = 0x40000000u;\n  DWORD wrlen = 0u;\n  size_t total = 0u;\n\n  OVERLAPPED overlapped;\n  memset(&overlapped, 0, sizeof(OVERLAPPED));\n\n  for (; len >= block_size; len -= block_size) {\n    uint64_t current = off + total;\n    overlapped.OffsetHigh = (DWORD)(current >> 32);\n    overlapped.Offset = (DWORD)(current & 0xffffffffu);\n\n    if (!WriteFile(native_handle_, (const uint8_t *)data + total, block_size,\n                   &wrlen, &overlapped)) {\n      return total;\n    }\n    if (wrlen != block_size) {\n      return (total + wrlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    uint64_t current = off + total;\n    overlapped.OffsetHigh = (DWORD)(current >> 32);\n    overlapped.Offset = (DWORD)(current & 0xffffffffu);\n\n    if (WriteFile(native_handle_, (const uint8_t *)data + total, (DWORD)len,\n                  &wrlen, &overlapped)) {\n      total += wrlen;\n    }\n  }\n  return total;\n}\n\nsize_t File::read(void *buf, size_t len) {\n  const DWORD block_size = 0x40000000u;\n  DWORD rdlen = 0u;\n  size_t total = 0u;\n\n  for (; len >= block_size; len -= block_size) {\n    if (!ReadFile(native_handle_, (uint8_t *)buf + total, block_size, &rdlen,\n                  nullptr)) {\n      return total;\n    }\n    if (rdlen != block_size) {\n      return (total + rdlen);\n    }\n    total += block_size;\n  }\n  if (len > 0 && ReadFile(native_handle_, (uint8_t *)buf + total, (DWORD)len,\n                          &rdlen, nullptr)) {\n    total += rdlen;\n  }\n  return total;\n}\n\nsize_t File::read(ssize_t off, void *buf, size_t len) {\n  const DWORD block_size = 0x40000000u;\n  DWORD rdlen = 0u;\n  size_t total = 0u;\n\n  OVERLAPPED overlapped;\n  memset(&overlapped, 0, sizeof(OVERLAPPED));\n\n  for (; len >= block_size; len -= block_size) {\n    uint64_t current = off + total;\n    overlapped.OffsetHigh = (DWORD)(current >> 32);\n    overlapped.Offset = (DWORD)(current & 0xffffffffu);\n\n    if (!ReadFile(native_handle_, (uint8_t *)buf + total, block_size, &rdlen,\n                  &overlapped)) {\n      return total;\n    }\n    if (rdlen != block_size) {\n      return (total + rdlen);\n    }\n    total += block_size;\n  }\n  if (len > 0) {\n    uint64_t current = off + total;\n    overlapped.OffsetHigh = (DWORD)(current >> 32);\n    overlapped.Offset = (DWORD)(current & 0xffffffffu);\n\n    if (ReadFile(native_handle_, (uint8_t *)buf + total, (DWORD)len, &rdlen,\n                 &overlapped)) {\n      total += rdlen;\n    }\n  }\n  return total;\n}\n\nbool File::flush(void) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n  return (!!FlushFileBuffers(native_handle_));\n}\n\nbool File::seek(ssize_t off, Origin origin) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n\n  LARGE_INTEGER file_offset;\n  file_offset.QuadPart = off;\n  ailego_false_if_false(SetFilePointerEx(native_handle_, file_offset, nullptr,\n                                         (DWORD)origin) != 0);\n  return true;\n}\n\nbool File::truncate(size_t len) {\n  ailego_false_if_false(native_handle_ != File::InvalidHandle);\n\n  LARGE_INTEGER file_size, orig_file_size;\n  file_size.QuadPart = 0;\n  orig_file_size.QuadPart = 0;\n  ailego_false_if_false(SetFilePointerEx(native_handle_, file_size,\n                                         &orig_file_size, FILE_CURRENT));\n\n  // Truncate the file to the specified size\n  file_size.QuadPart = len;\n  ailego_false_if_false(\n      SetFilePointerEx(native_handle_, file_size, nullptr, FILE_BEGIN) &&\n      SetEndOfFile(native_handle_));\n\n  // Reset the file pointer\n  SetFilePointerEx(native_handle_, orig_file_size, nullptr, FILE_BEGIN);\n  return true;\n}\n\nsize_t File::size(void) const {\n  LARGE_INTEGER file_size;\n  ailego_zero_if_false(native_handle_ != File::InvalidHandle &&\n                       GetFileSizeEx(native_handle_, &file_size));\n  return (size_t)file_size.QuadPart;\n}\n\n//! Current position of the file pointer (zero on failure)\nssize_t File::offset(void) const {\n  LARGE_INTEGER file_size;\n  LARGE_INTEGER file_size_new;\n  file_size.QuadPart = 0;\n  ailego_zero_if_false(native_handle_ != File::InvalidHandle &&\n                       SetFilePointerEx(native_handle_, file_size,\n                                        &file_size_new, FILE_CURRENT));\n  return (ssize_t)file_size_new.QuadPart;\n}\n\n//! Map a region of a file into memory (nullptr on failure)\nvoid *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {\n  // The mapping object has to cover [0, off + len); a zero length keeps the\n  // meaning of \"up to the current end of the file\".\n  LARGE_INTEGER file_size;\n  file_size.QuadPart = (len != 0 ? (LONGLONG)off + (LONGLONG)len : 0);\n\n  // Create map object\n  HANDLE file_mapping = CreateFileMapping(\n      handle, nullptr,\n      ((opts & File::MMAP_READONLY) ? PAGE_READONLY : PAGE_READWRITE),\n      file_size.HighPart, file_size.LowPart, nullptr);\n  ailego_null_if_false(file_mapping != nullptr);\n\n  DWORD desired_access = FILE_MAP_READ;\n  if (!(opts & File::MMAP_READONLY)) {\n    desired_access |= FILE_MAP_WRITE;\n  }\n  if (!(opts & File::MMAP_SHARED)) {\n    desired_access |= FILE_MAP_COPY;\n  }\n  file_size.QuadPart = off;\n\n  // Map `len` bytes starting at `off` (zero maps up to the end of the\n  // mapping object) and close the mapping handle\n  void *addr = MapViewOfFile(file_mapping, desired_access, file_size.HighPart,\n                             file_size.LowPart, len);\n  CloseHandle(file_mapping);\n\n  ailego_null_if_false(addr);\n  if (opts & File::MMAP_LOCKED) {\n    VirtualLock(addr, len);\n  }\n  if (opts & File::MMAP_WARMUP) {\n    File::MemoryWarmup(addr, len);\n  }\n  return addr;\n}\n\nvoid *File::MemoryMap(size_t, int) {\n  return nullptr;\n}\n\nvoid *File::MemoryRemap(void *, size_t, void *, size_t) {\n  return nullptr;\n}\n\nvoid File::MemoryUnmap(void *addr, size_t /*len*/) {\n  ailego_return_if_false(addr);\n  UnmapViewOfFile(addr);\n}\n\nbool File::MemoryFlush(void *addr, size_t /*len*/) {\n  ailego_false_if_false(addr);\n  return (!!FlushViewOfFile(addr, 0));\n}\n\nbool File::MemoryLock(void *addr, size_t len) {\n  ailego_false_if_false(addr && len);\n  return (!!VirtualLock(addr, len));\n}\n\nbool File::MemoryUnlock(void *addr, size_t len) {\n  ailego_false_if_false(addr && len);\n  return (!!VirtualUnlock(addr, len));\n}\n\nstatic inline int getpagesize(void) {\n  SYSTEM_INFO info;\n  GetSystemInfo(&info);\n  return info.dwPageSize;\n}\n#endif\n\n//! Read one byte from every page of the region\nvoid File::MemoryWarmup(void *addr, size_t len) {\n  static int page_size = getpagesize();\n\n  if (addr && len) {\n    uint8_t *p = reinterpret_cast<uint8_t *>(addr);\n    uint8_t *end = p + len;\n    volatile uint8_t tmp = 0;\n\n    while (p < end) {\n      tmp ^= *p;\n      p += page_size;\n    }\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/io/file_lock.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"file_lock.h\"\n\n#if !defined(_WIN64) && !defined(_WIN32)\n#include <sys/file.h>\n#else\n#include <Windows.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n#if !defined(_WIN64) && !defined(_WIN32)\n\n//! Apply a flock() operation and report whether it succeeded\nstatic inline bool ApplyFlock(int fd, int operation) {\n  const int rc = flock(fd, operation);\n  return (rc == 0);\n}\n\n//! Exclusive lock, waits until it is granted\nbool FileLock::Lock(int fd) {\n  return ApplyFlock(fd, LOCK_EX);\n}\n\n//! Exclusive lock, fails instead of waiting\nbool FileLock::TryLock(int fd) {\n  return ApplyFlock(fd, LOCK_EX | LOCK_NB);\n}\n\n//! Shared lock, waits until it is granted\nbool FileLock::LockShared(int fd) {\n  return ApplyFlock(fd, LOCK_SH);\n}\n\n//! Shared lock, fails instead of waiting\nbool FileLock::TryLockShared(int fd) {\n  return ApplyFlock(fd, LOCK_SH | LOCK_NB);\n}\n\n//! Release the lock held through this descriptor\nbool FileLock::Unlock(int fd) {\n  return ApplyFlock(fd, LOCK_UN);\n}\n\n#else\n\n//! Call LockFileEx() on the largest byte range starting at offset zero\nstatic inline bool ApplyLockFileEx(HANDLE handle, DWORD flags) {\n  OVERLAPPED overlapped = {0};\n  const BOOL locked =\n      LockFileEx(handle, flags, 0, MAXDWORD, MAXDWORD, &overlapped);\n  return (locked != FALSE);\n}\n\n//! Exclusive lock, waits until it is granted\nbool FileLock::Lock(HANDLE handle) {\n  return ApplyLockFileEx(handle, LOCKFILE_EXCLUSIVE_LOCK);\n}\n\n//! Exclusive lock, fails instead of waiting\nbool FileLock::TryLock(HANDLE handle) {\n  return ApplyLockFileEx(handle,\n                         LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY);\n}\n\n//! Shared lock, waits until it is granted\nbool FileLock::LockShared(HANDLE handle) {\n  return ApplyLockFileEx(handle, 0);\n}\n\n//! Shared lock, fails instead of waiting\nbool FileLock::TryLockShared(HANDLE handle) {\n  return ApplyLockFileEx(handle, LOCKFILE_FAIL_IMMEDIATELY);\n}\n\n//! Release the byte range locked by the functions above\nbool FileLock::Unlock(HANDLE handle) {\n  OVERLAPPED overlapped = {0};\n  const BOOL unlocked =\n      UnlockFileEx(handle, 0, MAXDWORD, MAXDWORD, &overlapped);\n  return (unlocked != FALSE);\n}\n\n#endif  // !_WIN64 && !_WIN32\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/io/file_lock.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/io/file.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! File Lock\n *  Applies lock operations to a native file handle. The handle is stored by\n *  value; this class declares no destructor and never closes it.\n */\nclass FileLock {\n public:\n  //! Construct from the native handle of a File object\n  FileLock(const File &file) : native_handle_(file.native_handle()) {}\n\n  //! Construct from a native handle\n  FileLock(File::NativeHandle handle) : native_handle_(handle) {}\n\n  //! Acquire an exclusive lock on the stored handle\n  bool lock() const {\n    return Lock(native_handle_);\n  }\n\n  //! Try to acquire an exclusive lock on the stored handle\n  bool try_lock() const {\n    return TryLock(native_handle_);\n  }\n\n  //! Acquire a shared lock on the stored handle\n  bool lock_shared() const {\n    return LockShared(native_handle_);\n  }\n\n  //! Try to acquire a shared lock on the stored handle\n  bool try_lock_shared() const {\n    return TryLockShared(native_handle_);\n  }\n\n  //! Release the lock on the stored handle\n  bool unlock() const {\n    return Unlock(native_handle_);\n  }\n\n  //! Acquire an exclusive lock on `handle`\n  static bool Lock(File::NativeHandle handle);\n\n  //! Try to acquire an exclusive lock on `handle`\n  static bool TryLock(File::NativeHandle handle);\n\n  //! Acquire a shared lock on `handle`\n  static bool LockShared(File::NativeHandle handle);\n\n  //! Try to acquire a shared lock on `handle`\n  static bool TryLockShared(File::NativeHandle handle);\n\n  //! Release the lock on `handle`\n  static bool Unlock(File::NativeHandle handle);\n\n private:\n  //! Copying and moving are not allowed\n  FileLock(const FileLock &) = delete;\n  FileLock(FileLock &&) = delete;\n  FileLock &operator=(const FileLock &) = delete;\n  FileLock &operator=(FileLock &&) = delete;\n\n  //! Native handle the lock operations are applied to\n  File::NativeHandle native_handle_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/io/file_writer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdarg>\n#include <ios>\n#include \"file.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! File Stream Writer\n */\nclass FileWriter {\n public:\n  //! Constructor\n  FileWriter(void) {}\n\n  //! Constructor\n  FileWriter(FileWriter &&rhs) : file_(std::move(rhs.file_)) {}\n\n  //! Destructor\n  ~FileWriter(void) {}\n\n  //! Assignment\n  FileWriter &operator=(FileWriter &&rhs) {\n    file_ = std::move(rhs.file_);\n    return *this;\n  }\n\n  //! Output to writer\n  FileWriter &operator<<(const char *str) {\n    size_t len = std::strlen(str);\n    if (file_.write(str, len) != len) {\n      throw std::ios_base::failure(\"Write error\");\n    }\n    return *this;\n  }\n\n  //! Output to writer\n  FileWriter &operator<<(const std::string &str) {\n    if (file_.write(str.data(), str.size()) != str.size()) {\n      throw std::ios_base::failure(\"Write error\");\n    }\n    return *this;\n  }\n\n  //! Output to writer\n  FileWriter &operator<<(char c) {\n    if (file_.write(&c, 1) != 1) {\n      throw std::ios_base::failure(\"Write error\");\n    }\n    return *this;\n  }\n\n  //! Test if the file is valid\n  bool is_valid(void) const {\n    return file_.is_valid();\n  }\n\n  //! Create a local file\n  bool create(const char *path) {\n    return file_.create(path, 0, false);\n  }\n\n  //! 
Open a local file\n  bool open(const char *path) {\n    return file_.open(path, false, false);\n  }\n\n  //! Close the local file\n  void close(void) {\n    file_.close();\n  }\n\n  //! Write data into the file\n  size_t write(const void *data, size_t len) {\n    return file_.write(data, len);\n  }\n\n  //! Synchronize memory with physical storage\n  bool flush(void) {\n    return file_.flush();\n  }\n\n  //! Output with format\n  void print(const char *format, va_list args) {\n    char buf[8192];\n    std::vsnprintf(buf, sizeof(buf), format, args);\n    (*this) << buf;\n  }\n\n  //! Output with format\n#if defined(__GNUC__)\n  void print(const char *format, ...) __attribute__((format(printf, 2, 3))) {\n#else\n  void print(const char *format, ...) {\n#endif\n    va_list args;\n    va_start(args, format);\n    this->print(format, args);\n    va_end(args);\n  }\n\n private:\n  //! Disable them\n  FileWriter(const FileWriter &) = delete;\n  FileWriter &operator=(const FileWriter &) = delete;\n\n  //! Members\n  File file_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/logger/logger.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <sstream>\n#include <thread>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\nconst int Logger::LEVEL_DEBUG = 0;\nconst int Logger::LEVEL_INFO = 1;\nconst int Logger::LEVEL_WARN = 2;\nconst int Logger::LEVEL_ERROR = 3;\nconst int Logger::LEVEL_FATAL = 4;\n\n/*! Console Logger\n */\nstruct ConsoleLogger : public Logger {\n  //! Initialize Logger\n  int init(const Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Logger\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Log Message\n  void log(int level, const char *file, int line, const char *format,\n           va_list args) override {\n    char buffer[8192];\n    std::ostringstream stream;\n\n    ailego::Realtime::Localtime(buffer, sizeof(buffer));\n    stream << '[' << LevelString(level) << ' ' << buffer << ' '\n           << std::this_thread::get_id() << ' ' << ailego::File::BaseName(file)\n           << ':' << line << \"] \";\n\n    vsnprintf(buffer, sizeof(buffer), format, args);\n    stream << buffer << '\\n';\n\n    if (level <= LEVEL_INFO) {\n      std::cout << stream.str() << std::flush;\n    } else {\n      std::cerr << stream.str() << std::flush;\n    }\n  }\n};\n\n//! 
Logger Level\nint LoggerBroker::logger_level_ = Logger::LEVEL_WARN;\n\n//! Logger\nLogger::Pointer LoggerBroker::logger_(new ConsoleLogger);\n\n//! Register Console Logger in Factory\nFACTORY_REGISTER_LOGGER(ConsoleLogger);\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/cosine_distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Cosine Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N, typename = void>\nstruct CosineDistanceMatrix;\n\n/*! Cosine Distance Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct CosineDistanceMatrix<\n    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    constexpr size_t extra_dim = sizeof(float) / sizeof(ValueType);\n    size_t d = dim - extra_dim;\n\n    float ip;\n    InnerProductMatrix<T, 1, 1>::Compute(m, q, d, &ip);\n\n    *out = 1 - ip;\n  }\n};\n\n/*! Cosine Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N>\nstruct CosineDistanceMatrix<\n    T, M, N,\n    typename std::enable_if<IsSignedArithmetic<T>::value && M >= 2 &&\n                            N >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType * /*m*/, const ValueType * /*q*/,\n                             size_t /*dim*/, float *out) {\n    // ailego_assert(m && q && dim && out);\n\n    *out = 0.0f;\n  }\n};\n\n/*! Cosine Distance Matrix (N=1)\n */\ntemplate <typename T, size_t M>\nstruct CosineDistanceMatrix<\n    T, M, 1,\n    typename std::enable_if<IsSignedArithmetic<T>::value && M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType * /*m*/, const ValueType * /*q*/,\n                             size_t /*dim*/, float *out) {\n    // ailego_assert(m && q && dim && out);\n\n    *out = 0.0f;\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/distance.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Distance module\n */\nstruct Distance {\n  //! Compute the hamming distance between two vectors (BINARY)\n  static float Hamming(const uint32_t *lhs, const uint32_t *rhs, size_t dim) {\n    float result;\n    HammingDistanceMatrix<uint32_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n#if defined(AILEGO_M64)\n  //! Compute the hamming distance between two vectors (BINARY)\n  static float Hamming(const uint64_t *lhs, const uint64_t *rhs, size_t dim) {\n    float result;\n    HammingDistanceMatrix<uint64_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n#else\n  //! Compute the hamming distance between two vectors (BINARY)\n  static float Hamming(const uint64_t *lhs, const uint64_t *rhs, size_t dim) {\n    float result;\n    HammingDistanceMatrix<uint32_t, 1, 1>::Compute(\n        reinterpret_cast<const uint32_t *>(lhs),\n        reinterpret_cast<const uint32_t *>(rhs), dim, &result);\n    return result;\n  }\n#endif\n\n  //! 
Compute the squared euclidean distance between two vectors (FP32)\n  static float SquaredEuclidean(const float *lhs, const float *rhs,\n                                size_t dim) {\n    float result;\n    SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim,\n                                                         &result);\n    return result;\n  }\n\n  //! Compute the squared euclidean distance between two vectors (FP16)\n  static float SquaredEuclidean(const Float16 *lhs, const Float16 *rhs,\n                                size_t dim) {\n    float result;\n    SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim,\n                                                           &result);\n    return result;\n  }\n\n  //! Compute the squared euclidean distance between two vectors (INT8)\n  static float SquaredEuclidean(const int8_t *lhs, const int8_t *rhs,\n                                size_t dim) {\n    float result;\n    SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim,\n                                                          &result);\n    return result;\n  }\n\n  //! Compute the squared euclidean distance between two vectors (INT4)\n  static float SquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,\n                                size_t dim) {\n    float result;\n    SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim,\n                                                           &result);\n    return result;\n  }\n\n  //! Compute the euclidean distance between two vectors (FP32)\n  static float Euclidean(const float *lhs, const float *rhs, size_t dim) {\n    float result;\n    EuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! 
Compute the euclidean distance between two vectors (FP16)\n  static float Euclidean(const Float16 *lhs, const Float16 *rhs, size_t dim) {\n    float result;\n    EuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the euclidean distance between two vectors (INT8)\n  static float Euclidean(const int8_t *lhs, const int8_t *rhs, size_t dim) {\n    float result;\n    EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the euclidean distance between two vectors (INT4)\n  static float Euclidean(const uint8_t *lhs, const uint8_t *rhs, size_t dim) {\n    float result;\n    EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the inner product between two vectors (FP32)\n  static float InnerProduct(const float *lhs, const float *rhs, size_t dim) {\n    float result;\n    InnerProductMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the inner product between two vectors (FP16)\n  static float InnerProduct(const Float16 *lhs, const Float16 *rhs,\n                            size_t dim) {\n    float result;\n    InnerProductMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the inner product between two vectors (INT8)\n  static float InnerProduct(const int8_t *lhs, const int8_t *rhs, size_t dim) {\n    float result;\n    InnerProductMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the inner product between two vectors (INT4)\n  static float InnerProduct(const uint8_t *lhs, const uint8_t *rhs,\n                            size_t dim) {\n    float result;\n    InnerProductMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! 
Compute the minus inner product between two vectors (FP32)\n  static float MinusInnerProduct(const float *lhs, const float *rhs,\n                                 size_t dim) {\n    float result;\n    MinusInnerProductMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the minus inner product between two vectors (FP16)\n  static float MinusInnerProduct(const Float16 *lhs, const Float16 *rhs,\n                                 size_t dim) {\n    float result;\n    MinusInnerProductMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the minus inner product between two vectors (INT8)\n  static float MinusInnerProduct(const int8_t *lhs, const int8_t *rhs,\n                                 size_t dim) {\n    float result;\n    MinusInnerProductMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the minus inner product between two vectors (INT4)\n  static float MinusInnerProduct(const uint8_t *lhs, const uint8_t *rhs,\n                                 size_t dim) {\n    float result;\n    MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (FP32, RepeatedQuadraticInjection)\n  static float MipsSquaredEuclidean(const float *lhs, const float *rhs,\n                                    size_t dim, size_t m, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, m,\n                                                             eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! 
(FP16, RepeatedQuadraticInjection)\n  static float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,\n                                    size_t dim, size_t m, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, m,\n                                                               eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (INT8, RepeatedQuadraticInjection)\n  static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,\n                                    size_t dim, size_t m, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, m,\n                                                              eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (INT4, RepeatedQuadraticInjection)\n  static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,\n                                    size_t dim, size_t m, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, m,\n                                                               eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (FP32, SphericalInjection)\n  static float MipsSquaredEuclidean(const float *lhs, const float *rhs,\n                                    size_t dim, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, eta,\n                                                             &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! 
(FP16, SphericalInjection)\n  static float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,\n                                    size_t dim, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim,\n                                                               eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (INT8, SphericalInjection)\n  static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,\n                                    size_t dim, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim,\n                                                              eta, &result);\n    return result;\n  }\n\n  //! Compute the mips squared L2 distance between two vectors\n  //! (INT4, SphericalInjection)\n  static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,\n                                    size_t dim, float eta) {\n    float result;\n    MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim,\n                                                               eta, &result);\n    return result;\n  }\n\n  //! Compute the cosine distance between two vectors (FP32)\n  static float Cosine(const float *lhs, const float *rhs, size_t dim) {\n    float result;\n    CosineDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! Compute the cosine distance between two vectors (FP16)\n  static float Cosine(const Float16 *lhs, const Float16 *rhs, size_t dim) {\n    float result;\n    CosineDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n\n  //! 
Compute the cosine distance between two vectors (INT8)\n  static float Cosine(const int8_t *lhs, const int8_t *rhs, size_t dim) {\n    float result;\n    CosineDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);\n    return result;\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"cosine_distance_matrix.h\"\n#include \"euclidean_distance_matrix.h\"\n#include \"hamming_distance_matrix.h\"\n#include \"inner_product_matrix.h\"\n#include \"mips_euclidean_distance_matrix.h\"\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_accum_fp16.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_fp16.i\"\n#include \"matrix_utility.i\"\n\n#if !defined(__FMA__)\n#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))\n#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))\n#endif  // !__FMA__\n\n#if defined(__AVX512F__) && !defined(__AVX512DQ__)\n#define _mm512_and_ps(a, b) \\\n  _mm512_castsi512_ps(      \\\n      _mm512_and_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))\n#define _mm512_mask_and_ps(src, k, a, b)                                   \\\n  _mm512_castsi512_ps(_mm512_mask_and_epi32(_mm512_castps_si512(src), (k), \\\n                                            _mm512_castps_si512(a),        \\\n                                            _mm512_castps_si512(b)))\n#endif  // __AVX512DQ__\n\n//! 
Compute the distance between matrix and query (FP16, M=1, N=1)\n#define ACCUM_FP16_1X1_AVX(m, q, dim, out, _MASK, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())               \\\n  const Float16 *qe = q + dim;                                              \\\n  const Float16 *qe_aligned = q + ((dim >> 4) << 4);                        \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {           \\\n    for (; q != qe_aligned; m += 16, q += 16) {                             \\\n      MATRIX_FP16_ITER_1X1_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                               ACCUM_FP32_STEP_AVX)                         \\\n    }                                                                       \\\n    if (qe >= qe_aligned + 8) {                                             \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)m));   \\\n      __m256 ymm_q = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)q));   \\\n      ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                        \\\n      m += 8;                                                               \\\n      q += 8;                                                               \\\n    }                                                                       \\\n  } else {                                                                  \\\n    for (; q != qe_aligned; m += 16, q += 16) {                             \\\n      MATRIX_FP16_ITER_1X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n                               ACCUM_FP32_STEP_AVX)                         \\\n    }                                                                       \\\n    if (qe >= qe_aligned + 8) {                                             \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));  \\\n      __m256 ymm_q = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)q));  \\\n      
ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                        \\\n      m += 8;                                                               \\\n      q += 8;                                                               \\\n    }                                                                       \\\n  }                                                                         \\\n  MATRIX_FP16_MASK_AVX(m, q, (qe - q), _MASK, ymm_sum, ACCUM_FP32_STEP_AVX) \\\n  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));\n\n//! Compute the distance between matrix and query (FP16, M=2, N=1)\n#define ACCUM_FP16_2X1_AVX(m, q, dim, out, _NORM)                             \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                 \\\n  const Float16 *qe_aligned = q + ((dim >> 2) << 2);                          \\\n  const Float16 *qe = q + dim;                                                \\\n  if (((uintptr_t)m & 0xf) == 0) {                                            \\\n    for (; q != qe_aligned; m += 8, q += 4) {                                 \\\n      MATRIX_FP16_ITER_2X1_AVX(m, q, ymm_sum, _mm_load_si128,                 \\\n                               ACCUM_FP32_STEP_AVX)                           \\\n    }                                                                         \\\n  } else {                                                                    \\\n    for (; q != qe_aligned; m += 8, q += 4) {                                 \\\n      MATRIX_FP16_ITER_2X1_AVX(m, q, ymm_sum, _mm_loadu_si128,                \\\n                               ACCUM_FP32_STEP_AVX)                           \\\n    }                                                                         \\\n  }                                                                           \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),        \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));     \\\n  if 
(qe >= qe_aligned + 2) {                                                 \\\n    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));         \\\n    __m128 xmm_q = _mm_cvtph_ps(                                              \\\n        _mm_shufflelo_epi16(_mm_broadcast_si32(q), _MM_SHUFFLE(1, 1, 0, 0))); \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \\\n    m += 4;                                                                   \\\n    q += 2;                                                                   \\\n  }                                                                           \\\n  xmm_sum_0_0 =                                                               \\\n      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0));       \\\n  if (q != qe) {                                                              \\\n    __m128 xmm_m = _mm_cvtph_ps(                                              \\\n        _mm_shufflelo_epi16(_mm_broadcast_si32(m), _MM_SHUFFLE(0, 0, 1, 0))); \\\n    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));         \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \\\n  }                                                                           \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! 
Compute the distance between matrix and query (FP16, M=2, N=2)\n#define ACCUM_FP16_2X2_AVX(m, q, dim, out, _NORM)                             \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())                 \\\n  const Float16 *qe_aligned = q + ((dim >> 2) << 3);                          \\\n  const Float16 *qe = q + (dim << 1);                                         \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {               \\\n    for (; q != qe_aligned; m += 8, q += 8) {                                 \\\n      MATRIX_FP16_ITER_2X2_AVX(m, q, ymm_sum, _mm_load_si128,                 \\\n                               ACCUM_FP32_STEP_AVX)                           \\\n    }                                                                         \\\n  } else {                                                                    \\\n    for (; q != qe_aligned; m += 8, q += 8) {                                 \\\n      MATRIX_FP16_ITER_2X2_AVX(m, q, ymm_sum, _mm_loadu_si128,                \\\n                               ACCUM_FP32_STEP_AVX)                           \\\n    }                                                                         \\\n  }                                                                           \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),        \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));     \\\n  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),        \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));     \\\n  if (qe >= qe_aligned + 4) {                                                 \\\n    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));         \\\n    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q)));         \\\n    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));            \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, 
xmm_sum_0_0)                            \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));                   \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                            \\\n    m += 4;                                                                   \\\n    q += 4;                                                                   \\\n  }                                                                           \\\n  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),           \\\n                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));          \\\n  if (q != qe) {                                                              \\\n    __m128 xmm_m = _mm_cvtph_ps(                                              \\\n        _mm_shufflelo_epi16(_mm_broadcast_si32(m), _MM_SHUFFLE(1, 0, 1, 0))); \\\n    __m128 xmm_q = _mm_cvtph_ps(                                              \\\n        _mm_shufflelo_epi16(_mm_broadcast_si32(q), _MM_SHUFFLE(1, 1, 0, 0))); \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \\\n  }                                                                           \\\n  if (((uintptr_t)out & 0xf) == 0) {                                          \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)              \\\n  } else {                                                                    \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)             \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=4, N=1)\n#define ACCUM_FP16_4X1_AVX(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                \\\n  const Float16 *qe = q + dim;                                               \\\n  if (((uintptr_t)m & 0xf) == 0) {                                           \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                   \\\n      MATRIX_FP16_ITER_4X1_AVX(m, q, ymm_sum, _mm_load_si128,                \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                   \\\n      MATRIX_FP16_ITER_4X1_AVX(m, q, ymm_sum, _mm_loadu_si128,               \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                                                                        \\\n  }                                                                          \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \\\n  if (q != qe) {                                                             \\\n    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \\\n    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));        \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                           \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                     
    \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (FP16, M=4, N=2)\n#define ACCUM_FP16_4X2_AVX(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())                \\\n  const Float16 *qe = q + (dim << 1);                                        \\\n  if (((uintptr_t)m & 0xf) == 0) {                                           \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 8, q += 4) {                                                   \\\n      MATRIX_FP16_ITER_4X2_AVX(m, q, ymm_sum, _mm_load_si128,                \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 8, q += 4) {                                                   \\\n      MATRIX_FP16_ITER_4X2_AVX(m, q, ymm_sum, _mm_loadu_si128,               \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                                                                        \\\n  }                                                                          \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \\\n  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));    \\\n  if (q != qe) {                                      
                       \\\n    __m128 xmm_q_0 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 0)));  \\\n    __m128 xmm_q_1 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 1)));  \\\n    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \\\n    MATRIX_VAR_PROC(1, 2, 0, xmm_m, xmm_q, xmm_sum, ACCUM_FP32_STEP_SSE)     \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (FP16, M=4, N=4)\n#define ACCUM_FP16_4X4_AVX(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())                \\\n  const Float16 *qe = q + (dim << 2);                                        \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {              \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \\\n         m += 8, q += 8) {                                                   \\\n      MATRIX_FP16_ITER_4X4_AVX(m, q, ymm_sum, _mm_load_si128,                \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (const Float16 *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \\\n         m += 8, q += 8) {                                                   \\\n      MATRIX_FP16_ITER_4X4_AVX(m, q, ymm_sum, _mm_loadu_si128,               \\\n                               ACCUM_FP32_STEP_AVX)                          \\\n    }                              
                                          \\\n  }                                                                          \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \\\n  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));    \\\n  __m128 xmm_sum_0_2 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_2),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_2, 1));    \\\n  __m128 xmm_sum_0_3 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_3),       \\\n                                  _mm256_extractf128_ps(ymm_sum_0_3, 1));    \\\n  if (q != qe) {                                                             \\\n    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \\\n    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q)));        \\\n    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(0, 0, 0, 0));           \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0)                           \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(1, 1, 1, 1));                  \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                           \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 2, 2));                  \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_2)                           \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 3, 3));                  \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_3)                           \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    
MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (FP16, M=8, N=1)\n#define ACCUM_FP16_8X1_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0xf) == 0) {                                   \\\n    for (const Float16 *qe = q + dim; q != qe; m += 8, ++q) {        \\\n      MATRIX_FP16_ITER_8X1_AVX(m, q, ymm_sum, _mm_load_si128,        \\\n                               ACCUM_FP32_STEP_AVX)                  \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const Float16 *qe = q + dim; q != qe; m += 8, ++q) {        \\\n      MATRIX_FP16_ITER_8X1_AVX(m, q, ymm_sum, _mm_loadu_si128,       \\\n                               ACCUM_FP32_STEP_AVX)                  \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=8, N=2)\n#define ACCUM_FP16_8X2_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())           \\\n  if (((uintptr_t)m & 0xf) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP16_ITER_8X2_AVX(m, q, ymm_sum, _mm_load_si128,           \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP16_ITER_8X2_AVX(m, q, ymm_sum, _mm_loadu_si128,          \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)     \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=8, N=4)\n#define ACCUM_FP16_8X4_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())           \\\n  if (((uintptr_t)m & 0xf) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP16_ITER_8X4_AVX(m, q, ymm_sum, _mm_load_si128,           \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP16_ITER_8X4_AVX(m, q, ymm_sum, _mm_loadu_si128,          \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)     \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=8, N=8)\n#define ACCUM_FP16_8X8_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(1, 8, __m256, ymm_sum, _mm256_setzero_ps())           \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {         \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP16_ITER_8X8_AVX(m, q, ymm_sum, _mm_load_si128,           \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP16_ITER_8X8_AVX(m, q, ymm_sum, _mm_loadu_si128,          \\\n                               ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)     \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=1)\n#define ACCUM_FP16_16X1_AVX(m, q, dim, out, _NORM)                   \\\n  MATRIX_VAR_INIT(2, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (const Float16 *qe = q + dim; q != qe; m += 16, ++q) {       \\\n      MATRIX_FP16_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const Float16 *qe = q + dim; q != qe; m += 16, ++q) {       \\\n      MATRIX_FP16_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=2)\n#define ACCUM_FP16_16X2_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(2, 2, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP16_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP16_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=4)\n#define ACCUM_FP16_16X4_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(2, 4, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP16_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP16_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=8)\n#define ACCUM_FP16_16X8_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(2, 8, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP16_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP16_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=16)\n#define ACCUM_FP16_16X16_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(2, 16, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                       \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP16_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP16_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                     \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=1)\n#define ACCUM_FP16_32X1_AVX(m, q, dim, out, _NORM)                   \\\n  MATRIX_VAR_INIT(4, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (const Float16 *qe = q + dim; q != qe; m += 32, ++q) {       \\\n      MATRIX_FP16_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const Float16 *qe = q + dim; q != qe; m += 32, ++q) {       \\\n      MATRIX_FP16_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=2)\n#define ACCUM_FP16_32X2_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(4, 2, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP16_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP16_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=4)\n#define ACCUM_FP16_32X4_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(4, 4, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP16_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP16_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=8)\n#define ACCUM_FP16_32X8_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(4, 8, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP16_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP16_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                    \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=16)\n#define ACCUM_FP16_32X16_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(4, 16, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                       \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP16_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP16_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                     \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=32)\n#define ACCUM_FP16_32X32_AVX(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(4, 32, __m256, ymm_sum, _mm256_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                       \\\n    for (const Float16 *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP16_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,        \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP16_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \\\n                                 ACCUM_FP32_STEP_AVX)                     \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                     \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=1, N=1)\n#define ACCUM_FP16_1X1_AVX512(m, q, dim, out, _MASK, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())                 \\\n  const Float16 *qe = q + dim;                                                \\\n  const Float16 *qe_aligned = q + ((dim >> 5) << 5);                          \\\n  if (((uintptr_t)m & 0x3f) == 0 && ((uintptr_t)q & 0x3f) == 0) {             \\\n    for (; q != qe_aligned; m += 32, q += 32) {                               \\\n      MATRIX_FP16_ITER_1X1_AVX512(m, q, zmm_sum, _mm512_load_si512,           \\\n                                  ACCUM_FP32_STEP_AVX512)                     \\\n    }                                                                         \\\n    if (qe >= qe_aligned + 16) {                                              \\\n      __m512 zmm_m = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)m));  \\\n      __m512 zmm_q = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)q));  \\\n      ACCUM_FP32_STEP_AVX512(zmm_m, zmm_q, zmm_sum_0_0)                       \\\n      m += 16;                                                                \\\n      q += 16;                                                                \\\n    }                                                                         \\\n  } else {                                                                    \\\n    for (; q != qe_aligned; m += 32, q += 32) {                               \\\n      MATRIX_FP16_ITER_1X1_AVX512(m, q, zmm_sum, _mm512_loadu_si512,          \\\n                                  ACCUM_FP32_STEP_AVX512)                     \\\n    }                                                                         \\\n    if (qe >= qe_aligned + 16) {                                              \\\n      __m512 zmm_m = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)m)); \\\n      __m512 zmm_q = 
_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)q)); \\\n      ACCUM_FP32_STEP_AVX512(zmm_m, zmm_q, zmm_sum_0_0)                       \\\n      m += 16;                                                                \\\n      q += 16;                                                                \\\n    }                                                                         \\\n  }                                                                           \\\n  __m256 ymm_sum_0_0 = _mm256_add_ps(_mm512_castps512_ps256(zmm_sum_0_0),     \\\n                                     _mm256_castpd_ps(_mm512_extractf64x4_pd( \\\n                                         _mm512_castps_pd(zmm_sum_0_0), 1))); \\\n  if (qe >= q + 8) {                                                          \\\n    __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));      \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)q));      \\\n    ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                            \\\n    m += 8;                                                                   \\\n    q += 8;                                                                   \\\n  }                                                                           \\\n  MATRIX_FP16_MASK_AVX(m, q, (qe - q), _MASK, ymm_sum, ACCUM_FP32_STEP_AVX)   \\\n  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=1)\n#define ACCUM_FP16_16X1_AVX512(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (const Float16 *qe = q + dim; q != qe; m += 16, ++q) {        \\\n      MATRIX_FP16_ITER_16X1_AVX512(m, q, zmm_sum, _mm256_load_si256,  \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const Float16 *qe = q + dim; q != qe; m += 16, ++q) {        \\\n      MATRIX_FP16_ITER_16X1_AVX512(m, q, zmm_sum, _mm256_loadu_si256, \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=2)\n#define ACCUM_FP16_16X2_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP16_ITER_16X2_AVX512(m, q, zmm_sum, _mm256_load_si256,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP16_ITER_16X2_AVX512(m, q, zmm_sum, _mm256_loadu_si256,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=4)\n#define ACCUM_FP16_16X4_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 4, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP16_ITER_16X4_AVX512(m, q, zmm_sum, _mm256_load_si256,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP16_ITER_16X4_AVX512(m, q, zmm_sum, _mm256_loadu_si256,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=8)\n#define ACCUM_FP16_16X8_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 8, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP16_ITER_16X8_AVX512(m, q, zmm_sum, _mm256_load_si256,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP16_ITER_16X8_AVX512(m, q, zmm_sum, _mm256_loadu_si256,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=16, N=16)\n#define ACCUM_FP16_16X16_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 16, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {         \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP16_ITER_16X16_AVX512(m, q, zmm_sum, _mm256_load_si256,     \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP16_ITER_16X16_AVX512(m, q, zmm_sum, _mm256_loadu_si256,    \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                     \\\n    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=1)\n#define ACCUM_FP16_32X1_AVX512(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 1, __m512, zmm_sum, _mm512_setzero_ps())         \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                   \\\n    for (const Float16 *qe = q + dim; q != qe; m += 32, ++q) {        \\\n      MATRIX_FP16_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_load_si512,  \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const Float16 *qe = q + dim; q != qe; m += 32, ++q) {        \\\n      MATRIX_FP16_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_loadu_si512, \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=2)\n#define ACCUM_FP16_32X2_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 2, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP16_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_load_si512,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP16_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_loadu_si512,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=4)\n#define ACCUM_FP16_32X4_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 4, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP16_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_load_si512,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP16_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_loadu_si512,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=8)\n#define ACCUM_FP16_32X8_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 8, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                      \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP16_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_load_si512,     \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  } else {                                                               \\\n    for (const Float16 *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP16_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_loadu_si512,    \\\n                                   ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                    \\\n  }                                                                      \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                    \\\n    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                               \\\n    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=16)\n#define ACCUM_FP16_32X16_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 16, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                       \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP16_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_load_si512,     \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP16_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_loadu_si512,    \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                     \\\n    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (FP16, M=32, N=32)\n#define ACCUM_FP16_32X32_AVX512(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 32, __m512, zmm_sum, _mm512_setzero_ps())            \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                       \\\n    for (const Float16 *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP16_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_load_si512,     \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const Float16 *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP16_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_loadu_si512,    \\\n                                    ACCUM_FP32_STEP_AVX512)               \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                     \\\n    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \\\n  }\n\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n//! 
Compute the distance between matrix and query (FP16, M=1, N=1)\n#define ACCUM_FP16_1X1_NEON(m, q, dim, out, _MASK, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, float16x8_t, v_sum, vdupq_n_f16(0))                  \\\n  const Float16 *qe = q + dim;                                               \\\n  const Float16 *qe_aligned = q + ((dim >> 3) << 3);                         \\\n  for (; q != qe_aligned; m += 8, q += 8) {                                  \\\n    MATRIX_FP16_ITER_1X1_NEON(m, q, v_sum, ACCUM_FP16_STEP_NEON)             \\\n  }                                                                          \\\n  if (qe >= qe_aligned + 4) {                                                \\\n    float16x8_t v_m =                                                        \\\n        vcombine_f16(vld1_f16((const float16_t *)m),                         \\\n                     vreinterpret_f16_u64(vdup_n_u64((uint64_t)(_MASK))));   \\\n    float16x8_t v_q =                                                        \\\n        vcombine_f16(vld1_f16((const float16_t *)q),                         \\\n                     vreinterpret_f16_u64(vdup_n_u64((uint64_t)(_MASK))));   \\\n    ACCUM_FP16_STEP_NEON(v_m, v_q, v_sum_0_0)                                \\\n    m += 4;                                                                  \\\n    q += 4;                                                                  \\\n  }                                                                          \\\n  float result = vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v_sum_0_0)), \\\n                                      vcvt_high_f32_f16(v_sum_0_0)));        \\\n  switch (qe - q) {                                                          \\\n    case 3:                                                                  \\\n      ACCUM_FP16_STEP_GENERAL(m[2], q[2], result)                            \\\n      /* FALLTHRU */                                                     
    \\\n    case 2:                                                                  \\\n      ACCUM_FP16_STEP_GENERAL(m[1], q[1], result)                            \\\n      /* FALLTHRU */                                                         \\\n    case 1:                                                                  \\\n      ACCUM_FP16_STEP_GENERAL(m[0], q[0], result)                            \\\n  }                                                                          \\\n  *out = _NORM(result);\n\n#else\n//! Compute the distance between matrix and query (FP16, M=1, N=1)\n#define ACCUM_FP16_1X1_NEON(m, q, dim, out, _MASK, _NORM)           \\\n  MATRIX_VAR_INIT(1, 1, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  const Float16 *qe = q + dim;                                      \\\n  const Float16 *qe_aligned = q + ((dim >> 3) << 3);                \\\n  for (; q != qe_aligned; m += 8, q += 8) {                         \\\n    MATRIX_FP16_ITER_1X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  if (qe >= qe_aligned + 4) {                                       \\\n    float32x4_t v_m = vcvt_f32_f16(vld1_f16((const float16_t *)m)); \\\n    float32x4_t v_q = vcvt_f32_f16(vld1_f16((const float16_t *)q)); \\\n    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                       \\\n    m += 4;                                                         \\\n    q += 4;                                                         \\\n  }                                                                 \\\n  float result = vaddvq_f32(v_sum_0_0);                             \\\n  switch (qe - q) {                                                 \\\n    case 3:                                                         \\\n      ACCUM_FP16_STEP_GENERAL(m[2], q[2], result)                   \\\n      /* FALLTHRU */                                                \\\n    case 2:                         
                                \\\n      ACCUM_FP16_STEP_GENERAL(m[1], q[1], result)                   \\\n      /* FALLTHRU */                                                \\\n    case 1:                                                         \\\n      ACCUM_FP16_STEP_GENERAL(m[0], q[0], result)                   \\\n  }                                                                 \\\n  *out = _NORM(result);\n\n#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_accum_fp32.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_fp32.i\"\n#include \"matrix_utility.i\"\n\n#if !defined(__FMA__)\n#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))\n#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))\n#endif  // !__FMA__\n\n#if defined(__AVX512F__) && !defined(__AVX512DQ__)\n#define _mm512_and_ps(a, b) \\\n  _mm512_castsi512_ps(      \\\n      _mm512_and_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))\n#define _mm512_mask_and_ps(src, k, a, b)                                   \\\n  _mm512_castsi512_ps(_mm512_mask_and_epi32(_mm512_castps_si512(src), (k), \\\n                                            _mm512_castps_si512(a),        \\\n                                            _mm512_castps_si512(b)))\n#endif  // __AVX512DQ__\n\n#if defined(__ARM_NEON) && !defined(__aarch64__)\nstatic inline float32_t vaddvq_f32(float32x4_t v) {\n  float32x2_t s = vadd_f32(vget_low_f32(v), vget_high_f32(v));\n  return vget_lane_f32(vpadd_f32(s, s), 0);\n}\n\nstatic inline int32_t vaddvq_s32(int32x4_t v) {\n  int32x2_t s = vadd_s32(vget_low_s32(v), vget_high_s32(v));\n  return vget_lane_s32(vpadd_s32(s, s), 0);\n}\n#endif  //__ARM_NEON && !__aarch64__\n\n#if defined(__aarch64__)\n#define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A64\n#else\n#define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A32\n#endif  // 
__aarch64__\n\n//! Compute the distance between matrix and query (FP32, M=2, N=1)\n#define ACCUM_FP32_2X1_SSE(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())              \\\n  const float *qe_aligned = q + ((dim >> 2) << 2);                      \\\n  const float *qe = q + dim;                                            \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {         \\\n    for (; q != qe_aligned; m += 8, q += 4) {                           \\\n      MATRIX_FP32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_ps,              \\\n                               ACCUM_FP32_STEP_SSE)                     \\\n    }                                                                   \\\n    if (qe >= qe_aligned + 2) {                                         \\\n      __m128 xmm_m = _mm_load_ps(m);                                    \\\n      __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                \\\n      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                    \\\n      m += 4;                                                           \\\n      q += 2;                                                           \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (; q != qe_aligned; m += 8, q += 4) {                           \\\n      MATRIX_FP32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_ps,             \\\n                               ACCUM_FP32_STEP_SSE)                     \\\n    }                                                                   \\\n    if (qe >= qe_aligned + 2) {                                         \\\n      __m128 xmm_m = _mm_loadu_ps(m);                                   \\\n      __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                \\\n      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                    \\\n      m += 4;    
                                                       \\\n      q += 2;                                                           \\\n    }                                                                   \\\n  }                                                                     \\\n  xmm_sum_0_0 = _mm_add_ps(xmm_sum_0_0, xmm_sum_0_1);                   \\\n  xmm_sum_0_0 =                                                         \\\n      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0)); \\\n  if (q != qe) {                                                        \\\n    __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]);                  \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                                 \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                      \\\n  }                                                                     \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! Compute the distance between matrix and query (FP32, M=2, N=2)\n#define ACCUM_FP32_2X2_SSE(m, q, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())                 \\\n  const float *qe = q + (dim << 1);                                        \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {            \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 4, q += 4) {                                                 \\\n      MATRIX_FP32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_ps,                 \\\n                               ACCUM_FP32_STEP_SSE)                        \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 4, q += 4) {                                                 \\\n      MATRIX_FP32_ITER_2X2_SSE(m, 
q, xmm_sum, _mm_loadu_ps,                \\\n                               ACCUM_FP32_STEP_SSE)                        \\\n    }                                                                      \\\n  }                                                                        \\\n  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),        \\\n                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));       \\\n  if (q != qe) {                                                           \\\n    __m128 xmm_m = _mm_set_ps(m[1], m[0], m[1], m[0]);                     \\\n    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                     \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                         \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=4, N=1)\n#define ACCUM_FP32_4X1_SSE(m, q, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())                 \\\n  const float *qe = q + dim;                                               \\\n  if (((uintptr_t)m & 0xf) == 0) {                                         \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                 \\\n      MATRIX_FP32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_ps,                 \\\n                               ACCUM_FP32_STEP_SSE)                        \\\n    }                                                                      \\\n    if (q != qe) {                                                         \\\n      __m128 xmm_m = _mm_load_ps(m);                                       \\\n      __m128 xmm_q = _mm_broadcast_ss(q);                                  \\\n      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                 \\\n      MATRIX_FP32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_ps,                \\\n                               ACCUM_FP32_STEP_SSE)                        \\\n    }                                                                      \\\n    if (q != qe) {                                                         \\\n      __m128 xmm_m = _mm_loadu_ps(m);                                      \\\n      __m128 xmm_q = _mm_broadcast_ss(q);                                  \\\n      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \\\n    }                                   
                                   \\\n  }                                                                        \\\n  xmm_sum_0_0 = _mm_add_ps(xmm_sum_0_0, xmm_sum_0_1);                      \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! Compute the distance between matrix and query (FP32, M=4, N=2)\n#define ACCUM_FP32_4X2_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())            \\\n  if (((uintptr_t)m & 0xf) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 4, q += 2) { \\\n      MATRIX_FP32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 4, q += 2) { \\\n      MATRIX_FP32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0xf) == 0) {                                  \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=4, N=4)\n#define ACCUM_FP32_4X4_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(1, 4, __m128, xmm_sum, _mm_setzero_ps())            \\\n  if (((uintptr_t)m & 0xf) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 4, q += 4) { \\\n      MATRIX_FP32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 4, q += 4) { \\\n      MATRIX_FP32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0xf) == 0) {                                  \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)      \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=1)\n#define ACCUM_FP32_8X1_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 1, __m128, xmm_sum, _mm_setzero_ps())        \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (const float *qe = q + dim; q != qe; m += 8, ++q) {       \\\n      MATRIX_FP32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_ps,        \\\n                               ACCUM_FP32_STEP_SSE)               \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const float *qe = q + dim; q != qe; m += 8, ++q) {       \\\n      MATRIX_FP32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_ps,       \\\n                               ACCUM_FP32_STEP_SSE)               \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=2)\n#define ACCUM_FP32_8X2_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 2, __m128, xmm_sum, _mm_setzero_ps())            \\\n  if (((uintptr_t)m & 0xf) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0xf) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=4)\n#define ACCUM_FP32_8X4_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 4, __m128, xmm_sum, _mm_setzero_ps())            \\\n  if (((uintptr_t)m & 0xf) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0xf) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)      \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=8)\n#define ACCUM_FP32_8X8_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 8, __m128, xmm_sum, _mm_setzero_ps())            \\\n  if (((uintptr_t)m & 0xf) == 0) {                                    \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                               ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0xf) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)      \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=1)\n#define ACCUM_FP32_16X1_SSE(m, q, dim, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 1, __m128, xmm_sum, _mm_setzero_ps())        \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {      \\\n      MATRIX_FP32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_ps,       \\\n                                ACCUM_FP32_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {      \\\n      MATRIX_FP32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_ps,      \\\n                                ACCUM_FP32_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=2)\n#define ACCUM_FP32_16X2_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 2, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=4)\n#define ACCUM_FP32_16X4_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 4, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=8)\n#define ACCUM_FP32_16X8_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 8, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=16)\n#define ACCUM_FP32_16X16_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 16, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                      \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0xf) == 0) {                                    \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=1)\n#define ACCUM_FP32_32X1_SSE(m, q, dim, out, _NORM)                \\\n  MATRIX_VAR_INIT(8, 1, __m128, xmm_sum, _mm_setzero_ps())        \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {      \\\n      MATRIX_FP32_ITER_32X1_SSE(m, q, xmm_sum, _mm_load_ps,       \\\n                                ACCUM_FP32_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {      \\\n      MATRIX_FP32_ITER_32X1_SSE(m, q, xmm_sum, _mm_loadu_ps,      \\\n                                ACCUM_FP32_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=2)\n#define ACCUM_FP32_32X2_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(8, 2, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=4)\n#define ACCUM_FP32_32X4_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(8, 4, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=8)\n#define ACCUM_FP32_32X8_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(8, 8, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=16)\n#define ACCUM_FP32_32X16_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(8, 16, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                      \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0xf) == 0) {                                    \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=32)\n#define ACCUM_FP32_32X32_SSE(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(8, 32, __m128, xmm_sum, _mm_setzero_ps())             \\\n  if (((uintptr_t)m & 0xf) == 0) {                                      \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_SSE(m, q, xmm_sum, _mm_load_ps,            \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_SSE(m, q, xmm_sum, _mm_loadu_ps,           \\\n                                 ACCUM_FP32_STEP_SSE)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0xf) == 0) {                                    \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=2, N=1)\n#define ACCUM_FP32_2X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())             \\\n  const float *qe_aligned = q + ((dim >> 2) << 2);                        \\\n  const float *qe = q + dim;                                              \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                       \\\n    for (; q != qe_aligned; m += 8, q += 4) {                             \\\n      MATRIX_FP32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_ps,             \\\n                               ACCUM_FP32_STEP_AVX)                       \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (; q != qe_aligned; m += 8, q += 4) {                             \\\n      MATRIX_FP32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \\\n                               ACCUM_FP32_STEP_AVX)                       \\\n    }                                                                     \\\n  }                                                                       \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \\\n  if (qe >= qe_aligned + 2) {                                             \\\n    __m128 xmm_m = _mm_loadu_ps(m);                                       \\\n    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                    \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \\\n    m += 4;                                                               \\\n    q += 2;                                                               \\\n  }                                                                       \\\n  xmm_sum_0_0 =                                                 
          \\\n      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0));   \\\n  if (q != qe) {                                                          \\\n    __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]);                    \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                                   \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \\\n  }                                                                       \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! Compute the distance between matrix and query (FP32, M=2, N=2)\n#define ACCUM_FP32_2X2_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())             \\\n  const float *qe_aligned = q + ((dim >> 2) << 3);                        \\\n  const float *qe = q + (dim << 1);                                       \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {         \\\n    for (; q != qe_aligned; m += 8, q += 8) {                             \\\n      MATRIX_FP32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_ps,             \\\n                               ACCUM_FP32_STEP_AVX)                       \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (; q != qe_aligned; m += 8, q += 8) {                             \\\n      MATRIX_FP32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \\\n                               ACCUM_FP32_STEP_AVX)                       \\\n    }                                                                     \\\n  }                                                                       \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \\\n  __m128 xmm_sum_0_1 = 
_mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),    \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1)); \\\n  if (qe >= qe_aligned + 4) {                                             \\\n    __m128 xmm_q = _mm_loadu_ps(q);                                       \\\n    __m128 xmm_m = _mm_loadu_ps(m);                                       \\\n    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));        \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0)                        \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));               \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                        \\\n    m += 4;                                                               \\\n    q += 4;                                                               \\\n  }                                                                       \\\n  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),       \\\n                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));      \\\n  if (q != qe) {                                                          \\\n    __m128 xmm_m = _mm_set_ps(m[1], m[0], m[1], m[0]);                    \\\n    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                    \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0xf) == 0) {                                      \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=4, N=1)\n#define ACCUM_FP32_4X1_AVX(m, q, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())              \\\n  const float *qe = q + dim;                                               \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                        \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                 \\\n      MATRIX_FP32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_ps,              \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \\\n         m += 8, q += 2) {                                                 \\\n      MATRIX_FP32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,             \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  }                                                                        \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));  \\\n  if (q != qe) {                                                           \\\n    __m128 xmm_m = _mm_loadu_ps(m);                                        \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                                    \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                         \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, 
out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! Compute the distance between matrix and query (FP32, M=4, N=2)\n#define ACCUM_FP32_4X2_AVX(m, q, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())              \\\n  const float *qe = q + (dim << 1);                                        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                        \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 8, q += 4) {                                                 \\\n      MATRIX_FP32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_ps,              \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \\\n         m += 8, q += 4) {                                                 \\\n      MATRIX_FP32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,             \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  }                                                                        \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));  \\\n  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));  \\\n  if (q != qe) {                                                           \\\n    __m128 xmm_m = _mm_loadu_ps(m);                                 
       \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                                    \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                         \\\n    xmm_q = _mm_broadcast_ss(q + 1);                                       \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_1)                         \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! Compute the distance between matrix and query (FP32, M=4, N=4)\n#define ACCUM_FP32_4X4_AVX(m, q, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())              \\\n  const float *qe = q + (dim << 2);                                        \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {          \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \\\n         m += 8, q += 8) {                                                 \\\n      MATRIX_FP32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_ps,              \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const float *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \\\n         m += 8, q += 8) {                                                 \\\n      MATRIX_FP32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,             \\\n                               ACCUM_FP32_STEP_AVX)                        \\\n    }                                                                      \\\n  }               
                                                         \\\n  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));  \\\n  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));  \\\n  __m128 xmm_sum_0_2 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_2),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_2, 1));  \\\n  __m128 xmm_sum_0_3 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_3),     \\\n                                  _mm256_extractf128_ps(ymm_sum_0_3, 1));  \\\n  if (q != qe) {                                                           \\\n    __m128 xmm_m = _mm_loadu_ps(m);                                        \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                                    \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                         \\\n    xmm_q = _mm_broadcast_ss(q + 1);                                       \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_1)                         \\\n    xmm_q = _mm_broadcast_ss(q + 2);                                       \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_2)                         \\\n    xmm_q = _mm_broadcast_ss(q + 3);                                       \\\n    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_3)                         \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=1)\n#define ACCUM_FP32_8X1_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (const float *qe = q + dim; q != qe; m += 8, ++q) {          \\\n      MATRIX_FP32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_ps,        \\\n                               ACCUM_FP32_STEP_AVX)                  \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const float *qe = q + dim; q != qe; m += 8, ++q) {          \\\n      MATRIX_FP32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,       \\\n                               ACCUM_FP32_STEP_AVX)                  \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=2)\n#define ACCUM_FP32_8X2_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n      MATRIX_FP32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=4)\n#define ACCUM_FP32_8X4_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n      MATRIX_FP32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=8)\n#define ACCUM_FP32_8X8_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(1, 8, __m256, ymm_sum, _mm256_setzero_ps())         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n      MATRIX_FP32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                               ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=1)\n#define ACCUM_FP32_16X1_AVX(m, q, dim, out, _NORM)                   \\\n  MATRIX_VAR_INIT(2, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {         \\\n      MATRIX_FP32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_ps,       \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {         \\\n      MATRIX_FP32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=2)\n#define ACCUM_FP32_16X2_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 2, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=4)\n#define ACCUM_FP32_16X4_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 4, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=8)\n#define ACCUM_FP32_16X8_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 8, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=16)\n#define ACCUM_FP32_16X16_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(2, 16, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=1)\n#define ACCUM_FP32_32X1_AVX(m, q, dim, out, _NORM)                   \\\n  MATRIX_VAR_INIT(4, 1, __m256, ymm_sum, _mm256_setzero_ps())        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {         \\\n      MATRIX_FP32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_ps,       \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {         \\\n      MATRIX_FP32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \\\n                                ACCUM_FP32_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=2)\n#define ACCUM_FP32_32X2_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 2, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=4)\n#define ACCUM_FP32_32X4_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 4, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=8)\n#define ACCUM_FP32_32X8_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 8, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=16)\n#define ACCUM_FP32_32X16_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 16, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=32)\n#define ACCUM_FP32_32X32_AVX(m, q, dim, out, _NORM)                     \\\n  MATRIX_VAR_INIT(4, 32, __m256, ymm_sum, _mm256_setzero_ps())          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_ps,         \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_ps,        \\\n                                 ACCUM_FP32_STEP_AVX)                   \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                   \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)    \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=1)\n#define ACCUM_FP32_16X1_AVX512(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())         \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                   \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {          \\\n      MATRIX_FP32_ITER_16X1_AVX512(m, q, zmm_sum, _mm512_load_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + dim; q != qe; m += 16, ++q) {          \\\n      MATRIX_FP32_ITER_16X1_AVX512(m, q, zmm_sum, _mm512_loadu_ps,    \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=2)\n#define ACCUM_FP32_16X2_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n      MATRIX_FP32_ITER_16X2_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=4)\n#define ACCUM_FP32_16X4_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 4, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n      MATRIX_FP32_ITER_16X4_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=8)\n#define ACCUM_FP32_16X8_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 8, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n      MATRIX_FP32_ITER_16X8_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=16)\n#define ACCUM_FP32_16X16_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 16, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n      MATRIX_FP32_ITER_16X16_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=1)\n#define ACCUM_FP32_32X1_AVX512(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 1, __m512, zmm_sum, _mm512_setzero_ps())         \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                   \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {          \\\n      MATRIX_FP32_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_load_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const float *qe = q + dim; q != qe; m += 32, ++q) {          \\\n      MATRIX_FP32_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_loadu_ps,    \\\n                                   ACCUM_FP32_STEP_AVX512)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=2)\n#define ACCUM_FP32_32X2_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 2, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n      MATRIX_FP32_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=4)\n#define ACCUM_FP32_32X4_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 4, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n      MATRIX_FP32_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=8)\n#define ACCUM_FP32_32X8_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 8, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                    \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n      MATRIX_FP32_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                   ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=16)\n#define ACCUM_FP32_32X16_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 16, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n      MATRIX_FP32_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                   \\\n    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=32)\n#define ACCUM_FP32_32X32_AVX512(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 32, __m512, zmm_sum, _mm512_setzero_ps())          \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                     \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_load_ps,      \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  } else {                                                              \\\n    for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n      MATRIX_FP32_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_loadu_ps,     \\\n                                    ACCUM_FP32_STEP_AVX512)             \\\n    }                                                                   \\\n  }                                                                     \\\n  if (((uintptr_t)out & 0x3f) == 0) {                                   \\\n    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_store_ps, _NORM)   \\\n  } else {                                                              \\\n    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (FP32, M=2, N=1) on A64\n#define ACCUM_FP32_2X1_NEON_A64(m, q, dim, out, _NORM)                         \\\n  float32x4_t v_sum = vdupq_n_f32(0);                                          \\\n  const float *qe_aligned = q + ((dim >> 1) << 1);                             \\\n  const float *qe = q + dim;                                                   \\\n  for (; q != qe_aligned; m += 4, q += 2) {                                    \\\n    MATRIX_FP32_ITER_2X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)               \\\n  }                                                                            \\\n  v_sum = vaddq_f32(                                                           \\\n      vreinterpretq_f32_u64(vdupq_laneq_u64(vreinterpretq_u64_f32(v_sum), 1)), \\\n      v_sum);                                                                  \\\n  if (q != qe) {                                                               \\\n    float32x4_t v_m = vreinterpretq_f32_u64(                                   \\\n        vdupq_lane_u64(vld1_u64((const uint64_t *)m), 0));                     \\\n    float32x4_t v_q = vld1q_dup_f32(q);                                        \\\n    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum)                                      \\\n  }                                                                            \\\n  vst1_f32(out, _NORM(vget_low_f32(v_sum)));\n\n//! 
Compute the distance between matrix and query (FP32, M=2, N=1) on A32\n#define ACCUM_FP32_2X1_NEON_A32(m, q, dim, out, _NORM)                   \\\n  float32x4_t v_sum = vdupq_n_f32(0);                                    \\\n  const float *qe_aligned = q + ((dim >> 1) << 1);                       \\\n  const float *qe = q + dim;                                             \\\n  for (; q != qe_aligned; m += 4, q += 2) {                              \\\n    MATRIX_FP32_ITER_2X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)         \\\n  }                                                                      \\\n  float32x2_t sum = vadd_f32(vget_low_f32(v_sum), vget_high_f32(v_sum)); \\\n  v_sum = vcombine_f32(sum, sum);                                        \\\n  if (q != qe) {                                                         \\\n    float32x4_t v_m = vreinterpretq_f32_u64(                             \\\n        vdupq_lane_u64(vld1_u64((const uint64_t *)m), 0));               \\\n    float32x4_t v_q = vld1q_dup_f32(q);                                  \\\n    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum)                                \\\n  }                                                                      \\\n  vst1_f32(out, _NORM(vget_low_f32(v_sum)));\n\n//! 
Compute the distance between matrix and query (FP32, M=2, N=2)\n#define ACCUM_FP32_2X2_NEON(m, q, dim, out, _NORM)                       \\\n  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))              \\\n  const float *qe_aligned = q + ((dim >> 1) << 2);                       \\\n  const float *qe = q + (dim << 1);                                      \\\n  for (; q != qe_aligned; m += 4, q += 4) {                              \\\n    MATRIX_FP32_ITER_2X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)         \\\n  }                                                                      \\\n  v_sum_0_0 = vaddq_f32(                                                 \\\n      vcombine_f32(vget_low_f32(v_sum_0_0), vget_low_f32(v_sum_0_1)),    \\\n      vcombine_f32(vget_high_f32(v_sum_0_0), vget_high_f32(v_sum_0_1))); \\\n  if (q != qe) {                                                         \\\n    float32x2_t v_m_0 = vld1_f32(m);                                     \\\n    float32x2_t v_q_0 = vld1_f32(q);                                     \\\n    float32x4_t v_m = vcombine_f32(v_m_0, v_m_0);                        \\\n    float32x4_t v_q =                                                    \\\n        vcombine_f32(vdup_lane_f32(v_q_0, 0), vdup_lane_f32(v_q_0, 1));  \\\n    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                            \\\n  }                                                                      \\\n  MATRIX_VAR_STORE(1, 1, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! 
Compute the distance between matrix and query (FP32, M=4, N=1)\n#define ACCUM_FP32_4X1_NEON(m, q, dim, out, _NORM)               \\\n  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))      \\\n  const float *qe_aligned = q + ((dim >> 1) << 1);               \\\n  const float *qe = q + dim;                                     \\\n  for (; q != qe_aligned; m += 8, q += 2) {                      \\\n    MATRIX_FP32_ITER_4X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \\\n  }                                                              \\\n  if (q != qe) {                                                 \\\n    float32x4_t v_m = vld1q_f32(m);                              \\\n    float32x4_t v_q = vld1q_dup_f32(q);                          \\\n    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                    \\\n  }                                                              \\\n  v_sum_0_0 = vaddq_f32(v_sum_0_0, v_sum_0_1);                   \\\n  MATRIX_VAR_STORE(1, 1, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=4, N=2)\n#define ACCUM_FP32_4X2_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  for (const float *qe = q + (dim << 1); q != qe; m += 4, q += 2) { \\\n    MATRIX_FP32_ITER_4X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  MATRIX_VAR_STORE(1, 2, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! 
Compute the distance between matrix and query (FP32, M=4, N=4)\n#define ACCUM_FP32_4X4_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 4, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  for (const float *qe = q + (dim << 2); q != qe; m += 4, q += 4) { \\\n    MATRIX_FP32_ITER_4X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  MATRIX_VAR_STORE(1, 4, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=8, N=1)\n#define ACCUM_FP32_8X1_NEON(m, q, dim, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 1, float32x4_t, v_sum, vdupq_n_f32(0))      \\\n  for (const float *qe = q + dim; q != qe; m += 8, ++q) {        \\\n    MATRIX_FP32_ITER_8X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \\\n  }                                                              \\\n  MATRIX_VAR_STORE(2, 1, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=8, N=2)\n#define ACCUM_FP32_8X2_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 2, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  for (const float *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \\\n    MATRIX_FP32_ITER_8X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  MATRIX_VAR_STORE(2, 2, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=8, N=4)\n#define ACCUM_FP32_8X4_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 4, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  for (const float *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \\\n    MATRIX_FP32_ITER_8X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  MATRIX_VAR_STORE(2, 4, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! 
Compute the distance between matrix and query (FP32, M=8, N=8)\n#define ACCUM_FP32_8X8_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(2, 8, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  for (const float *qe = q + (dim << 3); q != qe; m += 8, q += 8) { \\\n    MATRIX_FP32_ITER_8X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                 \\\n  MATRIX_VAR_STORE(2, 8, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=16, N=1)\n#define ACCUM_FP32_16X1_NEON(m, q, dim, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 1, float32x4_t, v_sum, vdupq_n_f32(0))       \\\n  for (const float *qe = q + dim; q != qe; m += 16, ++q) {        \\\n    MATRIX_FP32_ITER_16X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \\\n  }                                                               \\\n  MATRIX_VAR_STORE(4, 1, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=16, N=2)\n#define ACCUM_FP32_16X2_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(4, 2, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 1); q != qe; m += 16, q += 2) { \\\n    MATRIX_FP32_ITER_16X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(4, 2, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=16, N=4)\n#define ACCUM_FP32_16X4_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(4, 4, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 2); q != qe; m += 16, q += 4) { \\\n    MATRIX_FP32_ITER_16X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(4, 4, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! 
Compute the distance between matrix and query (FP32, M=16, N=8)\n#define ACCUM_FP32_16X8_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(4, 8, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \\\n    MATRIX_FP32_ITER_16X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(4, 8, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=16, N=16)\n#define ACCUM_FP32_16X16_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(4, 16, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 4); q != qe; m += 16, q += 16) { \\\n    MATRIX_FP32_ITER_16X16_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                   \\\n  MATRIX_VAR_STORE(4, 16, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=32, N=1)\n#define ACCUM_FP32_32X1_NEON(m, q, dim, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 1, float32x4_t, v_sum, vdupq_n_f32(0))       \\\n  for (const float *qe = q + dim; q != qe; m += 32, ++q) {        \\\n    MATRIX_FP32_ITER_32X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \\\n  }                                                               \\\n  MATRIX_VAR_STORE(8, 1, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=32, N=2)\n#define ACCUM_FP32_32X2_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(8, 2, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 1); q != qe; m += 32, q += 2) { \\\n    MATRIX_FP32_ITER_32X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(8, 2, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! 
Compute the distance between matrix and query (FP32, M=32, N=4)\n#define ACCUM_FP32_32X4_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(8, 4, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 2); q != qe; m += 32, q += 4) { \\\n    MATRIX_FP32_ITER_32X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(8, 4, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=32, N=8)\n#define ACCUM_FP32_32X8_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(8, 8, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 3); q != qe; m += 32, q += 8) { \\\n    MATRIX_FP32_ITER_32X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                  \\\n  MATRIX_VAR_STORE(8, 8, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=32, N=16)\n#define ACCUM_FP32_32X16_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(8, 16, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 4); q != qe; m += 32, q += 16) { \\\n    MATRIX_FP32_ITER_32X16_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                   \\\n  MATRIX_VAR_STORE(8, 16, 4, v_sum, out, vst1q_f32, _NORM)\n\n//! Compute the distance between matrix and query (FP32, M=32, N=32)\n#define ACCUM_FP32_32X32_NEON(m, q, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(8, 32, float32x4_t, v_sum, vdupq_n_f32(0))          \\\n  for (const float *qe = q + (dim << 5); q != qe; m += 32, q += 32) { \\\n    MATRIX_FP32_ITER_32X32_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \\\n  }                                                                   \\\n  MATRIX_VAR_STORE(8, 32, 4, v_sum, out, vst1q_f32, _NORM)\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_accum_int4.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_int32.i\"\n#include \"matrix_utility.i\"\n\n//! Compute the distance between matrix and query (INT4, M=2, N=1)\n#define ACCUM_INT4_2X1_SSE(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe_aligned = qi + ((dim >> 5) << 2);                       \\\n  const uint32_t *qe = qi + (dim >> 3);                                      \\\n  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \\\n      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_load_si128,             \\\n                                ACCUM_INT4_STEP_SSE)                         \\\n    }                                                                        \\\n    if (qe >= qe_aligned + 2) {                                              \\\n      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));                \\\n      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \\\n      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \\\n      mi 
+= 4;                                                               \\\n      qi += 2;                                                               \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \\\n      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \\\n                                ACCUM_INT4_STEP_SSE)                         \\\n    }                                                                        \\\n    if (qe >= qe_aligned + 2) {                                              \\\n      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));               \\\n      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \\\n      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \\\n      mi += 4;                                                               \\\n      qi += 2;                                                               \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \\\n  xmm_sum_0_0 = _mm_add_epi32(                                               \\\n      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \\\n  }                                                                          \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! 
Compute the distance between matrix and query (INT4, M=2, N=2)\n#define ACCUM_INT4_2X2_SSE(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe = qi + ((dim >> 3) << 1);                               \\\n  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                \\\n         qi != qe_aligned; mi += 4, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_load_si128,             \\\n                                ACCUM_INT4_STEP_SSE)                         \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                \\\n         qi != qe_aligned; mi += 4, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \\\n                                ACCUM_INT4_STEP_SSE)                         \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                     
    \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (INT4, M=4, N=1)\n#define ACCUM_INT4_4X1_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  const uint32_t *qe = qi + (dim >> 3);                           \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);     \\\n         qi != qe_aligned; mi += 8, qi += 2) {                    \\\n      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n    if (qi != qe) {                                               \\\n      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));     \\\n      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \\\n      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);     \\\n         qi != qe_aligned; mi += 8, qi += 2) {                    \\\n      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                     
                        \\\n    if (qi != qe) {                                               \\\n      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));    \\\n      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \\\n      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \\\n    }                                                             \\\n  }                                                               \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);          \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (INT4, M=4, N=2)\n#define ACCUM_INT4_4X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;   \\\n         mi += 4, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;   \\\n         mi += 4, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  }    
                                                           \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (INT4, M=4, N=4)\n#define ACCUM_INT4_4X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;   \\\n         mi += 4, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;   \\\n         mi += 4, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=1)\n#define ACCUM_INT4_8X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())            \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0xf) == 0) {                                      \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=2)\n#define ACCUM_INT4_8X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;   \\\n         mi += 8, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;   \\\n         mi += 8, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=4)\n#define ACCUM_INT4_8X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;   \\\n         mi += 8, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;   \\\n         mi += 8, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=8)\n#define ACCUM_INT4_8X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;   \\\n         mi += 8, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;   \\\n         mi += 8, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT4_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=1)\n#define ACCUM_INT4_16X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())             \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                        \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                 ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                 ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=2)\n#define ACCUM_INT4_16X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;    \\\n         mi += 16, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;    \\\n         mi += 16, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=4)\n#define ACCUM_INT4_16X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;    \\\n         mi += 16, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;    \\\n         mi += 16, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=8)\n#define ACCUM_INT4_16X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;    \\\n         mi += 16, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;    \\\n         mi += 16, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=16)\n#define ACCUM_INT4_16X16_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;     \\\n         mi += 16, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;     \\\n         mi += 16, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=1)\n#define ACCUM_INT4_32X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())             \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                        \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                 ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                 ACCUM_INT4_STEP_SSE)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=2)\n#define ACCUM_INT4_32X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;    \\\n         mi += 32, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;    \\\n         mi += 32, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=4)\n#define ACCUM_INT4_32X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;    \\\n         mi += 32, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;    \\\n         mi += 32, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=8)\n#define ACCUM_INT4_32X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;    \\\n         mi += 32, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;    \\\n         mi += 32, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT4_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=16)\n#define ACCUM_INT4_32X16_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;     \\\n         mi += 32, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;     \\\n         mi += 32, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=32)\n#define ACCUM_INT4_32X32_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 5); qi != qe;     \\\n         mi += 32, qi += 32) {                                      \\\n      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 5); qi != qe;     \\\n         mi += 32, qi += 32) {                                      \\\n      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT4_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=2, N=1)\n#define ACCUM_INT4_2X1_AVX(m, q, dim, out, _NORM)                              \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                  \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                  \\\n  const uint32_t *qe_aligned = qi + ((dim >> 5) << 2);                         \\\n  const uint32_t *qe = qi + (dim >> 3);                                        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                           \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,            \\\n                                ACCUM_INT4_STEP_AVX)                           \\\n    }                                                                          \\\n  } else {                                                                     \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,           \\\n                                ACCUM_INT4_STEP_AVX)                           \\\n    }                                                                          \\\n  }                                                                            \\\n  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \\\n                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \\\n  if (qe >= qe_aligned + 2) {                                                  \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                   \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);                \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \\\n    mi += 4;               
                                                    \\\n    qi += 2;                                                                   \\\n  }                                                                            \\\n  xmm_sum_0 = _mm_add_epi32(                                                   \\\n      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \\\n  if (qi != qe) {                                                              \\\n    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                        \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                                   \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \\\n  }                                                                            \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0));\n\n//! Compute the distance between matrix and query (INT4, M=2, N=2)\n#define ACCUM_INT4_2X2_AVX(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe_aligned = qi + ((dim >> 5) << 3);                       \\\n  const uint32_t *qe = qi + ((dim >> 3) << 1);                               \\\n  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {          \\\n    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \\\n      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \\\n                                ACCUM_INT4_STEP_AVX)                         \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \\\n      
MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \\\n                                ACCUM_INT4_STEP_AVX)                         \\\n    }                                                                        \\\n  }                                                                          \\\n  __m128i xmm_sum_0_0 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \\\n  __m128i xmm_sum_0_1 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \\\n  if (qe >= qe_aligned + 4) {                                                \\\n    __m128i xmm_qi = _mm_loadu_si128((const __m128i *)(qi));                 \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                 \\\n    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0));     \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_0)                         \\\n    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));             \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_1)                         \\\n    mi += 4;                                                                 \\\n    qi += 4;                                                                 \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], 
qi[0]);              \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (INT4, M=4, N=1)\n#define ACCUM_INT4_4X1_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + (dim >> 3);                              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);        \\\n         qi != qe_aligned; mi += 8, qi += 2) {                       \\\n      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);        \\\n         qi != qe_aligned; mi += 8, qi += 2) {                       \\\n      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      
_mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! Compute the distance between matrix and query (INT4, M=4, N=2)\n#define ACCUM_INT4_4X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + ((dim >> 3) << 1);                       \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);        \\\n         qi != qe_aligned; mi += 8, qi += 4) {                       \\\n      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);        \\\n         qi != qe_aligned; mi += 8, qi += 4) {                       \\\n      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                
ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  __m128i xmm_sum_0_1 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                             \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=4, N=4)\n#define ACCUM_INT4_4X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + ((dim >> 3) << 2);                       \\\n  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {  \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 3);        \\\n         qi != qe_aligned; mi += 8, qi += 8) {                       \\\n      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 3);        \\\n         qi != qe_aligned; mi += 8, qi += 8) {                       \\\n      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  __m128i xmm_sum_0_1 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \\\n  __m128i xmm_sum_0_2 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),             \\\n                    
_mm256_extracti128_si256(ymm_sum_0_2, 1));       \\\n  __m128i xmm_sum_0_3 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_3, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                             \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                             \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_2)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                             \\\n    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_3)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=1)\n#define ACCUM_INT4_8X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())         \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                      \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                     \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=2)\n#define ACCUM_INT4_8X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;      \\\n         mi += 8, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;      \\\n         mi += 8, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=4)\n#define ACCUM_INT4_8X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;      \\\n         mi += 8, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;      \\\n         mi += 8, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=8, N=8)\n#define ACCUM_INT4_8X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;      \\\n         mi += 8, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;      \\\n         mi += 8, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=1)\n#define ACCUM_INT4_16X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                 ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                 ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                      \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=2)\n#define ACCUM_INT4_16X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;       \\\n         mi += 16, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;       \\\n         mi += 16, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=4)\n#define ACCUM_INT4_16X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;       \\\n         mi += 16, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;       \\\n         mi += 16, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=8)\n#define ACCUM_INT4_16X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;       \\\n         mi += 16, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;       \\\n         mi += 16, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=16, N=16)\n#define ACCUM_INT4_16X16_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;        \\\n         mi += 16, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;        \\\n         mi += 16, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=1)\n#define ACCUM_INT4_32X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                 ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 3); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                 ACCUM_INT4_STEP_AVX)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                      \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=2)\n#define ACCUM_INT4_32X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;       \\\n         mi += 32, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 1); qi != qe;       \\\n         mi += 32, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=4)\n#define ACCUM_INT4_32X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;       \\\n         mi += 32, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 2); qi != qe;       \\\n         mi += 32, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=8)\n#define ACCUM_INT4_32X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;       \\\n         mi += 32, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 3); qi != qe;       \\\n         mi += 32, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=16)\n#define ACCUM_INT4_32X16_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;        \\\n         mi += 32, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 4); qi != qe;        \\\n         mi += 32, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT4, M=32, N=32)\n#define ACCUM_INT4_32X32_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 5); qi != qe;        \\\n         mi += 32, qi += 32) {                                         \\\n      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 3) << 5); qi != qe;        \\\n         mi += 32, qi += 32) {                                         \\\n      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT4_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_accum_int8.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_int32.i\"\n#include \"matrix_utility.i\"\n\n//! Compute the distance between matrix and query (INT8, M=2, N=1)\n#define ACCUM_INT8_2X1_SSE(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                       \\\n  const uint32_t *qe = qi + (dim >> 2);                                      \\\n  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \\\n      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_load_si128,             \\\n                                ACCUM_INT8_STEP_SSE)                         \\\n    }                                                                        \\\n    if (qe >= qe_aligned + 2) {                                              \\\n      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));                \\\n      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \\\n      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \\\n      mi 
+= 4;                                                               \\\n      qi += 2;                                                               \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \\\n      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \\\n                                ACCUM_INT8_STEP_SSE)                         \\\n    }                                                                        \\\n    if (qe >= qe_aligned + 2) {                                              \\\n      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));               \\\n      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \\\n      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \\\n      mi += 4;                                                               \\\n      qi += 2;                                                               \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \\\n  xmm_sum_0_0 = _mm_add_epi32(                                               \\\n      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \\\n  }                                                                          \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! 
Compute the distance between matrix and query (INT8, M=2, N=2)\n#define ACCUM_INT8_2X2_SSE(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe = qi + ((dim >> 2) << 1);                               \\\n  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 2);                \\\n         qi != qe_aligned; mi += 4, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_load_si128,             \\\n                                ACCUM_INT8_STEP_SSE)                         \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 2);                \\\n         qi != qe_aligned; mi += 4, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \\\n                                ACCUM_INT8_STEP_SSE)                         \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                     
    \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (INT8, M=4, N=1)\n#define ACCUM_INT8_4X1_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  const uint32_t *qe = qi + (dim >> 2);                           \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 1);     \\\n         qi != qe_aligned; mi += 8, qi += 2) {                    \\\n      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n    if (qi != qe) {                                               \\\n      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));     \\\n      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \\\n      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 1);     \\\n         qi != qe_aligned; mi += 8, qi += 2) {                    \\\n      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                     
                        \\\n    if (qi != qe) {                                               \\\n      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));    \\\n      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \\\n      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \\\n    }                                                             \\\n  }                                                               \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);          \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (INT8, M=4, N=2)\n#define ACCUM_INT8_4X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;   \\\n         mi += 4, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;   \\\n         mi += 4, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  }    
                                                           \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (INT8, M=4, N=4)\n#define ACCUM_INT8_4X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;   \\\n         mi += 4, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;   \\\n         mi += 4, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=1)\n#define ACCUM_INT8_8X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())            \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0xf) == 0) {                                      \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=2)\n#define ACCUM_INT8_8X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;   \\\n         mi += 8, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;   \\\n         mi += 8, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=4)\n#define ACCUM_INT8_8X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;   \\\n         mi += 8, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;   \\\n         mi += 8, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=8)\n#define ACCUM_INT8_8X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \\\n  if (((uintptr_t)mi & 0xf) == 0) {                               \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;   \\\n         mi += 8, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  } else {                                                        \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;   \\\n         mi += 8, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                ACCUM_INT8_STEP_SSE)              \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=1)\n#define ACCUM_INT8_16X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())             \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                        \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                 ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                 ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=2)\n#define ACCUM_INT8_16X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;    \\\n         mi += 16, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;    \\\n         mi += 16, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=4)\n#define ACCUM_INT8_16X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;    \\\n         mi += 16, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;    \\\n         mi += 16, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=8)\n#define ACCUM_INT8_16X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;    \\\n         mi += 16, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;    \\\n         mi += 16, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=16)\n#define ACCUM_INT8_16X16_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;     \\\n         mi += 16, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;     \\\n         mi += 16, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=1)\n#define ACCUM_INT8_32X1_SSE(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())             \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                        \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \\\n                                 ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \\\n                                 ACCUM_INT8_STEP_SSE)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0xf) == 0) {                                       \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=2)\n#define ACCUM_INT8_32X2_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;    \\\n         mi += 32, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;    \\\n         mi += 32, qi += 2) {                                      \\\n      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=4)\n#define ACCUM_INT8_32X4_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;    \\\n         mi += 32, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;    \\\n         mi += 32, qi += 4) {                                      \\\n      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=8)\n#define ACCUM_INT8_32X8_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;    \\\n         mi += 32, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  } else {                                                         \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;    \\\n         mi += 32, qi += 8) {                                      \\\n      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                 ACCUM_INT8_STEP_SSE)              \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=16)\n#define ACCUM_INT8_32X16_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;     \\\n         mi += 32, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;     \\\n         mi += 32, qi += 16) {                                      \\\n      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=32)\n#define ACCUM_INT8_32X32_SSE(m, q, dim, out, _NORM)                 \\\n  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \\\n  if (((uintptr_t)mi & 0xf) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;     \\\n         mi += 32, qi += 32) {                                      \\\n      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_load_si128,  \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  } else {                                                          \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;     \\\n         mi += 32, qi += 32) {                                      \\\n      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \\\n                                  ACCUM_INT8_STEP_SSE)              \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=2, N=1)\n#define ACCUM_INT8_2X1_AVX(m, q, dim, out, _NORM)                              \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                  \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                  \\\n  const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                         \\\n  const uint32_t *qe = qi + (dim >> 2);                                        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                           \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,            \\\n                                ACCUM_INT8_STEP_AVX)                           \\\n    }                                                                          \\\n  } else {                                                                     \\\n    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \\\n      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,           \\\n                                ACCUM_INT8_STEP_AVX)                           \\\n    }                                                                          \\\n  }                                                                            \\\n  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \\\n                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \\\n  if (qe >= qe_aligned + 2) {                                                  \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                   \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);                \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \\\n    mi += 4;               
                                                    \\\n    qi += 2;                                                                   \\\n  }                                                                            \\\n  xmm_sum_0 = _mm_add_epi32(                                                   \\\n      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \\\n  if (qi != qe) {                                                              \\\n    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                        \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                                   \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \\\n  }                                                                            \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0));\n\n//! Compute the distance between matrix and query (INT8, M=2, N=2)\n#define ACCUM_INT8_2X2_AVX(m, q, dim, out, _NORM)                            \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \\\n  const uint32_t *qe_aligned = qi + ((dim >> 4) << 3);                       \\\n  const uint32_t *qe = qi + ((dim >> 2) << 1);                               \\\n  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {          \\\n    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \\\n      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \\\n                                ACCUM_INT8_STEP_AVX)                         \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \\\n      
MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \\\n                                ACCUM_INT8_STEP_AVX)                         \\\n    }                                                                        \\\n  }                                                                          \\\n  __m128i xmm_sum_0_0 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \\\n  __m128i xmm_sum_0_1 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \\\n  if (qe >= qe_aligned + 4) {                                                \\\n    __m128i xmm_qi = _mm_loadu_si128((const __m128i *)(qi));                 \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                 \\\n    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0));     \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_0)                         \\\n    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));             \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_1)                         \\\n    mi += 4;                                                                 \\\n    qi += 4;                                                                 \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (qi != qe) {                                                            \\\n    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \\\n    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], 
qi[0]);              \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (INT8, M=4, N=1)\n#define ACCUM_INT8_4X1_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + (dim >> 2);                              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 1);        \\\n         qi != qe_aligned; mi += 8, qi += 2) {                       \\\n      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 1);        \\\n         qi != qe_aligned; mi += 8, qi += 2) {                       \\\n      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      
_mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! Compute the distance between matrix and query (INT8, M=4, N=2)\n#define ACCUM_INT8_4X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + ((dim >> 2) << 1);                       \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 2);        \\\n         qi != qe_aligned; mi += 8, qi += 4) {                       \\\n      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 2);        \\\n         qi != qe_aligned; mi += 8, qi += 4) {                       \\\n      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                
ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  __m128i xmm_sum_0_1 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                             \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=4, N=4)\n#define ACCUM_INT8_4X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  const uint32_t *qe = qi + ((dim >> 2) << 2);                       \\\n  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {  \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 3);        \\\n         qi != qe_aligned; mi += 8, qi += 8) {                       \\\n      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe_aligned = qi + ((dim >> 3) << 3);        \\\n         qi != qe_aligned; mi += 8, qi += 8) {                       \\\n      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  __m128i xmm_sum_0_0 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \\\n  __m128i xmm_sum_0_1 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \\\n  __m128i xmm_sum_0_2 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),             \\\n                    
_mm256_extracti128_si256(ymm_sum_0_2, 1));       \\\n  __m128i xmm_sum_0_3 =                                              \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),             \\\n                    _mm256_extracti128_si256(ymm_sum_0_3, 1));       \\\n  if (qi != qe) {                                                    \\\n    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                             \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                             \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_2)                 \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                             \\\n    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_3)                 \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)     \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=1)\n#define ACCUM_INT8_8X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())         \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                      \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                     \\\n  } else {                                                                \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 8, ++qi) { \\\n      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                     \\\n  }                                                                       \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                     \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \\\n  } else {                                                                \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=2)\n#define ACCUM_INT8_8X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;      \\\n         mi += 8, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;      \\\n         mi += 8, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=4)\n#define ACCUM_INT8_8X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;      \\\n         mi += 8, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;      \\\n         mi += 8, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=8, N=8)\n#define ACCUM_INT8_8X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                 \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;      \\\n         mi += 8, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  } else {                                                           \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;      \\\n         mi += 8, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=1)\n#define ACCUM_INT8_16X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                 ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \\\n      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                 ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                      \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=2)\n#define ACCUM_INT8_16X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \\\n         mi += 16, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \\\n         mi += 16, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=4)\n#define ACCUM_INT8_16X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \\\n         mi += 16, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \\\n         mi += 16, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=8)\n#define ACCUM_INT8_16X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \\\n         mi += 16, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \\\n         mi += 16, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=16, N=16)\n#define ACCUM_INT8_16X16_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \\\n         mi += 16, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \\\n         mi += 16, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=1)\n#define ACCUM_INT8_32X1_AVX(m, q, dim, out, _NORM)                         \\\n  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                       \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \\\n                                 ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                      \\\n  } else {                                                                 \\\n    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \\\n      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \\\n                                 ACCUM_INT8_STEP_AVX)                      \\\n    }                                                                      \\\n  }                                                                        \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                      \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \\\n  } else {                                                                 \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=2)\n#define ACCUM_INT8_32X2_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \\\n         mi += 32, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \\\n         mi += 32, qi += 2) {                                         \\\n      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=4)\n#define ACCUM_INT8_32X4_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \\\n         mi += 32, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \\\n         mi += 32, qi += 4) {                                         \\\n      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=8)\n#define ACCUM_INT8_32X8_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                  \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \\\n         mi += 32, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \\\n         mi += 32, qi += 8) {                                         \\\n      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                 ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=16)\n#define ACCUM_INT8_32X16_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \\\n         mi += 32, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \\\n         mi += 32, qi += 16) {                                         \\\n      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (INT8, M=32, N=32)\n#define ACCUM_INT8_32X32_AVX(m, q, dim, out, _NORM)                    \\\n  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())     \\\n  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \\\n  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \\\n  if (((uintptr_t)mi & 0x1f) == 0) {                                   \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;        \\\n         mi += 32, qi += 32) {                                         \\\n      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;        \\\n         mi += 32, qi += 32) {                                         \\\n      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \\\n                                  ACCUM_INT8_STEP_AVX)                 \\\n    }                                                                  \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                  \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_euclidean_utility.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n//! Calculate sum of squared difference (GENERAL)\n#define SSD_FP32_GENERAL(m, q, sum) \\\n  {                                 \\\n    float x = m - q;                \\\n    sum += (x * x);                 \\\n  }\n\n//! Calculate sum of squared difference (SSE)\n#define SSD_FP32_SSE(xmm_m, xmm_q, xmm_sum)        \\\n  {                                                \\\n    __m128 xmm_d = _mm_sub_ps(xmm_m, xmm_q);       \\\n    xmm_sum = _mm_fmadd_ps(xmm_d, xmm_d, xmm_sum); \\\n  }\n\n//! Calculate sum of squared difference (AVX)\n#define SSD_FP32_AVX(ymm_m, ymm_q, ymm_sum)           \\\n  {                                                   \\\n    __m256 ymm_d = _mm256_sub_ps(ymm_m, ymm_q);       \\\n    ymm_sum = _mm256_fmadd_ps(ymm_d, ymm_d, ymm_sum); \\\n  }\n\n//! Calculate sum of squared difference (NEON)\n#define SSD_FP32_NEON(v_m, v_q, v_sum)     \\\n  {                                        \\\n    float32x4_t v_d = vsubq_f32(v_m, v_q); \\\n    v_sum = vfmaq_f32(v_sum, v_d, v_d);    \\\n  }\n\n//! Calculate sum of squared difference (GENERAL)\n#define SSD_FP16_GENERAL(m, q, sum) \\\n  {                                 \\\n    float x = m - q;                \\\n    sum += (x * x);                 \\\n  }\n\n//! 
Calculate sum of squared difference (NEON)\n#define SSD_FP16_NEON(v_m, v_q, v_sum)     \\\n  {                                        \\\n    float16x8_t v_d = vsubq_f16(v_m, v_q); \\\n    v_sum = vfmaq_f16(v_sum, v_d, v_d);    \\\n  }\n\n//! Calculate sum of squared difference (AVX512)\n#define SSD_FP32_AVX512(zmm_m, zmm_q, zmm_sum)        \\\n  {                                                   \\\n    __m512 zmm_d = _mm512_sub_ps(zmm_m, zmm_q);       \\\n    zmm_sum = _mm512_fmadd_ps(zmm_d, zmm_d, zmm_sum); \\\n  }\n\n//! Calculate sum of squared difference (GENERAL)\n#define SSD_INT4_GENERAL(m, q, sum)                                       \\\n  sum += Int4SquaredDiffTable[(((m) << 4) & 0xf0) | (((q) >> 0) & 0xf)] + \\\n         Int4SquaredDiffTable[(((m) >> 0) & 0xf0) | (((q) >> 4) & 0xf)];\n\n\n#if defined(__SSE4_1__)\nstatic const __m128i MASK_INT4_SSE = _mm_set1_epi32(0xf0f0f0f0);\nstatic const __m128i ONES_INT16_SSE = _mm_set1_epi32(0x00010001);\n#endif  // __SSE4_1__\n\n//! Compute the square root of value (SSE)\n#define SQRT_FP32_SSE(v, ...) _mm_sqrt_ps(_mm_cvtepi32_ps(v))\n\n#if defined(__AVX2__)\nstatic const __m256i MASK_INT4_AVX = _mm256_set1_epi32(0xf0f0f0f0);\nstatic const __m256i ONES_INT16_AVX = _mm256_set1_epi32(0x00010001);\n#endif  // __AVX2__\n\n//! 
Calculate sum of squared difference (SSE)\n#define SSD_INT4_SSE(xmm_m, xmm_q, xmm_sum)                                  \\\n  {                                                                          \\\n    __m128i xmm_lhs =                                                        \\\n        _mm_and_si128(_mm_slli_epi32((xmm_m), 4), MASK_INT4_SSE);            \\\n    __m128i xmm_rhs =                                                        \\\n        _mm_and_si128(_mm_slli_epi32((xmm_q), 4), MASK_INT4_SSE);            \\\n    xmm_lhs = _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),    \\\n                                          _mm_min_epi8(xmm_lhs, xmm_rhs)),   \\\n                             4);                                             \\\n    xmm_sum = _mm_add_epi32(                                                 \\\n        _mm_madd_epi16(_mm_maddubs_epi16(xmm_lhs, xmm_lhs), ONES_INT16_SSE), \\\n        xmm_sum);                                                            \\\n    xmm_lhs = _mm_and_si128((xmm_m), MASK_INT4_SSE);                         \\\n    xmm_rhs = _mm_and_si128((xmm_q), MASK_INT4_SSE);                         \\\n    xmm_lhs = _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),    \\\n                                          _mm_min_epi8(xmm_lhs, xmm_rhs)),   \\\n                             4);                                             \\\n    xmm_sum = _mm_add_epi32(                                                 \\\n        _mm_madd_epi16(_mm_maddubs_epi16(xmm_lhs, xmm_lhs), ONES_INT16_SSE), \\\n        xmm_sum);                                                            \\\n  }\n\n//! 
Compute the distance between matrix and query\n#define SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)                       \\\n  {                                                                        \\\n    __m128i xmm_lhs_0 =                                                    \\\n        _mm_and_si128(_mm_slli_epi32((xmm_lhs), 4), MASK_INT4_SSE);        \\\n    __m128i xmm_rhs_0 =                                                    \\\n        _mm_and_si128(_mm_slli_epi32((xmm_rhs), 4), MASK_INT4_SSE);        \\\n    __m128i xmm_lhs_1 = _mm_and_si128((xmm_lhs), MASK_INT4_SSE);           \\\n    __m128i xmm_rhs_1 = _mm_and_si128((xmm_rhs), MASK_INT4_SSE);           \\\n    xmm_lhs_0 =                                                            \\\n        _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_lhs_0, xmm_rhs_0),    \\\n                                    _mm_min_epi8(xmm_lhs_0, xmm_rhs_0)),   \\\n                       4);                                                 \\\n    xmm_rhs_0 =                                                            \\\n        _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_lhs_1, xmm_rhs_1),    \\\n                                    _mm_min_epi8(xmm_lhs_1, xmm_rhs_1)),   \\\n                       4);                                                 \\\n    xmm_lhs_0 = _mm_madd_epi16(_mm_maddubs_epi16(xmm_lhs_0, xmm_lhs_0),    \\\n                               ONES_INT16_SSE);                            \\\n    xmm_rhs_0 = _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_rhs_0),    \\\n                               ONES_INT16_SSE);                            \\\n    xmm_sum = _mm_add_epi32(_mm_add_epi32(xmm_lhs_0, xmm_rhs_0), xmm_sum); \\\n  }\n\n//! 
Calculate sum of squared difference (AVX)\n#define SSD_INT4_AVX(ymm_m, ymm_q, ymm_sum)                                   \\\n  {                                                                           \\\n    __m256i ymm_lhs =                                                         \\\n        _mm256_and_si256(_mm256_slli_epi32((ymm_m), 4), MASK_INT4_AVX);       \\\n    __m256i ymm_rhs =                                                         \\\n        _mm256_and_si256(_mm256_slli_epi32((ymm_q), 4), MASK_INT4_AVX);       \\\n    ymm_lhs =                                                                 \\\n        _mm256_srli_epi32(_mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),  \\\n                                          _mm256_min_epi8(ymm_lhs, ymm_rhs)), \\\n                          4);                                                 \\\n    ymm_sum = _mm256_add_epi32(                                               \\\n        _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_lhs, ymm_lhs),             \\\n                          ONES_INT16_AVX),                                    \\\n        ymm_sum);                                                             \\\n    ymm_lhs = _mm256_and_si256((ymm_m), MASK_INT4_AVX);                       \\\n    ymm_rhs = _mm256_and_si256((ymm_q), MASK_INT4_AVX);                       \\\n    ymm_lhs =                                                                 \\\n        _mm256_srli_epi32(_mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),  \\\n                                          _mm256_min_epi8(ymm_lhs, ymm_rhs)), \\\n                          4);                                                 \\\n    ymm_sum = _mm256_add_epi32(                                               \\\n        _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_lhs, ymm_lhs),             \\\n                          ONES_INT16_AVX),                                    \\\n        ymm_sum);                                                      
       \\\n  }\n\n//! Compute the distance between matrix and query\n#define SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)                          \\\n  {                                                                           \\\n    __m256i ymm_lhs_0 =                                                       \\\n        _mm256_and_si256(_mm256_slli_epi32((ymm_lhs), 4), MASK_INT4_AVX);     \\\n    __m256i ymm_rhs_0 =                                                       \\\n        _mm256_and_si256(_mm256_slli_epi32((ymm_rhs), 4), MASK_INT4_AVX);     \\\n    __m256i ymm_lhs_1 = _mm256_and_si256((ymm_lhs), MASK_INT4_AVX);           \\\n    __m256i ymm_rhs_1 = _mm256_and_si256((ymm_rhs), MASK_INT4_AVX);           \\\n    ymm_lhs_0 = _mm256_srli_epi32(                                            \\\n        _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_0, ymm_rhs_0),                \\\n                        _mm256_min_epi8(ymm_lhs_0, ymm_rhs_0)),               \\\n        4);                                                                   \\\n    ymm_rhs_0 = _mm256_srli_epi32(                                            \\\n        _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_1, ymm_rhs_1),                \\\n                        _mm256_min_epi8(ymm_lhs_1, ymm_rhs_1)),               \\\n        4);                                                                   \\\n    ymm_lhs_0 = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_lhs_0, ymm_lhs_0), \\\n                                  ONES_INT16_AVX);                            \\\n    ymm_rhs_0 = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_rhs_0), \\\n                                  ONES_INT16_AVX);                            \\\n    ymm_sum =                                                                 \\\n        _mm256_add_epi32(_mm256_add_epi32(ymm_lhs_0, ymm_rhs_0), ymm_sum);    \\\n  }\n\n//! 
Calculate sum of squared difference (GENERAL)\n#define SSD_INT8_GENERAL(m, q, sum)   \\\n  {                                   \\\n    int32_t x = m - q;                \\\n    sum += static_cast<float>(x * x); \\\n  }\n\n//! Calculate sum of squared difference (SSE)\n#define SSD_INT8_SSE(xmm_m, xmm_q, xmm_sum)                                \\\n  {                                                                        \\\n    xmm_sum = _mm_add_epi32(                                               \\\n        _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_m),              \\\n                                         _mm_sign_epi8(xmm_m, xmm_m)),     \\\n                       ONES_INT16_SSE),                                    \\\n        xmm_sum);                                                          \\\n    xmm_sum = _mm_add_epi32(                                               \\\n        _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_q),              \\\n                                         _mm_sign_epi8(xmm_q, xmm_q)),     \\\n                       ONES_INT16_SSE),                                    \\\n        xmm_sum);                                                          \\\n    xmm_sum = _mm_sub_epi32(                                               \\\n        xmm_sum,                                                           \\\n        _mm_slli_epi32(                                                    \\\n            _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_q),          \\\n                                             _mm_sign_epi8(xmm_m, xmm_q)), \\\n                           ONES_INT16_SSE),                                \\\n            1));                                                           \\\n  }\n\n//! 
Calculate sum of squared difference (AVX)\n#define SSD_INT8_AVX(ymm_m, ymm_q, ymm_sum)                                    \\\n  {                                                                            \\\n    ymm_sum = _mm256_add_epi32(                                                \\\n        _mm256_madd_epi16(                                                     \\\n            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_m),                       \\\n                                 _mm256_sign_epi8(ymm_m, ymm_m)),              \\\n            ONES_INT16_AVX),                                                   \\\n        ymm_sum);                                                              \\\n    ymm_sum = _mm256_add_epi32(                                                \\\n        _mm256_madd_epi16(                                                     \\\n            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_q),                       \\\n                                 _mm256_sign_epi8(ymm_q, ymm_q)),              \\\n            ONES_INT16_AVX),                                                   \\\n        ymm_sum);                                                              \\\n    ymm_sum = _mm256_sub_epi32(                                                \\\n        ymm_sum, _mm256_slli_epi32(                                            \\\n                     _mm256_madd_epi16(                                        \\\n                         _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_q),          \\\n                                              _mm256_sign_epi8(ymm_m, ymm_q)), \\\n                         ONES_INT16_AVX),                                      \\\n                     1));                                                      \\\n  }\n\n//! Compute the square root of value (AVX)\n#define SQRT_FP32_AVX(v, ...) _mm256_sqrt_ps(_mm256_cvtepi32_ps(v))\n\n//! Compute the square root of value (AVX512)\n#define SQRT_FP32_AVX512(v, ...) 
_mm512_sqrt_ps(_mm512_cvtepi32_ps(v))\n\n#define ACCUM_FP32_STEP_SSE SSD_FP32_SSE\n#define ACCUM_FP32_STEP_AVX SSD_FP32_AVX\n\n#define ACCUM_FP32_STEP_AVX512 SSD_FP32_AVX512\n#define ACCUM_FP16_STEP_GENERAL SSD_FP16_GENERAL\n\n#define ACCUM_FP16_STEP_NEON SSD_FP16_NEON\n#define ACCUM_FP32_STEP_NEON SSD_FP32_NEON\n\n#define ACCUM_INT4_STEP_SSE SSD_INT4_SSE\n#define ACCUM_INT4_STEP_AVX SSD_INT4_AVX\n#define ACCUM_INT8_STEP_SSE SSD_INT8_SSE\n#define ACCUM_INT8_STEP_AVX SSD_INT8_AVX"
  },
  {
    "path": "src/ailego/math/distance_matrix_fp16.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"matrix_define.i\"\n#include <iostream> \n#if !defined(__AVX__)\n#define _mm_broadcast_si32(a) _mm_castps_si128(_mm_load1_ps((const float *)(a)))\n#else\n#define _mm_broadcast_si32(a) \\\n  _mm_castps_si128(_mm_broadcast_ss((const float *)(a)))\n#define _mm256_broadcast_si32(a) \\\n  _mm256_castps_si256(_mm256_broadcast_ss((const float *)(a)))\n#endif  // !__AVX__\n\n//! 
Mask process of computing distance (FP16)\n#define MATRIX_FP16_MASK_AVX(lhs, rhs, cnt, _MASK, _RES, _PROC)              \\\n  switch (cnt) {                                                             \\\n    case 7: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), *((const short *)(lhs) + 6),                       \\\n          *((const short *)(lhs) + 5), *((const short *)(lhs) + 4),          \\\n          *((const short *)(lhs) + 3), *((const short *)(lhs) + 2),          \\\n          *((const short *)(lhs) + 1), *((const short *)(lhs))));            \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), *((const short *)(rhs) + 6),                       \\\n          *((const short *)(rhs) + 5), *((const short *)(rhs) + 4),          \\\n          *((const short *)(rhs) + 3), *((const short *)(rhs) + 2),          \\\n          *((const short *)(rhs) + 1), *((const short *)(rhs))));            \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n    case 6: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \\\n          _mm_set_epi32((int)(_MASK), *((const int *)(lhs) + 2),             \\\n                        *((const int *)(lhs) + 1), *((const int *)(lhs))));  \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(                                      \\\n          _mm_set_epi32((int)(_MASK), *((const int *)(rhs) + 2),             \\\n                        *((const int *)(rhs) + 1), *((const int *)(rhs))));  \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break; 
                                                                \\\n    }                                                                        \\\n    case 5: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), (short)(_MASK), (short)(_MASK),                    \\\n          *((const short *)(lhs) + 4), *((const short *)(lhs) + 3),          \\\n          *((const short *)(lhs) + 2), *((const short *)(lhs) + 1),          \\\n          *((const short *)(lhs))));                                         \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), (short)(_MASK), (short)(_MASK),                    \\\n          *((const short *)(rhs) + 4), *((const short *)(rhs) + 3),          \\\n          *((const short *)(rhs) + 2), *((const short *)(rhs) + 1),          \\\n          *((const short *)(rhs))));                                         \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n    case 4: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \\\n          _mm_set_epi64((__m64)(_MASK), *((const __m64 *)(lhs))));           \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(                                      \\\n          _mm_set_epi64((__m64)(_MASK), *((const __m64 *)(rhs))));           \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n    case 3: {                                                        
        \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), (short)(_MASK), (short)(_MASK), (short)(_MASK),    \\\n          (short)(_MASK), *((const short *)(lhs) + 2),                       \\\n          *((const short *)(lhs) + 1), *((const short *)(lhs))));            \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \\\n          (short)(_MASK), (short)(_MASK), (short)(_MASK), (short)(_MASK),    \\\n          (short)(_MASK), *((const short *)(rhs) + 2),                       \\\n          *((const short *)(rhs) + 1), *((const short *)(rhs))));            \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n    case 2: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi32(                        \\\n          (int)(_MASK), (int)(_MASK), (int)(_MASK), *((const int *)(lhs)))); \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi32(                        \\\n          (int)(_MASK), (int)(_MASK), (int)(_MASK), *((const int *)(rhs)))); \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n    case 1: {                                                                \\\n      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \\\n          _mm_set_epi16(*((const short *)(lhs)), (short)(_MASK),             \\\n                        (short)(_MASK), (short)(_MASK), (short)(_MASK),      \\\n                        (short)(_MASK), (short)(_MASK), (short)(_MASK)));    \\\n      __m256 ymm_rhs = _mm256_cvtph_ps(     
                                 \\\n          _mm_set_epi16(*((const short *)(rhs)), (short)(_MASK),             \\\n                        (short)(_MASK), (short)(_MASK), (short)(_MASK),      \\\n                        (short)(_MASK), (short)(_MASK), (short)(_MASK)));    \\\n      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \\\n      break;                                                                 \\\n    }                                                                        \\\n  }\n\n//! Iterative process of computing distance (FP16, M=1, N=1)\n#define MATRIX_FP16_ITER_1X1_AVX(m, q, _RES, _LOAD, _PROC)          \\\n  {                                                                 \\\n    __m256i ymm_mi = _LOAD((const __m256i *)m);                     \\\n    __m256i ymm_qi = _LOAD((const __m256i *)q);                     \\\n    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_qi)); \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0);                                \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \\\n    ymm_q = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_qi, 1));   \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0);                                \\\n  }\n\n//! Iterative process of computing distance (FP16, M=2, N=1)\n#define MATRIX_FP16_ITER_2X1_AVX(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                              \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm_shufflehi_epi16(          \\\n        _mm_shufflelo_epi16(_mm_set1_epi64(*(const __m64 *)(q)), \\\n                            _MM_SHUFFLE(1, 1, 0, 0)),            \\\n        _MM_SHUFFLE(3, 3, 2, 2)));                               \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                              \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=2, N=2)\n#define MATRIX_FP16_ITER_2X2_AVX(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                              \\\n    __m256 ymm_q = _mm256_cvtph_ps(_LOAD((const __m128i *)(q))); \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \\\n    __m256 ymm_p = _mm256_moveldup_ps(ymm_q);                    \\\n    _PROC(ymm_m, ymm_p, _RES##_0_0)                              \\\n    ymm_p = _mm256_movehdup_ps(ymm_q);                           \\\n    _PROC(ymm_m, ymm_p, _RES##_0_1)                              \\\n  }\n\n//! Iterative process of computing distance (FP16, M=4, N=1)\n#define MATRIX_FP16_ITER_4X1_AVX(m, q, _RES, _LOAD, _PROC)                 \\\n  {                                                                        \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));           \\\n    __m256 ymm_q = _mm256_cvtph_ps(                                        \\\n        _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_broadcast_si32(q), 0), \\\n                            _MM_SHUFFLE(1, 1, 1, 1)));                     \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                        \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=4, N=2)\n#define MATRIX_FP16_ITER_4X2_AVX(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                              \\\n    __m128i xmm_qi = _mm_set1_epi64(*(const __m64 *)(q));        \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \\\n    __m256 ymm_q_0 = _mm256_cvtph_ps(_mm_shufflehi_epi16(        \\\n        _mm_shufflelo_epi16(xmm_qi, _MM_SHUFFLE(0, 0, 0, 0)),    \\\n        _MM_SHUFFLE(2, 2, 2, 2)));                               \\\n    __m256 ymm_q_1 = _mm256_cvtph_ps(_mm_shufflehi_epi16(        \\\n        _mm_shufflelo_epi16(xmm_qi, _MM_SHUFFLE(1, 1, 1, 1)),    \\\n        _MM_SHUFFLE(3, 3, 3, 3)));                               \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)          \\\n  }\n\n//! Iterative process of computing distance (FP16, M=4, N=4)\n#define MATRIX_FP16_ITER_4X4_AVX(m, q, _RES, _LOAD, _PROC)            \\\n  {                                                                   \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));      \\\n    __m256 ymm_q = _mm256_cvtph_ps(_LOAD((const __m128i *)(q)));      \\\n    __m256 ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(0, 0, 0, 0)); \\\n    _PROC(ymm_m, ymm_p, _RES##_0_0)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(1, 1, 1, 1));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_1)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(2, 2, 2, 2));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_2)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(3, 3, 3, 3));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_3)                                   \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=8, N=1)\n#define MATRIX_FP16_ITER_8X1_AVX(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                      \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));         \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)(q))); \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                      \\\n  }\n\n//! Iterative process of computing distance (FP16, M=8, N=2)\n#define MATRIX_FP16_ITER_8X2_AVX(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                              \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));          \\\n    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                   \\\n    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                   \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)          \\\n  }\n\n//! Iterative process of computing distance (FP16, M=8, N=4)\n#define MATRIX_FP16_ITER_8X4_AVX(m, q, _RES, _LOAD, _PROC)              \\\n  {                                                                     \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));        \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(xmm_p[0]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                     \\\n    ymm_q = _mm256_set1_ps(xmm_p[1]);                                   \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                                     \\\n    ymm_q = _mm256_set1_ps(xmm_p[2]);                                   \\\n    _PROC(ymm_m, ymm_q, _RES##_0_2)                                     \\\n    ymm_q = _mm256_set1_ps(xmm_p[3]);                                   \\\n    _PROC(ymm_m, ymm_q, _RES##_0_3)                                     \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=8, N=8)\n#define MATRIX_FP16_ITER_8X8_AVX(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                              \\\n    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_LOAD((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                     \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_2)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_3)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_4)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_5)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_6)                              \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                            \\\n    _PROC(ymm_m, ymm_q, _RES##_0_7)                              \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=16, N=1)\n#define MATRIX_FP16_ITER_16X1_AVX(m, q, _RES, _LOAD, _PROC)                \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)q));     \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n  }\n\n//! Iterative process of computing distance (FP16, M=16, N=2)\n#define MATRIX_FP16_ITER_16X2_AVX(m, q, _RES, _LOAD, _PROC)         \\\n  {                                                                 \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                   \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));             \\\n    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                      \\\n    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                      \\\n    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)             \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \\\n    MATRIX_VAR_PROC(1, 2, 1, ymm_m, ymm_q, _RES, _PROC)             \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=16, N=4)\n#define MATRIX_FP16_ITER_16X4_AVX(m, q, _RES, _LOAD, _PROC)                \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q)));    \\\n    __m256 ymm_q = _mm256_set1_ps(xmm_p[0]);                               \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(xmm_p[1]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(xmm_p[2]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(xmm_p[3]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=16, N=8)\n#define MATRIX_FP16_ITER_16X8_AVX(m, q, _RES, _LOAD, _PROC)                \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=16, N=16)\n#define MATRIX_FP16_ITER_16X16_AVX(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));    \\\n    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 8, ymm_m, ymm_q, 
_RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 9, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 10, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 11, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 12, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 13, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 14, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(2, 1, 15, ymm_m, ymm_q, _RES, _PROC)                   \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=1)\n#define MATRIX_FP16_ITER_32X1_AVX(m, q, _RES, _LOAD, _PROC)                \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                             \\\n    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)q));     \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n  }\n\n//! Iterative process of computing distance (FP16, M=32, N=2)\n#define MATRIX_FP16_ITER_32X2_AVX(m, q, _RES, _LOAD, _PROC)         \\\n  {                                                                 \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));             \\\n    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                      \\\n    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                      \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                   \\\n    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)             \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \\\n    MATRIX_VAR_PROC(1, 2, 1, ymm_m, ymm_q, _RES, _PROC)             \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                      \\\n    ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));        \\\n    MATRIX_VAR_PROC(1, 2, 2, ymm_m, ymm_q, _RES, _PROC)             \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \\\n    MATRIX_VAR_PROC(1, 2, 3, ymm_m, ymm_q, _RES, _PROC)             \\\n 
 }\n\n//! Iterative process of computing distance (FP16, M=32, N=4)\n#define MATRIX_FP16_ITER_32X4_AVX(m, q, _RES, _LOAD, _PROC)             \\\n  {                                                                     \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \\\n    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                          \\\n    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                          \\\n    __m256 ymm_q_2 = _mm256_set1_ps(xmm_p[2]);                          \\\n    __m256 ymm_q_3 = _mm256_set1_ps(xmm_p[3]);                          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                       \\\n    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));     \\\n    MATRIX_VAR_PROC(1, 4, 0, ymm_m, ymm_q, _RES, _PROC)                 \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));       \\\n    MATRIX_VAR_PROC(1, 4, 1, ymm_m, ymm_q, _RES, _PROC)                 \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                          \\\n    ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));            \\\n    MATRIX_VAR_PROC(1, 4, 2, ymm_m, ymm_q, _RES, _PROC)                 \\\n    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));       \\\n    MATRIX_VAR_PROC(1, 4, 3, ymm_m, ymm_q, _RES, _PROC)                 \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=8)\n#define MATRIX_FP16_ITER_32X8_AVX(m, q, _RES, _LOAD, _PROC)                \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                             \\\n    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, 
_RES, _PROC)                    \\\n  }\n\n//! Iterative process of computing distance (FP16, M=32, N=16)\n#define MATRIX_FP16_ITER_32X16_AVX(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                             \\\n    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      
\\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));    \\\n    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_m, ymm_q, _RES, _PROC)                   \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=32)\n#define MATRIX_FP16_ITER_32X32_AVX(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                        \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \\\n    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    ymm_mi = _LOAD((const __m256i *)(m + 16));                             \\\n    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, 
_RES, _PROC)                    \\\n    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));    \\\n    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_m, ymm_q, _RES, _PROC)                    \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 16)));   \\\n    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 16, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 17, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 18, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                               
       \\\n    MATRIX_VAR_PROC(4, 1, 19, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 20, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 21, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 22, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 23, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 24)));   \\\n    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 24, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 25, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 26, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 27, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 28, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 29, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \\\n    MATRIX_VAR_PROC(4, 1, 30, ymm_m, ymm_q, _RES, _PROC)                   \\\n    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \\\n    
MATRIX_VAR_PROC(4, 1, 31, ymm_m, ymm_q, _RES, _PROC)                   \\\n  }\n\n//! Iterative process of computing distance (FP16, M=1, N=1)\n#define MATRIX_FP16_ITER_1X1_AVX512(m, q, _RES, _LOAD, _PROC)       \\\n  {                                                                 \\\n    __m512i zmm_mi = _LOAD((const __m512i *)m);                     \\\n    __m512i zmm_qi = _LOAD((const __m512i *)q);                     \\\n    __m512 zmm_m = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi)); \\\n    __m512 zmm_q = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_qi)); \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0);                                \\\n    zmm_m = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1));  \\\n    zmm_q = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_qi, 1));  \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0);                                \\\n  }\n\n//! Iterative process of computing distance (FP16, M=16, N=1)\n#define MATRIX_FP16_ITER_16X1_AVX512(m, q, _RES, _LOAD, _PROC)            \\\n  {                                                                       \\\n    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));          \\\n    __m512 zmm_q = _mm512_cvtph_ps(_mm256_set1_epi16(*(const short *)q)); \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                                       \\\n  }\n\n//! Iterative process of computing distance (FP16, M=16, N=2)\n#define MATRIX_FP16_ITER_16X2_AVX512(m, q, _RES, _LOAD, _PROC)   \\\n  {                                                              \\\n    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m))); \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));          \\\n    __m512 zmm_q_0 = _mm512_set1_ps(xmm_p[0]);                   \\\n    __m512 zmm_q_1 = _mm512_set1_ps(xmm_p[1]);                   \\\n    MATRIX_VAR_PROC(1, 2, 0, zmm_m, zmm_q, _RES, _PROC)          \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=16, N=4)\n#define MATRIX_FP16_ITER_16X4_AVX512(m, q, _RES, _LOAD, _PROC)          \\\n  {                                                                     \\\n    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));        \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \\\n    __m512 zmm_q = _mm512_set1_ps(xmm_p[0]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[1]);                                   \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[2]);                                   \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[3]);                                   \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                                     \\\n  }\n\n//! Iterative process of computing distance (FP16, M=16, N=8)\n#define MATRIX_FP16_ITER_16X8_AVX512(m, q, _RES, _LOAD, _PROC)             \\\n  {                                                                        \\\n    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));           \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \\\n    __m512 zmm_q = _mm512_set1_ps(ymm_p[0]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[1]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[2]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[3]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                                        \\\n    zmm_q 
= _mm512_set1_ps(ymm_p[4]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_4)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[5]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_5)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[6]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_6)                                        \\\n    zmm_q = _mm512_set1_ps(ymm_p[7]);                                      \\\n    _PROC(zmm_m, zmm_q, _RES##_0_7)                                        \\\n  }\n\n//! Iterative process of computing distance (FP16, M=16, N=16)\n#define MATRIX_FP16_ITER_16X16_AVX512(m, q, _RES, _LOAD, _PROC)  \\\n  {                                                              \\\n    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m))); \\\n    __m512 zmm_p = _mm512_cvtph_ps(_LOAD((const __m256i *)(q))); \\\n    __m512 zmm_q = _mm512_set1_ps(zmm_p[0]);                     \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[1]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[2]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[3]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[4]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_4)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[5]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_5)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[6]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_6)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[7]);                            
\\\n    _PROC(zmm_m, zmm_q, _RES##_0_7)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[8]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_8)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[9]);                            \\\n    _PROC(zmm_m, zmm_q, _RES##_0_9)                              \\\n    zmm_q = _mm512_set1_ps(zmm_p[10]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_10)                             \\\n    zmm_q = _mm512_set1_ps(zmm_p[11]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_11)                             \\\n    zmm_q = _mm512_set1_ps(zmm_p[12]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_12)                             \\\n    zmm_q = _mm512_set1_ps(zmm_p[13]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_13)                             \\\n    zmm_q = _mm512_set1_ps(zmm_p[14]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_14)                             \\\n    zmm_q = _mm512_set1_ps(zmm_p[15]);                           \\\n    _PROC(zmm_m, zmm_q, _RES##_0_15)                             \\\n  }\n\n//! Iterative process of computing distance (FP16, M=32, N=1)\n#define MATRIX_FP16_ITER_32X1_AVX512(m, q, _RES, _LOAD, _PROC)              \\\n  {                                                                         \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                           \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n    __m512 zmm_q = _mm512_cvtph_ps(_mm256_set1_epi16(*(const short *)q));   \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                     \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=2)\n#define MATRIX_FP16_ITER_32X2_AVX512(m, q, _RES, _LOAD, _PROC)              \\\n  {                                                                         \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                           \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));                     \\\n    __m512 zmm_q = _mm512_set1_ps(xmm_p[0]);                                \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[1]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)                     \\\n  }\n\n//! Iterative process of computing distance (FP16, M=32, N=4)\n#define MATRIX_FP16_ITER_32X4_AVX512(m, q, _RES, _LOAD, _PROC)              \\\n  {                                                                         \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                           \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q)));     \\\n    __m512 zmm_q = _mm512_set1_ps(xmm_p[0]);                                \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[1]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[2]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(xmm_p[3]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 3, 
zmm_m, zmm_q, _RES, _PROC)                     \\\n  }\n\n//! Iterative process of computing distance (FP16, M=32, N=8)\n#define MATRIX_FP16_ITER_32X8_AVX512(m, q, _RES, _LOAD, _PROC)              \\\n  {                                                                         \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                           \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q)));  \\\n    __m512 zmm_q = _mm512_set1_ps(ymm_p[0]);                                \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[1]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[2]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[3]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[4]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[5]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[6]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)                     \\\n    zmm_q = _mm512_set1_ps(ymm_p[7]);                                       \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)                     \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=16)\n#define MATRIX_FP16_ITER_32X16_AVX512(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                           \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                             \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));         \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1));   \\\n    __m512 zmm_p = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q))); \\\n    __m512 zmm_q = _mm512_set1_ps(zmm_p[0]);                                  \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[1]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[2]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[3]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[4]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[5]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[6]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[7]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[8]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, 
_PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[9]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[10]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[11]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[12]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[13]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[14]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[15]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)                      \\\n  }\n\n//! 
Iterative process of computing distance (FP16, M=32, N=32)\n#define MATRIX_FP16_ITER_32X32_AVX512(m, q, _RES, _LOAD, _PROC)               \\\n  {                                                                           \\\n    __m512i zmm_mi = _LOAD((const __m512i *)(m));                             \\\n    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));         \\\n    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1));   \\\n    __m512 zmm_p = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q))); \\\n    __m512 zmm_q = _mm512_set1_ps(zmm_p[0]);                                  \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[1]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[2]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[3]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[4]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[5]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[6]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[7]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[8]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, 
_PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[9]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)                       \\\n    zmm_q = _mm512_set1_ps(zmm_p[10]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[11]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[12]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[13]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[14]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[15]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_p = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q + 16)));   \\\n    zmm_q = _mm512_set1_ps(zmm_p[0]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 16, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[1]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 17, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[2]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 18, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[3]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 19, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[4]);                                         
\\\n    MATRIX_VAR_PROC(2, 1, 20, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[5]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 21, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[6]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 22, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[7]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 23, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[8]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 24, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[9]);                                         \\\n    MATRIX_VAR_PROC(2, 1, 25, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[10]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 26, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[11]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 27, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[12]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 28, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[13]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 29, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[14]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 30, zmm_m, zmm_q, _RES, _PROC)                      \\\n    zmm_q = _mm512_set1_ps(zmm_p[15]);                                        \\\n    MATRIX_VAR_PROC(2, 1, 31, zmm_m, zmm_q, _RES, _PROC)                      \\\n  }\n\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n//! 
Iterative process of computing distance (FP16, M=1, N=1)\n#define MATRIX_FP16_ITER_1X1_NEON(m, q, _RES, _PROC)   \\\n  {                                                    \\\n    float16x8_t v_m = vld1q_f16((const float16_t *)m); \\\n    float16x8_t v_q = vld1q_f16((const float16_t *)q); \\\n    _PROC(v_m, v_q, _RES##_0_0)                        \\\n  }\n\n#else\n//! Iterative process of computing distance (FP16, M=1, N=1)\n#define MATRIX_FP16_ITER_1X1_NEON(m, q, _RES, _PROC)     \\\n  {                                                      \\\n    float16x8_t v_m = vld1q_f16((const float16_t *)m);   \\\n    float16x8_t v_q = vld1q_f16((const float16_t *)q);   \\\n    float32x4_t v_m_0 = vcvt_f32_f16(vget_low_f16(v_m)); \\\n    float32x4_t v_q_0 = vcvt_f32_f16(vget_low_f16(v_q)); \\\n    _PROC(v_m_0, v_q_0, _RES##_0_0)                      \\\n    v_m_0 = vcvt_high_f32_f16(v_m);                      \\\n    v_q_0 = vcvt_high_f32_f16(v_q);                      \\\n    _PROC(v_m_0, v_q_0, _RES##_0_0)                      \\\n  }\n\n#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC"
  },
  {
    "path": "src/ailego/math/distance_matrix_fp32.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"matrix_define.i\"\n\n#if !defined(__AVX__)\n#undef _mm_permute_ps\n#define _mm_permute_ps(a, b) _mm_shuffle_ps((a), (a), (b))\n#define _mm_broadcast_ss(a) _mm_load1_ps(a)\n#endif  // !__AVX__\n\n#if defined(__AVX__) && defined(__GNUC__)\n#define _mm256_set_m128(a, b) \\\n  _mm256_insertf128_ps(_mm256_castps128_ps256(b), (a), 1)\n#endif  // __AVX__ && __GNUC__\n\n#if defined(__ARM_NEON) && !defined(__aarch64__)\n#define vdupq_laneq_f32(a, b) vdupq_n_f32(vgetq_lane_f32(a, b))\n#endif  // __ARM_NEON && !__aarch64__\n\n//! Iterative process of computing distance (FP32, M=2, N=1)\n#define MATRIX_FP32_ITER_2X1_SSE(m, q, _RES, _LOAD, _PROC)         \\\n  {                                                                \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                                 \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                                 \\\n    __m128 xmm_q = _LOAD(q);                                       \\\n    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(1, 1, 0, 0)); \\\n    _PROC(xmm_m_0, xmm_p, _RES##_0_0)                              \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 2, 2));        \\\n    _PROC(xmm_m_1, xmm_p, _RES##_0_1)                              \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=2, N=2)\n#define MATRIX_FP32_ITER_2X2_SSE(m, q, _RES, _LOAD, _PROC)         \\\n  {                                                                \\\n    __m128 xmm_q = _LOAD(q);                                       \\\n    __m128 xmm_m = _LOAD(m);                                       \\\n    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0)); \\\n    _PROC(xmm_m, xmm_p, _RES##_0_0)                                \\\n    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));        \\\n    _PROC(xmm_m, xmm_p, _RES##_0_1)                                \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=1)\n#define MATRIX_FP32_ITER_4X1_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                         \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \\\n    _PROC(xmm_m_0, xmm_q, _RES##_0_0)                      \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    _PROC(xmm_m_1, xmm_q, _RES##_0_1)                      \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=2)\n#define MATRIX_FP32_ITER_4X2_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m = _LOAD(m);                               \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \\\n    _PROC(xmm_m, xmm_q, _RES##_0_0)                        \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    _PROC(xmm_m, xmm_q, _RES##_0_1)                        \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=4, N=4)\n#define MATRIX_FP32_ITER_4X4_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m = _LOAD(m);                               \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \\\n    _PROC(xmm_m, xmm_q, _RES##_0_0)                        \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    _PROC(xmm_m, xmm_q, _RES##_0_1)                        \\\n    xmm_q = _mm_broadcast_ss(q + 2);                       \\\n    _PROC(xmm_m, xmm_q, _RES##_0_2)                        \\\n    xmm_q = _mm_broadcast_ss(q + 3);                       \\\n    _PROC(xmm_m, xmm_q, _RES##_0_3)                        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=1)\n#define MATRIX_FP32_ITER_8X1_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                         \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                    \\\n    _PROC(xmm_m_0, xmm_q, _RES##_0_0)                      \\\n    _PROC(xmm_m_1, xmm_q, _RES##_1_0)                      \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=2)\n#define MATRIX_FP32_ITER_8X2_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                         \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=8, N=4)\n#define MATRIX_FP32_ITER_8X4_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                         \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 2);                       \\\n    MATRIX_VAR_PROC(2, 1, 2, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 3);                       \\\n    MATRIX_VAR_PROC(2, 1, 3, xmm_m, xmm_q, _RES, _PROC)    \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=8)\n#define MATRIX_FP32_ITER_8X8_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                         \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                    \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 1);                       \\\n    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 2);                       \\\n    MATRIX_VAR_PROC(2, 1, 2, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 3);                       \\\n    MATRIX_VAR_PROC(2, 1, 3, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 4);                       \\\n    MATRIX_VAR_PROC(2, 1, 4, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 5);                       \\\n    MATRIX_VAR_PROC(2, 1, 5, xmm_m, xmm_q, _RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 6);                       \\\n    MATRIX_VAR_PROC(2, 1, 6, xmm_m, xmm_q, 
_RES, _PROC)    \\\n    xmm_q = _mm_broadcast_ss(q + 7);                       \\\n    MATRIX_VAR_PROC(2, 1, 7, xmm_m, xmm_q, _RES, _PROC)    \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=1)\n#define MATRIX_FP32_ITER_16X1_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                     \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=2)\n#define MATRIX_FP32_ITER_16X2_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                 \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 1);                        \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=4)\n#define MATRIX_FP32_ITER_16X4_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q + 0);                 \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 1);                        \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 2);                        \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 3);                        \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=8)\n#define MATRIX_FP32_ITER_16X8_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                     \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 1);                        \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 2);                        \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 3);                        \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 4);                        \\\n    
MATRIX_VAR_PROC(4, 1, 4, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 5);                        \\\n    MATRIX_VAR_PROC(4, 1, 5, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 6);                        \\\n    MATRIX_VAR_PROC(4, 1, 6, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 7);                        \\\n    MATRIX_VAR_PROC(4, 1, 7, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=16)\n#define MATRIX_FP32_ITER_16X16_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                           \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                           \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                           \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                          \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                      \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 1);                         \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 2);                         \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 3);                         \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 4);                         \\\n    MATRIX_VAR_PROC(4, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 5);                         \\\n    MATRIX_VAR_PROC(4, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 6);                         \\\n    MATRIX_VAR_PROC(4, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 7);                         \\\n    MATRIX_VAR_PROC(4, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 8);                   
      \\\n    MATRIX_VAR_PROC(4, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 9);                         \\\n    MATRIX_VAR_PROC(4, 1, 9, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 10);                        \\\n    MATRIX_VAR_PROC(4, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 11);                        \\\n    MATRIX_VAR_PROC(4, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 12);                        \\\n    MATRIX_VAR_PROC(4, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 13);                        \\\n    MATRIX_VAR_PROC(4, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 14);                        \\\n    MATRIX_VAR_PROC(4, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 15);                        \\\n    MATRIX_VAR_PROC(4, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=1)\n#define MATRIX_FP32_ITER_32X1_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                     \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    _PROC(xmm_m_0, xmm_q, _RES##_0_0)                       \\\n    _PROC(xmm_m_1, xmm_q, _RES##_1_0)                       \\\n    _PROC(xmm_m_2, xmm_q, _RES##_2_0)                       \\\n    _PROC(xmm_m_3, xmm_q, _RES##_3_0)                       \\\n    xmm_m_0 = _LOAD(m + 16);                                \\\n    xmm_m_1 = _LOAD(m + 20);                                \\\n    xmm_m_2 = _LOAD(m + 24);                                \\\n    xmm_m_3 = _LOAD(m + 28);                     
           \\\n    _PROC(xmm_m_0, xmm_q, _RES##_4_0)                       \\\n    _PROC(xmm_m_1, xmm_q, _RES##_5_0)                       \\\n    _PROC(xmm_m_2, xmm_q, _RES##_6_0)                       \\\n    _PROC(xmm_m_3, xmm_q, _RES##_7_0)                       \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=2)\n#define MATRIX_FP32_ITER_32X2_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \\\n    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    MATRIX_VAR_PROC(1, 2, 0, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 1, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 2, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 3, xmm_m_3, xmm_q, _RES, _PROC)   \\\n    xmm_m_0 = _LOAD(m + 16);                                \\\n    xmm_m_1 = _LOAD(m + 20);                                \\\n    xmm_m_2 = _LOAD(m + 24);                                \\\n    xmm_m_3 = _LOAD(m + 28);                                \\\n    MATRIX_VAR_PROC(1, 2, 4, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 5, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 6, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 2, 7, xmm_m_3, xmm_q, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=4)\n#define MATRIX_FP32_ITER_32X4_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \\\n    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \\\n    __m128 xmm_q_2 = _mm_broadcast_ss(q + 2);               \\\n    __m128 xmm_q_3 = _mm_broadcast_ss(q + 3);               \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    MATRIX_VAR_PROC(1, 4, 0, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 1, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 2, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 3, xmm_m_3, xmm_q, _RES, _PROC)   \\\n    xmm_m_0 = _LOAD(m + 16);                                \\\n    xmm_m_1 = _LOAD(m + 20);                                \\\n    xmm_m_2 = _LOAD(m + 24);                                \\\n    xmm_m_3 = _LOAD(m + 28);                                \\\n    MATRIX_VAR_PROC(1, 4, 4, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 5, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 6, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 4, 7, xmm_m_3, xmm_q, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=8)\n#define MATRIX_FP32_ITER_32X8_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \\\n    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \\\n    __m128 xmm_q_2 = _mm_broadcast_ss(q + 2);               \\\n    __m128 xmm_q_3 = _mm_broadcast_ss(q + 3);               \\\n    __m128 xmm_q_4 = _mm_broadcast_ss(q + 4);               \\\n    __m128 xmm_q_5 = _mm_broadcast_ss(q + 5);               \\\n    __m128 xmm_q_6 = _mm_broadcast_ss(q + 6);               \\\n    __m128 xmm_q_7 = _mm_broadcast_ss(q + 7);               \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                          \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                          \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                          \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                         \\\n    MATRIX_VAR_PROC(1, 8, 0, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 1, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 2, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 3, xmm_m_3, xmm_q, _RES, _PROC)   \\\n    xmm_m_0 = _LOAD(m + 16);                                \\\n    xmm_m_1 = _LOAD(m + 20);                                \\\n    xmm_m_2 = _LOAD(m + 24);                                \\\n    xmm_m_3 = _LOAD(m + 28);                                \\\n    MATRIX_VAR_PROC(1, 8, 4, xmm_m_0, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 5, xmm_m_1, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 6, xmm_m_2, xmm_q, _RES, _PROC)   \\\n    MATRIX_VAR_PROC(1, 8, 7, xmm_m_3, xmm_q, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=16)\n#define MATRIX_FP32_ITER_32X16_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                           \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                           \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                           \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                          \\\n    __m128 xmm_m_4 = _LOAD(m + 16);                          \\\n    __m128 xmm_m_5 = _LOAD(m + 20);                          \\\n    __m128 xmm_m_6 = _LOAD(m + 24);                          \\\n    __m128 xmm_m_7 = _LOAD(m + 28);                          \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                      \\\n    MATRIX_VAR_PROC(8, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 1);                         \\\n    MATRIX_VAR_PROC(8, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 2);                         \\\n    MATRIX_VAR_PROC(8, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 3);                         \\\n    MATRIX_VAR_PROC(8, 1, 3, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 4);                         \\\n    MATRIX_VAR_PROC(8, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 5);                         \\\n    MATRIX_VAR_PROC(8, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 6);                         \\\n    MATRIX_VAR_PROC(8, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 7);                         \\\n    MATRIX_VAR_PROC(8, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 8);                         \\\n    MATRIX_VAR_PROC(8, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 9);                         \\\n    MATRIX_VAR_PROC(8, 1, 9, xmm_m, xmm_q, _RES, 
_PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 10);                        \\\n    MATRIX_VAR_PROC(8, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 11);                        \\\n    MATRIX_VAR_PROC(8, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 12);                        \\\n    MATRIX_VAR_PROC(8, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 13);                        \\\n    MATRIX_VAR_PROC(8, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 14);                        \\\n    MATRIX_VAR_PROC(8, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 15);                        \\\n    MATRIX_VAR_PROC(8, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=32)\n#define MATRIX_FP32_ITER_32X32_SSE(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m128 xmm_m_0 = _LOAD(m + 0);                           \\\n    __m128 xmm_m_1 = _LOAD(m + 4);                           \\\n    __m128 xmm_m_2 = _LOAD(m + 8);                           \\\n    __m128 xmm_m_3 = _LOAD(m + 12);                          \\\n    __m128 xmm_m_4 = _LOAD(m + 16);                          \\\n    __m128 xmm_m_5 = _LOAD(m + 20);                          \\\n    __m128 xmm_m_6 = _LOAD(m + 24);                          \\\n    __m128 xmm_m_7 = _LOAD(m + 28);                          \\\n    __m128 xmm_q = _mm_broadcast_ss(q);                      \\\n    MATRIX_VAR_PROC(8, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 1);                         \\\n    MATRIX_VAR_PROC(8, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 2);                         \\\n    MATRIX_VAR_PROC(8, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 3);                         \\\n    MATRIX_VAR_PROC(8, 1, 
3, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 4);                         \\\n    MATRIX_VAR_PROC(8, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 5);                         \\\n    MATRIX_VAR_PROC(8, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 6);                         \\\n    MATRIX_VAR_PROC(8, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 7);                         \\\n    MATRIX_VAR_PROC(8, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 8);                         \\\n    MATRIX_VAR_PROC(8, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 9);                         \\\n    MATRIX_VAR_PROC(8, 1, 9, xmm_m, xmm_q, _RES, _PROC)      \\\n    xmm_q = _mm_broadcast_ss(q + 10);                        \\\n    MATRIX_VAR_PROC(8, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 11);                        \\\n    MATRIX_VAR_PROC(8, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 12);                        \\\n    MATRIX_VAR_PROC(8, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 13);                        \\\n    MATRIX_VAR_PROC(8, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 14);                        \\\n    MATRIX_VAR_PROC(8, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 15);                        \\\n    MATRIX_VAR_PROC(8, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 16);                        \\\n    MATRIX_VAR_PROC(8, 1, 16, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 17);                        \\\n    MATRIX_VAR_PROC(8, 1, 17, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 18);                        \\\n    MATRIX_VAR_PROC(8, 1, 18, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q 
= _mm_broadcast_ss(q + 19);                        \\\n    MATRIX_VAR_PROC(8, 1, 19, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 20);                        \\\n    MATRIX_VAR_PROC(8, 1, 20, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 21);                        \\\n    MATRIX_VAR_PROC(8, 1, 21, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 22);                        \\\n    MATRIX_VAR_PROC(8, 1, 22, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 23);                        \\\n    MATRIX_VAR_PROC(8, 1, 23, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 24);                        \\\n    MATRIX_VAR_PROC(8, 1, 24, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 25);                        \\\n    MATRIX_VAR_PROC(8, 1, 25, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 26);                        \\\n    MATRIX_VAR_PROC(8, 1, 26, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 27);                        \\\n    MATRIX_VAR_PROC(8, 1, 27, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 28);                        \\\n    MATRIX_VAR_PROC(8, 1, 28, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 29);                        \\\n    MATRIX_VAR_PROC(8, 1, 29, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 30);                        \\\n    MATRIX_VAR_PROC(8, 1, 30, xmm_m, xmm_q, _RES, _PROC)     \\\n    xmm_q = _mm_broadcast_ss(q + 31);                        \\\n    MATRIX_VAR_PROC(8, 1, 31, xmm_m, xmm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=2, N=1)\n#define MATRIX_FP32_ITER_2X1_AVX(m, q, _RES, _LOAD, _PROC)             \\\n  {                                                                    \\\n    __m256 ymm_m = _LOAD(m);                                           \\\n    __m256 ymm_q =                                                     \\\n        _mm256_set_ps(q[3], q[3], q[2], q[2], q[1], q[1], q[0], q[0]); \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                    \\\n  }\n\n//! Iterative process of computing distance (FP32, M=2, N=2)\n#define MATRIX_FP32_ITER_2X2_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m256 ymm_q = _LOAD(q);                               \\\n    __m256 ymm_m = _LOAD(m);                               \\\n    __m256 ymm_p = _mm256_moveldup_ps(ymm_q);              \\\n    _PROC(ymm_m, ymm_p, _RES##_0_0)                        \\\n    ymm_p = _mm256_movehdup_ps(ymm_q);                     \\\n    _PROC(ymm_m, ymm_p, _RES##_0_1)                        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=1)\n#define MATRIX_FP32_ITER_4X1_AVX(m, q, _RES, _LOAD, _PROC)             \\\n  {                                                                    \\\n    __m256 ymm_m = _LOAD(m);                                           \\\n    __m256 ymm_q =                                                     \\\n        _mm256_set_m128(_mm_broadcast_ss(q + 1), _mm_broadcast_ss(q)); \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                    \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=4, N=2)\n#define MATRIX_FP32_ITER_4X2_AVX(m, q, _RES, _LOAD, _PROC)                     \\\n  {                                                                            \\\n    __m256 ymm_m = _LOAD(m);                                                   \\\n    __m256 ymm_q =                                                             \\\n        _mm256_set_m128(_mm_broadcast_ss(q + 2), _mm_broadcast_ss(q + 0));     \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                                            \\\n    ymm_q = _mm256_set_m128(_mm_broadcast_ss(q + 3), _mm_broadcast_ss(q + 1)); \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                                            \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=4)\n#define MATRIX_FP32_ITER_4X4_AVX(m, q, _RES, _LOAD, _PROC)            \\\n  {                                                                   \\\n    __m256 ymm_q = _LOAD(q);                                          \\\n    __m256 ymm_m = _LOAD(m);                                          \\\n    __m256 ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(0, 0, 0, 0)); \\\n    _PROC(ymm_m, ymm_p, _RES##_0_0)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(1, 1, 1, 1));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_1)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(2, 2, 2, 2));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_2)                                   \\\n    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(3, 3, 3, 3));        \\\n    _PROC(ymm_m, ymm_p, _RES##_0_3)                                   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=8, N=1)\n#define MATRIX_FP32_ITER_8X1_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m256 ymm_m = _LOAD(m);                               \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                 \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=2)\n#define MATRIX_FP32_ITER_8X2_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m256 ymm_m = _LOAD(m);                               \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                 \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=4)\n#define MATRIX_FP32_ITER_8X4_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m256 ymm_m = _LOAD(m);                               \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                 \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_2)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_3)                        \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=8, N=8)\n#define MATRIX_FP32_ITER_8X8_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                        \\\n    __m256 ymm_m = _LOAD(m);                               \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                 \\\n    _PROC(ymm_m, ymm_q, _RES##_0_0)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_1)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_2)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_3)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 4);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_4)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_5)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_6)                        \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                    \\\n    _PROC(ymm_m, ymm_q, _RES##_0_7)                        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=1)\n#define MATRIX_FP32_ITER_16X1_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=2)\n#define MATRIX_FP32_ITER_16X2_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=4)\n#define MATRIX_FP32_ITER_16X4_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                     \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                     \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=8)\n#define MATRIX_FP32_ITER_16X8_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                     \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                     \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 4);                     \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                     \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                     \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                     \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=16)\n#define MATRIX_FP32_ITER_16X16_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                           \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                           \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                   \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                      \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                      \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                      \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 4);                      \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                      \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                      \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                      \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 8);                      \\\n    MATRIX_VAR_PROC(2, 1, 8, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 9);                      \\\n    MATRIX_VAR_PROC(2, 1, 9, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 10);                     \\\n    MATRIX_VAR_PROC(2, 1, 10, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 11);                     \\\n    MATRIX_VAR_PROC(2, 1, 11, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 12);                     \\\n    MATRIX_VAR_PROC(2, 1, 12, ymm_m, ymm_q, _RES, 
_PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 13);                     \\\n    MATRIX_VAR_PROC(2, 1, 13, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 14);                     \\\n    MATRIX_VAR_PROC(2, 1, 14, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 15);                     \\\n    MATRIX_VAR_PROC(2, 1, 15, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=1)\n#define MATRIX_FP32_ITER_32X1_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                         \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                         \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=2)\n#define MATRIX_FP32_ITER_32X2_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                         \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                         \\\n    __m256 ymm_q = _mm256_broadcast_ss(q + 0);              \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=4)\n#define MATRIX_FP32_ITER_32X4_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                         \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                         \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=8)\n#define MATRIX_FP32_ITER_32X8_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                         \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                          \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                          \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                         \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                         \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                  \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 4);                     \\\n    
MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                     \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                     \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                     \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=16)\n#define MATRIX_FP32_ITER_32X16_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                           \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                           \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                          \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                   \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                      \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                      \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                      \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 4);                      \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                      \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                      \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                      \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 8);                
      \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 9);                      \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 10);                     \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 11);                     \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 12);                     \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 13);                     \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 14);                     \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 15);                     \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=32)\n#define MATRIX_FP32_ITER_32X32_AVX(m, q, _RES, _LOAD, _PROC) \\\n  {                                                          \\\n    __m256 ymm_m_0 = _LOAD(m + 0);                           \\\n    __m256 ymm_m_1 = _LOAD(m + 8);                           \\\n    __m256 ymm_m_2 = _LOAD(m + 16);                          \\\n    __m256 ymm_m_3 = _LOAD(m + 24);                          \\\n    __m256 ymm_q = _mm256_broadcast_ss(q);                   \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 1);                      \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 2);                      \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 3);                      \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q 
+ 4);                      \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 5);                      \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 6);                      \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 7);                      \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 8);                      \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 9);                      \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_m, ymm_q, _RES, _PROC)      \\\n    ymm_q = _mm256_broadcast_ss(q + 10);                     \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 11);                     \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 12);                     \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 13);                     \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 14);                     \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 15);                     \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 16);                     \\\n    MATRIX_VAR_PROC(4, 1, 16, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 17);                     \\\n    MATRIX_VAR_PROC(4, 1, 17, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 18);                     \\\n    MATRIX_VAR_PROC(4, 1, 18, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 19);                     \\\n    
MATRIX_VAR_PROC(4, 1, 19, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 20);                     \\\n    MATRIX_VAR_PROC(4, 1, 20, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 21);                     \\\n    MATRIX_VAR_PROC(4, 1, 21, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 22);                     \\\n    MATRIX_VAR_PROC(4, 1, 22, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 23);                     \\\n    MATRIX_VAR_PROC(4, 1, 23, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 24);                     \\\n    MATRIX_VAR_PROC(4, 1, 24, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 25);                     \\\n    MATRIX_VAR_PROC(4, 1, 25, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 26);                     \\\n    MATRIX_VAR_PROC(4, 1, 26, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 27);                     \\\n    MATRIX_VAR_PROC(4, 1, 27, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 28);                     \\\n    MATRIX_VAR_PROC(4, 1, 28, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 29);                     \\\n    MATRIX_VAR_PROC(4, 1, 29, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 30);                     \\\n    MATRIX_VAR_PROC(4, 1, 30, ymm_m, ymm_q, _RES, _PROC)     \\\n    ymm_q = _mm256_broadcast_ss(q + 31);                     \\\n    MATRIX_VAR_PROC(4, 1, 31, ymm_m, ymm_q, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=1)\n#define MATRIX_FP32_ITER_16X1_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m = _LOAD(m);                                   \\\n    __m512 zmm_q = _mm512_set1_ps(*q);                         \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=2)\n#define MATRIX_FP32_ITER_16X2_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m = _LOAD(m);                                   \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=4)\n#define MATRIX_FP32_ITER_16X4_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m = _LOAD(m);                                   \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \\\n    zmm_q = _mm512_set1_ps(q[2]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                            \\\n    zmm_q = _mm512_set1_ps(q[3]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                            \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=8)\n#define MATRIX_FP32_ITER_16X8_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m = _LOAD(m);                                   \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \\\n    zmm_q = _mm512_set1_ps(q[2]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                            \\\n    zmm_q = _mm512_set1_ps(q[3]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                            \\\n    zmm_q = _mm512_set1_ps(q[4]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_4)                            \\\n    zmm_q = _mm512_set1_ps(q[5]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_5)                            \\\n    zmm_q = _mm512_set1_ps(q[6]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_6)                            \\\n    zmm_q = _mm512_set1_ps(q[7]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_7)                            \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=16)\n#define MATRIX_FP32_ITER_16X16_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m512 zmm_m = _LOAD(m);                                    \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                        \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                             \\\n    zmm_q = _mm512_set1_ps(q[1]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_1)                             \\\n    zmm_q = _mm512_set1_ps(q[2]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_2)                             \\\n    zmm_q = _mm512_set1_ps(q[3]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_3)                             \\\n    zmm_q = _mm512_set1_ps(q[4]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_4)                             \\\n    zmm_q = _mm512_set1_ps(q[5]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_5)                             \\\n    zmm_q = _mm512_set1_ps(q[6]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_6)                             \\\n    zmm_q = _mm512_set1_ps(q[7]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_7)                             \\\n    zmm_q = _mm512_set1_ps(q[8]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_8)                             \\\n    zmm_q = _mm512_set1_ps(q[9]);                               \\\n    _PROC(zmm_m, zmm_q, _RES##_0_9)                             \\\n    zmm_q = _mm512_set1_ps(q[10]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_10)                            \\\n    zmm_q = _mm512_set1_ps(q[11]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_11)                            \\\n    zmm_q = _mm512_set1_ps(q[12]);                              \\\n    _PROC(zmm_m, zmm_q, 
_RES##_0_12)                            \\\n    zmm_q = _mm512_set1_ps(q[13]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_13)                            \\\n    zmm_q = _mm512_set1_ps(q[14]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_14)                            \\\n    zmm_q = _mm512_set1_ps(q[15]);                              \\\n    _PROC(zmm_m, zmm_q, _RES##_0_15)                            \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=1)\n#define MATRIX_FP32_ITER_32X1_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_q = _mm512_set1_ps(*q);                         \\\n    __m512 zmm_m = _LOAD(m);                                   \\\n    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \\\n    zmm_m = _LOAD(m + 16);                                     \\\n    _PROC(zmm_m, zmm_q, _RES##_1_0)                            \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=2)\n#define MATRIX_FP32_ITER_32X2_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m_0 = _LOAD(m + 0);                             \\\n    __m512 zmm_m_1 = _LOAD(m + 16);                            \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=4)\n#define MATRIX_FP32_ITER_32X4_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m_0 = _LOAD(m + 0);                             \\\n    __m512 zmm_m_1 = _LOAD(m + 16);                            \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[2]);                              \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[3]);                              \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=8)\n#define MATRIX_FP32_ITER_32X8_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m512 zmm_m_0 = _LOAD(m + 0);                             \\\n    __m512 zmm_m_1 = _LOAD(m + 16);                            \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                       \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[1]);                              \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[2]);                              \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[3]);                              \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[4]);                              \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[5]);                              \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)       
 \\\n    zmm_q = _mm512_set1_ps(q[6]);                              \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[7]);                              \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=16)\n#define MATRIX_FP32_ITER_32X16_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m512 zmm_m_0 = _LOAD(m + 0);                              \\\n    __m512 zmm_m_1 = _LOAD(m + 16);                             \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                        \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[1]);                               \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[2]);                               \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[3]);                               \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[4]);                               \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[5]);                               \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[6]);                               \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[7]);                               \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[8]);                               \\\n    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[9]);                               \\\n    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = 
_mm512_set1_ps(q[10]);                              \\\n    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[11]);                              \\\n    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[12]);                              \\\n    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[13]);                              \\\n    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[14]);                              \\\n    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[15]);                              \\\n    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)        \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=32)\n#define MATRIX_FP32_ITER_32X32_AVX512(m, q, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m512 zmm_m_0 = _LOAD(m + 0);                              \\\n    __m512 zmm_m_1 = _LOAD(m + 16);                             \\\n    __m512 zmm_q = _mm512_set1_ps(q[0]);                        \\\n    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[1]);                               \\\n    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[2]);                               \\\n    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[3]);                               \\\n    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[4]);                               \\\n    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[5]);                               \\\n    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[6]);    
                           \\\n    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[7]);                               \\\n    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[8]);                               \\\n    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[9]);                               \\\n    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)         \\\n    zmm_q = _mm512_set1_ps(q[10]);                              \\\n    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[11]);                              \\\n    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[12]);                              \\\n    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[13]);                              \\\n    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[14]);                              \\\n    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[15]);                              \\\n    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[16]);                              \\\n    MATRIX_VAR_PROC(2, 1, 16, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[17]);                              \\\n    MATRIX_VAR_PROC(2, 1, 17, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[18]);                              \\\n    MATRIX_VAR_PROC(2, 1, 18, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[19]);                              \\\n    MATRIX_VAR_PROC(2, 1, 19, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[20]);                              \\\n    MATRIX_VAR_PROC(2, 1, 20, zmm_m, zmm_q, _RES, _PROC)        
\\\n    zmm_q = _mm512_set1_ps(q[21]);                              \\\n    MATRIX_VAR_PROC(2, 1, 21, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[22]);                              \\\n    MATRIX_VAR_PROC(2, 1, 22, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[23]);                              \\\n    MATRIX_VAR_PROC(2, 1, 23, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[24]);                              \\\n    MATRIX_VAR_PROC(2, 1, 24, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[25]);                              \\\n    MATRIX_VAR_PROC(2, 1, 25, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[26]);                              \\\n    MATRIX_VAR_PROC(2, 1, 26, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[27]);                              \\\n    MATRIX_VAR_PROC(2, 1, 27, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[28]);                              \\\n    MATRIX_VAR_PROC(2, 1, 28, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[29]);                              \\\n    MATRIX_VAR_PROC(2, 1, 29, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[30]);                              \\\n    MATRIX_VAR_PROC(2, 1, 30, zmm_m, zmm_q, _RES, _PROC)        \\\n    zmm_q = _mm512_set1_ps(q[31]);                              \\\n    MATRIX_VAR_PROC(2, 1, 31, zmm_m, zmm_q, _RES, _PROC)        \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=2, N=1)\n#define MATRIX_FP32_ITER_2X1_NEON(m, q, _RES, _PROC)                \\\n  {                                                                 \\\n    float32x4_t v_m = vld1q_f32(m);                                 \\\n    float32x2_t v_q = vld1_f32(q);                                  \\\n    float32x4_t v_p =                                               \\\n        vcombine_f32(vdup_lane_f32(v_q, 0), vdup_lane_f32(v_q, 1)); \\\n    _PROC(v_m, v_p, _RES)                                           \\\n  }\n\n//! Iterative process of computing distance (FP32, M=2, N=2)\n#define MATRIX_FP32_ITER_2X2_NEON(m, q, _RES, _PROC)                      \\\n  {                                                                       \\\n    float32x4_t v_q = vld1q_f32(q);                                       \\\n    float32x4_t v_m = vld1q_f32(m);                                       \\\n    float32x2_t v_q_0 = vget_low_f32(v_q);                                \\\n    float32x2_t v_q_1 = vget_high_f32(v_q);                               \\\n    v_q = vcombine_f32(vdup_lane_f32(v_q_0, 0), vdup_lane_f32(v_q_1, 0)); \\\n    _PROC(v_m, v_q, _RES##_0_0)                                           \\\n    v_q = vcombine_f32(vdup_lane_f32(v_q_0, 1), vdup_lane_f32(v_q_1, 1)); \\\n    _PROC(v_m, v_q, _RES##_0_1)                                           \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=4, N=1)\n#define MATRIX_FP32_ITER_4X1_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);            \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);            \\\n    float32x2_t v_p = vld1_f32(q);                   \\\n    float32x4_t v_q = vdupq_lane_f32(v_p, 0);        \\\n    _PROC(v_m_0, v_q, _RES##_0_0)                    \\\n    v_q = vdupq_lane_f32(v_p, 1);                    \\\n    _PROC(v_m_1, v_q, _RES##_0_1)                    \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=2)\n#define MATRIX_FP32_ITER_4X2_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m = vld1q_f32(m);                  \\\n    float32x2_t v_p = vld1_f32(q);                   \\\n    float32x4_t v_q = vdupq_lane_f32(v_p, 0);        \\\n    _PROC(v_m, v_q, _RES##_0_0)                      \\\n    v_q = vdupq_lane_f32(v_p, 1);                    \\\n    _PROC(v_m, v_q, _RES##_0_1)                      \\\n  }\n\n//! Iterative process of computing distance (FP32, M=4, N=4)\n#define MATRIX_FP32_ITER_4X4_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m = vld1q_f32(m);                  \\\n    float32x4_t v_p = vld1q_f32(q);                  \\\n    float32x4_t v_q = vdupq_laneq_f32(v_p, 0);       \\\n    _PROC(v_m, v_q, _RES##_0_0)                      \\\n    v_q = vdupq_laneq_f32(v_p, 1);                   \\\n    _PROC(v_m, v_q, _RES##_0_1)                      \\\n    v_q = vdupq_laneq_f32(v_p, 2);                   \\\n    _PROC(v_m, v_q, _RES##_0_2)                      \\\n    v_q = vdupq_laneq_f32(v_p, 3);                   \\\n    _PROC(v_m, v_q, _RES##_0_3)                      \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=8, N=1)\n#define MATRIX_FP32_ITER_8X1_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);            \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);            \\\n    float32x4_t v_q = vld1q_dup_f32(q);              \\\n    _PROC(v_m_0, v_q, _RES##_0_0)                    \\\n    _PROC(v_m_1, v_q, _RES##_1_0)                    \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=2)\n#define MATRIX_FP32_ITER_8X2_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);            \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);            \\\n    float32x2_t v_p = vld1_f32(q);                   \\\n    float32x4_t v_q = vdupq_lane_f32(v_p, 0);        \\\n    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_lane_f32(v_p, 1);                    \\\n    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \\\n  }\n\n//! Iterative process of computing distance (FP32, M=8, N=4)\n#define MATRIX_FP32_ITER_8X4_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);            \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);            \\\n    float32x4_t v_p = vld1q_f32(q);                  \\\n    float32x4_t v_q = vdupq_laneq_f32(v_p, 0);       \\\n    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 1);                   \\\n    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 2);                   \\\n    MATRIX_VAR_PROC(2, 1, 2, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 3);                   \\\n    MATRIX_VAR_PROC(2, 1, 3, v_m, v_q, _RES, _PROC)  \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=8, N=8)\n#define MATRIX_FP32_ITER_8X8_NEON(m, q, _RES, _PROC) \\\n  {                                                  \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);            \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);            \\\n    float32x4_t v_p = vld1q_f32(q + 0);              \\\n    float32x4_t v_q = vdupq_laneq_f32(v_p, 0);       \\\n    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 1);                   \\\n    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 2);                   \\\n    MATRIX_VAR_PROC(2, 1, 2, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 3);                   \\\n    MATRIX_VAR_PROC(2, 1, 3, v_m, v_q, _RES, _PROC)  \\\n    v_p = vld1q_f32(q + 4);                          \\\n    v_q = vdupq_laneq_f32(v_p, 0);                   \\\n    MATRIX_VAR_PROC(2, 1, 4, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 1);                   \\\n    MATRIX_VAR_PROC(2, 1, 5, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 2);                   \\\n    MATRIX_VAR_PROC(2, 1, 6, v_m, v_q, _RES, _PROC)  \\\n    v_q = vdupq_laneq_f32(v_p, 3);                   \\\n    MATRIX_VAR_PROC(2, 1, 7, v_m, v_q, _RES, _PROC)  \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=1)\n#define MATRIX_FP32_ITER_16X1_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    float32x4_t v_q = vld1q_dup_f32(q);               \\\n    MATRIX_VAR_PROC(4, 1, 0, v_m, v_q, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=2)\n#define MATRIX_FP32_ITER_16X2_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    float32x2_t v_p = vld1_f32(q);                    \\\n    float32x4_t v_q = vdupq_lane_f32(v_p, 0);         \\\n    MATRIX_VAR_PROC(4, 1, 0, v_m, v_q, _RES, _PROC)   \\\n    v_q = vdupq_lane_f32(v_p, 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, v_m, v_q, _RES, _PROC)   \\\n  }\n\n//! Iterative process of computing distance (FP32, M=16, N=4)\n#define MATRIX_FP32_ITER_16X4_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    float32x4_t v_q = vld1q_f32(q);                   \\\n    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);        \\\n    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=8)\n#define MATRIX_FP32_ITER_16X8_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    float32x4_t v_q = vld1q_f32(q + 0);               \\\n    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);        \\\n    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 4);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                    \\\n    MATRIX_VAR_PROC(4, 1, 4, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 5, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 6, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 7, v_m, v_p, _RES, _PROC)   \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=16, N=16)\n#define MATRIX_FP32_ITER_16X16_NEON(m, q, _RES, _PROC) \\\n  {                                                    \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);              \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);              \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);              \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);             \\\n    float32x4_t v_q = vld1q_f32(q + 0);                \\\n    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \\\n    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 4);                            \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(4, 1, 4, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 5, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 6, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 7, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 8);                            \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(4, 1, 8, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 9, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 10, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 11, v_m, v_p, _RES, 
_PROC)   \\\n    v_q = vld1q_f32(q + 12);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(4, 1, 12, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 13, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 14, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 15, v_m, v_p, _RES, _PROC)   \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=1)\n#define MATRIX_FP32_ITER_32X1_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_q = vld1q_dup_f32(q);               \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    _PROC(v_m_0, v_q, _RES##_0_0)                     \\\n    _PROC(v_m_1, v_q, _RES##_1_0)                     \\\n    _PROC(v_m_2, v_q, _RES##_2_0)                     \\\n    _PROC(v_m_3, v_q, _RES##_3_0)                     \\\n    v_m_0 = vld1q_f32(m + 16);                        \\\n    v_m_1 = vld1q_f32(m + 20);                        \\\n    v_m_2 = vld1q_f32(m + 24);                        \\\n    v_m_3 = vld1q_f32(m + 28);                        \\\n    _PROC(v_m_0, v_q, _RES##_4_0)                     \\\n    _PROC(v_m_1, v_q, _RES##_5_0)                     \\\n    _PROC(v_m_2, v_q, _RES##_6_0)                     \\\n    _PROC(v_m_3, v_q, _RES##_7_0)                     \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=2)\n#define MATRIX_FP32_ITER_32X2_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x2_t v_p = vld1_f32(q);                    \\\n    float32x4_t v_q_0 = vdupq_lane_f32(v_p, 0);       \\\n    float32x4_t v_q_1 = vdupq_lane_f32(v_p, 1);       \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    MATRIX_VAR_PROC(1, 2, 0, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 1, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 2, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 3, v_m_3, v_q, _RES, _PROC) \\\n    v_m_0 = vld1q_f32(m + 16);                        \\\n    v_m_1 = vld1q_f32(m + 20);                        \\\n    v_m_2 = vld1q_f32(m + 24);                        \\\n    v_m_3 = vld1q_f32(m + 28);                        \\\n    MATRIX_VAR_PROC(1, 2, 4, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 5, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 6, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 2, 7, v_m_3, v_q, _RES, _PROC) \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=4)\n#define MATRIX_FP32_ITER_32X4_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_p = vld1q_f32(q);                   \\\n    float32x4_t v_q_0 = vdupq_laneq_f32(v_p, 0);      \\\n    float32x4_t v_q_1 = vdupq_laneq_f32(v_p, 1);      \\\n    float32x4_t v_q_2 = vdupq_laneq_f32(v_p, 2);      \\\n    float32x4_t v_q_3 = vdupq_laneq_f32(v_p, 3);      \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    MATRIX_VAR_PROC(1, 4, 0, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 1, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 2, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 3, v_m_3, v_q, _RES, _PROC) \\\n    v_m_0 = vld1q_f32(m + 16);                        \\\n    v_m_1 = vld1q_f32(m + 20);                        \\\n    v_m_2 = vld1q_f32(m + 24);                        \\\n    v_m_3 = vld1q_f32(m + 28);                        \\\n    MATRIX_VAR_PROC(1, 4, 4, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 5, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 6, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 4, 7, v_m_3, v_q, _RES, _PROC) \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=8)\n#define MATRIX_FP32_ITER_32X8_NEON(m, q, _RES, _PROC) \\\n  {                                                   \\\n    float32x4_t v_p_0 = vld1q_f32(q + 0);             \\\n    float32x4_t v_p_1 = vld1q_f32(q + 4);             \\\n    float32x4_t v_q_0 = vdupq_laneq_f32(v_p_0, 0);    \\\n    float32x4_t v_q_1 = vdupq_laneq_f32(v_p_0, 1);    \\\n    float32x4_t v_q_2 = vdupq_laneq_f32(v_p_0, 2);    \\\n    float32x4_t v_q_3 = vdupq_laneq_f32(v_p_0, 3);    \\\n    float32x4_t v_q_4 = vdupq_laneq_f32(v_p_1, 0);    \\\n    float32x4_t v_q_5 = vdupq_laneq_f32(v_p_1, 1);    \\\n    float32x4_t v_q_6 = vdupq_laneq_f32(v_p_1, 2);    \\\n    float32x4_t v_q_7 = vdupq_laneq_f32(v_p_1, 3);    \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);             \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);             \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);             \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);            \\\n    MATRIX_VAR_PROC(1, 8, 0, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 1, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 2, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 3, v_m_3, v_q, _RES, _PROC) \\\n    v_m_0 = vld1q_f32(m + 16);                        \\\n    v_m_1 = vld1q_f32(m + 20);                        \\\n    v_m_2 = vld1q_f32(m + 24);                        \\\n    v_m_3 = vld1q_f32(m + 28);                        \\\n    MATRIX_VAR_PROC(1, 8, 4, v_m_0, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 5, v_m_1, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 6, v_m_2, v_q, _RES, _PROC) \\\n    MATRIX_VAR_PROC(1, 8, 7, v_m_3, v_q, _RES, _PROC) \\\n  }\n\n//! 
Iterative process of computing distance (FP32, M=32, N=16)\n#define MATRIX_FP32_ITER_32X16_NEON(m, q, _RES, _PROC) \\\n  {                                                    \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);              \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);              \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);              \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);             \\\n    float32x4_t v_m_4 = vld1q_f32(m + 16);             \\\n    float32x4_t v_m_5 = vld1q_f32(m + 20);             \\\n    float32x4_t v_m_6 = vld1q_f32(m + 24);             \\\n    float32x4_t v_m_7 = vld1q_f32(m + 28);             \\\n    float32x4_t v_q = vld1q_f32(q + 0);                \\\n    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \\\n    MATRIX_VAR_PROC(8, 1, 0, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 1, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 2, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 3, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 4);                            \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 4, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 5, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 6, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 7, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 8);                            \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 8, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 9, v_m, v_p, _RES, _PROC) 
   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 10, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 11, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 12);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 12, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 13, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 14, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 15, v_m, v_p, _RES, _PROC)   \\\n  }\n\n//! Iterative process of computing distance (FP32, M=32, N=32)\n#define MATRIX_FP32_ITER_32X32_NEON(m, q, _RES, _PROC) \\\n  {                                                    \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);              \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);              \\\n    float32x4_t v_m_2 = vld1q_f32(m + 8);              \\\n    float32x4_t v_m_3 = vld1q_f32(m + 12);             \\\n    float32x4_t v_m_4 = vld1q_f32(m + 16);             \\\n    float32x4_t v_m_5 = vld1q_f32(m + 20);             \\\n    float32x4_t v_m_6 = vld1q_f32(m + 24);             \\\n    float32x4_t v_m_7 = vld1q_f32(m + 28);             \\\n    float32x4_t v_q = vld1q_f32(q + 0);                \\\n    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \\\n    MATRIX_VAR_PROC(8, 1, 0, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 1, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 2, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 3, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 4);       
                     \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 4, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 5, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 6, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 7, v_m, v_p, _RES, _PROC)    \\\n    v_q = vld1q_f32(q + 8);                            \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 8, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 9, v_m, v_p, _RES, _PROC)    \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 10, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 11, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 12);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 12, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 13, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 14, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 15, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 16);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 16, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 17, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 18, v_m, v_p, _RES, _PROC)   \\\n    v_p = 
vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 19, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 20);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 20, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 21, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 22, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 23, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 24);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 24, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 25, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 26, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 27, v_m, v_p, _RES, _PROC)   \\\n    v_q = vld1q_f32(q + 28);                           \\\n    v_p = vdupq_laneq_f32(v_q, 0);                     \\\n    MATRIX_VAR_PROC(8, 1, 28, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 29, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 30, v_m, v_p, _RES, _PROC)   \\\n    v_p = vdupq_laneq_f32(v_q, 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 31, v_m, v_p, _RES, _PROC)   \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_inner_product_utility.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#if defined(__SSE4_1__)\n//! Four-bits Convert Table\nstatic const AILEGO_ALIGNED(32) int8_t Int4ConvertTable[32] = {\n    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1,\n    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1};\n\n#define NEGZEROS_FP32_SSE _mm_set1_ps(-0.0f)\n#define  MASK_INT4_SSE _mm_set1_epi32(0x0f0f0f0f)\n#define ONES_INT16_SSE _mm_set1_epi32(0x00010001)\n#define INT4_LOOKUP_SSE _mm_load_si128((const __m128i *)Int4ConvertTable)\n#endif  // __SSE4_1__\n\n#if defined(__AVX__)\n// #define NEGZEROS_FP32_AVX _mm256_set1_ps(-0.0f)\n#define MASK_INT4_AVX _mm256_set1_epi32(0x0f0f0f0f)\n#define ONES_INT16_AVX _mm256_set1_epi32(0x00010001)\n#define  INT4_LOOKUP_AVX _mm256_load_si256((const __m256i *)Int4ConvertTable)\n#endif  // __AVX__\n\n#if defined(__AVX512F__) && !defined(__AVX512DQ__)\n#define _mm512_xor_ps(a, b) \\\n  _mm512_castsi512_ps(      \\\n      _mm512_xor_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))\n#endif  // __AVX512DQ__\n\n//! Reverse sign of value (GENERAL)\n#define NEGATE_FP32_GENERAL(v) -(v)\n\n//! Calculate Fused-Multiply-Add (SSE)\n#define FMA_FP32_SSE(xmm_m, xmm_q, xmm_sum) \\\n  xmm_sum = _mm_fmadd_ps(xmm_m, xmm_q, xmm_sum);\n\n//! Calculate Fused-Multiply-Add (AVX)\n#define FMA_FP32_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_fmadd_ps(ymm_m, ymm_q, ymm_sum);\n\n//! 
Calculate Fused-Multiply-Add (AVX512)\n#define FMA_FP32_AVX512(zmm_m, zmm_q, zmm_sum) \\\n  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_q, zmm_sum);\n\n//! Calculate Fused-Multiply-Add (AVX512FP16)\n#define FMA_FP16_AVX512FP16(zmm_m, zmm_q, zmm_sum) \\\n  zmm_sum = _mm512_fmadd_ph(zmm_m, zmm_q, zmm_sum);\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_FP16_GENERAL(m, q, sum) sum += (m * q);\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_FP32_GENERAL(m, q, sum) sum += (m * q);\n\n//! Calculate Fused-Multiply-Add (NEON)\n#define FMA_FP16_NEON(v_m, v_q, v_sum) v_sum = vfmaq_f16(v_sum, v_m, v_q);\n\n//! Calculate Fused-Multiply-Add (NEON)\n#define FMA_FP32_NEON(v_m, v_q, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_q);\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_INT4_GENERAL(m, q, sum)                               \\\n  sum += Int4MulTable[(((m) << 4) & 0xf0) | (((q) >> 0) & 0xf)] + \\\n         Int4MulTable[(((m) >> 0) & 0xf0) | (((q) >> 4) & 0xf)];\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_INT8_GENERAL(m, q, sum) sum += static_cast<float>(m * q);\n\n//! Calculate Fused-Multiply-Add (SSE)\n#define FMA_INT8_SSE(xmm_m, xmm_q, xmm_sum)                                    \\\n  xmm_sum = _mm_add_epi32(                                                     \\\n      _mm_madd_epi16(                                                          \\\n          _mm_maddubs_epi16(_mm_abs_epi8(xmm_q), _mm_sign_epi8(xmm_m, xmm_q)), \\\n          ONES_INT16_SSE),                                                     \\\n      xmm_sum);\n\n//! 
Calculate Fused-Multiply-Add (AVX)\n#define FMA_INT8_AVX(ymm_m, ymm_q, ymm_sum)                                   \\\n  ymm_sum = _mm256_add_epi32(                                                 \\\n      _mm256_madd_epi16(_mm256_maddubs_epi16(_mm256_abs_epi8(ymm_q),          \\\n                                             _mm256_sign_epi8(ymm_m, ymm_q)), \\\n                        ONES_INT16_AVX),                                      \\\n      ymm_sum);\n\n//! Calculate Fused-Multiply-Add (SSE)\n#define FMA_INT4_SSE(xmm_m, xmm_q, xmm_sum)                                    \\\n  {                                                                            \\\n    __m128i xmm_lhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE,                        \\\n                                       _mm_and_si128((xmm_m), MASK_INT4_SSE)); \\\n    __m128i xmm_rhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE,                        \\\n                                       _mm_and_si128((xmm_q), MASK_INT4_SSE)); \\\n    xmm_sum = _mm_add_epi32(                                                   \\\n        _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),                \\\n                                         _mm_sign_epi8(xmm_lhs, xmm_rhs)),     \\\n                       ONES_INT16_SSE),                                        \\\n        xmm_sum);                                                              \\\n    xmm_lhs = _mm_shuffle_epi8(                                                \\\n        INT4_LOOKUP_SSE,                                                       \\\n        _mm_and_si128(_mm_srli_epi32((xmm_m), 4), MASK_INT4_SSE));             \\\n    xmm_rhs = _mm_shuffle_epi8(                                                \\\n        INT4_LOOKUP_SSE,                                                       \\\n        _mm_and_si128(_mm_srli_epi32((xmm_q), 4), MASK_INT4_SSE));             \\\n    xmm_sum = _mm_add_epi32(                                                   \\\n 
       _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),                \\\n                                         _mm_sign_epi8(xmm_lhs, xmm_rhs)),     \\\n                       ONES_INT16_SSE),                                        \\\n        xmm_sum);                                                              \\\n  }\n\n//! Calculate Fused-Multiply-Add (AVX)\n#define FMA_INT4_AVX(ymm_m, ymm_q, ymm_sum)                              \\\n  {                                                                      \\\n    __m256i ymm_lhs = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_m), MASK_INT4_AVX));      \\\n    __m256i ymm_rhs = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_q), MASK_INT4_AVX));      \\\n    ymm_sum = _mm256_add_epi32(                                          \\\n        _mm256_madd_epi16(                                               \\\n            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_rhs),               \\\n                                 _mm256_sign_epi8(ymm_lhs, ymm_rhs)),    \\\n            ONES_INT16_AVX),                                             \\\n        ymm_sum);                                                        \\\n    ymm_lhs = _mm256_shuffle_epi8(                                       \\\n        INT4_LOOKUP_AVX,                                                 \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_m), 4), MASK_INT4_AVX)); \\\n    ymm_rhs = _mm256_shuffle_epi8(                                       \\\n        INT4_LOOKUP_AVX,                                                 \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_q), 4), MASK_INT4_AVX)); \\\n    ymm_sum = _mm256_add_epi32(                                          \\\n        _mm256_madd_epi16(                                               \\\n            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_rhs),               \\\n     
                            _mm256_sign_epi8(ymm_lhs, ymm_rhs)),    \\\n            ONES_INT16_AVX),                                             \\\n        ymm_sum);                                                        \\\n  }\n\n//! Compute the distance between matrix and query\n#define FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)                       \\\n  {                                                                        \\\n    __m128i xmm_lhs_0 = _mm_shuffle_epi8(                                  \\\n        INT4_LOOKUP_SSE, _mm_and_si128((xmm_lhs), MASK_INT4_SSE));         \\\n    __m128i xmm_rhs_0 = _mm_shuffle_epi8(                                  \\\n        INT4_LOOKUP_SSE, _mm_and_si128((xmm_rhs), MASK_INT4_SSE));         \\\n    __m128i xmm_lhs_1 = _mm_shuffle_epi8(                                  \\\n        INT4_LOOKUP_SSE,                                                   \\\n        _mm_and_si128(_mm_srli_epi32((xmm_lhs), 4), MASK_INT4_SSE));       \\\n    __m128i xmm_rhs_1 = _mm_shuffle_epi8(                                  \\\n        INT4_LOOKUP_SSE,                                                   \\\n        _mm_and_si128(_mm_srli_epi32((xmm_rhs), 4), MASK_INT4_SSE));       \\\n    xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);                       \\\n    xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);                       \\\n    xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);                                   \\\n    xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);                                   \\\n    xmm_lhs_0 = _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0),    \\\n                               ONES_INT16_SSE);                            \\\n    xmm_lhs_1 = _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1),    \\\n                               ONES_INT16_SSE);                            \\\n    xmm_sum = _mm_add_epi32(_mm_add_epi32(xmm_lhs_0, xmm_lhs_1), xmm_sum); \\\n  }\n\n//! 
Compute the distance between matrix and query\n#define FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)                          \\\n  {                                                                           \\\n    __m256i ymm_lhs_0 = _mm256_shuffle_epi8(                                  \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_lhs), MASK_INT4_AVX));         \\\n    __m256i ymm_rhs_0 = _mm256_shuffle_epi8(                                  \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_rhs), MASK_INT4_AVX));         \\\n    __m256i ymm_lhs_1 = _mm256_shuffle_epi8(                                  \\\n        INT4_LOOKUP_AVX,                                                      \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_lhs), 4), MASK_INT4_AVX));    \\\n    __m256i ymm_rhs_1 = _mm256_shuffle_epi8(                                  \\\n        INT4_LOOKUP_AVX,                                                      \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_rhs), 4), MASK_INT4_AVX));    \\\n    ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);                       \\\n    ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);                       \\\n    ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);                                   \\\n    ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);                                   \\\n    ymm_lhs_0 = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0), \\\n                                  ONES_INT16_AVX);                            \\\n    ymm_lhs_1 = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1), \\\n                                  ONES_INT16_AVX);                            \\\n    ymm_sum =                                                                 \\\n        _mm256_add_epi32(_mm256_add_epi32(ymm_lhs_0, ymm_lhs_1), ymm_sum);    \\\n  }\n\n#define ACCUM_FP16_STEP_GENERAL FMA_FP16_GENERAL\n#define ACCUM_FP16_STEP_NEON FMA_FP16_NEON\n\n#define ACCUM_FP32_STEP_SSE 
FMA_FP32_SSE\n#define ACCUM_FP32_STEP_AVX FMA_FP32_AVX\n#define ACCUM_FP32_STEP_AVX512 FMA_FP32_AVX512\n#define ACCUM_FP32_STEP_NEON FMA_FP32_NEON\n\n#define ACCUM_INT4_STEP_SSE FMA_INT4_SSE\n#define ACCUM_INT4_STEP_AVX FMA_INT4_AVX\n\n#define ACCUM_INT8_STEP_SSE FMA_INT8_SSE\n#define ACCUM_INT8_STEP_AVX FMA_INT8_AVX\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_int32.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"matrix_define.i\"\n\n#if defined(__AVX__) && defined(__GNUC__)\n#define _mm256_set_m128i(a, b) \\\n  _mm256_inserti128_si256(_mm256_castsi128_si256(b), (a), 1)\n#endif  // __AVX__\n\n#if !defined(__AVX__)\n#define _mm_broadcast_si32(a) _mm_castps_si128(_mm_load1_ps((const float *)(a)))\n#else\n#define _mm_broadcast_si32(a) \\\n  _mm_castps_si128(_mm_broadcast_ss((const float *)(a)))\n#define _mm256_broadcast_si32(a) \\\n  _mm256_castps_si256(_mm256_broadcast_ss((const float *)(a)))\n#endif  // !__AVX__\n\n//! Iterative process of computing distance (INT32, M=2, N=1)\n#define MATRIX_INT32_ITER_2X1_SSE(mi, qi, _RES, _LOAD, _PROC)            \\\n  {                                                                      \\\n    __m128i xmm_qi = _LOAD((const __m128i *)(qi));                       \\\n    __m128i xmm_mi = _LOAD((const __m128i *)(mi));                       \\\n    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(1, 1, 0, 0)); \\\n    _PROC(xmm_mi, xmm_pi, _RES##_0_0)                                    \\\n    xmm_mi = _LOAD((const __m128i *)(mi + 4));                           \\\n    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 2, 2));         \\\n    _PROC(xmm_mi, xmm_pi, _RES##_0_1)                                    \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=2, N=2)\n#define MATRIX_INT32_ITER_2X2_SSE(mi, qi, _RES, _LOAD, _PROC)            \\\n  {                                                                      \\\n    __m128i xmm_qi = _LOAD((const __m128i *)(qi));                       \\\n    __m128i xmm_mi = _LOAD((const __m128i *)(mi));                       \\\n    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0)); \\\n    _PROC(xmm_mi, xmm_pi, _RES##_0_0)                                    \\\n    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));         \\\n    _PROC(xmm_mi, xmm_pi, _RES##_0_1)                                    \\\n  }\n\n//! Iterative process of computing distance (INT32, M=4, N=1)\n#define MATRIX_INT32_ITER_4X1_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \\\n    _PROC(xmm_mi_0, xmm_qi, _RES##_0_0)                       \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                      \\\n    _PROC(xmm_mi_1, xmm_qi, _RES##_1_0)                       \\\n  }\n\n//! Iterative process of computing distance (INT32, M=4, N=2)\n#define MATRIX_INT32_ITER_4X2_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);            \\\n    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);            \\\n    __m128i xmm_mi = _LOAD((const __m128i *)(mi));            \\\n    MATRIX_VAR_PROC(1, 2, 0, xmm_mi, xmm_qi, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=4, N=4)\n#define MATRIX_INT32_ITER_4X4_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_mi = _LOAD((const __m128i *)(mi));            \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \\\n    _PROC(xmm_mi, xmm_qi, _RES##_0_0)                         \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                      \\\n    _PROC(xmm_mi, xmm_qi, _RES##_0_1)                         \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                      \\\n    _PROC(xmm_mi, xmm_qi, _RES##_0_2)                         \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                      \\\n    _PROC(xmm_mi, xmm_qi, _RES##_0_3)                         \\\n  }\n\n//! Iterative process of computing distance (INT32, M=8, N=1)\n#define MATRIX_INT32_ITER_8X1_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                  \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT32, M=8, N=2)\n#define MATRIX_INT32_ITER_8X2_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);            \\\n    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);            \\\n    __m128i xmm_mi = _LOAD((const __m128i *)(mi + 0));        \\\n    MATRIX_VAR_PROC(1, 2, 0, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_mi = _LOAD((const __m128i *)(mi + 4));                \\\n    MATRIX_VAR_PROC(1, 2, 1, xmm_mi, xmm_qi, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=8, N=4)\n#define MATRIX_INT32_ITER_8X4_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                      \\\n    MATRIX_VAR_PROC(2, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                      \\\n    MATRIX_VAR_PROC(2, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                      \\\n    MATRIX_VAR_PROC(2, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT32, M=8, N=8)\n#define MATRIX_INT32_ITER_8X8_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \\\n    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                      \\\n    MATRIX_VAR_PROC(2, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                      \\\n    MATRIX_VAR_PROC(2, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                      \\\n    MATRIX_VAR_PROC(2, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 4);                      \\\n    MATRIX_VAR_PROC(2, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 5);                      \\\n    MATRIX_VAR_PROC(2, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = 
_mm_broadcast_si32(qi + 6);                      \\\n    MATRIX_VAR_PROC(2, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)     \\\n    xmm_qi = _mm_broadcast_si32(qi + 7);                      \\\n    MATRIX_VAR_PROC(2, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT32, M=16, N=1)\n#define MATRIX_INT32_ITER_16X1_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                   \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=16, N=2)\n#define MATRIX_INT32_ITER_16X2_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                       \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=4)\n#define MATRIX_INT32_ITER_16X4_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                       \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                       \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                       \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=8)\n#define MATRIX_INT32_ITER_16X8_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                       \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                       \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                       \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 4);                       \\\n    MATRIX_VAR_PROC(4, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 5);                       \\\n    MATRIX_VAR_PROC(4, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 6);                       \\\n    MATRIX_VAR_PROC(4, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 7);                       \\\n    MATRIX_VAR_PROC(4, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=16)\n#define MATRIX_INT32_ITER_16X16_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \\\n    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                        \\\n    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                        \\\n    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                        \\\n    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 4);                        \\\n    MATRIX_VAR_PROC(4, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 5);                        \\\n    MATRIX_VAR_PROC(4, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 6);                        \\\n    MATRIX_VAR_PROC(4, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 7);                        \\\n    MATRIX_VAR_PROC(4, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 8);                        \\\n    MATRIX_VAR_PROC(4, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 9);                        \\\n    MATRIX_VAR_PROC(4, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 10);                       \\\n    MATRIX_VAR_PROC(4, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi 
+ 11);                       \\\n    MATRIX_VAR_PROC(4, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 12);                       \\\n    MATRIX_VAR_PROC(4, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 13);                       \\\n    MATRIX_VAR_PROC(4, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 14);                       \\\n    MATRIX_VAR_PROC(4, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 15);                       \\\n    MATRIX_VAR_PROC(4, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=32, N=1)\n#define MATRIX_INT32_ITER_32X1_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi);                   \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    _PROC(xmm_mi_0, xmm_qi, _RES##_0_0)                        \\\n    _PROC(xmm_mi_1, xmm_qi, _RES##_1_0)                        \\\n    _PROC(xmm_mi_2, xmm_qi, _RES##_2_0)                        \\\n    _PROC(xmm_mi_3, xmm_qi, _RES##_3_0)                        \\\n    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \\\n    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \\\n    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \\\n    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \\\n    _PROC(xmm_mi_0, xmm_qi, _RES##_4_0)                        \\\n    _PROC(xmm_mi_1, xmm_qi, _RES##_5_0)                        \\\n    _PROC(xmm_mi_2, xmm_qi, _RES##_6_0)                        \\\n    _PROC(xmm_mi_3, xmm_qi, _RES##_7_0)                        \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=2)\n#define MATRIX_INT32_ITER_32X2_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \\\n    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    MATRIX_VAR_PROC(1, 2, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \\\n    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \\\n    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \\\n    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \\\n    MATRIX_VAR_PROC(1, 2, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 2, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=4)\n#define MATRIX_INT32_ITER_32X4_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \\\n    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \\\n    __m128i xmm_qi_2 = _mm_broadcast_si32(qi + 2);             \\\n    __m128i xmm_qi_3 = _mm_broadcast_si32(qi + 3);             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    MATRIX_VAR_PROC(1, 4, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \\\n    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \\\n    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \\\n    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \\\n    MATRIX_VAR_PROC(1, 4, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 4, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=8)\n#define MATRIX_INT32_ITER_32X8_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \\\n    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \\\n    __m128i xmm_qi_2 = _mm_broadcast_si32(qi + 2);             \\\n    __m128i xmm_qi_3 = _mm_broadcast_si32(qi + 3);             \\\n    __m128i xmm_qi_4 = _mm_broadcast_si32(qi + 4);             \\\n    __m128i xmm_qi_5 = _mm_broadcast_si32(qi + 5);             \\\n    __m128i xmm_qi_6 = _mm_broadcast_si32(qi + 6);             \\\n    __m128i xmm_qi_7 = _mm_broadcast_si32(qi + 7);             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \\\n    MATRIX_VAR_PROC(1, 8, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \\\n    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \\\n    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \\\n    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \\\n    MATRIX_VAR_PROC(1, 8, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \\\n    MATRIX_VAR_PROC(1, 8, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=16)\n#define MATRIX_INT32_ITER_32X16_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \\\n    __m128i xmm_mi_4 = _LOAD((const __m128i *)(mi + 16));       \\\n    __m128i xmm_mi_5 = _LOAD((const __m128i *)(mi + 20));       \\\n    __m128i xmm_mi_6 = _LOAD((const __m128i *)(mi + 24));       \\\n    __m128i xmm_mi_7 = _LOAD((const __m128i *)(mi + 28));       \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \\\n    MATRIX_VAR_PROC(8, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                        \\\n    MATRIX_VAR_PROC(8, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                        \\\n    MATRIX_VAR_PROC(8, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                        \\\n    MATRIX_VAR_PROC(8, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 4);                        \\\n    MATRIX_VAR_PROC(8, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 5);                        \\\n    MATRIX_VAR_PROC(8, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 6);                        \\\n    MATRIX_VAR_PROC(8, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 7);                        \\\n    MATRIX_VAR_PROC(8, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 8);                        \\\n    MATRIX_VAR_PROC(8, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi 
+ 9);                        \\\n    MATRIX_VAR_PROC(8, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 10);                       \\\n    MATRIX_VAR_PROC(8, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 11);                       \\\n    MATRIX_VAR_PROC(8, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 12);                       \\\n    MATRIX_VAR_PROC(8, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 13);                       \\\n    MATRIX_VAR_PROC(8, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 14);                       \\\n    MATRIX_VAR_PROC(8, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 15);                       \\\n    MATRIX_VAR_PROC(8, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=32, N=32)\n#define MATRIX_INT32_ITER_32X32_SSE(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \\\n    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \\\n    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \\\n    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \\\n    __m128i xmm_mi_4 = _LOAD((const __m128i *)(mi + 16));       \\\n    __m128i xmm_mi_5 = _LOAD((const __m128i *)(mi + 20));       \\\n    __m128i xmm_mi_6 = _LOAD((const __m128i *)(mi + 24));       \\\n    __m128i xmm_mi_7 = _LOAD((const __m128i *)(mi + 28));       \\\n    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \\\n    MATRIX_VAR_PROC(8, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 1);                        \\\n    MATRIX_VAR_PROC(8, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 2);                   
     \\\n    MATRIX_VAR_PROC(8, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 3);                        \\\n    MATRIX_VAR_PROC(8, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 4);                        \\\n    MATRIX_VAR_PROC(8, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 5);                        \\\n    MATRIX_VAR_PROC(8, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 6);                        \\\n    MATRIX_VAR_PROC(8, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 7);                        \\\n    MATRIX_VAR_PROC(8, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 8);                        \\\n    MATRIX_VAR_PROC(8, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 9);                        \\\n    MATRIX_VAR_PROC(8, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \\\n    xmm_qi = _mm_broadcast_si32(qi + 10);                       \\\n    MATRIX_VAR_PROC(8, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 11);                       \\\n    MATRIX_VAR_PROC(8, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 12);                       \\\n    MATRIX_VAR_PROC(8, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 13);                       \\\n    MATRIX_VAR_PROC(8, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 14);                       \\\n    MATRIX_VAR_PROC(8, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 15);                       \\\n    MATRIX_VAR_PROC(8, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 16);                       \\\n    MATRIX_VAR_PROC(8, 1, 16, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = 
_mm_broadcast_si32(qi + 17);                       \\\n    MATRIX_VAR_PROC(8, 1, 17, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 18);                       \\\n    MATRIX_VAR_PROC(8, 1, 18, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 19);                       \\\n    MATRIX_VAR_PROC(8, 1, 19, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 20);                       \\\n    MATRIX_VAR_PROC(8, 1, 20, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 21);                       \\\n    MATRIX_VAR_PROC(8, 1, 21, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 22);                       \\\n    MATRIX_VAR_PROC(8, 1, 22, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 23);                       \\\n    MATRIX_VAR_PROC(8, 1, 23, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 24);                       \\\n    MATRIX_VAR_PROC(8, 1, 24, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 25);                       \\\n    MATRIX_VAR_PROC(8, 1, 25, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 26);                       \\\n    MATRIX_VAR_PROC(8, 1, 26, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 27);                       \\\n    MATRIX_VAR_PROC(8, 1, 27, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 28);                       \\\n    MATRIX_VAR_PROC(8, 1, 28, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 29);                       \\\n    MATRIX_VAR_PROC(8, 1, 29, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 30);                       \\\n    MATRIX_VAR_PROC(8, 1, 30, xmm_mi, xmm_qi, _RES, _PROC)      \\\n    xmm_qi = _mm_broadcast_si32(qi + 31);                       \\\n    MATRIX_VAR_PROC(8, 1, 31, xmm_mi, 
xmm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=2, N=1)\n#define MATRIX_INT32_ITER_2X1_AVX(mi, qi, _RES, _LOAD, _PROC)            \\\n  {                                                                      \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                       \\\n    __m256i ymm_qi = _mm256_set_epi32(qi[3], qi[3], qi[2], qi[2], qi[1], \\\n                                      qi[1], qi[0], qi[0]);              \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                                    \\\n  }\n\n//! Iterative process of computing distance (INT32, M=2, N=2)\n#define MATRIX_INT32_ITER_2X2_AVX(mi, qi, _RES, _LOAD, _PROC)               \\\n  {                                                                         \\\n    __m256i ymm_qi = _LOAD((const __m256i *)(qi));                          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                          \\\n    __m256i ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(2, 2, 0, 0)); \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_0)                                       \\\n    ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(3, 3, 1, 1));         \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_1)                                       \\\n  }\n\n//! Iterative process of computing distance (INT32, M=4, N=1)\n#define MATRIX_INT32_ITER_4X1_AVX(mi, qi, _RES, _LOAD, _PROC)                 \\\n  {                                                                           \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                            \\\n    __m256i ymm_qi =                                                          \\\n        _mm256_set_m128i(_mm_broadcast_si32(qi + 1), _mm_broadcast_si32(qi)); \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                                         \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=4, N=2)\n#define MATRIX_INT32_ITER_4X2_AVX(mi, qi, _RES, _LOAD, _PROC)      \\\n  {                                                                \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                 \\\n    __m256i ymm_qi = _mm256_set_m128i(_mm_broadcast_si32(qi + 2),  \\\n                                      _mm_broadcast_si32(qi + 0)); \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                              \\\n    ymm_qi = _mm256_set_m128i(_mm_broadcast_si32(qi + 3),          \\\n                              _mm_broadcast_si32(qi + 1));         \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_1)                              \\\n  }\n\n//! Iterative process of computing distance (INT32, M=4, N=4)\n#define MATRIX_INT32_ITER_4X4_AVX(mi, qi, _RES, _LOAD, _PROC)               \\\n  {                                                                         \\\n    __m256i ymm_qi = _LOAD((const __m256i *)(qi));                          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                          \\\n    __m256i ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(0, 0, 0, 0)); \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_0)                                       \\\n    ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(1, 1, 1, 1));         \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_1)                                       \\\n    ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(2, 2, 2, 2));         \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_2)                                       \\\n    ymm_pi = _mm256_shuffle_epi32(ymm_qi, _MM_SHUFFLE(3, 3, 3, 3));         \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_3)                                       \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=8, N=1)\n#define MATRIX_INT32_ITER_8X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi);               \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n  }\n\n//! Iterative process of computing distance (INT32, M=8, N=2)\n#define MATRIX_INT32_ITER_8X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si32(qi + 0);         \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si32(qi + 1);         \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT32, M=8, N=4)\n#define MATRIX_INT32_ITER_8X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);           \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_1)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_2)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_3)                         \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=8, N=8)\n#define MATRIX_INT32_ITER_8X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);           \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_1)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_2)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_3)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_4)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_5)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 6);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_6)                         \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_7)                         \\\n  }\n\n//! Iterative process of computing distance (INT32, M=16, N=1)\n#define MATRIX_INT32_ITER_16X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=2)\n#define MATRIX_INT32_ITER_16X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=16, N=4)\n#define MATRIX_INT32_ITER_16X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                    \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                    \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=8)\n#define MATRIX_INT32_ITER_16X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                    \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                    \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                    \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                    \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 6);                    \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                    \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=16, N=16)\n#define MATRIX_INT32_ITER_16X16_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);             \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                     \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                     \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                     \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                     \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                     \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 6);                     \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                     \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 8);                     \\\n    MATRIX_VAR_PROC(2, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 9);                     \\\n    MATRIX_VAR_PROC(2, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 10);                    \\\n    MATRIX_VAR_PROC(2, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 11);                    \\\n    MATRIX_VAR_PROC(2, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = 
_mm256_broadcast_si32(qi + 12);                    \\\n    MATRIX_VAR_PROC(2, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 13);                    \\\n    MATRIX_VAR_PROC(2, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 14);                    \\\n    MATRIX_VAR_PROC(2, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 15);                    \\\n    MATRIX_VAR_PROC(2, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=32, N=1)\n#define MATRIX_INT32_ITER_32X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));      \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));      \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi);                \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=32, N=2)\n#define MATRIX_INT32_ITER_32X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));      \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));      \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=4)\n#define MATRIX_INT32_ITER_32X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));      \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));      \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=8)\n#define MATRIX_INT32_ITER_32X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));      \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));      \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);            \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                    \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                    \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 6);                    \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                    \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT32, M=32, N=16)\n#define MATRIX_INT32_ITER_32X16_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));       \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);             \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                     \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                     \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 6);                     \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                     \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 8);                     \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 9);                     \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 10);                    \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = 
_mm256_broadcast_si32(qi + 11);                    \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 12);                    \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 13);                    \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 14);                    \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 15);                    \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT32, M=32, N=32)\n#define MATRIX_INT32_ITER_32X32_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 16));       \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 24));       \\\n    __m256i ymm_qi = _mm256_broadcast_si32(qi + 0);             \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 4);                     \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 5);                     \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = 
_mm256_broadcast_si32(qi + 6);                     \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 7);                     \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 8);                     \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 9);                     \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si32(qi + 10);                    \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 11);                    \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 12);                    \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 13);                    \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 14);                    \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 15);                    \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 16);                    \\\n    MATRIX_VAR_PROC(4, 1, 16, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 17);                    \\\n    MATRIX_VAR_PROC(4, 1, 17, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 18);                    \\\n    MATRIX_VAR_PROC(4, 1, 18, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 19);                    \\\n    MATRIX_VAR_PROC(4, 1, 19, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 20);                    \\\n    MATRIX_VAR_PROC(4, 1, 20, ymm_mi, 
ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 21);                    \\\n    MATRIX_VAR_PROC(4, 1, 21, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 22);                    \\\n    MATRIX_VAR_PROC(4, 1, 22, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 23);                    \\\n    MATRIX_VAR_PROC(4, 1, 23, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 24);                    \\\n    MATRIX_VAR_PROC(4, 1, 24, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 25);                    \\\n    MATRIX_VAR_PROC(4, 1, 25, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 26);                    \\\n    MATRIX_VAR_PROC(4, 1, 26, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 27);                    \\\n    MATRIX_VAR_PROC(4, 1, 27, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 28);                    \\\n    MATRIX_VAR_PROC(4, 1, 28, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 29);                    \\\n    MATRIX_VAR_PROC(4, 1, 29, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 30);                    \\\n    MATRIX_VAR_PROC(4, 1, 30, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si32(qi + 31);                    \\\n    MATRIX_VAR_PROC(4, 1, 31, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_int64.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"matrix_define.i\"\n\n#if defined(__AVX__)\n#define _mm256_broadcast_si64(a) \\\n  _mm256_castpd_si256(_mm256_broadcast_sd((const double *)(a)))\n#endif  // __AVX__\n\n//! Iterative process of computing distance (INT64, M=2, N=1)\n#define MATRIX_INT64_ITER_2X1_AVX(mi, qi, _RES, _LOAD, _PROC)           \\\n  {                                                                     \\\n    __m256i ymm_qi = _LOAD((const __m256i *)(qi));                      \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                      \\\n    __m256i ymm_pi =                                                    \\\n        _mm256_permute4x64_epi64(ymm_qi, _MM_SHUFFLE(1, 1, 0, 0));      \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_0)                                   \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                          \\\n    ymm_pi = _mm256_permute4x64_epi64(ymm_qi, _MM_SHUFFLE(3, 3, 2, 2)); \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_1)                                   \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=2, N=2)\n#define MATRIX_INT64_ITER_2X2_AVX(mi, qi, _RES, _LOAD, _PROC)           \\\n  {                                                                     \\\n    __m256i ymm_qi = _LOAD((const __m256i *)(qi));                      \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                      \\\n    __m256i ymm_pi =                                                    \\\n        _mm256_permute4x64_epi64(ymm_qi, _MM_SHUFFLE(2, 2, 0, 0));      \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_0)                                   \\\n    ymm_pi = _mm256_permute4x64_epi64(ymm_qi, _MM_SHUFFLE(3, 3, 1, 1)); \\\n    _PROC(ymm_mi, ymm_pi, _RES##_0_1)                                   \\\n  }\n\n//! Iterative process of computing distance (INT64, M=4, N=1)\n#define MATRIX_INT64_ITER_4X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);           \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_1_0)                         \\\n  }\n\n//! Iterative process of computing distance (INT64, M=4, N=2)\n#define MATRIX_INT64_ITER_4X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);         \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);         \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=4, N=4)\n#define MATRIX_INT64_ITER_4X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi));            \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);           \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_1)                         \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_2)                         \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                   \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_3)                         \\\n  }\n\n//! Iterative process of computing distance (INT64, M=8, N=1)\n#define MATRIX_INT64_ITER_8X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi);               \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));        \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                         \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_1_0)                         \\\n  }\n\n//! Iterative process of computing distance (INT64, M=8, N=2)\n#define MATRIX_INT64_ITER_8X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);         \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);         \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));        \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                \\\n    MATRIX_VAR_PROC(1, 2, 1, ymm_mi, ymm_qi, _RES, _PROC)     \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=8, N=4)\n#define MATRIX_INT64_ITER_8X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));      \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));      \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);           \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                   \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                   \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                   \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT64, M=8, N=8)\n#define MATRIX_INT64_ITER_8X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                           \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));      \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));      \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);           \\\n    MATRIX_VAR_PROC(2, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                   \\\n    MATRIX_VAR_PROC(2, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                   \\\n    MATRIX_VAR_PROC(2, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                   \\\n    MATRIX_VAR_PROC(2, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 4);                   \\\n    MATRIX_VAR_PROC(2, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 5);                   \\\n    MATRIX_VAR_PROC(2, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = 
_mm256_broadcast_si64(qi + 6);                   \\\n    MATRIX_VAR_PROC(2, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)     \\\n    ymm_qi = _mm256_broadcast_si64(qi + 7);                   \\\n    MATRIX_VAR_PROC(2, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)     \\\n  }\n\n//! Iterative process of computing distance (INT64, M=16, N=1)\n#define MATRIX_INT64_ITER_16X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi);                \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    _PROC(ymm_mi, ymm_qi, _RES##_1_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    _PROC(ymm_mi, ymm_qi, _RES##_2_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_3_0)                          \\\n  }\n\n//! Iterative process of computing distance (INT64, M=16, N=2)\n#define MATRIX_INT64_ITER_16X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    MATRIX_VAR_PROC(1, 2, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    MATRIX_VAR_PROC(1, 2, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    MATRIX_VAR_PROC(1, 2, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=16, N=4)\n#define MATRIX_INT64_ITER_16X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));      \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);            \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=16, N=8)\n#define MATRIX_INT64_ITER_16X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));       \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));       \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));      \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);            \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                    \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                    \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                    \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 4);                    \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 5);                    \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 6);                    \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 7);                    \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=16, N=16)\n#define MATRIX_INT64_ITER_16X16_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \\\n    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \\\n    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                     \\\n    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \\\n    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \\\n    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \\\n    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \\\n    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \\\n    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \\\n    MATRIX_VAR_PROC(4, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 9);                     \\\n    MATRIX_VAR_PROC(4, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \\\n    MATRIX_VAR_PROC(4, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = 
_mm256_broadcast_si64(qi + 11);                    \\\n    MATRIX_VAR_PROC(4, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \\\n    MATRIX_VAR_PROC(4, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \\\n    MATRIX_VAR_PROC(4, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \\\n    MATRIX_VAR_PROC(4, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \\\n    MATRIX_VAR_PROC(4, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT64, M=32, N=1)\n#define MATRIX_INT64_ITER_32X1_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi);                \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    _PROC(ymm_mi, ymm_qi, _RES##_1_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    _PROC(ymm_mi, ymm_qi, _RES##_2_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_3_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_4_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_5_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_6_0)                          \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \\\n    _PROC(ymm_mi, ymm_qi, _RES##_7_0)                   
       \\\n  }\n\n//! Iterative process of computing distance (INT64, M=32, N=2)\n#define MATRIX_INT64_ITER_32X2_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    MATRIX_VAR_PROC(1, 2, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    MATRIX_VAR_PROC(1, 2, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    MATRIX_VAR_PROC(1, 2, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \\\n    MATRIX_VAR_PROC(1, 2, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \\\n    MATRIX_VAR_PROC(1, 2, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \\\n    MATRIX_VAR_PROC(1, 2, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \\\n    MATRIX_VAR_PROC(1, 2, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=32, N=4)\n#define MATRIX_INT64_ITER_32X4_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \\\n    __m256i ymm_qi_2 = _mm256_broadcast_si64(qi + 2);          \\\n    __m256i ymm_qi_3 = _mm256_broadcast_si64(qi + 3);          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    MATRIX_VAR_PROC(1, 4, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    MATRIX_VAR_PROC(1, 4, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    MATRIX_VAR_PROC(1, 4, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    MATRIX_VAR_PROC(1, 4, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \\\n    MATRIX_VAR_PROC(1, 4, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \\\n    MATRIX_VAR_PROC(1, 4, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \\\n    MATRIX_VAR_PROC(1, 4, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \\\n    MATRIX_VAR_PROC(1, 4, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=32, N=8)\n#define MATRIX_INT64_ITER_32X8_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                            \\\n    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \\\n    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \\\n    __m256i ymm_qi_2 = _mm256_broadcast_si64(qi + 2);          \\\n    __m256i ymm_qi_3 = _mm256_broadcast_si64(qi + 3);          \\\n    __m256i ymm_qi_4 = _mm256_broadcast_si64(qi + 4);          \\\n    __m256i ymm_qi_5 = _mm256_broadcast_si64(qi + 5);          \\\n    __m256i ymm_qi_6 = _mm256_broadcast_si64(qi + 6);          \\\n    __m256i ymm_qi_7 = _mm256_broadcast_si64(qi + 7);          \\\n    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \\\n    MATRIX_VAR_PROC(1, 8, 0, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \\\n    MATRIX_VAR_PROC(1, 8, 1, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \\\n    MATRIX_VAR_PROC(1, 8, 2, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \\\n    MATRIX_VAR_PROC(1, 8, 3, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \\\n    MATRIX_VAR_PROC(1, 8, 4, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \\\n    MATRIX_VAR_PROC(1, 8, 5, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \\\n    MATRIX_VAR_PROC(1, 8, 6, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \\\n    MATRIX_VAR_PROC(1, 8, 7, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! 
Iterative process of computing distance (INT64, M=32, N=16)\n#define MATRIX_INT64_ITER_32X16_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \\\n    __m256i ymm_mi_4 = _LOAD((const __m256i *)(mi + 16));       \\\n    __m256i ymm_mi_5 = _LOAD((const __m256i *)(mi + 20));       \\\n    __m256i ymm_mi_6 = _LOAD((const __m256i *)(mi + 24));       \\\n    __m256i ymm_mi_7 = _LOAD((const __m256i *)(mi + 28));       \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \\\n    MATRIX_VAR_PROC(8, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \\\n    MATRIX_VAR_PROC(8, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \\\n    MATRIX_VAR_PROC(8, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \\\n    MATRIX_VAR_PROC(8, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \\\n    MATRIX_VAR_PROC(8, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \\\n    MATRIX_VAR_PROC(8, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = 
_mm256_broadcast_si64(qi + 9);                     \\\n    MATRIX_VAR_PROC(8, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \\\n    MATRIX_VAR_PROC(8, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 11);                    \\\n    MATRIX_VAR_PROC(8, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \\\n    MATRIX_VAR_PROC(8, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \\\n    MATRIX_VAR_PROC(8, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \\\n    MATRIX_VAR_PROC(8, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \\\n    MATRIX_VAR_PROC(8, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n\n//! Iterative process of computing distance (INT64, M=32, N=32)\n#define MATRIX_INT64_ITER_32X32_AVX(mi, qi, _RES, _LOAD, _PROC) \\\n  {                                                             \\\n    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \\\n    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \\\n    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \\\n    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \\\n    __m256i ymm_mi_4 = _LOAD((const __m256i *)(mi + 16));       \\\n    __m256i ymm_mi_5 = _LOAD((const __m256i *)(mi + 20));       \\\n    __m256i ymm_mi_6 = _LOAD((const __m256i *)(mi + 24));       \\\n    __m256i ymm_mi_7 = _LOAD((const __m256i *)(mi + 28));       \\\n    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \\\n    MATRIX_VAR_PROC(8, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \\\n    MATRIX_VAR_PROC(8, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = 
_mm256_broadcast_si64(qi + 2);                     \\\n    MATRIX_VAR_PROC(8, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \\\n    MATRIX_VAR_PROC(8, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \\\n    MATRIX_VAR_PROC(8, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \\\n    MATRIX_VAR_PROC(8, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \\\n    MATRIX_VAR_PROC(8, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \\\n    MATRIX_VAR_PROC(8, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \\\n    MATRIX_VAR_PROC(8, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 9);                     \\\n    MATRIX_VAR_PROC(8, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \\\n    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \\\n    MATRIX_VAR_PROC(8, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 11);                    \\\n    MATRIX_VAR_PROC(8, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \\\n    MATRIX_VAR_PROC(8, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \\\n    MATRIX_VAR_PROC(8, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \\\n    MATRIX_VAR_PROC(8, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \\\n    MATRIX_VAR_PROC(8, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 16);                    \\\n    MATRIX_VAR_PROC(8, 1, 16, ymm_mi, 
ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 17);                    \\\n    MATRIX_VAR_PROC(8, 1, 17, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 18);                    \\\n    MATRIX_VAR_PROC(8, 1, 18, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 19);                    \\\n    MATRIX_VAR_PROC(8, 1, 19, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 20);                    \\\n    MATRIX_VAR_PROC(8, 1, 20, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 21);                    \\\n    MATRIX_VAR_PROC(8, 1, 21, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 22);                    \\\n    MATRIX_VAR_PROC(8, 1, 22, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 23);                    \\\n    MATRIX_VAR_PROC(8, 1, 23, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 24);                    \\\n    MATRIX_VAR_PROC(8, 1, 24, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 25);                    \\\n    MATRIX_VAR_PROC(8, 1, 25, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 26);                    \\\n    MATRIX_VAR_PROC(8, 1, 26, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 27);                    \\\n    MATRIX_VAR_PROC(8, 1, 27, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 28);                    \\\n    MATRIX_VAR_PROC(8, 1, 28, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 29);                    \\\n    MATRIX_VAR_PROC(8, 1, 29, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 30);                    \\\n    MATRIX_VAR_PROC(8, 1, 30, ymm_mi, ymm_qi, _RES, _PROC)      \\\n    ymm_qi = _mm256_broadcast_si64(qi + 31);                    
\\\n    MATRIX_VAR_PROC(8, 1, 31, ymm_mi, ymm_qi, _RES, _PROC)      \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_mips_utility.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n//! Calculate Fused-Multiply-Add (AVX512)\n#define FMA_FP32_AVX512(zmm_m, zmm_q, zmm_sum) \\\n  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_q, zmm_sum);\n\n#define FMA_MASK_FP32_AVX512(zmm_m, zmm_q, zmm_sum, mask) \\\n  zmm_sum = _mm512_mask3_fmadd_ps(zmm_m, zmm_q, zmm_sum, mask);\n\n#define HorizontalAdd_FP16_NEON(v) \\\n  vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v)), vcvt_high_f32_f16(v)))\n\n#define HorizontalAdd_FP32_V512_TO_V256(zmm) \\\n  _mm256_add_ps(                             \\\n      _mm512_castps512_ps256(zmm),           \\\n      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(zmm), 1)))\n\n//! Calculate Fused-Multiply-Add (GENERAL, FP16)\n#define FMA_FP16_GENERAL(lhs, rhs, sum, norm1, norm2) \\\n  {                                                   \\\n    float v1 = lhs;                                   \\\n    float v2 = rhs;                                   \\\n    sum += v1 * v2;                                   \\\n    norm1 += v1 * v1;                                 \\\n    norm2 += v2 * v2;                                 \\\n  }\n\n//! 
Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_FP32_GENERAL(lhs, rhs, sum, norm1, norm2) \\\n  {                                                   \\\n    sum += (lhs) * (rhs);                             \\\n    norm1 += (lhs) * (lhs);                           \\\n    norm2 += (rhs) * (rhs);                           \\\n  }\n\n#if defined(__SSE4_1__)\n//! Four-bits Convert Table\nstatic const AILEGO_ALIGNED(32) int8_t Int4ConvertTable[32] = {\n    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1,\n    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1};\n#endif  // __SSE4_1__\n\n#if defined(__SSE4_1__)\nstatic const __m128i MASK_INT4_SSE = _mm_set1_epi32(0x0f0f0f0f);\nstatic const __m128i ONES_INT16_SSE = _mm_set1_epi32(0x00010001);\nstatic const __m128i INT4_LOOKUP_SSE =\n    _mm_load_si128((const __m128i *)Int4ConvertTable);\n#endif  // __SSE4_1__\n\n#if defined(__AVX2__)\nstatic const __m256i MASK_INT4_AVX = _mm256_set1_epi32(0x0f0f0f0f);\nstatic const __m256i ONES_INT16_AVX = _mm256_set1_epi32(0x00010001);\nstatic const __m256i INT4_LOOKUP_AVX =\n    _mm256_load_si256((const __m256i *)Int4ConvertTable);\n#endif  // __AVX2__\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_INT4_GENERAL(lhs, rhs, sum, norm1, norm2)                   \\\n  {                                                                     \\\n    sum += Int4MulTable[(((lhs) << 4) & 0xf0) | (((rhs) >> 0) & 0xf)] + \\\n           Int4MulTable[(((lhs) >> 0) & 0xf0) | (((rhs) >> 4) & 0xf)];  \\\n    norm1 += static_cast<float>(                                        \\\n        ((int8_t)((lhs) << 4) >> 4) * ((int8_t)((lhs) << 4) >> 4) +     \\\n        ((int8_t)((lhs) & 0xf0) >> 4) * ((int8_t)((lhs) & 0xf0) >> 4)); \\\n    norm2 += static_cast<float>(                                        \\\n        ((int8_t)((rhs) << 4) >> 4) * ((int8_t)((rhs) << 4) >> 4) +     \\\n        ((int8_t)((rhs) & 0xf0) >> 4) * ((int8_t)((rhs) & 0xf0) >> 4)); \\\n  }\n\n\n//! 
Compute the distance between matrix and query (SSE)\n#define FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum_0, xmm_sum_norm1, \\\n                          xmm_sum_norm2)                              \\\n  {                                                                   \\\n    __m128i xmm_lhs_0 = _mm_shuffle_epi8(                             \\\n        INT4_LOOKUP_SSE, _mm_and_si128((xmm_lhs), MASK_INT4_SSE));    \\\n    __m128i xmm_rhs_0 = _mm_shuffle_epi8(                             \\\n        INT4_LOOKUP_SSE, _mm_and_si128((xmm_rhs), MASK_INT4_SSE));    \\\n    __m128i xmm_lhs_1 = _mm_shuffle_epi8(                             \\\n        INT4_LOOKUP_SSE,                                              \\\n        _mm_and_si128(_mm_srli_epi32((xmm_lhs), 4), MASK_INT4_SSE));  \\\n    __m128i xmm_rhs_1 = _mm_shuffle_epi8(                             \\\n        INT4_LOOKUP_SSE,                                              \\\n        _mm_and_si128(_mm_srli_epi32((xmm_rhs), 4), MASK_INT4_SSE));  \\\n    FMA_INT8_SSE(xmm_lhs_0, xmm_rhs_0, xmm_sum_0);                    \\\n    FMA_INT8_SSE(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);                \\\n    FMA_INT8_SSE(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);                \\\n    FMA_INT8_SSE(xmm_lhs_1, xmm_rhs_1, xmm_sum_0);                    \\\n    FMA_INT8_SSE(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);                \\\n    FMA_INT8_SSE(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);                \\\n  }\n\n//! Calculate Fused-Multiply-Add (GENERAL)\n#define FMA_INT8_GENERAL(lhs, rhs, sum, norm1, norm2) \\\n  {                                                   \\\n    sum += static_cast<float>(lhs * rhs);             \\\n    norm1 += static_cast<float>(lhs * lhs);           \\\n    norm2 += static_cast<float>(rhs * rhs);           \\\n  }\n\n//! 
Calculate Fused-Multiply-Add (SSE)\n#define FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum)                          \\\n  xmm_sum = _mm_add_epi32(                                               \\\n      _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),            \\\n                                       _mm_sign_epi8(xmm_lhs, xmm_rhs)), \\\n                     ONES_INT16_SSE),                                    \\\n      xmm_sum)\n\n//! Calculate Fused-Multiply-Add (AVX)\n#define FMA_INT8_AVX(ymm_lhs, ymm_rhs, ymm_sum)                     \\\n  ymm_sum = _mm256_add_epi32(                                       \\\n      _mm256_madd_epi16(                                            \\\n          _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_rhs),            \\\n                               _mm256_sign_epi8(ymm_lhs, ymm_rhs)), \\\n          ONES_INT16_AVX),                                          \\\n      ymm_sum)\n\n#define FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_rhs, ymm_sum)                   \\\n  ymm_sum = _mm256_add_epi32(                                                \\\n      _mm256_set_m128i(                                                      \\\n          _mm_setzero_si128(),                                               \\\n          _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),            \\\n                                           _mm_sign_epi8(xmm_lhs, xmm_rhs)), \\\n                         ONES_INT16_SSE)),                                   \\\n      ymm_sum)\n\n//! 
Compute the distance between matrix and query (AVX)\n#define FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum_1,          \\\n                          ymm_sum_norm1, ymm_sum_norm2)                    \\\n  {                                                                        \\\n    __m256i ymm_lhs_0 = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_lhs), MASK_INT4_AVX));      \\\n    __m256i ymm_rhs_0 = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_rhs), MASK_INT4_AVX));      \\\n    __m256i ymm_lhs_1 = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX,                                                   \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_lhs), 4), MASK_INT4_AVX)); \\\n    __m256i ymm_rhs_1 = _mm256_shuffle_epi8(                               \\\n        INT4_LOOKUP_AVX,                                                   \\\n        _mm256_and_si256(_mm256_srli_epi32((ymm_rhs), 4), MASK_INT4_AVX)); \\\n    FMA_INT8_AVX(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);                         \\\n    FMA_INT8_AVX(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);                         \\\n    FMA_INT8_AVX(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);                     \\\n    FMA_INT8_AVX(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);                     \\\n    FMA_INT8_AVX(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);                     \\\n    FMA_INT8_AVX(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);                     \\\n  }\n\n"
  },
  {
    "path": "src/ailego/math/distance_matrix_popcnt.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_int32.i\"\n#include \"distance_matrix_int64.i\"\n#include \"matrix_utility.i\"\n\n//! Calculate population count (UINT32 Permute 1 SSE)\n#define POPCNT_UINT32_PERMUTE1_SSE(v, ...) \\\n  _mm_add_epi16(_mm_srli_epi16(v, 8), _mm_and_si128(v, _mm_set1_epi16(0xff)))\n\n//! Calculate population count (UINT32 Permute 2 SSE)\n#define POPCNT_UINT32_PERMUTE2_SSE(v, ...) \\\n  _mm_add_epi32(_mm_srli_epi32(v, 16), _mm_and_si128(v, _mm_set1_epi32(0xffff)))\n\n//! Calculate population count (UINT32 Permute 1 AVX)\n#define POPCNT_UINT32_PERMUTE1_AVX(v, ...)  \\\n  _mm256_add_epi16(_mm256_srli_epi16(v, 8), \\\n                   _mm256_and_si256(v, _mm256_set1_epi16(0xff)))\n\n//! Calculate population count (UINT32 Permute 2 AVX)\n#define POPCNT_UINT32_PERMUTE2_AVX(v, ...)   \\\n  _mm256_add_epi32(_mm256_srli_epi32(v, 16), \\\n                   _mm256_and_si256(v, _mm256_set1_epi32(0xffff)))\n\n//! Calculate population count (UINT64 Permute AVX)\n#define POPCNT_UINT64_PERMUTE_AVX(v, ...) \\\n  _mm256_sad_epu8(v, _mm256_setzero_si256())\n\n//! 
Compute the distance between matrix and query (UINT32, M=2, N=1)\n#define POPCNT_UINT32_2X1_SSE(m, q, cnt, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qe_0 = q + ((cnt >> 2) << 2);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 2) << 2) : qe_0);       \\\n  const uint32_t *qe_3 = q + cnt;                                            \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {              \\\n    for (; q != qe_1; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP1_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \\\n    for (; q != qe_2; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP2_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \\\n    for (; q != qe_0; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP3_SSE)                     \\\n    }                                                                        \\\n    if (qe_3 >= qe_0 + 2) {                                                  \\\n      __m128i xmm_m = _mm_load_si128((const __m128i *)(m));                  \\\n      __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);           
      \\\n      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \\\n      m += 4;                                                                \\\n      q += 2;                                                                \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; q != qe_1; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n                                POPCNT_UINT32_STEP1_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \\\n    for (; q != qe_2; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n                                POPCNT_UINT32_STEP2_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \\\n    for (; q != qe_0; m += 8, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n                                POPCNT_UINT32_STEP3_SSE)                     \\\n    }                                                                        \\\n    if (qe_3 >= qe_0 + 2) {                                                  \\\n      __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                 \\\n      __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                 \\\n      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \\\n      m += 4;                                                                \\\n      q += 2;                                 
                               \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \\\n  xmm_sum_0_0 = _mm_add_epi32(                                               \\\n      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \\\n  if (q != qe_3) {                                                           \\\n    __m128i xmm_m = _mm_set_epi32(0, 0, m[1], m[0]);                         \\\n    __m128i xmm_q = _mm_broadcast_si32(q);                                   \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \\\n  }                                                                          \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));\n\n//! Compute the distance between matrix and query (UINT32, M=2, N=2)\n#define POPCNT_UINT32_2X2_SSE(m, q, cnt, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \\\n  const uint32_t *qe_0 = q + ((cnt >> 1) << 2);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 2) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + ((4095 >> 1) << 2) : qe_0);       \\\n  const uint32_t *qe_3 = q + (cnt << 1);                                     \\\n  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {              \\\n    for (; q != qe_1; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP1_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \\\n    for (; q != qe_2; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP2_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \\\n    for (; q != qe_0; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \\\n                                POPCNT_UINT32_STEP3_SSE)                     \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; q != qe_1; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n                                POPCNT_UINT32_STEP1_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \\\n    for (; q != qe_2; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n              
                  POPCNT_UINT32_STEP2_SSE)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \\\n    for (; q != qe_0; m += 4, q += 4) {                                      \\\n      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \\\n                                POPCNT_UINT32_STEP3_SSE)                     \\\n    }                                                                        \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (q != qe_3) {                                                           \\\n    __m128i xmm_m = _mm_set_epi32(m[1], m[0], m[1], m[0]);                   \\\n    __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                   \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=4, N=1)\n#define POPCNT_UINT32_4X1_SSE(m, q, cnt, out, _NORM)                   \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())         \\\n  const uint32_t *qe_0 = q + ((cnt >> 1) << 1);                        \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 1) : qe_0);     \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + ((4095 >> 1) << 1) : qe_0); \\\n  const uint32_t *qe_3 = q + cnt;                                      \\\n  if (((uintptr_t)m & 0xf) == 0) {                                     \\\n    for (; q != qe_1; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,         \\\n                                POPCNT_UINT32_STEP1_SSE)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \\\n    for (; q != qe_2; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,         \\\n                                POPCNT_UINT32_STEP2_SSE)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \\\n    for (; q != qe_0; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,         \\\n                                POPCNT_UINT32_STEP3_SSE)               \\\n    }                                                                  \\\n    if (q != qe_3) {                                                   \\\n      __m128i xmm_m = _mm_load_si128((const __m128i *)(m));            \\\n      __m128i xmm_q = _mm_broadcast_si32(q);                           \\\n      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)               \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (; q != qe_1; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,        \\\n                                POPCNT_UINT32_STEP1_SSE)               \\\n    }                                                                  \\\n    
MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \\\n    for (; q != qe_2; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,        \\\n                                POPCNT_UINT32_STEP2_SSE)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \\\n    for (; q != qe_0; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,        \\\n                                POPCNT_UINT32_STEP3_SSE)               \\\n    }                                                                  \\\n    if (q != qe_3) {                                                   \\\n      __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));           \\\n      __m128i xmm_q = _mm_broadcast_si32(q);                           \\\n      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)               \\\n    }                                                                  \\\n  }                                                                    \\\n  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);               \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=4, N=2)\n#define POPCNT_UINT32_4X2_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 1) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 4, q += 2) {                           \\\n      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n         
                       POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=4, N=4)\n#define POPCNT_UINT32_4X4_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 2) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                
                        \\\n    for (; q != qe_1; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 4, q += 4) {                           \\\n      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=1)\n#define POPCNT_UINT32_8X1_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + cnt;                                 \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);              \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + 4095 : qe_0);          \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, ++q) {                              \\\n      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n         
                       POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=2)\n#define POPCNT_UINT32_8X2_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                
                        \\\n    for (; q != qe_1; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 2) {                           \\\n      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=4)\n#define POPCNT_UINT32_8X4_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 2) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 4) {                           \\\n      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n         
                       POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=8)\n#define POPCNT_UINT32_8X8_SSE(m, q, cnt, out, _NORM)              \\\n  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,    \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  } else {                                
                        \\\n    for (; q != qe_1; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP1_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP2_SSE)          \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 8, q += 8) {                           \\\n      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,   \\\n                                POPCNT_UINT32_STEP3_SSE)          \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=1)\n#define POPCNT_UINT32_16X1_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + cnt;                                 \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);              \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + 4095 : qe_0);          \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, ++q) {                             \\\n      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n         
                        POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=2)\n#define POPCNT_UINT32_16X2_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                               
                         \\\n    for (; q != qe_1; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 2) {                          \\\n      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=4)\n#define POPCNT_UINT32_16X4_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 2) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 4) {                          \\\n      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n         
                        POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=8)\n#define POPCNT_UINT32_16X8_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                               
                         \\\n    for (; q != qe_1; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 8) {                          \\\n      MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=16)\n#define POPCNT_UINT32_16X16_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 4);                           \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);        \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 4) : qe_0);    \\\n  if (((uintptr_t)m & 0xf) == 0) {                                 \\\n    for (; q != qe_1; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 16, q += 16) {                          \\\n      MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, 
_mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=1)\n#define POPCNT_UINT32_32X1_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + cnt;                                 \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);              \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + 4095 : qe_0);          \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  
} else {                                                        \\\n    for (; q != qe_1; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, ++q) {                             \\\n      MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=2)\n#define POPCNT_UINT32_32X2_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 1) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 2) {                          \\\n      MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n         
                        POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=4)\n#define POPCNT_UINT32_32X4_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 2) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                               
                         \\\n    for (; q != qe_1; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 4) {                          \\\n      MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=8)\n#define POPCNT_UINT32_32X8_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                          \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);       \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 3) : qe_0);   \\\n  if (((uintptr_t)m & 0xf) == 0) {                                \\\n    for (; q != qe_1; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                 POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  } else {                                                        \\\n    for (; q != qe_1; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                 POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                             \\\n    MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 8) {                          \\\n      MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n         
                        POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                             \\\n  }                                                               \\\n  if (((uintptr_t)out & 0xf) == 0) {                              \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                        \\\n    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=16)\n#define POPCNT_UINT32_32X16_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 4);                           \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);        \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);    \\\n  if (((uintptr_t)m & 0xf) == 0) {                                 \\\n    for (; q != qe_1; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  } else {          
                                               \\\n    for (; q != qe_1; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 16) {                          \\\n      MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=32)\n#define POPCNT_UINT32_32X32_SSE(m, q, cnt, out, _NORM)             \\\n  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())    \\\n  const uint32_t *qe_0 = q + (cnt << 5);                           \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 5) : qe_0);        \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 5) : qe_0);    \\\n  if (((uintptr_t)m & 0xf) == 0) {                                 \\\n    for (; q != qe_1; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _mm_load_si128,   \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP1_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE) \\\n    for (; q != qe_2; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP2_SSE)         \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE) \\\n    for (; q != qe_0; m += 32, q += 32) {                          \\\n      MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, 
_mm_loadu_si128,  \\\n                                  POPCNT_UINT32_STEP3_SSE)         \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)  \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=2, N=1)\n#define POPCNT_UINT32_2X1_AVX(m, q, cnt, out, _NORM)                           \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \\\n  const uint32_t *qe_0 = q + ((cnt >> 2) << 2);                                \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);             \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 2) << 2) : qe_0);         \\\n  const uint32_t *qe_3 = q + cnt;                                              \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                            \\\n    for (; q != qe_1; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \\\n                                POPCNT_UINT32_STEP1_AVX)                       \\\n    }                                                                          \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \\\n    for (; q != qe_2; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \\\n                                POPCNT_UINT32_STEP2_AVX)                       \\\n    }                                                                          \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)           
   \\\n    for (; q != qe_0; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \\\n                                POPCNT_UINT32_STEP3_AVX)                       \\\n    }                                                                          \\\n  } else {                                                                     \\\n    for (; q != qe_1; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \\\n                                POPCNT_UINT32_STEP1_AVX)                       \\\n    }                                                                          \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \\\n    for (; q != qe_2; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \\\n                                POPCNT_UINT32_STEP2_AVX)                       \\\n    }                                                                          \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \\\n    for (; q != qe_0; m += 8, q += 4) {                                        \\\n      MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \\\n                                POPCNT_UINT32_STEP3_AVX)                       \\\n    }                                                                          \\\n  }                                                                            \\\n  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \\\n                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \\\n  if (qe_3 >= qe_0 + 2) {                                                      \\\n    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                     \\\n 
   __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                     \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0)                           \\\n    m += 4;                                                                    \\\n    q += 2;                                                                    \\\n  }                                                                            \\\n  xmm_sum_0 = _mm_add_epi32(                                                   \\\n      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \\\n  if (q != qe_3) {                                                             \\\n    __m128i xmm_m = _mm_set_epi32(0, 0, m[1], m[0]);                           \\\n    __m128i xmm_q = _mm_broadcast_si32(q);                                     \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0)                           \\\n  }                                                                            \\\n  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0));\n\n//! Compute the distance between matrix and query (UINT32, M=2, N=2)\n#define POPCNT_UINT32_2X2_AVX(m, q, cnt, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \\\n  const uint32_t *qe_0 = q + ((cnt >> 2) << 3);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 3) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + ((4095 >> 2) << 3) : qe_0);       \\\n  const uint32_t *qe_3 = q + (cnt << 1);                                     \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {            \\\n    for (; q != qe_1; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                                POPCNT_UINT32_STEP1_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)            \\\n    for (; q != qe_2; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                                POPCNT_UINT32_STEP2_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)            \\\n    for (; q != qe_0; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                                POPCNT_UINT32_STEP3_AVX)                     \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; q != qe_1; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n                                POPCNT_UINT32_STEP1_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)            \\\n    for (; q != qe_2; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n              
                  POPCNT_UINT32_STEP2_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)            \\\n    for (; q != qe_0; m += 8, q += 8) {                                      \\\n      MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n                                POPCNT_UINT32_STEP3_AVX)                     \\\n    }                                                                        \\\n  }                                                                          \\\n  __m128i xmm_sum_0_0 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \\\n  __m128i xmm_sum_0_1 =                                                      \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \\\n  if (qe_3 >= qe_0 + 4) {                                                    \\\n    __m128i xmm_q = _mm_loadu_si128((const __m128i *)(q));                   \\\n    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                   \\\n    __m128i xmm_p = _mm_shuffle_epi32(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));       \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_p, xmm_sum_0_0)                       \\\n    xmm_p = _mm_shuffle_epi32(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));               \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_p, xmm_sum_0_1)                       \\\n    m += 4;                                                                  \\\n    q += 4;                                                                  \\\n  }                                                                          \\\n  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, 
xmm_sum_0_1),  \\\n                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \\\n  if (q != qe_3) {                                                           \\\n    __m128i xmm_m = _mm_set_epi32(m[1], m[0], m[1], m[0]);                   \\\n    __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                   \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=4, N=1)\n#define POPCNT_UINT32_4X1_AVX(m, q, cnt, out, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())      \\\n  const uint32_t *qe_0 = q + ((cnt >> 1) << 1);                        \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 1) : qe_0);     \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + ((4095 >> 1) << 1) : qe_0); \\\n  const uint32_t *qe_3 = q + cnt;                                      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (; q != qe_1; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (; q != qe_1; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    
MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 2) {                                \\\n      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  }                                                                    \\\n  __m128i xmm_sum_0_0 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));         \\\n  if (q != qe_3) {                                                     \\\n    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));             \\\n    __m128i xmm_q = _mm_broadcast_si32(q);                             \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                 \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=4, N=2)\n#define POPCNT_UINT32_4X2_AVX(m, q, cnt, out, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())      \\\n  const uint32_t *qe_0 = q + ((cnt >> 1) << 2);                        \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 2) : qe_0);     \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + ((4095 >> 1) << 2) : qe_0); \\\n  const uint32_t *qe_3 = q + (cnt << 1);                               \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                    \\\n    for (; q != qe_1; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (; q != qe_1; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    
MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 4) {                                \\\n      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  }                                                                    \\\n  __m128i xmm_sum_0_0 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));         \\\n  __m128i xmm_sum_0_1 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));         \\\n  if (q != qe_3) {                                                     \\\n    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));             \\\n    __m128i xmm_q = _mm_broadcast_si32(q);                             \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                 \\\n    xmm_q = _mm_broadcast_si32(q + 1);                                 \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_1)                 \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! 
Compute the distance between matrix and query (UINT32, M=4, N=4)\n#define POPCNT_UINT32_4X4_AVX(m, q, cnt, out, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())      \\\n  const uint32_t *qe_0 = q + ((cnt >> 1) << 3);                        \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 3) : qe_0);     \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 1) << 3) : qe_0); \\\n  const uint32_t *qe_3 = q + (cnt << 2);                               \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {      \\\n    for (; q != qe_1; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (; q != qe_1; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP1_AVX)               \\\n    }                                                      
            \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)      \\\n    for (; q != qe_2; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP2_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)      \\\n    for (; q != qe_0; m += 8, q += 8) {                                \\\n      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT32_STEP3_AVX)               \\\n    }                                                                  \\\n  }                                                                    \\\n  __m128i xmm_sum_0_0 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_0, 1));         \\\n  __m128i xmm_sum_0_1 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_1, 1));         \\\n  __m128i xmm_sum_0_2 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_2, 1));         \\\n  __m128i xmm_sum_0_3 =                                                \\\n      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),               \\\n                    _mm256_extracti128_si256(ymm_sum_0_3, 1));         \\\n  if (q != qe_3) {                                                     \\\n    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));             \\\n    __m128i xmm_q = _mm_broadcast_si32(q);                             \\\n    
POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                 \\\n    xmm_q = _mm_broadcast_si32(q + 1);                                 \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_1)                 \\\n    xmm_q = _mm_broadcast_si32(q + 2);                                 \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_2)                 \\\n    xmm_q = _mm_broadcast_si32(q + 3);                                 \\\n    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_3)                 \\\n  }                                                                    \\\n  if (((uintptr_t)out & 0xf) == 0) {                                   \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)       \\\n  } else {                                                             \\\n    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=1)\n#define POPCNT_UINT32_8X1_AVX(m, q, cnt, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + cnt;                                    \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);                 \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + 4095 : qe_0);             \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 8, ++q) {                                 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, ++q) {                                 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, ++q) {                                 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 8, ++q) {                                 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, ++q) {                                 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, ++q) {                                
 \\\n      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=2)\n#define POPCNT_UINT32_8X2_AVX(m, q, cnt, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                    
            POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 2) {                              \\\n      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=4)\n#define POPCNT_UINT32_8X4_AVX(m, q, cnt, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 2) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 8, q += 4) {                              \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 4) {                              \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 4) {                              \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 8, q += 4) {                              \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 4) {                              \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 4) {                             
 \\\n      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=8, N=8)\n#define POPCNT_UINT32_8X8_AVX(m, q, cnt, out, _NORM)                 \\\n  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,    \\\n                    
            POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP1_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP2_AVX)             \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 8, q += 8) {                              \\\n      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,   \\\n                                POPCNT_UINT32_STEP3_AVX)             \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=1)\n#define POPCNT_UINT32_16X1_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + cnt;                                    \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);                 \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + 4095 : qe_0);             \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 16, ++q) {                                \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, ++q) {                                \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, ++q) {                                \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 16, ++q) {                                \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, ++q) {                                \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, ++q) {                               
 \\\n      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=2)\n#define POPCNT_UINT32_16X2_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                   
              POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 2) {                             \\\n      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=4)\n#define POPCNT_UINT32_16X4_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 2) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 16, q += 4) {                             \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 4) {                             \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 4) {                             \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 16, q += 4) {                             \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 4) {                             \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 4) {                            
 \\\n      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=8)\n#define POPCNT_UINT32_16X8_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                   
              POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 8) {                             \\\n      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=16, N=16)\n#define POPCNT_UINT32_16X16_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 4);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? 
q + (31 << 4) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);       \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (; q != qe_1; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (; q != qe_1; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, 
POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 16, q += 16) {                             \\\n      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=1)\n#define POPCNT_UINT32_32X1_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + cnt;                                    \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);                 \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + 4095 : qe_0);             \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, ++q) {                                \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, ++q) {                                \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, ++q) {                                \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, ++q) {                                \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, ++q) {                                \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, ++q) {                               
 \\\n      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=2)\n#define POPCNT_UINT32_32X2_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 1);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                   
              POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 2) {                             \\\n      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=4)\n#define POPCNT_UINT32_32X4_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 2);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 2) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, q += 4) {                             \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 4) {                             \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 4) {                             \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, q += 4) {                             \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 4) {                             \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 4) {                            
 \\\n      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=8)\n#define POPCNT_UINT32_32X8_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 3);                             \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                   
              POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 8) {                             \\\n      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                 POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=16)\n#define POPCNT_UINT32_32X16_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 4);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? 
q + (31 << 4) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);       \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (; q != qe_1; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (; q != qe_1; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, 
POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 16) {                             \\\n      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT32, M=32, N=32)\n#define POPCNT_UINT32_32X32_AVX(m, q, cnt, out, _NORM)                \\\n  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())    \\\n  const uint32_t *qe_0 = q + (cnt << 5);                              \\\n  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 5) : qe_0);           \\\n  const uint32_t *qe_2 = (cnt > 4095 ? 
q + (4095 << 5) : qe_0);       \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                   \\\n    for (; q != qe_1; m += 32, q += 32) {                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 32) {                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 32) {                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  } else {                                                            \\\n    for (; q != qe_1; m += 32, q += 32) {                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP1_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \\\n    for (; q != qe_2; m += 32, q += 32) {                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP2_AVX)            \\\n    }                                                                 \\\n    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \\\n    for (; q != qe_0; m += 32, q += 32) 
{                             \\\n      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \\\n                                  POPCNT_UINT32_STEP3_AVX)            \\\n    }                                                                 \\\n  }                                                                   \\\n  if (((uintptr_t)out & 0x1f) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \\\n  } else {                                                            \\\n    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=2, N=1)\n#define POPCNT_UINT64_2X1_AVX(m, q, cnt, out, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())      \\\n  const uint64_t *qe_0 = q + ((cnt >> 2) << 2);                        \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);     \\\n  const uint64_t *qe_2 = q + cnt;                                      \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {      \\\n    for (; q != qe_1; m += 8, q += 4) {                                \\\n      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT64_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)       \\\n    for (; q != qe_0; m += 8, q += 4) {                                \\\n      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \\\n                                POPCNT_UINT64_STEP2_AVX)               \\\n    }                                                                  \\\n    if (qe_2 >= qe_0 + 2) {                                            \\\n      __m256i ymm_m = _mm256_load_si256((const __m256i *)(m));         \\\n      __m256i ymm_q = 
_mm256_set_epi64x(q[1], q[1], q[0], q[0]);       \\\n      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)               \\\n      m += 4;                                                          \\\n      q += 2;                                                          \\\n    }                                                                  \\\n  } else {                                                             \\\n    for (; q != qe_1; m += 8, q += 4) {                                \\\n      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT64_STEP1_AVX)               \\\n    }                                                                  \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)       \\\n    for (; q != qe_0; m += 8, q += 4) {                                \\\n      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \\\n                                POPCNT_UINT64_STEP2_AVX)               \\\n    }                                                                  \\\n    if (qe_2 >= qe_0 + 2) {                                            \\\n      __m256i ymm_m = _mm256_loadu_si256((const __m256i *)(m));        \\\n      __m256i ymm_q = _mm256_set_epi64x(q[1], q[1], q[0], q[0]);       \\\n      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)               \\\n      m += 4;                                                          \\\n      q += 2;                                                          \\\n    }                                                                  \\\n  }                                                                    \\\n  ymm_sum_0_0 = _mm256_add_epi64(ymm_sum_0_0, ymm_sum_0_1);            \\\n  ymm_sum_0_0 = _mm256_add_epi64(                                      \\\n      ymm_sum_0_0,                                                     \\\n      _mm256_permute4x64_epi64(ymm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); 
\\\n  if (q != qe_2) {                                                     \\\n    __m256i ymm_m = _mm256_set_epi64x(0, 0, m[1], m[0]);               \\\n    __m256i ymm_q = _mm256_broadcast_si64(q);                          \\\n    POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)                 \\\n  }                                                                    \\\n  _mm_storel_pi((__m64 *)out, _NORM(ymm_sum_0_0));\n\n//! Compute the distance between matrix and query (UINT64, M=2, N=2)\n#define POPCNT_UINT64_2X2_AVX(m, q, cnt, out, _NORM)                         \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \\\n  const uint64_t *qe_0 = q + ((cnt >> 1) << 2);                              \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 2) : qe_0);           \\\n  const uint64_t *qe_2 = q + (cnt << 1);                                     \\\n  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {            \\\n    for (; q != qe_1; m += 4, q += 4) {                                      \\\n      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                                POPCNT_UINT64_STEP1_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)             \\\n    for (; q != qe_0; m += 4, q += 4) {                                      \\\n      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \\\n                                POPCNT_UINT64_STEP2_AVX)                     \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; q != qe_1; m += 4, q += 4) {                                      \\\n      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n                                
POPCNT_UINT64_STEP1_AVX)                     \\\n    }                                                                        \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)             \\\n    for (; q != qe_0; m += 4, q += 4) {                                      \\\n      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \\\n                                POPCNT_UINT64_STEP2_AVX)                     \\\n    }                                                                        \\\n  }                                                                          \\\n  ymm_sum_0_0 = _mm256_add_epi64(                                            \\\n      _mm256_inserti128_si256(ymm_sum_0_0,                                   \\\n                              _mm256_castsi256_si128(ymm_sum_0_1), 1),       \\\n      _mm256_inserti128_si256(ymm_sum_0_1,                                   \\\n                              _mm256_extractf128_si256(ymm_sum_0_0, 1), 0)); \\\n  if (q != qe_2) {                                                           \\\n    __m256i ymm_m = _mm256_set_epi64x(m[1], m[0], m[1], m[0]);               \\\n    __m256i ymm_q = _mm256_set_epi64x(q[1], q[1], q[0], q[0]);               \\\n    POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)                       \\\n  }                                                                          \\\n  if (((uintptr_t)out & 0xf) == 0) {                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)             \\\n  } else {                                                                   \\\n    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)            \\\n  }\n\n//! 
Compute the distance between matrix and query (UINT64, M=4, N=1)\n#define POPCNT_UINT64_4X1_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + ((cnt >> 1) << 1);                    \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 1) : qe_0); \\\n  const uint64_t *qe_2 = q + cnt;                                  \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n    if (q != qe_2) {                                               \\\n      __m256i ymm_m = _mm256_load_si256((const __m256i *)(m));     \\\n      __m256i ymm_q = _mm256_broadcast_si64(q);                    \\\n      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 2) {                            \\\n      
MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n    if (q != qe_2) {                                               \\\n      __m256i ymm_m = _mm256_loadu_si256((const __m256i *)(m));    \\\n      __m256i ymm_q = _mm256_broadcast_si64(q);                    \\\n      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)           \\\n    }                                                              \\\n  }                                                                \\\n  ymm_sum_0_0 = _mm256_add_epi64(ymm_sum_0_0, ymm_sum_1_0);        \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=4, N=2)\n#define POPCNT_UINT64_4X2_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + (cnt << 1);                           \\\n  const uint64_t *qe_1 = (cnt > 31 ? 
q + (31 << 1) : qe_0);        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 4, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 4, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 4, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 4, q += 2) {                            \\\n      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(1, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(1, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! 
Compute the distance between matrix and query (UINT64, M=4, N=4)\n#define POPCNT_UINT64_4X4_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + (cnt << 2);                           \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 4, q += 4) {                            \\\n      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 4, q += 4) {                            \\\n      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 4, q += 4) {                            \\\n      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 4, q += 4) {                            \\\n      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(1, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {       
                                                  \\\n    MATRIX_VAR_STORE(1, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=8, N=1)\n#define POPCNT_UINT64_8X1_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + cnt;                                  \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);               \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 8, ++q) {                               \\\n      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, ++q) {                               \\\n      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 8, ++q) {                               \\\n      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, ++q) {                               \\\n      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out 
& 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(2, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(2, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=8, N=2)\n#define POPCNT_UINT64_8X2_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + (cnt << 1);                           \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 2) {                            \\\n      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                       
                                       \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(2, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(2, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=8, N=4)\n#define POPCNT_UINT64_8X4_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + (cnt << 2);                           \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 4) {                            \\\n      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 4) {                            \\\n      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 8, q += 4) {                            \\\n      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 4) {                            \\\n      
MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(2, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(2, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=8, N=8)\n#define POPCNT_UINT64_8X8_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())  \\\n  const uint64_t *qe_0 = q + (cnt << 3);                           \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);        \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                \\\n    for (; q != qe_1; m += 8, q += 8) {                            \\\n      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 8) {                            \\\n      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  } else {                                                         \\\n    for (; q != qe_1; m += 8, q += 8) {                            \\\n      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                              \\\n    
MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \\\n    for (; q != qe_0; m += 8, q += 8) {                            \\\n      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                              \\\n  }                                                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                               \\\n    MATRIX_VAR_STORE(2, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)   \\\n  } else {                                                         \\\n    MATRIX_VAR_STORE(2, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=16, N=1)\n#define POPCNT_UINT64_16X1_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + cnt;                                   \\\n  const uint64_t *qe_1 = (cnt > 31 ? 
q + 31 : qe_0);                \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 16, ++q) {                               \\\n      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, ++q) {                               \\\n      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 16, ++q) {                               \\\n      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, ++q) {                               \\\n      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (UINT64, M=16, N=2)\n#define POPCNT_UINT64_16X2_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 1);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 16, q += 2) {                            \\\n      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 2) {                            \\\n      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 16, q += 2) {                            \\\n      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 2) {                            \\\n      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 2, 4, ymm_sum, out, _mm_store_ps, 
_NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=16, N=4)\n#define POPCNT_UINT64_16X4_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 2);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 16, q += 4) {                            \\\n      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 4) {                            \\\n      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 16, q += 4) {                            \\\n      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 4) {                            \\\n      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                 
                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=16, N=8)\n#define POPCNT_UINT64_16X8_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 3);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 16, q += 8) {                            \\\n      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 8) {                            \\\n      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 16, q += 8) {                            \\\n      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 8) {                            \\\n      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n              
                   POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(4, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(4, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=16, N=16)\n#define POPCNT_UINT64_16X16_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 4);                             \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 16, q += 16) {                            \\\n      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 16) {                            \\\n      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 16, q += 16) {                            \\\n      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, 
POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 16, q += 16) {                            \\\n      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(4, 16, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(4, 16, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=32, N=1)\n#define POPCNT_UINT64_32X1_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 1, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + cnt;                                   \\\n  const uint64_t *qe_1 = (cnt > 31 ? 
q + 31 : qe_0);                \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 32, ++q) {                               \\\n      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, ++q) {                               \\\n      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 32, ++q) {                               \\\n      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, ++q) {                               \\\n      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! 
Compute the distance between matrix and query (UINT64, M=32, N=2)\n#define POPCNT_UINT64_32X2_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 2, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 1);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 32, q += 2) {                            \\\n      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 2) {                            \\\n      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 32, q += 2) {                            \\\n      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 2) {                            \\\n      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 2, 4, ymm_sum, out, _mm_store_ps, 
_NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=32, N=4)\n#define POPCNT_UINT64_32X4_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 4, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 2);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 32, q += 4) {                            \\\n      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 4) {                            \\\n      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 32, q += 4) {                            \\\n      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 4) {                            \\\n      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                 
                                \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=32, N=8)\n#define POPCNT_UINT64_32X8_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 8, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 3);                            \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);         \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                 \\\n    for (; q != qe_1; m += 32, q += 8) {                            \\\n      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 8) {                            \\\n      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                 POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  } else {                                                          \\\n    for (; q != qe_1; m += 32, q += 8) {                            \\\n      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                 POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                               \\\n    MATRIX_VAR_PERMUTE(8, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 8) {                            \\\n      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n              
                   POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                               \\\n  }                                                                 \\\n  if (((uintptr_t)out & 0xf) == 0) {                                \\\n    MATRIX_VAR_STORE(8, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                          \\\n    MATRIX_VAR_STORE(8, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=32, N=16)\n#define POPCNT_UINT64_32X16_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 16, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 4);                             \\\n  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, q += 16) {                            \\\n      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(8, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 16) {                            \\\n      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, q += 16) {                            \\\n      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(8, 16, ymm_sum, 
POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 16) {                            \\\n      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(8, 16, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(8, 16, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n\n//! Compute the distance between matrix and query (UINT64, M=32, N=32)\n#define POPCNT_UINT64_32X32_AVX(m, q, cnt, out, _NORM)               \\\n  MATRIX_VAR_INIT(8, 32, __m256i, ymm_sum, _mm256_setzero_si256())   \\\n  const uint64_t *qe_0 = q + (cnt << 5);                             \\\n  const uint64_t *qe_1 = (cnt > 31 ? 
q + (31 << 5) : qe_0);          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                  \\\n    for (; q != qe_1; m += 32, q += 32) {                            \\\n      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(8, 32, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 32) {                            \\\n      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,  \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  } else {                                                           \\\n    for (; q != qe_1; m += 32, q += 32) {                            \\\n      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP1_AVX)           \\\n    }                                                                \\\n    MATRIX_VAR_PERMUTE(8, 32, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \\\n    for (; q != qe_0; m += 32, q += 32) {                            \\\n      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256, \\\n                                  POPCNT_UINT64_STEP2_AVX)           \\\n    }                                                                \\\n  }                                                                  \\\n  if (((uintptr_t)out & 0xf) == 0) {                                 \\\n    MATRIX_VAR_STORE(8, 32, 4, ymm_sum, out, _mm_store_ps, _NORM)    \\\n  } else {                                                           \\\n    MATRIX_VAR_STORE(8, 32, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \\\n  }\n"
  },
  {
    "path": "src/ailego/math/distance_utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Four-bits Squared Difference Table\n */\nstatic const AILEGO_ALIGNED(64) uint8_t Int4SquaredDiffTable[256] = {\n    0,  1,  4,   9,   16,  25,  36,  49,  64,  49,  36,  25,  16,  9,   4,  1,\n    1,  0,  1,   4,   9,   16,  25,  36,  81,  64,  49,  36,  25,  16,  9,  4,\n    4,  1,  0,   1,   4,   9,   16,  25,  100, 81,  64,  49,  36,  25,  16, 9,\n    9,  4,  1,   0,   1,   4,   9,   16,  121, 100, 81,  64,  49,  36,  25, 16,\n    16, 9,  4,   1,   0,   1,   4,   9,   144, 121, 100, 81,  64,  49,  36, 25,\n    25, 16, 9,   4,   1,   0,   1,   4,   169, 144, 121, 100, 81,  64,  49, 36,\n    36, 25, 16,  9,   4,   1,   0,   1,   196, 169, 144, 121, 100, 81,  64, 49,\n    49, 36, 25,  16,  9,   4,   1,   0,   225, 196, 169, 144, 121, 100, 81, 64,\n    64, 81, 100, 121, 144, 169, 196, 225, 0,   1,   4,   9,   16,  25,  36, 49,\n    49, 64, 81,  100, 121, 144, 169, 196, 1,   0,   1,   4,   9,   16,  25, 36,\n    36, 49, 64,  81,  100, 121, 144, 169, 4,   1,   0,   1,   4,   9,   16, 25,\n    25, 36, 49,  64,  81,  100, 121, 144, 9,   4,   1,   0,   1,   4,   9,  16,\n    16, 25, 36,  49,  64,  81,  100, 121, 16,  9,   4,   1,   0,   1,   4,  9,\n    9,  16, 25,  36,  49,  64,  81,  100, 25,  16,  9,   4,   1,   0,   1,  4,\n    4,  9,  16,  
25,  36,  49,  64,  81,  36,  25,  16,  9,   4,   1,   0,  1,\n    1,  4,  9,   16,  25,  36,  49,  64,  49,  36,  25,  16,  9,   4,   1,  0,\n};\n\n/*! Four-bits Integer Multiplication Table\n */\nstatic const AILEGO_ALIGNED(64) int8_t Int4MulTable[256] = {\n    0, 0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n    0, 1,  2,   3,   4,   5,   6,   7,   -8,  -7,  -6,  -5,  -4,  -3,  -2,  -1,\n    0, 2,  4,   6,   8,   10,  12,  14,  -16, -14, -12, -10, -8,  -6,  -4,  -2,\n    0, 3,  6,   9,   12,  15,  18,  21,  -24, -21, -18, -15, -12, -9,  -6,  -3,\n    0, 4,  8,   12,  16,  20,  24,  28,  -32, -28, -24, -20, -16, -12, -8,  -4,\n    0, 5,  10,  15,  20,  25,  30,  35,  -40, -35, -30, -25, -20, -15, -10, -5,\n    0, 6,  12,  18,  24,  30,  36,  42,  -48, -42, -36, -30, -24, -18, -12, -6,\n    0, 7,  14,  21,  28,  35,  42,  49,  -56, -49, -42, -35, -28, -21, -14, -7,\n    0, -8, -16, -24, -32, -40, -48, -56, 64,  56,  48,  40,  32,  24,  16,  8,\n    0, -7, -14, -21, -28, -35, -42, -49, 56,  49,  42,  35,  28,  21,  14,  7,\n    0, -6, -12, -18, -24, -30, -36, -42, 48,  42,  36,  30,  24,  18,  12,  6,\n    0, -5, -10, -15, -20, -25, -30, -35, 40,  35,  30,  25,  20,  15,  10,  5,\n    0, -4, -8,  -12, -16, -20, -24, -28, 32,  28,  24,  20,  16,  12,  8,   4,\n    0, -3, -6,  -9,  -12, -15, -18, -21, 24,  21,  18,  15,  12,  9,   6,   3,\n    0, -2, -4,  -6,  -8,  -10, -12, -14, 16,  14,  12,  10,  8,   6,   4,   2,\n    0, -1, -2,  -3,  -4,  -5,  -6,  -7,  8,   7,   6,   5,   4,   3,   2,   1,\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n/*! Squared Euclidean Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N, typename = void>\nstruct SquaredEuclideanDistanceMatrix;\n\n/*! Squared Euclidean Distance Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct SquaredEuclideanDistanceMatrix<\n    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    float sum = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      sum += MathHelper::SquaredDifference(m[i], q[i]);\n    }\n    *out = sum;\n  }\n};\n\ntemplate <>\nstruct SquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct SquaredEuclideanDistanceMatrix<int8_t, 1, 1> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct SquaredEuclideanDistanceMatrix<Float16, 1, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct SquaredEuclideanDistanceMatrix<float, 1, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n/*! Squared Euclidean Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N>\nstruct SquaredEuclideanDistanceMatrix<\n    T, M, N,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = MathHelper::SquaredDifference(m_val, q[j]);\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += MathHelper::SquaredDifference(m_val, q[j]);\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n  }\n};\n\n/*! Squared Euclidean Distance Matrix (N=1)\n */\ntemplate <typename T, size_t M>\nstruct SquaredEuclideanDistanceMatrix<\n    T, M, 1,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    const ValueType *q_end = q + dim;\n    if (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = MathHelper::SquaredDifference(m[i], q_val);\n      }\n      m += M;\n    }\n\n    while (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += MathHelper::SquaredDifference(m[i], q_val);\n      }\n      m += M;\n    }\n  }\n};\n\n/*! 
Squared Euclidean Distance Matrix (INT8)\n */\ntemplate <size_t M, size_t N>\nstruct SquaredEuclideanDistanceMatrix<\n    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 2;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = SquaredDifference(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += SquaredDifference(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! 
Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 24), (int8_t)(rhs >> 24));\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Squared Euclidean Distance Matrix (INT8, N=1)\n */\ntemplate <size_t M>\nstruct SquaredEuclideanDistanceMatrix<int8_t, M, 1,\n                                      typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 2);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = SquaredDifference(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += SquaredDifference(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! 
Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +\n                           MathHelper::SquaredDifference<int8_t, int32_t>(\n                               (int8_t)(lhs >> 24), (int8_t)(rhs >> 24));\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Squared Euclidean Distance Matrix (INT4)\n */\ntemplate <size_t M, size_t N>\nstruct SquaredEuclideanDistanceMatrix<\n    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 3;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = SquaredDifference(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += SquaredDifference(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4SquaredDiffTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n/*! Squared Euclidean Distance Matrix (INT4, N=1)\n */\ntemplate <size_t M>\nstruct SquaredEuclideanDistanceMatrix<uint8_t, M, 1,\n                                      typename std::enable_if<M >= 2>::type> {\n  //! 
Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 3);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = SquaredDifference(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += SquaredDifference(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4SquaredDiffTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n/*! Euclidean Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N,\n          typename =\n              typename std::enable_if<(IsSignedArithmetic<T>::value ||\n                                       std::is_same<T, uint8_t>::value) &&\n                                      M >= 1 && N >= 1>::type>\nstruct EuclideanDistanceMatrix {\n  //! 
Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    SquaredEuclideanDistanceMatrix<T, M, N>::Compute(m, q, dim, out);\n    for (size_t i = 0; i < N * M; ++i) {\n      float val = *out;\n      *out++ = std::sqrt(val);\n    }\n  }\n};\n\n/*! Euclidean Distance Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct EuclideanDistanceMatrix<\n    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    float sum = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      sum += MathHelper::SquaredDifference(m[i], q[i]);\n    }\n    *out = std::sqrt(sum);\n  }\n};\n\ntemplate <>\nstruct EuclideanDistanceMatrix<uint8_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct EuclideanDistanceMatrix<int8_t, 1, 1> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct EuclideanDistanceMatrix<Float16, 1, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! 
Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct EuclideanDistanceMatrix<float, 1, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n/*! Squared Euclidean Distance Sparse Matrix\n */\ntemplate <typename T>\nstruct SquaredEuclideanSparseDistanceMatrix {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  static float ComputeSquaredEuclideanSparseDistanceInSegment(\n      uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n      const ValueType *m_sparse_value, uint32_t q_sparse_count,\n      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const void *m_sparse_data_in,\n                             const void *q_sparse_data_in, float *out) {\n    ailego_assert(out);\n\n    const uint8_t *m_sparse_data =\n        reinterpret_cast<const uint8_t *>(m_sparse_data_in);\n    const uint8_t *q_sparse_data =\n        reinterpret_cast<const uint8_t *>(q_sparse_data_in);\n\n    const uint32_t m_sparse_count =\n        *reinterpret_cast<const uint32_t *>(m_sparse_data);\n    const uint32_t q_sparse_count =\n        *reinterpret_cast<const uint32_t *>(q_sparse_data);\n\n    const uint32_t m_seg_count =\n        *reinterpret_cast<const uint32_t *>(m_sparse_data + sizeof(uint32_t));\n    const uint32_t q_seg_count =\n        *reinterpret_cast<const uint32_t *>(q_sparse_data + sizeof(uint32_t));\n\n    const uint32_t *m_seg_id = reinterpret_cast<const uint32_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t));\n    const uint32_t *q_seg_id = reinterpret_cast<const uint32_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t));\n\n    const uint32_t *m_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t) + m_seg_count * sizeof(uint32_t));\n    const uint32_t *q_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t) + q_seg_count * sizeof(uint32_t));\n\n    const uint16_t *m_sparse_index = reinterpret_cast<const uint16_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t) +\n        m_seg_count * 2 * sizeof(uint32_t));\n    const uint16_t *q_sparse_index = reinterpret_cast<const uint16_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t) +\n        q_seg_count * 2 * sizeof(uint32_t));\n\n    const ValueType *m_sparse_value = reinterpret_cast<const ValueType *>(\n        m_sparse_data + 2 * sizeof(uint32_t) +\n        m_seg_count * 2 * sizeof(uint32_t) + m_sparse_count * sizeof(uint16_t));\n    const ValueType *q_sparse_value = reinterpret_cast<const ValueType *>(\n  
      q_sparse_data + 2 * sizeof(uint32_t) +\n        q_seg_count * 2 * sizeof(uint32_t) + q_sparse_count * sizeof(uint16_t));\n\n    float sum = 0.0f;\n\n    size_t m_s = 0;\n    size_t q_s = 0;\n\n    size_t m_count = 0;\n    size_t q_count = 0;\n\n    while (m_s < m_seg_count && q_s < q_seg_count) {\n      if (m_seg_id[m_s] == q_seg_id[q_s]) {\n        sum += ComputeSquaredEuclideanSparseDistanceInSegment(\n            m_seg_vec_cnt[m_s], m_sparse_index + m_count,\n            m_sparse_value + m_count, q_seg_vec_cnt[q_s],\n            q_sparse_index + q_count, q_sparse_value + q_count);\n\n        m_count += m_seg_vec_cnt[m_s];\n        q_count += q_seg_vec_cnt[q_s];\n\n        ++m_s;\n        ++q_s;\n      } else if (m_seg_id[m_s] < q_seg_id[q_s]) {\n        for (size_t i = 0; i < m_seg_vec_cnt[m_s]; i++) {\n          float value = (m_sparse_value + m_count)[i];\n          sum += value * value;\n        }\n\n        m_count += m_seg_vec_cnt[m_s];\n\n        ++m_s;\n      } else {\n        for (size_t i = 0; i < q_seg_vec_cnt[q_s]; i++) {\n          float value = (q_sparse_value + q_count)[i];\n          sum += value * value;\n        }\n\n        q_count += q_seg_vec_cnt[q_s];\n        ++q_s;\n      }\n    }\n\n    for (; m_s < m_seg_count; m_s++) {\n      for (size_t i = 0; i < m_seg_vec_cnt[m_s]; i++) {\n        float diff = (m_sparse_value + m_count)[i];\n        sum += diff * diff;\n      }\n\n      m_count += m_seg_vec_cnt[m_s];\n    }\n\n    for (; q_s < q_seg_count; q_s++) {\n      for (size_t i = 0; i < q_seg_vec_cnt[q_s]; i++) {\n        float diff = (q_sparse_value + q_count)[i];\n        sum += diff * diff;\n      }\n\n      q_count += q_seg_vec_cnt[q_s];\n    }\n\n    *out = sum;\n  }\n};\n\ntemplate <typename T>\nfloat SquaredEuclideanSparseDistanceMatrix<T>::\n    ComputeSquaredEuclideanSparseDistanceInSegment(\n        uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n        const ValueType *m_sparse_value, uint32_t q_sparse_count,\n     
   const uint16_t *q_sparse_index, const ValueType *q_sparse_value) {\n  float sum = 0.0f;\n\n  size_t m_i = 0;\n  size_t q_i = 0;\n\n  while (m_i < m_sparse_count && q_i < q_sparse_count) {\n    if (m_sparse_index[m_i] == q_sparse_index[q_i]) {\n      float diff = m_sparse_value[m_i] - q_sparse_value[q_i];\n      sum += diff * diff;\n      ++m_i;\n      ++q_i;\n    } else if (m_sparse_index[m_i] < q_sparse_index[q_i]) {\n      float diff = m_sparse_value[m_i];\n      sum += diff * diff;\n      ++m_i;\n    } else {\n      float diff = q_sparse_value[q_i];\n      sum += diff * diff;\n\n      ++q_i;\n    }\n  }\n\n  for (; m_i < m_sparse_count; m_i++) {\n    float diff = m_sparse_value[m_i];\n    sum += diff * diff;\n  }\n\n  for (; q_i < q_sparse_count; q_i++) {\n    float diff = q_sparse_value[q_i];\n    sum += diff * diff;\n  }\n\n  return sum;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp16_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#ifdef __AVX__\n\n//! Squared Euclidean distance of two FP16 vectors (AVX build).\n//! The inputs, the element count and the address of a zero-initialized local\n//! are handed to ACCUM_FP16_1X1_AVX; the local is returned afterwards.\n//! NOTE(review): presumably the macro stores the summed squared differences\n//! through that pointer -- confirm in the included .i headers.\nfloat SquaredEuclideanDistanceFp16AVX(const Float16 *lhs, const Float16 *rhs,\n                                      size_t size) {\n  float score = 0.0f;\n  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, )\n  return score;\n}\n\n#endif  // __AVX__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp16_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#ifdef __AVX512F__\n//! Squared Euclidean distance of two FP16 vectors (AVX512F build).\n//! The inputs, the element count and the address of a zero-initialized local\n//! are handed to ACCUM_FP16_1X1_AVX512; the local is returned afterwards.\n//! NOTE(review): presumably the macro stores the summed squared differences\n//! through that pointer -- confirm in the included .i headers.\nfloat SquaredEuclideanDistanceFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                                         size_t size) {\n  float score = 0.0f;\n  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &score, 0ull, )\n  return score;\n}\n#endif  // __AVX512F__\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp16_avx512fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX512FP16__)\n//! Squared Euclidean Distance\nfloat SquaredEuclideanDistanceFp16AVX512FP16(const Float16 *lhs,\n                                             const Float16 *rhs, size_t size) {\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 6) << 6);\n\n  __m512h zmm_sum_0 = _mm512_setzero_ph();\n  __m512h zmm_sum_1 = _mm512_setzero_ph();\n\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m512h zmm_d_0 =\n          _mm512_sub_ph(_mm512_load_ph(lhs + 0), _mm512_load_ph(rhs + 0));\n      __m512h zmm_d_1 =\n          _mm512_sub_ph(_mm512_load_ph(lhs + 32), _mm512_load_ph(rhs + 32));\n      zmm_sum_0 = _mm512_fmadd_ph(zmm_d_0, zmm_d_0, zmm_sum_0);\n      zmm_sum_1 = _mm512_fmadd_ph(zmm_d_1, zmm_d_1, zmm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m512h zmm_d = _mm512_sub_ph(_mm512_load_ph(lhs), _mm512_load_ph(rhs));\n      zmm_sum_0 = _mm512_fmadd_ph(zmm_d, zmm_d, zmm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m512h zmm_d_0 =\n    
      _mm512_sub_ph(_mm512_loadu_ph(lhs + 0), _mm512_loadu_ph(rhs + 0));\n      __m512h zmm_d_1 =\n          _mm512_sub_ph(_mm512_loadu_ph(lhs + 32), _mm512_loadu_ph(rhs + 32));\n      zmm_sum_0 = _mm512_fmadd_ph(zmm_d_0, zmm_d_0, zmm_sum_0);\n      zmm_sum_1 = _mm512_fmadd_ph(zmm_d_1, zmm_d_1, zmm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m512h zmm_d = _mm512_sub_ph(_mm512_loadu_ph(lhs), _mm512_loadu_ph(rhs));\n      zmm_sum_0 = _mm512_fmadd_ph(zmm_d, zmm_d, zmm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n  }\n\n  zmm_sum_0 = _mm512_add_ph(zmm_sum_0, zmm_sum_1);\n  if (lhs != last) {\n    __mmask32 mask = (__mmask32)((1 << (last - lhs)) - 1);\n    __m512i zmm_undefined = _mm512_undefined_epi32();\n    __m512h zmm_undefined_ph = _mm512_undefined_ph();\n    __m512h zmm_d = _mm512_mask_sub_ph(\n        zmm_undefined_ph, mask,\n        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, lhs)),\n        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, rhs)));\n    zmm_sum_0 = _mm512_mask3_fmadd_ph(zmm_d, zmm_d, zmm_sum_0, mask);\n  }\n\n  return HorizontalAdd_FP16_V512(zmm_sum_0);\n}\n#endif\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\nfloat SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                       size_t size);\n#endif\n\n#if defined(__AVX512FP16__)\nfloat SquaredEuclideanDistanceFp16AVX512FP16(const Float16 *lhs,\n                                             const Float16 *rhs, size_t size);\n#endif\n\n#if defined(__AVX512F__)\nfloat SquaredEuclideanDistanceFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                                         size_t size);\n#endif\n\n#if defined(__AVX__)\nfloat SquaredEuclideanDistanceFp16AVX(const Float16 *lhs, const Float16 *rhs,\n                                      size_t size);\n#endif\n\nfloat SquaredEuclideanDistanceFp16Scalar(const Float16 *lhs, const Float16 *rhs,\n                                         size_t size);\n\n//! 
Compute the distance between matrix and query (FP16, M=1, N=1)\nvoid SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(const ValueType *m,\n                                                            const ValueType *q,\n                                                            size_t dim,\n                                                            float *out) {\n#if defined(__ARM_NEON)\n  *out = SquaredEuclideanDistanceFp16NEON(m, q, dim);\n#else\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    *out = SquaredEuclideanDistanceFp16AVX512FP16(m, q, dim);\n    return;\n  }\n#endif\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = SquaredEuclideanDistanceFp16AVX512(m, q, dim);\n    return;\n  }\n#endif\n\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = SquaredEuclideanDistanceFp16AVX(m, q, dim);\n    return;\n  }\n#endif\n  *out = SquaredEuclideanDistanceFp16Scalar(m, q, dim);\n\n#endif  //__ARM_NEON\n}\n\n//! Compute the distance between matrix and query (FP16, M=1, N=1)\nvoid EuclideanDistanceMatrix<Float16, 1, 1>::Compute(const ValueType *m,\n                                                     const ValueType *q,\n                                                     size_t dim, float *out) {\n  SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(m, q, dim, out);\n  *out = std::sqrt(*out);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp16_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#ifdef __ARM_NEON\n//! Squared Euclidean distance of two FP16 vectors (NEON build).\n//! The inputs, the element count and the address of a zero-initialized local\n//! are handed to ACCUM_FP16_1X1_NEON; the local is returned afterwards.\n//! NOTE(review): presumably the macro stores the summed squared differences\n//! through that pointer -- confirm in the included .i headers.\nfloat SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                       size_t size) {\n  float score = 0.0f;\n  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, )\n  return score;\n}\n#endif  // __ARM_NEON\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp32_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX__)\nfloat SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,\n                                              const float *rhs, size_t size);\n\nfloat SquaredEuclideanDistanceFp32AVXInternal(const float *lhs,\n                                              const float *rhs, size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 4) << 4);\n\n  __m256 ymm_sum_0 = _mm256_setzero_ps();\n  __m256 ymm_sum_1 = _mm256_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_d_0 =\n          _mm256_sub_ps(_mm256_load_ps(lhs + 0), _mm256_load_ps(rhs + 0));\n      __m256 ymm_d_1 =\n          _mm256_sub_ps(_mm256_load_ps(lhs + 8), _mm256_load_ps(rhs + 8));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_d_0, ymm_d_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_d_1, ymm_d_1, ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      __m256 ymm_d = _mm256_sub_ps(_mm256_load_ps(lhs), _mm256_load_ps(rhs));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_d, ymm_d, ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  } 
else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_d_0 =\n          _mm256_sub_ps(_mm256_loadu_ps(lhs + 0), _mm256_loadu_ps(rhs + 0));\n      __m256 ymm_d_1 =\n          _mm256_sub_ps(_mm256_loadu_ps(lhs + 8), _mm256_loadu_ps(rhs + 8));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_d_0, ymm_d_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_d_1, ymm_d_1, ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      __m256 ymm_d = _mm256_sub_ps(_mm256_loadu_ps(lhs), _mm256_loadu_ps(rhs));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_d, ymm_d, ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  }\n  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));\n\n  switch (last - lhs) {\n    case 7:\n      SSD_FP32_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      SSD_FP32_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      SSD_FP32_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      SSD_FP32_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      SSD_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat SquaredEuclideanDistanceFp32AVX(const float *lhs, const float *rhs,\n                                      size_t size) {\n  if (size > 7) {\n    return SquaredEuclideanDistanceFp32AVXInternal(lhs, rhs, size);\n  }\n\n  return SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);\n}\n\n#endif  // __AVX__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp32_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX512F__)\nfloat SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,\n                                              const float *rhs, size_t size);\n\nfloat SquaredEuclideanDistanceFp32AVXInternal(const float *lhs,\n                                              const float *rhs, size_t size);\n\nfloat SquaredEuclideanDistanceFp32AVX512Internal(const float *lhs,\n                                                 const float *rhs,\n                                                 size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m512 zmm_sum_0 = _mm512_setzero_ps();\n  __m512 zmm_sum_1 = _mm512_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512 zmm_d_0 =\n          _mm512_sub_ps(_mm512_load_ps(lhs + 0), _mm512_load_ps(rhs + 0));\n      __m512 zmm_d_1 =\n          _mm512_sub_ps(_mm512_load_ps(lhs + 16), _mm512_load_ps(rhs + 16));\n      zmm_sum_0 = _mm512_fmadd_ps(zmm_d_0, zmm_d_0, zmm_sum_0);\n      zmm_sum_1 = _mm512_fmadd_ps(zmm_d_1, zmm_d_1, zmm_sum_1);\n    }\n\n    if (last >= 
last_aligned + 16) {\n      __m512 zmm_d = _mm512_sub_ps(_mm512_load_ps(lhs), _mm512_load_ps(rhs));\n      zmm_sum_0 = _mm512_fmadd_ps(zmm_d, zmm_d, zmm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512 zmm_d_0 =\n          _mm512_sub_ps(_mm512_loadu_ps(lhs + 0), _mm512_loadu_ps(rhs + 0));\n      __m512 zmm_d_1 =\n          _mm512_sub_ps(_mm512_loadu_ps(lhs + 16), _mm512_loadu_ps(rhs + 16));\n      zmm_sum_0 = _mm512_fmadd_ps(zmm_d_0, zmm_d_0, zmm_sum_0);\n      zmm_sum_1 = _mm512_fmadd_ps(zmm_d_1, zmm_d_1, zmm_sum_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m512 zmm_d = _mm512_sub_ps(_mm512_loadu_ps(lhs), _mm512_loadu_ps(rhs));\n      zmm_sum_0 = _mm512_fmadd_ps(zmm_d, zmm_d, zmm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n\n  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);\n  if (lhs != last) {\n    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);\n    __m512 zmm_undefined = _mm512_undefined_ps();\n    __m512 zmm_d = _mm512_mask_sub_ps(\n        zmm_undefined, mask, _mm512_mask_loadu_ps(zmm_undefined, mask, lhs),\n        _mm512_mask_loadu_ps(zmm_undefined, mask, rhs));\n    zmm_sum_0 = _mm512_mask3_fmadd_ps(zmm_d, zmm_d, zmm_sum_0, mask);\n  }\n  return HorizontalAdd_FP32_V512(zmm_sum_0);\n}\n\nfloat SquaredEuclideanDistanceFp32AVX512(const float *lhs, const float *rhs,\n                                         size_t size) {\n  if (size > 15) {\n    return SquaredEuclideanDistanceFp32AVX512Internal(lhs, rhs, size);\n  }\n\n  if (size > 7) {\n    return SquaredEuclideanDistanceFp32AVXInternal(lhs, rhs, size);\n  }\n\n  return SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);\n}\n\n#endif\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\nvoid SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,\n                                      size_t size, float *out);\n#endif\n\n#if defined(__AVX512F__)\nfloat SquaredEuclideanDistanceFp32AVX512(const float *lhs, const float *rhs,\n                                         size_t size);\n#endif\n\n#if defined(__AVX__)\nfloat SquaredEuclideanDistanceFp32AVX(const float *lhs, const float *rhs,\n                                      size_t size);\n#endif\n\n#if defined(__SSE__)\nfloat SquaredEuclideanDistanceFp32SSE(const float *lhs, const float *rhs,\n                                      size_t size);\n#endif\n\nfloat SquaredEuclideanDistanceFp32Scalar(const float *lhs, const float *rhs,\n                                         size_t size);\n\n//-----------------------------------------------------------\n//  SquaredEuclideanDistance\n//-----------------------------------------------------------\n//! 
Compute the distance between matrix and query (FP32, M=1, N=1)\nvoid SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(const ValueType *m,\n                                                          const ValueType *q,\n                                                          size_t dim,\n                                                          float *out) {\n#if defined(__ARM_NEON)\n  SquaredEuclideanDistanceFp32NEON(m, q, dim, out);\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = SquaredEuclideanDistanceFp32AVX512(m, q, dim);\n    return;\n  }\n#endif  // __AVX512F__\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = SquaredEuclideanDistanceFp32AVX(m, q, dim);\n    return;\n  }\n#endif  // __AVX__\n\n#if defined(__SSE__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {\n    *out = SquaredEuclideanDistanceFp32SSE(m, q, dim);\n    return;\n  }\n#endif  // __SSE__\n  *out = SquaredEuclideanDistanceFp32Scalar(m, q, dim);\n#endif  // __ARM_NEON\n}\n\n//-----------------------------------------------------------\n//  EuclideanDistance\n//-----------------------------------------------------------\n//! Compute the distance between matrix and query (FP32, M=1, N=1)\nvoid EuclideanDistanceMatrix<float, 1, 1>::Compute(const ValueType *m,\n                                                   const ValueType *q,\n                                                   size_t dim, float *out) {\n  SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(m, q, dim, out);\n  *out = std::sqrt(*out);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp32_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\n//! Squared Euclidean Distance\nvoid SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,\n                                      size_t size, float *out) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  float32x4_t v_sum_0 = vdupq_n_f32(0);\n  float32x4_t v_sum_1 = vdupq_n_f32(0);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    float32x4_t v_d_0 = vsubq_f32(vld1q_f32(lhs + 0), vld1q_f32(rhs + 0));\n    float32x4_t v_d_1 = vsubq_f32(vld1q_f32(lhs + 4), vld1q_f32(rhs + 4));\n    v_sum_0 = vfmaq_f32(v_sum_0, v_d_0, v_d_0);\n    v_sum_1 = vfmaq_f32(v_sum_1, v_d_1, v_d_1);\n  }\n  if (last >= last_aligned + 4) {\n    float32x4_t v_d = vsubq_f32(vld1q_f32(lhs), vld1q_f32(rhs));\n    v_sum_0 = vfmaq_f32(v_sum_0, v_d, v_d);\n    lhs += 4;\n    rhs += 4;\n  }\n\n  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));\n  switch (last - lhs) {\n    case 3:\n      SSD_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  
*out = result;\n}\n\n#endif  // __ARM_NEON\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_fp32_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE__)\nfloat SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,\n                                              const float *rhs, size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  __m128 xmm_sum_0 = _mm_setzero_ps();\n  __m128 xmm_sum_1 = _mm_setzero_ps();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_d_0 = _mm_sub_ps(_mm_load_ps(lhs + 0), _mm_load_ps(rhs + 0));\n      __m128 xmm_d_1 = _mm_sub_ps(_mm_load_ps(lhs + 4), _mm_load_ps(rhs + 4));\n      xmm_sum_0 = _mm_fmadd_ps(xmm_d_0, xmm_d_0, xmm_sum_0);\n      xmm_sum_1 = _mm_fmadd_ps(xmm_d_1, xmm_d_1, xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 4) {\n      __m128 xmm_d = _mm_sub_ps(_mm_load_ps(lhs), _mm_load_ps(rhs));\n      xmm_sum_0 = _mm_fmadd_ps(xmm_d, xmm_d, xmm_sum_0);\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_d_0 = _mm_sub_ps(_mm_loadu_ps(lhs + 0), _mm_loadu_ps(rhs + 0));\n      __m128 xmm_d_1 = _mm_sub_ps(_mm_loadu_ps(lhs + 4), 
_mm_loadu_ps(rhs + 4));\n      xmm_sum_0 = _mm_fmadd_ps(xmm_d_0, xmm_d_0, xmm_sum_0);\n      xmm_sum_1 = _mm_fmadd_ps(xmm_d_1, xmm_d_1, xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 4) {\n      __m128 xmm_d = _mm_sub_ps(_mm_loadu_ps(lhs), _mm_loadu_ps(rhs));\n      xmm_sum_0 = _mm_fmadd_ps(xmm_d, xmm_d, xmm_sum_0);\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  float result = HorizontalAdd_FP32_V128(_mm_add_ps(xmm_sum_0, xmm_sum_1));\n\n  switch (last - lhs) {\n    case 3:\n      SSD_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat SquaredEuclideanDistanceFp32SSE(const float *lhs, const float *rhs,\n                                      size_t size) {\n  return SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);\n}\n\n#endif  // __SSE__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int4_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int4.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat SquaredEuclideanDistanceInt4SSEInternal(const uint8_t *lhs,\n                                              const uint8_t *rhs, size_t size);\n\ninline float SquaredEuclideanDistanceInt4AVX2Internal(const uint8_t *lhs,\n                                                      const uint8_t *rhs,\n                                                      size_t size) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m256i ymm_sum = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)(rhs));\n      SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)\n    }\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      __m128i xmm_sum = _mm_setzero_si128();\n      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), 
xmm_sum),\n                                 ymm_sum);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)(rhs));\n      SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)\n    }\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      __m128i xmm_sum = _mm_setzero_si128();\n      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),\n                                 ymm_sum);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum));\n\n  switch (last - lhs) {\n    case 15:\n      SSD_INT4_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      SSD_INT4_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      SSD_INT4_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      SSD_INT4_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      SSD_INT4_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      SSD_INT4_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      SSD_INT4_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      SSD_INT4_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      SSD_INT4_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      SSD_INT4_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      SSD_INT4_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      SSD_INT4_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      SSD_INT4_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 
2:\n      SSD_INT4_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_INT4_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat SquaredEuclideanDistanceInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                                       size_t size) {\n  if (size > 63) {\n    return SquaredEuclideanDistanceInt4AVX2Internal(lhs, rhs, size >> 1);\n  }\n\n  return SquaredEuclideanDistanceInt4SSEInternal(lhs, rhs, size >> 1);\n}\n\n#endif  // __AVX2__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int4_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat SquaredEuclideanDistanceInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                                       size_t size);\n#endif\n\n#if defined(__SSE4_1__)\nfloat SquaredEuclideanDistanceInt4SSE(const uint8_t *lhs, const uint8_t *rhs,\n                                      size_t size);\n#endif\n\nfloat SquaredEuclideanDistanceInt4Scalar(const uint8_t *lhs, const uint8_t *rhs,\n                                         size_t size);\n\n//! 
Compute the distance between matrix and query (INT4, M=1, N=1)\nvoid SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(const ValueType *m,\n                                                            const ValueType *q,\n                                                            size_t dim,\n                                                            float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = SquaredEuclideanDistanceInt4AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = SquaredEuclideanDistanceInt4SSE(m, q, dim);\n    return;\n  }\n#endif\n\n  *out = SquaredEuclideanDistanceInt4Scalar(m, q, dim);\n}\n\n//! Compute the distance between matrix and query (INT4, M=1, N=1)\nvoid EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(const ValueType *m,\n                                                     const ValueType *q,\n                                                     size_t dim, float *out) {\n  SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(m, q, dim, out);\n  *out = std::sqrt(*out);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int4_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int4.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE4_1__)\nfloat SquaredEuclideanDistanceInt4SSEInternal(const uint8_t *lhs,\n                                              const uint8_t *rhs, size_t size) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 4) << 4);\n\n  __m128i xmm_sum = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)(rhs));\n      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)(rhs));\n      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));\n\n  switch (last - lhs) {\n    case 15:\n      SSD_INT4_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      SSD_INT4_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n     
 SSD_INT4_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      SSD_INT4_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      SSD_INT4_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      SSD_INT4_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      SSD_INT4_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      SSD_INT4_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      SSD_INT4_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      SSD_INT4_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      SSD_INT4_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      SSD_INT4_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      SSD_INT4_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_INT4_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_INT4_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat SquaredEuclideanDistanceInt4SSE(const uint8_t *lhs, const uint8_t *rhs,\n                                      size_t size) {\n  return SquaredEuclideanDistanceInt4SSEInternal(lhs, rhs, size >> 1);\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int8_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat SquaredEuclideanDistanceInt8SSEInternal(const int8_t *lhs,\n                                              const int8_t *rhs, size_t size);\n\nfloat SquaredEuclideanDistanceInt8AVX2Internal(const int8_t *lhs,\n                                               const int8_t *rhs, size_t size) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 6) << 6);\n  float result = 0.0;\n\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));\n\n      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_0, ymm_rhs_0),\n                                      _mm256_min_epi8(ymm_lhs_0, ymm_rhs_0));\n      ymm_lhs_0 = 
_mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs_0 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_0, ymm_lhs_0), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_0, ymm_rhs_0), ymm_sum_1);\n\n      ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_1, ymm_rhs_1),\n                              _mm256_min_epi8(ymm_lhs_1, ymm_rhs_1));\n      ymm_lhs_1 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs_1 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_1, ymm_lhs_1), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_1, ymm_rhs_1), ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);\n      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),\n                                      _mm256_min_epi8(ymm_lhs, ymm_rhs));\n      ymm_lhs = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs, ymm_lhs), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs, ymm_rhs), ymm_sum_1);\n      lhs += 32;\n      rhs += 32;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));\n\n      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_0, ymm_rhs_0),\n                  
                    _mm256_min_epi8(ymm_lhs_0, ymm_rhs_0));\n      ymm_lhs_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs_0 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_0, ymm_lhs_0), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_0, ymm_rhs_0), ymm_sum_1);\n\n      ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_1, ymm_rhs_1),\n                              _mm256_min_epi8(ymm_lhs_1, ymm_rhs_1));\n      ymm_lhs_1 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs_1 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_1, ymm_lhs_1), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_1, ymm_rhs_1), ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);\n      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),\n                                      _mm256_min_epi8(ymm_lhs, ymm_rhs));\n      ymm_lhs = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));\n      ymm_rhs = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));\n      ymm_sum_0 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs, ymm_lhs), ymm_sum_0);\n      ymm_sum_1 =\n          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs, ymm_rhs), ymm_sum_1);\n      lhs += 32;\n      rhs += 32;\n    }\n  }\n  result = static_cast<float>(\n      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));\n\n  if (last >= lhs + 16) {\n    __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n    __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n    __m128i xmm_sum = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),\n                                   
_mm_min_epi8(xmm_lhs, xmm_rhs));\n    xmm_lhs = _mm_cvtepu8_epi16(xmm_sum);\n    xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_sum, xmm_sum));\n    xmm_sum = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs),\n                            _mm_madd_epi16(xmm_rhs, xmm_rhs));\n    result += static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));\n    lhs += 16;\n    rhs += 16;\n  }\n  switch (last - lhs) {\n    case 15:\n      SSD_INT8_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      SSD_INT8_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      SSD_INT8_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      SSD_INT8_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      SSD_INT8_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      SSD_INT8_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      SSD_INT8_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      SSD_INT8_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      SSD_INT8_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      SSD_INT8_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      SSD_INT8_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      SSD_INT8_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      SSD_INT8_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_INT8_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_INT8_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat SquaredEuclideanDistanceInt8AVX2(const int8_t *lhs, const int8_t *rhs,\n                                       size_t size) {\n  if (size > 31) {\n    return SquaredEuclideanDistanceInt8AVX2Internal(lhs, rhs, size);\n  }\n\n  return SquaredEuclideanDistanceInt8SSEInternal(lhs, rhs, size);\n}\n#endif  // __AVX2__\n\n} 
 // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat SquaredEuclideanDistanceInt8AVX2(const int8_t *lhs, const int8_t *rhs,\n                                       size_t size);\n#endif\n\n#if defined(__SSE4_1__)\nfloat SquaredEuclideanDistanceInt8SSE(const int8_t *lhs, const int8_t *rhs,\n                                      size_t size);\n#endif\n\nfloat SquaredEuclideanDistanceInt8Scalar(const int8_t *lhs, const int8_t *rhs,\n                                         size_t size);\n\n//! 
Compute the distance between matrix and query (INT8, M=1, N=1)\nvoid SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(const ValueType *m,\n                                                           const ValueType *q,\n                                                           size_t dim,\n                                                           float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = SquaredEuclideanDistanceInt8AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = SquaredEuclideanDistanceInt8SSE(m, q, dim);\n    return;\n  }\n#endif\n\n  *out = SquaredEuclideanDistanceInt8Scalar(m, q, dim);\n}\n\n//! Compute the distance between matrix and query (INT8, M=1, N=1)\nvoid EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(const ValueType *m,\n                                                    const ValueType *q,\n                                                    size_t dim, float *out) {\n  SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(m, q, dim, out);\n  *out = std::sqrt(*out);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_int8_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_euclidean_utility.i\"\n#include \"euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE4_1__)\ninline float SquaredEuclideanDistanceInt8SSEInternal(const int8_t *lhs,\n                                                     const int8_t *rhs,\n                                                     size_t size) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m128i xmm_sum_0 = _mm_setzero_si128();\n  __m128i xmm_sum_1 = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));\n\n      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_0, xmm_rhs_0),\n                                   _mm_min_epi8(xmm_lhs_0, xmm_rhs_0));\n      xmm_lhs_0 = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs_0 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n      xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_1, xmm_rhs_1),\n                     
      _mm_min_epi8(xmm_lhs_1, xmm_rhs_1));\n      xmm_lhs_1 = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs_1 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_0, xmm_lhs_0), xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_0, xmm_rhs_0), xmm_sum_1);\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_1, xmm_lhs_1), xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_1, xmm_rhs_1), xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),\n                                   _mm_min_epi8(xmm_lhs, xmm_rhs));\n      xmm_lhs = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n      xmm_sum_0 = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs), xmm_sum_0);\n      xmm_sum_1 = _mm_add_epi32(_mm_madd_epi16(xmm_rhs, xmm_rhs), xmm_sum_1);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));\n\n      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_0, xmm_rhs_0),\n                                   _mm_min_epi8(xmm_lhs_0, xmm_rhs_0));\n      xmm_lhs_0 = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs_0 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n      xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_1, xmm_rhs_1),\n                           _mm_min_epi8(xmm_lhs_1, xmm_rhs_1));\n      xmm_lhs_1 = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs_1 = 
_mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_0, xmm_lhs_0), xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_0, xmm_rhs_0), xmm_sum_1);\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_1, xmm_lhs_1), xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_1, xmm_rhs_1), xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),\n                                   _mm_min_epi8(xmm_lhs, xmm_rhs));\n      xmm_lhs = _mm_cvtepu8_epi16(xmm_d);\n      xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));\n      xmm_sum_0 = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs), xmm_sum_0);\n      xmm_sum_1 = _mm_add_epi32(_mm_madd_epi16(xmm_rhs, xmm_rhs), xmm_sum_1);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(\n      HorizontalAdd_INT32_V128(_mm_add_epi32(xmm_sum_0, xmm_sum_1)));\n\n  switch (last - lhs) {\n    case 15:\n      SSD_INT8_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      SSD_INT8_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      SSD_INT8_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      SSD_INT8_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      SSD_INT8_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      SSD_INT8_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      SSD_INT8_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      SSD_INT8_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      SSD_INT8_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      
SSD_INT8_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      SSD_INT8_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      SSD_INT8_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      SSD_INT8_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      SSD_INT8_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      SSD_INT8_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\n//! Squared Euclidean Distance\nfloat SquaredEuclideanDistanceInt8SSE(const int8_t *lhs, const int8_t *rhs,\n                                      size_t size) {\n  return SquaredEuclideanDistanceInt8SSEInternal(lhs, rhs, size);\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/euclidean_distance_matrix_scalar.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\ntemplate <typename T>\ninline float SquaredEuclideanDistanceScalar(const T *m, const T *q,\n                                            size_t dim) {\n  ailego_assert(m && q && dim);\n\n  float sum = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    sum += MathHelper::SquaredDifference(m[i], q[i]);\n  }\n\n  return sum;\n}\n\ntemplate <typename T>\ninline float EuclideanDistanceScalar(const T *m, const T *q, size_t dim) {\n  ailego_assert(m && q && dim);\n\n  float sum = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    sum += MathHelper::SquaredDifference(m[i], q[i]);\n  }\n\n  return std::sqrt(sum);\n}\n\nfloat SquaredEuclideanDistanceInt4Scalar(const uint8_t *m, const uint8_t *q,\n                                         size_t dim) {\n  ailego_assert(m && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    uint8_t m_val = m[i];\n    uint8_t q_val = q[i];\n    sum += Int4SquaredDiffTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4SquaredDiffTable[((m_val >> 0) 
& 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n\n  return sum;\n}\n\n\nfloat EuclideanDistanceInt4Scalar(const uint8_t *m, const uint8_t *q,\n                                  size_t dim) {\n  ailego_assert(m && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    uint8_t m_val = m[i];\n    uint8_t q_val = q[i];\n    sum += Int4SquaredDiffTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4SquaredDiffTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n\n  return std::sqrt(sum);\n}\n\n\nfloat SquaredEuclideanDistanceInt8Scalar(const int8_t *m, const int8_t *q,\n                                         size_t dim) {\n  return SquaredEuclideanDistanceScalar<int8_t>(m, q, dim);\n}\n\nfloat EuclideanDistanceInt8Scalar(const int8_t *m, const int8_t *q,\n                                  size_t dim) {\n  return EuclideanDistanceScalar<int8_t>(m, q, dim);\n}\n\nfloat SquaredEuclideanDistanceFp16Scalar(const ailego::Float16 *m,\n                                         const ailego::Float16 *q, size_t dim) {\n  return SquaredEuclideanDistanceScalar<ailego::Float16>(m, q, dim);\n}\n\nfloat EuclideanDistanceFp16Scalar(const ailego::Float16 *m,\n                                  const ailego::Float16 *q, size_t dim) {\n  return EuclideanDistanceScalar<ailego::Float16>(m, q, dim);\n}\n\nfloat SquaredEuclideanDistanceFp32Scalar(const float *m, const float *q,\n                                         size_t dim) {\n  return SquaredEuclideanDistanceScalar<float>(m, q, dim);\n}\n\nfloat EuclideanDistanceFp32Scalar(const float *m, const float *q, size_t dim) {\n  return EuclideanDistanceScalar<float>(m, q, dim);\n}\n\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/hamming_distance_matrix.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hamming_distance_matrix.h\"\n#include <arrow/util/future.h>\n#include <zvec/ailego/internal/platform.h>\n#include \"distance_matrix_popcnt.i\"\n\nnamespace zvec {\nnamespace ailego {\n\n#define POPCNT_UINT32_STEP1_SSE HAMMING_UINT32_STEP1_SSE\n#define POPCNT_UINT32_STEP2_SSE HAMMING_UINT32_STEP2_SSE\n#define POPCNT_UINT32_STEP3_SSE HAMMING_UINT32_STEP3_SSE\n#define POPCNT_UINT32_STEP1_AVX HAMMING_UINT32_STEP1_AVX\n#define POPCNT_UINT32_STEP2_AVX HAMMING_UINT32_STEP2_AVX\n#define POPCNT_UINT32_STEP3_AVX HAMMING_UINT32_STEP3_AVX\n#define POPCNT_UINT64_STEP1_AVX HAMMING_UINT64_STEP1_AVX\n#define POPCNT_UINT64_STEP2_AVX HAMMING_UINT64_STEP2_AVX\n\n//! Calculate population count (Step 1 SSE)\n#define HAMMING_UINT32_STEP1_SSE(xmm_m, xmm_q, xmm_sum) \\\n  xmm_sum = _mm_add_epi8(                               \\\n      VerticalPopCount_INT8_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);\n\n//! Calculate population count (Step 2 SSE)\n#define HAMMING_UINT32_STEP2_SSE(xmm_m, xmm_q, xmm_sum) \\\n  xmm_sum = _mm_add_epi16(                              \\\n      VerticalPopCount_INT16_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);\n\n//! 
Calculate population count (Step 3 SSE)\n#define HAMMING_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum) \\\n  xmm_sum = _mm_add_epi32(                              \\\n      VerticalPopCount_INT32_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);\n\n//! Calculate population count (Step 1 AVX)\n#define HAMMING_UINT32_STEP1_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_add_epi8(                            \\\n      VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);\n\n//! Calculate population count (Step 2 AVX)\n#define HAMMING_UINT32_STEP2_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_add_epi16(                           \\\n      VerticalPopCount_INT16_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);\n\n//! Calculate population count (Step 3 AVX)\n#define HAMMING_UINT32_STEP3_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_add_epi32(                           \\\n      VerticalPopCount_INT32_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);\n\n//! Calculate population count (Step 1 AVX)\n#define HAMMING_UINT64_STEP1_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_add_epi8(                            \\\n      VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);\n\n//! Calculate population count (Step 2 AVX)\n#define HAMMING_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum) \\\n  ymm_sum = _mm256_add_epi64(                           \\\n      VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);\n\n#if defined(__AVX512VL__) && defined(__AVX512DQ__)\n#define CONVERT_UINT64_TO_FP32(v, ...) _mm256_cvtepu64_ps(v)\n#elif defined(__AVX2__)\nstatic const __m256i CONVERT_UINT32_MASK_AVX =\n    _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);\n\n#define CONVERT_UINT64_TO_FP32(v, ...)    \\\n  _mm_cvtepi32_ps(_mm256_castsi256_si128( \\\n      _mm256_permutevar8x32_epi32(v, CONVERT_UINT32_MASK_AVX)))\n#endif  // __AVX512VL__ && __AVX512DQ__\n\n#define SQRT_UINT64_TO_FP32(v, ...) 
_mm_sqrt_ps(CONVERT_UINT64_TO_FP32(v))\n#define SQRT_UINT32_TO_FP32_SSE(v, ...) _mm_sqrt_ps(_mm_cvtepi32_ps(v))\n#define SQRT_UINT32_TO_FP32_AVX(v, ...) _mm256_sqrt_ps(_mm256_cvtepi32_ps(v))\n\n#if defined(__AVX2__)\nstatic inline size_t HammingDistanceAVX(const uint32_t *lhs,\n                                        const uint32_t *rhs, size_t size) {\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n\n  const uint32_t *lhs_0 = lhs + ((size >> 4) << 4);\n  const uint32_t *lhs_1 = (size > 496 ? lhs + 496 : lhs_0);\n  const uint32_t *lhs_2 = lhs + size;\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != lhs_1; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((__m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((__m256i *)(lhs + 8));\n      __m256i ymm_rhs_0 = _mm256_load_si256((__m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((__m256i *)(rhs + 8));\n\n      ymm_sum_0 = _mm256_add_epi8(\n          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi8(\n          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),\n          ymm_sum_1);\n    }\n    ymm_sum_0 = _mm256_sad_epu8(ymm_sum_0, POPCNT_ZERO_AVX);\n    ymm_sum_1 = _mm256_sad_epu8(ymm_sum_1, POPCNT_ZERO_AVX);\n\n    for (; lhs != lhs_0; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((__m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((__m256i *)(lhs + 8));\n      __m256i ymm_rhs_0 = _mm256_load_si256((__m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((__m256i *)(rhs + 8));\n\n      ymm_sum_0 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),\n        
  ymm_sum_1);\n    }\n\n    if (lhs_2 >= lhs + 8) {\n      __m256i ymm_lhs = _mm256_load_si256((__m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_load_si256((__m256i *)(rhs));\n      ymm_sum_0 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs, ymm_rhs)),\n          ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  } else {\n    for (; lhs != lhs_1; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((__m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((__m256i *)(lhs + 8));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((__m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((__m256i *)(rhs + 8));\n\n      ymm_sum_0 = _mm256_add_epi8(\n          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi8(\n          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),\n          ymm_sum_1);\n    }\n    ymm_sum_0 = _mm256_sad_epu8(ymm_sum_0, POPCNT_ZERO_AVX);\n    ymm_sum_1 = _mm256_sad_epu8(ymm_sum_1, POPCNT_ZERO_AVX);\n\n    for (; lhs != lhs_0; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((__m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((__m256i *)(lhs + 8));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((__m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((__m256i *)(rhs + 8));\n\n      ymm_sum_0 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),\n          ymm_sum_1);\n    }\n\n    if (lhs_2 >= lhs + 8) {\n      __m256i ymm_lhs = _mm256_loadu_si256((__m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_loadu_si256((__m256i *)(rhs));\n      ymm_sum_0 = _mm256_add_epi64(\n          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs, ymm_rhs)),\n          
ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  }\n\n  size_t count =\n      (size_t)HorizontalAdd_INT64_V256(_mm256_add_epi64(ymm_sum_0, ymm_sum_1));\n  switch (lhs_2 - lhs) {\n    case 7:\n      count += ailego_popcount32(lhs[6] ^ rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += ailego_popcount32(lhs[5] ^ rhs[5]);\n      /* FALLTHRU */\n    case 5:\n      count += ailego_popcount32(lhs[4] ^ rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += ailego_popcount32(lhs[3] ^ rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += ailego_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += ailego_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += ailego_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t HammingDistanceAVX(const uint64_t *lhs,\n                                        const uint64_t *rhs, size_t size) {\n  return HammingDistanceAVX(reinterpret_cast<const uint32_t *>(lhs),\n                            reinterpret_cast<const uint32_t *>(rhs),\n                            (size << 1));\n}\n#endif  // __AVX2__\n\n#if defined(AILEGO_M64)\nstatic inline size_t HammingDistance(const uint32_t *lhs, const uint32_t *rhs,\n                                     size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    count += ailego_popcount64(*(uint64_t *)(&lhs[6]) ^ *(uint64_t *)(&rhs[6]));\n    count += ailego_popcount64(*(uint64_t *)(&lhs[4]) ^ *(uint64_t *)(&rhs[4]));\n    count += ailego_popcount64(*(uint64_t *)(&lhs[2]) ^ *(uint64_t *)(&rhs[2]));\n    count += ailego_popcount64(*(uint64_t *)(&lhs[0]) ^ *(uint64_t *)(&rhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += ailego_popcount32(lhs[6] ^ rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += ailego_popcount32(lhs[5] ^ rhs[5]);\n 
     /* FALLTHRU */\n    case 5:\n      count += ailego_popcount32(lhs[4] ^ rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += ailego_popcount32(lhs[3] ^ rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += ailego_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += ailego_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += ailego_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t HammingDistance(const uint64_t *lhs, const uint64_t *rhs,\n                                     size_t size) {\n  const uint64_t *last = lhs + size;\n  const uint64_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += ailego_popcount64(lhs[3] ^ rhs[3]);\n    count += ailego_popcount64(lhs[2] ^ rhs[2]);\n    count += ailego_popcount64(lhs[1] ^ rhs[1]);\n    count += ailego_popcount64(lhs[0] ^ rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += ailego_popcount64(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += ailego_popcount64(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += ailego_popcount64(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n#else\nstatic inline size_t HammingDistance(const uint32_t *lhs, const uint32_t *rhs,\n                                     size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += ailego_popcount32(lhs[3] ^ rhs[3]);\n    count += ailego_popcount32(lhs[2] ^ rhs[2]);\n    count += ailego_popcount32(lhs[1] ^ rhs[1]);\n    count += ailego_popcount32(lhs[0] ^ rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += ailego_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += ailego_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n  
  case 1:\n      count += ailego_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n#endif  // AILEGO_M64\n\n//! Compute the distance between matrix and query (UINT32, M=1, N=1)\nvoid HammingDistanceMatrix<uint32_t, 1, 1>::Compute(const ValueType *m,\n                                                    const ValueType *q,\n                                                    size_t dim, float *out) {\n  size_t cnt = (dim >> 5);\n#if defined(__AVX2__)\n  if (cnt > 63) {\n    *out = static_cast<float>(HammingDistanceAVX(m, q, cnt));\n    return;\n  }\n#endif\n  *out = static_cast<float>(HammingDistance(m, q, cnt));\n}\n\n#if defined(AILEGO_M64)\n//! Compute the distance between matrix and query (UINT64, M=1, N=1)\nvoid HammingDistanceMatrix<uint64_t, 1, 1>::Compute(const ValueType *m,\n                                                    const ValueType *q,\n                                                    size_t dim, float *out) {\n  size_t cnt = (dim >> 6);\n#if defined(__AVX2__)\n  if (cnt > 31) {\n    *out = static_cast<float>(HammingDistanceAVX(m, q, cnt));\n    return;\n  }\n#endif\n  *out = static_cast<float>(HammingDistance(m, q, cnt));\n}\n\n#endif  // AILEGO_M64\n\n//! Compute the distance between matrix and query (UINT32, M=1, N=1)\nvoid HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute(\n    const ValueType *m, const ValueType *q, size_t dim, float *out) {\n  size_t cnt = (dim >> 5);\n#if defined(__AVX2__)\n  if (cnt > 63) {\n    *out = std::sqrt(static_cast<float>(HammingDistanceAVX(m, q, cnt)));\n    return;\n  }\n#endif\n  *out = std::sqrt(static_cast<float>(HammingDistance(m, q, cnt)));\n}\n\n\n#if defined(AILEGO_M64)\n//! 
Compute the distance between matrix and query (UINT64, M=1, N=1)\nvoid HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute(\n    const ValueType *m, const ValueType *q, size_t dim, float *out) {\n  size_t cnt = (dim >> 6);\n#if defined(__AVX2__)\n  if (cnt > 31) {\n    *out = std::sqrt(static_cast<float>(HammingDistanceAVX(m, q, cnt)));\n    return;\n  }\n#endif\n  *out = std::sqrt(static_cast<float>(HammingDistance(m, q, cnt)));\n}\n\n#endif  // AILEGO_M64\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/hamming_distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cmath>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Hamming Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N,\n          typename = void>  // NOTE: useless 'typename=void' to avoid clang\n                            // compile error\nstruct HammingDistanceMatrix;\n\n/*! Hamming Distance Matrix (UINT32)\n */\ntemplate <size_t M, size_t N>\nstruct HammingDistanceMatrix<uint32_t, M, N> {\n  //! Type of value\n  using ValueType = uint32_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && !(dim & 31) && out);\n\n    size_t cnt = (dim >> 5);\n    if (cnt > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = static_cast<float>(ailego_popcount32(m_val ^ q[j]));\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n\n    for (size_t k = 1; k < cnt; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += static_cast<float>(ailego_popcount32(m_val ^ q[j]));\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n  }\n};\n\n/*! Hamming Distance Matrix (UINT32, M=1, N=1)\n */\ntemplate <>\nstruct HammingDistanceMatrix<uint32_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint32_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n#if defined(AILEGO_M64)\n/*! Hamming Distance Matrix (UINT64)\n */\ntemplate <size_t M, size_t N>\nstruct HammingDistanceMatrix<uint64_t, M, N> {\n  //! Type of value\n  using ValueType = uint64_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && !(dim & 63) && out);\n\n    size_t cnt = (dim >> 6);\n    if (cnt > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = static_cast<float>(ailego_popcount64(m_val ^ q[j]));\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n\n    for (size_t k = 1; k < cnt; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += static_cast<float>(ailego_popcount64(m_val ^ q[j]));\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n  }\n};\n\n/*! Hamming Distance Matrix (UINT64, M=1, N=1)\n */\ntemplate <>\nstruct HammingDistanceMatrix<uint64_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint64_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n#endif  // AILEGO_M64\n\n/*! Hamming Square Root Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N>\nstruct HammingSquareRootDistanceMatrix {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    HammingDistanceMatrix<T, M, N>::Compute(m, q, dim, out);\n    for (size_t i = 0; i < N * M; ++i) {\n      float val = *out;\n      *out++ = std::sqrt(val);\n    }\n  }\n};\n\n/*! 
Hamming Square Root Distance Matrix (UINT32, M=1, N=1)\n */\ntemplate <>\nstruct HammingSquareRootDistanceMatrix<uint32_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint32_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n\n#if defined(AILEGO_M64)\n/*! Hamming Square Root Distance Matrix (UINT64, M=1, N=1)\n */\ntemplate <>\nstruct HammingSquareRootDistanceMatrix<uint64_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint64_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n#endif  // AILEGO_M64\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cmath>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n/*! Inner Product Matrix\n */\ntemplate <typename T, size_t M, size_t N, typename = void>\nstruct InnerProductMatrix;\n\n/*! Inner Product Matrix\n */\ntemplate <typename T, size_t M, size_t N, typename = void>\nstruct MinusInnerProductMatrix;\n\n/*! Inner Product Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct InnerProductMatrix<\n    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    float sum = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      sum += static_cast<float>(m[i] * q[i]);\n    }\n    *out = sum;\n  }\n};\n\n/*! 
Minus Inner Product Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct MinusInnerProductMatrix<\n    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    float sum = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      sum += static_cast<float>(m[i] * q[i]);\n    }\n    *out = -sum;\n  }\n};\n\ntemplate <>\nstruct InnerProductMatrix<uint8_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct InnerProductMatrix<int8_t, 1, 1> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct InnerProductMatrix<Float16, 1, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct InnerProductMatrix<float, 1, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct MinusInnerProductMatrix<uint8_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct MinusInnerProductMatrix<int8_t, 1, 1> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct MinusInnerProductMatrix<Float16, 1, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\ntemplate <>\nstruct MinusInnerProductMatrix<float, 1, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the distance between matrix and query\n  static void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                      float *out);\n};\n\n/*! Inner Product Matrix\n */\ntemplate <typename T, size_t M, size_t N>\nstruct InnerProductMatrix<\n    T, M, N,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = static_cast<float>(m_val * q[j]);\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += m_val * q[j];\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n  }\n};\n\n/*! Inner Product Matrix (N=1)\n */\ntemplate <typename T, size_t M>\nstruct InnerProductMatrix<\n    T, M, 1,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    const ValueType *q_end = q + dim;\n    if (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = static_cast<float>(m[i] * q_val);\n      }\n      m += M;\n    }\n\n    while (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += m[i] * q_val;\n      }\n      m += M;\n    }\n  }\n};\n\n/*! Inner Product Matrix (INT8)\n */\ntemplate <size_t M, size_t N>\nstruct InnerProductMatrix<int8_t, M, N,\n                          typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! 
Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 2;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = ((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                            (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                            (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                            (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Inner Product Matrix (INT8, N=1)\n */\ntemplate <size_t M>\nstruct InnerProductMatrix<int8_t, M, 1, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 2);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = ((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                            (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                            (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                            (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Inner Product Matrix (INT4)\n */\ntemplate <size_t M, size_t N>\nstruct InnerProductMatrix<uint8_t, M, N,\n                          typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 3;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r += FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n/*! Inner Product Matrix (INT4, N=1)\n */\ntemplate <size_t M>\nstruct InnerProductMatrix<uint8_t, M, 1,\n                          typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 3);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n\n/*! Minus Inner Product Matrix\n */\ntemplate <typename T, size_t M, size_t N>\nstruct MinusInnerProductMatrix<\n    T, M, N,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = -static_cast<float>(m_val * q[j]);\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType m_val = m[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r -= m_val * q[j];\n          r += M;\n        }\n      }\n      m += M;\n      q += N;\n    }\n  }\n};\n\n/*! Minus Inner Product Matrix (N=1)\n */\ntemplate <typename T, size_t M>\nstruct MinusInnerProductMatrix<\n    T, M, 1,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && out);\n\n    const ValueType *q_end = q + dim;\n    if (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = -static_cast<float>(m[i] * q_val);\n      }\n      m += M;\n    }\n\n    while (q != q_end) {\n      ValueType q_val = *q++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) -= m[i] * q_val;\n      }\n      m += M;\n    }\n  }\n};\n\n/*! Minus Inner Product Matrix (INT8)\n */\ntemplate <size_t M, size_t N>\nstruct MinusInnerProductMatrix<\n    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! 
Type of value\n  using ValueType = int8_t;\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 2;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = -FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r -= FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = ((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                            (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                            (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                            (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Minus Inner Product Matrix (INT8, N=1)\n */\ntemplate <size_t M>\nstruct MinusInnerProductMatrix<int8_t, M, 1,\n                               typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 2);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = -FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) -= FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    volatile int32_t sum = ((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                            (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                            (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                            (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n\n    return static_cast<float>(sum);\n  }\n};\n\n/*! Minus Inner Product Matrix (INT4)\n */\ntemplate <size_t M, size_t N>\nstruct MinusInnerProductMatrix<\n    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n\n    dim >>= 3;\n    if (dim > 0) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r = -FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        uint32_t m_val = m_it[i];\n        float *r = out + i;\n\n        for (size_t j = 0; j < N; ++j) {\n          *r -= FusedMultiplyAdd(m_val, q_it[j]);\n          r += M;\n        }\n      }\n      m_it += M;\n      q_it += N;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n/*! Minus Inner Product Matrix (INT4, N=1)\n */\ntemplate <size_t M>\nstruct MinusInnerProductMatrix<uint8_t, M, 1,\n                               typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the distance between matrix and query\n  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,\n                             float *out) {\n    ailego_assert(m && q && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 3);\n\n    if (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = -FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n\n    while (q_it != q_end) {\n      uint32_t q_val = *q_it++;\n\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) -= FusedMultiplyAdd(m_it[i], q_val);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n};\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\nstruct SparseSegmentInfo {\n public:\n  uint32_t seg_id_{-1U};\n  uint32_t vec_cnt_{0};\n\n public:\n  SparseSegmentInfo() : seg_id_{-1U}, vec_cnt_{0} {}\n\n  SparseSegmentInfo(uint32_t seg_id, uint32_t vec_cnt)\n      : seg_id_{seg_id}, vec_cnt_{vec_cnt} {}\n};\n\nconstexpr static uint32_t SEGMENT_ID_BITS = 16;\nconstexpr static uint32_t SEGMENT_ID_MASK = 
0xFFFF;\n\ntemplate <typename T>\nstruct MinusInnerProductSparseMatrix {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  static inline float ComputeInnerProductSparseInSegment(\n      uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n      const ValueType *m_sparse_value, uint32_t q_sparse_count,\n      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);\n\n  //! Compute the distance between matrix and query\n  static inline void Compute(const void *m_sparse_data_in,\n                             const void *q_sparse_data_in, float *out);\n\n  static inline void transform_sparse_format(uint32_t sparse_count,\n                                             const uint32_t *sparse_index,\n                                             const void *sparse_value,\n                                             std::string &buffer);\n};\n\ntemplate <>\nstruct MinusInnerProductSparseMatrix<Float16> {\n  //! Type of value\n  using ValueType = Float16;\n\n  static float ComputeInnerProductSparseInSegment(\n      uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n      const Float16 *m_sparse_value, uint32_t q_sparse_count,\n      const uint16_t *q_sparse_index, const Float16 *q_sparse_value);\n\n  //! 
Compute the distance between matrix and query\n  static void Compute(const void *m_sparse_data_in,\n                      const void *q_sparse_data_in, float *out);\n\n  static void transform_sparse_format(uint32_t sparse_count,\n                                      const uint32_t *sparse_index,\n                                      const void *sparse_value,\n                                      std::string &buffer) {\n    uint32_t unit_size = sizeof(ValueType);\n\n    uint32_t seg_count = 0;\n    if (sparse_count == 0) {\n      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                    sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&seg_count),\n                    sizeof(uint32_t));\n\n      return;\n    }\n\n    std::vector<SparseSegmentInfo> seg_infos;\n\n    uint32_t cur_seg_id = -1U;\n    uint32_t cur_vec_cnt = 0;\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;\n      if (cur_seg_id == -1U) {\n        cur_seg_id = seg_id;\n        cur_vec_cnt++;\n      } else {\n        if (seg_id == cur_seg_id) {\n          cur_vec_cnt++;\n        } else if (seg_id > cur_seg_id) {\n          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n\n          cur_seg_id = seg_id;\n          cur_vec_cnt = 1;\n        } else {\n          // std::abort();\n        }\n      }\n    }\n\n    if (cur_vec_cnt > 0) {\n      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n    }\n\n    uint32_t buffer_len = 2 * sizeof(uint32_t) +\n                          seg_infos.size() * 2 * sizeof(uint32_t) +\n                          sparse_count * (sizeof(uint16_t) + sizeof(ValueType));\n\n    buffer.reserve(buffer_len);\n\n    buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                  sizeof(uint32_t));\n\n    seg_count = seg_infos.size();\n    buffer.append(reinterpret_cast<const char *>(&seg_count), 
sizeof(uint32_t));\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t seg_id = seg_infos[i].seg_id_;\n      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t vec_cnt = seg_infos[i].vec_cnt_;\n      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;\n      buffer.append(reinterpret_cast<const char *>(&temp_dim),\n                    sizeof(uint16_t));\n    }\n\n    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);\n    for (size_t i = 0; i < sparse_count; ++i) {\n      buffer.append(sparse_value_ptr, unit_size);\n      sparse_value_ptr += unit_size;\n    }\n  }\n};\n\ntemplate <>\nstruct MinusInnerProductSparseMatrix<float> {\n  //! Type of value\n  using ValueType = float;\n\n  static float ComputeInnerProductSparseInSegment(\n      uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n      const float *m_sparse_value, uint32_t q_sparse_count,\n      const uint16_t *q_sparse_index, const float *q_sparse_value);\n\n  //! 
Compute the distance between matrix and query\n  static void Compute(const void *m_sparse_data_in,\n                      const void *q_sparse_data_in, float *out);\n\n  static void transform_sparse_format(uint32_t sparse_count,\n                                      const uint32_t *sparse_index,\n                                      const void *sparse_value,\n                                      std::string &buffer) {\n    uint32_t unit_size = sizeof(ValueType);\n\n    uint32_t seg_count = 0;\n    if (sparse_count == 0) {\n      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                    sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&seg_count),\n                    sizeof(uint32_t));\n\n      return;\n    }\n\n    std::vector<SparseSegmentInfo> seg_infos;\n\n    uint32_t cur_seg_id = -1U;\n    uint32_t cur_vec_cnt = 0;\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;\n      if (cur_seg_id == -1U) {\n        cur_seg_id = seg_id;\n        cur_vec_cnt++;\n      } else {\n        if (seg_id == cur_seg_id) {\n          cur_vec_cnt++;\n        } else if (seg_id > cur_seg_id) {\n          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n\n          cur_seg_id = seg_id;\n          cur_vec_cnt = 1;\n        } else {\n          // std::abort();\n        }\n      }\n    }\n\n    if (cur_vec_cnt > 0) {\n      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n    }\n\n    uint32_t buffer_len = 2 * sizeof(uint32_t) +\n                          seg_infos.size() * 2 * sizeof(uint32_t) +\n                          sparse_count * (sizeof(uint16_t) + sizeof(ValueType));\n\n    buffer.reserve(buffer_len);\n\n    buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                  sizeof(uint32_t));\n\n    seg_count = seg_infos.size();\n    buffer.append(reinterpret_cast<const char *>(&seg_count), 
sizeof(uint32_t));\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t seg_id = seg_infos[i].seg_id_;\n      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t vec_cnt = seg_infos[i].vec_cnt_;\n      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;\n      buffer.append(reinterpret_cast<const char *>(&temp_dim),\n                    sizeof(uint16_t));\n    }\n\n    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);\n    for (size_t i = 0; i < sparse_count; ++i) {\n      buffer.append(sparse_value_ptr, unit_size);\n      sparse_value_ptr += unit_size;\n    }\n  }\n};\n\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp16_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX__)\nfloat InnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, size_t size) {\n  float score{0.0f};\n\n  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, )\n\n  return score;\n}\n\nfloat MinusInnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs,\n                               size_t size) {\n  float score{0.0f};\n\n  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, NEGATE_FP32_GENERAL)\n\n  return score;\n}\n#endif\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n#if defined(__AVX__)\nconst static __m128i SHUFFLE_MASK256[256] = {\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, -127, -127),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 
-127, -127, -127, -127, -127, -127,\n                 -127, -127, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 5,\n                 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,\n                 6, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,\n                 6, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,\n                 6, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 3, 2, 1, 0),\n    
_mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 5, 4, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,\n                 8, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5,\n                 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5,\n                 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 11, 10),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 5, 4),\n    _mm_set_epi8(-127, -127, 
-127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 5, 4,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 7, 6, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 
8,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,\n                 10, 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 13, 12),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 13, 12, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 13, 12, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 13, 12, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 5, 4,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 
-127, -127, -127, -127,\n                 -127, -127, 13, 12, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 7, 6, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 13, 12, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 7, 6, 1, 0),\n 
   _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 13, 12, 11, 10),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 11, 10, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 11, 10, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 11, 10, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 11, 10, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 3, 2,\n                 1, 0),\n   
 _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,\n                 12, 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,\n                 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,\n                 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 
9, 8, 7, 6, 5, 4, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, 15, 14),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 5, 4,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 7, 6, 5, 4, 3, 2,\n                 1, 0),\n    
_mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 11, 10),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 11, 10, 1, 0),\n  
  _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 11, 10, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 11, 10, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 11, 10, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 3, 2,\n           
      1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,\n                 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,\n                 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 13, 12),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, 
-127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 5, 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 5, 4, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 7, 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 7, 6, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 9, 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 9, 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 3, 2,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 9, 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 
-127, -127, 15, 14, 13, 12,\n                 9, 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,\n                 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,\n                 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,\n                 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,\n                 14, 13, 12, 11, 10),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5,\n                 4, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5,\n                 4, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,\n                 6, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,\n                 6, 3, 2),\n    
_mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,\n                 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,\n                 8, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,\n                 8, 3, 2),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,\n                 8, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,\n                 8, 7, 6),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3,\n                 2),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,\n                 4),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1, 0),\n    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2),\n    _mm_set_epi8(15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n};\n\nconstexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;\n\nfloat InnerProductSparseInSegmentFp16AVX(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const Float16 *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const Float16 *q_sparse_value) {\n  float sum = 0.0f;\n\n  // handle if the first dim is zero\n  bool m_zero = false;\n  Float16 m_zero_value{0.0f};\n  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {\n    m_sparse_count--;\n    m_sparse_index++;\n    m_zero_value = *m_sparse_value++;\n    m_zero = true;\n  }\n\n  bool q_zero = false;\n  Float16 q_zero_value{0.0f};\n  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {\n    q_sparse_count--;\n    q_sparse_index++;\n    q_zero_value = *q_sparse_value++;\n    q_zero = true;\n  }\n\n  if (m_zero && q_zero) {\n    sum = m_zero_value * q_zero_value;\n  }\n\n  size_t i1 = 0, i2 = 0;\n  size_t end1 = m_sparse_count / 8 * 8;\n  size_t end2 = q_sparse_count / 8 * 8;\n\n  uint16_t fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];\n  uint16_t fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];\n\n  Float16 *val_start_1 = reinterpret_cast<Float16 *>(fixed_buffer_1);\n  Float16 *val_start_2 = reinterpret_cast<Float16 *>(fixed_buffer_2);\n\n  Float16 *val_1 = val_start_1;\n  Float16 *val_2 = val_start_2;\n\n  if (i1 < end1 && i2 < end2) {\n    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {\n      i1 += 8;\n      if (i1 >= end1) goto do_scalar;\n    }\n\n    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {\n      i2 += 8;\n      if (i2 >= end2) goto do_scalar;\n    }\n\n    __m128i mm_index_m =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n    __m128i mm_index_q =\n        
_mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n\n    while (true) {\n#ifdef DEBUG_PRINT\n      std::cout << \"index 1: \" << std::endl;\n      print_data16(&mm_index_m);\n\n      std::cout << \"index 2: \" << std::endl;\n      print_data16(&mm_index_q);\n#endif\n\n      __m128i mm_cmp_res =\n          _mm_cmpistrm(mm_index_q, mm_index_m,\n                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n\n#ifdef DEBUG_PRINT\n      std::cout << \"cmp res: \" << std::endl;\n      print_data16(&mm_cmp_res);\n#endif\n\n      int r = _mm_extract_epi32(mm_cmp_res, 0);\n\n      if (r) {\n        int r1 = r;\n\n        __m128i v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));\n        __m128i vs = _mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]);\n\n        _mm_storeu_si128(reinterpret_cast<__m128i *>(val_1), vs);\n        val_1 += _mm_popcnt_u32(r1);\n\n        mm_cmp_res = _mm_cmpistrm(\n            mm_index_m, mm_index_q,\n            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n        r = _mm_extract_epi32(mm_cmp_res, 0);\n\n        r1 = r;\n\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));\n        vs = _mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]);\n\n        _mm_storeu_si128(reinterpret_cast<__m128i *>(val_2), vs);\n        val_2 += _mm_popcnt_u32(r1);\n      }\n\n      const uint16_t id1_max = m_sparse_index[i1 + 7];\n\n      if (id1_max <= q_sparse_index[i2 + 7]) {\n        i1 += 8;\n        if (i1 >= end1) goto do_scalar;\n        mm_index_m = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n      }\n\n      if (id1_max >= q_sparse_index[i2 + 7]) {\n        i2 += 8;\n        if (i2 >= end2) goto do_scalar;\n        mm_index_q = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n      }\n    }\n  }\n\ndo_scalar:\n  while (i1 < m_sparse_count && i2 < 
q_sparse_count) {\n    if (m_sparse_index[i1] == q_sparse_index[i2]) {\n      *val_1++ = m_sparse_value[i1];\n      *val_2++ = q_sparse_value[i2];\n\n      ++i1;\n      ++i2;\n    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {\n      ++i1;\n    } else {\n      ++i2;\n    }\n  }\n\n  size_t res_num = val_1 - val_start_1;\n\n  size_t res_num8 = res_num / 8 * 8;\n\n  if (res_num8) {\n    __m256 sum256 = _mm256_setzero_ps();\n\n    for (size_t k = 0; k < res_num8; k += 8) {\n      __m256 ymm_1 =\n          _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(val_start_1 + k)));\n      __m256 ymm_2 =\n          _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(val_start_2 + k)));\n      ACCUM_FP32_STEP_AVX(ymm_1, ymm_2, sum256);\n    }\n\n    sum += HorizontalAdd_FP32_V256(sum256);\n  }\n\n  for (size_t k = res_num8; k < res_num; ++k)\n    sum += val_start_1[k] * val_start_2[k];\n\n  return sum;\n}\n\n#endif  // __AVX__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp16_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX512F__)\nfloat InnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                             size_t size) {\n  float score{0.0f};\n\n  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &score, 0ull, )\n\n  return score;\n}\n\nfloat MinusInnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                                  size_t size) {\n  float score{0.0f};\n\n  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &score, 0ull, NEGATE_FP32_GENERAL)\n\n  return score;\n}\n#endif  //__AVX512F__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp16_avx512fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX512FP16__)\n//! Inner Product\nfloat InnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,\n                                 size_t size) {\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 6) << 6);\n\n  __m512h zmm_sum_0 = _mm512_setzero_ph();\n  __m512h zmm_sum_1 = _mm512_setzero_ph();\n\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs + 0), _mm512_load_ph(rhs + 0),\n                          zmm_sum_0)\n\n      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs + 32), _mm512_load_ph(rhs + 32),\n                          zmm_sum_1)\n    }\n\n    if (last >= last_aligned + 32) {\n      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs), _mm512_load_ph(rhs), zmm_sum_0)\n      lhs += 32;\n      rhs += 32;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs + 0), _mm512_loadu_ph(rhs + 0),\n                          zmm_sum_0)\n\n      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs + 32), _mm512_loadu_ph(rhs + 32),\n                          
zmm_sum_1)\n    }\n\n    if (last >= last_aligned + 32) {\n      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs), _mm512_loadu_ph(rhs), zmm_sum_0)\n      lhs += 32;\n      rhs += 32;\n    }\n  }\n\n  zmm_sum_0 = _mm512_add_ph(zmm_sum_0, zmm_sum_1);\n\n  if (lhs != last) {\n    __mmask32 mask = (__mmask32)((1 << (last - lhs)) - 1);\n    __m512i zmm_undefined = _mm512_undefined_epi32();\n    zmm_sum_0 = _mm512_mask3_fmadd_ph(\n        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, lhs)),\n        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, rhs)),\n        zmm_sum_0, mask);\n  }\n\n  return HorizontalAdd_FP16_V512(zmm_sum_0);\n}\n\nfloat MinusInnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,\n                                      size_t size) {\n  return -1 * InnerProductFp16AVX512FP16(lhs, rhs, size);\n}\n#endif\n\n// sparse\n#if defined(__AVX512FP16__)\nconstexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;\n\nfloat InnerProductSparseInSegmentFp16AVX512FP16(uint32_t m_sparse_count,\n                                                const uint16_t *m_sparse_index,\n                                                const Float16 *m_sparse_value,\n                                                uint32_t q_sparse_count,\n                                                const uint16_t *q_sparse_index,\n                                                const Float16 *q_sparse_value) {\n  const static __m128i SHUFFLE_MASK256[256] = {\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, -127, -127),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, 
-127, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   7, 6, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   7, 6, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   7, 6, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 3, 2, 1, 0),\n    
  _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 5, 4,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   9, 8, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 11, 10),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 11, 10, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 11, 10, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n            
       -127, -127, 11, 10, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 5, 4,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 11, 10, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 7, 6, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 11, 10, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 
-127, -127, 11, 10, 9, 8,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   11, 10, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                   7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                   7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                   7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,\n                   3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 13, 12),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, 
-127, -127, -127,\n                   13, 12, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 5, 4,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 7, 6, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 
-127, -127, 13, 12, 9, 8, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,\n                   7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,\n                   3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 13, 12, 11, 10),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 11, 10, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 11, 10, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 11, 10, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 5, 4, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, 
-127,\n                   13, 12, 11, 10, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   13, 12, 11, 10, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,\n                   10, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,\n                   6, 1, 0),\n      
_mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,\n                   6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,\n                   6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3,\n                   2),\n      _mm_set_epi8(-127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, -127, -127, 15, 14),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 5, 4,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 7, 6, 3, 2),\n      _mm_set_epi8(-127, 
-127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 7, 6, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                   7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,\n                   7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, 
-127, -127, -127, -127, 15, 14, 9, 8,\n                   7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,\n                   3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 11, 10),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 11, 10, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 11, 10, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 11, 10, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 5, 4, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 11, 10, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 
14, 11, 10, 7, 6, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 11, 10, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,\n                   10, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,\n                   6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,\n                   6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,\n                   6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3,\n                   2),\n      _mm_set_epi8(-127, -127, 15, 14, 11, 10, 9, 8, 
7, 6, 5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   -127, -127, 15, 14, 13, 12),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 13, 12, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 13, 12, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 13, 12, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 5, 4, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 13, 12, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   
15, 14, 13, 12, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 3,\n                   2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 9, 8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5,\n                   4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5,\n                   4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,\n                   6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,\n                   6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 3, 2, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,\n                   6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 1,\n                   0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3,\n                   2),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                   15, 14, 13, 12, 11, 10),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 11, 10, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   
12, 11, 10, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 11, 10, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   5, 4, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5, 4, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 11, 10, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   7, 6, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   7, 6, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   7, 6, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4,\n                   3, 2),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,\n                   12, 11, 10, 9, 8),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   9, 8, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   9, 8, 3, 2),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   9, 
8, 5, 4),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4,\n                   3, 2),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,\n                   9, 8, 7, 6),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,\n                   1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,\n                   3, 2),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1, 0),\n      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,\n                   5, 4),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1, 0),\n      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2),\n      _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n  };\n\n  float sum = 0.0f;\n\n  // handle if the first dim is zero\n  bool m_zero = false;\n  Float16 m_zero_value{0.0f};\n  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {\n    m_sparse_count--;\n    m_sparse_index++;\n    m_zero_value = *m_sparse_value++;\n    m_zero = true;\n  }\n\n  bool q_zero = false;\n  Float16 q_zero_value{0.0f};\n  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {\n    q_sparse_count--;\n    q_sparse_index++;\n    q_zero_value = *q_sparse_value++;\n    q_zero = true;\n  }\n\n  if (m_zero && q_zero) {\n    sum = m_zero_value * q_zero_value;\n  }\n\n  size_t i1 = 0, i2 = 0;\n  size_t end1 = m_sparse_count / 8 * 8;\n  size_t end2 = q_sparse_count / 8 * 8;\n\n  uint16_t fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];\n  uint16_t fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];\n\n  Float16 *val_start_1 = reinterpret_cast<Float16 *>(fixed_buffer_1);\n  Float16 *val_start_2 = reinterpret_cast<Float16 
*>(fixed_buffer_2);\n\n  Float16 *val_1 = val_start_1;\n  Float16 *val_2 = val_start_2;\n\n  if (i1 < end1 && i2 < end2) {\n    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {\n      i1 += 8;\n      if (i1 >= end1) goto do_scalar;\n    }\n\n    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {\n      i2 += 8;\n      if (i2 >= end2) goto do_scalar;\n    }\n\n    __m128i mm_index_m =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n    __m128i mm_index_q =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n\n    while (true) {\n#ifdef DEBUG_PRINT\n      std::cout << \"index 1: \" << std::endl;\n      print_data16(&mm_index_m);\n\n      std::cout << \"index 2: \" << std::endl;\n      print_data16(&mm_index_q);\n#endif\n\n      __m128i mm_cmp_res =\n          _mm_cmpistrm(mm_index_q, mm_index_m,\n                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n\n#ifdef DEBUG_PRINT\n      std::cout << \"cmp res: \" << std::endl;\n      print_data16(&mm_cmp_res);\n#endif\n\n      int r = _mm_extract_epi32(mm_cmp_res, 0);\n\n      if (r) {\n        int r1 = r;\n\n        __m128i v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));\n        __m128h vs = _mm_castsi128_ph(_mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]));\n\n        _mm_storeu_ph(val_1, vs);\n        val_1 += _mm_popcnt_u32(r1);\n\n        mm_cmp_res = _mm_cmpistrm(\n            mm_index_m, mm_index_q,\n            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n        r = _mm_extract_epi32(mm_cmp_res, 0);\n\n        r1 = r;\n\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));\n        vs = _mm_castsi128_ph(_mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]));\n\n        _mm_storeu_ph(val_2, vs);\n        val_2 += _mm_popcnt_u32(r1);\n      }\n\n      const uint16_t id1_max = m_sparse_index[i1 + 7];\n\n      if (id1_max <= q_sparse_index[i2 
+ 7]) {\n        i1 += 8;\n        if (i1 >= end1) goto do_scalar;\n        mm_index_m = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n      }\n\n      if (id1_max >= q_sparse_index[i2 + 7]) {\n        i2 += 8;\n        if (i2 >= end2) goto do_scalar;\n        mm_index_q = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n      }\n    }\n  }\n\ndo_scalar:\n  while (i1 < m_sparse_count && i2 < q_sparse_count) {\n    if (m_sparse_index[i1] == q_sparse_index[i2]) {\n      *val_1++ = m_sparse_value[i1];\n      *val_2++ = q_sparse_value[i2];\n\n      ++i1;\n      ++i2;\n    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {\n      ++i1;\n    } else {\n      ++i2;\n    }\n  }\n\n  size_t res_num = val_1 - val_start_1;\n\n  size_t res_num8 = res_num / 8 * 8;\n\n  if (res_num8) {\n    __m128h sum128 = _mm_set1_ph(0);\n\n    for (size_t k = 0; k < res_num8; k += 8) {\n      sum128 = _mm_add_ph(sum128, _mm_mul_ph(_mm_loadu_ph(val_start_1 + k),\n                                             _mm_loadu_ph(val_start_2 + k)));\n    }\n\n    Float16 __attribute__((aligned(16))) tmp_res[8];\n    _mm_store_ph(tmp_res, sum128);\n    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3] + tmp_res[4] +\n            tmp_res[5] + tmp_res[6] + tmp_res[7]);\n  }\n\n  for (size_t k = res_num8; k < res_num; ++k)\n    sum += val_start_1[k] * val_start_2[k];\n\n  return sum;\n}\n\n#endif  // __AVX512FP16__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp16_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__ARM_NEON)\nfloat InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs, size_t size);\nfloat MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                size_t size);\n#endif\n\n#if defined(__AVX__)\nfloat InnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, size_t size);\nfloat MinusInnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs,\n                               size_t size);\n#endif\n\n#if defined(__AVX512F__)\nfloat InnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                             size_t size);\nfloat MinusInnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,\n                                  size_t size);\n#endif\n\n#if defined(__AVX512FP16__)\nfloat InnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,\n                                 size_t size);\nfloat MinusInnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,\n                                      size_t size);\n#endif\n\nfloat InnerProductFp16Scalar(const Float16 *lhs, const Float16 *rhs,\n                   
          size_t size);\nfloat MinusInnerProductFp16Scalar(const Float16 *lhs, const Float16 *rhs,\n                                  size_t size);\n\n//! Compute the distance between matrix and query (FP16, M=1, N=1)\nvoid InnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,\n                                                const ValueType *q, size_t dim,\n                                                float *out) {\n#if defined(__ARM_NEON)\n  *out = InnerProductFp16NEON(m, q, dim);\n#else\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    *out = InnerProductFp16AVX512FP16(m, q, dim);\n    return;\n  }\n#endif  //__AVX512FP16__\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = InnerProductFp16AVX512(m, q, dim);\n    return;\n  }\n#endif  //__AVX512F__\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = InnerProductFp16AVX(m, q, dim);\n    return;\n  }\n#endif  //__AVX__\n  *out = InnerProductFp16Scalar(m, q, dim);\n\n#endif  //__ARM_NEON\n}\n\n//! 
Compute the distance between matrix and query (FP16, M=1, N=1)\nvoid MinusInnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,\n                                                     const ValueType *q,\n                                                     size_t dim, float *out) {\n#if defined(__ARM_NEON)\n  *out = MinusInnerProductFp16NEON(m, q, dim);\n#else\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    *out = MinusInnerProductFp16AVX512FP16(m, q, dim);\n    return;\n  }\n#endif  //__AVX512FP16__\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MinusInnerProductFp16AVX512(m, q, dim);\n    return;\n  }\n#endif  //__AVX512F__\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MinusInnerProductFp16AVX(m, q, dim);\n    return;\n  }\n#endif  //__AVX__\n\n  *out = MinusInnerProductFp16Scalar(m, q, dim);\n\n#endif  //__ARM_NEON\n}\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n#if defined(__AVX512FP16__)\nfloat InnerProductSparseInSegmentFp16AVX512FP16(uint32_t m_sparse_count,\n                                                const uint16_t *m_sparse_index,\n                                                const Float16 *m_sparse_value,\n                                                uint32_t q_sparse_count,\n                                                const uint16_t *q_sparse_index,\n                                                const Float16 *q_sparse_value);\n#endif  //__AVX512FP16__\n\n#if defined(__AVX__)\nfloat InnerProductSparseInSegmentFp16AVX(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const Float16 *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                  
       const uint16_t *q_sparse_index,\n                                         const Float16 *q_sparse_value);\n#endif  //__AVX__\n\nfloat InnerProductSparseInSegmentFp16Scalar(uint32_t m_sparse_count,\n                                            const uint16_t *m_sparse_index,\n                                            const Float16 *m_sparse_value,\n                                            uint32_t q_sparse_count,\n                                            const uint16_t *q_sparse_index,\n                                            const Float16 *q_sparse_value);\n\nfloat MinusInnerProductSparseFp16Scalar(const void *m_sparse_data_in,\n                                        const void *q_sparse_data_in);\n\n//! Compute the distance between matrix and query\nvoid MinusInnerProductSparseMatrix<Float16>::Compute(\n    const void *m_sparse_data_in, const void *q_sparse_data_in, float *out) {\n  *out = MinusInnerProductSparseFp16Scalar(m_sparse_data_in, q_sparse_data_in);\n}\n\nfloat ComputeInnerProductSparseInSegmentFp16(uint32_t m_sparse_count,\n                                             const uint16_t *m_sparse_index,\n                                             const Float16 *m_sparse_value,\n                                             uint32_t q_sparse_count,\n                                             const uint16_t *q_sparse_index,\n                                             const Float16 *q_sparse_value) {\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    return InnerProductSparseInSegmentFp16AVX512FP16(\n        m_sparse_count, m_sparse_index, m_sparse_value, q_sparse_count,\n        q_sparse_index, q_sparse_value);\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    return InnerProductSparseInSegmentFp16AVX(m_sparse_count, m_sparse_index,\n                                              m_sparse_value, q_sparse_count,\n                
                              q_sparse_index, q_sparse_value);\n  }\n#endif\n  return InnerProductSparseInSegmentFp16Scalar(m_sparse_count, m_sparse_index,\n                                               m_sparse_value, q_sparse_count,\n                                               q_sparse_index, q_sparse_value);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp16_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\nfloat InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                           size_t size) {\n  float score;\n\n  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, )\n\n  return score;\n}\n\nfloat MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                size_t size) {\n  float score;\n\n  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, NEGATE_FP32_GENERAL)\n\n  return score;\n}\n#endif\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp32_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX__)\nfloat InnerProductFp32SSEInternal(const float *lhs, const float *rhs,\n                                  size_t size);\n\n//! 
Inner Product\nfloat InnerProductFp32AVXInternal(const float *lhs, const float *rhs,\n                                  size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 4) << 4);\n\n  __m256 ymm_sum_0 = _mm256_setzero_ps();\n  __m256 ymm_sum_1 = _mm256_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_lhs_0 = _mm256_load_ps(lhs + 0);\n      __m256 ymm_lhs_1 = _mm256_load_ps(lhs + 8);\n      __m256 ymm_rhs_0 = _mm256_load_ps(rhs + 0);\n      __m256 ymm_rhs_1 = _mm256_load_ps(rhs + 8);\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      ymm_sum_0 =\n          _mm256_fmadd_ps(_mm256_load_ps(lhs), _mm256_load_ps(rhs), ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs + 0);\n      __m256 ymm_lhs_1 = _mm256_loadu_ps(lhs + 8);\n      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs + 0);\n      __m256 ymm_rhs_1 = _mm256_loadu_ps(rhs + 8);\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      ymm_sum_0 = _mm256_fmadd_ps(_mm256_loadu_ps(lhs), _mm256_loadu_ps(rhs),\n                                  ymm_sum_0);\n      lhs += 8;\n      rhs += 8;\n    }\n  }\n  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));\n\n  switch (last - lhs) {\n    case 7:\n      FMA_FP32_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_FP32_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_FP32_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      
FMA_FP32_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductFp32AVX(const float *lhs, const float *rhs, size_t size) {\n  if (size > 7) {\n    return InnerProductFp32AVXInternal(lhs, rhs, size);\n  }\n\n  return InnerProductFp32SSEInternal(lhs, rhs, size);\n}\n\nfloat MinusInnerProductFp32AVX(const float *lhs, const float *rhs,\n                               size_t size) {\n  return -1 * InnerProductFp32AVX(lhs, rhs, size);\n}\n\n#endif  // __AVX__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp32_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX512F__)\nfloat InnerProductFp32AVXInternal(const float *lhs, const float *rhs,\n                                  size_t size);\n\nfloat InnerProductFp32SSEInternal(const float *lhs, const float *rhs,\n                                  size_t size);\n\n//! 
Inner Product\nfloat InnerProductFp32AVX512Internal(const float *lhs, const float *rhs,\n                                     size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m512 zmm_sum_0 = _mm512_setzero_ps();\n  __m512 zmm_sum_1 = _mm512_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      FMA_FP32_AVX512(_mm512_load_ps(lhs + 0), _mm512_load_ps(rhs + 0),\n                      zmm_sum_0)\n\n      FMA_FP32_AVX512(_mm512_load_ps(lhs + 16), _mm512_load_ps(rhs + 16),\n                      zmm_sum_1)\n    }\n\n    if (last >= last_aligned + 16) {\n      FMA_FP32_AVX512(_mm512_load_ps(lhs), _mm512_load_ps(rhs), zmm_sum_0)\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      FMA_FP32_AVX512(_mm512_loadu_ps(lhs + 0), _mm512_loadu_ps(rhs + 0),\n                      zmm_sum_0)\n\n      FMA_FP32_AVX512(_mm512_loadu_ps(lhs + 16), _mm512_loadu_ps(rhs + 16),\n                      zmm_sum_1)\n    }\n\n    if (last >= last_aligned + 16) {\n      FMA_FP32_AVX512(_mm512_loadu_ps(lhs), _mm512_loadu_ps(rhs), zmm_sum_0)\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n\n  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);\n  if (lhs != last) {\n    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);\n    __m512 zmm_undefined = _mm512_undefined_ps();\n    zmm_sum_0 = _mm512_mask3_fmadd_ps(\n        _mm512_mask_loadu_ps(zmm_undefined, mask, lhs),\n        _mm512_mask_loadu_ps(zmm_undefined, mask, rhs), zmm_sum_0, mask);\n  }\n  return HorizontalAdd_FP32_V512(zmm_sum_0);\n}\n\nfloat InnerProductFp32AVX512(const float *lhs, const float *rhs, size_t size) {\n  if (size > 15) {\n    return InnerProductFp32AVX512Internal(lhs, rhs, size);\n  }\n\n  if (size > 7) {\n    return InnerProductFp32AVXInternal(lhs, rhs, size);\n  }\n\n  return 
InnerProductFp32SSEInternal(lhs, rhs, size);\n}\n\nfloat MinusInnerProductFp32AVX512(const float *lhs, const float *rhs,\n                                  size_t size) {\n  return -1 * InnerProductFp32AVX512(lhs, rhs, size);\n}\n\n#endif\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp32_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__ARM_NEON)\nfloat InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size);\nfloat MinusInnerProductFp32NEON(const float *lhs, const float *rhs,\n                                size_t size);\n#endif\n\n#if defined(__AVX512F__)\nfloat InnerProductFp32AVX512(const float *lhs, const float *rhs, size_t size);\nfloat MinusInnerProductFp32AVX512(const float *lhs, const float *rhs,\n                                  size_t size);\n#endif\n\n#if defined(__AVX__)\nfloat InnerProductFp32AVX(const float *lhs, const float *rhs, size_t size);\nfloat MinusInnerProductFp32AVX(const float *lhs, const float *rhs, size_t size);\n#endif\n\n#if defined(__SSE__)\nfloat InnerProductFp32SSE(const float *lhs, const float *rhs, size_t size);\nfloat MinusInnerProductFp32SSE(const float *lhs, const float *rhs, size_t size);\n#endif\n\nfloat InnerProductFp32Scalar(const float *lhs, const float *rhs, size_t size);\nfloat MinusInnerProductFp32Scalar(const float *lhs, const float *rhs,\n                                  size_t size);\n\n//! 
Compute the distance between matrix and query (FP32, M=1, N=1)\nvoid InnerProductMatrix<float, 1, 1>::Compute(const float *m, const float *q,\n                                              size_t dim, float *out) {\n#if defined(__ARM_NEON)\n  *out = InnerProductFp32NEON(m, q, dim);\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = InnerProductFp32AVX512(m, q, dim);\n    return;\n  }\n#endif  // __AVX512F__\n\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = InnerProductFp32AVX(m, q, dim);\n    return;\n  }\n#endif  // __AVX__\n\n#if defined(__SSE__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {\n    *out = InnerProductFp32SSE(m, q, dim);\n    return;\n  }\n#endif  // __SSE__\n  *out = InnerProductFp32Scalar(m, q, dim);\n#endif  // __ARM_NEON\n}\n\n//! Compute the distance between matrix and query (FP32, M=1, N=1)\nvoid MinusInnerProductMatrix<float, 1, 1>::Compute(const float *m,\n                                                   const float *q, size_t dim,\n                                                   float *out) {\n#if defined(__ARM_NEON)\n  *out = MinusInnerProductFp32NEON(m, q, dim);\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MinusInnerProductFp32AVX512(m, q, dim);\n    return;\n  }\n#endif  // __AVX512F__\n\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MinusInnerProductFp32AVX(m, q, dim);\n    return;\n  }\n#endif  // __AVX__\n\n#if defined(__SSE__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {\n    *out = MinusInnerProductFp32SSE(m, q, dim);\n    return;\n  }\n#endif  // __SSE__\n  *out = MinusInnerProductFp32Scalar(m, q, dim);\n#endif  // __ARM_NEON\n}\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n#if 
defined(__SSE4_1__)\nfloat InnerProductSparseInSegmentFp32SSE(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const float *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const float *q_sparse_value);\n#endif\nfloat InnerProductSparseInSegmentFp32Scalar(uint32_t m_sparse_count,\n                                            const uint16_t *m_sparse_index,\n                                            const float *m_sparse_value,\n                                            uint32_t q_sparse_count,\n                                            const uint16_t *q_sparse_index,\n                                            const float *q_sparse_value);\n\nfloat MinusInnerProductSparseFp32Scalar(const void *m_sparse_data_in,\n                                        const void *q_sparse_data_in);\n\nvoid MinusInnerProductSparseMatrix<float>::Compute(const void *m_sparse_data_in,\n                                                   const void *q_sparse_data_in,\n                                                   float *out) {\n  *out = MinusInnerProductSparseFp32Scalar(m_sparse_data_in, q_sparse_data_in);\n}\n\nfloat ComputeInnerProductSparseInSegmentFp32(uint32_t m_sparse_count,\n                                             const uint16_t *m_sparse_index,\n                                             const float *m_sparse_value,\n                                             uint32_t q_sparse_count,\n                                             const uint16_t *q_sparse_index,\n                                             const float *q_sparse_value) {\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    return InnerProductSparseInSegmentFp32SSE(m_sparse_count, m_sparse_index,\n                  
                            m_sparse_value, q_sparse_count,\n                                              q_sparse_index, q_sparse_value);\n  }\n#endif\n  return InnerProductSparseInSegmentFp32Scalar(m_sparse_count, m_sparse_index,\n                                               m_sparse_value, q_sparse_count,\n                                               q_sparse_index, q_sparse_value);\n}\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp32_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__ARM_NEON)\nfloat InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  float32x4_t v_sum_0 = vdupq_n_f32(0);\n  float32x4_t v_sum_1 = vdupq_n_f32(0);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    v_sum_0 = vfmaq_f32(v_sum_0, vld1q_f32(lhs + 0), vld1q_f32(rhs + 0));\n    v_sum_1 = vfmaq_f32(v_sum_1, vld1q_f32(lhs + 4), vld1q_f32(rhs + 4));\n  }\n  if (last >= last_aligned + 4) {\n    v_sum_0 = vfmaq_f32(v_sum_0, vld1q_f32(lhs), vld1q_f32(rhs));\n    lhs += 4;\n    rhs += 4;\n  }\n\n  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));\n  switch (last - lhs) {\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat MinusInnerProductFp32NEON(const float *lhs, const float *rhs,\n                                
size_t size) {\n  return -1 * InnerProductFp32NEON(lhs, rhs, size);\n}\n\n#endif  // __ARM_NEON\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_fp32_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__SSE__)\nfloat InnerProductFp32SSEInternal(const float *lhs, const float *rhs,\n                                  size_t size) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  __m128 xmm_sum_0 = _mm_setzero_ps();\n  __m128 xmm_sum_1 = _mm_setzero_ps();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_lhs_0 = _mm_load_ps(lhs + 0);\n      __m128 xmm_lhs_1 = _mm_load_ps(lhs + 4);\n      __m128 xmm_rhs_0 = _mm_load_ps(rhs + 0);\n      __m128 xmm_rhs_1 = _mm_load_ps(rhs + 4);\n      xmm_sum_0 = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum_0);\n      xmm_sum_1 = _mm_fmadd_ps(xmm_lhs_1, xmm_rhs_1, xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 4) {\n      xmm_sum_0 = _mm_fmadd_ps(_mm_load_ps(lhs), _mm_load_ps(rhs), xmm_sum_0);\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_lhs_0 = _mm_loadu_ps(lhs + 0);\n      __m128 
xmm_lhs_1 = _mm_loadu_ps(lhs + 4);\n      __m128 xmm_rhs_0 = _mm_loadu_ps(rhs + 0);\n      __m128 xmm_rhs_1 = _mm_loadu_ps(rhs + 4);\n      xmm_sum_0 = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum_0);\n      xmm_sum_1 = _mm_fmadd_ps(xmm_lhs_1, xmm_rhs_1, xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 4) {\n      xmm_sum_0 = _mm_fmadd_ps(_mm_loadu_ps(lhs), _mm_loadu_ps(rhs), xmm_sum_0);\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  float result = HorizontalAdd_FP32_V128(_mm_add_ps(xmm_sum_0, xmm_sum_1));\n\n  switch (last - lhs) {\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductFp32SSE(const float *lhs, const float *rhs, size_t size) {\n  return InnerProductFp32SSEInternal(lhs, rhs, size);\n}\n\nfloat MinusInnerProductFp32SSE(const float *lhs, const float *rhs,\n                               size_t size) {\n  return -1 * InnerProductFp32SSE(lhs, rhs, size);\n}\n\n#endif  // __SSE__\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n#if defined(__SSE4_1__)\nconst static __m128i SHUFFLE_MASK16[16] = {\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, -127, -127),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 
9, 8,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 15, 14, 13, 12),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,\n                 4),\n    _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n};\n\nconstexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;\n\nfloat InnerProductSparseInSegmentFp32SSE(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const float *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const float *q_sparse_value) {\n  float sum = 0.0f;\n\n  // handle if the first dim is zero\n  bool m_zero = false;\n  float m_zero_value = 0.0f;\n  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {\n    m_sparse_count--;\n    m_sparse_index++;\n    m_zero_value = *m_sparse_value++;\n    m_zero = true;\n  }\n\n  bool q_zero = false;\n  float q_zero_value = 0.0f;\n  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {\n    q_sparse_count--;\n    
q_sparse_index++;\n    q_zero_value = *q_sparse_value++;\n    q_zero = true;\n  }\n\n  if (m_zero && q_zero) {\n    sum = m_zero_value * q_zero_value;\n  }\n\n  size_t i1 = 0, i2 = 0;\n  size_t end1 = m_sparse_count / 8 * 8;\n  size_t end2 = q_sparse_count / 8 * 8;\n\n  // std::vector<float> mem1;\n  // std::vector<float> mem2;\n\n  float fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];\n  float fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];\n\n  float *val_start_1 = fixed_buffer_1;\n  float *val_start_2 = fixed_buffer_2;\n\n  // uint32_t max_count = std::max(m_sparse_count, q_sparse_count);\n\n  // if (MAX_SPARSE_BUFFER_LENGTH < max_count) {\n  //   mem1.reserve(max_count);\n  //   mem2.reserve(max_count);\n\n  //   val_start_1 = mem1.data();\n  //   val_start_2 = mem2.data();\n  // }\n\n  float *val_1 = val_start_1;\n  float *val_2 = val_start_2;\n\n  if (i1 < end1 && i2 < end2) {\n    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {\n      i1 += 8;\n      if (i1 >= end1) goto do_scalar;\n    }\n\n    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {\n      i2 += 8;\n      if (i2 >= end2) goto do_scalar;\n    }\n\n    __m128i mm_index_m =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n    __m128i mm_index_q =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n\n    while (true) {\n#ifdef DEBUG_PRINT\n      std::cout << \"index 1: \" << std::endl;\n      print_data16(&mm_index_m);\n\n      std::cout << \"index 2: \" << std::endl;\n      print_data16(&mm_index_q);\n#endif\n\n      __m128i mm_cmp_res =\n          _mm_cmpistrm(mm_index_q, mm_index_m,\n                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n\n#ifdef DEBUG_PRINT\n      std::cout << \"cmp res: \" << std::endl;\n      print_data16(&mm_cmp_res);\n#endif\n\n      int r = _mm_extract_epi32(mm_cmp_res, 0);\n\n      if (r) {\n        int r1 = r & 15;\n\n        __m128i v = _mm_loadu_si128(\n            
reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));\n        __m128 vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));\n\n        _mm_storeu_ps(val_1, vs);\n        val_1 += _mm_popcnt_u32(r1);\n\n        int r2 = (r >> 4) & 15;\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_value[i1 + 4]));\n        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));\n        _mm_storeu_ps(val_1, vs);\n        val_1 += _mm_popcnt_u32(r2);\n\n        mm_cmp_res = _mm_cmpistrm(\n            mm_index_m, mm_index_q,\n            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n        r = _mm_extract_epi32(mm_cmp_res, 0);\n\n        r1 = r & 15;\n\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));\n        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));\n        _mm_storeu_ps(val_2, vs);\n        val_2 += _mm_popcnt_u32(r1);\n\n        r2 = (r >> 4) & 15;\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2 + 4]));\n        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));\n        _mm_storeu_ps(val_2, vs);\n        val_2 += _mm_popcnt_u32(r2);\n      }\n\n      const uint16_t id1_max = m_sparse_index[i1 + 7];\n\n      if (id1_max <= q_sparse_index[i2 + 7]) {\n        i1 += 8;\n        if (i1 >= end1) goto do_scalar;\n        mm_index_m = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n      }\n\n      if (id1_max >= q_sparse_index[i2 + 7]) {\n        i2 += 8;\n        if (i2 >= end2) goto do_scalar;\n        mm_index_q = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n      }\n    }\n  }\n\ndo_scalar:\n  while (i1 < m_sparse_count && i2 < q_sparse_count) {\n    if (m_sparse_index[i1] == q_sparse_index[i2]) {\n      *val_1++ = m_sparse_value[i1];\n      *val_2++ = q_sparse_value[i2];\n\n      ++i1;\n      
++i2;\n    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {\n      ++i1;\n    } else {\n      ++i2;\n    }\n  }\n\n  size_t res_num = val_1 - val_start_1;\n\n  //  if (res_num != val_2 - val_start_2) {\n  //   std::cerr << \"size mismatch!\" << std::endl;\n  //  }\n\n  size_t res_num4 = res_num / 4 * 4;\n\n  if (res_num4) {\n    __m128 sum128 = _mm_set1_ps(0);\n\n    for (size_t k = 0; k < res_num4; k += 4) {\n      sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(val_start_1 + k),\n                                             _mm_loadu_ps(val_start_2 + k)));\n    }\n\n    float __attribute__((aligned(16))) tmp_res[4];\n    _mm_store_ps(tmp_res, sum128);\n    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);\n  }\n\n  for (size_t k = res_num4; k < res_num; ++k)\n    sum += val_start_1[k] * val_start_2[k];\n\n  return sum;\n}\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int4_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int4.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX2__)\nfloat InnerProductInt4SSEInternal(const uint8_t *lhs, const uint8_t *rhs,\n                                  size_t size);\n\n//! 
Inner Product\nfloat InnerProductInt4AVX2Internal(const uint8_t *lhs, const uint8_t *rhs,\n                                   size_t size) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 5) << 5);\n  __m256i ymm_sum = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)(rhs));\n      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      __m128i xmm_sum = _mm_setzero_si128();\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),\n                                 ymm_sum);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)(rhs));\n      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      __m128i xmm_sum = _mm_setzero_si128();\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),\n                                 ymm_sum);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT4_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT4_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU 
*/\n    case 13:\n      FMA_INT4_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT4_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT4_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT4_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT4_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT4_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT4_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT4_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT4_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT4_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT4_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT4_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT4_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                           size_t size) {\n  if (size > 63) {\n    return InnerProductInt4AVX2Internal(lhs, rhs, size >> 1);\n  }\n\n  return InnerProductInt4SSEInternal(lhs, rhs, size >> 1);\n}\n\nfloat MinusInnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                                size_t size) {\n  return -InnerProductInt4AVX2(lhs, rhs, size);\n}\n\n#endif  // __AVX2__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int4_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX2__)\nfloat InnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs, size_t size);\nfloat MinusInnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                                size_t size);\n#endif\n\n#if defined(__SSE4_1__)\nfloat InnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs, size_t size);\nfloat MinusInnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs,\n                               size_t size);\n#endif\n\nfloat InnerProductInt4Scalar(const uint8_t *m, const uint8_t *q, size_t dim);\nfloat MinusInnerProductInt4Scalar(const uint8_t *m, const uint8_t *q,\n                                  size_t dim);\n\n//! 
Compute the distance between matrix and query (INT4, M=1, N=1)\nvoid InnerProductMatrix<uint8_t, 1, 1>::Compute(const uint8_t *m,\n                                                const uint8_t *q, size_t dim,\n                                                float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = InnerProductInt4AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = InnerProductInt4SSE(m, q, dim);\n    return;\n  }\n#endif  //__SSE4_1__\n  *out = InnerProductInt4Scalar(m, q, dim);\n}\n\n//! Compute the distance between matrix and query (INT4, M=1, N=1)\nvoid MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(const uint8_t *m,\n                                                     const uint8_t *q,\n                                                     size_t dim, float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MinusInnerProductInt4AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MinusInnerProductInt4SSE(m, q, dim);\n    return;\n  }\n#endif  //__SSE4_1__\n  *out = MinusInnerProductInt4Scalar(m, q, dim);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int4_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int4.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__SSE4_1__)\nfloat InnerProductInt4SSEInternal(const uint8_t *lhs, const uint8_t *rhs,\n                                  size_t size) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 4) << 4);\n  __m128i xmm_sum = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)(rhs));\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)(rhs));\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT4_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n  
    FMA_INT4_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT4_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT4_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT4_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT4_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT4_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT4_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT4_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT4_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT4_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT4_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT4_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT4_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT4_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs, size_t size) {\n  return InnerProductInt4SSEInternal(lhs, rhs, size >> 1);\n}\n\nfloat MinusInnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs,\n                               size_t size) {\n  return -InnerProductInt4SSE(lhs, rhs, size);\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int8_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX2__)\nfloat InnerProductInt8SSEInternal(const int8_t *lhs, const int8_t *rhs,\n                                  size_t size);\n\ninline float InnerProductInt8AVX2Internal(const int8_t *lhs, const int8_t *rhs,\n                                          size_t size) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 6) << 6);\n  float result = 0.0;\n\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));\n\n      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);\n      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);\n      ymm_rhs_0 = 
_mm256_abs_epi8(ymm_rhs_0);\n      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);\n\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),\n                            ONES_INT16_AVX),\n          ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);\n      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);\n      ymm_rhs = _mm256_abs_epi8(ymm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(),\n                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),\n                                          ONES_INT16_SSE)),\n          ymm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));\n\n      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);\n      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);\n      ymm_rhs_0 = 
_mm256_abs_epi8(ymm_rhs_0);\n      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);\n\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),\n                            ONES_INT16_AVX),\n          ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);\n      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);\n      ymm_rhs = _mm256_abs_epi8(ymm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(),\n                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),\n                                          ONES_INT16_SSE)),\n          ymm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  result = static_cast<float>(\n      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT8_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT8_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT8_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT8_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      
FMA_INT8_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT8_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT8_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT8_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT8_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT8_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT8_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT8_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT8_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT8_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT8_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size) {\n  if (size > 31) {\n    return InnerProductInt8AVX2Internal(lhs, rhs, size);\n  }\n\n  return InnerProductInt8SSEInternal(lhs, rhs, size);\n}\n\nfloat MinusInnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs,\n                                size_t size) {\n  return -InnerProductInt8AVX2(lhs, rhs, size);\n}\n\n#endif  // __AVX2__\n\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int8_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__AVX2__)\nfloat InnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size);\nfloat MinusInnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs,\n                                size_t size);\n#endif\n\n#if defined(__SSE4_1__)\nfloat InnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs, size_t size);\nfloat MinusInnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs,\n                               size_t size);\n#endif\n\nfloat InnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim);\nfloat MinusInnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim);\n\n//! 
Compute the distance between matrix and query (INT8, M=1, N=1)\nvoid InnerProductMatrix<int8_t, 1, 1>::Compute(const int8_t *m, const int8_t *q,\n                                               size_t dim, float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = InnerProductInt8AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = InnerProductInt8SSE(m, q, dim);\n    return;\n  }\n\n#endif  //__SSE4_1__\n\n  *out = InnerProductInt8Scalar(m, q, dim);\n}\n\n//! Compute the distance between matrix and query (INT8, M=1, N=1)\nvoid MinusInnerProductMatrix<int8_t, 1, 1>::Compute(const int8_t *m,\n                                                    const int8_t *q, size_t dim,\n                                                    float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MinusInnerProductInt8AVX2(m, q, dim);\n    return;\n  }\n#endif  // __AVX2__\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MinusInnerProductInt8SSE(m, q, dim);\n    return;\n  }\n#endif  //__SSE4_1__\n\n  *out = MinusInnerProductInt8Scalar(m, q, dim);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_int8_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_inner_product_utility.i\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n#if defined(__SSE4_1__)\n//! Inner Product\nfloat InnerProductInt8SSEInternal(const int8_t *lhs, const int8_t *rhs,\n                                  size_t size) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m128i xmm_sum_0 = _mm_setzero_si128();\n  __m128i xmm_sum_1 = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));\n\n      xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);\n      xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);\n      xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);\n      xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0),\n   
                                    ONES_INT16_SSE),\n                        xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1),\n                                       ONES_INT16_SSE),\n                        xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n\n      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      xmm_sum_0 = _mm_add_epi32(\n          _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs), ONES_INT16_SSE),\n          xmm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));\n\n      xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);\n      xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);\n      xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);\n      xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);\n      xmm_sum_0 =\n          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0),\n                                       ONES_INT16_SSE),\n                        xmm_sum_0);\n      xmm_sum_1 =\n          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1),\n                                       ONES_INT16_SSE),\n                        xmm_sum_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n\n      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      xmm_sum_0 = _mm_add_epi32(\n          
_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs), ONES_INT16_SSE),\n          xmm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(\n      HorizontalAdd_INT32_V128(_mm_add_epi32(xmm_sum_0, xmm_sum_1)));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT8_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT8_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT8_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT8_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT8_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT8_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT8_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT8_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT8_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT8_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT8_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT8_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT8_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT8_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT8_GENERAL(lhs[0], rhs[0], result)\n  }\n  return result;\n}\n\nfloat InnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs, size_t size) {\n  return InnerProductInt8SSEInternal(lhs, rhs, size);\n}\n\nfloat MinusInnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs,\n                               size_t size) {\n  return -InnerProductInt8SSEInternal(lhs, rhs, size);\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/inner_product_matrix_scalar.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n#include \"inner_product_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\ntemplate <typename T>\ninline float InnerProductScalar(const T *m, const T *q, size_t dim) {\n  ailego_assert(m && q && dim);\n\n  float sum = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    sum += static_cast<float>(m[i] * q[i]);\n  }\n  return sum;\n}\n\ntemplate <typename T>\ninline float MinusInnerProductScalar(const T *m, const T *q, size_t dim) {\n  ailego_assert(m && q && dim);\n\n  float sum = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    sum += static_cast<float>(m[i] * q[i]);\n  }\n  return -sum;\n}\n\nfloat InnerProductInt4Scalar(const uint8_t *m, const uint8_t *q, size_t dim) {\n  ailego_assert(m && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    uint8_t m_val = m[i];\n    uint8_t q_val = q[i];\n    sum += Int4MulTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4MulTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n\n  return sum;\n}\n\nfloat 
MinusInnerProductInt4Scalar(const uint8_t *m, const uint8_t *q,\n                                  size_t dim) {\n  ailego_assert(m && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    uint8_t m_val = m[i];\n    uint8_t q_val = q[i];\n    sum -= Int4MulTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4MulTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n  return sum;\n}\n\nfloat InnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim) {\n  return InnerProductScalar<int8_t>(m, q, dim);\n}\n\nfloat MinusInnerProductInt8Scalar(const int8_t *m, const int8_t *q,\n                                  size_t dim) {\n  return MinusInnerProductScalar<int8_t>(m, q, dim);\n}\n\nfloat InnerProductFp16Scalar(const ailego::Float16 *m, const ailego::Float16 *q,\n                             size_t dim) {\n  return InnerProductScalar<ailego::Float16>(m, q, dim);\n}\n\nfloat MinusInnerProductFp16Scalar(const ailego::Float16 *m,\n                                  const ailego::Float16 *q, size_t dim) {\n  return MinusInnerProductScalar<ailego::Float16>(m, q, dim);\n}\n\nfloat InnerProductFp32Scalar(const float *m, const float *q, size_t dim) {\n  return InnerProductScalar<float>(m, q, dim);\n}\n\nfloat MinusInnerProductFp32Scalar(const float *m, const float *q, size_t dim) {\n  return MinusInnerProductScalar<float>(m, q, dim);\n}\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\nfloat ComputeInnerProductSparseInSegmentFp32(uint32_t m_sparse_count,\n                                             const uint16_t *m_sparse_index,\n                                             const float *m_sparse_value,\n                                             uint32_t q_sparse_count,\n                                             const uint16_t *q_sparse_index,\n                                             const float *q_sparse_value);\n\nfloat 
ComputeInnerProductSparseInSegmentFp16(uint32_t m_sparse_count,\n                                             const uint16_t *m_sparse_index,\n                                             const Float16 *m_sparse_value,\n                                             uint32_t q_sparse_count,\n                                             const uint16_t *q_sparse_index,\n                                             const Float16 *q_sparse_value);\n\ntemplate <typename T>\nfloat ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const T *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const T *q_sparse_value);\n\ntemplate <>\nfloat ComputeInnerProductSparseInSegment<float>(uint32_t m_sparse_count,\n                                                const uint16_t *m_sparse_index,\n                                                const float *m_sparse_value,\n                                                uint32_t q_sparse_count,\n                                                const uint16_t *q_sparse_index,\n                                                const float *q_sparse_value) {\n  return ComputeInnerProductSparseInSegmentFp32(m_sparse_count, m_sparse_index,\n                                                m_sparse_value, q_sparse_count,\n                                                q_sparse_index, q_sparse_value);\n}\n\ntemplate <>\nfloat ComputeInnerProductSparseInSegment<Float16>(\n    uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n    const Float16 *m_sparse_value, uint32_t q_sparse_count,\n    const uint16_t *q_sparse_index, const Float16 *q_sparse_value) {\n  return ComputeInnerProductSparseInSegmentFp16(m_sparse_count, m_sparse_index,\n                                        
        m_sparse_value, q_sparse_count,\n                                                q_sparse_index, q_sparse_value);\n}\n\ntemplate <typename T>\nfloat ComputeSegments(const void *m_sparse_data_in,\n                      const void *q_sparse_data_in) {\n  ailego_assert(m_sparse_data_in && q_sparse_data_in);\n\n  float sum{0.0f};\n\n  const uint8_t *m_sparse_data =\n      reinterpret_cast<const uint8_t *>(m_sparse_data_in);\n  const uint8_t *q_sparse_data =\n      reinterpret_cast<const uint8_t *>(q_sparse_data_in);\n\n  const uint32_t m_sparse_count =\n      *reinterpret_cast<const uint32_t *>(m_sparse_data);\n  const uint32_t q_sparse_count =\n      *reinterpret_cast<const uint32_t *>(q_sparse_data);\n\n  if (m_sparse_count == 0 || q_sparse_count == 0) {\n    return 0.0f;\n  }\n\n  const uint32_t m_seg_count =\n      *reinterpret_cast<const uint32_t *>(m_sparse_data + sizeof(uint32_t));\n  const uint32_t q_seg_count =\n      *reinterpret_cast<const uint32_t *>(q_sparse_data + sizeof(uint32_t));\n\n  const uint32_t *m_seg_id =\n      reinterpret_cast<const uint32_t *>(m_sparse_data + 2 * sizeof(uint32_t));\n  const uint32_t *q_seg_id =\n      reinterpret_cast<const uint32_t *>(q_sparse_data + 2 * sizeof(uint32_t));\n\n  const uint32_t *m_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n      m_sparse_data + 2 * sizeof(uint32_t) + m_seg_count * sizeof(uint32_t));\n  const uint32_t *q_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n      q_sparse_data + 2 * sizeof(uint32_t) + q_seg_count * sizeof(uint32_t));\n\n  const uint16_t *m_sparse_index =\n      reinterpret_cast<const uint16_t *>(m_sparse_data + 2 * sizeof(uint32_t) +\n                                         m_seg_count * 2 * sizeof(uint32_t));\n  const uint16_t *q_sparse_index =\n      reinterpret_cast<const uint16_t *>(q_sparse_data + 2 * sizeof(uint32_t) +\n                                         q_seg_count * 2 * sizeof(uint32_t));\n\n  const T *m_sparse_value = reinterpret_cast<const T *>(\n   
   m_sparse_data + 2 * sizeof(uint32_t) +\n      m_seg_count * 2 * sizeof(uint32_t) + m_sparse_count * sizeof(uint16_t));\n  const T *q_sparse_value = reinterpret_cast<const T *>(\n      q_sparse_data + 2 * sizeof(uint32_t) +\n      q_seg_count * 2 * sizeof(uint32_t) + q_sparse_count * sizeof(uint16_t));\n\n  size_t m_s = 0;\n  size_t q_s = 0;\n\n  size_t m_count = 0;\n  size_t q_count = 0;\n\n  while (m_s < m_seg_count && q_s < q_seg_count) {\n    if (m_seg_id[m_s] == q_seg_id[q_s]) {\n      sum += ComputeInnerProductSparseInSegment(\n          m_seg_vec_cnt[m_s], m_sparse_index + m_count,\n          m_sparse_value + m_count, q_seg_vec_cnt[q_s],\n          q_sparse_index + q_count, q_sparse_value + q_count);\n\n      m_count += m_seg_vec_cnt[m_s];\n      q_count += q_seg_vec_cnt[q_s];\n\n      ++m_s;\n      ++q_s;\n    } else if (m_seg_id[m_s] < q_seg_id[q_s]) {\n      m_count += m_seg_vec_cnt[m_s];\n\n      ++m_s;\n    } else {\n      q_count += q_seg_vec_cnt[q_s];\n\n      ++q_s;\n    }\n  }\n\n  return -sum;\n}\n\nfloat MinusInnerProductSparseFp16Scalar(const void *m_sparse_data_in,\n                                        const void *q_sparse_data_in) {\n  return ComputeSegments<Float16>(m_sparse_data_in, q_sparse_data_in);\n}\n\nfloat MinusInnerProductSparseFp32Scalar(const void *m_sparse_data_in,\n                                        const void *q_sparse_data_in) {\n  return ComputeSegments<float>(m_sparse_data_in, q_sparse_data_in);\n}\n\nfloat InnerProductSparseInSegmentFp16Scalar(uint32_t m_sparse_count,\n                                            const uint16_t *m_sparse_index,\n                                            const Float16 *m_sparse_value,\n                                            uint32_t q_sparse_count,\n                                            const uint16_t *q_sparse_index,\n                                            const Float16 *q_sparse_value) {\n  float sum = 0.0f;\n\n  size_t m_i = 0;\n  size_t q_i = 0;\n  while (m_i < 
m_sparse_count && q_i < q_sparse_count) {\n    if (m_sparse_index[m_i] == q_sparse_index[q_i]) {\n      sum += m_sparse_value[m_i] * q_sparse_value[q_i];\n\n      ++m_i;\n      ++q_i;\n    } else if (m_sparse_index[m_i] < q_sparse_index[q_i]) {\n      ++m_i;\n    } else {\n      ++q_i;\n    }\n  }\n\n  return sum;\n}\n\nfloat InnerProductSparseInSegmentFp32Scalar(uint32_t m_sparse_count,\n                                            const uint16_t *m_sparse_index,\n                                            const float *m_sparse_value,\n                                            uint32_t q_sparse_count,\n                                            const uint16_t *q_sparse_index,\n                                            const float *q_sparse_value) {\n  float sum = 0.0f;\n\n  size_t m_i = 0;\n  size_t q_i = 0;\n  while (m_i < m_sparse_count && q_i < q_sparse_count) {\n    if (m_sparse_index[m_i] == q_sparse_index[q_i]) {\n      sum += m_sparse_value[m_i] * q_sparse_value[q_i];\n\n      ++m_i;\n      ++q_i;\n    } else if (m_sparse_index[m_i] < q_sparse_index[q_i]) {\n      ++m_i;\n    } else {\n      ++q_i;\n    }\n  }\n\n  return sum;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/matrix_define.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#define MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  _VAR_TYPE _VAR_NAME##_0_0 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_1X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_1X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_1X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_1X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_1X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_1X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_1X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \\\n  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \\\n  
_VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_1_0 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_1 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_7 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \\\n  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \\\n  
_VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_2X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_24 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_2X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_2_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_0 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_1 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_2 = (_VAR_INIT);               
    \\\n  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_3 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_7 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \\\n  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_9 = (_VAR_INIT);                    \\\n  
_VAR_TYPE _VAR_NAME##_3_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_15 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_4X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_16 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);        
           \\\n  _VAR_TYPE _VAR_NAME##_2_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_31 = (_VAR_INIT);\n\n#define 
MATRIX_VAR_INIT_8X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_4X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_4_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_0 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_8X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_1 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_8X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_3 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_3 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_8X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_4 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_5 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_6 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_7 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_7 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_7 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_8X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \\\n  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_4_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_5_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_6_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_7_8 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_4_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_5_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_6_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_7_9 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_10 = (_VAR_INIT);                   \\\n  
_VAR_TYPE _VAR_NAME##_7_10 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_11 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_12 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_13 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE 
_VAR_NAME##_5_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_14 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_15 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_15 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_8X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_16 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_17 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);        
           \\\n  _VAR_TYPE _VAR_NAME##_2_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_18 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_19 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_20 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_21 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_21 = (_VAR_INIT);                   \\\n  
_VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_22 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_23 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_24 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE 
_VAR_NAME##_6_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_25 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_26 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_27 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_28 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_29 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_29 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_30 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_2_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_3_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_4_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_5_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_6_31 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_7_31 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_16X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_8X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \\\n  _VAR_TYPE _VAR_NAME##_8_0 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_9_0 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_10_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_11_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_12_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_13_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_14_0 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_15_0 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_16X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  
MATRIX_VAR_INIT_16X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_4_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_5_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_6_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_7_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_8_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_9_1 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_10_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_11_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_12_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_13_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_14_1 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_15_1 = (_VAR_INIT);\n\n#define MATRIX_VAR_INIT_16X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_16X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \\\n  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_4_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_5_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_6_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_7_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_8_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_9_2 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_10_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_11_2 = 
(_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_12_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_13_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_14_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_15_2 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_2_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_3_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_4_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_5_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_6_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_7_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_8_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_9_3 = (_VAR_INIT);                    \\\n  _VAR_TYPE _VAR_NAME##_10_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_11_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_12_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_13_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_14_3 = (_VAR_INIT);                   \\\n  _VAR_TYPE _VAR_NAME##_15_3 = (_VAR_INIT);\n\n#define MATRIX_VAR_STORE_1X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...) \\\n  _STORE((_ARRAY) + (_STEP) * (0), _NORM((_VAR##_0_0), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_1X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_1X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (1), _NORM((_VAR##_0_1), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_1X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_1X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (2), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (3), _NORM((_VAR##_0_3), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_1X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_1X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_0_4), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_0_5), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_0_6), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_0_7), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_1X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \\\n  MATRIX_VAR_STORE_1X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_0_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_0_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_0_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_0_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_0_15), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_1X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (1), _NORM((_VAR##_1_0), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_2X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (2), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (3), _NORM((_VAR##_1_1), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_2X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_1_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_0_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_1_3), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_2X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_4), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_4), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_0_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_1_7), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    
\\\n  MATRIX_VAR_STORE_2X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_0_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_1_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_0_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_1_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_0_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_1_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_1_15), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_2X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_2X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_0_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_1_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_0_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_1_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_0_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_1_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_0_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_1_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_0_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_1_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_0_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_1_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_0_25), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_1_25), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_0_26), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_1_26), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_0_27), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_1_27), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + 
(_STEP) * (56), _NORM((_VAR##_0_28), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_28), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_0_29), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_1_29), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_0_30), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_1_30), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_0_31), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_1_31), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_2X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (2), _NORM((_VAR##_2_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (3), _NORM((_VAR##_3_0), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_2_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_3_1), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_4X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_2_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_3_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_0_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_1_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_2_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_3_3), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_4X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_2_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_3_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_2_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_3_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_0_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_1_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (30), 
_NORM((_VAR##_2_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_3_7), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \\\n  MATRIX_VAR_STORE_4X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_8), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_0_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_1_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_2_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_3_9), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_2_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_3_10), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_0_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_1_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_2_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_3_11), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \\\n  
_STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_2_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_3_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (56), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_2_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_3_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_1_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_2_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_3_15), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_4X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_4X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (64), _NORM((_VAR##_0_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (65), _NORM((_VAR##_1_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (66), _NORM((_VAR##_2_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (67), _NORM((_VAR##_3_16), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (68), _NORM((_VAR##_0_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (69), _NORM((_VAR##_1_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (70), _NORM((_VAR##_2_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (71), _NORM((_VAR##_3_17), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (72), _NORM((_VAR##_0_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (73), _NORM((_VAR##_1_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (74), _NORM((_VAR##_2_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (75), _NORM((_VAR##_3_18), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (76), _NORM((_VAR##_0_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (77), 
_NORM((_VAR##_1_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (78), _NORM((_VAR##_2_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (79), _NORM((_VAR##_3_19), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (80), _NORM((_VAR##_0_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (81), _NORM((_VAR##_1_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (82), _NORM((_VAR##_2_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (83), _NORM((_VAR##_3_20), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (84), _NORM((_VAR##_0_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (85), _NORM((_VAR##_1_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (86), _NORM((_VAR##_2_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (87), _NORM((_VAR##_3_21), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (88), _NORM((_VAR##_0_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (89), _NORM((_VAR##_1_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (90), _NORM((_VAR##_2_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (91), _NORM((_VAR##_3_22), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (92), _NORM((_VAR##_0_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (93), _NORM((_VAR##_1_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (94), _NORM((_VAR##_2_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (95), _NORM((_VAR##_3_23), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (96), _NORM((_VAR##_0_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (97), _NORM((_VAR##_1_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (98), _NORM((_VAR##_2_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (99), _NORM((_VAR##_3_24), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (100), _NORM((_VAR##_0_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (101), _NORM((_VAR##_1_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (102), _NORM((_VAR##_2_25), 
##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (103), _NORM((_VAR##_3_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (104), _NORM((_VAR##_0_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (105), _NORM((_VAR##_1_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (106), _NORM((_VAR##_2_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (107), _NORM((_VAR##_3_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (108), _NORM((_VAR##_0_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (109), _NORM((_VAR##_1_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (110), _NORM((_VAR##_2_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (111), _NORM((_VAR##_3_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (112), _NORM((_VAR##_0_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (113), _NORM((_VAR##_1_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (114), _NORM((_VAR##_2_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (115), _NORM((_VAR##_3_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (116), _NORM((_VAR##_0_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (117), _NORM((_VAR##_1_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (118), _NORM((_VAR##_2_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (119), _NORM((_VAR##_3_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (120), _NORM((_VAR##_0_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (121), _NORM((_VAR##_1_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (122), _NORM((_VAR##_2_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (123), _NORM((_VAR##_3_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (124), _NORM((_VAR##_0_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (125), _NORM((_VAR##_1_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (126), _NORM((_VAR##_2_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (127), _NORM((_VAR##_3_31), 
##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_4_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_5_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_6_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_7_0), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_2_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_3_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_4_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_5_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_6_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_7_1), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_8X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_4_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_5_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_6_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_7_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_2_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_3_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_4_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_5_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_6_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_7_3), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_8X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_4_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_5_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_6_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_7_4), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_2_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_3_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_4_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_5_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_6_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_7_5), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_4_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_5_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_6_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_7_6), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (56), 
_NORM((_VAR##_0_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_2_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_3_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_4_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_5_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_6_7), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_7_7), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \\\n  MATRIX_VAR_STORE_8X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__)  \\\n  _STORE((_ARRAY) + (_STEP) * (64), _NORM((_VAR##_0_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (65), _NORM((_VAR##_1_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (66), _NORM((_VAR##_2_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (67), _NORM((_VAR##_3_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (68), _NORM((_VAR##_4_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (69), _NORM((_VAR##_5_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (70), _NORM((_VAR##_6_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (71), _NORM((_VAR##_7_8), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (72), _NORM((_VAR##_0_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (73), _NORM((_VAR##_1_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (74), _NORM((_VAR##_2_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (75), _NORM((_VAR##_3_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (76), _NORM((_VAR##_4_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (77), _NORM((_VAR##_5_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (78), _NORM((_VAR##_6_9), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (79), _NORM((_VAR##_7_9), 
##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (80), _NORM((_VAR##_0_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (81), _NORM((_VAR##_1_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (82), _NORM((_VAR##_2_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (83), _NORM((_VAR##_3_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (84), _NORM((_VAR##_4_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (85), _NORM((_VAR##_5_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (86), _NORM((_VAR##_6_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (87), _NORM((_VAR##_7_10), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (88), _NORM((_VAR##_0_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (89), _NORM((_VAR##_1_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (90), _NORM((_VAR##_2_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (91), _NORM((_VAR##_3_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (92), _NORM((_VAR##_4_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (93), _NORM((_VAR##_5_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (94), _NORM((_VAR##_6_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (95), _NORM((_VAR##_7_11), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (96), _NORM((_VAR##_0_12), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (97), _NORM((_VAR##_1_12), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (98), _NORM((_VAR##_2_12), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (99), _NORM((_VAR##_3_12), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (100), _NORM((_VAR##_4_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (101), _NORM((_VAR##_5_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (102), _NORM((_VAR##_6_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (103), _NORM((_VAR##_7_12), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (104), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \\\n  
_STORE((_ARRAY) + (_STEP) * (105), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (106), _NORM((_VAR##_2_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (107), _NORM((_VAR##_3_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (108), _NORM((_VAR##_4_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (109), _NORM((_VAR##_5_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (110), _NORM((_VAR##_6_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (111), _NORM((_VAR##_7_13), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (112), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (113), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (114), _NORM((_VAR##_2_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (115), _NORM((_VAR##_3_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (116), _NORM((_VAR##_4_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (117), _NORM((_VAR##_5_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (118), _NORM((_VAR##_6_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (119), _NORM((_VAR##_7_14), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (120), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (121), _NORM((_VAR##_1_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (122), _NORM((_VAR##_2_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (123), _NORM((_VAR##_3_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (124), _NORM((_VAR##_4_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (125), _NORM((_VAR##_5_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (126), _NORM((_VAR##_6_15), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (127), _NORM((_VAR##_7_15), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_8X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_8X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (128), _NORM((_VAR##_0_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (129), _NORM((_VAR##_1_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (130), _NORM((_VAR##_2_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (131), _NORM((_VAR##_3_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (132), _NORM((_VAR##_4_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (133), _NORM((_VAR##_5_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (134), _NORM((_VAR##_6_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (135), _NORM((_VAR##_7_16), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (136), _NORM((_VAR##_0_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (137), _NORM((_VAR##_1_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (138), _NORM((_VAR##_2_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (139), _NORM((_VAR##_3_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (140), _NORM((_VAR##_4_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (141), _NORM((_VAR##_5_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (142), _NORM((_VAR##_6_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (143), _NORM((_VAR##_7_17), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (144), _NORM((_VAR##_0_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (145), _NORM((_VAR##_1_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (146), _NORM((_VAR##_2_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (147), _NORM((_VAR##_3_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (148), _NORM((_VAR##_4_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (149), _NORM((_VAR##_5_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (150), _NORM((_VAR##_6_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (151), _NORM((_VAR##_7_18), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + 
(_STEP) * (152), _NORM((_VAR##_0_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (153), _NORM((_VAR##_1_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (154), _NORM((_VAR##_2_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (155), _NORM((_VAR##_3_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (156), _NORM((_VAR##_4_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (157), _NORM((_VAR##_5_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (158), _NORM((_VAR##_6_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (159), _NORM((_VAR##_7_19), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (160), _NORM((_VAR##_0_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (161), _NORM((_VAR##_1_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (162), _NORM((_VAR##_2_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (163), _NORM((_VAR##_3_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (164), _NORM((_VAR##_4_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (165), _NORM((_VAR##_5_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (166), _NORM((_VAR##_6_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (167), _NORM((_VAR##_7_20), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (168), _NORM((_VAR##_0_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (169), _NORM((_VAR##_1_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (170), _NORM((_VAR##_2_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (171), _NORM((_VAR##_3_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (172), _NORM((_VAR##_4_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (173), _NORM((_VAR##_5_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (174), _NORM((_VAR##_6_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (175), _NORM((_VAR##_7_21), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (176), _NORM((_VAR##_0_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (177), 
_NORM((_VAR##_1_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (178), _NORM((_VAR##_2_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (179), _NORM((_VAR##_3_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (180), _NORM((_VAR##_4_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (181), _NORM((_VAR##_5_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (182), _NORM((_VAR##_6_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (183), _NORM((_VAR##_7_22), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (184), _NORM((_VAR##_0_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (185), _NORM((_VAR##_1_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (186), _NORM((_VAR##_2_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (187), _NORM((_VAR##_3_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (188), _NORM((_VAR##_4_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (189), _NORM((_VAR##_5_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (190), _NORM((_VAR##_6_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (191), _NORM((_VAR##_7_23), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (192), _NORM((_VAR##_0_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (193), _NORM((_VAR##_1_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (194), _NORM((_VAR##_2_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (195), _NORM((_VAR##_3_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (196), _NORM((_VAR##_4_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (197), _NORM((_VAR##_5_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (198), _NORM((_VAR##_6_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (199), _NORM((_VAR##_7_24), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (200), _NORM((_VAR##_0_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (201), _NORM((_VAR##_1_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (202), _NORM((_VAR##_2_25), 
##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (203), _NORM((_VAR##_3_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (204), _NORM((_VAR##_4_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (205), _NORM((_VAR##_5_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (206), _NORM((_VAR##_6_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (207), _NORM((_VAR##_7_25), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (208), _NORM((_VAR##_0_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (209), _NORM((_VAR##_1_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (210), _NORM((_VAR##_2_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (211), _NORM((_VAR##_3_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (212), _NORM((_VAR##_4_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (213), _NORM((_VAR##_5_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (214), _NORM((_VAR##_6_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (215), _NORM((_VAR##_7_26), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (216), _NORM((_VAR##_0_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (217), _NORM((_VAR##_1_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (218), _NORM((_VAR##_2_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (219), _NORM((_VAR##_3_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (220), _NORM((_VAR##_4_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (221), _NORM((_VAR##_5_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (222), _NORM((_VAR##_6_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (223), _NORM((_VAR##_7_27), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (224), _NORM((_VAR##_0_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (225), _NORM((_VAR##_1_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (226), _NORM((_VAR##_2_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (227), _NORM((_VAR##_3_28), ##__VA_ARGS__)); \\\n  
_STORE((_ARRAY) + (_STEP) * (228), _NORM((_VAR##_4_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (229), _NORM((_VAR##_5_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (230), _NORM((_VAR##_6_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (231), _NORM((_VAR##_7_28), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (232), _NORM((_VAR##_0_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (233), _NORM((_VAR##_1_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (234), _NORM((_VAR##_2_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (235), _NORM((_VAR##_3_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (236), _NORM((_VAR##_4_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (237), _NORM((_VAR##_5_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (238), _NORM((_VAR##_6_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (239), _NORM((_VAR##_7_29), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (240), _NORM((_VAR##_0_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (241), _NORM((_VAR##_1_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (242), _NORM((_VAR##_2_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (243), _NORM((_VAR##_3_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (244), _NORM((_VAR##_4_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (245), _NORM((_VAR##_5_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (246), _NORM((_VAR##_6_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (247), _NORM((_VAR##_7_30), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (248), _NORM((_VAR##_0_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (249), _NORM((_VAR##_1_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (250), _NORM((_VAR##_2_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (251), _NORM((_VAR##_3_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (252), _NORM((_VAR##_4_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + 
(_STEP) * (253), _NORM((_VAR##_5_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (254), _NORM((_VAR##_6_31), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (255), _NORM((_VAR##_7_31), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_16X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \\\n  MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_8_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_9_0), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_10_0), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_11_0), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_12_0), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_13_0), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_14_0), ##__VA_ARGS__)); \\\n  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_15_0), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_16X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_16X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_4_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_5_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_6_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_7_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_8_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_9_1), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_10_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_11_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_12_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_13_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_14_1), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_15_1), ##__VA_ARGS__));\n\n#define MATRIX_VAR_STORE_16X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     
\\\n  MATRIX_VAR_STORE_16X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \\\n  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_4_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_5_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_6_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_7_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_8_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_9_2), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_10_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_11_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_12_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_13_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_14_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_15_2), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_4_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_5_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_6_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_7_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + 
(_STEP) * (56), _NORM((_VAR##_8_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_9_3), ##__VA_ARGS__));   \\\n  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_10_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_11_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_12_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_13_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_14_3), ##__VA_ARGS__));  \\\n  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_15_3), ##__VA_ARGS__));\n\n#define MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ...) \\\n  (_VAR##_0_0) = _PERMUTE((_VAR##_0_0), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_1X2(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_1X4(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_1X2(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \\\n  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_1X8(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_1X4(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \\\n  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \\\n  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \\\n  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_1X16(_VAR, _PERMUTE, ...)      
\\\n  MATRIX_VAR_PERMUTE_1X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \\\n  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \\\n  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \\\n  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \\\n  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \\\n  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \\\n  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \\\n  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \\\n  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_1_0) = _PERMUTE((_VAR##_1_0), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X2(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \\\n  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X4(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_2X2(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \\\n  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \\\n  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \\\n  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X8(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_2X4(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \\\n  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \\\n  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \\\n  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \\\n  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \\\n  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \\\n  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \\\n  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X16(_VAR, _PERMUTE, ...)      
\\\n  MATRIX_VAR_PERMUTE_2X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \\\n  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \\\n  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \\\n  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \\\n  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \\\n  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \\\n  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \\\n  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \\\n  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \\\n  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \\\n  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \\\n  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \\\n  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \\\n  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \\\n  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \\\n  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \\\n  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_2X32(_VAR, _PERMUTE, ...)      
\\\n  MATRIX_VAR_PERMUTE_2X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \\\n  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \\\n  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \\\n  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \\\n  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \\\n  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \\\n  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \\\n  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \\\n  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \\\n  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \\\n  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \\\n  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \\\n  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \\\n  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \\\n  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \\\n  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \\\n  (_VAR##_1_23) = _PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \\\n  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \\\n  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \\\n  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \\\n  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \\\n  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \\\n  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \\\n  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \\\n  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \\\n  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \\\n  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \\\n  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \\\n  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \\\n  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \\\n  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \\\n  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \\\n  
(_VAR##_1_31) = _PERMUTE((_VAR##_1_31), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_2_0) = _PERMUTE((_VAR##_2_0), ##__VA_ARGS__); \\\n  (_VAR##_3_0) = _PERMUTE((_VAR##_3_0), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X2(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \\\n  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__); \\\n  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__); \\\n  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X4(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_4X2(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \\\n  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \\\n  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__); \\\n  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__); \\\n  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \\\n  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__); \\\n  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__); \\\n  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X8(_VAR, _PERMUTE, ...)     
\\\n  MATRIX_VAR_PERMUTE_4X4(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \\\n  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \\\n  (_VAR##_2_4) = _PERMUTE((_VAR##_2_4), ##__VA_ARGS__); \\\n  (_VAR##_3_4) = _PERMUTE((_VAR##_3_4), ##__VA_ARGS__); \\\n  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \\\n  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \\\n  (_VAR##_2_5) = _PERMUTE((_VAR##_2_5), ##__VA_ARGS__); \\\n  (_VAR##_3_5) = _PERMUTE((_VAR##_3_5), ##__VA_ARGS__); \\\n  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \\\n  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \\\n  (_VAR##_2_6) = _PERMUTE((_VAR##_2_6), ##__VA_ARGS__); \\\n  (_VAR##_3_6) = _PERMUTE((_VAR##_3_6), ##__VA_ARGS__); \\\n  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \\\n  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__); \\\n  (_VAR##_2_7) = _PERMUTE((_VAR##_2_7), ##__VA_ARGS__); \\\n  (_VAR##_3_7) = _PERMUTE((_VAR##_3_7), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X16(_VAR, _PERMUTE, ...)      
\\\n  MATRIX_VAR_PERMUTE_4X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \\\n  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \\\n  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \\\n  (_VAR##_2_8) = _PERMUTE((_VAR##_2_8), ##__VA_ARGS__);   \\\n  (_VAR##_3_8) = _PERMUTE((_VAR##_3_8), ##__VA_ARGS__);   \\\n  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \\\n  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \\\n  (_VAR##_2_9) = _PERMUTE((_VAR##_2_9), ##__VA_ARGS__);   \\\n  (_VAR##_3_9) = _PERMUTE((_VAR##_3_9), ##__VA_ARGS__);   \\\n  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \\\n  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \\\n  (_VAR##_2_10) = _PERMUTE((_VAR##_2_10), ##__VA_ARGS__); \\\n  (_VAR##_3_10) = _PERMUTE((_VAR##_3_10), ##__VA_ARGS__); \\\n  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \\\n  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \\\n  (_VAR##_2_11) = _PERMUTE((_VAR##_2_11), ##__VA_ARGS__); \\\n  (_VAR##_3_11) = _PERMUTE((_VAR##_3_11), ##__VA_ARGS__); \\\n  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \\\n  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \\\n  (_VAR##_2_12) = _PERMUTE((_VAR##_2_12), ##__VA_ARGS__); \\\n  (_VAR##_3_12) = _PERMUTE((_VAR##_3_12), ##__VA_ARGS__); \\\n  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \\\n  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \\\n  (_VAR##_2_13) = _PERMUTE((_VAR##_2_13), ##__VA_ARGS__); \\\n  (_VAR##_3_13) = _PERMUTE((_VAR##_3_13), ##__VA_ARGS__); \\\n  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \\\n  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \\\n  (_VAR##_2_14) = _PERMUTE((_VAR##_2_14), ##__VA_ARGS__); \\\n  (_VAR##_3_14) = _PERMUTE((_VAR##_3_14), ##__VA_ARGS__); \\\n  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \\\n  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__); \\\n  (_VAR##_2_15) = _PERMUTE((_VAR##_2_15), ##__VA_ARGS__); \\\n  
(_VAR##_3_15) = _PERMUTE((_VAR##_3_15), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_4X32(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_4X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \\\n  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \\\n  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \\\n  (_VAR##_2_16) = _PERMUTE((_VAR##_2_16), ##__VA_ARGS__); \\\n  (_VAR##_3_16) = _PERMUTE((_VAR##_3_16), ##__VA_ARGS__); \\\n  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \\\n  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \\\n  (_VAR##_2_17) = _PERMUTE((_VAR##_2_17), ##__VA_ARGS__); \\\n  (_VAR##_3_17) = _PERMUTE((_VAR##_3_17), ##__VA_ARGS__); \\\n  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \\\n  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \\\n  (_VAR##_2_18) = _PERMUTE((_VAR##_2_18), ##__VA_ARGS__); \\\n  (_VAR##_3_18) = _PERMUTE((_VAR##_3_18), ##__VA_ARGS__); \\\n  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \\\n  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \\\n  (_VAR##_2_19) = _PERMUTE((_VAR##_2_19), ##__VA_ARGS__); \\\n  (_VAR##_3_19) = _PERMUTE((_VAR##_3_19), ##__VA_ARGS__); \\\n  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \\\n  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \\\n  (_VAR##_2_20) = _PERMUTE((_VAR##_2_20), ##__VA_ARGS__); \\\n  (_VAR##_3_20) = _PERMUTE((_VAR##_3_20), ##__VA_ARGS__); \\\n  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \\\n  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \\\n  (_VAR##_2_21) = _PERMUTE((_VAR##_2_21), ##__VA_ARGS__); \\\n  (_VAR##_3_21) = _PERMUTE((_VAR##_3_21), ##__VA_ARGS__); \\\n  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \\\n  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \\\n  (_VAR##_2_22) = _PERMUTE((_VAR##_2_22), ##__VA_ARGS__); \\\n  (_VAR##_3_22) = _PERMUTE((_VAR##_3_22), ##__VA_ARGS__); \\\n  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \\\n  (_VAR##_1_23) = 
_PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \\\n  (_VAR##_2_23) = _PERMUTE((_VAR##_2_23), ##__VA_ARGS__); \\\n  (_VAR##_3_23) = _PERMUTE((_VAR##_3_23), ##__VA_ARGS__); \\\n  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \\\n  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \\\n  (_VAR##_2_24) = _PERMUTE((_VAR##_2_24), ##__VA_ARGS__); \\\n  (_VAR##_3_24) = _PERMUTE((_VAR##_3_24), ##__VA_ARGS__); \\\n  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \\\n  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \\\n  (_VAR##_2_25) = _PERMUTE((_VAR##_2_25), ##__VA_ARGS__); \\\n  (_VAR##_3_25) = _PERMUTE((_VAR##_3_25), ##__VA_ARGS__); \\\n  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \\\n  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \\\n  (_VAR##_2_26) = _PERMUTE((_VAR##_2_26), ##__VA_ARGS__); \\\n  (_VAR##_3_26) = _PERMUTE((_VAR##_3_26), ##__VA_ARGS__); \\\n  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \\\n  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \\\n  (_VAR##_2_27) = _PERMUTE((_VAR##_2_27), ##__VA_ARGS__); \\\n  (_VAR##_3_27) = _PERMUTE((_VAR##_3_27), ##__VA_ARGS__); \\\n  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \\\n  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \\\n  (_VAR##_2_28) = _PERMUTE((_VAR##_2_28), ##__VA_ARGS__); \\\n  (_VAR##_3_28) = _PERMUTE((_VAR##_3_28), ##__VA_ARGS__); \\\n  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \\\n  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \\\n  (_VAR##_2_29) = _PERMUTE((_VAR##_2_29), ##__VA_ARGS__); \\\n  (_VAR##_3_29) = _PERMUTE((_VAR##_3_29), ##__VA_ARGS__); \\\n  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \\\n  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \\\n  (_VAR##_2_30) = _PERMUTE((_VAR##_2_30), ##__VA_ARGS__); \\\n  (_VAR##_3_30) = _PERMUTE((_VAR##_3_30), ##__VA_ARGS__); \\\n  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \\\n  (_VAR##_1_31) = 
_PERMUTE((_VAR##_1_31), ##__VA_ARGS__); \\\n  (_VAR##_2_31) = _PERMUTE((_VAR##_2_31), ##__VA_ARGS__); \\\n  (_VAR##_3_31) = _PERMUTE((_VAR##_3_31), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_4_0) = _PERMUTE((_VAR##_4_0), ##__VA_ARGS__); \\\n  (_VAR##_5_0) = _PERMUTE((_VAR##_5_0), ##__VA_ARGS__); \\\n  (_VAR##_6_0) = _PERMUTE((_VAR##_6_0), ##__VA_ARGS__); \\\n  (_VAR##_7_0) = _PERMUTE((_VAR##_7_0), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_8X2(_VAR, _PERMUTE, ...)     \\\n  MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \\\n  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__); \\\n  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__); \\\n  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__); \\\n  (_VAR##_4_1) = _PERMUTE((_VAR##_4_1), ##__VA_ARGS__); \\\n  (_VAR##_5_1) = _PERMUTE((_VAR##_5_1), ##__VA_ARGS__); \\\n  (_VAR##_6_1) = _PERMUTE((_VAR##_6_1), ##__VA_ARGS__); \\\n  (_VAR##_7_1) = _PERMUTE((_VAR##_7_1), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_8X4(_VAR, _PERMUTE, ...)     
\\\n  MATRIX_VAR_PERMUTE_8X2(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \\\n  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \\\n  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__); \\\n  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__); \\\n  (_VAR##_4_2) = _PERMUTE((_VAR##_4_2), ##__VA_ARGS__); \\\n  (_VAR##_5_2) = _PERMUTE((_VAR##_5_2), ##__VA_ARGS__); \\\n  (_VAR##_6_2) = _PERMUTE((_VAR##_6_2), ##__VA_ARGS__); \\\n  (_VAR##_7_2) = _PERMUTE((_VAR##_7_2), ##__VA_ARGS__); \\\n  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \\\n  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__); \\\n  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__); \\\n  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__); \\\n  (_VAR##_4_3) = _PERMUTE((_VAR##_4_3), ##__VA_ARGS__); \\\n  (_VAR##_5_3) = _PERMUTE((_VAR##_5_3), ##__VA_ARGS__); \\\n  (_VAR##_6_3) = _PERMUTE((_VAR##_6_3), ##__VA_ARGS__); \\\n  (_VAR##_7_3) = _PERMUTE((_VAR##_7_3), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_8X8(_VAR, _PERMUTE, ...)     
\\\n  MATRIX_VAR_PERMUTE_8X4(_VAR, _PERMUTE, ##__VA_ARGS__) \\\n  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \\\n  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \\\n  (_VAR##_2_4) = _PERMUTE((_VAR##_2_4), ##__VA_ARGS__); \\\n  (_VAR##_3_4) = _PERMUTE((_VAR##_3_4), ##__VA_ARGS__); \\\n  (_VAR##_4_4) = _PERMUTE((_VAR##_4_4), ##__VA_ARGS__); \\\n  (_VAR##_5_4) = _PERMUTE((_VAR##_5_4), ##__VA_ARGS__); \\\n  (_VAR##_6_4) = _PERMUTE((_VAR##_6_4), ##__VA_ARGS__); \\\n  (_VAR##_7_4) = _PERMUTE((_VAR##_7_4), ##__VA_ARGS__); \\\n  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \\\n  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \\\n  (_VAR##_2_5) = _PERMUTE((_VAR##_2_5), ##__VA_ARGS__); \\\n  (_VAR##_3_5) = _PERMUTE((_VAR##_3_5), ##__VA_ARGS__); \\\n  (_VAR##_4_5) = _PERMUTE((_VAR##_4_5), ##__VA_ARGS__); \\\n  (_VAR##_5_5) = _PERMUTE((_VAR##_5_5), ##__VA_ARGS__); \\\n  (_VAR##_6_5) = _PERMUTE((_VAR##_6_5), ##__VA_ARGS__); \\\n  (_VAR##_7_5) = _PERMUTE((_VAR##_7_5), ##__VA_ARGS__); \\\n  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \\\n  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \\\n  (_VAR##_2_6) = _PERMUTE((_VAR##_2_6), ##__VA_ARGS__); \\\n  (_VAR##_3_6) = _PERMUTE((_VAR##_3_6), ##__VA_ARGS__); \\\n  (_VAR##_4_6) = _PERMUTE((_VAR##_4_6), ##__VA_ARGS__); \\\n  (_VAR##_5_6) = _PERMUTE((_VAR##_5_6), ##__VA_ARGS__); \\\n  (_VAR##_6_6) = _PERMUTE((_VAR##_6_6), ##__VA_ARGS__); \\\n  (_VAR##_7_6) = _PERMUTE((_VAR##_7_6), ##__VA_ARGS__); \\\n  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \\\n  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__); \\\n  (_VAR##_2_7) = _PERMUTE((_VAR##_2_7), ##__VA_ARGS__); \\\n  (_VAR##_3_7) = _PERMUTE((_VAR##_3_7), ##__VA_ARGS__); \\\n  (_VAR##_4_7) = _PERMUTE((_VAR##_4_7), ##__VA_ARGS__); \\\n  (_VAR##_5_7) = _PERMUTE((_VAR##_5_7), ##__VA_ARGS__); \\\n  (_VAR##_6_7) = _PERMUTE((_VAR##_6_7), ##__VA_ARGS__); \\\n  (_VAR##_7_7) = _PERMUTE((_VAR##_7_7), ##__VA_ARGS__);\n\n#define 
MATRIX_VAR_PERMUTE_8X16(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_8X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \\\n  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \\\n  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \\\n  (_VAR##_2_8) = _PERMUTE((_VAR##_2_8), ##__VA_ARGS__);   \\\n  (_VAR##_3_8) = _PERMUTE((_VAR##_3_8), ##__VA_ARGS__);   \\\n  (_VAR##_4_8) = _PERMUTE((_VAR##_4_8), ##__VA_ARGS__);   \\\n  (_VAR##_5_8) = _PERMUTE((_VAR##_5_8), ##__VA_ARGS__);   \\\n  (_VAR##_6_8) = _PERMUTE((_VAR##_6_8), ##__VA_ARGS__);   \\\n  (_VAR##_7_8) = _PERMUTE((_VAR##_7_8), ##__VA_ARGS__);   \\\n  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \\\n  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \\\n  (_VAR##_2_9) = _PERMUTE((_VAR##_2_9), ##__VA_ARGS__);   \\\n  (_VAR##_3_9) = _PERMUTE((_VAR##_3_9), ##__VA_ARGS__);   \\\n  (_VAR##_4_9) = _PERMUTE((_VAR##_4_9), ##__VA_ARGS__);   \\\n  (_VAR##_5_9) = _PERMUTE((_VAR##_5_9), ##__VA_ARGS__);   \\\n  (_VAR##_6_9) = _PERMUTE((_VAR##_6_9), ##__VA_ARGS__);   \\\n  (_VAR##_7_9) = _PERMUTE((_VAR##_7_9), ##__VA_ARGS__);   \\\n  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \\\n  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \\\n  (_VAR##_2_10) = _PERMUTE((_VAR##_2_10), ##__VA_ARGS__); \\\n  (_VAR##_3_10) = _PERMUTE((_VAR##_3_10), ##__VA_ARGS__); \\\n  (_VAR##_4_10) = _PERMUTE((_VAR##_4_10), ##__VA_ARGS__); \\\n  (_VAR##_5_10) = _PERMUTE((_VAR##_5_10), ##__VA_ARGS__); \\\n  (_VAR##_6_10) = _PERMUTE((_VAR##_6_10), ##__VA_ARGS__); \\\n  (_VAR##_7_10) = _PERMUTE((_VAR##_7_10), ##__VA_ARGS__); \\\n  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \\\n  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \\\n  (_VAR##_2_11) = _PERMUTE((_VAR##_2_11), ##__VA_ARGS__); \\\n  (_VAR##_3_11) = _PERMUTE((_VAR##_3_11), ##__VA_ARGS__); \\\n  (_VAR##_4_11) = _PERMUTE((_VAR##_4_11), ##__VA_ARGS__); \\\n  (_VAR##_5_11) = _PERMUTE((_VAR##_5_11), ##__VA_ARGS__); \\\n  (_VAR##_6_11) = 
_PERMUTE((_VAR##_6_11), ##__VA_ARGS__); \\\n  (_VAR##_7_11) = _PERMUTE((_VAR##_7_11), ##__VA_ARGS__); \\\n  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \\\n  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \\\n  (_VAR##_2_12) = _PERMUTE((_VAR##_2_12), ##__VA_ARGS__); \\\n  (_VAR##_3_12) = _PERMUTE((_VAR##_3_12), ##__VA_ARGS__); \\\n  (_VAR##_4_12) = _PERMUTE((_VAR##_4_12), ##__VA_ARGS__); \\\n  (_VAR##_5_12) = _PERMUTE((_VAR##_5_12), ##__VA_ARGS__); \\\n  (_VAR##_6_12) = _PERMUTE((_VAR##_6_12), ##__VA_ARGS__); \\\n  (_VAR##_7_12) = _PERMUTE((_VAR##_7_12), ##__VA_ARGS__); \\\n  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \\\n  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \\\n  (_VAR##_2_13) = _PERMUTE((_VAR##_2_13), ##__VA_ARGS__); \\\n  (_VAR##_3_13) = _PERMUTE((_VAR##_3_13), ##__VA_ARGS__); \\\n  (_VAR##_4_13) = _PERMUTE((_VAR##_4_13), ##__VA_ARGS__); \\\n  (_VAR##_5_13) = _PERMUTE((_VAR##_5_13), ##__VA_ARGS__); \\\n  (_VAR##_6_13) = _PERMUTE((_VAR##_6_13), ##__VA_ARGS__); \\\n  (_VAR##_7_13) = _PERMUTE((_VAR##_7_13), ##__VA_ARGS__); \\\n  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \\\n  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \\\n  (_VAR##_2_14) = _PERMUTE((_VAR##_2_14), ##__VA_ARGS__); \\\n  (_VAR##_3_14) = _PERMUTE((_VAR##_3_14), ##__VA_ARGS__); \\\n  (_VAR##_4_14) = _PERMUTE((_VAR##_4_14), ##__VA_ARGS__); \\\n  (_VAR##_5_14) = _PERMUTE((_VAR##_5_14), ##__VA_ARGS__); \\\n  (_VAR##_6_14) = _PERMUTE((_VAR##_6_14), ##__VA_ARGS__); \\\n  (_VAR##_7_14) = _PERMUTE((_VAR##_7_14), ##__VA_ARGS__); \\\n  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \\\n  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__); \\\n  (_VAR##_2_15) = _PERMUTE((_VAR##_2_15), ##__VA_ARGS__); \\\n  (_VAR##_3_15) = _PERMUTE((_VAR##_3_15), ##__VA_ARGS__); \\\n  (_VAR##_4_15) = _PERMUTE((_VAR##_4_15), ##__VA_ARGS__); \\\n  (_VAR##_5_15) = _PERMUTE((_VAR##_5_15), ##__VA_ARGS__); \\\n  (_VAR##_6_15) = 
_PERMUTE((_VAR##_6_15), ##__VA_ARGS__); \\\n  (_VAR##_7_15) = _PERMUTE((_VAR##_7_15), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_8X32(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_8X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \\\n  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \\\n  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \\\n  (_VAR##_2_16) = _PERMUTE((_VAR##_2_16), ##__VA_ARGS__); \\\n  (_VAR##_3_16) = _PERMUTE((_VAR##_3_16), ##__VA_ARGS__); \\\n  (_VAR##_4_16) = _PERMUTE((_VAR##_4_16), ##__VA_ARGS__); \\\n  (_VAR##_5_16) = _PERMUTE((_VAR##_5_16), ##__VA_ARGS__); \\\n  (_VAR##_6_16) = _PERMUTE((_VAR##_6_16), ##__VA_ARGS__); \\\n  (_VAR##_7_16) = _PERMUTE((_VAR##_7_16), ##__VA_ARGS__); \\\n  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \\\n  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \\\n  (_VAR##_2_17) = _PERMUTE((_VAR##_2_17), ##__VA_ARGS__); \\\n  (_VAR##_3_17) = _PERMUTE((_VAR##_3_17), ##__VA_ARGS__); \\\n  (_VAR##_4_17) = _PERMUTE((_VAR##_4_17), ##__VA_ARGS__); \\\n  (_VAR##_5_17) = _PERMUTE((_VAR##_5_17), ##__VA_ARGS__); \\\n  (_VAR##_6_17) = _PERMUTE((_VAR##_6_17), ##__VA_ARGS__); \\\n  (_VAR##_7_17) = _PERMUTE((_VAR##_7_17), ##__VA_ARGS__); \\\n  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \\\n  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \\\n  (_VAR##_2_18) = _PERMUTE((_VAR##_2_18), ##__VA_ARGS__); \\\n  (_VAR##_3_18) = _PERMUTE((_VAR##_3_18), ##__VA_ARGS__); \\\n  (_VAR##_4_18) = _PERMUTE((_VAR##_4_18), ##__VA_ARGS__); \\\n  (_VAR##_5_18) = _PERMUTE((_VAR##_5_18), ##__VA_ARGS__); \\\n  (_VAR##_6_18) = _PERMUTE((_VAR##_6_18), ##__VA_ARGS__); \\\n  (_VAR##_7_18) = _PERMUTE((_VAR##_7_18), ##__VA_ARGS__); \\\n  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \\\n  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \\\n  (_VAR##_2_19) = _PERMUTE((_VAR##_2_19), ##__VA_ARGS__); \\\n  (_VAR##_3_19) = _PERMUTE((_VAR##_3_19), ##__VA_ARGS__); \\\n  (_VAR##_4_19) = 
_PERMUTE((_VAR##_4_19), ##__VA_ARGS__); \\\n  (_VAR##_5_19) = _PERMUTE((_VAR##_5_19), ##__VA_ARGS__); \\\n  (_VAR##_6_19) = _PERMUTE((_VAR##_6_19), ##__VA_ARGS__); \\\n  (_VAR##_7_19) = _PERMUTE((_VAR##_7_19), ##__VA_ARGS__); \\\n  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \\\n  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \\\n  (_VAR##_2_20) = _PERMUTE((_VAR##_2_20), ##__VA_ARGS__); \\\n  (_VAR##_3_20) = _PERMUTE((_VAR##_3_20), ##__VA_ARGS__); \\\n  (_VAR##_4_20) = _PERMUTE((_VAR##_4_20), ##__VA_ARGS__); \\\n  (_VAR##_5_20) = _PERMUTE((_VAR##_5_20), ##__VA_ARGS__); \\\n  (_VAR##_6_20) = _PERMUTE((_VAR##_6_20), ##__VA_ARGS__); \\\n  (_VAR##_7_20) = _PERMUTE((_VAR##_7_20), ##__VA_ARGS__); \\\n  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \\\n  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \\\n  (_VAR##_2_21) = _PERMUTE((_VAR##_2_21), ##__VA_ARGS__); \\\n  (_VAR##_3_21) = _PERMUTE((_VAR##_3_21), ##__VA_ARGS__); \\\n  (_VAR##_4_21) = _PERMUTE((_VAR##_4_21), ##__VA_ARGS__); \\\n  (_VAR##_5_21) = _PERMUTE((_VAR##_5_21), ##__VA_ARGS__); \\\n  (_VAR##_6_21) = _PERMUTE((_VAR##_6_21), ##__VA_ARGS__); \\\n  (_VAR##_7_21) = _PERMUTE((_VAR##_7_21), ##__VA_ARGS__); \\\n  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \\\n  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \\\n  (_VAR##_2_22) = _PERMUTE((_VAR##_2_22), ##__VA_ARGS__); \\\n  (_VAR##_3_22) = _PERMUTE((_VAR##_3_22), ##__VA_ARGS__); \\\n  (_VAR##_4_22) = _PERMUTE((_VAR##_4_22), ##__VA_ARGS__); \\\n  (_VAR##_5_22) = _PERMUTE((_VAR##_5_22), ##__VA_ARGS__); \\\n  (_VAR##_6_22) = _PERMUTE((_VAR##_6_22), ##__VA_ARGS__); \\\n  (_VAR##_7_22) = _PERMUTE((_VAR##_7_22), ##__VA_ARGS__); \\\n  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \\\n  (_VAR##_1_23) = _PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \\\n  (_VAR##_2_23) = _PERMUTE((_VAR##_2_23), ##__VA_ARGS__); \\\n  (_VAR##_3_23) = _PERMUTE((_VAR##_3_23), ##__VA_ARGS__); \\\n  (_VAR##_4_23) = 
_PERMUTE((_VAR##_4_23), ##__VA_ARGS__); \\\n  (_VAR##_5_23) = _PERMUTE((_VAR##_5_23), ##__VA_ARGS__); \\\n  (_VAR##_6_23) = _PERMUTE((_VAR##_6_23), ##__VA_ARGS__); \\\n  (_VAR##_7_23) = _PERMUTE((_VAR##_7_23), ##__VA_ARGS__); \\\n  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \\\n  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \\\n  (_VAR##_2_24) = _PERMUTE((_VAR##_2_24), ##__VA_ARGS__); \\\n  (_VAR##_3_24) = _PERMUTE((_VAR##_3_24), ##__VA_ARGS__); \\\n  (_VAR##_4_24) = _PERMUTE((_VAR##_4_24), ##__VA_ARGS__); \\\n  (_VAR##_5_24) = _PERMUTE((_VAR##_5_24), ##__VA_ARGS__); \\\n  (_VAR##_6_24) = _PERMUTE((_VAR##_6_24), ##__VA_ARGS__); \\\n  (_VAR##_7_24) = _PERMUTE((_VAR##_7_24), ##__VA_ARGS__); \\\n  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \\\n  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \\\n  (_VAR##_2_25) = _PERMUTE((_VAR##_2_25), ##__VA_ARGS__); \\\n  (_VAR##_3_25) = _PERMUTE((_VAR##_3_25), ##__VA_ARGS__); \\\n  (_VAR##_4_25) = _PERMUTE((_VAR##_4_25), ##__VA_ARGS__); \\\n  (_VAR##_5_25) = _PERMUTE((_VAR##_5_25), ##__VA_ARGS__); \\\n  (_VAR##_6_25) = _PERMUTE((_VAR##_6_25), ##__VA_ARGS__); \\\n  (_VAR##_7_25) = _PERMUTE((_VAR##_7_25), ##__VA_ARGS__); \\\n  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \\\n  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \\\n  (_VAR##_2_26) = _PERMUTE((_VAR##_2_26), ##__VA_ARGS__); \\\n  (_VAR##_3_26) = _PERMUTE((_VAR##_3_26), ##__VA_ARGS__); \\\n  (_VAR##_4_26) = _PERMUTE((_VAR##_4_26), ##__VA_ARGS__); \\\n  (_VAR##_5_26) = _PERMUTE((_VAR##_5_26), ##__VA_ARGS__); \\\n  (_VAR##_6_26) = _PERMUTE((_VAR##_6_26), ##__VA_ARGS__); \\\n  (_VAR##_7_26) = _PERMUTE((_VAR##_7_26), ##__VA_ARGS__); \\\n  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \\\n  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \\\n  (_VAR##_2_27) = _PERMUTE((_VAR##_2_27), ##__VA_ARGS__); \\\n  (_VAR##_3_27) = _PERMUTE((_VAR##_3_27), ##__VA_ARGS__); \\\n  (_VAR##_4_27) = 
_PERMUTE((_VAR##_4_27), ##__VA_ARGS__); \\\n  (_VAR##_5_27) = _PERMUTE((_VAR##_5_27), ##__VA_ARGS__); \\\n  (_VAR##_6_27) = _PERMUTE((_VAR##_6_27), ##__VA_ARGS__); \\\n  (_VAR##_7_27) = _PERMUTE((_VAR##_7_27), ##__VA_ARGS__); \\\n  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \\\n  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \\\n  (_VAR##_2_28) = _PERMUTE((_VAR##_2_28), ##__VA_ARGS__); \\\n  (_VAR##_3_28) = _PERMUTE((_VAR##_3_28), ##__VA_ARGS__); \\\n  (_VAR##_4_28) = _PERMUTE((_VAR##_4_28), ##__VA_ARGS__); \\\n  (_VAR##_5_28) = _PERMUTE((_VAR##_5_28), ##__VA_ARGS__); \\\n  (_VAR##_6_28) = _PERMUTE((_VAR##_6_28), ##__VA_ARGS__); \\\n  (_VAR##_7_28) = _PERMUTE((_VAR##_7_28), ##__VA_ARGS__); \\\n  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \\\n  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \\\n  (_VAR##_2_29) = _PERMUTE((_VAR##_2_29), ##__VA_ARGS__); \\\n  (_VAR##_3_29) = _PERMUTE((_VAR##_3_29), ##__VA_ARGS__); \\\n  (_VAR##_4_29) = _PERMUTE((_VAR##_4_29), ##__VA_ARGS__); \\\n  (_VAR##_5_29) = _PERMUTE((_VAR##_5_29), ##__VA_ARGS__); \\\n  (_VAR##_6_29) = _PERMUTE((_VAR##_6_29), ##__VA_ARGS__); \\\n  (_VAR##_7_29) = _PERMUTE((_VAR##_7_29), ##__VA_ARGS__); \\\n  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \\\n  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \\\n  (_VAR##_2_30) = _PERMUTE((_VAR##_2_30), ##__VA_ARGS__); \\\n  (_VAR##_3_30) = _PERMUTE((_VAR##_3_30), ##__VA_ARGS__); \\\n  (_VAR##_4_30) = _PERMUTE((_VAR##_4_30), ##__VA_ARGS__); \\\n  (_VAR##_5_30) = _PERMUTE((_VAR##_5_30), ##__VA_ARGS__); \\\n  (_VAR##_6_30) = _PERMUTE((_VAR##_6_30), ##__VA_ARGS__); \\\n  (_VAR##_7_30) = _PERMUTE((_VAR##_7_30), ##__VA_ARGS__); \\\n  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \\\n  (_VAR##_1_31) = _PERMUTE((_VAR##_1_31), ##__VA_ARGS__); \\\n  (_VAR##_2_31) = _PERMUTE((_VAR##_2_31), ##__VA_ARGS__); \\\n  (_VAR##_3_31) = _PERMUTE((_VAR##_3_31), ##__VA_ARGS__); \\\n  (_VAR##_4_31) = 
_PERMUTE((_VAR##_4_31), ##__VA_ARGS__); \\\n  (_VAR##_5_31) = _PERMUTE((_VAR##_5_31), ##__VA_ARGS__); \\\n  (_VAR##_6_31) = _PERMUTE((_VAR##_6_31), ##__VA_ARGS__); \\\n  (_VAR##_7_31) = _PERMUTE((_VAR##_7_31), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_16X1(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ##__VA_ARGS__)   \\\n  (_VAR##_8_0) = _PERMUTE((_VAR##_8_0), ##__VA_ARGS__);   \\\n  (_VAR##_9_0) = _PERMUTE((_VAR##_9_0), ##__VA_ARGS__);   \\\n  (_VAR##_10_0) = _PERMUTE((_VAR##_10_0), ##__VA_ARGS__); \\\n  (_VAR##_11_0) = _PERMUTE((_VAR##_11_0), ##__VA_ARGS__); \\\n  (_VAR##_12_0) = _PERMUTE((_VAR##_12_0), ##__VA_ARGS__); \\\n  (_VAR##_13_0) = _PERMUTE((_VAR##_13_0), ##__VA_ARGS__); \\\n  (_VAR##_14_0) = _PERMUTE((_VAR##_14_0), ##__VA_ARGS__); \\\n  (_VAR##_15_0) = _PERMUTE((_VAR##_15_0), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PERMUTE_16X2(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_16X1(_VAR, _PERMUTE, ##__VA_ARGS__)  \\\n  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__);   \\\n  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__);   \\\n  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__);   \\\n  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__);   \\\n  (_VAR##_4_1) = _PERMUTE((_VAR##_4_1), ##__VA_ARGS__);   \\\n  (_VAR##_5_1) = _PERMUTE((_VAR##_5_1), ##__VA_ARGS__);   \\\n  (_VAR##_6_1) = _PERMUTE((_VAR##_6_1), ##__VA_ARGS__);   \\\n  (_VAR##_7_1) = _PERMUTE((_VAR##_7_1), ##__VA_ARGS__);   \\\n  (_VAR##_8_1) = _PERMUTE((_VAR##_8_1), ##__VA_ARGS__);   \\\n  (_VAR##_9_1) = _PERMUTE((_VAR##_9_1), ##__VA_ARGS__);   \\\n  (_VAR##_10_1) = _PERMUTE((_VAR##_10_1), ##__VA_ARGS__); \\\n  (_VAR##_11_1) = _PERMUTE((_VAR##_11_1), ##__VA_ARGS__); \\\n  (_VAR##_12_1) = _PERMUTE((_VAR##_12_1), ##__VA_ARGS__); \\\n  (_VAR##_13_1) = _PERMUTE((_VAR##_13_1), ##__VA_ARGS__); \\\n  (_VAR##_14_1) = _PERMUTE((_VAR##_14_1), ##__VA_ARGS__); \\\n  (_VAR##_15_1) = _PERMUTE((_VAR##_15_1), ##__VA_ARGS__);\n\n#define 
MATRIX_VAR_PERMUTE_16X4(_VAR, _PERMUTE, ...)      \\\n  MATRIX_VAR_PERMUTE_16X2(_VAR, _PERMUTE, ##__VA_ARGS__)  \\\n  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__);   \\\n  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__);   \\\n  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__);   \\\n  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__);   \\\n  (_VAR##_4_2) = _PERMUTE((_VAR##_4_2), ##__VA_ARGS__);   \\\n  (_VAR##_5_2) = _PERMUTE((_VAR##_5_2), ##__VA_ARGS__);   \\\n  (_VAR##_6_2) = _PERMUTE((_VAR##_6_2), ##__VA_ARGS__);   \\\n  (_VAR##_7_2) = _PERMUTE((_VAR##_7_2), ##__VA_ARGS__);   \\\n  (_VAR##_8_2) = _PERMUTE((_VAR##_8_2), ##__VA_ARGS__);   \\\n  (_VAR##_9_2) = _PERMUTE((_VAR##_9_2), ##__VA_ARGS__);   \\\n  (_VAR##_10_2) = _PERMUTE((_VAR##_10_2), ##__VA_ARGS__); \\\n  (_VAR##_11_2) = _PERMUTE((_VAR##_11_2), ##__VA_ARGS__); \\\n  (_VAR##_12_2) = _PERMUTE((_VAR##_12_2), ##__VA_ARGS__); \\\n  (_VAR##_13_2) = _PERMUTE((_VAR##_13_2), ##__VA_ARGS__); \\\n  (_VAR##_14_2) = _PERMUTE((_VAR##_14_2), ##__VA_ARGS__); \\\n  (_VAR##_15_2) = _PERMUTE((_VAR##_15_2), ##__VA_ARGS__); \\\n  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__);   \\\n  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__);   \\\n  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__);   \\\n  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__);   \\\n  (_VAR##_4_3) = _PERMUTE((_VAR##_4_3), ##__VA_ARGS__);   \\\n  (_VAR##_5_3) = _PERMUTE((_VAR##_5_3), ##__VA_ARGS__);   \\\n  (_VAR##_6_3) = _PERMUTE((_VAR##_6_3), ##__VA_ARGS__);   \\\n  (_VAR##_7_3) = _PERMUTE((_VAR##_7_3), ##__VA_ARGS__);   \\\n  (_VAR##_8_3) = _PERMUTE((_VAR##_8_3), ##__VA_ARGS__);   \\\n  (_VAR##_9_3) = _PERMUTE((_VAR##_9_3), ##__VA_ARGS__);   \\\n  (_VAR##_10_3) = _PERMUTE((_VAR##_10_3), ##__VA_ARGS__); \\\n  (_VAR##_11_3) = _PERMUTE((_VAR##_11_3), ##__VA_ARGS__); \\\n  (_VAR##_12_3) = _PERMUTE((_VAR##_12_3), ##__VA_ARGS__); \\\n  (_VAR##_13_3) = _PERMUTE((_VAR##_13_3), ##__VA_ARGS__); \\\n  (_VAR##_14_3) = 
_PERMUTE((_VAR##_14_3), ##__VA_ARGS__); \\\n  (_VAR##_15_3) = _PERMUTE((_VAR##_15_3), ##__VA_ARGS__);\n\n#define MATRIX_VAR_PROC_2X1(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  _PROCESS((_LHS##_0), (_RHS), (_RES##_0_##_K))             \\\n  _PROCESS((_LHS##_1), (_RHS), (_RES##_1_##_K))\n\n#define MATRIX_VAR_PROC_4X1(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_2X1(_K, _LHS, _RHS, _RES, _PROCESS)       \\\n  _PROCESS((_LHS##_2), (_RHS), (_RES##_2_##_K))             \\\n  _PROCESS((_LHS##_3), (_RHS), (_RES##_3_##_K))\n\n#define MATRIX_VAR_PROC_8X1(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_4X1(_K, _LHS, _RHS, _RES, _PROCESS)       \\\n  _PROCESS((_LHS##_4), (_RHS), (_RES##_4_##_K))             \\\n  _PROCESS((_LHS##_5), (_RHS), (_RES##_5_##_K))             \\\n  _PROCESS((_LHS##_6), (_RHS), (_RES##_6_##_K))             \\\n  _PROCESS((_LHS##_7), (_RHS), (_RES##_7_##_K))\n\n#define MATRIX_VAR_PROC_16X1(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_8X1(_K, _LHS, _RHS, _RES, _PROCESS)        \\\n  _PROCESS((_LHS##_8), (_RHS), (_RES##_8_##_K))              \\\n  _PROCESS((_LHS##_9), (_RHS), (_RES##_9_##_K))              \\\n  _PROCESS((_LHS##_10), (_RHS), (_RES##_10_##_K))            \\\n  _PROCESS((_LHS##_11), (_RHS), (_RES##_11_##_K))            \\\n  _PROCESS((_LHS##_12), (_RHS), (_RES##_12_##_K))            \\\n  _PROCESS((_LHS##_13), (_RHS), (_RES##_13_##_K))            \\\n  _PROCESS((_LHS##_14), (_RHS), (_RES##_14_##_K))            \\\n  _PROCESS((_LHS##_15), (_RHS), (_RES##_15_##_K))\n\n#define MATRIX_VAR_PROC_1X2(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  _PROCESS((_LHS), (_RHS##_0), (_RES##_##_K##_0))           \\\n  _PROCESS((_LHS), (_RHS##_1), (_RES##_##_K##_1))\n\n#define MATRIX_VAR_PROC_1X4(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_1X2(_K, _LHS, _RHS, _RES, _PROCESS)       \\\n  _PROCESS((_LHS), (_RHS##_2), (_RES##_##_K##_2))           \\\n  _PROCESS((_LHS), (_RHS##_3), (_RES##_##_K##_3))\n\n#define MATRIX_VAR_PROC_1X8(_K, _LHS, 
_RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_1X4(_K, _LHS, _RHS, _RES, _PROCESS)       \\\n  _PROCESS((_LHS), (_RHS##_4), (_RES##_##_K##_4))           \\\n  _PROCESS((_LHS), (_RHS##_5), (_RES##_##_K##_5))           \\\n  _PROCESS((_LHS), (_RHS##_6), (_RES##_##_K##_6))           \\\n  _PROCESS((_LHS), (_RHS##_7), (_RES##_##_K##_7))\n\n#define MATRIX_VAR_PROC_1X16(_K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_1X8(_K, _LHS, _RHS, _RES, _PROCESS)        \\\n  _PROCESS((_LHS), (_RHS##_8), (_RES##_##_K##_8))            \\\n  _PROCESS((_LHS), (_RHS##_9), (_RES##_##_K##_9))            \\\n  _PROCESS((_LHS), (_RHS##_10), (_RES##_##_K##_10))          \\\n  _PROCESS((_LHS), (_RHS##_11), (_RES##_##_K##_11))          \\\n  _PROCESS((_LHS), (_RHS##_12), (_RES##_##_K##_12))          \\\n  _PROCESS((_LHS), (_RHS##_13), (_RES##_##_K##_13))          \\\n  _PROCESS((_LHS), (_RHS##_14), (_RES##_##_K##_14))          \\\n  _PROCESS((_LHS), (_RHS##_15), (_RES##_##_K##_15))\n\n#define MATRIX_VAR_INIT(_M, _N, _VAR_TYPE, _VAR_NAME, _VAR_INIT) \\\n  MATRIX_VAR_INIT_##_M##X##_N(_VAR_TYPE, _VAR_NAME, _VAR_INIT)\n\n#define MATRIX_VAR_STORE(_M, _N, _STEP, _VAR, _ARRAY, _STORE, _NORM, ...) \\\n  MATRIX_VAR_STORE_##_M##X##_N(_STEP, _VAR, _ARRAY, _STORE, _NORM,        \\\n                               ##__VA_ARGS__)\n\n#define MATRIX_VAR_PERMUTE(_M, _N, _VAR, _PERMUTE, ...) \\\n  MATRIX_VAR_PERMUTE_##_M##X##_N(_VAR, _PERMUTE, ##__VA_ARGS__)\n\n#define MATRIX_VAR_PROC(_M, _N, _K, _LHS, _RHS, _RES, _PROCESS) \\\n  MATRIX_VAR_PROC_##_M##X##_N(_K, _LHS, _RHS, _RES, _PROCESS)\n"
  },
  {
    "path": "src/ailego/math/matrix_utility.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n//! Absolute value of a float\nstatic inline float FastAbs(float x) {\n  uint32_t *p = reinterpret_cast<uint32_t *>(&x);\n  *p &= 0x7fffffffu;\n  return *reinterpret_cast<float *>(p);\n}\n\n#if defined(__SSE__)\nstatic inline float HorizontalMax_FP32_V128(__m128 v) {\n  __m128 x1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 3, 2));\n  __m128 x2 = _mm_max_ps(v, x1);\n  __m128 x3 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(0, 0, 0, 1));\n  __m128 x4 = _mm_max_ps(x2, x3);\n  return _mm_cvtss_f32(x4);\n}\n\nstatic inline float HorizontalAdd_FP32_V128(__m128 v) {\n#ifdef __SSE3__\n  __m128 x1 = _mm_hadd_ps(v, v);\n  __m128 x2 = _mm_hadd_ps(x1, x1);\n  return _mm_cvtss_f32(x2);\n#else\n  __m128 x1 = _mm_movehl_ps(v, v);\n  __m128 x2 = _mm_add_ps(v, x1);\n  __m128 x3 = _mm_shuffle_ps(x2, x2, 1);\n  __m128 x4 = _mm_add_ss(x2, x3);\n  return _mm_cvtss_f32(x4);\n#endif\n}\n#endif // __SSE__\n\n#if defined(__SSE2__)\nstatic inline int32_t HorizontalAdd_INT32_V128(__m128i v) {\n#ifdef __SSE3__\n  __m128i x1 = _mm_hadd_epi32(v, v);\n  __m128i x2 = _mm_hadd_epi32(x1, x1);\n  return _mm_cvtsi128_si32(x2);\n#else\n  __m128i x1 = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 3, 2));\n  __m128i x2 = _mm_add_epi32(v, x1);\n  __m128i x3 = _mm_shuffle_epi32(x2, _MM_SHUFFLE(0, 0, 0, 1));\n  
__m128i x4 = _mm_add_epi32(x2, x3);\n  return _mm_cvtsi128_si32(x4);\n#endif\n}\n\nstatic inline int64_t HorizontalAdd_INT64_V128(__m128i v) {\n#ifdef __SSE4_1__\n  return (_mm_extract_epi64(v, 0) + _mm_extract_epi64(v, 1));\n#else\n  return _mm_cvtsi128_si64(\n      _mm_add_epi64(_mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 3, 2)), v));\n#endif\n}\n#endif // __SSE2__\n\n#if defined(__SSSE3__)\nstatic const __m128i POPCNT_LOOKUP_SSE =\n    _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);\n\nstatic inline __m128i VerticalPopCount_INT8_V128(__m128i v) {\n#if defined(__AVX512VL__) && defined(__AVX512BITALG__)\n  return _mm_popcnt_epi8(v);\n#else\n  const __m128i low_mask = _mm_set1_epi8(0x0f);\n  __m128i lo = _mm_shuffle_epi8(POPCNT_LOOKUP_SSE, _mm_and_si128(v, low_mask));\n  __m128i hi = _mm_shuffle_epi8(POPCNT_LOOKUP_SSE,\n                                _mm_and_si128(_mm_srli_epi32(v, 4), low_mask));\n  return _mm_add_epi8(lo, hi);\n#endif // __AVX512VL__ && __AVX512BITALG__\n}\n\nstatic inline __m128i VerticalPopCount_INT16_V128(__m128i v) {\n#if defined(__AVX512VL__) && defined(__AVX512BITALG__)\n  return _mm_popcnt_epi16(v);\n#else\n  __m128i total = VerticalPopCount_INT8_V128(v);\n  return _mm_add_epi16(_mm_srli_epi16(total, 8),\n                       _mm_and_si128(total, _mm_set1_epi16(0xff)));\n#endif // __AVX512VL__ && __AVX512BITALG__\n}\n\nstatic inline __m128i VerticalPopCount_INT32_V128(__m128i v) {\n#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)\n  return _mm_popcnt_epi32(v);\n#else\n  __m128i total =\n      _mm_madd_epi16(VerticalPopCount_INT8_V128(v), _mm_set1_epi16(1));\n  return _mm_add_epi32(_mm_srli_epi32(total, 8),\n                       _mm_and_si128(total, _mm_set1_epi32(0xff)));\n#endif // __AVX512VL__ && __AVX512VPOPCNTDQ__\n}\n\nstatic inline __m128i VerticalPopCount_INT64_V128(__m128i v) {\n#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)\n  return _mm_popcnt_epi64(v);\n#else\n  return 
_mm_sad_epu8(VerticalPopCount_INT8_V128(v), _mm_setzero_si128());\n#endif // __AVX512VL__ && __AVX512VPOPCNTDQ__\n}\n#endif // __SSSE3__\n\n#if defined(__SSE4_1__)\nstatic inline int16_t HorizontalMax_UINT8_V128(__m128i v) {\n  v = _mm_max_epu8(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)));\n  v = _mm_max_epu8(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(1, 1, 1, 1)));\n  v = _mm_max_epu8(v, _mm_shufflelo_epi16(v, _MM_SHUFFLE(1, 1, 1, 1)));\n  v = _mm_max_epu8(v, _mm_srli_epi16(v, 8));\n  return static_cast<uint8_t>(_mm_cvtsi128_si32(v));\n}\n#endif // __SSE4_1__\n\n#if defined(__AVX__)\nstatic inline float HorizontalMax_FP32_V256(__m256 v) {\n  __m256 x1 = _mm256_permute_ps(v, _MM_SHUFFLE(0, 0, 3, 2));\n  __m256 x2 = _mm256_max_ps(v, x1);\n  __m256 x3 = _mm256_permute_ps(x2, _MM_SHUFFLE(0, 0, 0, 1));\n  __m256 x4 = _mm256_max_ps(x2, x3);\n  __m128 x5 = _mm256_extractf128_ps(x4, 1);\n  __m128 x6 = _mm_max_ss(_mm256_castps256_ps128(x4), x5);\n  return _mm_cvtss_f32(x6);\n}\n\nstatic inline float HorizontalAdd_FP32_V256(__m256 v) {\n  __m256 x1 = _mm256_hadd_ps(v, v);\n  __m256 x2 = _mm256_hadd_ps(x1, x1);\n  __m128 x3 = _mm256_extractf128_ps(x2, 1);\n  __m128 x4 = _mm_add_ss(_mm256_castps256_ps128(x2), x3);\n  return _mm_cvtss_f32(x4);\n}\n#endif // __AVX__\n\n#if defined(__AVX2__)\n#define POPCNT_MASK1_INT8_AVX _mm256_set1_epi8(0x0f)\n#define POPCNT_MASK1_INT16_AVX  _mm256_set1_epi16(1)\n#define POPCNT_MASK2_INT16_AVX _mm256_set1_epi16(0xff)\n#define POPCNT_MASK1_INT32_AVX _mm256_set1_epi32(0xff)\n#define POPCNT_ZERO_AVX _mm256_setzero_si256()\n#define POPCNT_LOOKUP_AVX _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4)\n\nstatic inline __m256i VerticalPopCount_INT8_V256(__m256i v) {\n#if defined(__AVX512VL__) && defined(__AVX512BITALG__)\n  return _mm256_popcnt_epi8(v);\n#else\n  __m256i lo = _mm256_shuffle_epi8(POPCNT_LOOKUP_AVX,\n                                   _mm256_and_si256(v, 
POPCNT_MASK1_INT8_AVX));\n  __m256i hi = _mm256_shuffle_epi8(\n      POPCNT_LOOKUP_AVX,\n      _mm256_and_si256(_mm256_srli_epi32(v, 4), POPCNT_MASK1_INT8_AVX));\n  return _mm256_add_epi8(lo, hi);\n#endif // __AVX512VL__ && __AVX512BITALG__\n}\n\nstatic inline __m256i VerticalPopCount_INT16_V256(__m256i v) {\n#if defined(__AVX512VL__) && defined(__AVX512BITALG__)\n  return _mm256_popcnt_epi16(v);\n#else\n  __m256i total = VerticalPopCount_INT8_V256(v);\n  return _mm256_add_epi16(_mm256_srli_epi16(total, 8),\n                          _mm256_and_si256(total, POPCNT_MASK2_INT16_AVX));\n#endif // __AVX512VL__ && __AVX512BITALG__\n}\n\nstatic inline __m256i VerticalPopCount_INT32_V256(__m256i v) {\n#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)\n  return _mm256_popcnt_epi32(v);\n#else\n  __m256i total =\n      _mm256_madd_epi16(VerticalPopCount_INT8_V256(v), POPCNT_MASK1_INT16_AVX);\n  return _mm256_add_epi32(_mm256_srli_epi32(total, 8),\n                          _mm256_and_si256(total, POPCNT_MASK1_INT32_AVX));\n#endif // __AVX512VL__ && __AVX512VPOPCNTDQ__\n}\n\nstatic inline __m256i VerticalPopCount_INT64_V256(__m256i v) {\n#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)\n  return _mm256_popcnt_epi64(v);\n#else\n  return _mm256_sad_epu8(VerticalPopCount_INT8_V256(v), POPCNT_ZERO_AVX);\n#endif // __AVX512VL__ && __AVX512VPOPCNTDQ__\n}\n\nstatic inline int16_t HorizontalMax_UINT8_V256(__m256i v) {\n  v = _mm256_max_epu8(v, _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)));\n  v = _mm256_max_epu8(v, _mm256_shuffle_epi32(v, _MM_SHUFFLE(1, 1, 1, 1)));\n  v = _mm256_max_epu8(v, _mm256_shufflelo_epi16(v, _MM_SHUFFLE(1, 1, 1, 1)));\n  __m128i x =\n      _mm_max_epu8(_mm256_castsi256_si128(v), _mm256_extractf128_si256(v, 1));\n  x = _mm_max_epu8(x, _mm_srli_epi16(x, 8));\n  return static_cast<uint8_t>(_mm_cvtsi128_si32(x));\n}\n\nstatic inline int32_t HorizontalAdd_INT32_V256(__m256i v) {\n  __m256i x1 = _mm256_hadd_epi32(v, v);\n  __m256i x2 = 
_mm256_hadd_epi32(x1, x1);\n  __m128i x3 = _mm256_extractf128_si256(x2, 1);\n  __m128i x4 = _mm_add_epi32(_mm256_castsi256_si128(x2), x3);\n  return _mm_cvtsi128_si32(x4);\n}\n\nstatic inline int64_t HorizontalAdd_INT64_V256(__m256i v) {\n  __m256i x1 = _mm256_shuffle_epi32(v, _MM_SHUFFLE(1, 0, 3, 2));\n  __m256i x2 = _mm256_add_epi64(v, x1);\n  __m128i x3 = _mm256_extractf128_si256(x2, 1);\n  __m128i x4 = _mm_add_epi64(_mm256_extractf128_si256(x2, 0), x3);\n  return _mm_cvtsi128_si64(x4);\n}\n#endif // __AVX2__\n\n#if defined(__AVX512F__)\nstatic inline float HorizontalMax_FP32_V512(__m512 v) {\n  __m256 low = _mm512_castps512_ps256(v);\n  __m256 high =\n      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));\n  return HorizontalMax_FP32_V256(_mm256_max_ps(low, high));\n}\n\nstatic inline float HorizontalAdd_FP32_V512(__m512 v) {\n  __m256 low = _mm512_castps512_ps256(v);\n  __m256 high =\n      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));\n  return HorizontalAdd_FP32_V256(_mm256_add_ps(low, high));\n}\n#endif // __AVX512F__\n\n#if defined(__AVX512FP16__)\nstatic inline float HorizontalMax_FP16_V512(__m512h v) {\n  __m512 low = _mm512_cvtxph_ps(_mm512_castph512_ph256(v));\n  __m512 high = _mm512_cvtxph_ps(\n      _mm256_castpd_ph(_mm512_extractf64x4_pd(_mm512_castph_pd(v), 1)));\n  return HorizontalMax_FP32_V512(_mm512_max_ps(low, high));\n}\n\nstatic inline float HorizontalAdd_FP16_V512(__m512h v) {\n  __m512 low = _mm512_cvtxph_ps(_mm512_castph512_ph256(v));\n  __m512 high = _mm512_cvtxph_ps(\n      _mm256_castpd_ph(_mm512_extractf64x4_pd(_mm512_castph_pd(v), 1)));\n\n  return HorizontalAdd_FP32_V512(_mm512_add_ps(low, high));\n}\n#endif // __AVX512FP16__\n\n} // namespace ailego\n} // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <array>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n/*! Compute the Mips SphericalInjection Squared Euclidean Distance with the two\n *  vectors's InnerProduct and each squared l2-normlized value, and the e2 is\n *  1.0 / max_squared_l2_norm\n */\nstatic float inline ComputeSphericalInjection(double ip, double u2, double v2,\n                                              double e2) {\n  if (e2 == 0.0) {\n    // Implies *localized* spherical injection.\n    return static_cast<float>(2.0 - 2.0 * ip / std::max(u2, v2));\n  }\n  auto v = (1.0 - e2 * u2) * (1.0 - e2 * v2);\n  auto score = v > 0.0 ? (1.0 - e2 * ip - std::sqrt(v)) : (1.0 - e2 * ip);\n  return static_cast<float>(score * 2.0);\n}\n\n/*! Mips Squared Euclidean Distance Matrix\n */\ntemplate <typename T, size_t M, size_t N, typename = void>\nstruct MipsSquaredEuclideanDistanceMatrix;\n\n/*! Mips Squared Euclidean Distance Matrix (M=1, N=1)\n */\ntemplate <typename T>\nstruct MipsSquaredEuclideanDistanceMatrix<T, 1, 1> {\n  //! 
Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n\n    float sum = 0.0;\n    float u2 = 0.0;\n    float v2 = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      u2 += p[i] * p[i];\n      v2 += q[i] * q[i];\n      sum += static_cast<float>(p[i] * q[i]);\n    }\n    *out = ComputeSphericalInjection(sum, u2, v2, e2);\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n\n    float sum = 0.0;\n    float u2 = 0.0;\n    float v2 = 0.0;\n    for (size_t i = 0; i < dim; ++i) {\n      u2 += p[i] * p[i];\n      v2 += q[i] * q[i];\n      sum += MathHelper::SquaredDifference(p[i], q[i]);\n    }\n\n    sum *= e2;\n    u2 *= e2;\n    v2 *= e2;\n    for (size_t i = 0; i < m; ++i) {\n      sum += (u2 - v2) * (u2 - v2);\n      u2 = u2 * u2;\n      v2 = v2 * v2;\n    }\n    *out = sum;\n  }\n};\n\ntemplate <>\nstruct MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      float e2, float *out);\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      size_t m, float e2, float *out);\n};\n\ntemplate <>\nstruct MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1> {\n  //! 
Type of value\n  using ValueType = int8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      float e2, float *out);\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      size_t m, float e2, float *out);\n};\n\ntemplate <>\nstruct MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      float e2, float *out);\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      size_t m, float e2, float *out);\n};\n\ntemplate <>\nstruct MipsSquaredEuclideanDistanceMatrix<float, 1, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      float e2, float *out);\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                      size_t m, float e2, float *out);\n};\n\n/*! Mips Squared Euclidean Distance Matrix (M >= 2, N >= 2)\n */\ntemplate <typename T, size_t M, size_t N>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    T, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! 
Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    for (size_t i = 0; i < M; ++i) {\n      const ValueType p_val = p[i];\n      u2[i] = static_cast<float>(p_val * p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = static_cast<float>(p_val * q[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = static_cast<float>(q[i] * q[i]);\n    }\n    p += M;\n    q += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const ValueType p_val = p[i];\n        u2[i] += static_cast<float>(p_val * p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += static_cast<float>(p_val * q[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += static_cast<float>(q[i] * q[i]);\n      }\n      p += M;\n      q += N;\n    }\n\n    // Compute the injection\n    for (size_t i = 0; i < M; ++i) {\n      float *r = out + i;\n      const float u2_val = u2[i];\n      for (size_t j = 0; j < N; ++j) {\n        *r = ComputeSphericalInjection(*r, u2_val, v2[j], e2);\n        r += M;\n      }\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    for (size_t i = 0; i < M; ++i) {\n      const 
ValueType p_val = p[i];\n      u2[i] = static_cast<float>(p_val * p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = MathHelper::SquaredDifference(p_val, q[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = static_cast<float>(q[i] * q[i]);\n    }\n    p += M;\n    q += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const ValueType p_val = p[i];\n        u2[i] += static_cast<float>(p_val * p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += MathHelper::SquaredDifference(p_val, q[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += static_cast<float>(q[i] * q[i]);\n      }\n      p += M;\n      q += N;\n    }\n\n    // Compute the injections\n    float *r = out;\n    for (size_t i = 0; i < M; ++i) {\n      u2[i] *= e2;\n      for (size_t j = 0; j < N; ++j) {\n        (*r++) *= e2;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] *= e2;\n    }\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        r = out + i;\n        float u2_val = u2[i];\n        u2[i] = u2_val * u2_val;\n        for (size_t j = 0; j < N; ++j) {\n          *r += (u2_val - v2[j]) * (u2_val - v2[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] = v2[i] * v2[i];\n      }\n    }\n  }\n};\n\n/*! Mips Squared Euclidean Distance Matrix (N=1)\n */\ntemplate <typename T, size_t M>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    T, M, 1, typename std::enable_if<M >= 2>::type> {\n  //! 
Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n    const ValueType *q_end = q + dim;\n    if (q == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    ValueType q_val = *q++;\n    float v2 = static_cast<float>(q_val * q_val);\n    for (size_t i = 0; i < M; ++i) {\n      u2[i] = static_cast<float>(p[i] * p[i]);\n      out[i] = static_cast<float>(p[i] * q_val);\n    }\n    p += M;\n\n    while (q != q_end) {\n      q_val = *q++;\n      v2 += static_cast<float>(q_val * q_val);\n      for (size_t i = 0; i < M; ++i) {\n        u2[i] += static_cast<float>(p[i] * p[i]);\n        out[i] += static_cast<float>(p[i] * q_val);\n      }\n      p += M;\n    }\n\n    // Compute the injection\n    for (size_t i = 0; i < M; ++i) {\n      out[i] = ComputeSphericalInjection(out[i], u2[i], v2, e2);\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && out);\n    const ValueType *q_end = q + dim;\n    if (q == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    ValueType q_val = *q++;\n    float v2 = static_cast<float>(q_val * q_val);\n    for (size_t i = 0; i < M; ++i) {\n      u2[i] = static_cast<float>(p[i] * p[i]);\n      out[i] = MathHelper::SquaredDifference(p[i], q_val);\n    }\n    p += M;\n\n    while (q != q_end) {\n      q_val = *q++;\n      v2 += static_cast<float>(q_val * q_val);\n      for (size_t i = 0; i < M; ++i) {\n        u2[i] += static_cast<float>(p[i] * p[i]);\n        out[i] += MathHelper::SquaredDifference(p[i], q_val);\n      }\n      p 
+= M;\n    }\n\n    // Compute the injections\n    for (size_t i = 0; i < M; ++i) {\n      out[i] *= e2;\n      u2[i] *= e2;\n    }\n    v2 *= e2;\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const float u_val = u2[i];\n        u2[i] = u_val * u_val;\n        out[i] += (u_val - v2) * (u_val - v2);\n      }\n      v2 = v2 * v2;\n    }\n  }\n};\n\n/*! Mips Squared Euclidean Distance Matrix (INT8, M >=2, N >= 2)\n */\ntemplate <size_t M, size_t N>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 3) && out);\n    dim >>= 2;\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = p_it[i];\n      u2[i] = Squared(p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = FusedMultiplyAdd(p_val, q_it[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = Squared(q_it[i]);\n    }\n    p_it += M;\n    q_it += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = p_it[i];\n        u2[i] += Squared(p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += FusedMultiplyAdd(p_val, q_it[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += Squared(q_it[i]);\n      }\n      p_it += M;\n      q_it += N;\n    }\n\n    // 
Compute the injection\n    for (size_t i = 0; i < M; ++i) {\n      float *r = out + i;\n      const float u2_val = u2[i];\n      for (size_t j = 0; j < N; ++j) {\n        *r = ComputeSphericalInjection(*r, u2_val, v2[j], e2);\n        r += M;\n      }\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 3) && out);\n    dim >>= 2;\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = p_it[i];\n      u2[i] = Squared(p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = SquaredDifference(p_val, q_it[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = Squared(q_it[i]);\n    }\n    p_it += M;\n    q_it += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = p_it[i];\n        u2[i] += Squared(p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += SquaredDifference(p_val, q_it[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += Squared(q_it[i]);\n      }\n      p_it += M;\n      q_it += N;\n    }\n\n    // Compute the injections\n    float *r = out;\n    for (size_t i = 0; i < M; ++i) {\n      u2[i] *= e2;\n      for (size_t j = 0; j < N; ++j) {\n        (*r++) *= e2;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] *= e2;\n    }\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        r = out + i;\n        float u2_val = 
u2[i];\n        u2[i] = u2_val * u2_val;\n        for (size_t j = 0; j < N; ++j) {\n          *r += (u2_val - v2[j]) * (u2_val - v2[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] = v2[i] * v2[i];\n      }\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                              (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                              (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                              (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n  }\n\n  //! Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 24), (int8_t)(rhs >> 24)));\n  }\n\n  //! Calculate sum of squared values\n  static inline float Squared(uint32_t v) {\n    return static_cast<float>((int8_t)(v >> 0) * (int8_t)(v >> 0) +\n                              (int8_t)(v >> 8) * (int8_t)(v >> 8) +\n                              (int8_t)(v >> 16) * (int8_t)(v >> 16) +\n                              (int8_t)(v >> 24) * (int8_t)(v >> 24));\n  }\n};\n\n/*! 
Mips Squared Euclidean Distance Matrix (INT8, N=1)\n */\ntemplate <size_t M>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    int8_t, M, 1, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 3) && out);\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 2);\n    if (q_it == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    uint32_t q_val = *q_it++;\n    float v2 = Squared(q_val);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = *p_it++;\n      u2[i] = Squared(p_val);\n      out[i] = FusedMultiplyAdd(p_val, q_val);\n    }\n\n    while (q_it != q_end) {\n      q_val = *q_it++;\n      v2 += Squared(q_val);\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = *p_it++;\n        u2[i] += Squared(p_val);\n        out[i] += FusedMultiplyAdd(p_val, q_val);\n      }\n    }\n\n    // Compute the injection\n    for (size_t i = 0; i < M; ++i) {\n      out[i] = ComputeSphericalInjection(out[i], u2[i], v2, e2);\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 3) && out);\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 2);\n    if (q_it == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    uint32_t q_val = *q_it++;\n    float v2 = 
Squared(q_val);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = *p_it++;\n      u2[i] = Squared(p_val);\n      out[i] = SquaredDifference(p_val, q_val);\n    }\n\n    while (q_it != q_end) {\n      q_val = *q_it++;\n      v2 += Squared(q_val);\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = *p_it++;\n        u2[i] += Squared(p_val);\n        out[i] += SquaredDifference(p_val, q_val);\n      }\n    }\n\n    // Compute the injections\n    for (size_t i = 0; i < M; ++i) {\n      out[i] *= e2;\n      u2[i] *= e2;\n    }\n    v2 *= e2;\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const float u_val = u2[i];\n        u2[i] = u_val * u_val;\n        out[i] += (u_val - v2) * (u_val - v2);\n      }\n      v2 = v2 * v2;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>((int8_t)(lhs >> 0) * (int8_t)(rhs >> 0) +\n                              (int8_t)(lhs >> 8) * (int8_t)(rhs >> 8) +\n                              (int8_t)(lhs >> 16) * (int8_t)(rhs >> 16) +\n                              (int8_t)(lhs >> 24) * (int8_t)(rhs >> 24));\n  }\n\n  //! Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +\n                              MathHelper::SquaredDifference<int8_t, int32_t>(\n                                  (int8_t)(lhs >> 24), (int8_t)(rhs >> 24)));\n  }\n\n  //! 
Calculate sum of squared values\n  static inline float Squared(uint32_t v) {\n    return static_cast<float>((int8_t)(v >> 0) * (int8_t)(v >> 0) +\n                              (int8_t)(v >> 8) * (int8_t)(v >> 8) +\n                              (int8_t)(v >> 16) * (int8_t)(v >> 16) +\n                              (int8_t)(v >> 24) * (int8_t)(v >> 24));\n  }\n};\n\n/*! Mips Squared Euclidean Distance Matrix (INT4, M >=2, N >= 2)\n */\ntemplate <size_t M, size_t N>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 7) && out);\n    dim >>= 3;\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = p_it[i];\n      u2[i] = Squared(p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = FusedMultiplyAdd(p_val, q_it[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = Squared(q_it[i]);\n    }\n    p_it += M;\n    q_it += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = p_it[i];\n        u2[i] += Squared(p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += FusedMultiplyAdd(p_val, q_it[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += Squared(q_it[i]);\n      }\n      p_it += M;\n      q_it += N;\n    }\n\n    // Compute the 
injection\n    for (size_t i = 0; i < M; ++i) {\n      float *r = out + i;\n      const float u2_val = u2[i];\n      for (size_t j = 0; j < N; ++j) {\n        *r = ComputeSphericalInjection(*r, u2_val, v2[j], e2);\n        r += M;\n      }\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 7) && out);\n    dim >>= 3;\n    if (dim == 0) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    std::array<float, N> v2;\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = p_it[i];\n      u2[i] = Squared(p_val);\n      float *r = out + i;\n      for (size_t j = 0; j < N; ++j) {\n        *r = SquaredDifference(p_val, q_it[j]);\n        r += M;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] = Squared(q_it[i]);\n    }\n    p_it += M;\n    q_it += N;\n\n    for (size_t k = 1; k < dim; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = p_it[i];\n        u2[i] += Squared(p_val);\n        float *r = out + i;\n        for (size_t j = 0; j < N; ++j) {\n          *r += SquaredDifference(p_val, q_it[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] += Squared(q_it[i]);\n      }\n      p_it += M;\n      q_it += N;\n    }\n\n    // Compute the injections\n    float *r = out;\n    for (size_t i = 0; i < M; ++i) {\n      u2[i] *= e2;\n      for (size_t j = 0; j < N; ++j) {\n        (*r++) *= e2;\n      }\n    }\n    for (size_t i = 0; i < N; ++i) {\n      v2[i] *= e2;\n    }\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        r = out + i;\n        float u2_val = u2[i];\n     
   u2[i] = u2_val * u2_val;\n        for (size_t j = 0; j < N; ++j) {\n          *r += (u2_val - v2[j]) * (u2_val - v2[j]);\n          r += M;\n        }\n      }\n      for (size_t i = 0; i < N; ++i) {\n        v2[i] = v2[i] * v2[i];\n      }\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n\n  //! Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4SquaredDiffTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n\n  //! 
Calculate sum of squared values\n  static inline float Squared(uint32_t u) {\n    float sum = 0.0f;\n    for (size_t i = 0; i < 32; i += 8) {\n      uint8_t v = (uint8_t)(u >> i);\n      int8_t lo = (int8_t)(v << 4) >> 4;\n      int8_t hi = (int8_t)(v & 0xf0) >> 4;\n      sum += hi * hi + lo * lo;\n    }\n    return sum;\n  }\n};\n\n/*! Mips Squared Euclidean Distance Matrix (INT4, N=1)\n */\ntemplate <size_t M>\nstruct MipsSquaredEuclideanDistanceMatrix<\n    uint8_t, M, 1, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             float e2, float *out) {\n    ailego_assert(p && q && dim && !(dim & 7) && out);\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 3);\n    if (q_it == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    uint32_t q_val = *q_it++;\n    float v2 = Squared(q_val);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = *p_it++;\n      u2[i] = Squared(p_val);\n      out[i] = FusedMultiplyAdd(p_val, q_val);\n    }\n\n    while (q_it != q_end) {\n      q_val = *q_it++;\n      v2 += Squared(q_val);\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = *p_it++;\n        u2[i] += Squared(p_val);\n        out[i] += FusedMultiplyAdd(p_val, q_val);\n      }\n    }\n\n    // Compute the injection\n    for (size_t i = 0; i < M; ++i) {\n      out[i] = ComputeSphericalInjection(out[i], u2[i], v2, e2);\n    }\n  }\n\n  // Compute the distance between matrix and query by RepeatedQuadraticInjection\n  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,\n                             size_t m, float e2, float *out) {\n    ailego_assert(p && q 
&& dim && !(dim & 7) && out);\n    const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);\n    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);\n    const uint32_t *q_end = q_it + (dim >> 3);\n    if (q_it == q_end) {\n      return;\n    }\n\n    std::array<float, M> u2;\n    uint32_t q_val = *q_it++;\n    float v2 = Squared(q_val);\n    for (size_t i = 0; i < M; ++i) {\n      const uint32_t p_val = *p_it++;\n      u2[i] = Squared(p_val);\n      out[i] = SquaredDifference(p_val, q_val);\n    }\n\n    while (q_it != q_end) {\n      q_val = *q_it++;\n      v2 += Squared(q_val);\n      for (size_t i = 0; i < M; ++i) {\n        const uint32_t p_val = *p_it++;\n        u2[i] += Squared(p_val);\n        out[i] += SquaredDifference(p_val, q_val);\n      }\n    }\n\n    // Compute the injections\n    for (size_t i = 0; i < M; ++i) {\n      out[i] *= e2;\n      u2[i] *= e2;\n    }\n    v2 *= e2;\n    for (size_t k = 0; k < m; ++k) {\n      for (size_t i = 0; i < M; ++i) {\n        const float u_val = u2[i];\n        u2[i] = u_val * u_val;\n        out[i] += (u_val - v2) * (u_val - v2);\n      }\n      v2 = v2 * v2;\n    }\n  }\n\n protected:\n  //! Calculate Fused-Multiply-Add\n  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4MulTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4MulTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4MulTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4MulTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4MulTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4MulTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4MulTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4MulTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n\n  //! 
Calculate the squared difference\n  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {\n    return static_cast<float>(\n        Int4SquaredDiffTable[((lhs << 4) & 0xf0) | ((rhs >> 0) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 0) & 0xf0) | ((rhs >> 4) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf)] +\n        Int4SquaredDiffTable[((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf)]);\n  }\n\n  //! Calculate sum of squared values\n  static inline float Squared(uint32_t u) {\n    float sum = 0.0f;\n    for (size_t i = 0; i < 32; i += 8) {\n      uint8_t v = (uint8_t)(u >> i);\n      int8_t lo = (int8_t)(v << 4) >> 4;\n      int8_t hi = (int8_t)(v & 0xf0) >> 4;\n      sum += hi * hi + lo * lo;\n    }\n    return sum;\n  }\n};\n\n//--------------------------------------------------\n// Sparse\n//--------------------------------------------------\n/*! Mips Squared Euclidean Sparse Distance Matrix\n */\ntemplate <typename T>\nstruct MipsSquaredEuclideanSparseDistanceMatrix {\n  //! 
Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  static float ComputeInnerProductSparseInSegment(\n      uint32_t m_sparse_count, const uint16_t *m_sparse_index,\n      const ValueType *m_sparse_value, uint32_t q_sparse_count,\n      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);\n\n  // Compute the distance between matrix and query by SphericalInjection\n  static inline void Compute(const void *m_sparse_data_in,\n                             const void *q_sparse_data_in, float *out) {\n    ailego_assert(m_sparse_data_in && q_sparse_data_in && out);\n\n    const uint8_t *m_sparse_data =\n        reinterpret_cast<const uint8_t *>(m_sparse_data_in);\n    const uint8_t *q_sparse_data =\n        reinterpret_cast<const uint8_t *>(q_sparse_data_in);\n\n    const uint32_t m_sparse_count =\n        *reinterpret_cast<const uint32_t *>(m_sparse_data);\n    const uint32_t q_sparse_count =\n        *reinterpret_cast<const uint32_t *>(q_sparse_data);\n\n    if (m_sparse_count == 0 && q_sparse_count == 0) {\n      *out = 0;\n      return;\n    }\n\n    if (m_sparse_count == 0 || q_sparse_count == 0) {\n      *out = 2;\n      return;\n    }\n\n    const uint32_t m_seg_count =\n        *reinterpret_cast<const uint32_t *>(m_sparse_data + sizeof(uint32_t));\n    const uint32_t q_seg_count =\n        *reinterpret_cast<const uint32_t *>(q_sparse_data + sizeof(uint32_t));\n\n    const uint32_t *m_seg_id = reinterpret_cast<const uint32_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t));\n    const uint32_t *q_seg_id = reinterpret_cast<const uint32_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t));\n\n    const uint32_t *m_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t) + m_seg_count * sizeof(uint32_t));\n    const uint32_t *q_seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t) + q_seg_count * sizeof(uint32_t));\n\n    const uint16_t *m_sparse_index 
= reinterpret_cast<const uint16_t *>(\n        m_sparse_data + 2 * sizeof(uint32_t) +\n        m_seg_count * 2 * sizeof(uint32_t));\n    const uint16_t *q_sparse_index = reinterpret_cast<const uint16_t *>(\n        q_sparse_data + 2 * sizeof(uint32_t) +\n        q_seg_count * 2 * sizeof(uint32_t));\n\n    const ValueType *m_sparse_value = reinterpret_cast<const ValueType *>(\n        m_sparse_data + 2 * sizeof(uint32_t) +\n        m_seg_count * 2 * sizeof(uint32_t) + m_sparse_count * sizeof(uint16_t));\n    const ValueType *q_sparse_value = reinterpret_cast<const ValueType *>(\n        q_sparse_data + 2 * sizeof(uint32_t) +\n        q_seg_count * 2 * sizeof(uint32_t) + q_sparse_count * sizeof(uint16_t));\n\n    float ip = 0.0f;\n\n    size_t m_s = 0;\n    size_t q_s = 0;\n\n    size_t m_count = 0;\n    size_t q_count = 0;\n\n    while (m_s < m_seg_count && q_s < q_seg_count) {\n      if (m_seg_id[m_s] == q_seg_id[q_s]) {\n        ip += ComputeInnerProductSparseInSegment(\n            m_seg_vec_cnt[m_s], m_sparse_index + m_count,\n            m_sparse_value + m_count, q_seg_vec_cnt[q_s],\n            q_sparse_index + q_count, q_sparse_value + q_count);\n\n        m_count += m_seg_vec_cnt[m_s];\n        q_count += q_seg_vec_cnt[q_s];\n\n        ++m_s;\n        ++q_s;\n      } else if (m_seg_id[m_s] < q_seg_id[q_s]) {\n        m_count += m_seg_vec_cnt[m_s];\n\n        ++m_s;\n      } else {\n        q_count += q_seg_vec_cnt[q_s];\n\n        ++q_s;\n      }\n    }\n\n    float l2_m{0.0f};\n    SquaredNorm2Matrix<ValueType, 1>::Compute(m_sparse_value, m_sparse_count,\n                                              &l2_m);\n\n    float l2_q{0.0f};\n    SquaredNorm2Matrix<ValueType, 1>::Compute(q_sparse_value, q_sparse_count,\n                                              &l2_q);\n\n    *out = ComputeSphericalInjection(ip, l2_m, l2_q, 0.0f);\n  }\n};\n\ntemplate <typename T>\nfloat MipsSquaredEuclideanSparseDistanceMatrix<\n    
T>::ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                           const uint16_t *m_sparse_index,\n                                           const ValueType *m_sparse_value,\n                                           uint32_t q_sparse_count,\n                                           const uint16_t *q_sparse_index,\n                                           const ValueType *q_sparse_value) {\n  float sum = 0.0f;\n\n  size_t m_i = 0;\n  size_t q_i = 0;\n  while (m_i < m_sparse_count && q_i < q_sparse_count) {\n    if (m_sparse_index[m_i] == q_sparse_index[q_i]) {\n      sum += m_sparse_value[m_i] * q_sparse_value[q_i];\n\n      ++m_i;\n      ++q_i;\n    } else if (m_sparse_index[m_i] < q_sparse_index[q_i]) {\n      ++m_i;\n    } else {\n      ++q_i;\n    }\n  }\n\n  return sum;\n}\n\ntemplate <>\nfloat MipsSquaredEuclideanSparseDistanceMatrix<\n    float>::ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                               const uint16_t *m_sparse_index,\n                                               const ValueType *m_sparse_value,\n                                               uint32_t q_sparse_count,\n                                               const uint16_t *q_sparse_index,\n                                               const ValueType *q_sparse_value);\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp16_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX__) && defined(__F16C__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp16AVX(const Float16 *lhs, const Float16 *rhs,\n                                        size_t size, float *sql, float *sqr) {\n  __m256 ymm_sum_0 = _mm256_setzero_ps();\n  __m256 ymm_sum_1 = _mm256_setzero_ps();\n  __m256 ymm_sum_norm1 = _mm256_setzero_ps();\n  __m256 ymm_sum_norm2 = _mm256_setzero_ps();\n\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 4) << 4);\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);\n      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_lhs));\n      __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_lhs, 1));\n      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_rhs));\n      __m256 ymm_rhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_rhs, 1));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, 
ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n    if (last >= last_aligned + 8) {\n      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)lhs));\n      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)rhs));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      lhs += 8;\n      rhs += 8;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);\n      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_lhs));\n      __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_lhs, 1));\n      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_rhs));\n      __m256 ymm_rhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_rhs, 1));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n    if (last >= last_aligned + 8) {\n      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)lhs));\n      __m256 ymm_rhs_0 = 
_mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)rhs));\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      lhs += 8;\n      rhs += 8;\n    }\n  }\n\n  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));\n  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);\n  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);\n  switch (last - lhs) {\n    case 7:\n      FMA_FP16_GENERAL(lhs[6], rhs[6], result, norm1, norm2);\n      /* FALLTHRU */\n    case 6:\n      FMA_FP16_GENERAL(lhs[5], rhs[5], result, norm1, norm2);\n      /* FALLTHRU */\n    case 5:\n      FMA_FP16_GENERAL(lhs[4], rhs[4], result, norm1, norm2);\n      /* FALLTHRU */\n    case 4:\n      FMA_FP16_GENERAL(lhs[3], rhs[3], result, norm1, norm2);\n      /* FALLTHRU */\n    case 3:\n      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);\n      /* FALLTHRU */\n    case 2:\n      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);\n      /* FALLTHRU */\n    case 1:\n      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);\n  }\n\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp16AVX(const Float16 *lhs,\n                                                     const Float16 *rhs,\n                                                     size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16AVX(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16AVX(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * 
sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n#endif  // __AVX__ && __F16C__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp16_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX512F__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp16AVX512(const Float16 *lhs,\n                                           const Float16 *rhs, size_t size,\n                                           float *sql, float *sqr) {\n  __m512 zmm_sum_0 = _mm512_setzero_ps();\n  __m512 zmm_sum_1 = _mm512_setzero_ps();\n  __m512 zmm_sum_norm1 = _mm512_setzero_ps();\n  __m512 zmm_sum_norm2 = _mm512_setzero_ps();\n\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 5) << 5);\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512i zmm_lhs = _mm512_load_si512((const __m512i *)lhs);\n      __m512i zmm_rhs = _mm512_load_si512((const __m512i *)rhs);\n      __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_lhs));\n      __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_lhs, 1));\n      __m512 zmm_rhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_rhs));\n      __m512 zmm_rhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_rhs, 1));\n      
FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)\n    }\n    if (last >= last_aligned + 16) {\n      __m512 zmm_lhs_0 =\n          _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)lhs));\n      __m512 zmm_rhs_0 =\n          _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)rhs));\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512i zmm_lhs = _mm512_loadu_si512((const __m512i *)lhs);\n      __m512i zmm_rhs = _mm512_loadu_si512((const __m512i *)rhs);\n      __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_lhs));\n      __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_lhs, 1));\n      __m512 zmm_rhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_rhs));\n      __m512 zmm_rhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_rhs, 1));\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)\n    }\n    if (last >= last_aligned + 16) {\n      __m512 zmm_lhs_0 =\n          _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)lhs));\n      __m512 zmm_rhs_0 =\n          _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)rhs));\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_0, 
zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n\n  __m256 ymm_sum_0 =\n      HorizontalAdd_FP32_V512_TO_V256(_mm512_add_ps(zmm_sum_0, zmm_sum_1));\n  __m256 ymm_sum_norm1 = HorizontalAdd_FP32_V512_TO_V256(zmm_sum_norm1);\n  __m256 ymm_sum_norm2 = HorizontalAdd_FP32_V512_TO_V256(zmm_sum_norm2);\n  if (last >= lhs + 8) {\n    __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)lhs));\n    __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)rhs));\n    ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n    ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n    ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n    lhs += 8;\n    rhs += 8;\n  }\n\n  float result = HorizontalAdd_FP32_V256(ymm_sum_0);\n  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);\n  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);\n  switch (last - lhs) {\n    case 7:\n      FMA_FP16_GENERAL(lhs[6], rhs[6], result, norm1, norm2);\n      /* FALLTHRU */\n    case 6:\n      FMA_FP16_GENERAL(lhs[5], rhs[5], result, norm1, norm2);\n      /* FALLTHRU */\n    case 5:\n      FMA_FP16_GENERAL(lhs[4], rhs[4], result, norm1, norm2);\n      /* FALLTHRU */\n    case 4:\n      FMA_FP16_GENERAL(lhs[3], rhs[3], result, norm1, norm2);\n      /* FALLTHRU */\n    case 3:\n      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);\n      /* FALLTHRU */\n    case 2:\n      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);\n      /* FALLTHRU */\n    case 1:\n      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);\n  }\n\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp16AVX512(const Float16 *lhs,\n                                                        const Float16 *rhs,\n                                                        size_t size, float e2) {\n  float 
u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16AVX512(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16AVX512(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n#endif  // __AVX512F__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs,\n                                                      const Float16 *rhs,\n                                                      size_t size, float e2);\n#endif\n\n#if defined(__AVX512F__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp16AVX512(const Float16 *lhs,\n                                                        const Float16 *rhs,\n                                                        size_t size, float e2);\n#endif\n\n#if defined(__AVX__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp16AVX(const Float16 *lhs,\n                                                     const Float16 *rhs,\n                                                     size_t size, float e2);\n#endif\n\nfloat 
MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp16Scalar(\n    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, float e2);\n\n\n//! Compute the distance between matrix and query by SphericalInjection\nvoid MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {\n#if defined(__ARM_NEON)\n  *out = MipsEuclideanDistanceSphericalInjectionFp16NEON(p, q, dim, e2);\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MipsEuclideanDistanceSphericalInjectionFp16AVX512(p, q, dim, e2);\n    return;\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MipsEuclideanDistanceSphericalInjectionFp16AVX(p, q, dim, e2);\n    return;\n  }\n#endif  //__AVX__\n  *out = MipsEuclideanDistanceSphericalInjectionFp16Scalar(p, q, dim, e2);\n  return;\n#endif  //__ARM_NEON\n}\n\n//! 
Compute the distance between matrix and query by RepeatedQuadraticInjection\nvoid MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,\n    float *out) {\n#if defined(__ARM_NEON)\n  *out =\n      MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(p, q, dim, m, e2);\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(p, q, dim,\n                                                                     m, e2);\n    return;\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(p, q, dim, m,\n                                                                  e2);\n    return;\n  }\n#endif  //__AVX__\n  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(p, q, dim, m,\n                                                                   e2);\n  return;\n#endif  //__ARM_NEON\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp16.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON) && defined(__aarch64__)\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                         size_t size, float *sql, float *sqr) {\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 3) << 3);\n  float16x8_t v_sum = vdupq_n_f16(0);\n  float16x8_t v_sum_norm1 = vdupq_n_f16(0);\n  float16x8_t v_sum_norm2 = vdupq_n_f16(0);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    float16x8_t v_lhs = vld1q_f16((const float16_t *)lhs);\n    float16x8_t v_rhs = vld1q_f16((const float16_t *)rhs);\n    v_sum = vfmaq_f16(v_sum, v_lhs, v_rhs);\n    v_sum_norm1 = vfmaq_f16(v_sum_norm1, v_lhs, v_lhs);\n    v_sum_norm2 = vfmaq_f16(v_sum_norm2, v_rhs, v_rhs);\n  }\n  if (last >= last_aligned + 4) {\n    float16x8_t v_lhs = vcombine_f16(vld1_f16((const float16_t *)lhs),\n                                     vreinterpret_f16_u64(vdup_n_u64(0ul)));\n    float16x8_t v_rhs = vcombine_f16(vld1_f16((const float16_t *)rhs),\n                        
             vreinterpret_f16_u64(vdup_n_u64(0ul)));\n    v_sum = vfmaq_f16(v_sum, v_lhs, v_rhs);\n    v_sum_norm1 = vfmaq_f16(v_sum_norm1, v_lhs, v_lhs);\n    v_sum_norm2 = vfmaq_f16(v_sum_norm2, v_rhs, v_rhs);\n    lhs += 4;\n    rhs += 4;\n  }\n\n  float result = HorizontalAdd_FP16_NEON(v_sum);\n  float norm1 = HorizontalAdd_FP16_NEON(v_sum_norm1);\n  float norm2 = HorizontalAdd_FP16_NEON(v_sum_norm2);\n\n  switch (last - lhs) {\n    case 3:\n      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);\n      /* FALLTHRU */\n    case 2:\n      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);\n      /* FALLTHRU */\n    case 1:\n      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n#else\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp16NEON(const Float16 *lhs, const Float16 *rhs,\n                                         size_t size, float *sql, float *sqr) {\n  const Float16 *last = lhs + size;\n  const Float16 *last_aligned = lhs + ((size >> 3) << 3);\n  float32x4_t v_sum_0 = vdupq_n_f32(0);\n  float32x4_t v_sum_1 = vdupq_n_f32(0);\n  float32x4_t v_sum_norm1 = vdupq_n_f32(0);\n  float32x4_t v_sum_norm2 = vdupq_n_f32(0);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    float16x8_t v_lhs = vld1q_f16((const float16_t *)lhs);\n    float16x8_t v_rhs = vld1q_f16((const float16_t *)rhs);\n    float32x4_t v_lhs_0 = vcvt_f32_f16(vget_low_f16(v_lhs));\n    float32x4_t v_rhs_0 = vcvt_f32_f16(vget_low_f16(v_rhs));\n    float32x4_t v_lhs_1 = vcvt_high_f32_f16(v_lhs);\n    float32x4_t v_rhs_1 = vcvt_high_f32_f16(v_rhs);\n    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);\n    v_sum_1 = vfmaq_f32(v_sum_1, v_lhs_1, v_rhs_1);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_1, v_lhs_1);\n    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);\n    v_sum_norm2 = 
vfmaq_f32(v_sum_norm2, v_rhs_1, v_rhs_1);\n  }\n  if (last >= last_aligned + 4) {\n    float32x4_t v_lhs_0 = vcvt_f32_f16(vld1_f16((const float16_t *)lhs));\n    float32x4_t v_rhs_0 = vcvt_f32_f16(vld1_f16((const float16_t *)rhs));\n    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);\n    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);\n    lhs += 4;\n    rhs += 4;\n  }\n\n  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));\n  float norm1 = vaddvq_f32(v_sum_norm1);\n  float norm2 = vaddvq_f32(v_sum_norm2);\n  switch (last - lhs) {\n    case 3:\n      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);\n      /* FALLTHRU */\n    case 2:\n      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);\n      /* FALLTHRU */\n    case 1:\n      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\n#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC\n\nfloat MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs,\n                                                      const Float16 *rhs,\n                                                      size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16NEON(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(\n    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp16NEON(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n#endif  // __ARM_NEON && __aarch64__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp32_avx.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE__)\nfloat InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,\n                                        size_t size, float *sql, float *sqr);\n#endif\n\n#if defined(__AVX__)\n//! 
Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp32AVX(const float *lhs, const float *rhs,\n                                        size_t size, float *sql, float *sqr) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 4) << 4);\n\n  __m256 ymm_sum_0 = _mm256_setzero_ps();\n  __m256 ymm_sum_1 = _mm256_setzero_ps();\n  __m256 ymm_sum_norm1 = _mm256_setzero_ps();\n  __m256 ymm_sum_norm2 = _mm256_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_lhs_0 = _mm256_load_ps(lhs + 0);\n      __m256 ymm_lhs_1 = _mm256_load_ps(lhs + 8);\n      __m256 ymm_rhs_0 = _mm256_load_ps(rhs + 0);\n      __m256 ymm_rhs_1 = _mm256_load_ps(rhs + 8);\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 8) {\n      __m256 ymm_lhs_0 = _mm256_load_ps(lhs);\n      __m256 ymm_rhs_0 = _mm256_load_ps(rhs);\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      lhs += 8;\n      rhs += 8;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs + 0);\n      __m256 ymm_lhs_1 = _mm256_loadu_ps(lhs + 8);\n      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs + 0);\n      __m256 ymm_rhs_1 = _mm256_loadu_ps(rhs + 8);\n      ymm_sum_0 = 
_mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 8) {\n      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs);\n      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs);\n      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      lhs += 8;\n      rhs += 8;\n    }\n  }\n  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));\n  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);\n  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);\n\n  switch (last - lhs) {\n    case 7:\n      FMA_FP32_GENERAL(lhs[6], rhs[6], result, norm1, norm2)\n      /* FALLTHRU */\n    case 6:\n      FMA_FP32_GENERAL(lhs[5], rhs[5], result, norm1, norm2)\n      /* FALLTHRU */\n    case 5:\n      FMA_FP32_GENERAL(lhs[4], rhs[4], result, norm1, norm2)\n      /* FALLTHRU */\n    case 4:\n      FMA_FP32_GENERAL(lhs[3], rhs[3], result, norm1, norm2)\n      /* FALLTHRU */\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp32AVX(const float *lhs,\n                                                     const float *rhs,\n                                                     size_t size, float e2) 
{\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  if (size > 7) {\n    sum = InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &u2, &v2);\n  } else {\n    sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n  }\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(\n    const float *lhs, const float *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  if (size > 7) {\n    sum = InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &u2, &v2);\n  } else {\n    sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n  }\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n#endif  // __AVX__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp32_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE__)\nfloat InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,\n                                        size_t size, float *sql, float *sqr);\n#endif\n\n#if defined(__AVX__)\nfloat InnerProductAndSquaredNormFp32AVX(const float *lhs, const float *rhs,\n                                        size_t size, float *sql, float *sqr);\n#endif\n\n#if defined(__AVX512F__)\n//! 
Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp32AVX512(const float *lhs, const float *rhs,\n                                           size_t size, float *sql,\n                                           float *sqr) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m512 zmm_sum_0 = _mm512_setzero_ps();\n  __m512 zmm_sum_1 = _mm512_setzero_ps();\n  __m512 zmm_sum_norm1 = _mm512_setzero_ps();\n  __m512 zmm_sum_norm2 = _mm512_setzero_ps();\n\n  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512 zmm_lhs_0 = _mm512_load_ps(lhs + 0);\n      __m512 zmm_lhs_1 = _mm512_load_ps(lhs + 16);\n      __m512 zmm_rhs_0 = _mm512_load_ps(rhs + 0);\n      __m512 zmm_rhs_1 = _mm512_load_ps(rhs + 16);\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)\n    }\n\n    if (last >= last_aligned + 16) {\n      __m512 zmm_lhs_0 = _mm512_load_ps(lhs);\n      __m512 zmm_rhs_0 = _mm512_load_ps(rhs);\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m512 zmm_lhs_0 = _mm512_loadu_ps(lhs + 0);\n      __m512 zmm_lhs_1 = _mm512_loadu_ps(lhs + 16);\n      __m512 zmm_rhs_0 = _mm512_loadu_ps(rhs + 0);\n      __m512 zmm_rhs_1 = _mm512_loadu_ps(rhs + 16);\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)\n     
 FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)\n    }\n\n    if (last >= last_aligned + 16) {\n      __m512 zmm_lhs_0 = _mm512_loadu_ps(lhs);\n      __m512 zmm_rhs_0 = _mm512_loadu_ps(rhs);\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)\n      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)\n      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n\n  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);\n  if (lhs != last) {\n    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);\n    __m512 zmm_undefined = _mm512_undefined_ps();\n    __m512 zmm_lhs_0 = _mm512_mask_loadu_ps(zmm_undefined, mask, lhs);\n    __m512 zmm_rhs_0 = _mm512_mask_loadu_ps(zmm_undefined, mask, rhs);\n    FMA_MASK_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0, mask);\n    FMA_MASK_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1, mask);\n    FMA_MASK_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2, mask);\n  }\n\n  *sql = HorizontalAdd_FP32_V512(zmm_sum_norm1);\n  *sqr = HorizontalAdd_FP32_V512(zmm_sum_norm2);\n  return HorizontalAdd_FP32_V512(zmm_sum_0);\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp32AVX512(const float *lhs,\n                                                        const float *rhs,\n                                                        size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  if (size > 15) {\n    sum = InnerProductAndSquaredNormFp32AVX512(lhs, rhs, size, &u2, &v2);\n  } else if (size > 7) {\n    sum = InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &u2, &v2);\n  } else {\n    sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n  }\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(\n  
  const float *lhs, const float *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  if (size > 15) {\n    sum = InnerProductAndSquaredNormFp32AVX512(lhs, rhs, size, &u2, &v2);\n  } else if (size > 7) {\n    sum = InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &u2, &v2);\n  } else {\n    sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n  }\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n#endif  // __AVX512F__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\nfloat InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,\n                                         size_t size, float *sql, float *sqr);\n#endif\n\n#if defined(__AVX512F__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(\n    const float *lhs, const float *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp32AVX512(const float *lhs,\n                                                        const float *rhs,\n                                                        size_t size, float e2);\n#endif\n\n#if defined(__AVX__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(\n    const float *lhs, const float *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp32AVX(const float *lhs,\n                                                     const float *rhs,\n                                                     size_t size, float e2);\n#endif\n\n#if defined(__SSE__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(\n    const float *lhs, const float *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp32SSE(const float *lhs,\n       
                                              const float *rhs,\n                                                     size_t size, float e2);\n#endif\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(\n    const float *p, const float *q, size_t dim, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionFp32Scalar(const float *p,\n                                                        const float *q,\n                                                        size_t dim, float e2);\n\nfloat MipsInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                      const uint16_t *m_sparse_index,\n                                      const float *m_sparse_value,\n                                      uint32_t q_sparse_count,\n                                      const uint16_t *q_sparse_index,\n                                      const float *q_sparse_value);\n\n//! Compute the distance between matrix and query by SphericalInjection\nvoid MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {\n#if __ARM_NEON\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum = InnerProductAndSquaredNormFp32NEON(p, q, dim, &u2, &v2);\n\n  *out = ComputeSphericalInjection(sum, u2, v2, e2);\n  return;\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MipsEuclideanDistanceSphericalInjectionFp32AVX512(p, q, dim, e2);\n    return;\n  }\n#endif  //__AVX512F__\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MipsEuclideanDistanceSphericalInjectionFp32AVX(p, q, dim, e2);\n    return;\n  }\n#endif  // __AVX__\n#if defined(__SSE__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {\n    *out = MipsEuclideanDistanceSphericalInjectionFp32SSE(p, q, dim, e2);\n    return;\n  }\n#endif  // __SSE__\n  *out = 
MipsEuclideanDistanceSphericalInjectionFp32Scalar(p, q, dim, e2);\n  return;\n#endif  //__ARM_NEON\n}\n\n//! Compute the distance between matrix and query by RepeatedQuadraticInjection\nvoid MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,\n    float *out) {\n#if defined(__ARM_NEON)\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum = InnerProductAndSquaredNormFp32NEON(p, q, dim, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n  *out = sum;\n  return;\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(p, q, dim,\n                                                                     m, e2);\n    return;\n  }\n#endif  //__AVX512F__\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(p, q, dim, m,\n                                                                  e2);\n    return;\n  }\n#endif  // __AVX__\n\n#if defined(__SSE__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(p, q, dim, m,\n                                                                  e2);\n    return;\n  }\n#endif  //__SSE__\n  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(p, q, dim, m,\n                                                                   e2);\n\n  return;\n#endif  //__ARM_NEON\n}\n\n// Sparse\n#if defined(__SSE4_1__)\nfloat MipsInnerProductSparseInSegmentSSE(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const float *m_sparse_value,\n              
                           uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const float *q_sparse_value);\n#endif\n\ntemplate <>\nfloat MipsSquaredEuclideanSparseDistanceMatrix<float>::\n    ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                       const uint16_t *m_sparse_index,\n                                       const ValueType *m_sparse_value,\n                                       uint32_t q_sparse_count,\n                                       const uint16_t *q_sparse_index,\n                                       const ValueType *q_sparse_value) {\n#if defined(__SSE4_1__)\n  return MipsInnerProductSparseInSegmentSSE(m_sparse_count, m_sparse_index,\n                                            m_sparse_value, q_sparse_count,\n                                            q_sparse_index, q_sparse_value);\n#else\n  return MipsInnerProductSparseInSegment(m_sparse_count, m_sparse_index,\n                                         m_sparse_value, q_sparse_count,\n                                         q_sparse_index, q_sparse_value);\n#endif\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__ARM_NEON)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,\n                                         size_t size, float *sql, float *sqr) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  float32x4_t v_sum_0 = vdupq_n_f32(0);\n  float32x4_t v_sum_1 = vdupq_n_f32(0);\n  float32x4_t v_sum_norm1 = vdupq_n_f32(0);\n  float32x4_t v_sum_norm2 = vdupq_n_f32(0);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    float32x4_t v_lhs_0 = vld1q_f32(lhs + 0);\n    float32x4_t v_lhs_1 = vld1q_f32(lhs + 4);\n    float32x4_t v_rhs_0 = vld1q_f32(rhs + 0);\n    float32x4_t v_rhs_1 = vld1q_f32(rhs + 4);\n    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);\n    v_sum_1 = vfmaq_f32(v_sum_1, v_lhs_1, v_rhs_1);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_1, v_lhs_1);\n    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);\n    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_1, v_rhs_1);\n  }\n  if (last >= last_aligned + 4) {\n    float32x4_t v_lhs_0 
= vld1q_f32(lhs);\n    float32x4_t v_rhs_0 = vld1q_f32(rhs);\n    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);\n    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);\n    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);\n    lhs += 4;\n    rhs += 4;\n  }\n\n  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));\n  float norm1 = vaddvq_f32(v_sum_norm1);\n  float norm2 = vaddvq_f32(v_sum_norm2);\n  switch (last - lhs) {\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\n#endif  //__ARM_NEON\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_fp32_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_fp32.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,\n                                        size_t size, float *sql, float *sqr) {\n  const float *last = lhs + size;\n  const float *last_aligned = lhs + ((size >> 3) << 3);\n\n  __m128 xmm_sum = _mm_setzero_ps();\n  __m128 xmm_sum_norm1 = _mm_setzero_ps();\n  __m128 xmm_sum_norm2 = _mm_setzero_ps();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_lhs_0 = _mm_load_ps(lhs + 0);\n      __m128 xmm_lhs_1 = _mm_load_ps(lhs + 4);\n      __m128 xmm_rhs_0 = _mm_load_ps(rhs + 0);\n      __m128 xmm_rhs_1 = _mm_load_ps(rhs + 4);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_1, xmm_rhs_1, xmm_sum);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);\n      xmm_sum_norm2 = _mm_fmadd_ps(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      xmm_sum_norm2 = 
_mm_fmadd_ps(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 4) {\n      __m128 xmm_lhs_0 = _mm_load_ps(lhs);\n      __m128 xmm_rhs_0 = _mm_load_ps(rhs);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      xmm_sum_norm2 = _mm_fmadd_ps(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m128 xmm_lhs_0 = _mm_loadu_ps(lhs + 0);\n      __m128 xmm_lhs_1 = _mm_loadu_ps(lhs + 4);\n      __m128 xmm_rhs_0 = _mm_loadu_ps(rhs + 0);\n      __m128 xmm_rhs_1 = _mm_loadu_ps(rhs + 4);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_1, xmm_rhs_1, xmm_sum);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);\n      xmm_sum_norm2 = _mm_fmadd_ps(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      xmm_sum_norm2 = _mm_fmadd_ps(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 4) {\n      __m128 xmm_lhs_0 = _mm_loadu_ps(lhs);\n      __m128 xmm_rhs_0 = _mm_loadu_ps(rhs);\n      xmm_sum = _mm_fmadd_ps(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      xmm_sum_norm1 = _mm_fmadd_ps(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      xmm_sum_norm2 = _mm_fmadd_ps(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  float result = HorizontalAdd_FP32_V128(xmm_sum);\n  float norm1 = HorizontalAdd_FP32_V128(xmm_sum_norm1);\n  float norm2 = HorizontalAdd_FP32_V128(xmm_sum_norm2);\n\n  switch (last - lhs) {\n    case 3:\n      FMA_FP32_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_FP32_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_FP32_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  
*sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp32SSE(const float *lhs,\n                                                     const float *rhs,\n                                                     size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(\n    const float *lhs, const float *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n#endif  // __SSE__\n\n// #if 1\n#if defined(__SSE4_1__)\nconst static __m128i SHUFFLE_MASK16[16] = {\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, -127, -127, -127, -127),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,\n                 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,\n                 -127, -127, 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 
-127, -127,\n                 -127, -127, 15, 14, 13, 12),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 3, 2, 1, 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 7, 6, 5, 4),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,\n                 11, 10, 9, 8),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,\n                 0),\n    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,\n                 4),\n    _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),\n};\n\nconstexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;\n\nfloat MipsInnerProductSparseInSegmentSSE(uint32_t m_sparse_count,\n                                         const uint16_t *m_sparse_index,\n                                         const float *m_sparse_value,\n                                         uint32_t q_sparse_count,\n                                         const uint16_t *q_sparse_index,\n                                         const float *q_sparse_value) {\n  float sum = 0.0f;\n\n  // size_t alloc_size = 0;\n\n  size_t i1 = 0, i2 = 0;\n  size_t end1 = m_sparse_count / 8 * 8;\n  size_t end2 = q_sparse_count / 8 * 8;\n\n  // std::vector<float> mem1;\n  // std::vector<float> mem2;\n\n  float fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];\n  float fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];\n\n  float *val_start_1 = fixed_buffer_1;\n  float *val_start_2 = fixed_buffer_2;\n\n  // uint32_t max_count = std::max(m_sparse_count, q_sparse_count);\n\n  // if (MAX_SPARSE_BUFFER_LENGTH < max_count) {\n  //   mem1.reserve(max_count);\n  //   mem2.reserve(max_count);\n\n  //   val_start_1 = mem1.data();\n  //   val_start_2 = mem2.data();\n  // }\n\n  float *val_1 = val_start_1;\n  float *val_2 = 
val_start_2;\n\n  if (i1 < end1 && i2 < end2) {\n    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {\n      i1 += 8;\n      if (i1 >= end1) goto do_scalar;\n    }\n\n    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {\n      i2 += 8;\n      if (i2 >= end2) goto do_scalar;\n    }\n\n    __m128i mm_index_m =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n    __m128i mm_index_q =\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n\n    while (true) {\n#ifdef DEBUG_PRINT\n      std::cout << \"index 1: \" << std::endl;\n      print_data16(&mm_index_m);\n\n      std::cout << \"index 2: \" << std::endl;\n      print_data16(&mm_index_q);\n#endif\n\n      __m128i mm_cmp_res =\n          _mm_cmpistrm(mm_index_q, mm_index_m,\n                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n\n#ifdef DEBUG_PRINT\n      std::cout << \"cmp res: \" << std::endl;\n      print_data16(&mm_cmp_res);\n#endif\n\n      int r = _mm_extract_epi32(mm_cmp_res, 0);\n\n      if (r) {\n        int r1 = r & 15;\n\n        __m128i v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));\n        __m128 vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));\n\n        _mm_storeu_ps(val_1, vs);\n        val_1 += _mm_popcnt_u32(r1);\n\n        int r2 = (r >> 4) & 15;\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_value[i1 + 4]));\n        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));\n        _mm_storeu_ps(val_1, vs);\n        val_1 += _mm_popcnt_u32(r2);\n\n        mm_cmp_res = _mm_cmpistrm(\n            mm_index_m, mm_index_q,\n            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);\n        r = _mm_extract_epi32(mm_cmp_res, 0);\n\n        r1 = r & 15;\n\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));\n        vs = 
_mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));\n        _mm_storeu_ps(val_2, vs);\n        val_2 += _mm_popcnt_u32(r1);\n\n        r2 = (r >> 4) & 15;\n        v = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_value[i2 + 4]));\n        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));\n        _mm_storeu_ps(val_2, vs);\n        val_2 += _mm_popcnt_u32(r2);\n      }\n\n      const uint16_t id1_max = m_sparse_index[i1 + 7];\n\n      if (id1_max <= q_sparse_index[i2 + 7]) {\n        i1 += 8;\n        if (i1 >= end1) goto do_scalar;\n        mm_index_m = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));\n      }\n\n      if (id1_max >= q_sparse_index[i2 + 7]) {\n        i2 += 8;\n        if (i2 >= end2) goto do_scalar;\n        mm_index_q = _mm_loadu_si128(\n            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));\n      }\n    }\n  }\n\ndo_scalar:\n  while (i1 < m_sparse_count && i2 < q_sparse_count) {\n    if (m_sparse_index[i1] == q_sparse_index[i2]) {\n      *val_1++ = m_sparse_value[i1];\n      *val_2++ = q_sparse_value[i2];\n\n      ++i1;\n      ++i2;\n    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {\n      ++i1;\n    } else {\n      ++i2;\n    }\n  }\n\n  size_t res_num = val_1 - val_start_1;\n\n  //  if (res_num != val_2 - val_start_2) {\n  //   std::cerr << \"size mismatch!\" << std::endl;\n  //  }\n\n  size_t res_num4 = res_num / 4 * 4;\n\n  if (res_num4) {\n    __m128 sum128 = _mm_set1_ps(0);\n\n    for (size_t k = 0; k < res_num4; k += 4) {\n      sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(val_start_1 + k),\n                                             _mm_loadu_ps(val_start_2 + k)));\n    }\n\n    float __attribute__((aligned(16))) tmp_res[4];\n    _mm_store_ps(tmp_res, sum128);\n    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);\n  }\n\n  for (size_t k = res_num4; k < res_num; ++k)\n    sum += val_start_1[k] * 
val_start_2[k];\n\n  return sum;\n}\n#else\nfloat MipsInnerProductSparseInSegment(uint32_t m_sparse_count,\n                                      const uint16_t *m_sparse_index,\n                                      const float *m_sparse_value,\n                                      uint32_t q_sparse_count,\n                                      const uint16_t *q_sparse_index,\n                                      const float *q_sparse_value) {\n  float sum = 0.0f;\n\n  size_t m_i = 0;\n  size_t q_i = 0;\n  while (m_i < m_sparse_count && q_i < q_sparse_count) {\n    if (m_sparse_index[m_i] == q_sparse_index[q_i]) {\n      sum += m_sparse_value[m_i] * q_sparse_value[q_i];\n\n      ++m_i;\n      ++q_i;\n    } else if (m_sparse_index[m_i] < q_sparse_index[q_i]) {\n      ++m_i;\n    } else {\n      ++q_i;\n    }\n  }\n\n  return sum;\n}\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int4_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"inner_product_matrix.h\"\n#include \"mips_euclidean_distance_matrix.h\"\n#include \"norm_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,\n                                         size_t size, float *sql, float *sqr) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 5) << 5);\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n  __m256i ymm_sum_norm1 = _mm256_setzero_si256();\n  __m256i ymm_sum_norm2 = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)(rhs));\n      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum1, ymm_sum_norm1,\n                        ymm_sum_norm2)\n    }\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      __m128i 
xmm_sum = _mm_setzero_si128();\n      __m128i xmm_sum_norm1 = _mm_setzero_si128();\n      __m128i xmm_sum_norm2 = _mm_setzero_si128();\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum), ymm_sum_0);\n      ymm_sum_norm1 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm1), ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm2), ymm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)(lhs));\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)(rhs));\n      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum1, ymm_sum_norm1,\n                        ymm_sum_norm2)\n    }\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      __m128i xmm_sum = _mm_setzero_si128();\n      __m128i xmm_sum_norm1 = _mm_setzero_si128();\n      __m128i xmm_sum_norm2 = _mm_setzero_si128();\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum), ymm_sum_0);\n      ymm_sum_norm1 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm1), ymm_sum_norm1);\n      ymm_sum_norm2 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm2), ymm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(\n      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));\n  float norm1 = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm1));\n  float norm2 = 
static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm2));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT4_GENERAL(lhs[14], rhs[14], result, norm1, norm2)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT4_GENERAL(lhs[13], rhs[13], result, norm1, norm2)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT4_GENERAL(lhs[12], rhs[12], result, norm1, norm2)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT4_GENERAL(lhs[11], rhs[11], result, norm1, norm2)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT4_GENERAL(lhs[10], rhs[10], result, norm1, norm2)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT4_GENERAL(lhs[9], rhs[9], result, norm1, norm2)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT4_GENERAL(lhs[8], rhs[8], result, norm1, norm2)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT4_GENERAL(lhs[7], rhs[7], result, norm1, norm2)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT4_GENERAL(lhs[6], rhs[6], result, norm1, norm2)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT4_GENERAL(lhs[5], rhs[5], result, norm1, norm2)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT4_GENERAL(lhs[4], rhs[4], result, norm1, norm2)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT4_GENERAL(lhs[3], rhs[3], result, norm1, norm2)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT4_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT4_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT4_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionInt4AVX2(const uint8_t *lhs,\n                                                      const uint8_t *rhs,\n                                                      size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt4AVX2(lhs, rhs, size >> 1, &u2, &v2);\n\n  return 
ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(\n    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt4AVX2(lhs, rhs, size >> 1, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n#endif  // __AVX2__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int4_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"inner_product_matrix.h\"\n#include \"mips_euclidean_distance_matrix.h\"\n#include \"norm_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(\n    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt4AVX2(const uint8_t *lhs,\n                                                      const uint8_t *rhs,\n                                                      size_t size, float e2);\n#endif\n\n#if defined(__SSE4_1__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(\n    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt4SSE(const uint8_t *lhs,\n                                                     const uint8_t *rhs,\n                                                     size_t size, float e2);\n#endif\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(\n    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt4Scalar(const uint8_t *lhs,\n                                                        const uint8_t *rhs,\n                                                        
size_t size, float e2);\n\n//! Compute the distance between matrix and query by SphericalInjection\nvoid MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MipsEuclideanDistanceSphericalInjectionInt4AVX2(p, q, dim, e2);\n    return;\n  }\n#endif\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MipsEuclideanDistanceSphericalInjectionInt4SSE(p, q, dim, e2);\n    return;\n  }\n#endif\n\n  *out = MipsEuclideanDistanceSphericalInjectionInt4Scalar(p, q, dim, e2);\n}\n\n//! Compute the distance between matrix and query by RepeatedQuadraticInjection\nvoid MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,\n    float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(p, q, dim, m,\n                                                                   e2);\n    return;\n  }\n#endif\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(p, q, dim, m,\n                                                                  e2);\n    return;\n  }\n#endif\n\n  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(p, q, dim, m,\n                                                                   e2);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int4_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"inner_product_matrix.h\"\n#include \"mips_euclidean_distance_matrix.h\"\n#include \"norm_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE4_1__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormInt4SSE(const uint8_t *lhs, const uint8_t *rhs,\n                                        size_t size, float *sql, float *sqr) {\n  const uint8_t *last = lhs + size;\n  const uint8_t *last_aligned = lhs + ((size >> 4) << 4);\n  __m128i xmm_sum = _mm_setzero_si128();\n  __m128i xmm_sum_norm1 = _mm_setzero_si128();\n  __m128i xmm_sum_norm2 = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)(rhs));\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 16, rhs += 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)(lhs));\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)(rhs));\n      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, 
xmm_sum_norm2)\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));\n  float norm1 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm1));\n  float norm2 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm2));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT4_GENERAL(lhs[14], rhs[14], result, norm1, norm2)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT4_GENERAL(lhs[13], rhs[13], result, norm1, norm2)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT4_GENERAL(lhs[12], rhs[12], result, norm1, norm2)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT4_GENERAL(lhs[11], rhs[11], result, norm1, norm2)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT4_GENERAL(lhs[10], rhs[10], result, norm1, norm2)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT4_GENERAL(lhs[9], rhs[9], result, norm1, norm2)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT4_GENERAL(lhs[8], rhs[8], result, norm1, norm2)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT4_GENERAL(lhs[7], rhs[7], result, norm1, norm2)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT4_GENERAL(lhs[6], rhs[6], result, norm1, norm2)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT4_GENERAL(lhs[5], rhs[5], result, norm1, norm2)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT4_GENERAL(lhs[4], rhs[4], result, norm1, norm2)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT4_GENERAL(lhs[3], rhs[3], result, norm1, norm2)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT4_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT4_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT4_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionInt4SSE(const uint8_t *lhs,\n                                                     const uint8_t *rhs,\n                                                     
size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt4SSE(lhs, rhs, size >> 1, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(\n    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt4SSE(lhs, rhs, size >> 1, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int8_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormInt8AVX2(const int8_t *lhs, const int8_t *rhs,\n                                         size_t size, float *sql, float *sqr) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 6) << 6);\n\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n  __m256i ymm_sum_norm1 = _mm256_setzero_si256();\n  __m256i ymm_sum_norm2 = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));\n      FMA_INT8_AVX(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      FMA_INT8_AVX(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      FMA_INT8_AVX(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_lhs_1, 
ymm_lhs_1, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      FMA_INT8_AVX(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);\n      FMA_INT8_AVX(ymm_lhs, ymm_rhs, ymm_sum_0);\n      FMA_INT8_AVX(ymm_lhs, ymm_lhs, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_rhs, ymm_rhs, ymm_sum_norm2);\n      lhs += 32;\n      rhs += 32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_rhs, ymm_sum_0);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_lhs, ymm_sum_norm1);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_rhs, xmm_rhs, ymm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));\n      FMA_INT8_AVX(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);\n      FMA_INT8_AVX(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);\n      FMA_INT8_AVX(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);\n      FMA_INT8_AVX(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);\n      FMA_INT8_AVX(ymm_lhs, ymm_rhs, ymm_sum_0);\n      FMA_INT8_AVX(ymm_lhs, ymm_lhs, ymm_sum_norm1);\n      FMA_INT8_AVX(ymm_rhs, ymm_rhs, ymm_sum_norm2);\n      lhs += 32;\n      rhs += 
32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_rhs, ymm_sum_0);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_lhs, ymm_sum_norm1);\n      FMA_INT8_AVX_SSE_HYBRID(xmm_rhs, xmm_rhs, ymm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(\n      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));\n  float norm1 = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm1));\n  float norm2 = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm2));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT8_GENERAL(lhs[14], rhs[14], result, norm1, norm2)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT8_GENERAL(lhs[13], rhs[13], result, norm1, norm2)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT8_GENERAL(lhs[12], rhs[12], result, norm1, norm2)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT8_GENERAL(lhs[11], rhs[11], result, norm1, norm2)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT8_GENERAL(lhs[10], rhs[10], result, norm1, norm2)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT8_GENERAL(lhs[9], rhs[9], result, norm1, norm2)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT8_GENERAL(lhs[8], rhs[8], result, norm1, norm2)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT8_GENERAL(lhs[7], rhs[7], result, norm1, norm2)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT8_GENERAL(lhs[6], rhs[6], result, norm1, norm2)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT8_GENERAL(lhs[5], rhs[5], result, norm1, norm2)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT8_GENERAL(lhs[4], rhs[4], result, norm1, norm2)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT8_GENERAL(lhs[3], rhs[3], result, norm1, norm2)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT8_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n     
 FMA_INT8_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT8_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionInt8AVX2(const int8_t *lhs,\n                                                      const int8_t *rhs,\n                                                      size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt8AVX2(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(\n    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt8AVX2(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n#endif  // __AVX2__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__AVX2__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(\n    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt8AVX2(const int8_t *lhs,\n                                                      const int8_t *rhs,\n                                                      size_t size, float e2);\n#endif\n\n#if defined(__SSE4_1__)\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(\n    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt8SSE(const int8_t *lhs,\n                                                     const int8_t *rhs,\n                                                     size_t size, float e2);\n#endif\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(\n    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);\nfloat MipsEuclideanDistanceSphericalInjectionInt8Scalar(const int8_t *lhs,\n                                                        const int8_t *rhs,\n                                                        size_t size, float e2);\n\n//! 
Compute the distance between matrix and query by SphericalInjection\nvoid MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MipsEuclideanDistanceSphericalInjectionInt8AVX2(p, q, dim, e2);\n    return;\n  }\n#endif\n\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MipsEuclideanDistanceSphericalInjectionInt8SSE(p, q, dim, e2);\n    return;\n  }\n#endif  //__SSE4_1__\n\n  *out = MipsEuclideanDistanceSphericalInjectionInt8Scalar(p, q, dim, e2);\n}\n\n//! Compute the distance between matrix and query by RepeatedQuadraticInjection\nvoid MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,\n    float *out) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(p, q, dim, m,\n                                                                   e2);\n    return;\n  }\n#endif\n#if defined(__SSE4_1__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {\n    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(p, q, dim, m,\n                                                                  e2);\n    return;\n  }\n#endif  //__SSE4_1__\n\n  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(p, q, dim, m,\n                                                                   e2);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_int8_sse.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"distance_matrix_accum_int8.i\"\n#include \"distance_matrix_mips_utility.i\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__SSE4_1__)\n//! Compute the Inner Product between p and q, and each Squared L2-Norm value\nfloat InnerProductAndSquaredNormInt8SSE(const int8_t *lhs, const int8_t *rhs,\n                                        size_t size, float *sql, float *sqr) {\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 5) << 5);\n\n  __m128i xmm_sum = _mm_setzero_si128();\n  __m128i xmm_sum_norm1 = _mm_setzero_si128();\n  __m128i xmm_sum_norm2 = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));\n      FMA_INT8_SSE(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      FMA_INT8_SSE(xmm_lhs_1, xmm_rhs_1, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs_1, xmm_lhs_1, 
xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs, xmm_lhs, xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs, xmm_rhs, xmm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 32, rhs += 32) {\n      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));\n      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));\n      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));\n      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));\n      FMA_INT8_SSE(xmm_lhs_0, xmm_rhs_0, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);\n      FMA_INT8_SSE(xmm_lhs_1, xmm_rhs_1, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);\n    }\n\n    if (last >= last_aligned + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum);\n      FMA_INT8_SSE(xmm_lhs, xmm_lhs, xmm_sum_norm1);\n      FMA_INT8_SSE(xmm_rhs, xmm_rhs, xmm_sum_norm2);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));\n  float norm1 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm1));\n  float norm2 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm2));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT8_GENERAL(lhs[14], rhs[14], result, norm1, norm2)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT8_GENERAL(lhs[13], rhs[13], result, norm1, norm2)\n      /* FALLTHRU */\n    case 13:\n      
FMA_INT8_GENERAL(lhs[12], rhs[12], result, norm1, norm2)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT8_GENERAL(lhs[11], rhs[11], result, norm1, norm2)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT8_GENERAL(lhs[10], rhs[10], result, norm1, norm2)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT8_GENERAL(lhs[9], rhs[9], result, norm1, norm2)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT8_GENERAL(lhs[8], rhs[8], result, norm1, norm2)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT8_GENERAL(lhs[7], rhs[7], result, norm1, norm2)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT8_GENERAL(lhs[6], rhs[6], result, norm1, norm2)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT8_GENERAL(lhs[5], rhs[5], result, norm1, norm2)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT8_GENERAL(lhs[4], rhs[4], result, norm1, norm2)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT8_GENERAL(lhs[3], rhs[3], result, norm1, norm2)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT8_GENERAL(lhs[2], rhs[2], result, norm1, norm2)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT8_GENERAL(lhs[1], rhs[1], result, norm1, norm2)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT8_GENERAL(lhs[0], rhs[0], result, norm1, norm2)\n  }\n  *sql = norm1;\n  *sqr = norm2;\n  return result;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionInt8SSE(const int8_t *lhs,\n                                                     const int8_t *rhs,\n                                                     size_t size, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt8SSE(lhs, rhs, size, &u2, &v2);\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(\n    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2) {\n  float u2{0.0f};\n  float v2{0.0f};\n  float sum{0.0f};\n\n  sum = InnerProductAndSquaredNormInt8SSE(lhs, rhs, size, &u2, &v2);\n\n  sum = e2 * (u2 + 
v2 - 2 * sum);\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n#endif  // __SSE4_1__\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/mips_euclidean_distance_matrix_scalar.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"distance_utility.h\"\n#include \"mips_euclidean_distance_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n//--------------------------------------------------\n// Dense\n//--------------------------------------------------\n// Compute the distance between matrix and query by SphericalInjection\ntemplate <typename T>\ninline float MipsEuclideanDistanceSphericalInjectionScalar(const T *p,\n                                                           const T *q,\n                                                           size_t dim,\n                                                           float e2) {\n  ailego_assert(p && q && dim);\n\n  float sum = 0.0;\n  float u2 = 0.0;\n  float v2 = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    u2 += p[i] * p[i];\n    v2 += q[i] * q[i];\n    sum += static_cast<float>(p[i] * q[i]);\n  }\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\n// Compute the distance between matrix and query by RepeatedQuadraticInjection\ntemplate <typename T>\ninline float MipsEuclideanDistanceRepeatedQuadraticInjectionScalar(\n    const T *p, const T *q, size_t dim, size_t m, float e2) {\n  ailego_assert(p && q && 
dim);\n\n  float sum = 0.0;\n  float u2 = 0.0;\n  float v2 = 0.0;\n  for (size_t i = 0; i < dim; ++i) {\n    u2 += p[i] * p[i];\n    v2 += q[i] * q[i];\n    sum += MathHelper::SquaredDifference(p[i], q[i]);\n  }\n\n  sum *= e2;\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\n/*! Mips Squared Euclidean Distance Matrix (INT4, M=1, N=1)\n */\n//! Calculate sum of squared values\nstatic inline float Squared(uint8_t v) {\n  return static_cast<float>(((int8_t)(v << 4) >> 4) * ((int8_t)(v << 4) >> 4) +\n                            ((int8_t)(v & 0xf0) >> 4) *\n                                ((int8_t)(v & 0xf0) >> 4));\n}\n\n// Compute the distance between matrix and query by SphericalInjection\nfloat MipsEuclideanDistanceSphericalInjectionInt4Scalar(const uint8_t *p,\n                                                        const uint8_t *q,\n                                                        size_t dim, float e2) {\n  ailego_assert(p && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  float u2 = 0.0;\n  float v2 = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    const uint8_t p_val = p[i];\n    const uint8_t q_val = q[i];\n    u2 += Squared(p_val);\n    v2 += Squared(q_val);\n    sum += Int4MulTable[((p_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4MulTable[((p_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n\n  return ComputeSphericalInjection(sum, u2, v2, e2);\n}\n\n// Compute the distance between matrix and query by RepeatedQuadraticInjection\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(\n    const uint8_t *p, const uint8_t *q, size_t dim, size_t m, float e2) {\n  ailego_assert(p && q && dim && !(dim & 1));\n\n  float sum = 0.0;\n  float u2 = 0.0;\n  float v2 = 0.0;\n  for (size_t i = 0; i < (dim >> 1); ++i) {\n    const uint8_t p_val = p[i];\n    const uint8_t q_val = q[i];\n    u2 += Squared(p_val);\n    v2 
+= Squared(q_val);\n    sum += Int4SquaredDiffTable[((p_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +\n           Int4SquaredDiffTable[((p_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];\n  }\n  sum *= e2;\n  u2 *= e2;\n  v2 *= e2;\n  for (size_t i = 0; i < m; ++i) {\n    sum += (u2 - v2) * (u2 - v2);\n    u2 = u2 * u2;\n    v2 = v2 * v2;\n  }\n\n  return sum;\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionInt8Scalar(const int8_t *p,\n                                                        const int8_t *q,\n                                                        size_t dim, float e2) {\n  return MipsEuclideanDistanceSphericalInjectionScalar<int8_t>(p, q, dim, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(\n    const int8_t *p, const int8_t *q, size_t dim, size_t m, float e2) {\n  return MipsEuclideanDistanceRepeatedQuadraticInjectionScalar<int8_t>(\n      p, q, dim, m, e2);\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp16Scalar(\n    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, float e2) {\n  return MipsEuclideanDistanceSphericalInjectionScalar<ailego::Float16>(\n      p, q, dim, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(\n    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, size_t m,\n    float e2) {\n  return MipsEuclideanDistanceRepeatedQuadraticInjectionScalar<ailego::Float16>(\n      p, q, dim, m, e2);\n}\n\nfloat MipsEuclideanDistanceSphericalInjectionFp32Scalar(const float *p,\n                                                        const float *q,\n                                                        size_t dim, float e2) {\n  return MipsEuclideanDistanceSphericalInjectionScalar<float>(p, q, dim, e2);\n}\n\nfloat MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(\n    const float *p, const float *q, size_t dim, size_t m, float e2) {\n  return MipsEuclideanDistanceRepeatedQuadraticInjectionScalar<float>(p, q, dim,\n                                   
                                   m, e2);\n}\n\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm1_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! L1-Norm Matrix\n */\ntemplate <typename T, size_t M, typename = void>\nstruct Norm1Matrix;\n\n/*! L1-Norm Matrix\n */\ntemplate <typename T, size_t M>\nstruct Norm1Matrix<T, M,\n                   typename std::enable_if<IsSignedArithmetic<T>::value &&\n                                           sizeof(T) >= 2 && M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim * M;\n    if (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = MathHelper::Absolute(m[i]);\n      }\n      m += M;\n    }\n    while (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += MathHelper::Absolute(m[i]);\n      }\n      m += M;\n    }\n  }\n};\n\n/*! L1-Norm Matrix (INT8)\n */\ntemplate <size_t M>\nstruct Norm1Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! 
Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *m_end = m_it + (dim >> 2) * M;\n\n    if (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = Absolute(m_it[i]);\n      }\n      m_it += M;\n    }\n    while (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += Absolute(m_it[i]);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate sum of absolute values\n  static inline float Absolute(uint32_t v) {\n    return static_cast<float>(\n        MathHelper::Absolute<int8_t, int32_t>((int8_t)(v >> 0)) +\n        MathHelper::Absolute<int8_t, int32_t>((int8_t)(v >> 8)) +\n        MathHelper::Absolute<int8_t, int32_t>((int8_t)(v >> 16)) +\n        MathHelper::Absolute<int8_t, int32_t>((int8_t)(v >> 24)));\n  }\n};\n\n/*! L1-Norm Matrix (M=1)\n */\ntemplate <typename T>\nstruct Norm1Matrix<\n    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim;\n    if (m != m_end) {\n      *out = MathHelper::Absolute(*m++);\n    }\n    while (m != m_end) {\n      *out += MathHelper::Absolute(*m++);\n    }\n  }\n};\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n/*! L1-Norm Matrix (FP32, M=1)\n */\ntemplate <>\nstruct Norm1Matrix<float, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! 
Compute the L1-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n/*! L1-Norm Matrix (FP16, M=1)\n */\ntemplate <>\nstruct Norm1Matrix<Float16, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the L1-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm1_matrix_fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"ailego/internal/cpu_features.h\"\n#include \"norm1_matrix.h\"\n#include \"norm_matrix_fp16.i\"\n\nnamespace zvec {\nnamespace ailego {\n\n#define NORM_FP32_STEP_GENERAL SA_FP32_GENERAL\n#define NORM_FP32_STEP_SSE SA_FP32_SSE\n#define NORM_FP32_STEP_AVX SA_FP32_AVX\n#define NORM_FP32_STEP_AVX512 SA_FP32_AVX512\n#define NORM_FP32_STEP_NEON SA_FP32_NEON\n#define NORM_FP16_STEP_GENERAL SA_FP16_GENERAL\n#define NORM_FP16_STEP_NEON SA_FP16_NEON\n\n#if defined(__SSE__)\nstatic const __m128 ABS_MASK_FP32_SSE =\n    _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu));\n#endif  // __SSE__\n\n#if defined(__AVX__)\nstatic const __m256 ABS_MASK_FP32_AVX =\n    _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffu));\n#endif  // __AVX__\n\n#if defined(__AVX512F__)\nstatic const __m512 ABS_MASK_FP32_AVX512 =\n    _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffffu));\n#endif  // __AVX512F__\n\n//! Calculate sum of absolute (GENERAL)\n#define SA_FP32_GENERAL(m, sum) sum += FastAbs(m);\n\n//! Calculate sum of absolute (SSE)\n#define SA_FP32_SSE(xmm_m, xmm_sum) \\\n  xmm_sum = _mm_add_ps(_mm_and_ps(xmm_m, ABS_MASK_FP32_SSE), xmm_sum);\n\n//! Calculate sum of absolute (AVX)\n#define SA_FP32_AVX(ymm_m, ymm_sum) \\\n  ymm_sum = _mm256_add_ps(_mm256_and_ps(ymm_m, ABS_MASK_FP32_AVX), ymm_sum);\n\n//! 
Calculate sum of absolute (AVX512)\n#define SA_FP32_AVX512(zmm_m, zmm_sum) \\\n  zmm_sum = _mm512_add_ps(_mm512_and_ps(zmm_m, ABS_MASK_FP32_AVX512), zmm_sum);\n\n//! Calculate sum of absolute (NEON)\n#define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum);\n\n//! Calculate sum of absolute (GENERAL)\n#define SA_FP16_GENERAL(m, sum) sum += Float16::Absolute(m);\n\n//! Calculate sum of absolute (NEON)\n#define SA_FP16_NEON(v_m, v_sum) v_sum = vaddq_f16(vabsq_f16(v_m), v_sum);\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n//! Compute the L1-norm of vectors (FP16, M=1)\nvoid Norm1Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,\n                                      float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP16_1_NEON(m, dim, out, )\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP16_1_AVX512(m, dim, out, )\n    return;\n  }\n#endif\n  NORM_FP16_1_AVX(m, dim, out, )\n#endif\n}\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm1_matrix_fp32.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"ailego/internal/cpu_features.h\"\n#include \"norm1_matrix.h\"\n#include \"norm_matrix_fp32.i\"\n\nnamespace zvec {\nnamespace ailego {\n\n#define NORM_FP32_STEP_GENERAL SA_FP32_GENERAL\n#define NORM_FP32_STEP_SSE SA_FP32_SSE\n#define NORM_FP32_STEP_AVX SA_FP32_AVX\n#define NORM_FP32_STEP_AVX512 SA_FP32_AVX512\n#define NORM_FP32_STEP_NEON SA_FP32_NEON\n\n#if defined(__SSE__)\n#define ABS_MASK_FP32_SSE _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu))\n#endif  // __SSE__\n\n#if defined(__AVX__)\n#define ABS_MASK_FP32_AVX _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffu))\n#endif  // __AVX__\n\n#if defined(__AVX512F__)\n#define ABS_MASK_FP32_AVX512 _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffffu))\n#endif  // __AVX512F__\n\n//! Calculate sum of absolute (GENERAL)\n#define SA_FP32_GENERAL(m, sum) sum += FastAbs(m);\n\n//! Calculate sum of absolute (SSE)\n#define SA_FP32_SSE(xmm_m, xmm_sum) \\\n  xmm_sum = _mm_add_ps(_mm_and_ps(xmm_m, ABS_MASK_FP32_SSE), xmm_sum);\n\n//! Calculate sum of absolute (AVX)\n#define SA_FP32_AVX(ymm_m, ymm_sum) \\\n  ymm_sum = _mm256_add_ps(_mm256_and_ps(ymm_m, ABS_MASK_FP32_AVX), ymm_sum);\n\n//! Calculate sum of absolute (AVX512)\n#define SA_FP32_AVX512(zmm_m, zmm_sum) \\\n  zmm_sum = _mm512_add_ps(_mm512_and_ps(zmm_m, ABS_MASK_FP32_AVX512), zmm_sum);\n\n//! 
Calculate sum of absolute (NEON)\n#define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum);\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n//! Compute the L1-norm of vectors (FP32, M=1)\nvoid Norm1Matrix<float, 1>::Compute(const ValueType *m, size_t dim,\n                                    float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP32_1_NEON(m, dim, out, )\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP32_1_AVX512(m, dim, out, )\n    return;\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    NORM_FP32_1_AVX(m, dim, out, )\n    return;\n  }\n#endif\n  NORM_FP32_1_SSE(m, dim, out, )\n#endif\n}\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm2_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cmath>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! L2-Norm Matrix\n */\ntemplate <typename T, size_t M, typename = void>\nstruct Norm2Matrix;\n\n/*! L2-Norm Matrix\n */\ntemplate <typename T, size_t M>\nstruct Norm2Matrix<T, M,\n                   typename std::enable_if<IsSignedArithmetic<T>::value &&\n                                           sizeof(T) >= 2 && M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim * M;\n    if (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType v = m[i];\n        *(out + i) = static_cast<float>(v * v);\n      }\n      m += M;\n    }\n    while (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType v = m[i];\n        *(out + i) += static_cast<float>(v * v);\n      }\n      m += M;\n    }\n    for (size_t i = 0; i < M; ++i) {\n      float v = *out;\n      *out++ = std::sqrt(v);\n    }\n  }\n};\n\n/*! 
L2-Norm Matrix (INT8)\n */\ntemplate <size_t M>\nstruct Norm2Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *m_end = m_it + (dim >> 2) * M;\n\n    if (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    while (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    for (size_t i = 0; i < M; ++i) {\n      float v = *out;\n      *out++ = std::sqrt(v);\n    }\n  }\n\n protected:\n  //! Calculate sum of squared values\n  static inline float Squared(uint32_t v) {\n    return static_cast<float>((int8_t)(v >> 0) * (int8_t)(v >> 0) +\n                              (int8_t)(v >> 8) * (int8_t)(v >> 8) +\n                              (int8_t)(v >> 16) * (int8_t)(v >> 16) +\n                              (int8_t)(v >> 24) * (int8_t)(v >> 24));\n  }\n};\n\n/*! L2-Norm Matrix (M=1)\n */\ntemplate <typename T>\nstruct Norm2Matrix<\n    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim;\n    if (m != m_end) {\n      ValueType v = *m++;\n      *out = static_cast<float>(v * v);\n    }\n    while (m != m_end) {\n      ValueType v = *m++;\n      *out += static_cast<float>(v * v);\n    }\n    *out = std::sqrt(*out);\n  }\n};\n\n/*! L2-Norm Matrix (M=1, INT4)\n */\ntemplate <>\nstruct Norm2Matrix<uint8_t, 1> {\n  //! 
Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && !(dim & 1) && dim && out);\n\n    const uint8_t *m_end = m + (dim >> 1);\n    float square = 0.0f;\n    while (m != m_end) {\n      square += Squared(*m++);\n    }\n    *out = std::sqrt(square);\n  }\n\n protected:\n  //! Calculate sum of squared values\n  static inline float Squared(uint8_t v) {\n    return static_cast<float>(\n        ((int8_t)(v << 4) >> 4) * ((int8_t)(v << 4) >> 4) +\n        ((int8_t)(v & 0xf0) >> 4) * ((int8_t)(v & 0xf0) >> 4));\n  }\n};\n\n/*! L2-Norm Matrix (INT4)\n */\ntemplate <size_t M>\nstruct Norm2Matrix<uint8_t, M, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *m_end = m_it + (dim >> 3) * M;\n\n    if (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    while (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    for (size_t i = 0; i < M; ++i) {\n      float v = *out;\n      *out++ = std::sqrt(v);\n    }\n  }\n\n protected:\n  //! Calculate sum of squared values\n  static inline float Squared(uint32_t u) {\n    float sum = 0.0f;\n    for (size_t i = 0; i < 32; i += 8) {\n      uint8_t v = (uint8_t)(u >> i);\n      int8_t lo = (int8_t)(v << 4) >> 4;\n      int8_t hi = (int8_t)(v & 0xf0) >> 4;\n      sum += hi * hi + lo * lo;\n    }\n    return sum;\n  }\n};\n\n/*! Squared L2-Norm Matrix\n */\ntemplate <typename T, size_t M, typename = void>\nstruct SquaredNorm2Matrix;\n\n/*! 
Squared L2-Norm Matrix\n */\ntemplate <typename T, size_t M>\nstruct SquaredNorm2Matrix<\n    T, M,\n    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&\n                            M >= 2>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim * M;\n    if (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType v = m[i];\n        *(out + i) = static_cast<float>(v * v);\n      }\n      m += M;\n    }\n    while (m != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        ValueType v = m[i];\n        *(out + i) += static_cast<float>(v * v);\n      }\n      m += M;\n    }\n  }\n};\n\n/*! Squared L2-Norm Matrix (INT8)\n */\ntemplate <size_t M>\nstruct SquaredNorm2Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = int8_t;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && !(dim & 3) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *m_end = m_it + (dim >> 2) * M;\n\n    if (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    while (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! 
Calculate sum of squared values\n  static inline float Squared(uint32_t v) {\n    return static_cast<float>((int8_t)(v >> 0) * (int8_t)(v >> 0) +\n                              (int8_t)(v >> 8) * (int8_t)(v >> 8) +\n                              (int8_t)(v >> 16) * (int8_t)(v >> 16) +\n                              (int8_t)(v >> 24) * (int8_t)(v >> 24));\n  }\n};\n\n/*! Squared L2-Norm Matrix (M=1)\n */\ntemplate <typename T>\nstruct SquaredNorm2Matrix<\n    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && out);\n\n    const ValueType *m_end = m + dim;\n    if (m != m_end) {\n      ValueType v = *m++;\n      *out = static_cast<float>(v * v);\n    }\n    while (m != m_end) {\n      ValueType v = *m++;\n      *out += static_cast<float>(v * v);\n    }\n  }\n};\n\n/*! L2-Norm Matrix (M=1, INT4)\n */\ntemplate <>\nstruct SquaredNorm2Matrix<uint8_t, 1> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && !(dim & 1) && out);\n\n    const uint8_t *m_end = m + (dim >> 1);\n    *out = 0.0f;\n    while (m != m_end) {\n      *out += Squared(*m++);\n    }\n  }\n\n protected:\n  //! Calculate sum of squared values\n  static inline float Squared(uint8_t v) {\n    return static_cast<float>(\n        ((int8_t)(v << 4) >> 4) * ((int8_t)(v << 4) >> 4) +\n        ((int8_t)(v & 0xf0) >> 4) * ((int8_t)(v & 0xf0) >> 4));\n  }\n};\n\n/*! Squared L2-Norm Matrix (INT4)\n */\ntemplate <size_t M>\nstruct SquaredNorm2Matrix<uint8_t, M, typename std::enable_if<M >= 2>::type> {\n  //! Type of value\n  using ValueType = uint8_t;\n\n  //! 
Compute the norm of vectors\n  static inline void Compute(const ValueType *m, size_t dim, float *out) {\n    ailego_assert(m && dim && !(dim & 7) && out);\n\n    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);\n    const uint32_t *m_end = m_it + (dim >> 3) * M;\n\n    if (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) = Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n    while (m_it != m_end) {\n      for (size_t i = 0; i < M; ++i) {\n        *(out + i) += Squared(m_it[i]);\n      }\n      m_it += M;\n    }\n  }\n\n protected:\n  //! Calculate sum of squared values\n  static inline float Squared(uint32_t u) {\n    float sum = 0.0f;\n    for (size_t i = 0; i < 32; i += 8) {\n      uint8_t v = (uint8_t)(u >> i);\n      int8_t lo = (int8_t)(v << 4) >> 4;\n      int8_t hi = (int8_t)(v & 0xf0) >> 4;\n      sum += hi * hi + lo * lo;\n    }\n    return sum;\n  }\n};\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n/*! L2-Norm Matrix (FP32, M=1)\n */\ntemplate <>\nstruct Norm2Matrix<float, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the L2-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n\n/*! Squared L2-Norm Matrix (FP32, M=1)\n */\ntemplate <>\nstruct SquaredNorm2Matrix<float, 1> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the squared L2-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n/*! L2-Norm Matrix (FP16, M=1)\n */\ntemplate <>\nstruct Norm2Matrix<Float16, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the L2-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n\n/*! 
Squared L2-Norm Matrix (FP16, M=1)\n */\ntemplate <>\nstruct SquaredNorm2Matrix<Float16, 1> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the squared L2-norm of vectors\n  static void Compute(const ValueType *m, size_t dim, float *out);\n};\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm2_matrix_fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n#include \"ailego/internal/cpu_features.h\"\n#include \"norm2_matrix.h\"\n#include \"norm_matrix_fp16.i\"\n\nnamespace zvec {\nnamespace ailego {\n\n#define NORM_FP32_STEP_GENERAL SS_FP32_GENERAL\n#define NORM_FP32_STEP_SSE SS_FP32_SSE\n#define NORM_FP32_STEP_AVX SS_FP32_AVX\n#define NORM_FP32_STEP_AVX512 SS_FP32_AVX512\n#define NORM_FP32_STEP_NEON SS_FP32_NEON\n#define NORM_FP16_STEP_GENERAL SS_FP16_GENERAL\n#define NORM_FP16_STEP_NEON SS_FP16_NEON\n\n//! Calculate sum of squared (GENERAL)\n#define SS_FP32_GENERAL(m, sum) sum += (m) * (m);\n\n//! Calculate sum of squared (SSE)\n#define SS_FP32_SSE(xmm_m, xmm_sum) \\\n  xmm_sum = _mm_fmadd_ps(xmm_m, xmm_m, xmm_sum);\n\n//! Calculate sum of squared (AVX)\n#define SS_FP32_AVX(ymm_m, ymm_sum) \\\n  ymm_sum = _mm256_fmadd_ps(ymm_m, ymm_m, ymm_sum);\n\n//! Calculate sum of squared (AVX512)\n#define SS_FP32_AVX512(zmm_m, zmm_sum) \\\n  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_m, zmm_sum);\n\n//! Calculate sum of squared (NEON)\n#define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m);\n\n//! Calculate sum of squared (GENERAL)\n#define SS_FP16_GENERAL(m, sum) sum += (m) * (m);\n\n//! 
Calculate sum of squared (NEON)\n#define SS_FP16_NEON(v_m, v_sum) v_sum = vfmaq_f16(v_sum, v_m, v_m);\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n//! Compute the L2-norm of vectors (FP16, M=1)\nvoid Norm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,\n                                      float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP16_1_NEON(m, dim, out, std::sqrt)\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP16_1_AVX512(m, dim, out, std::sqrt)\n    return;\n  }\n#endif\n  NORM_FP16_1_AVX(m, dim, out, std::sqrt)\n#endif\n}\n\n//! Compute the L2-norm of vectors (FP16, M=1)\nvoid SquaredNorm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,\n                                             float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP16_1_NEON(m, dim, out, )\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP16_1_AVX512(m, dim, out, )\n    return;\n  }\n#endif\n  NORM_FP16_1_AVX(m, dim, out, )\n#endif\n}\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/norm2_matrix_fp32.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include \"norm2_matrix.h\"\n#include \"norm_matrix_fp32.i\"\n\nnamespace zvec {\nnamespace ailego {\n\n#define NORM_FP32_STEP_GENERAL SS_FP32_GENERAL\n#define NORM_FP32_STEP_SSE SS_FP32_SSE\n#define NORM_FP32_STEP_AVX SS_FP32_AVX\n#define NORM_FP32_STEP_AVX512 SS_FP32_AVX512\n#define NORM_FP32_STEP_NEON SS_FP32_NEON\n\n//! Calculate sum of squared (GENERAL)\n#define SS_FP32_GENERAL(m, sum) sum += (m) * (m);\n\n//! Calculate sum of squared (SSE)\n#define SS_FP32_SSE(xmm_m, xmm_sum) \\\n  xmm_sum = _mm_fmadd_ps(xmm_m, xmm_m, xmm_sum);\n\n//! Calculate sum of squared (AVX)\n#define SS_FP32_AVX(ymm_m, ymm_sum) \\\n  ymm_sum = _mm256_fmadd_ps(ymm_m, ymm_m, ymm_sum);\n\n//! Calculate sum of squared (AVX512)\n#define SS_FP32_AVX512(zmm_m, zmm_sum) \\\n  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_m, zmm_sum);\n\n//! Calculate sum of squared (NEON)\n#define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m);\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n//! 
Compute the L2-norm of vectors (FP32, M=1)\nvoid Norm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,\n                                    float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP32_1_NEON(m, dim, out, std::sqrt)\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP32_1_AVX512(m, dim, out, std::sqrt)\n    return;\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    NORM_FP32_1_AVX(m, dim, out, std::sqrt)\n    return;\n  }\n#endif\n  NORM_FP32_1_SSE(m, dim, out, std::sqrt)\n#endif\n}\n\n//! Compute the squared L2-norm of vectors (FP32, M=1)\nvoid SquaredNorm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,\n                                           float *out) {\n#if defined(__ARM_NEON)\n  NORM_FP32_1_NEON(m, dim, out, )\n#else\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    NORM_FP32_1_AVX512(m, dim, out, )\n    return;\n  }\n#endif\n#if defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    NORM_FP32_1_AVX(m, dim, out, )\n    return;\n  }\n#endif\n  NORM_FP32_1_SSE(m, dim, out, )\n#endif\n}\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math/norm_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"norm1_matrix.h\"\n#include \"norm2_matrix.h\"\n"
  },
  {
    "path": "src/ailego/math/norm_matrix_fp16.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"matrix_define.i\"\n#include \"matrix_utility.i\"\n\n#if !defined(__FMA__)\n#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))\n#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))\n#endif  // !__FMA__\n\n//! Mask process of computing norm (FP16)\n#define NORM_FP16_MASK_AVX(m, cnt, _RES)                                       \\\n  switch (cnt) {                                                               \\\n    case 7: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(                                          \\\n          _mm_set_epi16(0, *((const short *)(m) + 6),                          \\\n                        *((const short *)(m) + 5), *((const short *)(m) + 4),  \\\n                        *((const short *)(m) + 3), *((const short *)(m) + 2),  \\\n                        *((const short *)(m) + 1), *((const short *)(m))));    \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 6: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_set_epi32(0, *((const int 
*)(m) + 2), \\\n                                                   *((const int *)(m) + 1),    \\\n                                                   *((const int *)(m))));      \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 5: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(                                          \\\n          _mm_set_epi16(0, 0, 0, *((const short *)(m) + 4),                    \\\n                        *((const short *)(m) + 3), *((const short *)(m) + 2),  \\\n                        *((const short *)(m) + 1), *((const short *)(m))));    \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 4: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(                                          \\\n          _mm_set_epi64((__m64)(0ull), *((const __m64 *)(m))));                \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 3: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(                                          \\\n          _mm_set_epi16(0, 0, 0, 0, 0, *((const short *)(m) + 2),              \\\n                        *((const short *)(m) + 1), *((const short *)(m))));    \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                
    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 2: {                                                                  \\\n      __m256 ymm_m =                                                           \\\n          _mm256_cvtph_ps(_mm_set_epi32(0, 0, 0, *((const int *)(m))));        \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n    case 1: {                                                                  \\\n      __m256 ymm_m = _mm256_cvtph_ps(                                          \\\n          _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, *((const short *)(m))));          \\\n      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \\\n      break;                                                                   \\\n    }                                                                          \\\n  }\n\n//! 
Compute the norm of vectors (FP16, M=1)\n#define NORM_FP16_1_AVX(m, dim, out, _NORM)                                  \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                \\\n  const Float16 *last = m + dim;                                             \\\n  const Float16 *last_aligned = m + ((dim >> 4) << 4);                       \\\n  if (((uintptr_t)m & 0x1f) == 0) {                                          \\\n    for (; m != last_aligned; m += 16) {                                     \\\n      __m256i ymm_mi = _mm256_load_si256((const __m256i *)m);                \\\n      __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n      __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                               \\\n      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                               \\\n    }                                                                        \\\n    if (last >= last_aligned + 8) {                                          \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)m));    \\\n      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                 \\\n      m += 8;                                                                \\\n    }                                                                        \\\n  } else {                                                                   \\\n    for (; m != last_aligned; m += 16) {                                     \\\n      __m256i ymm_mi = _mm256_loadu_si256((const __m256i *)m);               \\\n      __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \\\n      __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \\\n      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                               \\\n      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                               \\\n    }          
                                                              \\\n    if (last >= last_aligned + 8) {                                          \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));   \\\n      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                 \\\n      m += 8;                                                                \\\n    }                                                                        \\\n  }                                                                          \\\n  NORM_FP16_MASK_AVX(m, (last - m), ymm_sum)                                 \\\n  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));\n\n//! Compute the norm of vectors (FP16, M=1)\n#define NORM_FP16_1_AVX512(m, dim, out, _NORM)                                \\\n  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())                 \\\n  const Float16 *last = m + dim;                                              \\\n  const Float16 *last_aligned = m + ((dim >> 5) << 5);                        \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                           \\\n    for (; m != last_aligned; m += 32) {                                      \\\n      __m512i zmm_mi = _mm512_load_si512((const __m512i *)m);                 \\\n      __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n      __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                             \\\n      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                             \\\n    }                                                                         \\\n    if (last >= last_aligned + 16) {                                          \\\n      __m512 zmm_m = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)m));  \\\n      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                               \\\n      m += 16;                     
                                           \\\n    }                                                                         \\\n  } else {                                                                    \\\n    for (; m != last_aligned; m += 32) {                                      \\\n      __m512i zmm_mi = _mm512_loadu_si512((const __m512i *)m);                \\\n      __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \\\n      __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \\\n      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                             \\\n      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                             \\\n    }                                                                         \\\n    if (last >= last_aligned + 16) {                                          \\\n      __m512 zmm_m = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)m)); \\\n      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                               \\\n      m += 16;                                                                \\\n    }                                                                         \\\n  }                                                                           \\\n  float result =                                                              \\\n      HorizontalAdd_FP32_V512(_mm512_add_ps(zmm_sum_0_0, zmm_sum_0_1));       \\\n  if (m != last) {                                                            \\\n    MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())               \\\n    if (last >= m + 8) {                                                      \\\n      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));    \\\n      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                  \\\n      m += 8;                                                                 \\\n    }                                                              
           \\\n    NORM_FP16_MASK_AVX(m, (last - m), ymm_sum)                                \\\n    result += HorizontalAdd_FP32_V256(ymm_sum_0_0);                           \\\n  }                                                                           \\\n  *out = _NORM(result);\n\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\n//! Compute the norm of vectors (FP16, M=1)\n#define NORM_FP16_1_NEON(m, dim, out, _NORM)                                 \\\n  MATRIX_VAR_INIT(1, 1, float16x8_t, v_sum, vdupq_n_f16(0))                  \\\n  const Float16 *last = m + dim;                                             \\\n  const Float16 *last_aligned = m + ((dim >> 3) << 3);                       \\\n  for (; m != last_aligned; m += 8) {                                        \\\n    float16x8_t v_m = vld1q_f16((const float16_t *)m);                       \\\n    NORM_FP16_STEP_NEON(v_m, v_sum_0_0)                                      \\\n  }                                                                          \\\n  if (last >= m + 4) {                                                       \\\n    float16x8_t v_m = vreinterpretq_f16_u64(                                 \\\n        vld1q_lane_u64((const uint64_t *)m, vdupq_n_u64(0), 0));             \\\n    NORM_FP16_STEP_NEON(v_m, v_sum_0_0)                                      \\\n    m += 4;                                                                  \\\n  }                                                                          \\\n  float result = vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v_sum_0_0)), \\\n                                      vcvt_high_f32_f16(v_sum_0_0)));        \\\n  switch (last - m) {                                                        \\\n    case 3:                                                                  \\\n      NORM_FP16_STEP_GENERAL(m[2], result)                                   \\\n      /* FALLTHRU */                                                         
\\\n    case 2:                                                                  \\\n      NORM_FP16_STEP_GENERAL(m[1], result)                                   \\\n      /* FALLTHRU */                                                         \\\n    case 1:                                                                  \\\n      NORM_FP16_STEP_GENERAL(m[0], result)                                   \\\n  }                                                                          \\\n  *out = _NORM(result);\n\n#else\n//! Compute the norm of vectors (FP16, M=1)\n#define NORM_FP16_1_NEON(m, dim, out, _NORM)                        \\\n  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))         \\\n  const Float16 *last = m + dim;                                    \\\n  const Float16 *last_aligned = m + ((dim >> 3) << 3);              \\\n  for (; m != last_aligned; m += 8) {                               \\\n    float16x8_t v_m = vld1q_f16((const float16_t *)m);              \\\n    float32x4_t v_n_0 = vcvt_f32_f16(vget_low_f16(v_m));            \\\n    float32x4_t v_n_1 = vcvt_high_f32_f16(v_m);                     \\\n    NORM_FP32_STEP_NEON(v_n_0, v_sum_0_0)                           \\\n    NORM_FP32_STEP_NEON(v_n_1, v_sum_0_1)                           \\\n  }                                                                 \\\n  if (last >= m + 4) {                                              \\\n    float32x4_t v_m = vcvt_f32_f16(vld1_f16((const float16_t *)m)); \\\n    NORM_FP32_STEP_NEON(v_m, v_sum_0_0)                             \\\n    m += 4;                                                         \\\n  }                                                                 \\\n  float result = vaddvq_f32(vaddq_f32(v_sum_0_0, v_sum_0_1));       \\\n  switch (last - m) {                                               \\\n    case 3:                                                         \\\n      NORM_FP16_STEP_GENERAL(m[2], result)                     
     \\\n      /* FALLTHRU */                                                \\\n    case 2:                                                         \\\n      NORM_FP16_STEP_GENERAL(m[1], result)                          \\\n      /* FALLTHRU */                                                \\\n    case 1:                                                         \\\n      NORM_FP16_STEP_GENERAL(m[0], result)                          \\\n  }                                                                 \\\n  *out = _NORM(result);\n\n#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC"
  },
  {
    "path": "src/ailego/math/norm_matrix_fp32.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"matrix_define.i\"\n#include \"matrix_utility.i\"\n\n#if !defined(__FMA__)\n#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))\n#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))\n#endif  // !__FMA__\n\n//! Mask process of computing norm (FP32)\n#define NORM_FP32_MASK_SSE(m, cnt, _RES)                 \\\n  switch (cnt) {                                         \\\n    case 3: {                                            \\\n      __m128 xmm_m = _mm_set_ps(0.0f, m[2], m[1], m[0]); \\\n      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \\\n      break;                                             \\\n    }                                                    \\\n    case 2: {                                            \\\n      __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]); \\\n      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \\\n      break;                                             \\\n    }                                                    \\\n    case 1: {                                            \\\n      __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, 0.0f, m[0]); \\\n      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \\\n      break;                                             \\\n    }                                                    \\\n  }\n\n//! 
Compute the norm of vectors (FP32, M=1)\n#define NORM_FP32_1_SSE(m, dim, out, _NORM)                \\\n  MATRIX_VAR_INIT(1, 1, __m128, xmm_sum, _mm_setzero_ps()) \\\n  const float *last = m + dim;                             \\\n  const float *last_aligned = m + ((dim >> 3) << 3);       \\\n  if (((uintptr_t)m & 0xf) == 0) {                         \\\n    for (; m != last_aligned; m += 8) {                    \\\n      __m128 xmm_m_0 = _mm_load_ps(m + 0);                 \\\n      __m128 xmm_m_1 = _mm_load_ps(m + 4);                 \\\n      NORM_FP32_STEP_SSE(xmm_m_0, xmm_sum_0_0)             \\\n      NORM_FP32_STEP_SSE(xmm_m_1, xmm_sum_0_0)             \\\n    }                                                      \\\n    if (last >= last_aligned + 4) {                        \\\n      __m128 xmm_m = _mm_load_ps(m);                       \\\n      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)               \\\n      m += 4;                                              \\\n    }                                                      \\\n  } else {                                                 \\\n    for (; m != last_aligned; m += 8) {                    \\\n      __m128 xmm_m_0 = _mm_loadu_ps(m + 0);                \\\n      __m128 xmm_m_1 = _mm_loadu_ps(m + 4);                \\\n      NORM_FP32_STEP_SSE(xmm_m_0, xmm_sum_0_0)             \\\n      NORM_FP32_STEP_SSE(xmm_m_1, xmm_sum_0_0)             \\\n    }                                                      \\\n    if (last >= last_aligned + 4) {                        \\\n      __m128 xmm_m = _mm_loadu_ps(m);                      \\\n      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)               \\\n      m += 4;                                              \\\n    }                                                      \\\n  }                                                        \\\n  NORM_FP32_MASK_SSE(m, (last - m), xmm_sum)               \\\n  *out = _NORM(HorizontalAdd_FP32_V128(xmm_sum_0_0));\n\n//! 
Compute the norm of vectors (FP32, M=1)\n#define NORM_FP32_1_AVX(m, dim, out, _NORM)                   \\\n  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps()) \\\n  const float *last = m + dim;                                \\\n  const float *last_aligned = m + ((dim >> 4) << 4);          \\\n  if (((uintptr_t)m & 0x1f) == 0) {                           \\\n    for (; m != last_aligned; m += 16) {                      \\\n      __m256 ymm_m_0 = _mm256_load_ps(m + 0);                 \\\n      __m256 ymm_m_1 = _mm256_load_ps(m + 8);                 \\\n      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                \\\n      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                \\\n    }                                                         \\\n    if (last >= last_aligned + 8) {                           \\\n      __m256 ymm_m = _mm256_load_ps(m);                       \\\n      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                  \\\n      m += 8;                                                 \\\n    }                                                         \\\n  } else {                                                    \\\n    for (; m != last_aligned; m += 16) {                      \\\n      __m256 ymm_m_0 = _mm256_loadu_ps(m + 0);                \\\n      __m256 ymm_m_1 = _mm256_loadu_ps(m + 8);                \\\n      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                \\\n      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                \\\n    }                                                         \\\n    if (last >= last_aligned + 8) {                           \\\n      __m256 ymm_m = _mm256_loadu_ps(m);                      \\\n      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                  \\\n      m += 8;                                                 \\\n    }                                                         \\\n  }                                                           \\\n  float result = 
HorizontalAdd_FP32_V256(ymm_sum_0_0);        \\\n  if (m != last) {                                            \\\n    __m128 xmm_sum_0_0 = _mm_setzero_ps();                    \\\n    if (last >= m + 4) {                                      \\\n      __m128 xmm_m = _mm_loadu_ps(m);                         \\\n      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)                  \\\n      m += 4;                                                 \\\n    }                                                         \\\n    NORM_FP32_MASK_SSE(m, (last - m), xmm_sum)                \\\n    result += HorizontalAdd_FP32_V128(xmm_sum_0_0);           \\\n  }                                                           \\\n  *out = _NORM(result);\n\n//! Compute the norm of vectors (FP32, M=1)\n#define NORM_FP32_1_AVX512(m, dim, out, _NORM)                          \\\n  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())           \\\n  const float *last = m + dim;                                          \\\n  const float *last_aligned = m + ((dim >> 5) << 5);                    \\\n  if (((uintptr_t)m & 0x3f) == 0) {                                     \\\n    for (; m != last_aligned; m += 32) {                                \\\n      __m512 zmm_m_0 = _mm512_load_ps(m + 0);                           \\\n      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                       \\\n      __m512 zmm_m_1 = _mm512_load_ps(m + 16);                          \\\n      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                       \\\n    }                                                                   \\\n    if (last >= last_aligned + 16) {                                    \\\n      __m512 zmm_m = _mm512_load_ps(m);                                 \\\n      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                         \\\n      m += 16;                                                          \\\n    }                                                                   \\\n  
} else {                                                              \\\n    for (; m != last_aligned; m += 32) {                                \\\n      __m512 zmm_m_0 = _mm512_loadu_ps(m + 0);                          \\\n      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                       \\\n      __m512 zmm_m_1 = _mm512_loadu_ps(m + 16);                         \\\n      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                       \\\n    }                                                                   \\\n    if (last >= last_aligned + 16) {                                    \\\n      __m512 zmm_m = _mm512_loadu_ps(m);                                \\\n      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                         \\\n      m += 16;                                                          \\\n    }                                                                   \\\n  }                                                                     \\\n  if (m != last) {                                                      \\\n    __mmask16 mask = (__mmask16)((1 << (last - m)) - 1);                \\\n    __m512 zmm_m = _mm512_mask_loadu_ps(_mm512_setzero_ps(), mask, m);  \\\n    NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                           \\\n  }                                                                     \\\n  float result =                                                        \\\n      HorizontalAdd_FP32_V512(_mm512_add_ps(zmm_sum_0_0, zmm_sum_0_1)); \\\n  *out = _NORM(result);\n\n//! 
Compute the norm of vectors (FP32, M=1)\n#define NORM_FP32_1_NEON(m, dim, out, _NORM)                  \\\n  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))   \\\n  const float *last = m + dim;                                \\\n  const float *last_aligned = m + ((dim >> 3) << 3);          \\\n  for (; m != last_aligned; m += 8) {                         \\\n    float32x4_t v_m_0 = vld1q_f32(m + 0);                     \\\n    float32x4_t v_m_1 = vld1q_f32(m + 4);                     \\\n    NORM_FP32_STEP_NEON(v_m_0, v_sum_0_0)                     \\\n    NORM_FP32_STEP_NEON(v_m_1, v_sum_0_1)                     \\\n  }                                                           \\\n  if (last >= last_aligned + 4) {                             \\\n    float32x4_t v_m = vld1q_f32(m);                           \\\n    NORM_FP32_STEP_NEON(v_m, v_sum_0_0)                       \\\n    m += 4;                                                   \\\n  }                                                           \\\n  float result = vaddvq_f32(vaddq_f32(v_sum_0_0, v_sum_0_1)); \\\n  switch (last - m) {                                         \\\n    case 3:                                                   \\\n      NORM_FP32_STEP_GENERAL(m[2], result)                    \\\n      /* FALLTHRU */                                          \\\n    case 2:                                                   \\\n      NORM_FP32_STEP_GENERAL(m[1], result)                    \\\n      /* FALLTHRU */                                          \\\n    case 1:                                                   \\\n      NORM_FP32_STEP_GENERAL(m[0], result)                    \\\n  }                                                           \\\n  *out = _NORM(result);\n"
  },
  {
    "path": "src/ailego/math/normalizer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"normalizer.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n#if (defined(__ARM_NEON) && defined(__aarch64__))\nstatic inline void NormalizeNEON(float *arr, size_t dim, float norm) {\n  float *last = arr + dim;\n  float *last_aligned = arr + ((dim >> 3) << 3);\n\n  float32x4_t v_norm = vdupq_n_f32(norm);\n  for (; arr != last_aligned; arr += 8) {\n    vst1q_f32(arr + 0, vdivq_f32(vld1q_f32(arr + 0), v_norm));\n    vst1q_f32(arr + 4, vdivq_f32(vld1q_f32(arr + 4), v_norm));\n  }\n  if (last >= last_aligned + 4) {\n    vst1q_f32(arr, vdivq_f32(vld1q_f32(arr), v_norm));\n    arr += 4;\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n\n#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)\nstatic inline void NormalizeNEON(float16_t *arr, size_t dim, float norm) {\n  float16_t *last = arr + dim;\n  float16_t *last_aligned = arr + ((dim >> 4) << 4);\n\n  float16x8_t v_norm = vdupq_n_f16((float16_t)norm);\n  for (; arr != last_aligned; arr += 16) {\n    vst1q_f16(arr + 0, vdivq_f16(vld1q_f16(arr + 0), v_norm));\n    vst1q_f16(arr + 8, vdivq_f16(vld1q_f16(arr + 8), v_norm));\n  }\n  if (last >= arr + 8) {\n    vst1q_f16(arr, vdivq_f16(vld1q_f16(arr), v_norm));\n    arr += 8;\n  }\n  if (last >= arr + 4) {\n 
   vst1_f16(arr, vdiv_f16(vld1_f16(arr), vget_low_f16(v_norm)));\n    arr += 4;\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n#else\nstatic inline void NormalizeNEON(float16_t *arr, size_t dim, float norm) {\n  float16_t *last = arr + dim;\n  float16_t *last_aligned = arr + ((dim >> 4) << 4);\n\n  float32x4_t v_norm = vdupq_n_f32(norm);\n  for (; arr != last_aligned; arr += 16) {\n    float16x8_t vf16_0 = vld1q_f16(arr + 0);\n    float16x8_t vf16_1 = vld1q_f16(arr + 8);\n    vf16_0 = vcombine_f16(\n        vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(vget_low_f16(vf16_0)), v_norm)),\n        vcvt_f16_f32(vdivq_f32(vcvt_high_f32_f16(vf16_0), v_norm)));\n    vf16_1 = vcombine_f16(\n        vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(vget_low_f16(vf16_1)), v_norm)),\n        vcvt_f16_f32(vdivq_f32(vcvt_high_f32_f16(vf16_1), v_norm)));\n    vst1q_f16(arr + 0, vf16_0);\n    vst1q_f16(arr + 8, vf16_1);\n  }\n  if (last >= arr + 8) {\n    float16x8_t vf16 = vld1q_f16(arr);\n    vf16 = vcombine_f16(\n        vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(vget_low_f16(vf16)), v_norm)),\n        vcvt_f16_f32(vdivq_f32(vcvt_high_f32_f16(vf16), v_norm)));\n    vst1q_f16(arr, vf16);\n    arr += 8;\n  }\n  if (last >= arr + 4) {\n    vst1_f16(arr, vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(vld1_f16(arr)), v_norm)));\n    arr += 4;\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC\n#endif  // __ARM_NEON && __aarch64__\n\n#if defined(__AVX__)\n#if defined(__AVX512F__)\nstatic inline void NormalizeAVX512(float *arr, size_t dim, float norm) {\n  float *last = arr + dim;\n  float *last_aligned = arr + ((dim >> 4) << 4);\n\n  __m512 zmm_norm = _mm512_set1_ps(norm);\n  if (((uintptr_t)arr & 
0x3f) == 0) {\n    for (; arr != last_aligned; arr += 16) {\n      _mm512_store_ps(arr, _mm512_div_ps(_mm512_load_ps(arr), zmm_norm));\n    }\n    if (last >= arr + 8) {\n      __m256 ymm_norm = _mm256_set1_ps(norm);\n      _mm256_store_ps(arr, _mm256_div_ps(_mm256_load_ps(arr), ymm_norm));\n      arr += 8;\n    }\n    if (last >= arr + 4) {\n      __m128 xmm_norm = _mm_set1_ps(norm);\n      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16) {\n      _mm512_storeu_ps(arr, _mm512_div_ps(_mm512_loadu_ps(arr), zmm_norm));\n    }\n    if (last >= arr + 8) {\n      __m256 ymm_norm = _mm256_set1_ps(norm);\n      _mm256_storeu_ps(arr, _mm256_div_ps(_mm256_loadu_ps(arr), ymm_norm));\n      arr += 8;\n    }\n    if (last >= arr + 4) {\n      __m128 xmm_norm = _mm_set1_ps(norm);\n      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n#endif  // __AVX512F__\n\nstatic inline void NormalizeAVX(float *arr, size_t dim, float norm) {\n  float *last = arr + dim;\n  float *last_aligned = arr + ((dim >> 4) << 4);\n\n  __m256 ymm_norm = _mm256_set1_ps(norm);\n  if (((uintptr_t)arr & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 16) {\n      _mm256_store_ps(arr + 0,\n                      _mm256_div_ps(_mm256_load_ps(arr + 0), ymm_norm));\n      _mm256_store_ps(arr + 8,\n                      _mm256_div_ps(_mm256_load_ps(arr + 8), ymm_norm));\n    }\n    if (last >= arr + 8) {\n      _mm256_store_ps(arr, _mm256_div_ps(_mm256_load_ps(arr), ymm_norm));\n      arr += 8;\n    }\n    if (last >= arr + 4) {\n      __m128 xmm_norm = _mm_set1_ps(norm);\n      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  } else {\n    for (; arr != 
last_aligned; arr += 16) {\n      _mm256_storeu_ps(arr + 0,\n                       _mm256_div_ps(_mm256_loadu_ps(arr + 0), ymm_norm));\n      _mm256_storeu_ps(arr + 8,\n                       _mm256_div_ps(_mm256_loadu_ps(arr + 8), ymm_norm));\n    }\n    if (last >= arr + 8) {\n      _mm256_storeu_ps(arr, _mm256_div_ps(_mm256_loadu_ps(arr), ymm_norm));\n      arr += 8;\n    }\n    if (last >= arr + 4) {\n      __m128 xmm_norm = _mm_set1_ps(norm);\n      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n#endif  // __AVX__\n\n#if defined(__AVX__) && defined(__F16C__)\n#if defined(__AVX512F__)\nstatic inline void NormalizeAVX512(uint16_t *arr, size_t dim, float norm) {\n  uint16_t *last = arr + dim;\n  uint16_t *last_aligned = arr + ((dim >> 4) << 4);\n\n  __m512 zmm_norm = _mm512_set1_ps(norm);\n  if (((uintptr_t)arr & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 16) {\n      _mm256_store_si256(\n          (__m256i *)arr,\n          _mm512_cvtps_ph(_mm512_div_ps(_mm512_cvtph_ps(_mm256_load_si256(\n                                            (const __m256i *)arr)),\n                                        zmm_norm),\n                          _MM_FROUND_NO_EXC));\n    }\n    if (last >= arr + 8) {\n      __m256 ymm_norm = _mm256_set1_ps(norm);\n      _mm_store_si128(\n          (__m128i *)arr,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128(\n                                            (const __m128i *)arr)),\n                                        ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16) {\n      _mm256_storeu_si256(\n          (__m256i *)arr,\n          
_mm512_cvtps_ph(_mm512_div_ps(_mm512_cvtph_ps(_mm256_loadu_si256(\n                                            (const __m256i *)arr)),\n                                        zmm_norm),\n                          _MM_FROUND_NO_EXC));\n    }\n    if (last >= arr + 8) {\n      __m256 ymm_norm = _mm256_set1_ps(norm);\n      _mm_storeu_si128(\n          (__m128i *)arr,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128(\n                                            (const __m128i *)arr)),\n                                        ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n    }\n  }\n  if (last >= arr + 4) {\n    __m128 xmm_norm = _mm_set1_ps(norm);\n    _mm_storel_epi64(\n        (__m128i *)arr,\n        _mm_cvtps_ph(\n            _mm_div_ps(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)arr)),\n                       xmm_norm),\n            _MM_FROUND_NO_EXC));\n    arr += 4;\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] = _cvtss_sh(_cvtsh_ss(arr[2]) / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 2:\n      arr[1] = _cvtss_sh(_cvtsh_ss(arr[1]) / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 1:\n      arr[0] = _cvtss_sh(_cvtsh_ss(arr[0]) / norm, _MM_FROUND_NO_EXC);\n  }\n}\n#endif  // __AVX512F__\n\nstatic inline void NormalizeAVX(uint16_t *arr, size_t dim, float norm) {\n  uint16_t *last = arr + dim;\n  uint16_t *last_aligned = arr + ((dim >> 4) << 4);\n\n  __m256 ymm_norm = _mm256_set1_ps(norm);\n  if (((uintptr_t)arr & 0xf) == 0) {\n    for (; arr != last_aligned; arr += 16) {\n      __m128i xmm_0 = _mm_load_si128((const __m128i *)(arr + 0));\n      __m128i xmm_1 = _mm_load_si128((const __m128i *)(arr + 8));\n      __m256 ymm_0 = _mm256_div_ps(_mm256_cvtph_ps(xmm_0), ymm_norm);\n      __m256 ymm_1 = _mm256_div_ps(_mm256_cvtph_ps(xmm_1), ymm_norm);\n      _mm_store_si128((__m128i *)(arr + 0),\n                      _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));\n      
_mm_store_si128((__m128i *)(arr + 8),\n                      _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));\n    }\n    if (last >= arr + 8) {\n      _mm_store_si128(\n          (__m128i *)arr,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128(\n                                            (const __m128i *)arr)),\n                                        ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16) {\n      __m128i xmm_0 = _mm_loadu_si128((const __m128i *)(arr + 0));\n      __m128i xmm_1 = _mm_loadu_si128((const __m128i *)(arr + 8));\n      __m256 ymm_0 = _mm256_div_ps(_mm256_cvtph_ps(xmm_0), ymm_norm);\n      __m256 ymm_1 = _mm256_div_ps(_mm256_cvtph_ps(xmm_1), ymm_norm);\n      _mm_storeu_si128((__m128i *)(arr + 0),\n                       _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));\n      _mm_storeu_si128((__m128i *)(arr + 8),\n                       _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));\n    }\n    if (last >= arr + 8) {\n      _mm_storeu_si128(\n          (__m128i *)arr,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128(\n                                            (const __m128i *)arr)),\n                                        ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n    }\n  }\n  if (last >= arr + 4) {\n    __m128 xmm_norm = _mm_set1_ps(norm);\n    _mm_storel_epi64(\n        (__m128i *)arr,\n        _mm_cvtps_ph(\n            _mm_div_ps(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)arr)),\n                       xmm_norm),\n            _MM_FROUND_NO_EXC));\n    arr += 4;\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] = _cvtss_sh(_cvtsh_ss(arr[2]) / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 2:\n      arr[1] = _cvtss_sh(_cvtsh_ss(arr[1]) / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 1:\n      arr[0] = _cvtss_sh(_cvtsh_ss(arr[0]) / norm, 
_MM_FROUND_NO_EXC);\n  }\n}\n#endif  // __AVX__ && __F16C__\n\n#if defined(__SSE__)\nstatic inline void NormalizeSSE(float *arr, size_t dim, float norm) {\n  float *last = arr + dim;\n  float *last_aligned = arr + ((dim >> 3) << 3);\n\n  __m128 xmm_norm = _mm_set1_ps(norm);\n  if (((uintptr_t)arr & 0xf) == 0) {\n    for (; arr != last_aligned; arr += 8) {\n      _mm_store_ps(arr + 0, _mm_div_ps(_mm_load_ps(arr + 0), xmm_norm));\n      _mm_store_ps(arr + 4, _mm_div_ps(_mm_load_ps(arr + 4), xmm_norm));\n    }\n    if (last >= last_aligned + 4) {\n      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 8) {\n      _mm_storeu_ps(arr + 0, _mm_div_ps(_mm_loadu_ps(arr + 0), xmm_norm));\n      _mm_storeu_ps(arr + 4, _mm_div_ps(_mm_loadu_ps(arr + 4), xmm_norm));\n    }\n    if (last >= last_aligned + 4) {\n      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), xmm_norm));\n      arr += 4;\n    }\n  }\n  switch (last - arr) {\n    case 3:\n      arr[2] /= norm;\n      /* FALLTHRU */\n    case 2:\n      arr[1] /= norm;\n      /* FALLTHRU */\n    case 1:\n      arr[0] /= norm;\n  }\n}\n#endif  // __SSE__\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n//! Divide every element of the vector by norm\nvoid Normalizer<float>::Compute(ValueType *arr, size_t dim, float norm) {\n#if defined(__ARM_NEON)\n  NormalizeNEON(arr, dim, norm);\n#else\n#if defined(__AVX512F__)\n  if (dim > 15) {\n    NormalizeAVX512(arr, dim, norm);\n    return;\n  }\n#endif  // __AVX512F__\n#if defined(__AVX__)\n  if (dim > 7) {\n    NormalizeAVX(arr, dim, norm);\n    return;\n  }\n#endif  // __AVX__\n  NormalizeSSE(arr, dim, norm);\n#endif  // __ARM_NEON\n}\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n//! 
Divide every element of the vector by norm\nvoid Normalizer<Float16>::Compute(ValueType *arr, size_t dim, float norm) {\n#if defined(__ARM_NEON)\n  NormalizeNEON(reinterpret_cast<float16_t *>(arr), dim, norm);\n#else\n#if defined(__AVX512F__)\n  if (dim > 31) {\n    NormalizeAVX512(reinterpret_cast<uint16_t *>(arr), dim, norm);\n    return;\n  }\n#endif  // __AVX512F__\n  NormalizeAVX(reinterpret_cast<uint16_t *>(arr), dim, norm);\n#endif  // __ARM_NEON\n}\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/math/normalizer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"norm_matrix.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Normalizer\n */\ntemplate <typename T,\n          typename = typename std::enable_if<IsFloatingPoint<T>::value>::type>\nstruct Normalizer {\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! Compute the norm of vector\n  static inline void Compute(ValueType *arr, size_t dim, float norm) {\n    for (size_t i = 0; i < dim; ++i) {\n      arr[i] /= norm;\n    }\n  }\n\n  //! Normalize a vector (L1)\n  static inline void L1(ValueType *arr, size_t dim, float *norm) {\n    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n\n  //! Normalize a vector (L2)\n  static inline void L2(ValueType *arr, size_t dim, float *norm) {\n    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n};\n\n#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))\n/*! Normalizer (FP32)\n */\ntemplate <>\nstruct Normalizer<float> {\n  //! Type of value\n  using ValueType = float;\n\n  //! Compute the norm of vector\n  static void Compute(ValueType *arr, size_t dim, float norm);\n\n  //! 
Normalize a vector (L1)\n  static inline void L1(ValueType *arr, size_t dim, float *norm) {\n    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n\n  //! Normalize a vector (L2)\n  static inline void L2(ValueType *arr, size_t dim, float *norm) {\n    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n};\n#endif  // __SSE__ || (__ARM_NEON && __aarch64__)\n\n#if (defined(__F16C__) && defined(__AVX__)) || \\\n    (defined(__ARM_NEON) && defined(__aarch64__))\n/*! Normalizer (FP16)\n */\ntemplate <>\nstruct Normalizer<Float16> {\n  //! Type of value\n  using ValueType = Float16;\n\n  //! Compute the norm of vector\n  static void Compute(ValueType *arr, size_t dim, float norm);\n\n  //! Normalize a vector (L1)\n  static inline void L1(ValueType *arr, size_t dim, float *norm) {\n    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n\n  //! Normalize a vector (L2)\n  static inline void L2(ValueType *arr, size_t dim, float *norm) {\n    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);\n    if (*norm > 0.0f) {\n      Compute(arr, dim, *norm);\n    }\n  }\n};\n#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/math_batch/cosine_distance_batch.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <vector>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"inner_product_distance_batch.h\"\n\nnamespace zvec::ailego::DistanceBatch {\n\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep, typename = void>\nstruct CosineDistanceBatch;\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep, typename>\nstruct CosineDistanceBatch {\n  using ValueType = typename std::remove_cv<T>::type;\n\n  static inline void ComputeBatch(const ValueType **vecs,\n                                  const ValueType *query, size_t num_vecs,\n                                  size_t dim, float *results) {\n    constexpr size_t extra_dim = sizeof(float) / sizeof(ValueType);\n    size_t _dim = dim - extra_dim;\n\n    InnerProductDistanceBatch<ValueType, BatchSize, PrefetchStep>::ComputeBatch(\n        vecs, query, num_vecs, _dim, results);\n\n    for (size_t i = 0; i < num_vecs; ++i) {\n      results[i] = 1 - results[i];\n    }\n  }\n\n  using IPImplType =\n      InnerProductDistanceBatch<ValueType, BatchSize, PrefetchStep>;\n\n  static void QueryPreprocess(void *query, size_t dim) {\n    return IPImplType::QueryPreprocess(query,\n                                       dim - sizeof(float) / 
sizeof(ValueType));\n  }\n};\n\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/ailego/math_batch/distance_batch.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/math_batch/utils.h>\n#include \"ailego/math/distance_matrix.h\"\n#include \"cosine_distance_batch.h\"\n#include \"inner_product_distance_batch.h\"\n\nnamespace zvec::ailego {\n\ntemplate <\n    template <typename, size_t, size_t, typename = void> class DistanceType,\n    typename ValueType, size_t BatchSize, size_t PrefetchStep, typename = void>\nstruct BaseDistance {\n  static inline void _ComputeBatch(const ValueType **m, const ValueType *q,\n                                   size_t num, size_t dim, float *out) {\n    for (size_t i = 0; i < num; ++i) {\n      DistanceType<ValueType, 1, 1>::Compute(m[i], q, dim, out + i);\n    }\n  }\n\n  // If Distance has ComputeBatch, use it; otherwise fall back to _ComputeBatch.\n  static inline void ComputeBatch(const ValueType **m, const ValueType *q,\n                                  size_t num, size_t dim, float *out) {\n    if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,\n                                 CosineDistanceMatrix<ValueType, 1, 1>>) {\n      return DistanceBatch::CosineDistanceBatch<\n          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,\n                                                            out);\n    }\n\n    _ComputeBatch(m, q, num, dim, out);\n  }\n};\n\n}  // namespace zvec::ailego"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/math_batch/utils.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep, typename = void>\nstruct InnerProductDistanceBatch;\n\ntemplate <typename ValueType, size_t BatchSize>\nstatic void compute_one_to_many_inner_product_fallback(\n    const ValueType *query, const ValueType **ptrs,\n    std::array<const ValueType *, BatchSize> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  for (size_t j = 0; j < BatchSize; ++j) {\n    sums[j] = 0.0;\n    InnerProductMatrix<ValueType, 1, 1>::Compute(ptrs[j], query, dim, sums + j);\n    ailego_prefetch(&prefetch_ptrs[j]);\n  }\n}\n\n// Function template partial specialization is not allowed,\n// therefore the wrapper struct is required.\ntemplate <typename T, size_t BatchSize>\nstruct InnerProductDistanceBatchImpl {\n  using ValueType = typename std::remove_cv<T>::type;\n  static void compute_one_to_many(\n      const ValueType *query, const ValueType **ptrs,\n      std::array<const ValueType *, BatchSize> &prefetch_ptrs, size_t dim,\n      float *sums) {\n    return compute_one_to_many_inner_product_fallback(query, ptrs,\n                                            
          prefetch_ptrs, dim, sums);\n  }\n  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc() {\n    return nullptr;\n  }\n};\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep, typename>\nstruct InnerProductDistanceBatch {\n  using ValueType = typename std::remove_cv<T>::type;\n\n  static inline void ComputeBatch(const ValueType **vecs,\n                                  const ValueType *query, size_t num_vecs,\n                                  size_t dim, float *results) {\n    size_t i = 0;\n    for (; i + BatchSize <= num_vecs; i += BatchSize) {\n      std::array<const ValueType *, BatchSize> prefetch_ptrs;\n      for (size_t j = 0; j < BatchSize; ++j) {\n        if (i + j + BatchSize * PrefetchStep < num_vecs) {\n          prefetch_ptrs[j] = vecs[i + j + BatchSize * PrefetchStep];\n        } else {\n          prefetch_ptrs[j] = nullptr;\n        }\n      }\n      InnerProductDistanceBatchImpl<ValueType, BatchSize>::compute_one_to_many(\n          query, &vecs[i], prefetch_ptrs, dim, &results[i]);\n    }\n    for (; i < num_vecs; ++i) {  // TODO: unroll by 1, 2, 4, 8, etc.\n      std::array<const ValueType *, 1> prefetch_ptrs{nullptr};\n      InnerProductDistanceBatchImpl<ValueType, 1>::compute_one_to_many(\n          query, &vecs[i], prefetch_ptrs, dim, &results[i]);\n    }\n  }\n\n  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc() {\n    return InnerProductDistanceBatchImpl<ValueType,\n                                         1>::GetQueryPreprocessFunc();\n  }\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<ailego::Float16, 1> {\n  using ValueType = ailego::Float16;\n  static void compute_one_to_many(\n      const ailego::Float16 *query, const ailego::Float16 **ptrs,\n      std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,\n      float *sums);\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<float, 1> {\n  using ValueType = float;\n  static void compute_one_to_many(const float 
*query, const float **ptrs,\n                                  std::array<const float *, 1> &prefetch_ptrs,\n                                  size_t dim, float *sums);\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<int8_t, 1> {\n  using ValueType = int8_t;\n  static void compute_one_to_many(const int8_t *query, const int8_t **ptrs,\n                                  std::array<const int8_t *, 1> &prefetch_ptrs,\n                                  size_t dim, float *sums);\n\n  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc();\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<ailego::Float16, 12> {\n  using ValueType = ailego::Float16;\n  static void compute_one_to_many(\n      const ailego::Float16 *query, const ailego::Float16 **ptrs,\n      std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,\n      float *sums);\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<float, 12> {\n  using ValueType = float;\n  static void compute_one_to_many(const float *query, const float **ptrs,\n                                  std::array<const float *, 12> &prefetch_ptrs,\n                                  size_t dim, float *sums);\n};\n\ntemplate <>\nstruct InnerProductDistanceBatchImpl<int8_t, 12> {\n  using ValueType = int8_t;\n  static void compute_one_to_many(const int8_t *query, const int8_t **ptrs,\n                                  std::array<const int8_t *, 12> &prefetch_ptrs,\n                                  size_t dim, float *sums);\n};\n\n}  // namespace zvec::ailego::DistanceBatch\n"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_dispatch.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/math_batch/utils.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include \"inner_product_distance_batch.h\"\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX512VNNI__)\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess(\n    void *query, size_t dim);\n\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_1(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_12(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n#endif\n\n#if defined(__AVX512FP16__)\nvoid compute_one_to_many_inner_product_avx512fp16_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,\n    size_t dimensionality, float *results);\n\nvoid compute_one_to_many_inner_product_avx512fp16_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,\n    
size_t dimensionality, float *results);\n#endif  //__AVX512FP16__\n\n#if defined(__AVX512F__)\nvoid compute_one_to_many_inner_product_avx512f_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,\n    size_t dimensionality, float *results);\n\nvoid compute_one_to_many_inner_product_avx512f_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,\n    size_t dimensionality, float *results);\n#endif  //__AVX512F__\n\n#if defined(__AVX2__)\nvoid compute_one_to_many_inner_product_avx2_fp32_1(\n    const float *query, const float **ptrs,\n    std::array<const float *, 1> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n\nvoid compute_one_to_many_inner_product_avx2_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,\n    size_t dimensionality, float *results);\n\nvoid compute_one_to_many_inner_product_avx2_int8_1(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n\nvoid compute_one_to_many_inner_product_avx2_fp32_12(\n    const float *query, const float **ptrs,\n    std::array<const float *, 12> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n\nvoid compute_one_to_many_inner_product_avx2_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,\n    size_t dimensionality, float *results);\n\nvoid compute_one_to_many_inner_product_avx2_int8_12(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dimensionality,\n    float *results);\n#endif\n\nvoid InnerProductDistanceBatchImpl<float, 1>::compute_one_to_many(\n    const ValueType *query, const ValueType **ptrs,\n    std::array<const 
ValueType *, 1> &prefetch_ptrs, size_t dim, float *sums) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_fp32_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\nvoid InnerProductDistanceBatchImpl<ailego::Float16, 1>::compute_one_to_many(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,\n    float *sums) {\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    return compute_one_to_many_inner_product_avx512fp16_fp16_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return compute_one_to_many_inner_product_avx512f_fp16_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_fp16_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\nvoid InnerProductDistanceBatchImpl<int8_t, 1>::compute_one_to_many(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {\n// #if defined(__AVX512BW__) // TODO: this version is problematic\n//     return compute_one_to_many_avx512_int8<ValueType, BatchSize>(\n//         query, ptrs, prefetch_ptrs, dim, sums);\n#if defined(__AVX512VNNI__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n    return 
compute_one_to_many_inner_product_avx512_vnni_int8_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_int8_1(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\nDistanceBatchQueryPreprocessFunc\nInnerProductDistanceBatchImpl<int8_t, 1>::GetQueryPreprocessFunc() {\n#if defined(__AVX512VNNI__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n    return compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess;\n  }\n#endif\n  return nullptr;\n}\n\nvoid InnerProductDistanceBatchImpl<float, 12>::compute_one_to_many(\n    const ValueType *query, const ValueType **ptrs,\n    std::array<const ValueType *, 12> &prefetch_ptrs, size_t dim, float *sums) {\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_fp32_12(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\nvoid InnerProductDistanceBatchImpl<ailego::Float16, 12>::compute_one_to_many(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,\n    float *sums) {\n#if defined(__AVX512FP16__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {\n    return compute_one_to_many_inner_product_avx512fp16_fp16_12(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return compute_one_to_many_inner_product_avx512f_fp16_12(\n       
 query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_fp16_12(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\nvoid InnerProductDistanceBatchImpl<int8_t, 12>::compute_one_to_many(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {\n// #if defined(__AVX512BW__) // TODO: this version is problematic\n//     return compute_one_to_many_avx512_int8<ValueType, BatchSize>(\n//         query, ptrs, prefetch_ptrs, dim, sums);\n#if defined(__AVX512VNNI__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n    return compute_one_to_many_inner_product_avx512_vnni_int8_12(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n#if defined(__AVX2__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {\n    return compute_one_to_many_inner_product_avx2_int8_12(\n        query, ptrs, prefetch_ptrs, dim, sums);\n  }\n#endif\n  return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs,\n                                                    dim, sums);\n}\n\n}  // namespace zvec::ailego::DistanceBatch\n"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/math/matrix_utility.i>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX2__)\n\ntemplate <typename ValueType, size_t dp_batch>\nstatic std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>\ncompute_one_to_many_inner_product_avx2_fp16(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,\n    size_t dimensionality, float *results) {\n  __m256 accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm256_setzero_ps();\n  }\n\n  size_t dim = 0;\n  for (; dim + 16 <= dimensionality; dim += 16) {\n    __m256i q =\n        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim));\n\n    __m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q));\n    __m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1));\n\n    __m256 data_regs_1[dp_batch];\n    __m256 data_regs_2[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      __m256i m =\n          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim));\n\n      data_regs_1[i] = _mm256_cvtph_ps(_mm256_castsi256_si128(m));\n      data_regs_2[i] = 
_mm256_cvtph_ps(_mm256_extractf128_si256(m, 1));\n    }\n\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] = _mm256_fmadd_ps(q1, data_regs_1[i], accs[i]);\n      accs[i] = _mm256_fmadd_ps(q2, data_regs_2[i], accs[i]);\n    }\n  }\n\n  if (dim + 8 <= dimensionality) {\n    __m256 q = _mm256_cvtph_ps(\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));\n\n    __m256 data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm256_cvtph_ps(\n          _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));\n      accs[i] = _mm256_fmadd_ps(q, data_regs[i], accs[i]);\n    }\n\n    dim += 8;\n  }\n\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = HorizontalAdd_FP32_V256(accs[i]);\n  }\n\n  for (; dim < dimensionality; ++dim) {\n    for (size_t i = 0; i < dp_batch; ++i) {\n      results[i] += (*(query + dim)) * (*(ptrs[i] + dim));\n    }\n  }\n}\n\nvoid compute_one_to_many_inner_product_avx2_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return compute_one_to_many_inner_product_avx2_fp16<ailego::Float16, 1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid compute_one_to_many_inner_product_avx2_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return compute_one_to_many_inner_product_avx2_fp16<ailego::Float16, 12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx512.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/math/matrix_utility.i>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX512F__)\n\ntemplate <typename ValueType, size_t dp_batch>\nstatic std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>\ncompute_one_to_many_inner_product_avx512f_fp16(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,\n    size_t dimensionality, float *results) {\n  __m512 accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm512_setzero_ps();\n  }\n\n  size_t dim = 0;\n  for (; dim + 32 <= dimensionality; dim += 32) {\n    __m512i q =\n        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));\n\n    __m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q));\n    __m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1));\n\n    __m512 data_regs_1[dp_batch];\n    __m512 data_regs_2[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      __m512i m =\n          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));\n\n      data_regs_1[i] = _mm512_cvtph_ps(_mm512_castsi512_si256(m));\n      data_regs_2[i] = 
_mm512_cvtph_ps(_mm512_extracti64x4_epi64(m, 1));\n    }\n\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] = _mm512_fmadd_ps(q1, data_regs_1[i], accs[i]);\n      accs[i] = _mm512_fmadd_ps(q2, data_regs_2[i], accs[i]);\n    }\n  }\n\n  if (dim + 16 <= dimensionality) {\n    __m512 q = _mm512_cvtph_ps(\n        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim)));\n\n    __m512 data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm512_cvtph_ps(\n          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim)));\n      accs[i] = _mm512_fmadd_ps(q, data_regs[i], accs[i]);\n    }\n\n    dim += 16;\n  }\n\n  __m256 acc_new[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    acc_new[i] = _mm256_add_ps(\n        _mm512_castps512_ps256(accs[i]),\n        _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(accs[i]), 1)));\n  }\n\n  if (dim + 8 <= dimensionality) {\n    __m256 q = _mm256_cvtph_ps(\n        _mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));\n\n    for (size_t i = 0; i < dp_batch; ++i) {\n      __m256 m = _mm256_cvtph_ps(\n          _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));\n      acc_new[i] = _mm256_fmadd_ps(m, q, acc_new[i]);\n    }\n\n    dim += 8;\n  }\n\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = HorizontalAdd_FP32_V256(acc_new[i]);\n  }\n\n  for (; dim < dimensionality; ++dim) {\n    for (size_t i = 0; i < dp_batch; ++i) {\n      results[i] += (*(query + dim)) * (*(ptrs[i] + dim));\n    }\n  }\n}\n\nvoid compute_one_to_many_inner_product_avx512f_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return 
compute_one_to_many_inner_product_avx512f_fp16<ailego::Float16, 1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid compute_one_to_many_inner_product_avx512f_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return compute_one_to_many_inner_product_avx512f_fp16<ailego::Float16, 12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch\n"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx512fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/math/matrix_utility.i>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX512FP16__)\ntemplate <typename ValueType, size_t dp_batch>\nstatic std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>\ncompute_one_to_many_inner_product_avx512fp16_fp16(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,\n    size_t dimensionality, float *results) {\n  __m512h accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm512_setzero_ph();\n  }\n\n  size_t dim = 0;\n  for (; dim + 32 <= dimensionality; dim += 32) {\n    __m512h q = _mm512_loadu_ph(query + dim);\n\n    __m512h data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim);\n    }\n\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] = _mm512_fmadd_ph(data_regs[i], q, accs[i]);\n    }\n  }\n\n  if (dim < dimensionality) {\n    __mmask32 mask = (__mmask32)((1 << (dimensionality - dim)) - 1);\n\n    
for (size_t i = 0; i < dp_batch; ++i) {\n      __m512i zmm_undefined = _mm512_undefined_epi32();\n\n      accs[i] =\n          _mm512_mask3_fmadd_ph(_mm512_castsi512_ph(_mm512_mask_loadu_epi16(\n                                    zmm_undefined, mask, query + dim)),\n                                _mm512_castsi512_ph(_mm512_mask_loadu_epi16(\n                                    zmm_undefined, mask, ptrs[i] + dim)),\n                                accs[i], mask);\n    }\n  }\n\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = HorizontalAdd_FP16_V512(accs[i]);\n  }\n}\n\nvoid compute_one_to_many_inner_product_avx512fp16_fp16_1(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return compute_one_to_many_inner_product_avx512fp16_fp16<ailego::Float16, 1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid compute_one_to_many_inner_product_avx512fp16_fp16_12(\n    const ailego::Float16 *query, const ailego::Float16 **ptrs,\n    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,\n    float *sums) {\n  return compute_one_to_many_inner_product_avx512fp16_fp16<ailego::Float16, 12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch\n"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_fp32_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX2__)\n\ninline float sum4(__m128 v) {\n  v = _mm_add_ps(v, _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 8)));\n  return v[0] + v[1];\n}\n\ninline __m128 sum_top_bottom_avx(__m256 v) {\n  const __m128 high = _mm256_extractf128_ps(v, 1);\n  const __m128 low = _mm256_castps256_ps128(v);\n  return _mm_add_ps(high, low);\n}\n\ntemplate <typename ValueType, size_t dp_batch>\nstatic std::enable_if_t<std::is_same_v<ValueType, float>, void>\ncompute_one_to_many_inner_product_avx2_fp32(\n    const ValueType *query, const ValueType **ptrs,\n    std::array<const ValueType *, dp_batch> &prefetch_ptrs,\n    size_t dimensionality, float *results) {\n  __m256 accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm256_setzero_ps();\n  }\n  size_t dim = 0;\n  for (; dim + 8 <= dimensionality; dim += 8) {\n    __m256 q = _mm256_loadu_ps(query + dim);\n\n    __m256 data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim);\n    }\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        
ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]);\n    }\n  }\n\n  __m128 sum128_regs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    sum128_regs[i] = sum_top_bottom_avx(accs[i]);\n  }\n  if (dim + 4 <= dimensionality) {\n    __m128 q = _mm_loadu_ps(query + dim);\n\n    __m128 data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm_loadu_ps(ptrs[i] + dim);\n    }\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n    for (size_t i = 0; i < dp_batch; ++i) {\n      sum128_regs[i] = _mm_fnmadd_ps(q, data_regs[i], sum128_regs[i]);\n    }\n    dim += 4;\n  }\n  if (dim + 2 <= dimensionality) {\n    __m128 q = _mm_setzero_ps();\n\n    __m128 data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm_setzero_ps();\n    }\n\n    q = _mm_loadh_pi(q, (const __m64 *)(query + dim));\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm_loadh_pi(data_regs[i], (const __m64 *)(ptrs[i] + dim));\n    }\n    for (size_t i = 0; i < dp_batch; ++i) {\n      sum128_regs[i] = _mm_fnmadd_ps(q, data_regs[i], sum128_regs[i]);\n    }\n    dim += 2;\n  }\n\n  float res[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    res[i] = sum4(sum128_regs[i]);\n  }\n  if (dim < dimensionality) {\n    float q = query[dim];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      res[i] -= q * ptrs[i][dim];\n    }\n  }\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = -res[i];\n  }\n}\n\nvoid compute_one_to_many_inner_product_avx2_fp32_1(\n    const float *query, const float **ptrs,\n    std::array<const float *, 1> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx2_fp32<float, 1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid 
compute_one_to_many_inner_product_avx2_fp32_12(\n    const float *query, const float **ptrs,\n    std::array<const float *, 12> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx2_fp32<float, 12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_int8_avx2.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX2__)\n\ntemplate <typename ValueType, size_t dp_batch>\nstatic std::enable_if_t<std::is_same_v<ValueType, int8_t>, void>\ncompute_one_to_many_inner_product_avx2_int8(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t dimensionality,\n    float *results) {\n  __m256i accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm256_setzero_si256();\n  }\n  size_t dim = 0;\n  for (; dim + 32 <= dimensionality; dim += 32) {\n    __m256i q = _mm256_loadu_si256((const __m256i *)(query + dim));\n\n    __m256i data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] = _mm256_loadu_si256((const __m256i *)(ptrs[i] + dim));\n    }\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n    __m256i q_lo = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(q));\n    __m256i q_hi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(q, 1));\n    __m256i data_lo[dp_batch];\n    __m256i data_hi[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_lo[i] = 
_mm256_cvtepi8_epi16(_mm256_castsi256_si128(data_regs[i]));\n      data_hi[i] =\n          _mm256_cvtepi8_epi16(_mm256_extracti128_si256(data_regs[i], 1));\n    }\n    __m256i prod_lo[dp_batch];\n    __m256i prod_hi[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      prod_lo[i] = _mm256_madd_epi16(q_lo, data_lo[i]);\n      prod_hi[i] = _mm256_madd_epi16(q_hi, data_hi[i]);\n    }\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] =\n          _mm256_add_epi32(accs[i], _mm256_add_epi32(prod_lo[i], prod_hi[i]));\n    }\n  }\n\n  int temp_results[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    __m128i lo = _mm256_castsi256_si128(accs[i]);\n    __m128i hi = _mm256_extracti128_si256(accs[i], 1);\n    __m128i sum128 = _mm_add_epi32(lo, hi);\n    sum128 = _mm_hadd_epi32(sum128, sum128);\n    sum128 = _mm_hadd_epi32(sum128, sum128);\n    temp_results[i] = _mm_cvtsi128_si32(sum128);\n  }\n  for (; dim < dimensionality; ++dim) {\n    int8_t q = query[dim];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      temp_results[i] += q * static_cast<int>(ptrs[i][dim]);\n    }\n  }\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = static_cast<float>(temp_results[i]);\n  }\n}\n\nvoid compute_one_to_many_inner_product_avx2_int8_1(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx2_int8<int8_t, 1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid compute_one_to_many_inner_product_avx2_int8_12(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx2_int8<int8_t, 12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/ailego/math_batch/inner_product_distance_batch_impl_int8_avx512fp16.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <array>\n#include <ailego/utility/math_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec::ailego::DistanceBatch {\n\n#if defined(__AVX512VNNI__)\n\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess(\n    void *query, size_t dim) {\n  const int8_t *input = reinterpret_cast<const int8_t *>(query);\n  uint8_t *output = reinterpret_cast<uint8_t *>(query);\n\n  // // AVX512 constant: 128 in each byte (cast to int8_t, which becomes -128\n  // // in signed representation, but addition works correctly due to two's\n  // // complement arithmetic)\n  const __m512i offset = _mm512_set1_epi8(static_cast<int8_t>(128));\n  //\n  size_t i = 0;\n  // // Process 64 bytes at a time using AVX512\n  for (; i + 64 <= dim; i += 64) {\n    __m512i data =\n        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + i));\n    __m512i result = _mm512_add_epi8(data, offset);\n    _mm512_storeu_si512(reinterpret_cast<__m512i *>(output + i), result);\n  }\n\n  // Handle remaining elements with scalar loop\n  for (; i < dim; ++i) {\n    output[i] = static_cast<uint8_t>(static_cast<int>(input[i]) + 128);\n  }\n}\n\n// query is unsigned\ntemplate <size_t dp_batch>\nstatic void compute_one_to_many_inner_product_avx512_vnni_int8(\n    const int8_t *query, const int8_t 
**ptrs,\n    std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t dimensionality,\n    float *results) {\n  __m512i accs[dp_batch];\n  for (size_t i = 0; i < dp_batch; ++i) {\n    accs[i] = _mm512_setzero_si512();\n  }\n  size_t dim = 0;\n  for (; dim + 64 <= dimensionality; dim += 64) {\n    __m512i q =\n        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));\n\n    __m512i data_regs[dp_batch];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      data_regs[i] =\n          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));\n    }\n    if (prefetch_ptrs[0]) {\n      for (size_t i = 0; i < dp_batch; ++i) {\n        ailego_prefetch(prefetch_ptrs[i] + dim);\n      }\n    }\n    for (size_t i = 0; i < dp_batch; ++i) {\n      accs[i] = _mm512_dpbusd_epi32(accs[i], q, data_regs[i]);\n    }\n  }\n\n  int temp_results[dp_batch]{};\n  for (size_t i = 0; i < dp_batch; ++i) {\n    temp_results[i] = _mm512_reduce_add_epi32(accs[i]);\n  }\n  for (; dim < dimensionality; ++dim) {\n    uint q = reinterpret_cast<const u_int8_t *>(query)[dim];\n    for (size_t i = 0; i < dp_batch; ++i) {\n      temp_results[i] += q * static_cast<int>(ptrs[i][dim]);\n    }\n  }\n  for (size_t i = 0; i < dp_batch; ++i) {\n    results[i] = static_cast<float>(temp_results[i]);\n  }\n}\n\n//\n// #elif defined(__AVX512BW__)\n// // TODO: this version is problematic\n// template <typename ValueType, size_t dp_batch>\n// static std::enable_if_t<std::is_same_v<ValueType, int8_t>, void>\n// compute_one_to_many_avx512_int8(\n//     const int8_t *query, const int8_t **ptrs,\n//     std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t\n//     dimensionality, float *results) {\n//   std::array<__m512i, dp_batch> accs;\n//   size_t dim = 0;\n//   for (; dim + 64 <= dimensionality; dim += 64) {\n//     __m512i q =\n//         _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));\n//     std::array<__m512i, dp_batch> data_regs;\n//     for (size_t i = 
0; i < dp_batch; ++i) {\n//       data_regs[i] =\n//           _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] +\n//           dim));\n//     }\n//     if (prefetch_ptrs[0]) {\n//       for (size_t i = 0; i < dp_batch; ++i) {\n//         ailego_prefetch(prefetch_ptrs[i] + dim);\n//       }\n//     }\n//     __m512i q_lo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 0));\n//     __m512i q_hi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 1));\n//     std::array<__m512i, dp_batch> data_lo;\n//     std::array<__m512i, dp_batch> data_hi;\n//     for (size_t i = 0; i < dp_batch; ++i) {\n//       data_lo[i] =\n//           _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 0));\n//       data_hi[i] =\n//           _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 1));\n//     }\n//     std::array<__m512i, dp_batch> prod_lo;\n//     std::array<__m512i, dp_batch> prod_hi;\n//     for (size_t i = 0; i < dp_batch; ++i) {\n//       prod_lo[i] = _mm512_madd_epi16(q_lo, data_lo[i]);\n//       prod_hi[i] = _mm512_madd_epi16(q_hi, data_hi[i]);\n//     }\n//     for (size_t i = 0; i < dp_batch; ++i) {\n//       accs[i] = _mm512_add_epi32(\n//           accs[i], _mm512_add_epi32(\n//                        _mm512_madd_epi16(prod_lo[i], _mm512_set1_epi16(1)),\n//                        _mm512_madd_epi16(prod_hi[i], _mm512_set1_epi16(1))));\n//     }\n//   }\n//   std::array<int, dp_batch> temp_results;\n//   for (size_t i = 0; i < dp_batch; ++i) {\n//     temp_results[i] = _mm512_reduce_add_epi32(accs[i]);\n//   }\n//   for (; dim < dimensionality; ++dim) {\n//     int8_t q = query[dim];\n//     for (size_t i = 0; i < dp_batch; ++i) {\n//       temp_results[i] += q * static_cast<int>(ptrs[i][dim]);\n//     }\n//   }\n//   for (size_t i = 0; i < dp_batch; ++i) {\n//     results[i] = static_cast<float>(temp_results[i]);\n//   }\n// }\n\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_1(\n    const int8_t *query, const int8_t 
**ptrs,\n    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx512_vnni_int8<1>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\nvoid compute_one_to_many_inner_product_avx512_vnni_int8_12(\n    const int8_t *query, const int8_t **ptrs,\n    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {\n  return compute_one_to_many_inner_product_avx512_vnni_int8<12>(\n      query, ptrs, prefetch_ptrs, dim, sums);\n}\n\n\n#endif\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/ailego/parallel/lock.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <mutex>\n#if __cplusplus >= 201703L\n#include <shared_mutex>\n#endif\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n// Test if atomic_bool is always lock free.\n// Arm may be always lock free using some compiler flags,\n// see https://stackoverflow.com/a/64253858/486350.\n#if ATOMIC_BOOL_LOCK_FREE == 2\n\n/*! Spin Mutex (The atomic type is always lock-free)\n */\nclass SpinMutex {\n public:\n  //! Constructor\n  SpinMutex(void) {}\n\n  //! Locking\n  void lock(void) {\n    bool expected = false;\n    while (!flag_.compare_exchange_weak(\n        expected, true, std::memory_order_acquire, std::memory_order_relaxed)) {\n      expected = false;\n      // Provide a hint to the processor that the code sequence is a spin-wait\n      // loop. This can help improve the performance and power consumption of\n      // spin-wait loops.\n      ailego_yield();\n    }\n  }\n\n  //! Try locking\n  bool try_lock(void) {\n    bool expected = false;\n    return flag_.compare_exchange_strong(\n        expected, true, std::memory_order_acquire, std::memory_order_relaxed);\n  }\n\n  //! Unlocking\n  void unlock(void) {\n    flag_.store(false, std::memory_order_release);\n  }\n\n private:\n  //! 
Disable them\n  SpinMutex(const SpinMutex &) = delete;\n  SpinMutex(SpinMutex &&) = delete;\n  SpinMutex &operator=(const SpinMutex &) = delete;\n  SpinMutex &operator=(SpinMutex &&) = delete;\n\n  //! Members\n  std::atomic_bool flag_{false};\n};\n#else\n\n/*! Spin Mutex (General)\n */\nclass SpinMutex {\n public:\n  //! Constructor\n  SpinMutex(void) {}\n\n  //! Locking\n  void lock(void) {\n    while (flag_.test_and_set(std::memory_order_acquire));\n  }\n\n  //! Try locking\n  bool try_lock(void) {\n    return (!flag_.test_and_set(std::memory_order_acquire));\n  }\n\n  //! Unlocking\n  void unlock(void) {\n    flag_.clear(std::memory_order_release);\n  }\n\n private:\n  //! Disable them\n  SpinMutex(const SpinMutex &) = delete;\n  SpinMutex(SpinMutex &&) = delete;\n  SpinMutex &operator=(const SpinMutex &) = delete;\n  SpinMutex &operator=(SpinMutex &&) = delete;\n\n  //! Members\n  std::atomic_flag flag_{};\n};\n#endif  // ATOMIC_BOOL_LOCK_FREE == 2\n\n#if __cplusplus >= 201703L\n\nusing SharedMutex = std::shared_mutex;\n\n#else\n\n/*! Shared Mutex\n */\nclass SharedMutex {\n public:\n  //! Constructor\n  SharedMutex(void) {}\n\n  //! Locking\n  void lock(void) {\n    std::unique_lock<std::mutex> q(mutex_);\n    ++write_count_;\n    write_cond_.wait(q, [this]() { return (pending_count_ == 0); });\n    --write_count_;\n    --pending_count_;\n  }\n\n  //! Try locking\n  bool try_lock(void) {\n    std::unique_lock<std::mutex> q(mutex_, std::defer_lock);\n    if (q.try_lock()) {\n      if (pending_count_ == 0) {\n        --pending_count_;\n        return true;\n      }\n    }\n    return false;\n  }\n\n  //! Unlocking\n  void unlock(void) {\n    std::lock_guard<std::mutex> q(mutex_);\n    ++pending_count_;\n\n    if (write_count_ != 0) {\n      write_cond_.notify_one();\n    } else {\n      read_cond_.notify_all();\n    }\n  }\n\n  //! 
Locking (shared)\n  void lock_shared(void) {\n    std::unique_lock<std::mutex> q(mutex_);\n    ++read_count_;\n    read_cond_.wait(\n        q, [this]() { return (write_count_ == 0 && pending_count_ >= 0); });\n    --read_count_;\n    ++pending_count_;\n  }\n\n  //! Try locking (shared)\n  bool try_lock_shared(void) {\n    std::lock_guard<std::mutex> q(mutex_);\n    if (write_count_ == 0 && pending_count_ >= 0) {\n      ++pending_count_;\n      return true;\n    }\n    return false;\n  }\n\n  //! Unlocking (shared)\n  void unlock_shared(void) {\n    std::lock_guard<std::mutex> q(mutex_);\n    --pending_count_;\n\n    if (write_count_ != 0 && pending_count_ == 0) {\n      write_cond_.notify_one();\n    } else {\n      read_cond_.notify_all();\n    }\n  }\n\n private:\n  //! Disable them\n  SharedMutex(const SharedMutex &) = delete;\n  SharedMutex(SharedMutex &&) = delete;\n  SharedMutex &operator=(const SharedMutex &) = delete;\n  SharedMutex &operator=(SharedMutex &&) = delete;\n\n  //! Members\n  int32_t pending_count_{0};\n  int32_t read_count_{0};\n  int32_t write_count_{0};\n  std::mutex mutex_{};\n  std::condition_variable read_cond_{};\n  std::condition_variable write_cond_{};\n};\n\n#endif  // __cplusplus >= 201703L\n\n/*! Write Lock\n */\nclass WriteLock {\n public:\n  //! Constructor\n  WriteLock(SharedMutex &mutex) : mutex_(mutex) {}\n\n  //! Locking\n  void lock(void) {\n    mutex_.lock();\n  }\n\n  //! Try locking\n  bool try_lock(void) {\n    return mutex_.try_lock();\n  }\n\n  //! Unlocking\n  void unlock(void) {\n    mutex_.unlock();\n  }\n\n private:\n  //! Disable them\n  WriteLock(void) = delete;\n  WriteLock(const WriteLock &) = delete;\n  WriteLock(WriteLock &&) = delete;\n  WriteLock &operator=(const WriteLock &) = delete;\n  WriteLock &operator=(WriteLock &&) = delete;\n\n  //! Members\n  SharedMutex &mutex_;\n};\n\n/*! Read Lock\n */\nclass ReadLock {\n public:\n  //! Constructor\n  ReadLock(SharedMutex &mutex) : mutex_(mutex) {}\n\n  //! 
Locking\n  void lock(void) {\n    mutex_.lock_shared();\n  }\n\n  //! Try locking\n  bool try_lock(void) {\n    return mutex_.try_lock_shared();\n  }\n\n  //! Unlocking\n  void unlock(void) {\n    mutex_.unlock_shared();\n  }\n\n private:\n  //! Disable them\n  ReadLock(void) = delete;\n  ReadLock(const ReadLock &) = delete;\n  ReadLock(ReadLock &&) = delete;\n  ReadLock &operator=(const ReadLock &) = delete;\n  ReadLock &operator=(ReadLock &&) = delete;\n\n  //! Members\n  SharedMutex &mutex_;\n};\n\n/*\n  Atomic Close Lock\n */\n\n#define AILEGO_SAFE_ACCESS(CLOSE_ERR)              \\\n  counter_.fetch_add(1);                           \\\n  AILEGO_DEFER([this] { counter_.fetch_sub(1); }); \\\n  if (!opened_.load()) {                           \\\n    return CLOSE_ERR;                              \\\n  }\n\n#define AILEGO_SAFE_CLOSE                                      \\\n  opened_.store(false);                                        \\\n  while (counter_.load() > 0) {                                \\\n    std::this_thread::sleep_for(std::chrono::milliseconds(1)); \\\n  }\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/parallel/multi_thread_list.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <cstddef>\n#include <deque>\n#include <mutex>\n#include <utility>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Multi-Thread list\n *\n *  Bounded FIFO shared by producer and consumer threads. Producers wait\n *  while the list holds `size_limit` items, consumers wait while it is\n *  empty; `done()` and `stop_consume()` wake the waiters up.\n */\ntemplate <typename T>\nclass MultiThreadList {\n public:\n  //! Constructor\n  MultiThreadList(size_t size_limit = 1000) : size_limit_(size_limit) {}\n\n  //! Append a copy of `item`, waiting while the list is full.\n  //! Returns false, without appending, once `done()` has been called.\n  bool produce(const T &item) {\n    std::unique_lock<std::mutex> lk(lock_);\n    not_full_.wait(\n        lk, [&]() { return (list_.size() < size_limit_) || done_.load(); });\n    if (done_.load()) {\n      return false;\n    }\n    list_.emplace_back(item);\n    not_empty_.notify_one();\n    return true;\n  }\n\n  //! Append `item` by move, waiting while the list is full.\n  //! Returns false, without appending, once `done()` has been called.\n  bool produce(T &&item) {\n    std::unique_lock<std::mutex> lk(lock_);\n    not_full_.wait(\n        lk, [&]() { return (list_.size() < size_limit_) || done_.load(); });\n    if (done_.load()) {\n      return false;\n    }\n    list_.emplace_back(std::move(item));\n    not_empty_.notify_one();\n    return true;\n  }\n\n  //! Pop the oldest item into `*item`, waiting while the list is empty.\n  //! Returns false when consumption has been stopped, or when the list is\n  //! empty and `done()` has been called.\n  bool consume(T *item) {\n    std::unique_lock<std::mutex> lk(lock_);\n    not_empty_.wait(lk, [&]() {\n      return !list_.empty() || done_.load() || consume_stopped_.load();\n    });\n    if ((list_.empty() && done_.load()) || consume_stopped_.load()) {\n      return false;\n    }\n    *item = std::move(list_.front());\n    list_.pop_front();\n    not_full_.notify_one();\n    return true;\n  }\n\n  //! Mark the list as finished and wake every waiting producer and consumer\n  void done() {\n    std::unique_lock<std::mutex> lk(lock_);\n    done_.store(true);\n    not_empty_.notify_all();\n    not_full_.notify_all();\n  }\n\n  //! Drop all queued items and clear the finished flag\n  void reset() {\n    // `list_` is only ever touched under `lock_`; clearing it unlocked would\n    // race with concurrent produce()/consume() calls.\n    std::unique_lock<std::mutex> lk(lock_);\n    done_.store(false);\n    list_.clear();\n    // The list has room again, so release producers waiting on a full list.\n    not_full_.notify_all();\n  }\n\n  //! Make `consume()` return false and wake every waiting consumer\n  void stop_consume() {\n    std::unique_lock<std::mutex> lk(lock_);\n    consume_stopped_.store(true);\n    not_empty_.notify_all();\n  }\n\n  //! Allow `consume()` to hand out items again\n  void resume_consume() {\n    consume_stopped_.store(false);\n  }\n\n private:\n  std::deque<T> list_;\n  size_t size_limit_{0};\n  std::mutex lock_;\n  std::condition_variable not_empty_, not_full_;\n\n  std::atomic<bool> done_{false};\n  std::atomic<bool> consume_stopped_{false};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/parallel/semaphore.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <cstddef>\n#include <cstdint>\n#include <mutex>\n#include <type_traits>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Semaphore\n *\n *  Counting semaphore exposing the lock()/try_lock()/unlock() interface.\n */\nclass Semaphore {\n public:\n  //! Constructor (one permit)\n  Semaphore(void) : Semaphore{1} {}\n\n  //! Constructor (`count` permits)\n  Semaphore(uint32_t count) : count_(count) {}\n\n  //! Acquire a permit from this semaphore, suspending until one is available\n  void lock(void) {\n    while (!this->try_lock()) {\n      std::unique_lock<std::mutex> latch(mutex_);\n      cond_.wait(latch, [this]() { return (count_ > 0); });\n    }\n  }\n\n  //! Try to acquire a permit from this semaphore without suspension\n  bool try_lock(void) {\n    uint32_t count = count_.load(std::memory_order_relaxed);\n    while (count > 0) {\n      // Acquire ordering pairs with the increment in unlock(); a failed\n      // exchange stores the current value back into `count`, so the loop\n      // only gives up when no permit is left.\n      if (count_.compare_exchange_weak(count, count - 1,\n                                       std::memory_order_acq_rel,\n                                       std::memory_order_relaxed)) {\n        return true;\n      }\n    }\n    return false;\n  }\n\n  //! Release a permit, returning it into this semaphore\n  void unlock(void) {\n    ++count_;\n    // Taking the mutex before notifying closes the window between a\n    // waiter's predicate check and its wait.\n    std::lock_guard<std::mutex> latch(mutex_);\n    cond_.notify_one();\n  }\n\n private:\n  //! Disable them\n  Semaphore(const Semaphore &) = delete;\n  Semaphore(Semaphore &&) = delete;\n  Semaphore &operator=(const Semaphore &) = delete;\n  Semaphore &operator=(Semaphore &&) = delete;\n\n  //! Members\n  std::atomic<uint32_t> count_{0};\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n};\n\n/*! Binary Semaphores\n *\n *  Up to N (at most 64) independent permits kept as bits of one atomic\n *  word; a set bit means the permit with that index is available.\n */\ntemplate <size_t N, typename = typename std::enable_if<N <= 64u>::type>\nclass BinarySemaphores {\n public:\n  using BitwiseType = typename std::conditional<\n      N <= 32u,\n      typename std::conditional<\n          N <= 16u, typename std::conditional<N <= 8u, uint8_t, uint16_t>::type,\n          uint32_t>::type,\n      uint64_t>::type;\n\n  //! Constructor (one permit)\n  BinarySemaphores(void) : BinarySemaphores{1} {}\n\n  //! Constructor (`count` permits; 0 or anything above N selects N)\n  BinarySemaphores(uint32_t count) {\n    if (count == 0 || count > N) {\n      count = N;\n    }\n    count_ = count;\n    mask_ = static_cast<BitwiseType>(BitwiseType(1) << (count - 1));\n    mask_ |= static_cast<BitwiseType>(mask_ - 1);\n    flags_.store(mask_);\n  }\n\n  //! Acquire a permit from this semaphore, suspending until one is available\n  int acquire(void) {\n    int index = -1;\n    while ((index = this->try_acquire()) < 0) {\n      std::unique_lock<std::mutex> latch(mutex_);\n      cond_.wait(latch, [this]() { return (flags_ > 0); });\n    }\n    return index;\n  }\n\n  //! Try to acquire a permit from this semaphore without suspension\n  int try_acquire(void) {\n    BitwiseType flags = flags_.load(std::memory_order_relaxed);\n    while (flags > 0) {\n      int index = CountTrailingZeros<BitwiseType>(flags);\n      // A failed exchange refreshes `flags`, so no explicit reload is needed.\n      if (flags_.compare_exchange_weak(\n              flags, flags & (~(BitwiseType(1) << index)),\n              std::memory_order_acq_rel, std::memory_order_relaxed)) {\n        return index;\n      }\n    }\n    return -1;\n  }\n\n  //! Acquire a specified permit from this semaphore, suspending until index is\n  //! available. Returns -1 if `index` is out of range.\n  int acquire(int index) {\n    if (index < 0 || (uint32_t)index >= count_) {\n      return -1;\n    }\n    const BitwiseType mask = static_cast<BitwiseType>(BitwiseType(1) << index);\n    while (true) {\n      BitwiseType flags = flags_.load(std::memory_order_relaxed);\n      while (flags & mask) {\n        if (flags_.compare_exchange_weak(flags, flags & (~mask),\n                                         std::memory_order_acq_rel,\n                                         std::memory_order_relaxed)) {\n          return index;\n        }\n      }\n      // The permit is taken: sleep until release() sets the bit again\n      // instead of spinning on the atomic.\n      std::unique_lock<std::mutex> latch(mutex_);\n      cond_.wait(latch, [this, mask]() { return (flags_.load() & mask) != 0; });\n    }\n  }\n\n  //! Release a permit, returning it into this semaphore.\n  //! An out-of-range `index` is ignored.\n  void release(int index) {\n    if (index < 0 || (uint32_t)index >= count_) {\n      return;  // shifting by such an index would be undefined\n    }\n    flags_.fetch_or((BitwiseType(1) << index) & mask_);\n    std::lock_guard<std::mutex> latch(mutex_);\n    // Waiters may be blocked on different indexes, so wake all of them and\n    // let each re-check its own predicate.\n    cond_.notify_all();\n  }\n\n protected:\n  //! Count the trailing zeros (32 bits)\n  template <typename T>\n  static inline auto CountTrailingZeros(T val) ->\n      typename std::enable_if<sizeof(T) <= 4, int>::type {\n    return ailego_ctz32(val);\n  }\n\n  //! Count the trailing zeros (64 bits)\n  template <typename T>\n  static inline auto CountTrailingZeros(T val) ->\n      typename std::enable_if<sizeof(T) <= 8 && 4 < sizeof(T), int>::type {\n    return ailego_ctz64(val);\n  }\n\n private:\n  //! Disable them\n  BinarySemaphores(const BinarySemaphores &) = delete;\n  BinarySemaphores(BinarySemaphores &&) = delete;\n  BinarySemaphores &operator=(const BinarySemaphores &) = delete;\n  BinarySemaphores &operator=(BinarySemaphores &&) = delete;\n\n  //! Members\n  uint32_t count_{0};\n  BitwiseType mask_{0};\n  std::atomic<BitwiseType> flags_{0};\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/parallel/thread_pool.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/parallel/thread_pool.h>\n\n#if (defined(__linux) || defined(__linux__)) && !defined(__ANDROID__)\n#include <pthread.h>\n\n//! Pin the i-th thread of the pool to CPU (i modulo hardware concurrency)\nstatic inline void BindThreads(std::vector<std::thread> &pool) {\n  const uint32_t cpu_count = std::thread::hardware_concurrency();\n  if (cpu_count <= 1) {\n    return;\n  }\n\n  for (size_t index = 0u; index < pool.size(); ++index) {\n    cpu_set_t cpus;\n    CPU_ZERO(&cpus);\n    CPU_SET(index % cpu_count, &cpus);\n    pthread_setaffinity_np(pool[index].native_handle(), sizeof(cpus), &cpus);\n  }\n}\n\n//! Give every thread of the pool an affinity mask with all CPUs set\nstatic inline void UnbindThreads(std::vector<std::thread> &pool) {\n  cpu_set_t cpus;\n  CPU_ZERO(&cpus);\n  for (size_t cpu = 0u; cpu < CPU_SETSIZE; ++cpu) {\n    CPU_SET(cpu, &cpus);\n  }\n\n  for (auto &thread : pool) {\n    pthread_setaffinity_np(thread.native_handle(), sizeof(cpus), &cpus);\n  }\n}\n#else\nstatic inline void BindThreads(std::vector<std::thread> &) {}\nstatic inline void UnbindThreads(std::vector<std::thread> &) {}\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\nThreadPool::ThreadPool(uint32_t size, bool binding) {\n  // Start `size` threads, each running worker()\n  for (uint32_t spawned = 0u; spawned != size; ++spawned) {\n    pool_.emplace_back(&ThreadPool::worker, this);\n  }\n  if (binding) {\n    this->bind();\n  }\n}\n\nvoid ThreadPool::bind(void) {\n  BindThreads(pool_);\n}\n\nvoid ThreadPool::unbind(void) {\n  UnbindThreads(pool_);\n}\n\nvoid ThreadPool::worker(void) {\n  // One more worker is running\n  ++worker_count_;\n\n  ThreadPool::Task current;\n  for (;;) {\n    if (!this->picking(&current)) {\n      break;\n    }\n\n    // Execute the task, then drop the handle\n    current.handle->run();\n    current.handle = nullptr;\n\n    // Signal the task control, if any\n    if (current.control) {\n      current.control->notify();\n    }\n\n    // Signal the task group, if any, and drop the reference\n    if (current.group) {\n      current.group->notify();\n      current.group = nullptr;\n    }\n\n    // One active task less; wake the waiters once nothing is active or pending\n    std::lock_guard<std::mutex> guard(wait_mutex_);\n    const bool drained = (--active_count_ == 0 && pending_count_ == 0);\n    if (drained) {\n      finished_cond_.notify_all();\n    }\n  }\n\n  // One worker less; wake the waiters once the last worker has left\n  std::lock_guard<std::mutex> guard(wait_mutex_);\n  if (--worker_count_ == 0) {\n    stopped_cond_.notify_all();\n  }\n}\n\nbool ThreadPool::picking(ThreadPool::Task *task) {\n  std::unique_lock<std::mutex> latch(queue_mutex_);\n\n  // Sleep until a task is pending or the pool is stopping\n  while (!(pending_count_ > 0 || stopping_)) {\n    work_cond_.wait(latch);\n  }\n  if (stopping_) {\n    return false;\n  }\n\n  // Move the front task out of the queue\n  auto &front = queue_.front();\n  task->control = front.control;\n  task->group = std::move(front.group);\n  task->handle = std::move(front.handle);\n  queue_.pop();\n\n  // Tell the group that one of its tasks became active\n  if (task->group) {\n    task->group->mark_task_actived();\n  }\n\n  // The task moves from pending to active\n  std::lock_guard<std::mutex> guard(wait_mutex_);\n  ++active_count_;\n  --pending_count_;\n\n  return true;\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/pattern/defer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"scope_guard.h\"\n\n// Two-step pasting so that `__LINE__` is expanded before concatenation.\n// The prefix avoids a double underscore: such identifiers are reserved.\n#define AILEGO_DEFER_NAME_(x, y) x##y\n#define AILEGO_DEFER_NAME(x) AILEGO_DEFER_NAME_(ailegoDefer_, x)\n\n//! Defer operator: declares a scope guard, named after the current line,\n//! built from the given arguments. The guard type is fully qualified so\n//! the macro also works outside of namespace `zvec`.\n#define AILEGO_DEFER(...)           \\\n  auto AILEGO_DEFER_NAME(__LINE__) = \\\n      ::zvec::ailego::ScopeGuard::Make(__VA_ARGS__)\n"
  },
  {
    "path": "src/ailego/pattern/scope_guard.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/pattern/closure.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Scope Guard Implementation\n *\n *  Holds an object pointer, a callback and its arguments, and hands them\n *  to `Functor::Run` on destruction while the object pointer is non-null.\n */\ntemplate <typename T, typename TFunc>\nclass ScopeGuardImpl {\n public:\n  using Object = CallbackObject<T>;\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor (copies the callback)\n  template <typename... TArgs>\n  ScopeGuardImpl(typename Object::Type *obj, const typename Functor::Type &impl,\n                 TArgs &&...args)\n      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Constructor (moves the callback)\n  template <typename... TArgs>\n  ScopeGuardImpl(typename Object::Type *obj, typename Functor::Type &&impl,\n                 TArgs &&...args)\n      : obj_(obj),\n        impl_(std::move(impl)),\n        tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Move constructor (the source guard no longer runs its callback)\n  ScopeGuardImpl(ScopeGuardImpl &&other)\n      : obj_(other.obj_),\n        impl_(std::move(other.impl_)),\n        tuple_(std::move(other.tuple_)) {\n    other.obj_ = nullptr;\n  }\n\n  //! Destructor\n  ~ScopeGuardImpl(void) {\n    if (obj_ != nullptr) {\n      Functor::Run(obj_, impl_, tuple_);\n    }\n  }\n\n protected:\n  //! Not default-constructible, not copyable\n  ScopeGuardImpl(void) = delete;\n  ScopeGuardImpl(const ScopeGuardImpl &) = delete;\n  ScopeGuardImpl &operator=(const ScopeGuardImpl &) = delete;\n\n private:\n  //! Members\n  typename Object::Type *obj_;\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n};\n\n/*! Scope Guard Implementation (void, TFunc)\n *\n *  Variant without an object: hands the callback and its arguments to\n *  `Functor::Run` on destruction while the guard is still valid.\n */\ntemplate <typename TFunc>\nclass ScopeGuardImpl<void, TFunc> {\n public:\n  //! Callback Functor Type\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor (copies the callback)\n  template <typename... TArgs>\n  ScopeGuardImpl(const typename Functor::Type &impl, TArgs &&...args)\n      : impl_(impl), tuple_(std::forward<TArgs>(args)...), valid_(true) {}\n\n  //! Constructor (moves the callback)\n  template <typename... TArgs>\n  ScopeGuardImpl(typename Functor::Type &&impl, TArgs &&...args)\n      : impl_(std::move(impl)),\n        tuple_(std::forward<TArgs>(args)...),\n        valid_(true) {}\n\n  //! Move constructor (the source guard no longer runs its callback)\n  ScopeGuardImpl(ScopeGuardImpl &&other)\n      : impl_(std::move(other.impl_)),\n        tuple_(std::move(other.tuple_)),\n        valid_(other.valid_) {\n    other.valid_ = false;\n  }\n\n  //! Destructor\n  ~ScopeGuardImpl(void) {\n    if (!valid_) {\n      return;\n    }\n    Functor::Run(impl_, tuple_);\n  }\n\n protected:\n  //! Not default-constructible, not copyable\n  ScopeGuardImpl(void) = delete;\n  ScopeGuardImpl(const ScopeGuardImpl &) = delete;\n  ScopeGuardImpl &operator=(const ScopeGuardImpl &) = delete;\n\n private:\n  //! Members\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n  bool valid_;\n};\n\n/*! Scope Guard\n *\n *  Factory functions building the matching `ScopeGuardImpl`.\n */\nstruct ScopeGuard {\n  //! Make a scope guard object (member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static inline auto Make(T *obj, R (T::*impl)(TParams...), TArgs &&...args)\n      -> ScopeGuardImpl<T, typename CallbackTraits<decltype(impl)>::Type> {\n    using Guard =\n        ScopeGuardImpl<T, typename CallbackTraits<decltype(impl)>::Type>;\n    return Guard(obj, impl, std::forward<TArgs>(args)...);\n  }\n\n  //! Make a scope guard object (const member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static inline auto Make(const T *obj, R (T::*impl)(TParams...) const,\n                          TArgs &&...args)\n      -> ScopeGuardImpl<const T,\n                        typename CallbackTraits<decltype(impl)>::Type> {\n    using Guard =\n        ScopeGuardImpl<const T, typename CallbackTraits<decltype(impl)>::Type>;\n    return Guard(obj, impl, std::forward<TArgs>(args)...);\n  }\n\n  //! Make a scope guard object (volatile member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static inline auto Make(volatile T *obj, R (T::*impl)(TParams...) volatile,\n                          TArgs &&...args)\n      -> ScopeGuardImpl<volatile T,\n                        typename CallbackTraits<decltype(impl)>::Type> {\n    using Guard = ScopeGuardImpl<volatile T,\n                                 typename CallbackTraits<decltype(impl)>::Type>;\n    return Guard(obj, impl, std::forward<TArgs>(args)...);\n  }\n\n  //! Make a scope guard object (const volatile member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static inline auto Make(const volatile T *obj,\n                          R (T::*impl)(TParams...) const volatile,\n                          TArgs &&...args)\n      -> ScopeGuardImpl<const volatile T,\n                        typename CallbackTraits<decltype(impl)>::Type> {\n    using Guard = ScopeGuardImpl<const volatile T,\n                                 typename CallbackTraits<decltype(impl)>::Type>;\n    return Guard(obj, impl, std::forward<TArgs>(args)...);\n  }\n\n  //! Make a scope guard object (function)\n  template <\n      typename TFunc, typename... TArgs,\n      typename = typename std::enable_if<CallbackValidator<TFunc>::Value>::type>\n  static inline auto Make(TFunc &&impl, TArgs &&...args)\n      -> ScopeGuardImpl<void, typename CallbackTraits<TFunc>::Type> {\n    using Guard = ScopeGuardImpl<void, typename CallbackTraits<TFunc>::Type>;\n    return Guard(std::forward<TFunc>(impl), std::forward<TArgs>(args)...);\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/bit_string_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n#include <cstring>\n#include <vector>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\n\nnamespace ailego {\n\n/*! Bit String Writer\n *\n *  Packs values of up to 64 bits into a caller-owned byte buffer, lowest\n *  bit first, starting at bit 0 of the first byte.\n */\nclass BitStringWriter {\n public:\n  //! Constructor (zero-fills the whole buffer)\n  BitStringWriter(uint8_t *buffer, size_t buffer_size)\n      : buffer_(buffer), buffer_size_(buffer_size), offset_(0) {\n    ::memset(buffer_, 0, buffer_size_);\n  }\n\n  //! Append the low `nbit` bits of `data`.\n  //! Returns false, writing nothing, when `nbit` is outside [0, 64] or the\n  //! buffer has fewer than `nbit` bits left.\n  bool write(uint64_t data, int nbit) {\n    if (nbit < 0 || nbit > 64) {\n      return false;\n    }\n    const size_t count = static_cast<size_t>(nbit);\n    if (buffer_size_ * 8 < count + offset_) {\n      return false;\n    }\n    if (count == 0) {\n      // Nothing to store; also avoids indexing one byte past the end when\n      // the buffer is already full.\n      return true;\n    }\n    if (count < 64) {\n      // Bits above `nbit` must not leak into the neighbouring fields.\n      data &= (uint64_t(1) << count) - 1;\n    }\n\n    const size_t shift = offset_ & 7;\n    const size_t bits_remain = 8 - shift;\n    size_t j = offset_ >> 3;\n\n    // Fill the rest of the current byte\n    buffer_[j++] |= static_cast<uint8_t>(data << shift);\n    offset_ += count;\n\n    // Spill the remaining bits into the following bytes\n    if (count > bits_remain) {\n      data >>= bits_remain;\n      while (data != 0) {\n        buffer_[j++] |= static_cast<uint8_t>(data);\n        data >>= 8;\n      }\n    }\n\n    return true;\n  }\n\n  //! Retrieve the number of bits written so far\n  size_t offset() const {\n    return offset_;\n  }\n\n private:\n  uint8_t *buffer_;\n  size_t buffer_size_;\n  size_t offset_;\n};\n\n/*! Bit String Reader\n *\n *  Reads back values stored with the layout used by BitStringWriter.\n */\nclass BitStringReader {\n public:\n  //! Constructor\n  BitStringReader(const uint8_t *buffer, size_t buffer_size)\n      : buffer_(buffer), buffer_size_(buffer_size), offset_(0) {}\n\n  //! Read the next `nbit` bits into `data`.\n  //! Returns false, leaving `data` untouched, when `nbit` is outside\n  //! [0, 64] or the buffer has fewer than `nbit` bits left.\n  bool read(uint64_t &data, int nbit) {\n    if (nbit < 0 || nbit > 64) {\n      return false;\n    }\n    size_t count = static_cast<size_t>(nbit);\n    if (buffer_size_ * 8 < count + offset_) {\n      return false;\n    }\n    if (count == 0) {\n      // Avoids indexing one byte past the end when everything was consumed.\n      data = 0;\n      return true;\n    }\n\n    const size_t shift = offset_ & 7;\n    const size_t bits_remain = 8 - shift;\n    size_t i = offset_ >> 3;\n\n    uint64_t result = static_cast<uint64_t>(buffer_[i++]) >> shift;\n    offset_ += count;\n\n    if (count <= bits_remain) {\n      // The value lives entirely in the current byte\n      result &= (uint64_t(1) << count) - 1;\n    } else {\n      size_t filled = bits_remain;  // bits of `result` already populated\n      count -= bits_remain;\n\n      // Whole bytes in the middle\n      while (count > 8) {\n        result |= static_cast<uint64_t>(buffer_[i++]) << filled;\n        filled += 8;\n        count -= 8;\n      }\n\n      // Last byte, of which 1 to 8 bits belong to the value\n      uint64_t last_byte = buffer_[i];\n      last_byte &= (uint64_t(1) << count) - 1;\n      result |= last_byte << filled;\n    }\n\n    data = result;\n    return true;\n  }\n\n  //! Retrieve the number of bits read so far\n  size_t offset() const {\n    return offset_;\n  }\n\n private:\n  const uint8_t *buffer_;\n  size_t buffer_size_;\n  size_t offset_;\n};\n\n}  // namespace ailego\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/bitset_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"bitset_helper.h\"\n#include <zvec/ailego/internal/platform.h>\n\n#ifndef __SSE4_2__\n#define bitset_popcount32 ailego_popcount32\n#define bitset_popcount64 ailego_popcount64\n#else\n#define bitset_popcount32 _mm_popcnt_u32\n#define bitset_popcount64 _mm_popcnt_u64\n#endif  // !__SSE4_2__\n\n#if defined(__ARM_NEON)\nstatic inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    vst1q_u32(lhs, vandq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= rhs[0];\n  }\n}\n\nstatic inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    vst1q_u32(lhs, vbicq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= ~rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= ~rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= ~rhs[0];\n  }\n}\n\nstatic 
inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    vst1q_u32(lhs, vorrq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] |= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] |= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] |= rhs[0];\n  }\n}\n\nstatic inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    vst1q_u32(lhs, veorq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] ^= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] ^= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] ^= rhs[0];\n  }\n}\n\nstatic inline void bitset_not(uint32_t *lhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  static const uint32x4_t v_zero = vdupq_n_u32(0);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    vst1q_u32(lhs, vornq_u32(v_zero, vld1q_u32(lhs)));\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] = ~lhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] = ~lhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] = ~lhs[0];\n  }\n}\n\nstatic inline bool bitset_test_all(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);\n    if ((vgetq_lane_u64(vu64, 0) & vgetq_lane_u64(vu64, 1)) != (uint64_t)-1) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0xffffffffu) {\n        return false;\n      }\n      /* 
FALLTHRU */\n    case 2:\n      if (lhs[1] != 0xffffffffu) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0xffffffffu) {\n        return false;\n      }\n  }\n  return true;\n}\n\nstatic inline bool bitset_test_any(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);\n    if (vgetq_lane_u64(vu64, 0) | vgetq_lane_u64(vu64, 1)) {\n      return true;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return true;\n      }\n  }\n  return false;\n}\n\nstatic inline bool bitset_test_none(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);\n    if (vgetq_lane_u64(vu64, 0) | vgetq_lane_u64(vu64, 1)) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return false;\n      }\n  }\n  return true;\n}\n\n#elif defined(__AVX2__)\nstatic inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      __m256i ymm1 = 
_mm256_load_si256((__m256i *)rhs);\n      _mm256_store_si256((__m256i *)lhs, _mm256_and_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);\n      _mm256_storeu_si256((__m256i *)lhs, _mm256_and_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      lhs[2] &= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= rhs[0];\n  }\n}\n\nstatic inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);\n      _mm256_store_si256((__m256i *)lhs, _mm256_andnot_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n  
    __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);\n      _mm256_storeu_si256((__m256i *)lhs, _mm256_andnot_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      lhs[2] &= ~rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= ~rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= ~rhs[0];\n  }\n}\n\nstatic inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);\n      _mm256_store_si256((__m256i *)lhs, _mm256_or_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);\n      _mm256_storeu_si256((__m256i *)lhs, _mm256_or_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      lhs[2] |= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] |= rhs[1];\n      /* 
FALLTHRU */\n    case 1:\n      lhs[0] |= rhs[0];\n  }\n}\n\nstatic inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);\n      _mm256_store_si256((__m256i *)lhs, _mm256_xor_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);\n      _mm256_storeu_si256((__m256i *)lhs, _mm256_xor_si256(ymm1, ymm0));\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));\n      lhs += 4;\n      rhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      lhs[2] ^= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] ^= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] ^= rhs[0];\n  }\n}\n\nstatic inline void bitset_not(uint32_t *lhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  static const __m256i mask_256 = _mm256_set1_epi32(0xffffffffu);\n  static const __m128i mask_128 = _mm_set1_epi32(0xffffffffu);\n\n  if (((uintptr_t)lhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8) {\n      _mm256_store_si256(\n          (__m256i *)lhs,\n          _mm256_andnot_si256(_mm256_load_si256((__m256i 
*)lhs), mask_256));\n    }\n    if (last >= last_aligned + 4) {\n      _mm_store_si128(\n          (__m128i *)lhs,\n          _mm_andnot_si128(_mm_load_si128((__m128i *)lhs), mask_128));\n      lhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8) {\n      _mm256_storeu_si256(\n          (__m256i *)lhs,\n          _mm256_andnot_si256(_mm256_loadu_si256((__m256i *)lhs), mask_256));\n    }\n    if (last >= last_aligned + 4) {\n      _mm_storeu_si128(\n          (__m128i *)lhs,\n          _mm_andnot_si128(_mm_lddqu_si128((__m128i *)lhs), mask_128));\n      lhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      lhs[2] = ~lhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] = ~lhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] = ~lhs[0];\n  }\n}\n\nstatic inline bool bitset_test_all(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  static const __m256i mask_256 = _mm256_set1_epi32(0xffffffffu);\n  static const __m128i mask_128 = _mm_set1_epi32(0xffffffffu);\n\n  if (((uintptr_t)lhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i neq =\n          _mm256_xor_si256(_mm256_load_si256((__m256i *)lhs), mask_256);\n      if (!_mm256_testz_si256(neq, neq)) {\n        return false;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i neq = _mm_xor_si128(_mm_load_si128((__m128i *)lhs), mask_128);\n      if (!_mm_testz_si128(neq, neq)) {\n        return false;\n      }\n      lhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i neq =\n          _mm256_xor_si256(_mm256_loadu_si256((__m256i *)lhs), mask_256);\n      if (!_mm256_testz_si256(neq, neq)) {\n        return false;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i neq = _mm_xor_si128(_mm_lddqu_si128((__m128i *)lhs), mask_128);\n      if (!_mm_testz_si128(neq, neq)) {\n        return false;\n      }\n    
  lhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      if (lhs[2] != 0xffffffffu) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0xffffffffu) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0xffffffffu) {\n        return false;\n      }\n  }\n  return true;\n}\n\nstatic inline bool bitset_test_any(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      if (!_mm256_testz_si256(ymm0, ymm0)) {\n        return true;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return true;\n      }\n      lhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n      if (!_mm256_testz_si256(ymm0, ymm0)) {\n        return true;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return true;\n      }\n      lhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return true;\n      }\n  }\n  return false;\n}\n\nstatic inline bool bitset_test_none(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  if (((uintptr_t)lhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);\n      if 
(!_mm256_testz_si256(ymm0, ymm0)) {\n        return false;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return false;\n      }\n      lhs += 4;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 8) {\n      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);\n      if (!_mm256_testz_si256(ymm0, ymm0)) {\n        return false;\n      }\n    }\n    if (last >= last_aligned + 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return false;\n      }\n      lhs += 4;\n    }\n  }\n  switch (last - lhs) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return false;\n      }\n  }\n  return true;\n}\n\n#elif defined(__SSE2__)\n#ifndef __SSE3__\n#define _mm_lddqu_si128 _mm_loadu_si128\n#endif  // !__SSE3__\n\nstatic inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= 
rhs[0];\n  }\n}\n\nstatic inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= ~rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= ~rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= ~rhs[0];\n  }\n}\n\nstatic inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] |= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] |= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] |= rhs[0];\n  }\n}\n\nstatic inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, 
size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);\n      _mm_store_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);\n      _mm_storeu_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] ^= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] ^= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] ^= rhs[0];\n  }\n}\n\nstatic inline void bitset_not(uint32_t *lhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  static const __m128i mask = _mm_set1_epi32(0xffffffffu);\n\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4) {\n      _mm_store_si128((__m128i *)lhs,\n                      _mm_andnot_si128(_mm_load_si128((__m128i *)lhs), mask));\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      _mm_storeu_si128((__m128i *)lhs,\n                       _mm_andnot_si128(_mm_lddqu_si128((__m128i *)lhs), mask));\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] = ~lhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] = ~lhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] = ~lhs[0];\n  }\n}\n\nstatic inline bool bitset_test_all(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  static const __m128i mask = _mm_set1_epi32(0xffffffffu);\n\n#ifndef __SSE4_1__\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != 
last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), mask);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return false;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), mask);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return false;\n      }\n    }\n  }\n#else\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i neq = _mm_xor_si128(_mm_load_si128((__m128i *)lhs), mask);\n      if (!_mm_testz_si128(neq, neq)) {\n        return false;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i neq = _mm_xor_si128(_mm_lddqu_si128((__m128i *)lhs), mask);\n      if (!_mm_testz_si128(neq, neq)) {\n        return false;\n      }\n    }\n  }\n#endif  // !__SSE4_1__\n\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0xffffffffu) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0xffffffffu) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0xffffffffu) {\n        return false;\n      }\n  }\n  return true;\n}\n\nstatic inline bool bitset_test_any(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n#ifndef __SSE4_1__\n  static const __m128i zero = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), zero);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return true;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), zero);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return true;\n      }\n    }\n  }\n#else\n  if (((uintptr_t)lhs & 0xf) == 0) 
{\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return true;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return true;\n      }\n    }\n  }\n#endif  // !__SSE4_1__\n\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return true;\n      }\n  }\n  return false;\n}\n\nstatic inline bool bitset_test_none(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n#ifndef __SSE4_1__\n  static __m128i zero = _mm_setzero_si128();\n\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), zero);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return false;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), zero);\n      if (_mm_movemask_epi8(eq) != 0xffffu) {\n        return false;\n      }\n    }\n  }\n#else\n  if (((uintptr_t)lhs & 0xf) == 0) {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return false;\n      }\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 4) {\n      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);\n      if (!_mm_testz_si128(xmm0, xmm0)) {\n        return false;\n      }\n    }\n  }\n#endif  // !__SSE4_1__\n\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return false;\n      }\n      /* 
FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return false;\n      }\n  }\n  return true;\n}\n\n#else\n#if defined(AILEGO_M64)\nstatic inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    *(uint64_t *)(&lhs[6]) &= *(uint64_t *)(&rhs[6]);\n    *(uint64_t *)(&lhs[4]) &= *(uint64_t *)(&rhs[4]);\n    *(uint64_t *)(&lhs[2]) &= *(uint64_t *)(&rhs[2]);\n    *(uint64_t *)(&lhs[0]) &= *(uint64_t *)(&rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 7:\n      lhs[6] &= rhs[6];\n      /* FALLTHRU */\n    case 6:\n      lhs[5] &= rhs[5];\n      /* FALLTHRU */\n    case 5:\n      lhs[4] &= rhs[4];\n      /* FALLTHRU */\n    case 4:\n      lhs[3] &= rhs[3];\n      /* FALLTHRU */\n    case 3:\n      lhs[2] &= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= rhs[0];\n  }\n}\n\nstatic inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    *(uint64_t *)(&lhs[6]) &= ~(*(uint64_t *)(&rhs[6]));\n    *(uint64_t *)(&lhs[4]) &= ~(*(uint64_t *)(&rhs[4]));\n    *(uint64_t *)(&lhs[2]) &= ~(*(uint64_t *)(&rhs[2]));\n    *(uint64_t *)(&lhs[0]) &= ~(*(uint64_t *)(&rhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      lhs[6] &= ~rhs[6];\n      /* FALLTHRU */\n    case 6:\n      lhs[5] &= ~rhs[5];\n      /* FALLTHRU */\n    case 5:\n      lhs[4] &= ~rhs[4];\n      /* FALLTHRU */\n    case 4:\n      lhs[3] &= ~rhs[3];\n      /* FALLTHRU */\n    case 3:\n      lhs[2] &= ~rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= 
~rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= ~rhs[0];\n  }\n}\n\nstatic inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    *(uint64_t *)(&lhs[6]) |= *(uint64_t *)(&rhs[6]);\n    *(uint64_t *)(&lhs[4]) |= *(uint64_t *)(&rhs[4]);\n    *(uint64_t *)(&lhs[2]) |= *(uint64_t *)(&rhs[2]);\n    *(uint64_t *)(&lhs[0]) |= *(uint64_t *)(&rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 7:\n      lhs[6] |= rhs[6];\n      /* FALLTHRU */\n    case 6:\n      lhs[5] |= rhs[5];\n      /* FALLTHRU */\n    case 5:\n      lhs[4] |= rhs[4];\n      /* FALLTHRU */\n    case 4:\n      lhs[3] |= rhs[3];\n      /* FALLTHRU */\n    case 3:\n      lhs[2] |= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] |= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] |= rhs[0];\n  }\n}\n\nstatic inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    *(uint64_t *)(&lhs[6]) ^= *(uint64_t *)(&rhs[6]);\n    *(uint64_t *)(&lhs[4]) ^= *(uint64_t *)(&rhs[4]);\n    *(uint64_t *)(&lhs[2]) ^= *(uint64_t *)(&rhs[2]);\n    *(uint64_t *)(&lhs[0]) ^= *(uint64_t *)(&rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 7:\n      lhs[6] ^= rhs[6];\n      /* FALLTHRU */\n    case 6:\n      lhs[5] ^= rhs[5];\n      /* FALLTHRU */\n    case 5:\n      lhs[4] ^= rhs[4];\n      /* FALLTHRU */\n    case 4:\n      lhs[3] ^= rhs[3];\n      /* FALLTHRU */\n    case 3:\n      lhs[2] ^= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] ^= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] ^= rhs[0];\n  }\n}\n\nstatic inline void bitset_not(uint32_t *lhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 3) 
<< 3);\n\n  for (; lhs != last_aligned; lhs += 8) {\n    *(uint64_t *)(&lhs[6]) = ~(*(uint64_t *)(&lhs[6]));\n    *(uint64_t *)(&lhs[4]) = ~(*(uint64_t *)(&lhs[4]));\n    *(uint64_t *)(&lhs[2]) = ~(*(uint64_t *)(&lhs[2]));\n    *(uint64_t *)(&lhs[0]) = ~(*(uint64_t *)(&lhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      lhs[6] = ~lhs[6];\n      /* FALLTHRU */\n    case 6:\n      lhs[5] = ~lhs[5];\n      /* FALLTHRU */\n    case 5:\n      lhs[4] = ~lhs[4];\n      /* FALLTHRU */\n    case 4:\n      lhs[3] = ~lhs[3];\n      /* FALLTHRU */\n    case 3:\n      lhs[2] = ~lhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] = ~lhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] = ~lhs[0];\n  }\n}\n\nstatic inline bool bitset_test_all(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8) {\n    if (*(uint64_t *)(&lhs[6]) != (uint64_t)-1) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[4]) != (uint64_t)-1) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[2]) != (uint64_t)-1) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[0]) != (uint64_t)-1) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 7:\n      if (lhs[6] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 6:\n      if (lhs[5] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 5:\n      if (lhs[4] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 4:\n      if (lhs[3] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 3:\n      if (lhs[2] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != (uint32_t)-1) {\n        return false;\n     
 }\n  }\n  return true;\n}\n\nstatic inline bool bitset_test_any(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8) {\n    if (*(uint64_t *)(&lhs[6]) != 0u) {\n      return true;\n    }\n    if (*(uint64_t *)(&lhs[4]) != 0u) {\n      return true;\n    }\n    if (*(uint64_t *)(&lhs[2]) != 0u) {\n      return true;\n    }\n    if (*(uint64_t *)(&lhs[0]) != 0u) {\n      return true;\n    }\n  }\n  switch (last - last_aligned) {\n    case 7:\n      if (lhs[6] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 6:\n      if (lhs[5] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 5:\n      if (lhs[4] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 4:\n      if (lhs[3] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 3:\n      if (lhs[2] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return true;\n      }\n  }\n  return false;\n}\n\nstatic inline bool bitset_test_none(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n\n  for (; lhs != last_aligned; lhs += 8) {\n    if (*(uint64_t *)(&lhs[6]) != 0u) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[4]) != 0u) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[2]) != 0u) {\n      return false;\n    }\n    if (*(uint64_t *)(&lhs[0]) != 0u) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 7:\n      if (lhs[6] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 6:\n      if (lhs[5] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 5:\n      if (lhs[4] != 0u) {\n        return false;\n      
}\n      /* FALLTHRU */\n    case 4:\n      if (lhs[3] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 3:\n      if (lhs[2] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return false;\n      }\n  }\n  return true;\n}\n\n#else   // AILEGO_M64\nstatic inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    lhs[3] &= rhs[3];\n    lhs[2] &= rhs[2];\n    lhs[1] &= rhs[1];\n    lhs[0] &= rhs[0];\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= rhs[0];\n  }\n}\n\nstatic inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    lhs[3] &= ~rhs[3];\n    lhs[2] &= ~rhs[2];\n    lhs[1] &= ~rhs[1];\n    lhs[0] &= ~rhs[0];\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] &= ~rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] &= ~rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] &= ~rhs[0];\n  }\n}\n\nstatic inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    lhs[3] |= rhs[3];\n    lhs[2] |= rhs[2];\n    lhs[1] |= rhs[1];\n    lhs[0] |= rhs[0];\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] |= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] |= rhs[1];\n      /* FALLTHRU */\n    case 
1:\n      lhs[0] |= rhs[0];\n  }\n}\n\nstatic inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    lhs[3] ^= rhs[3];\n    lhs[2] ^= rhs[2];\n    lhs[1] ^= rhs[1];\n    lhs[0] ^= rhs[0];\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] ^= rhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] ^= rhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] ^= rhs[0];\n  }\n}\n\nstatic inline void bitset_not(uint32_t *lhs, size_t size) {\n  uint32_t *last = lhs + size;\n  uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    lhs[3] = ~lhs[3];\n    lhs[2] = ~lhs[2];\n    lhs[1] = ~lhs[1];\n    lhs[0] = ~lhs[0];\n  }\n  switch (last - last_aligned) {\n    case 3:\n      lhs[2] = ~lhs[2];\n      /* FALLTHRU */\n    case 2:\n      lhs[1] = ~lhs[1];\n      /* FALLTHRU */\n    case 1:\n      lhs[0] = ~lhs[0];\n  }\n}\n\nstatic inline bool bitset_test_all(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    if (lhs[3] != (uint32_t)-1) {\n      return false;\n    }\n    if (lhs[2] != (uint32_t)-1) {\n      return false;\n    }\n    if (lhs[1] != (uint32_t)-1) {\n      return false;\n    }\n    if (lhs[0] != (uint32_t)-1) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != (uint32_t)-1) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != (uint32_t)-1) {\n        return false;\n      }\n  }\n  return true;\n}\n\nstatic inline bool bitset_test_any(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const 
uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    if (lhs[3] != 0u) {\n      return true;\n    }\n    if (lhs[2] != 0u) {\n      return true;\n    }\n    if (lhs[1] != 0u) {\n      return true;\n    }\n    if (lhs[0] != 0u) {\n      return true;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return true;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return true;\n      }\n  }\n  return false;\n}\n\nstatic inline bool bitset_test_none(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n\n  for (; lhs != last_aligned; lhs += 4) {\n    if (lhs[3] != 0u) {\n      return false;\n    }\n    if (lhs[2] != 0u) {\n      return false;\n    }\n    if (lhs[1] != 0u) {\n      return false;\n    }\n    if (lhs[0] != 0u) {\n      return false;\n    }\n  }\n  switch (last - last_aligned) {\n    case 3:\n      if (lhs[2] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 2:\n      if (lhs[1] != 0u) {\n        return false;\n      }\n      /* FALLTHRU */\n    case 1:\n      if (lhs[0] != 0u) {\n        return false;\n      }\n  }\n  return true;\n}\n#endif  // AILEGO_M64\n#endif  // __AVX2__\n\n#if (defined(__ARM_NEON) && defined(__aarch64__))\nstatic inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  while (lhs != last_aligned) {\n    const uint32_t *last_stage =\n        (last_aligned <= lhs + 124u) ? 
last_aligned : lhs + 124u;\n\n    uint8x16_t v_count = vdupq_n_u8(0);\n    for (; lhs != last_stage; lhs += 4) {\n      v_count = vaddq_u8(vcntq_u8(vld1q_u8((const uint8_t *)lhs)), v_count);\n    }\n\n    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));\n    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));\n  }\n\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_xor_cardinality(const uint32_t *lhs,\n                                            const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  while (lhs != last_aligned) {\n    const uint32_t *last_stage =\n        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;\n\n    uint8x16_t v_count = vdupq_n_u8(0);\n    for (; lhs != last_stage; lhs += 4, rhs += 4) {\n      v_count = vaddq_u8(vcntq_u8(veorq_u8(vld1q_u8((const uint8_t *)lhs),\n                                           vld1q_u8((const uint8_t *)rhs))),\n                         v_count);\n    }\n\n    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));\n    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));\n  }\n\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_and_cardinality(const uint32_t *lhs,\n                                            const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  while (lhs != 
last_aligned) {\n    const uint32_t *last_stage =\n        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;\n\n    uint8x16_t v_count = vdupq_n_u8(0);\n    for (; lhs != last_stage; lhs += 4, rhs += 4) {\n      v_count = vaddq_u8(vcntq_u8(vandq_u8(vld1q_u8((const uint8_t *)lhs),\n                                           vld1q_u8((const uint8_t *)rhs))),\n                         v_count);\n    }\n\n    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));\n    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));\n  }\n\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] & rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_andnot_cardinality(const uint32_t *lhs,\n                                               const uint32_t *rhs,\n                                               size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  while (lhs != last_aligned) {\n    const uint32_t *last_stage =\n        (last_aligned <= lhs + 124u) ? 
last_aligned : lhs + 124u;\n\n    uint8x16_t v_count = vdupq_n_u8(0);\n    for (; lhs != last_stage; lhs += 4, rhs += 4) {\n      v_count = vaddq_u8(vcntq_u8(vbicq_u8(vld1q_u8((const uint8_t *)lhs),\n                                           vld1q_u8((const uint8_t *)rhs))),\n                         v_count);\n    }\n\n    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));\n    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));\n  }\n\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] & ~rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & ~rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & ~rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_or_cardinality(const uint32_t *lhs,\n                                           const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  while (lhs != last_aligned) {\n    const uint32_t *last_stage =\n        (last_aligned <= lhs + 124u) ? 
last_aligned : lhs + 124u;\n\n    uint8x16_t v_count = vdupq_n_u8(0);\n    for (; lhs != last_stage; lhs += 4, rhs += 4) {\n      v_count = vaddq_u8(vcntq_u8(vorrq_u8(vld1q_u8((const uint8_t *)lhs),\n                                           vld1q_u8((const uint8_t *)rhs))),\n                         v_count);\n    }\n\n    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));\n    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));\n  }\n\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] | rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] | rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] | rhs[0]);\n  }\n  return count;\n}\n\n#elif defined(AILEGO_M64)\nstatic inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8) {\n    count += bitset_popcount64(*(uint64_t *)(&lhs[6]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[4]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[2]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += bitset_popcount32(lhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += bitset_popcount32(lhs[5]);\n      /* FALLTHRU */\n    case 5:\n      count += bitset_popcount32(lhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += bitset_popcount32(lhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += bitset_popcount32(lhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_xor_cardinality(const uint32_t *lhs,\n                                            const uint32_t *rhs, size_t size) {\n  const uint32_t 
*last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    count += bitset_popcount64(*(uint64_t *)(&lhs[6]) ^ *(uint64_t *)(&rhs[6]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[4]) ^ *(uint64_t *)(&rhs[4]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[2]) ^ *(uint64_t *)(&rhs[2]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[0]) ^ *(uint64_t *)(&rhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += bitset_popcount32(lhs[6] ^ rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += bitset_popcount32(lhs[5] ^ rhs[5]);\n      /* FALLTHRU */\n    case 5:\n      count += bitset_popcount32(lhs[4] ^ rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += bitset_popcount32(lhs[3] ^ rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += bitset_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_and_cardinality(const uint32_t *lhs,\n                                            const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    count += bitset_popcount64(*(uint64_t *)(&lhs[6]) & *(uint64_t *)(&rhs[6]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[4]) & *(uint64_t *)(&rhs[4]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[2]) & *(uint64_t *)(&rhs[2]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[0]) & *(uint64_t *)(&rhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += bitset_popcount32(lhs[6] & rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += bitset_popcount32(lhs[5] & rhs[5]);\n      /* FALLTHRU 
*/\n    case 5:\n      count += bitset_popcount32(lhs[4] & rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += bitset_popcount32(lhs[3] & rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += bitset_popcount32(lhs[2] & rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_andnot_cardinality(const uint32_t *lhs,\n                                               const uint32_t *rhs,\n                                               size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    count +=\n        bitset_popcount64(*(uint64_t *)(&lhs[6]) & ~(*(uint64_t *)(&rhs[6])));\n    count +=\n        bitset_popcount64(*(uint64_t *)(&lhs[4]) & ~(*(uint64_t *)(&rhs[4])));\n    count +=\n        bitset_popcount64(*(uint64_t *)(&lhs[2]) & ~(*(uint64_t *)(&rhs[2])));\n    count +=\n        bitset_popcount64(*(uint64_t *)(&lhs[0]) & ~(*(uint64_t *)(&rhs[0])));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += bitset_popcount32(lhs[6] & ~rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += bitset_popcount32(lhs[5] & ~rhs[5]);\n      /* FALLTHRU */\n    case 5:\n      count += bitset_popcount32(lhs[4] & ~rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += bitset_popcount32(lhs[3] & ~rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += bitset_popcount32(lhs[2] & ~rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & ~rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & ~rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_or_cardinality(const uint32_t *lhs,\n                                           const uint32_t *rhs, size_t size) 
{\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 8, rhs += 8) {\n    count += bitset_popcount64(*(uint64_t *)(&lhs[6]) | *(uint64_t *)(&rhs[6]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[4]) | *(uint64_t *)(&rhs[4]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[2]) | *(uint64_t *)(&rhs[2]));\n    count += bitset_popcount64(*(uint64_t *)(&lhs[0]) | *(uint64_t *)(&rhs[0]));\n  }\n  switch (last - last_aligned) {\n    case 7:\n      count += bitset_popcount32(lhs[6] | rhs[6]);\n      /* FALLTHRU */\n    case 6:\n      count += bitset_popcount32(lhs[5] | rhs[5]);\n      /* FALLTHRU */\n    case 5:\n      count += bitset_popcount32(lhs[4] | rhs[4]);\n      /* FALLTHRU */\n    case 4:\n      count += bitset_popcount32(lhs[3] | rhs[3]);\n      /* FALLTHRU */\n    case 3:\n      count += bitset_popcount32(lhs[2] | rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] | rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] | rhs[0]);\n  }\n  return count;\n}\n\n#else   // !__ARM_NEON && !AILEGO_M64\nstatic inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4) {\n    count += bitset_popcount32(lhs[3]);\n    count += bitset_popcount32(lhs[2]);\n    count += bitset_popcount32(lhs[1]);\n    count += bitset_popcount32(lhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_xor_cardinality(const uint32_t *lhs,\n                                            const 
uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += bitset_popcount32(lhs[3] ^ rhs[3]);\n    count += bitset_popcount32(lhs[2] ^ rhs[2]);\n    count += bitset_popcount32(lhs[1] ^ rhs[1]);\n    count += bitset_popcount32(lhs[0] ^ rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] ^ rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] ^ rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] ^ rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_and_cardinality(const uint32_t *lhs,\n                                            const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += bitset_popcount32(lhs[3] & rhs[3]);\n    count += bitset_popcount32(lhs[2] & rhs[2]);\n    count += bitset_popcount32(lhs[1] & rhs[1]);\n    count += bitset_popcount32(lhs[0] & rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] & rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_andnot_cardinality(const uint32_t *lhs,\n                                               const uint32_t *rhs,\n                                               size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += bitset_popcount32(lhs[3] & ~rhs[3]);\n    count += bitset_popcount32(lhs[2] 
& ~rhs[2]);\n    count += bitset_popcount32(lhs[1] & ~rhs[1]);\n    count += bitset_popcount32(lhs[0] & ~rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] & ~rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] & ~rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] & ~rhs[0]);\n  }\n  return count;\n}\n\nstatic inline size_t bitset_or_cardinality(const uint32_t *lhs,\n                                           const uint32_t *rhs, size_t size) {\n  const uint32_t *last = lhs + size;\n  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);\n  size_t count = 0;\n\n  for (; lhs != last_aligned; lhs += 4, rhs += 4) {\n    count += bitset_popcount32(lhs[3] | rhs[3]);\n    count += bitset_popcount32(lhs[2] | rhs[2]);\n    count += bitset_popcount32(lhs[1] | rhs[1]);\n    count += bitset_popcount32(lhs[0] | rhs[0]);\n  }\n  switch (last - last_aligned) {\n    case 3:\n      count += bitset_popcount32(lhs[2] | rhs[2]);\n      /* FALLTHRU */\n    case 2:\n      count += bitset_popcount32(lhs[1] | rhs[1]);\n      /* FALLTHRU */\n    case 1:\n      count += bitset_popcount32(lhs[0] | rhs[0]);\n  }\n  return count;\n}\n#endif  // __ARM_NEON && __aarch64__\n\nnamespace zvec {\n\nnamespace ailego {\n\nvoid BitsetHelper::BitwiseAnd(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  bitset_and(lhs, rhs, size);\n}\n\nvoid BitsetHelper::BitwiseAndnot(uint32_t *lhs, const uint32_t *rhs,\n                                 size_t size) {\n  bitset_andnot(lhs, rhs, size);\n}\n\nvoid BitsetHelper::BitwiseOr(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  bitset_or(lhs, rhs, size);\n}\n\nvoid BitsetHelper::BitwiseXor(uint32_t *lhs, const uint32_t *rhs, size_t size) {\n  bitset_xor(lhs, rhs, size);\n}\n\nvoid BitsetHelper::BitwiseNot(uint32_t *arr, size_t size) {\n  bitset_not(arr, size);\n}\n\nbool BitsetHelper::TestAll(const uint32_t *arr, size_t size) {\n  
return bitset_test_all(arr, size);\n}\n\nbool BitsetHelper::TestAny(const uint32_t *arr, size_t size) {\n  return bitset_test_any(arr, size);\n}\n\nbool BitsetHelper::TestNone(const uint32_t *arr, size_t size) {\n  return bitset_test_none(arr, size);\n}\n\nsize_t BitsetHelper::BitwiseAndCardinality(const uint32_t *lhs,\n                                           const uint32_t *rhs, size_t size) {\n  return bitset_and_cardinality(lhs, rhs, size);\n}\n\nsize_t BitsetHelper::BitwiseOrCardinality(const uint32_t *lhs,\n                                          const uint32_t *rhs, size_t size) {\n  return bitset_or_cardinality(lhs, rhs, size);\n}\n\nsize_t BitsetHelper::BitwiseAndnotCardinality(const uint32_t *lhs,\n                                              const uint32_t *rhs,\n                                              size_t size) {\n  return bitset_andnot_cardinality(lhs, rhs, size);\n}\n\nsize_t BitsetHelper::BitwiseXorCardinality(const uint32_t *lhs,\n                                           const uint32_t *rhs, size_t size) {\n  return bitset_xor_cardinality(lhs, rhs, size);\n}\n\nsize_t BitsetHelper::Cardinality(const uint32_t *arr, size_t size) {\n  return bitset_cardinality(arr, size);\n}\n\nbool BitsetHelper::test_all(void) const {\n  return bitset_test_all(array_, size_);\n}\n\nbool BitsetHelper::test_any(void) const {\n  return bitset_test_any(array_, size_);\n}\n\nbool BitsetHelper::test_none(void) const {\n  return bitset_test_none(array_, size_);\n}\n\nsize_t BitsetHelper::cardinality(void) const {\n  return bitset_cardinality(array_, size_);\n}\n\n}  // namespace ailego\n\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/bitset_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <vector>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\n\nnamespace ailego {\n\n/*! Bitset Helper\n */\nclass BitsetHelper {\n public:\n  //! Constructor\n  BitsetHelper(void) {}\n\n  //! Constructor\n  BitsetHelper(void *buf, size_t len)\n      : array_(reinterpret_cast<uint32_t *>(buf)),\n        size_(len / sizeof(uint32_t)) {}\n\n  //! Mount a buffer as bitset\n  void mount(void *buf, size_t len) {\n    array_ = reinterpret_cast<uint32_t *>(buf);\n    size_ = len / sizeof(uint32_t);\n  }\n\n  //! Umount the buffer\n  void umount(void) {\n    array_ = nullptr;\n    size_ = 0u;\n  }\n\n  // ！Clear the bitset\n  void clear(void) {\n    memset(array_, 0, sizeof(uint32_t) * size_);\n  }\n\n  //! Test a bit in bitset\n  bool test(size_t num) const {\n    ailego_assert_with((size_ << 5) > num, \"overflow argument\");\n    return ((array_[num >> 5] & (1u << (num & 0x1f))) != 0);\n  }\n\n  //! Set a bit in bitset\n  void set(size_t num) {\n    ailego_assert_with((size_ << 5) > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] |= mask;\n  }\n\n  //! Reset a bit in bitset\n  void reset(size_t num) {\n    ailego_assert_with((size_ << 5) > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] &= ~mask;\n  }\n\n  //! 
Toggle a bit in bitset\n  void flip(size_t num) {\n    ailego_assert_with((size_ << 5) > num, \"overflow argument\");\n    uint32_t mask = (1u << (num & 0x1f));\n    array_[num >> 5] ^= mask;\n  }\n\n  //! Extract the bitset to an array\n  void extract(size_t base, std::vector<size_t> *out) const {\n    const uint32_t *iter = array_;\n    const uint32_t *last = array_ + size_;\n\n    for (; iter != last; ++iter) {\n      uint32_t w = *iter;\n\n      while (w != 0) {\n        uint32_t c = ailego_ctz32(w);\n        w &= ~(1u << c);\n        out->push_back(base + c);\n      }\n      base += 32u;\n    }\n  }\n\n  //! Extract the bitset to an array\n  void extract(std::vector<size_t> *out) const {\n    this->extract(0, out);\n  }\n\n  //! Check if all bits are set to true\n  bool test_all(void) const;\n\n  //! Check if any bits are set to true\n  bool test_any(void) const;\n\n  //! Check if none of the bits are set to true\n  bool test_none(void) const;\n\n  //! Compute the cardinality of a bitset\n  size_t cardinality(void) const;\n\n  //! Calculate the size of buffer if it contains N bits\n  static size_t BufferSize(size_t N) {\n    return (((N + 0x1f) >> 5) << 2);\n  }\n\n  //! Calculate the count of bits can be contained\n  static size_t BitsCount(size_t len) {\n    return ((len >> 2) << 2);\n  }\n\n  //! Check if all bits are set to true\n  static bool TestAll(const uint32_t *arr, size_t size);\n\n  //! Check if cube bits are set to true\n  static bool TestAny(const uint32_t *arr, size_t size);\n\n  //! Check if none of the bits are set to true\n  static bool TestNone(const uint32_t *arr, size_t size);\n\n  //! Compute the AND cardinality between two bitsets\n  static size_t BitwiseAndCardinality(const uint32_t *lhs, const uint32_t *rhs,\n                                      size_t size);\n\n  //! 
Compute the OR cardinality between two bitsets\n  static size_t BitwiseOrCardinality(const uint32_t *lhs, const uint32_t *rhs,\n                                     size_t size);\n\n  //! Compute the ANDNOT cardinality between two bitsets\n  static size_t BitwiseAndnotCardinality(const uint32_t *lhs,\n                                         const uint32_t *rhs, size_t size);\n\n  //! Compute the XOR cardinality between two bitsets\n  static size_t BitwiseXorCardinality(const uint32_t *lhs, const uint32_t *rhs,\n                                      size_t size);\n\n  //! Compute the cardinality of a bitset\n  static size_t Cardinality(const uint32_t *arr, size_t size);\n\n  //! Perform binary AND\n  static void BitwiseAnd(uint32_t *lhs, const uint32_t *rhs, size_t size);\n\n  //! Perform binary AND_NOT\n  static void BitwiseAndnot(uint32_t *lhs, const uint32_t *rhs, size_t size);\n\n  //! Perform binary OR\n  static void BitwiseOr(uint32_t *lhs, const uint32_t *rhs, size_t size);\n\n  //! Perform binary XOR\n  static void BitwiseXor(uint32_t *lhs, const uint32_t *rhs, size_t size);\n\n  //! Perform binary NOT\n  static void BitwiseNot(uint32_t *arr, size_t size);\n\n private:\n  uint32_t *array_{nullptr};\n  size_t size_{0u};\n};\n\n}  // namespace ailego\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/concurrency_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"concurrency_helper.h\"\n#include <fstream>\n#include <iostream>\n#include <thread>\n#include <zvec/ailego/utility/file_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n// Refer to:\n// https://stackoverflow.com/questions/65551215/get-docker-cpu-memory-limit-inside-container\nConcurrencyHelper::ConcurrencyHelper() {\n  std::string cfs_quota_us = \"/sys/fs/cgroup/cpu/cpu.cfs_quota_us\";\n  std::string cfs_period_us = \"/sys/fs/cgroup/cpu/cpu.cfs_period_us\";\n\n  concurrency_ = std::thread::hardware_concurrency();\n  if (FileHelper::IsExist(cfs_quota_us.c_str()) &&\n      FileHelper::IsExist(cfs_period_us.c_str())) {\n    std::ifstream quota_ifs;\n    std::string quota_str{\"\"};\n    uint32_t quota_val = 0;\n    quota_ifs.open(cfs_quota_us, std::ios::in);\n    if (quota_ifs.is_open()) {\n      quota_ifs >> quota_str;\n      if (quota_str != \"-1\") {\n        StringHelper::ToUint32(quota_str, &quota_val);\n      }\n      quota_ifs.close();\n    }\n\n    if (quota_val > 0) {\n      std::ifstream period_ifs;\n      std::string period_str{\"\"};\n      uint32_t period_val = 0;\n      period_ifs.open(cfs_period_us, std::ios::in);\n      if (period_ifs.is_open()) {\n        period_ifs >> period_str;\n        StringHelper::ToUint32(period_str, &period_val);\n        
period_ifs.close();\n      }\n\n      if (period_val > 0) {\n        concurrency_ = (quota_val + period_val - 1) / period_val;\n      }\n    }\n  }\n}\n\nuint32_t ConcurrencyHelper::container_aware_concurrency() {\n  static ConcurrencyHelper concurrency_helper;\n  return concurrency_helper.concurrency_;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/concurrency_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n\nnamespace zvec {\nnamespace ailego {\n\nclass ConcurrencyHelper {\n public:\n  ConcurrencyHelper();\n\n  //! get hardware concurrency from either vm or container\n  static uint32_t container_aware_concurrency();\n\n private:\n  uint32_t concurrency_{0};\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/dl_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"dl_helper.h\"\n#if !defined(_WIN64) && !defined(_WIN32)\n#include <dlfcn.h>\n#else\n#include <Windows.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n#if !defined(_WIN64) && !defined(_WIN32)\nvoid *DLHelper::Load(const char *path, std::string *err) {\n  void *handle = dlopen(path, RTLD_NOW);\n  if (!handle && err) {\n    *err = dlerror();\n  }\n  return handle;\n}\n\nvoid DLHelper::Unload(void *handle) {\n  ailego_return_if_false(handle);\n  dlclose(handle);\n}\n\nvoid *DLHelper::Symbol(void *handle, const char *symbol) {\n  ailego_null_if_false(handle && symbol);\n  return dlsym(handle, symbol);\n}\n\n#else\nvoid *DLHelper::Load(const char *path, std::string *err) {\n  HMODULE handle = LoadLibraryA(path);\n  if (!handle && err) {\n    DWORD error_code = GetLastError();\n    LPSTR error_msg = nullptr;\n\n    DWORD len = FormatMessageA(\n        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |\n            FORMAT_MESSAGE_IGNORE_INSERTS,\n        nullptr, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),\n        (LPSTR)&error_msg, 0, nullptr);\n    err->assign(error_msg, len);\n    LocalFree(error_msg);\n  }\n  return handle;\n}\n\nvoid DLHelper::Unload(void *handle) {\n  ailego_return_if_false(handle);\n  FreeLibrary((HMODULE)handle);\n}\n\nvoid *DLHelper::Symbol(void *handle, const char *symbol) {\n  
ailego_null_if_false(handle && symbol);\n  return GetProcAddress((HMODULE)handle, symbol);\n}\n#endif  // !_WIN64 && !_WIN32\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/dl_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Dynamic Library Helper\n */\nstruct DLHelper {\n  //! Load library from path\n  static void *Load(const char *path, std::string *err);\n\n  //! Unload a library\n  static void Unload(void *handle);\n\n  //! Retrieve a symbol from a library handle\n  static void *Symbol(void *handle, const char *symbol);\n\n  //! Load library from path\n  static void *Load(const std::string &path, std::string *err) {\n    return DLHelper::Load(path.c_str(), err);\n  }\n\n  //! Retrieve a symbol from a library handle\n  static void *Symbol(void *handle, const std::string &symbol) {\n    return DLHelper::Symbol(handle, symbol.c_str());\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/file_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/utility/file_helper.h>\n\n#if defined(_WIN32) || defined(_WIN64)\n#include <Windows.h>\n#else\n#if defined(__APPLE__) || defined(__MACH__)\n#include <mach-o/dyld.h>\n#endif\n#include <sys/stat.h>\n#include <dirent.h>\n#include <errno.h>\n#include <fcntl.h>\n#include <string.h>\n#include <unistd.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\nbool FileHelper::GetSelfPath(std::string *path) {\n#if defined(_WIN32) || defined(_WIN64)\n  char buf[MAX_PATH];\n  DWORD len = GetModuleFileNameA(NULL, buf, MAX_PATH);\n#elif defined(__APPLE__) || defined(__MACH__)\n  char buf[PATH_MAX];\n  size_t len = 0;\n\n  char dirty_buf[PATH_MAX];\n  uint32_t size = sizeof(dirty_buf);\n  if (_NSGetExecutablePath(dirty_buf, &size) == 0) {\n    realpath(dirty_buf, buf);\n    len = strlen(buf);\n  }\n#elif defined(__FreeBSD__)\n  char buf[PATH_MAX];\n  size_t len = PATH_MAX;\n  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};\n  if (sysctl(mib, 4, &buf, &len, NULL, 0) != 0) {\n    len = 0;\n  }\n#else\n  char buf[PATH_MAX];\n  ssize_t len = readlink(\"/proc/self/exe\", buf, PATH_MAX);\n#endif\n\n  if (len <= 0) {\n    return false;\n  }\n  path->assign(buf, len);\n  return true;\n}\n\nbool FileHelper::GetFilePath(NativeHandle handle, std::string *path) {\n#if defined(_WIN32) || defined(_WIN64)\n  char buf[MAX_PATH];\n  DWORD len 
=\n      GetFinalPathNameByHandleA(handle, buf, MAX_PATH, FILE_NAME_OPENED);\n#elif defined(__linux) || defined(__linux__)\n  char buf[PATH_MAX];\n  char src[32];\n  snprintf(src, sizeof(src), \"/proc/self/fd/%d\", handle);\n  ssize_t len = readlink(src, buf, PATH_MAX);\n#else\n  char buf[PATH_MAX];\n  size_t len = 0;\n  if (fcntl(handle, F_GETPATH, buf) != -1) {\n    len = strlen(buf);\n  }\n#endif\n\n  if (len <= 0) {\n    return false;\n  }\n  path->assign(buf, len);\n  return true;\n}\n\n#if !defined(_WIN32) && !defined(_WIN64)\n\nstatic inline char *JoinFilePath(const char *prefix, const char *suffix) {\n  size_t prefix_len = strlen(prefix);\n  size_t suffix_len = strlen(suffix);\n\n  char *path = (char *)malloc(prefix_len + suffix_len + 2);\n  if (path) {\n    memcpy(path, prefix, prefix_len);\n    memcpy(path + prefix_len + 1, suffix, suffix_len);\n    path[prefix_len] = '/';\n    path[prefix_len + suffix_len + 1] = '\\0';\n  }\n  return path;\n}\n\nbool FileHelper::GetWorkingDirectory(std::string *path) {\n  char buf[PATH_MAX];\n\n  if (!getcwd(buf, PATH_MAX)) {\n    return false;\n  }\n  path->assign(buf);\n  return !path->empty();\n}\n\nbool FileHelper::GetFileSize(const char *path, size_t *psz) {\n  struct stat buf;\n  if (stat(path, &buf) != 0) {\n    return false;\n  }\n  *psz = buf.st_size;\n  return true;\n}\n\nbool FileHelper::DeleteFile(const char *path) {\n  // Delete a file by the path\n  return (unlink(path) == 0);\n}\n\nbool FileHelper::RenameFile(const char *oldpath, const char *newpath) {\n  return (rename(oldpath, newpath) == 0);\n}\n\nbool FileHelper::MakePath(const char *path) {\n  char pathbuf[PATH_MAX];\n  char *sp, *pp;\n\n  strncpy(pathbuf, path, sizeof(pathbuf) - 1);\n  pathbuf[PATH_MAX - 1] = '\\0';\n\n  pp = pathbuf;\n  while ((sp = strchr(pp, '/')) != nullptr) {\n    // Neither root nor double slash in path\n    if (sp != pp) {\n      *sp = '\\0';\n      if (mkdir(pathbuf, 0755) == -1 && errno != EEXIST) {\n        return false;\n  
    }\n      *sp = '/';\n    }\n    pp = sp + 1;\n  }\n  return !(*pp != '\\0' && mkdir(pathbuf, 0755) == -1 && errno != EEXIST);\n}\n\nbool FileHelper::RemoveDirectory(const char *path) {\n  DIR *dir = opendir(path);\n  if (!dir) {\n    return false;\n  }\n\n  struct dirent *dent;\n  while ((dent = readdir(dir)) != nullptr) {\n    if (!strcmp(dent->d_name, \".\") || !strcmp(dent->d_name, \"..\")) {\n      continue;\n    }\n    char *fullpath = JoinFilePath(path, dent->d_name);\n    if (!fullpath) {\n      continue;\n    }\n\n    if (FileHelper::IsDirectory(fullpath)) {\n      FileHelper::RemoveDirectory(fullpath);\n    } else {\n      FileHelper::DeleteFile(fullpath);\n    }\n    free(fullpath);\n  }\n  closedir(dir);\n  return (rmdir(path) == 0);\n}\n\nbool FileHelper::IsExist(const char *path) {\n  return (access(path, F_OK) == 0);\n}\n\nbool FileHelper::IsRegular(const char *path) {\n  struct stat buf;\n  if (stat(path, &buf) != 0) {\n    return false;\n  }\n  return ((buf.st_mode & S_IFREG) != 0);\n}\n\nbool FileHelper::IsDirectory(const char *path) {\n  struct stat buf;\n  if (stat(path, &buf) != 0) {\n    return false;\n  }\n  return ((buf.st_mode & S_IFDIR) != 0);\n}\n\nbool FileHelper::IsSymbolicLink(const char *path) {\n  struct stat buf;\n  if (stat(path, &buf) != 0) {\n    return false;\n  }\n  return ((buf.st_mode & S_IFLNK) != 0);\n}\n\nbool FileHelper::IsSame(const char *path1, const char *path2) {\n  char real_path1[PATH_MAX];\n  char real_path2[PATH_MAX];\n  if (!realpath(path1, real_path1)) {\n    return false;\n  }\n  if (!realpath(path2, real_path2)) {\n    return false;\n  }\n  return (!strcmp(real_path1, real_path2));\n}\n\n#else\n#undef RemoveDirectory\n#undef DeleteFile\n#undef GetFileSize\n\nstatic inline char *JoinFilePath(const char *prefix, const char *suffix) {\n  size_t prefix_len = strlen(prefix);\n  size_t suffix_len = strlen(suffix);\n\n  char *path = (char *)malloc(prefix_len + suffix_len + 2);\n  if (path) {\n    memcpy(path, 
prefix, prefix_len);\n    memcpy(path + prefix_len + 1, suffix, suffix_len);\n    path[prefix_len] = '\\\\';\n    path[prefix_len + suffix_len + 1] = '\\0';\n  }\n  return path;\n}\n\nbool FileHelper::GetWorkingDirectory(std::string *path) {\n  char buf[MAX_PATH];\n  DWORD len = GetCurrentDirectoryA(MAX_PATH, buf);\n\n  if (len <= 0) {\n    return false;\n  }\n  path->assign(buf, len);\n  return true;\n}\n\nbool FileHelper::GetFileSize(const char *path, size_t *psz) {\n  HANDLE handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,\n                              OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);\n\n  LARGE_INTEGER file_size;\n  if (!GetFileSizeEx(handle, &file_size)) {\n    return false;\n  }\n  *psz = (size_t)file_size.QuadPart;\n  return true;\n}\n\nbool FileHelper::DeleteFile(const char *path) {\n  // Delete a file by the path\n  return (DeleteFileA(path));\n}\n\nbool FileHelper::RenameFile(const char *oldpath, const char *newpath) {\n  return (MoveFileA(oldpath, newpath));\n}\n\nbool FileHelper::MakePath(const char *path) {\n  char pathbuf[MAX_PATH];\n  char *sp, *pp;\n\n  strncpy(pathbuf, path, sizeof(pathbuf) - 1);\n  pathbuf[MAX_PATH - 1] = '\\0';\n\n  pp = pathbuf;\n  while ((sp = strpbrk(pp, \"/\\\\\")) != nullptr) {\n    // Neither root nor double slash in path\n    if (sp != pp) {\n      *sp = '\\0';\n      if (!CreateDirectoryA(pathbuf, nullptr) &&\n          GetLastError() != ERROR_ALREADY_EXISTS) {\n        return false;\n      }\n      *sp = '\\\\';\n    }\n    pp = sp + 1;\n  }\n  return !(*pp != '\\0' && !CreateDirectoryA(pathbuf, nullptr) &&\n           GetLastError() != ERROR_ALREADY_EXISTS);\n}\n\nbool FileHelper::RemoveDirectory(const char *path) {\n  char *pathbuf = JoinFilePath(path, \"*.*\");\n  ailego_false_if_false(pathbuf);\n\n  WIN32_FIND_DATAA file_info;\n  HANDLE file = FindFirstFileA(pathbuf, &file_info);\n\n  ailego_do_if_false(file != INVALID_HANDLE_VALUE) {\n    free(pathbuf);\n    FindClose(file);\n    
return false;\n  }\n\n  do {\n    if (!strcmp(file_info.cFileName, \".\") ||\n        !strcmp(file_info.cFileName, \"..\")) {\n      continue;\n    }\n\n    char *fullpath = JoinFilePath(path, file_info.cFileName);\n    if (!fullpath) {\n      continue;\n    }\n\n    if (file_info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {\n      FileHelper::RemoveDirectory(fullpath);\n    } else {\n      FileHelper::DeleteFile(fullpath);\n    }\n    free(fullpath);\n  } while (FindNextFileA(file, &file_info));\n\n  free(pathbuf);\n  FindClose(file);\n  return (!!RemoveDirectoryA(path));\n}\n\nbool FileHelper::IsExist(const char *path) {\n  DWORD attr = GetFileAttributesA(path);\n  return (attr != INVALID_FILE_ATTRIBUTES);\n}\n\nbool FileHelper::IsRegular(const char *path) {\n  DWORD attr = GetFileAttributesA(path);\n  return (attr != INVALID_FILE_ATTRIBUTES &&\n          !(attr & FILE_ATTRIBUTE_DIRECTORY));\n}\n\nbool FileHelper::IsDirectory(const char *path) {\n  DWORD attr = GetFileAttributesA(path);\n  return (attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY));\n}\n\nbool FileHelper::IsSymbolicLink(const char *path) {\n  DWORD attr = GetFileAttributesA(path);\n  return (attr != INVALID_FILE_ATTRIBUTES &&\n          (attr & FILE_ATTRIBUTE_REPARSE_POINT));\n}\n\nbool FileHelper::IsSame(const char *path1, const char *path2) {\n  char real_path1[MAX_PATH];\n  char real_path2[MAX_PATH];\n  char **part_path1 = nullptr;\n  char **part_path2 = nullptr;\n  DWORD path1_size =\n      GetFullPathNameA(path1, sizeof(real_path1), real_path1, part_path1);\n  DWORD path2_size =\n      GetFullPathNameA(path2, sizeof(real_path2), real_path2, part_path2);\n\n  if ((part_path1 && *part_path1 != 0) || (part_path2 && *part_path2 != 0) ||\n      (path1_size != path2_size)) {\n    return false;\n  }\n  return (!strcmp(real_path1, real_path2));\n}\n\n#endif  // !_WIN32 && !_WIN64\n\nbool FileHelper::RemovePath(const char *path) {\n  if (FileHelper::IsDirectory(path)) {\n    
return FileHelper::RemoveDirectory(path);\n  }\n  return FileHelper::DeleteFile(path);\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/float_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/float_helper.h>\n\n// #if defined(__F16C__) && defined(__AVX__)\n// #define float16(x) _cvtss_sh((x), _MM_FROUND_NO_EXC)\n// #define float32(x) _cvtsh_ss(x)\n// #endif  // __F16C__ && __AVX__\n\n#if defined(__aarch64__)\nstatic inline float float32(uint16_t val) {\n  __fp16 *p = reinterpret_cast<__fp16 *>(&val);\n  return *p;\n}\n\nstatic inline uint16_t float16(float val) {\n  __fp16 f = static_cast<__fp16>(val);\n  uint16_t *fp = reinterpret_cast<uint16_t *>(&f);\n  return *fp;\n}\n\nstatic inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,\n                                        float *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float32(arr[i]);\n  }\n}\n\nstatic inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,\n                                        float norm, float *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float32(arr[i]) / norm;\n  }\n}\n\nstatic inline void convert_fp32_to_fp16(const float *arr, size_t size,\n                                        uint16_t *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float16(arr[i]);\n  }\n}\n\nstatic inline void convert_fp32_to_fp16(const float *arr, size_t size,\n                                
        float norm, uint16_t *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float16(arr[i] / norm);\n  }\n}\n#else\n// Refer: https://github.com/Maratyszcza/FP16/blob/master/third-party/half.hpp\nstatic inline float float32(uint16_t val) {\n  static const uint32_t mantissa_table[2048] = {\n      0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000,\n      0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,\n      0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,\n      0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,\n      0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000,\n      0x35F00000, 0x35F80000, 0x36000000, 0x36040000, 0x36080000, 0x360C0000,\n      0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000,\n      0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,\n      0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000,\n      0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000,\n      0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000,\n      0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000,\n      0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000,\n      0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000,\n      0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000,\n      0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,\n      0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000,\n      0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000,\n      0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000,\n      0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000,\n      0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000,\n      0x36FC0000, 0x36FE0000, 
0x37000000, 0x37010000, 0x37020000, 0x37030000,\n      0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,\n      0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,\n      0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,\n      0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000,\n      0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 0x37200000, 0x37210000,\n      0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,\n      0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000,\n      0x372E0000, 0x372F0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,\n      0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,\n      0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,\n      0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,\n      0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000,\n      0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000,\n      0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,\n      0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000,\n      0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,\n      0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,\n      0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,\n      0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,\n      0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000,\n      0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000,\n      0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,\n      0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,\n      0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,\n      0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 
0x378C0000, 0x378C8000,\n      0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,\n      0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,\n      0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,\n      0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,\n      0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000,\n      0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000,\n      0x379F0000, 0x379F8000, 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000,\n      0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000,\n      0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,\n      0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000,\n      0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000,\n      0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000,\n      0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000,\n      0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000,\n      0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000,\n      0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000,\n      0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,\n      0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000,\n      0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000,\n      0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000,\n      0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000,\n      0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000,\n      0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000,\n      0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000,\n      0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 
0x37D78000,\n      0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000,\n      0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000,\n      0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 0x37E00000, 0x37E08000,\n      0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000,\n      0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000,\n      0x37E70000, 0x37E78000, 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000,\n      0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000,\n      0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,\n      0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000,\n      0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000,\n      0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000,\n      0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000,\n      0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000,\n      0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000,\n      0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000,\n      0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,\n      0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000,\n      0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000,\n      0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000,\n      0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000,\n      0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000,\n      0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000,\n      0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000,\n      0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,\n      0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000,\n      
0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000,\n      0x38130000, 0x38134000, 0x38138000, 0x3813C000, 0x38140000, 0x38144000,\n      0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000,\n      0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000,\n      0x38178000, 0x3817C000, 0x38180000, 0x38184000, 0x38188000, 0x3818C000,\n      0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000,\n      0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,\n      0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000,\n      0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000,\n      0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000,\n      0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000,\n      0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000,\n      0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000,\n      0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000,\n      0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,\n      0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000,\n      0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000,\n      0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000,\n      0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000,\n      0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000,\n      0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000,\n      0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000,\n      0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,\n      0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000,\n      0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000,\n      0x38370000, 0x38374000, 
0x38378000, 0x3837C000, 0x38380000, 0x38384000,\n      0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000,\n      0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000,\n      0x383B8000, 0x383BC000, 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000,\n      0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000,\n      0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,\n      0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000,\n      0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000,\n      0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000,\n      0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000,\n      0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000,\n      0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000,\n      0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000,\n      0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,\n      0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000,\n      0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000,\n      0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000,\n      0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000,\n      0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000,\n      0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000,\n      0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000,\n      0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,\n      0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000,\n      0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000,\n      0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 0x385C0000, 0x385C4000,\n      0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 
0x385D8000, 0x385DC000,\n      0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000,\n      0x385F8000, 0x385FC000, 0x38600000, 0x38604000, 0x38608000, 0x3860C000,\n      0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000,\n      0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,\n      0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000,\n      0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000,\n      0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000,\n      0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000,\n      0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000,\n      0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000,\n      0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000,\n      0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,\n      0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000,\n      0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000,\n      0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000,\n      0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000,\n      0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000,\n      0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000,\n      0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000,\n      0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,\n      0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000,\n      0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000,\n      0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 0x38000000, 0x38002000,\n      0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000,\n      0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 
0x3801A000,\n      0x3801C000, 0x3801E000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,\n      0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000,\n      0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,\n      0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000,\n      0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,\n      0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000,\n      0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000,\n      0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000,\n      0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,\n      0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000,\n      0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,\n      0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000,\n      0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000,\n      0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000,\n      0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000,\n      0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000,\n      0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000,\n      0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000,\n      0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,\n      0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000,\n      0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,\n      0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 0x38120000, 0x38122000,\n      0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000,\n      0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000,\n      0x3813C000, 0x3813E000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,\n      
0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000,\n      0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,\n      0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000,\n      0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,\n      0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000,\n      0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000,\n      0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000,\n      0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000,\n      0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000,\n      0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,\n      0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000,\n      0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000,\n      0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000,\n      0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000,\n      0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000,\n      0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,\n      0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000,\n      0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,\n      0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000,\n      0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,\n      0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 0x38240000, 0x38242000,\n      0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000,\n      0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000,\n      0x3825C000, 0x3825E000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,\n      0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000,\n      0x38274000, 0x38276000, 
0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,\n      0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000,\n      0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,\n      0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000,\n      0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000,\n      0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000,\n      0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000,\n      0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000,\n      0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,\n      0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000,\n      0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000,\n      0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000,\n      0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000,\n      0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000,\n      0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,\n      0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000,\n      0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,\n      0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000,\n      0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,\n      0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 0x38360000, 0x38362000,\n      0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000,\n      0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000,\n      0x3837C000, 0x3837E000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,\n      0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000,\n      0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,\n      0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 
0x383A8000, 0x383AA000,\n      0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000,\n      0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000,\n      0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000,\n      0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000,\n      0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000,\n      0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000,\n      0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,\n      0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000,\n      0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,\n      0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000,\n      0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000,\n      0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000,\n      0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,\n      0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000,\n      0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,\n      0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000,\n      0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,\n      0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 0x38480000, 0x38482000,\n      0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000,\n      0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000,\n      0x3849C000, 0x3849E000, 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000,\n      0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000,\n      0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,\n      0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000,\n      0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 
0x384D6000,\n      0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000,\n      0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000,\n      0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000,\n      0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,\n      0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000,\n      0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,\n      0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000,\n      0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,\n      0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000,\n      0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000,\n      0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000,\n      0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,\n      0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000,\n      0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,\n      0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000,\n      0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,\n      0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 0x385A0000, 0x385A2000,\n      0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000,\n      0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000,\n      0x385BC000, 0x385BE000, 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000,\n      0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000,\n      0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,\n      0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000,\n      0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000,\n      0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000,\n      
0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000,\n      0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000,\n      0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,\n      0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000,\n      0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,\n      0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000,\n      0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,\n      0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000,\n      0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000,\n      0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000,\n      0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,\n      0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000,\n      0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,\n      0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000,\n      0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000,\n      0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 0x386C0000, 0x386C2000,\n      0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000,\n      0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000,\n      0x386DC000, 0x386DE000, 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000,\n      0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000,\n      0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,\n      0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000,\n      0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,\n      0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000,\n      0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000,\n      0x38730000, 0x38732000, 
0x38734000, 0x38736000, 0x38738000, 0x3873A000,\n      0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,\n      0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000,\n      0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,\n      0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000,\n      0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,\n      0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000,\n      0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000,\n      0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000,\n      0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000,\n      0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000,\n      0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,\n      0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000,\n      0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000,\n      0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 0x387E0000, 0x387E2000,\n      0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000,\n      0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000,\n      0x387FC000, 0x387FE000};\n  static const uint32_t exponent_table[64] = {\n      0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,\n      0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,\n      0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,\n      0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000,\n      0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000,\n      0x0F000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,\n      0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,\n      0x85000000, 0x85800000, 0x86000000, 0x86800000, 
0x87000000, 0x87800000,\n      0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000,\n      0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000,\n      0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000};\n  static const uint16_t offset_table[64] = {\n      0,    1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,\n      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,\n      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0,\n      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,\n      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,\n      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};\n  uint16_t hval = static_cast<uint16_t>(val >> 10);\n  uint32_t bits =\n      mantissa_table[offset_table[hval] + (val & 0x3FF)] + exponent_table[hval];\n  float *p = reinterpret_cast<float *>(&bits);\n  return (*p);\n}\n\n// Refer: https://github.com/Maratyszcza/FP16/blob/master/third-party/half.hpp\nstatic inline uint16_t float16(float val) {\n  static const uint16_t base_table[512] = {\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\n      0x0000, 0x0000, 
0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,\n      0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000,\n      0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400,\n      0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800,\n      0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,\n      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,\n      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,\n      0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,\n      0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400,\n      0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800,\n      0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00,\n      0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,\n      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00};\n  static const uint8_t shift_table[512] = {\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19,\n      18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,\n      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23,\n      22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,\n      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,\n      13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,\n      24, 24, 24, 24, 24, 24, 24, 13};\n  uint32_t *p = reinterpret_cast<uint32_t *>(&val);\n  uint32_t hbits =\n      base_table[*p 
>> 23] +\n      static_cast<uint16_t>((*p & 0x7FFFFF) >> shift_table[*p >> 23]);\n  hbits += (((*p & 0x7FFFFF) >> (shift_table[*p >> 23] - 1)) |\n            (((*p >> 23) & 0xFF) == 102)) &\n           ((hbits & 0x7C00) != 0x7C00);\n  return static_cast<uint16_t>(hbits);\n}\n#if defined(__F16C__) && defined(__AVX512F__)\nstatic inline void convert_fp16_to_fp32_avx512f(const uint16_t *arr,\n                                                size_t size, float *out) {\n  const uint16_t *last = arr + size;\n  const uint16_t *last_aligned = arr + ((size >> 5) << 5);\n\n  if (((uintptr_t)arr & 0x1f) == 0 && ((uintptr_t)out & 0x3f) == 0) {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      _mm512_store_ps(out + 0,\n                      _mm512_cvtph_ps(_mm256_load_si256((__m256i *)(arr + 0))));\n      _mm512_store_ps(\n          out + 16, _mm512_cvtph_ps(_mm256_load_si256((__m256i *)(arr + 16))));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm512_store_ps(out, _mm512_cvtph_ps(_mm256_load_si256((__m256i *)arr)));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm256_store_ps(out, _mm256_cvtph_ps(_mm_load_si128((__m128i *)arr)));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      _mm512_storeu_ps(\n          out + 0, _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)(arr + 0))));\n      _mm512_storeu_ps(\n          out + 16, _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)(arr + 16))));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm512_storeu_ps(out,\n                       _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)arr)));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm256_storeu_ps(out, _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)arr)));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = float32(arr[6]);\n      /* FALLTHRU */\n    case 6:\n      out[5] = 
float32(arr[5]);\n      /* FALLTHRU */\n    case 5:\n      out[4] = float32(arr[4]);\n      /* FALLTHRU */\n    case 4:\n      out[3] = float32(arr[3]);\n      /* FALLTHRU */\n    case 3:\n      out[2] = float32(arr[2]);\n      /* FALLTHRU */\n    case 2:\n      out[1] = float32(arr[1]);\n      /* FALLTHRU */\n    case 1:\n      out[0] = float32(arr[0]);\n  }\n}\n\nstatic inline void convert_fp16_to_fp32_avx512f(const uint16_t *arr,\n                                                size_t size, float norm,\n                                                float *out) {\n  const uint16_t *last = arr + size;\n  const uint16_t *last_aligned = arr + ((size >> 5) << 5);\n  __m512 zmm_norm = _mm512_set1_ps(norm);\n\n  if (((uintptr_t)arr & 0x1f) == 0 && ((uintptr_t)out & 0x3f) == 0) {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      __m512 zmm_0 = _mm512_div_ps(\n          _mm512_cvtph_ps(_mm256_load_si256((__m256i *)(arr + 0))), zmm_norm);\n      __m512 zmm_1 = _mm512_div_ps(\n          _mm512_cvtph_ps(_mm256_load_si256((__m256i *)(arr + 16))), zmm_norm);\n      _mm512_store_ps(out + 0, zmm_0);\n      _mm512_store_ps(out + 16, zmm_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm512_store_ps(\n          out, _mm512_div_ps(_mm512_cvtph_ps(_mm256_load_si256((__m256i *)arr)),\n                             zmm_norm));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm256_store_ps(\n          out, _mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128((__m128i *)arr)),\n                             _mm256_set1_ps(norm)));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      __m512 zmm_0 = _mm512_div_ps(\n          _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)(arr + 0))), zmm_norm);\n      __m512 zmm_1 = _mm512_div_ps(\n          _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)(arr + 16))), zmm_norm);\n      _mm512_storeu_ps(out + 0, zmm_0);\n      _mm512_storeu_ps(out 
+ 16, zmm_1);\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm512_storeu_ps(\n          out,\n          _mm512_div_ps(_mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)arr)),\n                        zmm_norm));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm256_storeu_ps(\n          out, _mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128((__m128i *)arr)),\n                             _mm256_set1_ps(norm)));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = float32(arr[6]) / norm;\n      /* FALLTHRU */\n    case 6:\n      out[5] = float32(arr[5]) / norm;\n      /* FALLTHRU */\n    case 5:\n      out[4] = float32(arr[4]) / norm;\n      /* FALLTHRU */\n    case 4:\n      out[3] = float32(arr[3]) / norm;\n      /* FALLTHRU */\n    case 3:\n      out[2] = float32(arr[2]) / norm;\n      /* FALLTHRU */\n    case 2:\n      out[1] = float32(arr[1]) / norm;\n      /* FALLTHRU */\n    case 1:\n      out[0] = float32(arr[0]) / norm;\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_avx512f(const float *arr, size_t size,\n                                                uint16_t *out) {\n  const float *last = arr + size;\n  const float *last_aligned = arr + ((size >> 5) << 5);\n\n  if (((uintptr_t)arr & 0x3f) == 0 && ((uintptr_t)out & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      _mm256_store_si256(\n          (__m256i *)(out + 0),\n          _mm512_cvtps_ph(_mm512_load_ps(arr + 0), _MM_FROUND_NO_EXC));\n      _mm256_store_si256(\n          (__m256i *)(out + 16),\n          _mm512_cvtps_ph(_mm512_load_ps(arr + 16), _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm256_store_si256(\n          (__m256i *)(out + 0),\n          _mm512_cvtps_ph(_mm512_load_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm_store_si128(\n          (__m128i *)(out + 0),\n          
_mm256_cvtps_ph(_mm256_load_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      _mm256_storeu_si256(\n          (__m256i *)(out + 0),\n          _mm512_cvtps_ph(_mm512_loadu_ps(arr + 0), _MM_FROUND_NO_EXC));\n      _mm256_storeu_si256(\n          (__m256i *)(out + 16),\n          _mm512_cvtps_ph(_mm512_loadu_ps(arr + 16), _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm256_storeu_si256(\n          (__m256i *)(out + 0),\n          _mm512_cvtps_ph(_mm512_loadu_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm_storeu_si128(\n          (__m128i *)(out + 0),\n          _mm256_cvtps_ph(_mm256_loadu_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = float16(arr[6]);\n      /* FALLTHRU */\n    case 6:\n      out[5] = float16(arr[5]);\n      /* FALLTHRU */\n    case 5:\n      out[4] = float16(arr[4]);\n      /* FALLTHRU */\n    case 4:\n      out[3] = float16(arr[3]);\n      /* FALLTHRU */\n    case 3:\n      out[2] = float16(arr[2]);\n      /* FALLTHRU */\n    case 2:\n      out[1] = float16(arr[1]);\n      /* FALLTHRU */\n    case 1:\n      out[0] = float16(arr[0]);\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_avx512f(const float *arr, size_t size,\n                                                float norm, uint16_t *out) {\n  const float *last = arr + size;\n  const float *last_aligned = arr + ((size >> 5) << 5);\n  __m512 zmm_norm = _mm512_set1_ps(norm);\n\n  if (((uintptr_t)arr & 0x3f) == 0 && ((uintptr_t)out & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      __m512 zmm_0 = _mm512_div_ps(_mm512_load_ps(arr + 0), zmm_norm);\n      __m512 zmm_1 = _mm512_div_ps(_mm512_load_ps(arr + 16), zmm_norm);\n      _mm256_store_si256((__m256i *)(out + 0),\n                   
      _mm512_cvtps_ph(zmm_0, _MM_FROUND_NO_EXC));\n      _mm256_store_si256((__m256i *)(out + 16),\n                         _mm512_cvtps_ph(zmm_1, _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm256_store_si256(\n          (__m256i *)out,\n          _mm512_cvtps_ph(_mm512_div_ps(_mm512_load_ps(arr), zmm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm_store_si128((__m128i *)out,\n                      _mm256_cvtps_ph(_mm256_div_ps(_mm256_load_ps(arr),\n                                                    _mm256_set1_ps(norm)),\n                                      _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 32, out += 32) {\n      __m512 zmm_0 = _mm512_div_ps(_mm512_loadu_ps(arr + 0), zmm_norm);\n      __m512 zmm_1 = _mm512_div_ps(_mm512_loadu_ps(arr + 16), zmm_norm);\n      _mm256_storeu_si256((__m256i *)(out + 0),\n                          _mm512_cvtps_ph(zmm_0, _MM_FROUND_NO_EXC));\n      _mm256_storeu_si256((__m256i *)(out + 16),\n                          _mm512_cvtps_ph(zmm_1, _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 16) {\n      _mm256_storeu_si256(\n          (__m256i *)out,\n          _mm512_cvtps_ph(_mm512_div_ps(_mm512_loadu_ps(arr), zmm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 16;\n      out += 16;\n    }\n    if (last >= arr + 8) {\n      _mm_storeu_si128((__m128i *)out,\n                       _mm256_cvtps_ph(_mm256_div_ps(_mm256_loadu_ps(arr),\n                                                     _mm256_set1_ps(norm)),\n                                       _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = float16(arr[6] / norm);\n      /* FALLTHRU */\n    case 6:\n      out[5] = float16(arr[5] / norm);\n      /* FALLTHRU */\n    case 5:\n      
out[4] = float16(arr[4] / norm);\n      /* FALLTHRU */\n    case 4:\n      out[3] = float16(arr[3] / norm);\n      /* FALLTHRU */\n    case 3:\n      out[2] = float16(arr[2] / norm);\n      /* FALLTHRU */\n    case 2:\n      out[1] = float16(arr[1] / norm);\n      /* FALLTHRU */\n    case 1:\n      out[0] = float16(arr[0] / norm);\n  }\n}\n#endif  //__F16C__ && __AVX512F__\n\n#if defined(__F16C__) && defined(__AVX__)\nstatic inline void convert_fp16_to_fp32_avx(const uint16_t *arr, size_t size,\n                                            float *out) {\n  const uint16_t *last = arr + size;\n  const uint16_t *last_aligned = arr + ((size >> 4) << 4);\n\n  if (((uintptr_t)arr & 0xf) == 0 && ((uintptr_t)out & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      _mm256_store_ps(out + 0,\n                      _mm256_cvtph_ps(_mm_load_si128((__m128i *)(arr + 0))));\n      _mm256_store_ps(out + 8,\n                      _mm256_cvtph_ps(_mm_load_si128((__m128i *)(arr + 8))));\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm256_store_ps(out + 0,\n                      _mm256_cvtph_ps(_mm_load_si128((__m128i *)(arr + 0))));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      _mm256_storeu_ps(out + 0,\n                       _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(arr + 0))));\n      _mm256_storeu_ps(out + 8,\n                       _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(arr + 8))));\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm256_storeu_ps(out + 0,\n                       _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(arr + 0))));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = _cvtsh_ss(arr[6]);\n      /* FALLTHRU */\n    case 6:\n      out[5] = _cvtsh_ss(arr[5]);\n      /* FALLTHRU */\n    case 5:\n      out[4] = _cvtsh_ss(arr[4]);\n      /* FALLTHRU */\n    case 4:\n      out[3] = _cvtsh_ss(arr[3]);\n      
/* FALLTHRU */\n    case 3:\n      out[2] = _cvtsh_ss(arr[2]);\n      /* FALLTHRU */\n    case 2:\n      out[1] = _cvtsh_ss(arr[1]);\n      /* FALLTHRU */\n    case 1:\n      out[0] = _cvtsh_ss(arr[0]);\n  }\n}\n\nstatic inline void convert_fp16_to_fp32_avx(const uint16_t *arr, size_t size,\n                                            float norm, float *out) {\n  const uint16_t *last = arr + size;\n  const uint16_t *last_aligned = arr + ((size >> 4) << 4);\n  __m256 ymm_norm = _mm256_set1_ps(norm);\n\n  if (((uintptr_t)arr & 0xf) == 0 && ((uintptr_t)out & 0x1f) == 0) {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      __m256 ymm_0 = _mm256_cvtph_ps(_mm_load_si128((__m128i *)(arr + 0)));\n      __m256 ymm_1 = _mm256_cvtph_ps(_mm_load_si128((__m128i *)(arr + 8)));\n      ymm_0 = _mm256_div_ps(ymm_0, ymm_norm);\n      ymm_1 = _mm256_div_ps(ymm_1, ymm_norm);\n      _mm256_store_ps(out + 0, ymm_0);\n      _mm256_store_ps(out + 8, ymm_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm256_store_ps(\n          out, _mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128((__m128i *)arr)),\n                             ymm_norm));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      __m256 ymm_0 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(arr + 0)));\n      __m256 ymm_1 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(arr + 8)));\n      ymm_0 = _mm256_div_ps(ymm_0, ymm_norm);\n      ymm_1 = _mm256_div_ps(ymm_1, ymm_norm);\n      _mm256_storeu_ps(out + 0, ymm_0);\n      _mm256_storeu_ps(out + 8, ymm_1);\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm256_storeu_ps(\n          out, _mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128((__m128i *)arr)),\n                             ymm_norm));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = _cvtsh_ss(arr[6]) / norm;\n      /* FALLTHRU */\n    case 6:\n      out[5] = _cvtsh_ss(arr[5]) / norm;\n      /* 
FALLTHRU */\n    case 5:\n      out[4] = _cvtsh_ss(arr[4]) / norm;\n      /* FALLTHRU */\n    case 4:\n      out[3] = _cvtsh_ss(arr[3]) / norm;\n      /* FALLTHRU */\n    case 3:\n      out[2] = _cvtsh_ss(arr[2]) / norm;\n      /* FALLTHRU */\n    case 2:\n      out[1] = _cvtsh_ss(arr[1]) / norm;\n      /* FALLTHRU */\n    case 1:\n      out[0] = _cvtsh_ss(arr[0]) / norm;\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_avx(const float *arr, size_t size,\n                                            uint16_t *out) {\n  const float *last = arr + size;\n  const float *last_aligned = arr + ((size >> 4) << 4);\n\n  if (((uintptr_t)arr & 0x1f) == 0 && ((uintptr_t)out & 0xf) == 0) {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      _mm_store_si128(\n          (__m128i *)(out + 0),\n          _mm256_cvtps_ph(_mm256_load_ps(arr + 0), _MM_FROUND_NO_EXC));\n      _mm_store_si128(\n          (__m128i *)(out + 8),\n          _mm256_cvtps_ph(_mm256_load_ps(arr + 8), _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm_store_si128(\n          (__m128i *)(out + 0),\n          _mm256_cvtps_ph(_mm256_load_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      _mm_storeu_si128(\n          (__m128i *)(out + 0),\n          _mm256_cvtps_ph(_mm256_loadu_ps(arr + 0), _MM_FROUND_NO_EXC));\n      _mm_storeu_si128(\n          (__m128i *)(out + 8),\n          _mm256_cvtps_ph(_mm256_loadu_ps(arr + 8), _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm_storeu_si128(\n          (__m128i *)(out + 0),\n          _mm256_cvtps_ph(_mm256_loadu_ps(arr + 0), _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = _cvtss_sh(arr[6], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 6:\n      out[5] = _cvtss_sh(arr[5], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 5:\n     
 out[4] = _cvtss_sh(arr[4], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 4:\n      out[3] = _cvtss_sh(arr[3], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 3:\n      out[2] = _cvtss_sh(arr[2], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 2:\n      out[1] = _cvtss_sh(arr[1], _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 1:\n      out[0] = _cvtss_sh(arr[0], _MM_FROUND_NO_EXC);\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_avx(const float *arr, size_t size,\n                                            float norm, uint16_t *out) {\n  const float *last = arr + size;\n  const float *last_aligned = arr + ((size >> 4) << 4);\n  __m256 ymm_norm = _mm256_set1_ps(norm);\n\n  if (((uintptr_t)arr & 0x1f) == 0 && ((uintptr_t)out & 0xf) == 0) {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      __m256 ymm_0 = _mm256_load_ps(arr + 0);\n      __m256 ymm_1 = _mm256_load_ps(arr + 8);\n      ymm_0 = _mm256_div_ps(ymm_0, ymm_norm);\n      ymm_1 = _mm256_div_ps(ymm_1, ymm_norm);\n      _mm_store_si128((__m128i *)(out + 0),\n                      _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));\n      _mm_store_si128((__m128i *)(out + 8),\n                      _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));\n    }\n\n    if (last >= last_aligned + 8) {\n      _mm_store_si128(\n          (__m128i *)out,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_load_ps(arr), ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  } else {\n    for (; arr != last_aligned; arr += 16, out += 16) {\n      __m256 ymm_0 = _mm256_loadu_ps(arr + 0);\n      __m256 ymm_1 = _mm256_loadu_ps(arr + 8);\n      ymm_0 = _mm256_div_ps(ymm_0, ymm_norm);\n      ymm_1 = _mm256_div_ps(ymm_1, ymm_norm);\n      _mm_storeu_si128((__m128i *)(out + 0),\n                       _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));\n      _mm_storeu_si128((__m128i *)(out + 8),\n                       _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));\n    }\n\n    
if (last >= last_aligned + 8) {\n      _mm_storeu_si128(\n          (__m128i *)out,\n          _mm256_cvtps_ph(_mm256_div_ps(_mm256_loadu_ps(arr), ymm_norm),\n                          _MM_FROUND_NO_EXC));\n      arr += 8;\n      out += 8;\n    }\n  }\n  switch (last - arr) {\n    case 7:\n      out[6] = _cvtss_sh(arr[6] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 6:\n      out[5] = _cvtss_sh(arr[5] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 5:\n      out[4] = _cvtss_sh(arr[4] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 4:\n      out[3] = _cvtss_sh(arr[3] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 3:\n      out[2] = _cvtss_sh(arr[2] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 2:\n      out[1] = _cvtss_sh(arr[1] / norm, _MM_FROUND_NO_EXC);\n      /* FALLTHRU */\n    case 1:\n      out[0] = _cvtss_sh(arr[0] / norm, _MM_FROUND_NO_EXC);\n  }\n}\n#endif  // __F16C__ && __AVX__\n\nstatic inline void convert_fp16_to_fp32_fallback(const uint16_t *arr,\n                                                 size_t size, float *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float32(arr[i]);\n  }\n}\n\nstatic inline void convert_fp16_to_fp32_fallback(const uint16_t *arr,\n                                                 size_t size, float norm,\n                                                 float *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float32(arr[i]) / norm;\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_fallback(const float *arr, size_t size,\n                                                 uint16_t *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float16(arr[i]);\n  }\n}\n\nstatic inline void convert_fp32_to_fp16_fallback(const float *arr, size_t size,\n                                                 float norm, uint16_t *out) {\n  for (size_t i = 0; i != size; ++i) {\n    out[i] = float16(arr[i] / norm);\n  }\n}\n\nstatic inline void 
convert_fp16_to_fp32(const uint16_t *arr, size_t size,\n                                        float *out) {\n#if defined(__F16C__) && defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return convert_fp16_to_fp32_avx512f(arr, size, out);\n  }\n#endif\n\n#if defined(__F16C__) && defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    return convert_fp16_to_fp32_avx(arr, size, out);\n  }\n#endif\n\n  return convert_fp16_to_fp32_fallback(arr, size, out);\n}\n\nstatic inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,\n                                        float norm, float *out) {\n#if defined(__F16C__) && defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return convert_fp16_to_fp32_avx512f(arr, size, norm, out);\n  }\n#endif\n\n#if defined(__F16C__) && defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    return convert_fp16_to_fp32_avx(arr, size, norm, out);\n  }\n#endif\n\n  return convert_fp16_to_fp32_fallback(arr, size, norm, out);\n}\n\nstatic inline void convert_fp32_to_fp16(const float *arr, size_t size,\n                                        uint16_t *out) {\n#if defined(__F16C__) && defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return convert_fp32_to_fp16_avx512f(arr, size, out);\n  }\n#endif\n\n#if defined(__F16C__) && defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    return convert_fp32_to_fp16_avx(arr, size, 
out);\n  }\n#endif\n\n  return convert_fp32_to_fp16_fallback(arr, size, out);\n}\n\nstatic inline void convert_fp32_to_fp16(const float *arr, size_t size,\n                                        float norm, uint16_t *out) {\n#if defined(__F16C__) && defined(__AVX512F__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {\n    return convert_fp32_to_fp16_avx512f(arr, size, norm, out);\n  }\n#endif\n\n#if defined(__F16C__) && defined(__AVX__)\n  if (zvec::ailego::internal::CpuFeatures::static_flags_.F16C &&\n      zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {\n    return convert_fp32_to_fp16_avx(arr, size, norm, out);\n  }\n#endif\n\n  return convert_fp32_to_fp16_fallback(arr, size, norm, out);\n}\n\n#endif  //\n\nnamespace zvec {\nnamespace ailego {\n\nfloat FloatHelper::ToFP32(uint16_t val) {\n  return float32(val);\n}\n\nvoid FloatHelper::ToFP32(const uint16_t *arr, size_t size, float *out) {\n  return convert_fp16_to_fp32(arr, size, out);\n}\n\nvoid FloatHelper::ToFP32(const uint16_t *arr, size_t size, float norm,\n                         float *out) {\n  return convert_fp16_to_fp32(arr, size, norm, out);\n}\n\nuint16_t FloatHelper::ToFP16(float val) {\n  return float16(val);\n}\n\nvoid FloatHelper::ToFP16(const float *arr, size_t size, uint16_t *out) {\n  return convert_fp32_to_fp16(arr, size, out);\n}\n\nvoid FloatHelper::ToFP16(const float *arr, size_t size, float norm,\n                         uint16_t *out) {\n  return convert_fp32_to_fp16(arr, size, norm, out);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/math_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <cmath>\n#include <limits>\n#include <type_traits>\n#include <zvec/ailego/utility/float_helper.h>\nnamespace zvec {\nnamespace ailego {\n\n/*! Math Helper\n */\nstruct MathHelper {\n  //! Calculate the absolute value\n  template <typename T, typename R = float>\n  static inline auto Absolute(const T &x) ->\n      typename std::enable_if<std::is_arithmetic<T>::value, R>::type {\n    return static_cast<R>(std::abs(x));\n  }\n\n  //! Calculate the absolute value\n  template <typename R = float>\n  static inline R Absolute(const Float16 &x) {\n    return static_cast<R>(Float16::Absolute(x));\n  }\n\n  //! Calculate the absolute difference\n  template <typename T, typename R = float>\n  static inline auto AbsoluteDifference(const T &x, const T &y) ->\n      typename std::enable_if<std::is_integral<T>::value, R>::type {\n    auto m = ((x ^ y) & -(x < y));\n    auto d =\n        static_cast<typename std::make_unsigned<T>::type>((x ^ m) - (y ^ m));\n    return static_cast<R>(d);\n  }\n\n  //! Calculate the absolute difference\n  template <typename T, typename R = float>\n  static inline auto AbsoluteDifference(const T &x, const T &y) ->\n      typename std::enable_if<std::is_floating_point<T>::value, R>::type {\n    return static_cast<R>(std::abs(x - y));\n  }\n\n  //! 
Calculate the absolute difference\n  template <typename R = float>\n  static inline R AbsoluteDifference(const Float16 &x, const Float16 &y) {\n    return static_cast<R>(std::abs(x - y));\n  }\n\n  //! Calculate the squared difference\n  template <typename T, typename R = float>\n  static inline auto SquaredDifference(const T &x, const T &y) ->\n      typename std::enable_if<std::is_integral<T>::value, R>::type {\n    auto m = ((x ^ y) & -(x < y));\n    auto d =\n        static_cast<typename std::make_unsigned<T>::type>((x ^ m) - (y ^ m));\n    return static_cast<R>(d * d);\n  }\n\n  //! Calculate the squared difference\n  template <typename T, typename R = float>\n  static inline auto SquaredDifference(const T &x, const T &y) ->\n      typename std::enable_if<std::is_floating_point<T>::value, R>::type {\n    auto d = x - y;\n    return static_cast<R>(d * d);\n  }\n\n  //! Calculate the squared difference\n  template <typename R = float>\n  static inline R SquaredDifference(const Float16 &x, const Float16 &y) {\n    auto d = x - y;\n    return static_cast<R>(d * d);\n  }\n\n  //! Test whether two integral numbers are equal\n  template <class T>\n  static inline auto IsAlmostEqual(const T &x, const T &y, int) ->\n      typename std::enable_if<std::is_integral<T>::value, bool>::type {\n    return (x == y);\n  }\n\n  //! Test whether two floating point numbers are equal\n  template <class T>\n  static inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->\n      typename std::enable_if<std::is_floating_point<T>::value, bool>::type {\n    // the machine epsilon has to be scaled to the magnitude of the values used\n    // and multiplied by the desired precision in ULPs (units in the last place)\n    return ((std::fabs(x - y) <=\n             std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp) ||\n            (std::fabs(x - y) < std::numeric_limits<T>::min()));\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/matrix_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\nstruct MatrixHelper {\n  //! Transpose a matrix\n  template <typename T, size_t M>\n  static inline void Transpose(const void *src, size_t N, void *dst) {\n    for (size_t i = 0; i < M; ++i) {\n      for (size_t j = 0; j < N; ++j) {\n        *(reinterpret_cast<T *>(dst) + (j * M + i)) =\n            *(reinterpret_cast<const T *>(src) + (i * N + j));\n      }\n    }\n  }\n\n  //! Reverse transpose a matrix\n  template <typename T, size_t M>\n  static inline void ReverseTranspose(const void *src, size_t N, void *dst) {\n    for (size_t i = 0; i < N; ++i) {\n      for (size_t j = 0; j < M; ++j) {\n        *(reinterpret_cast<T *>(dst) + (j * N + i)) =\n            *(reinterpret_cast<const T *>(src) + (i * M + j));\n      }\n    }\n  }\n\n  //! Transpose a matrix\n  template <typename T>\n  static inline void Transpose(const void *src, size_t M, size_t N, void *dst) {\n    for (size_t i = 0; i < M; ++i) {\n      for (size_t j = 0; j < N; ++j) {\n        *(reinterpret_cast<T *>(dst) + (j * M + i)) =\n            *(reinterpret_cast<const T *>(src) + (i * N + j));\n      }\n    }\n  }\n\n  //! 
Reverse transpose a matrix\n  template <typename T>\n  static inline void ReverseTranspose(const void *src, size_t M, size_t N,\n                                      void *dst) {\n    for (size_t i = 0; i < N; ++i) {\n      for (size_t j = 0; j < M; ++j) {\n        *(reinterpret_cast<T *>(dst) + (j * N + i)) =\n            *(reinterpret_cast<const T *>(src) + (i * M + j));\n      }\n    }\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/memory_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"memory_helper.h\"\n#include <cstdio>\n#include <cstring>\n#include <fstream>\n#include <zvec/ailego/utility/file_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n\n#if defined(_WIN64) || defined(_WIN32)\n#include <Windows.h>\n#include <psapi.h>\n#else\n#if defined(__linux__) || defined(__linux)\n#include <sys/resource.h>\n#elif defined(__APPLE__) && defined(__MACH__)\n#include <mach/mach.h>\n#include <sys/sysctl.h>\n#endif\n#include <unistd.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(__linux__) || defined(__linux)\nbool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {\n  FILE *fp = fopen(\"/proc/self/statm\", \"r\");\n  if (!fp) {\n    return false;\n  }\n\n  if (fscanf(fp, \"%zd %zd\", vsz, rss) == EOF) {\n    fclose(fp);\n    return false;\n  }\n  fclose(fp);\n\n  long pagesz = sysconf(_SC_PAGESIZE);\n  *vsz *= (size_t)pagesz;\n  *rss *= (size_t)pagesz;\n  return true;\n}\n\nsize_t MemoryHelper::SelfRSS(void) {\n  FILE *fp = fopen(\"/proc/self/statm\", \"r\");\n  if (!fp) {\n    return 0;\n  }\n\n  size_t rss = 0;\n  if (fscanf(fp, \"%*d %zd %*d\", &rss) == EOF) {\n    fclose(fp);\n    return 0;\n  }\n  fclose(fp);\n  return (rss * sysconf(_SC_PAGESIZE));\n}\n\nsize_t MemoryHelper::SelfPeakRSS(void) {\n  struct rusage rusage;\n  getrusage(RUSAGE_SELF, &rusage);\n  return 
(size_t)(rusage.ru_maxrss * 1024);\n}\n\nsize_t MemoryHelper::TotalRamSize(void) {\n  return (sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE));\n}\n\nsize_t MemoryHelper::AvailableRamSize(void) {\n  FILE *fp = fopen(\"/proc/meminfo\", \"r\");\n  if (!fp) {\n    return 0;\n  }\n\n  size_t avail = 0;\n  char buf[128];\n  while (fgets(buf, sizeof(buf), fp)) {\n    if (strncmp(buf, \"MemAvailable:\", 13) == 0) {\n      avail = (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n      break;\n    }\n  }\n\n  // No found 'MemAvailable'\n  if (avail == 0) {\n    fseek(fp, 0L, SEEK_SET);\n\n    size_t count = 0;\n    while (fgets(buf, sizeof(buf), fp)) {\n      switch (buf[0]) {\n        case 'M':\n          if (strncmp(buf, \"MemFree:\", 8) == 0) {\n            avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n            ++count;\n          }\n          break;\n\n        case 'B':\n          if (strncmp(buf, \"Buffers:\", 8) == 0) {\n            avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n            ++count;\n          }\n          break;\n\n        case 'C':\n          if (strncmp(buf, \"Cached:\", 7) == 0) {\n            avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n            ++count;\n          }\n          break;\n      }\n      // All read\n      if (count == 3) {\n        break;\n      }\n    }\n  }\n  fclose(fp);\n  return (avail * 1024);\n}\n\nsize_t MemoryHelper::UsedRamSize(void) {\n  FILE *fp = fopen(\"/proc/meminfo\", \"r\");\n  if (!fp) {\n    return 0;\n  }\n\n  size_t total = 0, avail = 0, count = 0;\n  char buf[128];\n\n  while (fgets(buf, sizeof(buf), fp)) {\n    switch (buf[0]) {\n      case 'M':\n        if (strncmp(buf, \"MemTotal:\", 9) == 0) {\n          total = (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n          ++count;\n        } else if (strncmp(buf, \"MemFree:\", 8) == 0) {\n          avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n          ++count;\n        }\n        break;\n\n   
   case 'B':\n        if (strncmp(buf, \"Buffers:\", 8) == 0) {\n          avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n          ++count;\n        }\n        break;\n\n      case 'C':\n        if (strncmp(buf, \"Cached:\", 7) == 0) {\n          avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n          ++count;\n        }\n        break;\n\n      case 'S':\n        if (strncmp(buf, \"Slab:\", 5) == 0) {\n          avail += (size_t)strtoull(strchr(buf, ':') + 1, NULL, 10);\n          ++count;\n        }\n        break;\n    }\n    // All read\n    if (count == 5) {\n      break;\n    }\n  }\n  fclose(fp);\n\n  if (total == 0) {\n    total = (sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE)) / 1024;\n  }\n  return ((total - avail) * 1024);\n}\n\nsize_t MemoryHelper::ContainerAwareTotalRamSize(void) {\n  size_t total_ram_size = TotalRamSize();\n  std::string limit_in_bytes = \"/sys/fs/cgroup/memory/memory.limit_in_bytes\";\n  if (FileHelper::IsExist(limit_in_bytes.c_str())) {\n    std::ifstream memory_limit_ifs;\n    std::string memory_limit_str{\"\"};\n    memory_limit_ifs.open(limit_in_bytes, std::ios::in);\n    if (memory_limit_ifs.is_open()) {\n      uint64_t limit = 0;\n      memory_limit_ifs >> memory_limit_str;\n      if (memory_limit_str != \"-1\") {\n        // Refer to:\n        // https://access.redhat.com/documentation/zh-cn/red_hat_enterprise_linux/7/html/resource_management_guide/sec-memory\n        StringHelper::ToUint64(memory_limit_str, &limit);\n        if (limit != 0x7FFFFFFFFFFFF000) {\n          // Refer to:\n          // https://stackoverflow.com/questions/70332396/why-cgroups-file-memory-limit-in-bytes-use-9223372036854771712-as-a-default-valu\n          total_ram_size = static_cast<size_t>(limit);\n        }\n      }\n      memory_limit_ifs.close();\n    }\n  }\n  return total_ram_size;\n}\n\n#elif defined(__APPLE__) && defined(__MACH__)\nbool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {\n  struct 
mach_task_basic_info info;\n  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;\n\n  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,\n                &count) != KERN_SUCCESS) {\n    return false;\n  }\n  *vsz = info.virtual_size;\n  *rss = info.resident_size;\n  return true;\n}\n\nsize_t MemoryHelper::SelfRSS(void) {\n  struct mach_task_basic_info info;\n  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;\n\n  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,\n                &count) != KERN_SUCCESS) {\n    return 0;\n  }\n  return info.resident_size;\n}\n\nsize_t MemoryHelper::SelfPeakRSS(void) {\n  struct mach_task_basic_info info;\n  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;\n\n  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,\n                &count) != KERN_SUCCESS) {\n    return 0;\n  }\n  return info.resident_size_max;\n}\n\nsize_t MemoryHelper::TotalRamSize(void) {\n  int mib[2] = {CTL_HW, HW_MEMSIZE};\n  uint64_t size = 0;\n  size_t len = sizeof(size);\n  if (sysctl(mib, 2, &size, &len, 0, 0) != 0) {\n    return 0;\n  }\n  return (size_t)size;\n}\n\nsize_t MemoryHelper::AvailableRamSize(void) {\n  struct vm_statistics stat;\n  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;\n  vm_size_t pagesize = 0;\n\n  if (host_page_size(mach_host_self(), &pagesize) != KERN_SUCCESS) {\n    return 0;\n  }\n  if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&stat,\n                      &count) != KERN_SUCCESS) {\n    return 0;\n  }\n  return ((stat.free_count + stat.inactive_count) * pagesize);\n}\n\nsize_t MemoryHelper::UsedRamSize(void) {\n  struct vm_statistics stat;\n  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;\n  vm_size_t pagesize = 0;\n\n  if (host_page_size(mach_host_self(), &pagesize) != KERN_SUCCESS) {\n    return 0;\n  }\n  if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&stat,\n                      &count) 
!= KERN_SUCCESS) {\n    return 0;\n  }\n  return ((stat.active_count + stat.wire_count) * pagesize);\n}\n\nsize_t MemoryHelper::ContainerAwareTotalRamSize(void) {\n  return 0u;\n}\n\n#elif defined(_WIN64) || defined(_WIN32)\nstatic inline int getpagesize(void) {\n  SYSTEM_INFO info;\n  GetSystemInfo(&info);\n  return info.dwPageSize;\n}\n\nbool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {\n  PROCESS_MEMORY_COUNTERS info;\n  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {\n    return false;\n  }\n  *vsz = (size_t)info.PagefileUsage;\n  *rss = (size_t)info.WorkingSetSize;\n  return true;\n}\n\nsize_t MemoryHelper::SelfRSS(void) {\n  PROCESS_MEMORY_COUNTERS info;\n  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {\n    return 0u;\n  }\n  return (size_t)info.WorkingSetSize;\n}\n\nsize_t MemoryHelper::SelfPeakRSS(void) {\n  PROCESS_MEMORY_COUNTERS info;\n  GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));\n  return (size_t)info.PeakWorkingSetSize;\n}\n\nsize_t MemoryHelper::TotalRamSize(void) {\n  MEMORYSTATUSEX status;\n  status.dwLength = sizeof(status);\n  GlobalMemoryStatusEx(&status);\n  return (size_t)status.ullTotalPhys;\n}\n\nsize_t MemoryHelper::AvailableRamSize(void) {\n  MEMORYSTATUSEX status;\n  status.dwLength = sizeof(status);\n  GlobalMemoryStatusEx(&status);\n  return (size_t)status.ullAvailPhys;\n}\n\nsize_t MemoryHelper::UsedRamSize(void) {\n  MEMORYSTATUSEX status;\n  status.dwLength = sizeof(status);\n  GlobalMemoryStatusEx(&status);\n  return (size_t)(status.ullTotalPhys - status.ullAvailPhys);\n}\n\nsize_t MemoryHelper::ContainerAwareTotalRamSize(void) {\n  return 0u;\n}\n\n#else\nbool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {\n  *vsz = 0u;\n  *rss = 0u;\n  return false;\n}\n\nsize_t MemoryHelper::SelfRSS(void) {\n  return 0u;\n}\n\nsize_t MemoryHelper::SelfPeakRSS(void) {\n  return 0u;\n}\n\nsize_t MemoryHelper::TotalRamSize(void) {\n  return 0u;\n}\n\nsize_t 
MemoryHelper::AvailableRamSize(void) {\n  return 0u;\n}\n\nsize_t MemoryHelper::UsedRamSize(void) {\n  return 0u;\n}\n\nsize_t MemoryHelper::ContainerAwareTotalRamSize(void) {\n  return 0u;\n}\n#endif\n\nsize_t MemoryHelper::PageSize(void) {\n  static size_t page_size = static_cast<size_t>(getpagesize());\n  return page_size;\n}\n\nsize_t MemoryHelper::HugePageSize(void) {\n  static size_t page_size = static_cast<size_t>(2 * 1024 * 1024);\n  return page_size;\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/memory_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Memory Helper\n */\nstruct MemoryHelper {\n  //! Retrieve the page size of memory\n  static size_t PageSize(void);\n\n  //! Retrieve the huge page size of memory\n  static size_t HugePageSize(void);\n\n  //! Retrieve the VSZ and RSS of self process in bytes\n  static bool SelfUsage(size_t *vsz, size_t *rss);\n\n  //! Retrieve the RSS of self process in bytes\n  static size_t SelfRSS(void);\n\n  //! Retrieve the peak RSS of self process in bytes\n  static size_t SelfPeakRSS(void);\n\n  //! Retrieve the total size of physical memory (RAM) in bytes\n  static size_t TotalRamSize(void);\n\n  //! Retrieve the available size of physical memory (RAM) in bytes\n  static size_t AvailableRamSize(void);\n\n  //! Retrieve the used size of physical memory (RAM) in bytes\n  static size_t UsedRamSize(void);\n\n  //! Retrieve the total size of physical memory (RAM) in bytes in container\n  static size_t ContainerAwareTotalRamSize(void);\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/ailego/utility/string_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <algorithm>\n#include <utility>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\nbool StringHelper::StartsWith(const std::string &ref,\n                              const std::string &prefix) {\n  return (ref.size() >= prefix.size()) &&\n         (ref.compare(0, prefix.size(), prefix) == 0);\n}\n\nbool StringHelper::EndsWith(const std::string &ref, const std::string &suffix) {\n  size_t s1 = ref.size();\n  size_t s2 = suffix.size();\n  return (s1 >= s2) && (ref.compare(s1 - s2, s2, suffix) == 0);\n}\n\nvoid StringHelper::LeftTrim(std::string &str) {\n  str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) {\n              return !std::isspace(ch);\n            }));\n}\n\nvoid StringHelper::RightTrim(std::string &str) {\n  str.erase(std::find_if(str.rbegin(), str.rend(),\n                         [](int ch) { return !std::isspace(ch); })\n                .base(),\n            str.end());\n}\n\nvoid StringHelper::Trim(std::string &str) {\n  StringHelper::RightTrim(str);\n  StringHelper::LeftTrim(str);\n}\n\nstd::string StringHelper::CopyLeftTrim(std::string str) {\n  StringHelper::LeftTrim(str);\n  return str;\n}\n\nstd::string StringHelper::CopyRightTrim(std::string str) {\n  StringHelper::RightTrim(str);\n  return str;\n}\n\nstd::string StringHelper::CopyTrim(std::string 
str) {\n  StringHelper::Trim(str);\n  return str;\n}\n\n#if defined(_MSC_VER)\n#define strncasecmp _strnicmp\n#endif\n\nbool StringHelper::CompareIgnoreCase(const std::string &a,\n                                     const std::string &b) {\n  if (a.size() != b.size()) {\n    return false;\n  }\n  return (strncasecmp(a.data(), b.data(), a.size()) == 0);\n}\n\nvoid StringHelper::Append(std::string *str, const internal::Alphameric &a) {\n  str->reserve(str->size() + a.size());\n  str->append(a.data(), a.size());\n}\n\nvoid StringHelper::Append(std::string *str, const internal::Alphameric &a,\n                          const internal::Alphameric &b) {\n  str->reserve(str->size() + a.size() + b.size());\n  str->append(a.data(), a.size());\n  str->append(b.data(), b.size());\n}\n\nvoid StringHelper::Append(std::string *str, const internal::Alphameric &a,\n                          const internal::Alphameric &b,\n                          const internal::Alphameric &c) {\n  str->reserve(str->size() + a.size() + b.size() + c.size());\n  str->append(a.data(), a.size());\n  str->append(b.data(), b.size());\n  str->append(c.data(), c.size());\n}\n\nvoid StringHelper::Append(std::string *str, const internal::Alphameric &a,\n                          const internal::Alphameric &b,\n                          const internal::Alphameric &c,\n                          const internal::Alphameric &d) {\n  str->reserve(str->size() + a.size() + b.size() + c.size() + d.size());\n  str->append(a.data(), a.size());\n  str->append(b.data(), b.size());\n  str->append(c.data(), c.size());\n  str->append(d.data(), d.size());\n}\n\nvoid StringHelper::AppendViews(std::string *str,\n                               std::initializer_list<StringView> views) {\n  size_t new_size = str->size();\n  for (auto &v : views) {\n    new_size += v.size();\n  }\n  str->reserve(new_size);\n  for (auto &v : views) {\n    str->append(v.data(), v.size());\n  }\n}\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/utility/time_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/utility/time_helper.h>\n\n#if defined(_WIN64) || defined(_WIN32)\n#include <Windows.h>\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n#if defined(_WIN64) || defined(_WIN32)\nuint64_t Monotime::NanoSeconds(void) {\n  LARGE_INTEGER stamp, freq;\n  QueryPerformanceFrequency(&freq);\n  QueryPerformanceCounter(&stamp);\n  return (uint64_t)((double)stamp.QuadPart *\n                    (1000000000.0 / (double)freq.QuadPart));\n}\n\nuint64_t Monotime::MicroSeconds(void) {\n  LARGE_INTEGER stamp, freq;\n  QueryPerformanceFrequency(&freq);\n  QueryPerformanceCounter(&stamp);\n  return (stamp.QuadPart * 1000000u / freq.QuadPart);\n}\n\nuint64_t Monotime::MilliSeconds(void) {\n  LARGE_INTEGER stamp, freq;\n  QueryPerformanceFrequency(&freq);\n  QueryPerformanceCounter(&stamp);\n  return (stamp.QuadPart * 1000u / freq.QuadPart);\n}\n\nuint64_t Monotime::Seconds(void) {\n  LARGE_INTEGER stamp, freq;\n  QueryPerformanceFrequency(&freq);\n  QueryPerformanceCounter(&stamp);\n  return (stamp.QuadPart / freq.QuadPart);\n}\n\n// January 1, 1970 (start of Unix epoch) in \"ticks\"\n#define UNIX_TIME_START 0x019DB1DED53E8000ull\n\nuint64_t Realtime::NanoSeconds(void) {\n  LARGE_INTEGER stamp;\n  FILETIME file;\n  GetSystemTimeAsFileTime(&file);\n  stamp.HighPart = file.dwHighDateTime;\n  stamp.LowPart = file.dwLowDateTime;\n  return 
(stamp.QuadPart - UNIX_TIME_START) * 100u;\n}\n\nuint64_t Realtime::MicroSeconds(void) {\n  LARGE_INTEGER stamp;\n  FILETIME file;\n  GetSystemTimeAsFileTime(&file);\n  stamp.HighPart = file.dwHighDateTime;\n  stamp.LowPart = file.dwLowDateTime;\n  return (stamp.QuadPart - UNIX_TIME_START) / 10u;\n}\n\nuint64_t Realtime::MilliSeconds(void) {\n  LARGE_INTEGER stamp;\n  FILETIME file;\n  GetSystemTimeAsFileTime(&file);\n  stamp.HighPart = file.dwHighDateTime;\n  stamp.LowPart = file.dwLowDateTime;\n  return (stamp.QuadPart - UNIX_TIME_START) / 10000u;\n}\n\nuint64_t Realtime::Seconds(void) {\n  LARGE_INTEGER stamp;\n  FILETIME file;\n  GetSystemTimeAsFileTime(&file);\n  stamp.HighPart = file.dwHighDateTime;\n  stamp.LowPart = file.dwLowDateTime;\n  return (stamp.QuadPart - UNIX_TIME_START) / 10000000u;\n}\n\nsize_t Realtime::Localtime(uint64_t stamp, const char *format, char *buf,\n                           size_t len) {\n  time_t val = static_cast<time_t>(stamp);\n  return strftime(buf, len, format, localtime(&val));\n}\n\nsize_t Realtime::Gmtime(uint64_t stamp, const char *format, char *buf,\n                        size_t len) {\n  time_t val = static_cast<time_t>(stamp);\n  return strftime(buf, len, format, gmtime(&val));\n}\n\nsize_t Realtime::Localtime(const char *format, char *buf, size_t len) {\n  time_t now = time(0);\n  return strftime(buf, len, format, localtime(&now));\n}\n\nsize_t Realtime::Gmtime(const char *format, char *buf, size_t len) {\n  time_t now = time(0);\n  return strftime(buf, len, format, gmtime(&now));\n}\n#else\nuint64_t Monotime::NanoSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_MONOTONIC, &tspec);\n  return (tspec.tv_sec * 1000000000u + tspec.tv_nsec);\n}\n\nuint64_t Monotime::MicroSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_MONOTONIC, &tspec);\n  return (tspec.tv_sec * 1000000u + tspec.tv_nsec / 1000u);\n}\n\nuint64_t Monotime::MilliSeconds(void) {\n  struct timespec tspec;\n  
clock_gettime(CLOCK_MONOTONIC, &tspec);\n  return (tspec.tv_sec * 1000u + tspec.tv_nsec / 1000000u);\n}\n\nuint64_t Monotime::Seconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_MONOTONIC, &tspec);\n  return (tspec.tv_sec);\n}\n\nuint64_t Realtime::NanoSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_REALTIME, &tspec);\n  return (tspec.tv_sec * 1000000000u + tspec.tv_nsec);\n}\n\nuint64_t Realtime::MicroSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_REALTIME, &tspec);\n  return (tspec.tv_sec * 1000000u + tspec.tv_nsec / 1000u);\n}\n\nuint64_t Realtime::MilliSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_REALTIME, &tspec);\n  return (tspec.tv_sec * 1000u + tspec.tv_nsec / 1000000u);\n}\n\nuint64_t Realtime::Seconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_REALTIME, &tspec);\n  return (tspec.tv_sec);\n}\n\nsize_t Realtime::Localtime(uint64_t stamp, const char *format, char *buf,\n                           size_t len) {\n  struct tm tmbuf;\n  time_t val = static_cast<time_t>(stamp);\n  return strftime(buf, len, format, localtime_r(&val, &tmbuf));\n}\n\nsize_t Realtime::Gmtime(uint64_t stamp, const char *format, char *buf,\n                        size_t len) {\n  struct tm tmbuf;\n  time_t val = static_cast<time_t>(stamp);\n  return strftime(buf, len, format, gmtime_r(&val, &tmbuf));\n}\n\nsize_t Realtime::Localtime(const char *format, char *buf, size_t len) {\n  struct tm tmbuf;\n  time_t now = time(0);\n  return strftime(buf, len, format, localtime_r(&now, &tmbuf));\n}\n\nsize_t Realtime::Gmtime(const char *format, char *buf, size_t len) {\n  struct tm tmbuf;\n  time_t now = time(0);\n  return strftime(buf, len, format, gmtime_r(&now, &tmbuf));\n}\n\nuint64_t CPUtime::NanoSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);\n  return (tspec.tv_sec * 1000000000u + tspec.tv_nsec);\n}\n\nuint64_t CPUtime::MicroSeconds(void) {\n  struct timespec 
tspec;\n  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);\n  return (tspec.tv_sec * 1000000u + tspec.tv_nsec / 1000u);\n}\n\nuint64_t CPUtime::MilliSeconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);\n  return (tspec.tv_sec * 1000u + tspec.tv_nsec / 1000000u);\n}\n\nuint64_t CPUtime::Seconds(void) {\n  struct timespec tspec;\n  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);\n  return (tspec.tv_sec);\n}\n#endif  // _WIN64 || _WIN32\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/version.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"version.h\"\n#include \"version.i\"\n\n#ifdef ailego_VERSION\n#define AILEGO_VERSION_STRING ailego_VERSION\n#else\n#define AILEGO_VERSION_STRING \"unknown\"\n#endif\n\nnamespace zvec {\n\nnamespace ailego {\n\nstatic const char AILEGO_VERSION_DETAILS[] = AILEGO_VERSION_COMPILE_DETAILS(\n    \"AiLego Library Version \" AILEGO_VERSION_STRING\n    \".\\nCopyright (C) The Software Authors. All rights reserved.\\n\");\n\nconst char *Version::String(void) {\n  return AILEGO_VERSION_STRING;\n}\n\nconst char *Version::Details(void) {\n  return AILEGO_VERSION_DETAILS;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n\n// extern \"C\" int __wrap_main(int, char *[]) {\n//   fwrite(ailego::AILEGO_VERSION_DETAILS, 1,\n//          strlen(ailego::AILEGO_VERSION_DETAILS), stdout);\n//   fflush(stdout);\n//   _Exit(0);\n// }\n"
  },
  {
    "path": "src/ailego/version.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\nnamespace zvec {\n\nnamespace ailego {\n\n/*! AiLego Version\n */\nstruct Version {\n  //! Retrieve the version number in string\n  static const char *String(void);\n\n  //! Retrieve the detailed version information\n  static const char *Details(void);\n};\n\n}  // namespace ailego\n\n}  // namespace zvec"
  },
  {
    "path": "src/ailego/version.i",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/internal/platform.h>\n\n#ifndef AILEGO_VERSION_TO_STRING_\n#define AILEGO_VERSION_TO_STRING_(x) #x\n#endif\n\n#ifndef AILEGO_VERSION_TO_STRING\n#define AILEGO_VERSION_TO_STRING(x) AILEGO_VERSION_TO_STRING_(x)\n#endif\n\n/*! http://nadeausoftware.com/articles/2012/01/\n *  c_c_tip_how_use_compiler_predefined_macros_detect_operating_system\n */\n#if defined(__linux) || defined(__linux__)\n#define AILEGO_VERSION_PLATFORM \"Linux\"\n#elif defined(__FreeBSD__)\n#define AILEGO_VERSION_PLATFORM \"FreeBSD\"\n#elif defined(__NetBSD__)\n#define AILEGO_VERSION_PLATFORM \"NetBSD\"\n#elif defined(__OpenBSD__)\n#define AILEGO_VERSION_PLATFORM \"OpenBSD\"\n#elif defined(__APPLE__) || defined(__MACH__)\n#define AILEGO_VERSION_PLATFORM \"Darwin\"\n#elif defined(__CYGWIN__) && !defined(_WIN32)\n#define AILEGO_VERSION_PLATFORM \"Cygwin\"\n#elif defined(_WIN64)\n#define AILEGO_VERSION_PLATFORM \"Microsoft Windows (64-bit)\"\n#elif defined(_WIN32)\n#define AILEGO_VERSION_PLATFORM \"Microsoft Windows (32-bit)\"\n#elif defined(__sun) && defined(__SVR4)\n#define AILEGO_VERSION_PLATFORM \"Solaris\"\n#elif defined(_AIX)\n#define AILEGO_VERSION_PLATFORM \"AIX\"\n#elif defined(__hpux)\n#define AILEGO_VERSION_PLATFORM \"HP-UX\"\n#elif defined(__unix) || defined(__unix__)\n#define AILEGO_VERSION_PLATFORM \"Unix\"\n#else\n#define 
AILEGO_VERSION_PLATFORM \"Unknown Platform\"\n#endif\n\n/*! http://nadeausoftware.com/articles/2012/10/\n *  c_c_tip_how_detect_compiler_name_and_version_using_compiler_predefined_macros\n */\n#if defined(__NVCC__)\n#define AILEGO_VERSION_COMPILER_NAME \"Nvidia CUDA Compiler\"\n#elif defined(__clang__)\n#define AILEGO_VERSION_COMPILER_NAME \"Clang/LLVM\"\n#elif defined(__ICC) || defined(__INTEL_COMPILER)\n#define AILEGO_VERSION_COMPILER_NAME \"Intel ICC/ICPC\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#define AILEGO_VERSION_COMPILER_NAME \"GNU GCC/G++\"\n#elif defined(__HP_cc) || defined(__HP_aCC)\n#define AILEGO_VERSION_COMPILER_NAME \"Hewlett-Packard C/aC++\"\n#elif defined(__IBMC__) || defined(__IBMCPP__)\n#define AILEGO_VERSION_COMPILER_NAME \"IBM XL C/C++\"\n#elif defined(_MSC_VER)\n#define AILEGO_VERSION_COMPILER_NAME \"Microsoft Visual C++\"\n#elif defined(__PGI)\n#define AILEGO_VERSION_COMPILER_NAME \"Portland Group PGCC/PGCPP\"\n#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)\n#define AILEGO_VERSION_COMPILER_NAME \"Oracle Solaris Studio\"\n#else\n#define AILEGO_VERSION_COMPILER_NAME \"Unknown Compiler\"\n#endif\n\n#if defined(__CUDACC_VER_MAJOR__)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME  \\\n  \" (\" AILEGO_VERSION_TO_STRING(__CUDACC_VER_MAJOR__) \\\n  \".\" AILEGO_VERSION_TO_STRING(__CUDACC_VER_MINOR__)  \\\n  \".\" AILEGO_VERSION_TO_STRING(__CUDACC_VER_BUILD__) \")\"\n#elif defined(__VERSION__)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME \" (\" __VERSION__ \")\"\n#elif defined(_MSC_FULL_VER)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME \" (\" AILEGO_VERSION_TO_STRING(_MSC_FULL_VER) \")\"\n#elif defined(_MSC_VER)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME \" (\" AILEGO_VERSION_TO_STRING(_MSC_VER) \")\"\n#elif defined(__PGIC__)\n#define AILEGO_VERSION_COMPILER                                         \\\n  AILEGO_VERSION_COMPILER_NAME                   
                       \\\n  \" (\" AILEGO_VERSION_TO_STRING(__PGIC__) \".\" AILEGO_VERSION_TO_STRING( \\\n      __PGIC_MINOR__) \".\" AILEGO_VERSION_TO_STRING(__PGIC_PATCHLEVEL__) \")\"\n#elif defined(__xlc__)\n#define AILEGO_VERSION_COMPILER AILEGO_VERSION_COMPILER_NAME \" (\" __xlc__ \")\"\n#elif defined(__SUNPRO_C)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME \" (\" AILEGO_VERSION_TO_STRING(__SUNPRO_C) \")\"\n#elif defined(__HP_cc)\n#define AILEGO_VERSION_COMPILER \\\n  AILEGO_VERSION_COMPILER_NAME \" (\" AILEGO_VERSION_TO_STRING(__HP_cc) \")\"\n#else\n#define AILEGO_VERSION_COMPILER AILEGO_VERSION_COMPILER_NAME\n#endif\n\n#if defined(__x86_64__) || defined(_M_X64)\n#define AILEGO_VERSION_PROCESSOR \"x86 64-bit Processor\"\n#elif defined(__i386) || defined(_M_IX86)\n#define AILEGO_VERSION_PROCESSOR \"x86 32-bit Processor\"\n#elif defined(__ARM_ARCH)\n#if defined(__ARM_64BIT_STATE)\n#define AILEGO_VERSION_PROCESSOR \"ARM 64-bit Processor\"\n#else\n#define AILEGO_VERSION_PROCESSOR \"ARM 32-bit Processor\"\n#endif\n#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)\n#define AILEGO_VERSION_PROCESSOR \"Itanium Processor\"\n#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)\n#define AILEGO_VERSION_PROCESSOR \"PowerPC 64-bit Processor\"\n#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)\n#define AILEGO_VERSION_PROCESSOR \"PowerPC 32-bit Processor\"\n#elif defined(__sparc)\n#define AILEGO_VERSION_PROCESSOR \"SPARC Processor\"\n#else\n#define AILEGO_VERSION_PROCESSOR \"Unknown Processor\"\n#endif\n\n#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__\n#define AILEGO_VERSION_BYTE_ORDER \"  Little-endian Byte Order\\n\"\n#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__\n#define AILEGO_VERSION_BYTE_ORDER \"  Big-endian Byte Order\\n\"\n#elif __BYTE_ORDER__ == __ORDER_PDP_ENDIAN__\n#define AILEGO_VERSION_BYTE_ORDER \"  PDP-endian Byte Order\\n\"\n#else\n#define AILEGO_VERSION_BYTE_ORDER \"\"\n#endif\n\n#if 
defined(_DEBUG) || (!defined(__OPTIMIZE__) && !defined(NDEBUG))\n#define AILEGO_VERSION_DEBUG_INFO \"  Debug Information\\n\"\n#else\n#define AILEGO_VERSION_DEBUG_INFO \"\"\n#endif\n\n#if defined(__SANITIZE_ADDRESS__)\n#define AILEGO_VERSION_ASAN \"  Address Sanitizer\\n\"\n#else\n#define AILEGO_VERSION_ASAN \"\"\n#endif\n\n#if defined(__STDC_VERSION__)\n#define AILEGO_VERSION_STDC \\\n  \"  C Standard \" AILEGO_VERSION_TO_STRING(__STDC_VERSION__) \"\\n\"\n#else\n#define AILEGO_VERSION_STDC \"\"\n#endif\n\n#if defined(__cplusplus)\n#define AILEGO_VERSION_CPLUSPLUS \\\n  \"  C++ Standard \" AILEGO_VERSION_TO_STRING(__cplusplus) \"\\n\"\n#else\n#define AILEGO_VERSION_CPLUSPLUS \"\"\n#endif\n\n#if defined(__GXX_ABI_VERSION)\n#define AILEGO_VERSION_GXX_ABI \\\n  \"  GNU C++ ABI \" AILEGO_VERSION_TO_STRING(__GXX_ABI_VERSION) \"\\n\"\n#else\n#define AILEGO_VERSION_GXX_ABI \"\"\n#endif\n\n#if defined(__GLIBC__)\n#define AILEGO_VERSION_GLIBC               \\\n  \"  GNU glibc \" AILEGO_VERSION_TO_STRING( \\\n      __GLIBC__) \".\" AILEGO_VERSION_TO_STRING(__GLIBC_MINOR__) \"\\n\"\n#else\n#define AILEGO_VERSION_GLIBC \"\"\n#endif\n\n#if defined(WINVER)\n#define AILEGO_VERSION_WINSDK \\\n  \"  Microsoft Windows SDK \" AILEGO_VERSION_TO_STRING(WINVER) \"\\n\"\n#else\n#define AILEGO_VERSION_WINSDK \"\"\n#endif\n\n#if defined(__CLR_VER)\n#define AILEGO_VERSION_CLR \\\n  \"  Microsoft CLR \" AILEGO_VERSION_TO_STRING(__CLR_VER) \"\\n\"\n#else\n#define AILEGO_VERSION_CLR \"\"\n#endif\n\n#if defined(__LSB_VERSION__)\n#define AILEGO_VERSION_LSB \\\n  \"  Linux Standards Base \" AILEGO_VERSION_TO_STRING(__LSB_VERSION__) \"\\n\"\n#else\n#define AILEGO_VERSION_LSB \"\"\n#endif\n\n#if defined(_POSIX_VERSION)\n#define AILEGO_VERSION_POSIX \\\n  \"  POSIX Specification \" AILEGO_VERSION_TO_STRING(_POSIX_VERSION) \"\\n\"\n#else\n#define AILEGO_VERSION_POSIX \"\"\n#endif\n\n#if defined(_XOPEN_VERSION)\n#define AILEGO_VERSION_XOPEN \\\n  \"  X/Open Specification \" 
AILEGO_VERSION_TO_STRING(_XOPEN_VERSION) \"\\n\"\n#else\n#define AILEGO_VERSION_XOPEN \"\"\n#endif\n\n#if defined(_OPENMP)\n#define AILEGO_VERSION_OPENMP \\\n  \"  OpenMP API \" AILEGO_VERSION_TO_STRING(_OPENMP) \"\\n\"\n#else\n#define AILEGO_VERSION_OPENMP \"\"\n#endif\n\n#if defined(__ARM_NEON)\n#define AILEGO_VERSION_SIMD \"  Arm Neon Instruction Set\\n\"\n#elif defined(__AVX512FP16__)\n#define AILEGO_VERSION_SIMD \"  AVX-512FP16 Instruction Set\\n\"\n#elif defined(__AVX512F__)\n#define AILEGO_VERSION_SIMD \"  AVX-512F Instruction Set\\n\"\n#elif defined(__AVX2__)\n#define AILEGO_VERSION_SIMD \"  AVX-2 Instruction Set\\n\"\n#elif defined(__AVX__)\n#define AILEGO_VERSION_SIMD \"  AVX Instruction Set\\n\"\n#elif defined(__SSE4_2__)\n#define AILEGO_VERSION_SIMD \"  SSE-4.2 Instruction Set\\n\"\n#elif defined(__SSE4_1__)\n#define AILEGO_VERSION_SIMD \"  SSE-4.1 Instruction Set\\n\"\n#elif defined(__SSSE3__)\n#define AILEGO_VERSION_SIMD \"  SSSE-3 Instruction Set\\n\"\n#elif defined(__SSE3__)\n#define AILEGO_VERSION_SIMD \"  SSE-3 Instruction Set\\n\"\n#elif defined(__SSE2__)\n#define AILEGO_VERSION_SIMD \"  SSE-2 Instruction Set\\n\"\n#elif defined(__SSE__)\n#define AILEGO_VERSION_SIMD \"  SSE Instruction Set\\n\"\n#elif defined(__MMX__)\n#define AILEGO_VERSION_SIMD \"  MMX Instruction Set\\n\"\n#else\n#define AILEGO_VERSION_SIMD \"\"\n#endif\n\n#if defined(PY_VERSION)\n#if PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_ALPHA\n#define AILEGO_VERSION_PYTHON \\\n  \"  Python API \" PY_VERSION  \\\n  \" Alpha \" AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) \"\\n\"\n#elif PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_BETA\n#define AILEGO_VERSION_PYTHON \\\n  \"  Python API \" PY_VERSION  \\\n  \" Beta \" AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) \"\\n\"\n#elif PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_GAMMA\n#define AILEGO_VERSION_PYTHON \\\n  \"  Python API \" PY_VERSION  \\\n  \" Release Candidate \" AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) \"\\n\"\n#elif PY_RELEASE_LEVEL == 
PY_RELEASE_LEVEL_FINAL\n#define AILEGO_VERSION_PYTHON \"  Python API \" PY_VERSION \" Final\\n\"\n#else\n#define AILEGO_VERSION_PYTHON \"  Python API \" PY_VERSION \"\\n\"\n#endif\n#else\n#define AILEGO_VERSION_PYTHON \"\"\n#endif\n\n//! Gather information of compiling\n#define AILEGO_VERSION_COMPILE_DETAILS(__PREFIX_INFO__)                      \\\n  __PREFIX_INFO__                                                            \\\n  \"Compiled by \" AILEGO_VERSION_COMPILER                                     \\\n  \".\\n\"                                                                      \\\n  \"Compiled for \" AILEGO_VERSION_PROCESSOR                                   \\\n  \".\\n\"                                                                      \\\n  \"Compiled on \" AILEGO_VERSION_PLATFORM \" on \" __DATE__ \" \" __TIME__        \\\n  \".\\n\"                                                                      \\\n  \"Compiled with: \\n\"                                                        \\\n  \"\" AILEGO_VERSION_BYTE_ORDER \"\" AILEGO_VERSION_SIMD                        \\\n  \"\" AILEGO_VERSION_DEBUG_INFO \"\" AILEGO_VERSION_ASAN \"\" AILEGO_VERSION_STDC \\\n  \"\" AILEGO_VERSION_CPLUSPLUS \"\" AILEGO_VERSION_GXX_ABI                      \\\n  \"\" AILEGO_VERSION_POSIX \"\" AILEGO_VERSION_XOPEN \"\" AILEGO_VERSION_LSB      \\\n  \"\" AILEGO_VERSION_GLIBC \"\" AILEGO_VERSION_WINSDK \"\" AILEGO_VERSION_CLR     \\\n  \"\" AILEGO_VERSION_OPENMP \"\" AILEGO_VERSION_PYTHON \"\\n\"\n"
  },
  {
    "path": "src/binding/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\n# Retrieve version from git repository\ngit_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})\n\n# Add repository\ncc_directory(python)"
  },
  {
    "path": "src/binding/python/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nset(CMAKE_CXX_STANDARD 17)\nset(CMAKE_CXX_STANDARD_REQUIRED ON)\n\nfind_package(pybind11 REQUIRED)\n\nset(SRC_LISTS\n        binding.cc\n        model/python_collection.cc\n        model/python_doc.cc\n        model/param/python_param.cc\n        model/schema/python_schema.cc\n        model/common/python_config.cc\n        typing/python_type.cc\n)\n\npybind11_add_module(_zvec ${SRC_LISTS})\n\nif (CMAKE_SYSTEM_NAME STREQUAL \"Linux\")\n    target_link_libraries(_zvec PRIVATE\n            -Wl,--whole-archive\n            $<TARGET_FILE:core_knn_flat_static>\n            $<TARGET_FILE:core_knn_flat_sparse_static>\n            $<TARGET_FILE:core_knn_hnsw_static>\n            $<TARGET_FILE:core_knn_hnsw_rabitq_static>\n            $<TARGET_FILE:core_knn_hnsw_sparse_static>\n            $<TARGET_FILE:core_knn_ivf_static>\n            $<TARGET_FILE:core_knn_cluster_static>\n            $<TARGET_FILE:core_mix_reducer_static>\n            $<TARGET_FILE:core_metric_static>\n            $<TARGET_FILE:core_utility_static>\n            $<TARGET_FILE:core_quantizer_static>\n            -Wl,--no-whole-archive\n            zvec_db\n    )\n    target_link_options(_zvec PRIVATE\n            \"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map\"\n    )\nelseif (APPLE)\n    target_link_libraries(_zvec PRIVATE\n            -Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_flat_sparse_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_rabitq_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_sparse_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_ivf_static>\n            -Wl,-force_load,$<TARGET_FILE:core_knn_cluster_static>\n            -Wl,-force_load,$<TARGET_FILE:core_mix_reducer_static>\n            
-Wl,-force_load,$<TARGET_FILE:core_metric_static>\n            -Wl,-force_load,$<TARGET_FILE:core_utility_static>\n            -Wl,-force_load,$<TARGET_FILE:core_quantizer_static>\n            zvec_db\n    )\n    target_link_libraries(_zvec PRIVATE\n            -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac\n    )\nendif ()\n\ntarget_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include)\n"
  },
  {
    "path": "src/binding/python/binding.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_collection.h\"\n#include \"python_config.h\"\n#include \"python_doc.h\"\n#include \"python_param.h\"\n#include \"python_schema.h\"\n#include \"python_type.h\"\n\nnamespace zvec {\nPYBIND11_MODULE(_zvec, m) {\n  m.doc() = \"Zvec core module\";\n\n  ZVecPyTyping::Initialize(m);\n  ZVecPyParams::Initialize(m);\n  ZVecPySchemas::Initialize(m);\n  ZVecPyConfig::Initialize(m);\n  ZVecPyDoc::Initialize(m);\n  ZVecPyCollection::Initialize(m);\n}\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/exports.mac",
    "content": "_PyInit__zvec\n"
  },
  {
    "path": "src/binding/python/include/python_collection.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/collection.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPyCollection {\n public:\n  ZVecPyCollection() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n\n private:\n  static void bind_db_methods(py::class_<Collection, Collection::Ptr> &col);\n  static void bind_ddl_methods(py::class_<Collection, Collection::Ptr> &col);\n  static void bind_dml_methods(py::class_<Collection, Collection::Ptr> &col);\n  static void bind_dql_methods(py::class_<Collection, Collection::Ptr> &col);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/include/python_config.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/config.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPyConfig {\n public:\n  ZVecPyConfig() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/include/python_doc.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/doc.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPyDoc {\n public:\n  ZVecPyDoc() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n\n private:\n  static void bind_doc_operator(py::module_ &m);\n  static void bind_doc(py::module_ &m);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/include/python_param.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/options.h>\n#include <zvec/db/type.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPyParams {\n public:\n  ZVecPyParams() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n\n private:\n  static void bind_index_params(py::module_ &m);\n  static void bind_query_params(py::module_ &m);\n  static void bind_options(py::module_ &m);\n  static void bind_vector_query(py::module_ &m);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/include/python_schema.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/type.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPySchemas {\n public:\n  ZVecPySchemas() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n\n private:\n  static void bind_field_schema(py::module_ &m);\n  static void bind_collection_schema(py::module_ &m);\n  static void bind_collection_stats(py::module_ &m);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/include/python_type.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.#pragma once\n\n#include <pybind11/pybind11.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n\nnamespace py = pybind11;\n\nnamespace zvec {\n\nclass ZVecPyTyping {\n public:\n  ZVecPyTyping() = delete;\n\n public:\n  static void Initialize(py::module_ &m);\n\n private:\n  static void bind_datatypes(py::module_ &m);\n  static void bind_index_types(py::module_ &m);\n  static void bind_metric_types(py::module_ &m);\n  static void bind_quantize_types(py::module_ &m);\n  static void bind_status(py::module_ &m);\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/binding/python/model/common/python_config.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_config.h\"\n#include <pybind11/stl.h>\n\nnamespace zvec {\n\ninline bool has_key(py::dict d, const std::string &key) {\n  return py::bool_(d.contains(key));\n}\n\ntemplate <typename T>\nstd::optional<T> get_if(py::dict d, const std::string &key) {\n  if (has_key(d, key)) {\n    try {\n      py::object obj = d[py::str(key)];\n      return obj.cast<T>();\n    } catch (const py::cast_error &) {\n      throw py::type_error(\"Key '\" + key + \"' is not of expected type.\");\n    }\n  }\n  return std::nullopt;\n}\n\ninline std::string to_lower(const std::string &s) {\n  std::string lower;\n  lower.reserve(s.size());\n  std::transform(s.begin(), s.end(), std::back_inserter(lower), ::tolower);\n  return lower;\n}\n\ninline bool iequals(const std::string &a, const std::string &b) {\n  return to_lower(a) == to_lower(b);\n}\n\nGlobalConfig::LogLevel str_to_loglevel(const std::string &s) {\n  if (iequals(s, \"debug\")) return GlobalConfig::LogLevel::DEBUG;\n  if (iequals(s, \"info\")) return GlobalConfig::LogLevel::INFO;\n  if (iequals(s, \"warn\") || iequals(s, \"warning\"))\n    return GlobalConfig::LogLevel::WARN;\n  if (iequals(s, \"error\")) return GlobalConfig::LogLevel::ERROR;\n  if (iequals(s, \"fatal\")) return GlobalConfig::LogLevel::FATAL;\n  throw py::value_error(\"Invalid log level: \");\n}\n\n\nvoid 
ZVecPyConfig::Initialize(pybind11::module_ &m) {\n  m.def(\"Initialize\", [](py::args args, py::kwargs kwargs) -> py::none {\n    py::dict config_dict;\n    // parse args\n    for (auto &arg : args) {\n      if (py::isinstance<py::dict>(arg)) {\n        for (auto item : arg.cast<py::dict>()) {\n          config_dict[item.first] = item.second;\n        }\n      } else {\n        throw py::type_error(\"Positional argument must be a dict if provided\");\n      }\n    }\n\n    // parser kwargs\n    if (kwargs) {\n      for (auto item : kwargs) {\n        config_dict[item.first] = item.second;\n      }\n    }\n\n    if (config_dict.empty()) {\n      return py::none();\n    }\n\n    GlobalConfig::ConfigData data;\n    // config memory_limit_mb\n    if (has_key(config_dict, \"memory_limit_mb\")) {\n      auto mb = get_if<int64_t>(config_dict, \"memory_limit_mb\").value();\n      if (mb <= 0) throw py::value_error(\"memory_limit_mb must be positive\");\n      data.memory_limit_bytes = static_cast<uint64_t>(mb) * 1024 * 1024;\n    }\n\n    // config log\n    bool has_log_type = has_key(config_dict, \"log_type\");\n    bool has_log_level = has_key(config_dict, \"log_level\");\n    if (has_log_type || has_log_level) {\n      std::string log_type = \"console\";\n      std::string log_level_str = \"warn\";\n\n      if (has_log_type) {\n        log_type = config_dict[\"log_type\"].cast<std::string>();\n      }\n      if (has_log_level) {\n        log_level_str = config_dict[\"log_level\"].cast<std::string>();\n      }\n      auto log_level = str_to_loglevel(log_level_str);\n      if (iequals(log_type, \"file\")) {\n        std::string dir = DEFAULT_LOG_DIR;\n        std::string basename = DEFAULT_LOG_BASENAME;\n        uint32_t file_size = DEFAULT_LOG_FILE_SIZE;\n        uint32_t overdue_days = DEFAULT_LOG_OVERDUE_DAYS;\n\n        if (has_key(config_dict, \"log_dir\")) {\n          dir = get_if<std::string>(config_dict, \"log_dir\").value();\n        }\n        if 
(has_key(config_dict, \"log_basename\")) {\n          basename = get_if<std::string>(config_dict, \"log_basename\").value();\n        }\n        if (has_key(config_dict, \"log_file_size\")) {\n          auto s = get_if<int32_t>(config_dict, \"log_file_size\").value();\n          if (s <= 0) {\n            throw py::value_error(\"log_file_size must be positive\");\n          }\n          file_size = static_cast<uint32_t>(s);\n        }\n        if (has_key(config_dict, \"log_overdue_days\")) {\n          std::cout << \" ** log_overdue_days: \" << overdue_days << std::endl;\n          auto d = get_if<int32_t>(config_dict, \"log_overdue_days\").value();\n          if (d <= 0) {\n            throw py::value_error(\"log_overdue_days must be positive\");\n          }\n          overdue_days = static_cast<uint32_t>(d);\n        }\n\n        data.log_config = std::make_shared<GlobalConfig::FileLogConfig>(\n            log_level, dir, basename, file_size, overdue_days);\n\n      } else if (iequals(log_type, \"console\")) {\n        data.log_config =\n            std::make_shared<GlobalConfig::ConsoleLogConfig>(log_level);\n      } else {\n        throw py::value_error(\"log_type must be 'console' or 'file'\");\n      }\n    }\n\n    // set query thread count\n    if (has_key(config_dict, \"query_threads\")) {\n      auto q = get_if<int32_t>(config_dict, \"query_threads\").value();\n      if (q <= 0) throw py::value_error(\"query_threads must be positive\");\n      data.query_thread_count = static_cast<uint32_t>(q);\n    }\n\n    // set optimize thread count\n    if (has_key(config_dict, \"optimize_threads\")) {\n      auto o = get_if<int32_t>(config_dict, \"optimize_threads\").value();\n      if (o <= 0) throw py::value_error(\"optimize_threads must be positive\");\n      data.optimize_thread_count = static_cast<uint32_t>(o);\n    }\n\n    // set invert_to_forward_scan_ratio\n    if (has_key(config_dict, \"invert_to_forward_scan_ratio\")) {\n      auto v =\n          
get_if<double>(config_dict, \"invert_to_forward_scan_ratio\").value();\n      if (v < 0.0 || v > 1.0) {\n        throw py::value_error(\n            \"invert_to_forward_scan_ratio must be in [0.0, 1.0]\");\n      }\n      data.invert_to_forward_scan_ratio = static_cast<float>(v);\n    }\n\n    // set brute_force_by_keys_ratio\n    if (has_key(config_dict, \"brute_force_by_keys_ratio\")) {\n      auto v = get_if<double>(config_dict, \"brute_force_by_keys_ratio\").value();\n      if (v < 0.0 || v > 1.0) {\n        throw py::value_error(\n            \"brute_force_by_keys_ratio must be in [0.0, 1.0]\");\n      }\n      data.brute_force_by_keys_ratio = static_cast<float>(v);\n    }\n\n    // initialize (contains validate)\n    Status status = GlobalConfig::Instance().Initialize(data);\n    if (!status.ok()) {\n      throw std::runtime_error(\"Initialization failed: \" + status.message());\n    }\n    return py::none();\n  });\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/binding/python/model/param/python_param.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_param.h\"\n#include <pybind11/numpy.h>\n#include <pybind11/stl.h>\n#include <zvec/core/interface/constants.h>\n#include <zvec/db/index_params.h>\n#include \"python_doc.h\"\n\nnamespace zvec {\n\nstatic std::string index_type_to_string(const IndexType type) {\n  switch (type) {\n    case IndexType::INVERT:\n      return \"INVERT\";\n    case IndexType::FLAT:\n      return \"FLAT\";\n    case IndexType::IVF:\n      return \"IVF\";\n    case IndexType::HNSW:\n      return \"HNSW\";\n    case IndexType::HNSW_RABITQ:\n      return \"HNSW_RABITQ\";\n    default:\n      return \"UNDEFINED\";\n  }\n}\n\nstatic std::string metric_type_to_string(const MetricType type) {\n  switch (type) {\n    case MetricType::COSINE:\n      return \"COSINE\";\n    case MetricType::IP:\n      return \"IP\";\n    case MetricType::L2:\n      return \"L2\";\n    default:\n      return \"UNDEFINED\";\n  }\n}\n\nstatic std::string quantize_type_to_string(const QuantizeType type) {\n  switch (type) {\n    case QuantizeType::UNDEFINED:\n      return \"UNDEFINED\";\n    case QuantizeType::INT8:\n      return \"INT8\";\n    case QuantizeType::INT4:\n      return \"INT4\";\n    case QuantizeType::FP16:\n      return \"FP16\";\n    case QuantizeType::RABITQ:\n      return \"RABITQ\";\n    default:\n      return \"UNDEFINED\";\n  }\n}\n\ntemplate <typename T>\nT 
checked_cast(const py::handle &h, const std::string &vector_field,\n               const std::string &expected_type) {\n  try {\n    return py::cast<T>(h);\n  } catch (const py::cast_error &e) {\n    std::string actual_type = std::string(py::str(py::type::of(h)));\n    std::string msg =\n        vector_field + \": expected \" + expected_type + \", got \" + actual_type;\n    throw py::type_error(msg);\n  }\n}\n\ntemplate <typename T>\nstd::string serialize_vector(const T *data, size_t n) {\n  std::string buf;\n  buf.resize(n * sizeof(T));\n  std::memcpy(buf.data(), data, n * sizeof(T));\n  return buf;\n}\n\ntemplate <typename ValueType, typename ValueCastFn>\nstd::pair<std::string, std::string> serialize_sparse_vector(\n    const py::dict &sparse_dict, ValueCastFn &&value_caster) {\n  const size_t n = sparse_dict.size();\n  if (n == 0) return {{}, {}};\n\n  std::string indices_buf;\n  indices_buf.resize(n * sizeof(uint32_t));\n  auto *indices_ptr = reinterpret_cast<uint32_t *>(indices_buf.data());\n\n  std::string values_buf;\n  values_buf.resize(n * sizeof(ValueType));\n  auto *values_ptr = reinterpret_cast<ValueType *>(values_buf.data());\n\n  size_t i = 0;\n  for (const auto &[py_key, py_val] : sparse_dict) {\n    indices_ptr[i] = checked_cast<uint32_t>(py_key, \"Sparse indices\", \"UINT32\");\n    values_ptr[i] = value_caster(py_val, i);\n    ++i;\n  }\n  return {std::move(indices_buf), std::move(values_buf)};\n}\n\nvoid ZVecPyParams::Initialize(pybind11::module_ &parent) {\n  auto m =\n      parent.def_submodule(\"param\", \"This module contains the params of Zvec\");\n\n  // binding index_params [invert/hnsw/flat/ivf]\n  bind_index_params(m);\n\n  // bind query_params [hnsw/ivf]\n  bind_query_params(m);\n\n  // bind options [collection/index/optimize/column]\n  bind_options(m);\n\n  // bind vector query\n  bind_vector_query(m);\n}\n\nvoid ZVecPyParams::bind_index_params(pybind11::module_ &m) {\n  // binding base index params\n  py::class_<IndexParams, 
std::shared_ptr<IndexParams>> index_params(\n      m, \"IndexParam\", R\"pbdoc(\nBase class for all index parameter configurations.\n\nThis abstract base class defines the common interface for index types.\nIt should not be instantiated directly; use derived classes instead.\n\nAttributes:\n    type (IndexType): The type of the index (e.g., HNSW, FLAT, INVERT).\n)pbdoc\");\n  index_params\n      .def_property_readonly(\n          \"type\",\n          [](const IndexParams &self) -> IndexType { return self.type(); },\n          \"IndexType: The type of the index.\")\n      .def(\"clone\", &IndexParams::clone, py::return_value_policy::copy)\n      .def(\n          \"__eq__\",\n          [](const IndexParams &self, const py::object &other) {\n            if (!py::isinstance<IndexParams>(other)) return false;\n            return self == other.cast<const IndexParams &>();\n          },\n          py::is_operator())\n      .def(\n          \"to_dict\",\n          [](const IndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"type\"] = index_type_to_string(self.type());\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(py::pickle(\n          [](const IndexParams &self) {  // __getstate__\n            return py::make_tuple(self.type());\n          },\n          [](py::tuple t) {  // __setstate__\n            if (t.size() != 1)\n              throw std::runtime_error(\"Invalid state for IndexParams\");\n            return std::shared_ptr<IndexParams>();\n          }));\n\n  // binding invert index params\n  py::class_<InvertIndexParams, IndexParams, std::shared_ptr<InvertIndexParams>>\n      invert_params(m, \"InvertIndexParam\", R\"pbdoc(\nParameters for configuring an invert index.\n\nThis class controls whether range query\noptimization is enabled for invert index structures.\n\nAttributes:\n    type (IndexType): Always `IndexType.INVERTED`.\n    enable_range_optimization (bool): Whether 
range optimization is enabled.\n    enable_extended_wildcard (bool): Whether extended wildcard (suffix and infix) search is enabled.\n\nExamples:\n    >>> params = InvertIndexParam(enable_range_optimization=True, enable_extended_wildcard=False)\n    >>> print(params.enable_range_optimization)\n    True\n    >>> print(params.enable_extended_wildcard)\n    False\n    >>> config = params.to_dict()\n    >>> print(config)\n    {'enable_range_optimization': True, 'enable_extended_wildcard': False}\n)pbdoc\");\n  invert_params\n      .def(py::init<bool, bool>(), py::arg(\"enable_range_optimization\") = false,\n           py::arg(\"enable_extended_wildcard\") = false,\n           R\"pbdoc(\nConstructs an InvertIndexParam instance.\n\nArgs:\n    enable_range_optimization (bool, optional): If True, enables range query\n        optimization for the invert index. Defaults to False.\n    enable_extended_wildcard (bool, optional): If True, enables extended wildcard\n        search including suffix and infix patterns. 
Defaults to False.\n)pbdoc\")\n      .def_property_readonly(\"enable_range_optimization\",\n                             &InvertIndexParams::enable_range_optimization,\n                             R\"pbdoc(\nbool: Whether range optimization is enabled for this inverted index.\n)pbdoc\")\n      .def_property_readonly(\"enable_extended_wildcard\",\n                             &InvertIndexParams::enable_extended_wildcard,\n                             R\"pbdoc(\nbool: Whether extended wildcard (suffix and infix) search is enabled.\nNote: Prefix search is always enabled regardless of this setting.\n)pbdoc\")\n      .def(\n          \"to_dict\",\n          [](const InvertIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"enable_range_optimization\"] =\n                self.enable_range_optimization();\n            dict[\"enable_extended_wildcard\"] = self.enable_extended_wildcard();\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(\"__repr__\",\n           [](const InvertIndexParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"enable_range_optimization\\\":\" +\n                    std::to_string(self.enable_range_optimization()) +\n                    \",\"\n                    \"\\\"enable_extended_wildcard\\\":\" +\n                    std::to_string(self.enable_extended_wildcard()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const InvertIndexParams &self) {  // __getstate__\n            return py::make_tuple(self.enable_range_optimization(),\n                                  self.enable_extended_wildcard());\n          },\n          [](py::tuple t) {  // __setstate__\n            if (t.size() != 2)\n              throw std::runtime_error(\"Invalid state for InvertIndexParams\");\n            return std::make_shared<InvertIndexParams>(t[0].cast<bool>(),\n                                                       
t[1].cast<bool>());\n          }));\n\n  // binding base vector index params\n  py::class_<VectorIndexParams, IndexParams, std::shared_ptr<VectorIndexParams>>\n      vector_params(m, \"VectorIndexParam\", R\"pbdoc(\nBase class for vector index parameter configurations.\n\nEncapsulates common settings for all vector index types.\n\nAttributes:\n    type (IndexType): The specific vector index type (e.g., HNSW, FLAT).\n    metric_type (MetricType): Distance metric used for similarity search.\n    quantize_type (QuantizeType): Optional vector quantization type.\n)pbdoc\");\n  vector_params\n      .def_property_readonly(\n          \"metric_type\",\n          [](const VectorIndexParams &self) -> MetricType {\n            return self.metric_type();\n          },\n          \"MetricType: Distance metric (e.g., IP, COSINE, L2).\")\n      .def_property_readonly(\n          \"quantize_type\",\n          [](const VectorIndexParams &self) -> QuantizeType {\n            return self.quantize_type();\n          },\n          \"QuantizeType: Vector quantization type (e.g., FP16, INT8).\")\n      .def(\n          \"to_dict\",\n          [](const VectorIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"type\"] = index_type_to_string(self.type());\n            dict[\"metric_type\"] = metric_type_to_string(self.metric_type());\n            dict[\"quantize_type\"] =\n                quantize_type_to_string(self.quantize_type());\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(py::pickle(\n          [](const VectorIndexParams &self) {  // __getstate__\n            return py::make_tuple(self.type(), self.metric_type(),\n                                  self.quantize_type());\n          },\n          [](py::tuple t) {  // __setstate__\n            if (t.size() != 3)\n              throw std::runtime_error(\"Invalid state for VectorIndexParams\");\n            // Base class: cannot be instantiated directly; exists for subclasses\n            return 
std::shared_ptr<VectorIndexParams>();\n          }));\n\n  // binding hnsw index params\n  py::class_<HnswIndexParams, VectorIndexParams,\n             std::shared_ptr<HnswIndexParams>>\n      hnsw_params(m, \"HnswIndexParam\", R\"pbdoc(\nParameters for configuring an HNSW (Hierarchical Navigable Small World) index.\n\nHNSW is a graph-based approximate nearest neighbor search index. This class\nencapsulates its construction hyperparameters.\n\nAttributes:\n    metric_type (MetricType): Distance metric used for similarity computation.\n        Default is ``MetricType.IP`` (inner product).\n    m (int): Number of bi-directional links created for every new element\n        during construction. Higher values improve accuracy but increase\n        memory usage and construction time. Default is 50.\n    ef_construction (int): Size of the dynamic candidate list for nearest\n        neighbors during index construction. Larger values yield better\n        graph quality at the cost of slower build time. Default is 500.\n    quantize_type (QuantizeType): Optional quantization type for vector\n        compression (e.g., FP16, INT8). Default is `QuantizeType.UNDEFINED` to\n        disable quantization.\n\nExamples:\n    >>> from zvec.typing import MetricType, QuantizeType\n    >>> params = HnswIndexParam(\n    ...     metric_type=MetricType.COSINE,\n    ...     m=16,\n    ...     ef_construction=200,\n    ...     quantize_type=QuantizeType.INT8\n    ... 
)\n    >>> print(params)\n    {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'}\n)pbdoc\");\n  hnsw_params\n      .def(py::init<MetricType, int, int, QuantizeType>(),\n           py::arg(\"metric_type\") = MetricType::IP,\n           py::arg(\"m\") = core_interface::kDefaultHnswNeighborCnt,\n           py::arg(\"ef_construction\") =\n               core_interface::kDefaultHnswEfConstruction,\n           py::arg(\"quantize_type\") = QuantizeType::UNDEFINED)\n      .def_property_readonly(\n          \"m\", &HnswIndexParams::m,\n          \"int: Maximum number of neighbors per node in upper layers.\")\n      .def_property_readonly(\n          \"ef_construction\", &HnswIndexParams::ef_construction,\n          \"int: Candidate list size during index construction.\")\n      .def(\n          \"to_dict\",\n          [](const HnswIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"type\"] = index_type_to_string(self.type());\n            dict[\"metric_type\"] = metric_type_to_string(self.metric_type());\n            dict[\"m\"] = self.m();\n            dict[\"ef_construction\"] = self.ef_construction();\n            dict[\"quantize_type\"] =\n                quantize_type_to_string(self.quantize_type());\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(\"__repr__\",\n           [](const HnswIndexParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"metric_type\\\":\" +\n                    metric_type_to_string(self.metric_type()) +\n                    \", \\\"m\\\":\" + std::to_string(self.m()) +\n                    \", \\\"ef_construction\\\":\" +\n                    std::to_string(self.ef_construction()) +\n                    \", \\\"quantize_type\\\":\" +\n                    quantize_type_to_string(self.quantize_type()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const HnswIndexParams &self) {\n         
   return py::make_tuple(self.metric_type(), self.m(),\n                                  self.ef_construction(), self.quantize_type());\n          },\n          [](py::tuple t) {\n            if (t.size() != 4)\n              throw std::runtime_error(\"Invalid state for HnswIndexParams\");\n            return std::make_shared<HnswIndexParams>(\n                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),\n                t[3].cast<QuantizeType>());\n          }));\n\n  // binding hnsw rabitq index params\n  py::class_<HnswRabitqIndexParams, VectorIndexParams,\n             std::shared_ptr<HnswRabitqIndexParams>>\n      hnsw_rabitq_params(m, \"HnswRabitqIndexParam\", R\"pbdoc(\nParameters for configuring an HNSW (Hierarchical Navigable Small World) index with RabitQ quantization.\n\nHNSW is a graph-based approximate nearest neighbor search index. RabitQ is a\nquantization method that provides high compression with minimal accuracy loss.\n\nAttributes:\n    metric_type (MetricType): Distance metric used for similarity computation.\n        Default is ``MetricType.IP`` (inner product).\n    m (int): Number of bi-directional links created for every new element\n        during construction. Higher values improve accuracy but increase\n        memory usage and construction time. Default is 50.\n    ef_construction (int): Size of the dynamic candidate list for nearest\n        neighbors during index construction. Larger values yield better\n        graph quality at the cost of slower build time. Default is 500.\n\nExamples:\n    >>> from zvec.typing import MetricType\n    >>> params = HnswRabitqIndexParam(\n    ...     metric_type=MetricType.COSINE,\n    ...     m=16,\n    ...     ef_construction=200\n    ... 
)\n    >>> print(params)\n    {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200}\n)pbdoc\");\n  hnsw_rabitq_params\n      .def(py::init<MetricType, int, int, int, int, int>(),\n           py::arg(\"metric_type\") = MetricType::IP,\n           py::arg(\"total_bits\") = core_interface::kDefaultRabitqTotalBits,\n           py::arg(\"num_clusters\") = core_interface::kDefaultRabitqNumClusters,\n           py::arg(\"m\") = core_interface::kDefaultHnswNeighborCnt,\n           py::arg(\"ef_construction\") =\n               core_interface::kDefaultHnswEfConstruction,\n           py::arg(\"sample_count\") = 0)\n      .def_property_readonly(\"m\", &HnswRabitqIndexParams::m,\n                             \"int: Maximum number of neighbors per node.\")\n      .def_property_readonly(\n          \"ef_construction\", &HnswRabitqIndexParams::ef_construction,\n          \"int: Candidate list size during index construction.\")\n      .def_property_readonly(\"total_bits\", &HnswRabitqIndexParams::total_bits,\n                             \"int: Total bits for RabitQ quantization.\")\n      .def_property_readonly(\"num_clusters\",\n                             &HnswRabitqIndexParams::num_clusters,\n                             \"int: Number of clusters for RabitQ.\")\n      .def_property_readonly(\"sample_count\",\n                             &HnswRabitqIndexParams::sample_count,\n                             \"int: Sample count for RabitQ training.\")\n      .def(\n          \"to_dict\",\n          [](const HnswRabitqIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"type\"] = index_type_to_string(self.type());\n            dict[\"metric_type\"] = metric_type_to_string(self.metric_type());\n            dict[\"quantize_type\"] =\n                quantize_type_to_string(self.quantize_type());\n            dict[\"total_bits\"] = self.total_bits();\n            dict[\"num_clusters\"] = self.num_clusters();\n            dict[\"sample_count\"] = 
self.sample_count();\n            dict[\"m\"] = self.m();\n            dict[\"ef_construction\"] = self.ef_construction();\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(\n          \"__repr__\",\n          [](const HnswRabitqIndexParams &self) -> std::string {\n            return \"{\"\n                   \"\\\"type\\\":\\\"\" +\n                   index_type_to_string(self.type()) +\n                   \"\\\", \\\"metric_type\\\":\\\"\" +\n                   metric_type_to_string(self.metric_type()) +\n                   \"\\\", \\\"total_bits\\\":\" + std::to_string(self.total_bits()) +\n                   \", \\\"num_clusters\\\":\" + std::to_string(self.num_clusters()) +\n                   \", \\\"sample_count\\\":\" + std::to_string(self.sample_count()) +\n                   \", \\\"m\\\":\" + std::to_string(self.m()) +\n                   \", \\\"ef_construction\\\":\" +\n                   std::to_string(self.ef_construction()) +\n                   \", \\\"quantize_type\\\":\\\"\" +\n                   quantize_type_to_string(self.quantize_type()) + \"\\\"}\";\n          })\n      .def(py::pickle(\n          [](const HnswRabitqIndexParams &self) {\n            return py::make_tuple(self.metric_type(), self.total_bits(),\n                                  self.num_clusters(), self.m(),\n                                  self.ef_construction(), self.sample_count());\n          },\n          [](py::tuple t) {\n            if (t.size() != 6)\n              throw std::runtime_error(\n                  \"Invalid state for HnswRabitqIndexParams\");\n            return std::make_shared<HnswRabitqIndexParams>(\n                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),\n                t[3].cast<int>(), t[4].cast<int>(), t[5].cast<int>());\n          }));\n\n  // FlatIndexParams\n  py::class_<FlatIndexParams, VectorIndexParams,\n             std::shared_ptr<FlatIndexParams>>\n      
flat_params(m, \"FlatIndexParam\", R\"pbdoc(\nParameters for configuring a flat (brute-force) index.\n\nA flat index performs exact nearest neighbor search by comparing the query\nvector against all vectors in the collection. It is simple, accurate, and\nsuitable for small to medium datasets or as a baseline.\n\nAttributes:\n    metric_type (MetricType): Distance metric used for similarity computation.\n        Default is ``MetricType.IP`` (inner product).\n    quantize_type (QuantizeType): Optional quantization type for vector\n        compression (e.g., FP16, INT8). Use ``QuantizeType.UNDEFINED`` to\n        disable quantization. Default is ``QuantizeType.UNDEFINED``.\n\nExamples:\n    >>> from zvec.typing import MetricType, QuantizeType\n    >>> params = FlatIndexParam(\n    ...     metric_type=MetricType.L2,\n    ...     quantize_type=QuantizeType.FP16\n    ... )\n    >>> print(params)\n    {'metric_type': 'L2', 'quantize_type': 'FP16'}\n)pbdoc\");\n  flat_params\n      .def(py::init<MetricType, QuantizeType>(),\n           py::arg(\"metric_type\") = MetricType::IP,\n           py::arg(\"quantize_type\") = QuantizeType::UNDEFINED,\n           R\"pbdoc(\nConstructs a FlatIndexParam instance.\n\nArgs:\n    metric_type (MetricType, optional): Distance metric. 
Defaults to MetricType.IP.\n    quantize_type (QuantizeType, optional): Vector quantization type.\n        Defaults to QuantizeType.UNDEFINED (no quantization).\n)pbdoc\")\n      .def(\n          \"to_dict\",\n          [](const FlatIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"metric_type\"] = metric_type_to_string(self.metric_type());\n            dict[\"quantize_type\"] =\n                quantize_type_to_string(self.quantize_type());\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(\"__repr__\",\n           [](const FlatIndexParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"metric_type\\\":\" +\n                    metric_type_to_string(self.metric_type()) +\n                    \", \\\"quantize_type\\\":\" +\n                    quantize_type_to_string(self.quantize_type()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const FlatIndexParams &self) {\n            return py::make_tuple(self.metric_type(), self.quantize_type());\n          },\n          [](py::tuple t) {\n            if (t.size() != 2)\n              throw std::runtime_error(\"Invalid state for FlatIndexParams\");\n            return std::make_shared<FlatIndexParams>(t[0].cast<MetricType>(),\n                                                     t[1].cast<QuantizeType>());\n          }));\n\n  // IVFIndexParams\n  py::class_<IVFIndexParams, VectorIndexParams, std::shared_ptr<IVFIndexParams>>\n      ivf_params(m, \"IVFIndexParam\", R\"pbdoc(\nParameters for configuring an IVF (Inverted File Index) index.\n\nIVF partitions the vector space into clusters (inverted lists). 
At query time,\nonly a subset of clusters is searched, providing a trade-off between speed\nand accuracy.\n\nAttributes:\n    metric_type (MetricType): Distance metric used for similarity computation.\n        Default is ``MetricType.IP`` (inner product).\n    n_list (int): Number of clusters (inverted lists) to partition the dataset into.\n        If set to 0, the system will auto-select a reasonable value based on data size.\n        Default is 0 (auto).\n    n_iters (int): Number of iterations for k-means clustering during index training.\n        Higher values yield more stable centroids. Default is 10.\n    use_soar (bool): Whether to enable SOAR (Scalable Optimized Adaptive Routing)\n        for improved IVF search performance. Default is False.\n    quantize_type (QuantizeType): Optional quantization type for vector\n        compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``.\n\nExamples:\n    >>> from zvec.typing import MetricType, QuantizeType\n    >>> params = IVFIndexParam(\n    ...     metric_type=MetricType.COSINE,\n    ...     n_list=100,\n    ...     n_iters=15,\n    ...     use_soar=True,\n    ...     quantize_type=QuantizeType.INT8\n    ... )\n    >>> print(params.n_list)\n    100\n)pbdoc\");\n  ivf_params\n      .def(py::init<MetricType, int, int, bool, QuantizeType>(),\n           py::arg(\"metric_type\") = MetricType::IP, py::arg(\"n_list\") = 0,\n           py::arg(\"n_iters\") = 10, py::arg(\"use_soar\") = false,\n           py::arg(\"quantize_type\") = QuantizeType::UNDEFINED,\n           R\"pbdoc(\nConstructs an IVFIndexParam instance.\n\nArgs:\n    metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.\n    n_list (int, optional): Number of inverted lists (clusters). Set to 0 for auto.\n        Defaults to 0.\n    n_iters (int, optional): Number of k-means iterations during training.\n        Defaults to 10.\n    use_soar (bool, optional): Enable SOAR optimization. 
Defaults to False.\n    quantize_type (QuantizeType, optional): Vector quantization type.\n        Defaults to QuantizeType.UNDEFINED.\n)pbdoc\")\n      .def_property_readonly(\"n_list\", &IVFIndexParams::n_list,\n                             \"int: Number of inverted lists (0 = auto).\")\n      .def_property_readonly(\n          \"n_iters\", &IVFIndexParams::n_iters,\n          \"int: Number of k-means iterations during training.\")\n      .def_property_readonly(\"use_soar\", &IVFIndexParams::use_soar,\n                             \"bool: Whether SOAR optimization is enabled.\")\n      .def(\n          \"to_dict\",\n          [](const IVFIndexParams &self) -> py::dict {\n            py::dict dict;\n            dict[\"type\"] = index_type_to_string(self.type());\n            dict[\"metric_type\"] = metric_type_to_string(self.metric_type());\n            dict[\"n_list\"] = self.n_list();\n            dict[\"n_iters\"] = self.n_iters();\n            dict[\"use_soar\"] = self.use_soar();\n            dict[\"quantize_type\"] =\n                quantize_type_to_string(self.quantize_type());\n            return dict;\n          },\n          \"Convert to dictionary with all fields\")\n      .def(\"__repr__\",\n           [](const IVFIndexParams &self) {\n             return \"{\"\n                    \"\\\"metric_type\\\":\" +\n                    metric_type_to_string(self.metric_type()) +\n                    \", \\\"n_list\\\":\" + std::to_string(self.n_list()) +\n                    \", \\\"n_iters\\\":\" + std::to_string(self.n_iters()) +\n                    \", \\\"use_soar\\\":\" + std::to_string(self.use_soar()) +\n                    \", \\\"quantize_type\\\":\" +\n                    quantize_type_to_string(self.quantize_type()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const IVFIndexParams &self) {\n            return py::make_tuple(self.metric_type(), self.n_list(),\n                                  self.n_iters(), self.use_soar(),\n    
                              self.quantize_type());\n          },\n          [](py::tuple t) {\n            if (t.size() != 5)\n              throw std::runtime_error(\"Invalid state for IVFIndexParams\");\n            return std::make_shared<IVFIndexParams>(\n                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),\n                t[3].cast<bool>(), t[4].cast<QuantizeType>());\n          }));\n}\n\nvoid ZVecPyParams::bind_query_params(py::module_ &m) {\n  // binding base query params\n  py::class_<QueryParams, std::shared_ptr<QueryParams>> query_params(\n      m, \"QueryParam\", R\"pbdoc(\nBase class for all query parameter configurations.\n\nThis abstract base class defines common query settings such as search radius\nand whether to force linear (brute-force) search. It should not be instantiated\ndirectly; use derived classes like `HnswQueryParam` or `IVFQueryParam`.\n\nAttributes:\n    type (IndexType): The index type this query is configured for.\n    radius (float): Search radius for range queries. Used in combination with\n        top-k to filter results. Default is 0.0 (disabled).\n    is_linear (bool): If True, forces brute-force linear search instead of\n        using the index. Useful for debugging or small datasets. Default is False.\n    is_using_refiner (bool, optional): Whether to use refiner for the query. 
Default is False.\n)pbdoc\");\n  query_params\n      .def_property_readonly(\n          \"type\",\n          [](const QueryParams &self) -> IndexType { return self.type(); },\n          \"IndexType: The type of index this query targets.\")\n      .def_property_readonly(\n          \"radius\",\n          [](const QueryParams &self) -> float { return self.radius(); },\n          \"float: Search radius for range queries.\")\n      .def_property_readonly(\n          \"is_linear\",\n          [](const QueryParams &self) -> bool { return self.is_linear(); },\n          \"bool: Whether to bypass the index and use brute-force linear \"\n          \"search.\")\n      .def_property_readonly(\n          \"is_using_refiner\",\n          [](const QueryParams &self) -> bool {\n            return self.is_using_refiner();\n          },\n          \"bool: Whether to use refiner for the query.\")\n      .def(py::pickle(\n          [](const QueryParams &self) {  // __getstate__\n            return py::make_tuple(self.type(), self.radius(), self.is_linear());\n          },\n          [](py::tuple t) {  // __setstate__\n            if (t.size() != 3)\n              throw std::runtime_error(\"Invalid state for QueryParams\");\n            return std::shared_ptr<QueryParams>();\n          }));\n\n  // binding hnsw query params\n  py::class_<HnswQueryParams, QueryParams, std::shared_ptr<HnswQueryParams>>\n      hnsw_params(m, \"HnswQueryParam\", R\"pbdoc(\nQuery parameters for HNSW (Hierarchical Navigable Small World) index.\n\nControls the trade-off between search speed and accuracy via the `ef` parameter.\n\nAttributes:\n    type (IndexType): Always ``IndexType.HNSW``.\n    ef (int): Size of the dynamic candidate list during search.\n        Larger values improve recall but slow down search.\n        Default is 300.\n    radius (float): Search radius for range queries. Default is 0.0.\n    is_linear (bool): Force linear search. 
Default is False.\n    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n\nExamples:\n    >>> params = HnswQueryParam(ef=300)\n    >>> print(params.ef)\n    300\n    >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)\n    {\"type\":\"HNSW\", \"ef\":300}\n)pbdoc\");\n  hnsw_params\n      .def(py::init<int, float, bool, bool>(),\n           py::arg(\"ef\") = core_interface::kDefaultHnswEfSearch,\n           py::arg(\"radius\") = 0.0f, py::arg(\"is_linear\") = false,\n           py::arg(\"is_using_refiner\") = false,\n           R\"pbdoc(\nConstructs an HnswQueryParam instance.\n\nArgs:\n    ef (int, optional): Search-time candidate list size.\n        Higher values improve accuracy. Defaults to 300.\n    radius (float, optional): Search radius for range queries. Default is 0.0.\n    is_linear (bool, optional): Force linear search. Default is False.\n    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n)pbdoc\")\n      .def_property_readonly(\n          \"ef\", [](const HnswQueryParams &self) -> int { return self.ef(); },\n          \"int: Size of the dynamic candidate list during HNSW search.\")\n      .def(\"__repr__\",\n           [](const HnswQueryParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"type\\\":\" +\n                    index_type_to_string(self.type()) +\n                    \", \\\"ef\\\":\" + std::to_string(self.ef()) +\n                    \", \\\"radius\\\":\" + std::to_string(self.radius()) +\n                    \", \\\"is_linear\\\":\" + std::to_string(self.is_linear()) +\n                    \", \\\"is_using_refiner\\\":\" +\n                    std::to_string(self.is_using_refiner()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const HnswQueryParams &self) {\n            return py::make_tuple(self.ef(), self.radius(), self.is_linear(),\n                                  
self.is_using_refiner());\n          },\n          [](py::tuple t) {\n            if (t.size() != 4)\n              throw std::runtime_error(\"Invalid state for HnswQueryParams\");\n            auto obj = std::make_shared<HnswQueryParams>(t[0].cast<int>());\n            obj->set_radius(t[1].cast<float>());\n            obj->set_is_linear(t[2].cast<bool>());\n            obj->set_is_using_refiner(t[3].cast<bool>());\n            return obj;\n          }));\n\n  // binding ivf query params\n  py::class_<IVFQueryParams, QueryParams, std::shared_ptr<IVFQueryParams>>\n      ivf_params(m, \"IVFQueryParam\", R\"pbdoc(\nQuery parameters for IVF (Inverted File Index) index.\n\nControls how many inverted lists (`nprobe`) to visit during search.\n\nAttributes:\n    type (IndexType): Always ``IndexType.IVF``.\n    nprobe (int): Number of closest clusters (inverted lists) to search.\n        Higher values improve recall but increase latency.\n        Default is 10.\n    radius (float): Search radius for range queries. Default is 0.0.\n    is_linear (bool): Force linear search. Default is False.\n\nExamples:\n    >>> params = IVFQueryParam(nprobe=20)\n    >>> print(params.nprobe)\n    20\n)pbdoc\");\n  ivf_params\n      .def(py::init<int>(), py::arg(\"nprobe\") = 10, R\"pbdoc(\nConstructs an IVFQueryParam instance.\n\nArgs:\n    nprobe (int, optional): Number of inverted lists to probe during search.\n        Higher values improve accuracy. 
Defaults to 10.\n)pbdoc\")\n      .def_property_readonly(\n          \"nprobe\",\n          [](const IVFQueryParams &self) -> int { return self.nprobe(); },\n          \"int: Number of inverted lists to search during IVF query.\")\n      .def(\"__repr__\",\n           [](const IVFQueryParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"type\\\":\" +\n                    index_type_to_string(self.type()) +\n                    \", \\\"nprobe\\\":\" + std::to_string(self.nprobe()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const IVFQueryParams &self) {\n            return py::make_tuple(self.nprobe(), self.radius(),\n                                  self.is_linear());\n          },\n          [](py::tuple t) {\n            if (t.size() != 3)\n              throw std::runtime_error(\"Invalid state for IVFQueryParams\");\n            auto obj = std::make_shared<IVFQueryParams>(t[0].cast<int>());\n            obj->set_radius(t[1].cast<float>());\n            obj->set_is_linear(t[2].cast<bool>());\n            return obj;\n          }));\n\n  // binding hnsw rabitq query params\n  py::class_<HnswRabitqQueryParams, QueryParams,\n             std::shared_ptr<HnswRabitqQueryParams>>\n      hnsw_rabitq_query_params(m, \"HnswRabitqQueryParam\", R\"pbdoc(\nQuery parameters for HNSW RaBitQ (Hierarchical Navigable Small World with RaBitQ quantization) index.\n\nControls the trade-off between search speed and accuracy via the `ef` parameter.\nRaBitQ provides efficient quantization while maintaining high search quality.\n\nAttributes:\n    type (IndexType): Always ``IndexType.HNSW_RABITQ``.\n    ef (int): Size of the dynamic candidate list during search.\n        Larger values improve recall but slow down search.\n        Default is 300.\n    radius (float): Search radius for range queries. Default is 0.0.\n    is_linear (bool): Force linear search. 
Default is False.\n    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.\n\nExamples:\n    >>> params = HnswRabitqQueryParam(ef=300)\n    >>> print(params.ef)\n    300\n    >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)\n    {\"type\":\"HNSW_RABITQ\", \"ef\":300}\n)pbdoc\");\n  hnsw_rabitq_query_params\n      .def(py::init<int, float, bool, bool>(),\n           py::arg(\"ef\") = core_interface::kDefaultHnswEfSearch,\n           py::arg(\"radius\") = 0.0f, py::arg(\"is_linear\") = false,\n           py::arg(\"is_using_refiner\") = false,\n           R\"pbdoc(\nConstructs an HnswRabitqQueryParam instance.\n\nArgs:\n    ef (int, optional): Search-time candidate list size.\n        Higher values improve accuracy. Defaults to 300.\n    radius (float, optional): Search radius for range queries. Default is 0.0.\n    is_linear (bool, optional): Force linear search. Default is False.\n    is_using_refiner (bool, optional): Whether to use refiner for the query. 
Default is False.\n)pbdoc\")\n      .def_property_readonly(\n          \"ef\",\n          [](const HnswRabitqQueryParams &self) -> int { return self.ef(); },\n          \"int: Size of the dynamic candidate list during HNSW RaBitQ search.\")\n      .def(\"__repr__\",\n           [](const HnswRabitqQueryParams &self) -> std::string {\n             return \"{\"\n                    \"\\\"type\\\":\\\"\" +\n                    index_type_to_string(self.type()) +\n                    \"\\\", \\\"ef\\\":\" + std::to_string(self.ef()) +\n                    \", \\\"radius\\\":\" + std::to_string(self.radius()) +\n                    \", \\\"is_linear\\\":\" + std::to_string(self.is_linear()) +\n                    \", \\\"is_using_refiner\\\":\" +\n                    std::to_string(self.is_using_refiner()) + \"}\";\n           })\n      .def(py::pickle(\n          [](const HnswRabitqQueryParams &self) {\n            return py::make_tuple(self.ef(), self.radius(), self.is_linear(),\n                                  self.is_using_refiner());\n          },\n          [](py::tuple t) {\n            if (t.size() != 4)\n              throw std::runtime_error(\n                  \"Invalid state for HnswRabitqQueryParams\");\n            auto obj =\n                std::make_shared<HnswRabitqQueryParams>(t[0].cast<int>());\n            obj->set_radius(t[1].cast<float>());\n            obj->set_is_linear(t[2].cast<bool>());\n            obj->set_is_using_refiner(t[3].cast<bool>());\n            return obj;\n          }));\n}\n\nvoid ZVecPyParams::bind_options(py::module_ &m) {  // binding collection options\n  py::class_<CollectionOptions>(m, \"CollectionOption\", R\"pbdoc(\nOptions for opening or creating a collection.\n\nAttributes:\n    read_only (bool): Whether the collection is opened in read-only mode.\n        Default is False.\n    enable_mmap (bool): Whether to use memory-mapped I/O for data files.\n        Default is True.\n\nExamples:\n    >>> opt = 
CollectionOption(read_only=True, enable_mmap=False)\n    >>> print(opt.read_only)\n    True\n)pbdoc\")\n      .def(py::init<bool, bool>(), py::arg(\"read_only\") = false,\n           py::arg(\"enable_mmap\") = true,\n           R\"pbdoc(\nConstructs a CollectionOption instance.\n\nArgs:\n    read_only (bool, optional): Open collection in read-only mode.\n        Defaults to False.\n    enable_mmap (bool, optional): Enable memory-mapped I/O.\n        Defaults to True.\n)pbdoc\")\n      .def_property_readonly(\n          \"enable_mmap\",\n          [](const CollectionOptions &self) { return self.enable_mmap_; })\n      .def_property_readonly(\n          \"read_only\",\n          [](const CollectionOptions &self) { return self.read_only_; })\n      .def(\"__repr__\",\n           [](const CollectionOptions &self) -> std::string {\n             return \"{\"\n                    \"\\\"enable_mmap\\\":\" +\n                    std::to_string(self.enable_mmap_) +\n                    \", \\\"read_only\\\":\" + std::to_string(self.read_only_) + \"}\";\n           })\n      .def(py::pickle(\n          [](const CollectionOptions &self) {\n            return py::make_tuple(self.read_only_, self.enable_mmap_,\n                                  self.max_buffer_size_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 3)\n              throw std::runtime_error(\n                  \"Invalid pickle data for CollectionOptions\");\n            CollectionOptions obj{};\n            obj.read_only_ = t[0].cast<bool>();\n            obj.enable_mmap_ = t[1].cast<bool>();\n            obj.max_buffer_size_ = t[2].cast<uint32_t>();\n            return obj;\n          }));\n\n  // SegmentOptions\n  py::class_<SegmentOptions>(m, \"SegmentOption\", R\"pbdoc(\nOptions for segment-level operations.\n\nCurrently, this class mirrors CollectionOption and is used internally.\nIt supports read-only mode, memory mapping, and buffer configuration.\n\nNote:\n    This class is 
primarily for internal use. Most users should use\n    CollectionOption instead.\n\nExamples:\n    >>> opt = SegmentOption()\n    >>> print(opt.enable_mmap)\n    True\n)pbdoc\")\n      .def(py::init<>(), \"Constructs a SegmentOption with default settings.\")\n      .def_property_readonly(\n          \"enable_mmap\",\n          [](const SegmentOptions &self) { return self.enable_mmap_; },\n          \"bool: Whether memory-mapped I/O is enabled.\")\n      .def_property_readonly(\n          \"read_only\",\n          [](const SegmentOptions &self) { return self.read_only_; },\n          \"bool: Whether the segment is read-only.\")\n      .def_property_readonly(\n          \"max_buffer_size\",\n          [](const SegmentOptions &self) { return self.max_buffer_size_; },\n          \"int: Maximum buffer size in bytes (internal use).\")\n      .def(\"__repr__\",\n           [](const SegmentOptions &self) -> std::string {\n             return \"{\"\n                    \"\\\"enable_mmap\\\":\" +\n                    std::to_string(self.enable_mmap_) +\n                    \", \\\"read_only\\\":\" + std::to_string(self.read_only_) +\n                    \", \\\"max_buffer_size\\\":\" +\n                    std::to_string(self.max_buffer_size_) + \"}\";\n           })\n      .def(py::pickle(\n          [](const SegmentOptions &self) {\n            return py::make_tuple(self.read_only_, self.enable_mmap_,\n                                  self.max_buffer_size_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 3)\n              throw std::runtime_error(\n                  \"Invalid pickle data for SegmentOptions\");\n            SegmentOptions obj{};\n            obj.read_only_ = t[0].cast<bool>();\n            obj.enable_mmap_ = t[1].cast<bool>();\n            obj.max_buffer_size_ = t[2].cast<uint32_t>();\n            return obj;\n          }));\n\n  // CreateIndexOptions\n  py::class_<CreateIndexOptions>(m, \"IndexOption\",\n                           
      R\"pbdoc(\nOptions for creating an index.\n\nAttributes:\n    concurrency (int): Number of threads to use during index creation.\n        If 0, the system will choose an optimal value automatically.\n        Default is 0.\n\nExamples:\n    >>> opt = IndexOption(concurrency=4)\n    >>> print(opt.concurrency)\n    4\n)pbdoc\")\n      .def(py::init<int>(), py::arg(\"concurrency\") = 0,\n           R\"pbdoc(\nConstructs an IndexOption instance.\n\nArgs:\n    concurrency (int, optional): Number of concurrent threads.\n        0 means auto-detect. Defaults to 0.\n)pbdoc\")\n      .def_property_readonly(\n          \"concurrency\",\n          [](const CreateIndexOptions &self) { return self.concurrency_; },\n          \"int: Number of threads used for index creation (0 = auto).\")\n      .def(py::pickle(\n          [](const CreateIndexOptions &self) {\n            return py::make_tuple(self.concurrency_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 1)\n              throw std::runtime_error(\n                  \"Invalid pickle data for CreateIndexOptions\");\n            CreateIndexOptions obj{};\n            obj.concurrency_ = t[0].cast<int>();\n            return obj;\n          }));\n\n  // OptimizeOptions\n  py::class_<OptimizeOptions>(m, \"OptimizeOption\", R\"pbdoc(\nOptions for optimizing a collection (e.g., merging segments).\n\nAttributes:\n    concurrency (int): Number of threads to use during optimization.\n        If 0, the system will choose an optimal value automatically.\n        Default is 0.\n\nExamples:\n    >>> opt = OptimizeOption(concurrency=2)\n    >>> print(opt.concurrency)\n    2\n)pbdoc\")\n      .def(py::init<int>(), py::arg(\"concurrency\") = 0,\n           R\"pbdoc(\nConstructs an OptimizeOption instance.\n\nArgs:\n    concurrency (int, optional): Number of concurrent threads.\n        0 means auto-detect. 
Defaults to 0.\n)pbdoc\")\n      .def_property_readonly(\n          \"concurrency\",\n          [](const OptimizeOptions &self) { return self.concurrency_; },\n          \"int: Number of threads used for optimization (0 = auto).\")\n      .def(py::pickle(\n          [](const OptimizeOptions &self) {\n            return py::make_tuple(self.concurrency_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 1)\n              throw std::runtime_error(\n                  \"Invalid pickle data for OptimizeOptions\");\n            OptimizeOptions obj{};\n            obj.concurrency_ = t[0].cast<int>();\n            return obj;\n          }));\n\n  // AddColumnOptions\n  py::class_<AddColumnOptions>(m, \"AddColumnOption\",\n                               R\"pbdoc(\nOptions for adding a new column to a collection.\n\nAttributes:\n    concurrency (int): Number of threads to use when backfilling data\n        for the new column. If 0, auto-detect is used. Default is 0.\n\nExamples:\n    >>> opt = AddColumnOption(concurrency=1)\n    >>> print(opt.concurrency)\n    1\n)pbdoc\")\n      .def(py::init<int>(), py::arg(\"concurrency\") = 0,\n           R\"pbdoc(\nConstructs an AddColumnOption instance.\n\nArgs:\n    concurrency (int, optional): Number of threads for data backfill.\n        0 means auto-detect. 
Defaults to 0.\n)pbdoc\")\n      .def_property_readonly(\n          \"concurrency\",\n          [](const AddColumnOptions &self) { return self.concurrency_; },\n          \"int: Number of threads used when adding a column (0 = auto).\")\n      .def(py::pickle(\n          [](const AddColumnOptions &self) {\n            return py::make_tuple(self.concurrency_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 1)\n              throw std::runtime_error(\n                  \"Invalid pickle data for AddColumnOptions\");\n            AddColumnOptions obj{};\n            obj.concurrency_ = t[0].cast<int>();\n            return obj;\n          }));\n\n  // AlterColumnOptions\n  py::class_<AlterColumnOptions>(m, \"AlterColumnOption\", R\"pbdoc(\nOptions for altering an existing column (e.g., changing index settings).\n\nAttributes:\n    concurrency (int): Number of threads to use during the alteration process.\n        If 0, the system will choose an optimal value automatically.\n        Default is 0.\n\nExamples:\n    >>> opt = AlterColumnOption(concurrency=1)\n    >>> print(opt.concurrency)\n    1\n)pbdoc\")\n      .def(py::init<int>(), py::arg(\"concurrency\") = 0,\n           R\"pbdoc(\nConstructs an AlterColumnOption instance.\n\nArgs:\n    concurrency (int, optional): Number of threads for column alteration.\n        0 means auto-detect. 
Defaults to 0.\n)pbdoc\")\n      .def_property_readonly(\n          \"concurrency\",\n          [](const AlterColumnOptions &self) { return self.concurrency_; },\n          \"int: Number of threads used when altering a column (0 = auto).\")\n      .def(py::pickle(\n          [](const AlterColumnOptions &self) {\n            return py::make_tuple(self.concurrency_);\n          },\n          [](py::tuple t) {\n            if (t.size() != 1)\n              throw std::runtime_error(\n                  \"Invalid pickle data for AlterColumnOptions\");\n            AlterColumnOptions obj{};\n            obj.concurrency_ = t[0].cast<int>();\n            return obj;\n          }));\n}\n\nvoid ZVecPyParams::bind_vector_query(py::module_ &m) {\n  py::class_<VectorQuery>(m, \"_VectorQuery\")\n      .def(py::init<>())\n      // properties\n      .def_readwrite(\"topk\", &VectorQuery::topk_)\n      .def_readwrite(\"field_name\", &VectorQuery::field_name_)\n      .def_readwrite(\"filter\", &VectorQuery::filter_)\n      .def_readwrite(\"include_vector\", &VectorQuery::include_vector_)\n      .def_readwrite(\"query_params\", &VectorQuery::query_params_)\n      .def_readwrite(\"output_fields\", &VectorQuery::output_fields_)\n      // vector\n      .def(\"set_vector\",\n           [](VectorQuery &self, const FieldSchema &field_schema,\n              const py::object &obj) {\n             const DataType data_type = field_schema.data_type();\n\n             // dense vector\n             if (FieldSchema::is_dense_vector_field(data_type)) {\n               if (!py::isinstance<py::array>(obj)) {\n                 throw py::type_error(\"Dense vector[\" + field_schema.name() +\n                                      \"] expects a ndarray, got \" +\n                                      std::string(py::str(py::type::of(obj))));\n               }\n               const auto arr = obj.cast<py::array>();\n               if (arr.ndim() != 1) {\n                 throw py::type_error(\"Dense vector 
expects 1D array, got \" +\n                                      std::to_string(arr.ndim()) + \"D\");\n               }\n               const auto buf = arr.request();\n               switch (data_type) {\n                 case DataType::VECTOR_FP32: {\n                   self.query_vector_ = serialize_vector<float>(\n                       static_cast<const float *>(buf.ptr), buf.size);\n                   return;\n                 }\n                 case DataType::VECTOR_FP64: {\n                   self.query_vector_ = serialize_vector<double>(\n                       static_cast<const double *>(buf.ptr), buf.size);\n                   return;\n                 }\n                 case DataType::VECTOR_INT8: {\n                   self.query_vector_ = serialize_vector<int8_t>(\n                       static_cast<const int8_t *>(buf.ptr), buf.size);\n                   return;\n                 }\n                 case DataType::VECTOR_FP16: {\n                   self.query_vector_ = serialize_vector<uint16_t>(\n                       static_cast<const uint16_t *>(buf.ptr), buf.size);\n                   return;\n                 }\n                 default:\n                   throw py::type_error(\n                       \"Unsupported dense vector type for ndarray input: \" +\n                       std::to_string(static_cast<int>(data_type)));\n               }\n             }\n             // sparse vector\n             if (FieldSchema::is_sparse_vector_field(data_type)) {\n               if (!py::isinstance<py::dict>(obj)) {\n                 throw py::type_error(\"Sparse vector[\" + field_schema.name() +\n                                      \"] expects a Python dict, got \" +\n                                      std::string(py::str(py::type::of(obj))));\n               }\n               const auto sparse = obj.cast<py::dict>();\n\n               switch (data_type) {\n                 case DataType::SPARSE_VECTOR_FP16: {\n                   auto 
[indices, values] =\n                       serialize_sparse_vector<ailego::Float16>(\n                           sparse, [](const py::handle &h, size_t idx) {\n                             float f = checked_cast<float>(\n                                 h, \"Sparse value[\" + std::to_string(idx) + \"]\",\n                                 \"FLOAT\");\n                             return ailego::Float16(f);\n                           });\n                   self.query_sparse_indices_ = std::move(indices);\n                   self.query_sparse_values_ = std::move(values);\n                   break;\n                 }\n                 case DataType::SPARSE_VECTOR_FP32: {\n                   auto [indices, values] = serialize_sparse_vector<float>(\n                       sparse, [](const py::handle &h, size_t idx) {\n                         return checked_cast<float>(\n                             h, \"Sparse value[\" + std::to_string(idx) + \"]\",\n                             \"FLOAT\");\n                       });\n                   self.query_sparse_indices_ = std::move(indices);\n                   self.query_sparse_values_ = std::move(values);\n                   break;\n                 }\n                 default:\n                   throw py::type_error(\n                       \"Unsupported sparse vector type: \" +\n                       std::to_string(static_cast<int>(data_type)));\n               }\n               return;\n             }\n\n             throw py::type_error(\"Unsupported vector field type for field: \" +\n                                  field_schema.name());\n           })\n      .def(\n          \"get_vector\",\n          [](const VectorQuery &self,\n             const FieldSchema &field_schema) -> py::object {\n            DataType data_type = field_schema.data_type();\n            if (FieldSchema::is_dense_vector_field(data_type)) {\n              if (self.query_vector_.empty()) {\n                throw std::runtime_error(\"No 
dense vector has been set\");\n              }\n\n              size_t byte_size = self.query_vector_.size();\n              const void *data = self.query_vector_.data();\n\n              switch (data_type) {\n                case DataType::VECTOR_FP32: {\n                  if (byte_size % sizeof(float) != 0) {\n                    throw std::runtime_error(\n                        \"Invalid buffer size for VECTOR_FP32\");\n                  }\n                  size_t dim = byte_size / sizeof(float);\n                  return py::array_t<float>({dim}, {sizeof(float)},\n                                            static_cast<const float *>(data));\n                }\n                case DataType::VECTOR_FP64: {\n                  if (byte_size % sizeof(double) != 0) {\n                    throw std::runtime_error(\n                        \"Invalid buffer size for VECTOR_FP64\");\n                  }\n                  size_t dim = byte_size / sizeof(double);\n                  return py::array_t<double>({dim}, {sizeof(double)},\n                                             static_cast<const double *>(data));\n                }\n                case DataType::VECTOR_INT8: {\n                  if (byte_size % sizeof(int8_t) != 0) {\n                    throw std::runtime_error(\n                        \"Invalid buffer size for VECTOR_INT8\");\n                  }\n                  size_t dim = byte_size / sizeof(int8_t);\n                  return py::array_t<int8_t>({dim}, {sizeof(int8_t)},\n                                             static_cast<const int8_t *>(data));\n                }\n                case DataType::VECTOR_FP16: {\n                  if (byte_size % 2 != 0) {\n                    throw std::runtime_error(\n                        \"Invalid buffer size for VECTOR_FP16\");\n                  }\n                  size_t dim = byte_size / 2;\n                  return py::array(py::dtype(\"float16\"), {dim}, {2}, data);\n                }\n\n      
          default:\n                  throw py::type_error(\n                      \"Unsupported dense vector type for get_vector: \" +\n                      std::to_string(static_cast<int>(data_type)));\n              }\n            }\n            if (FieldSchema::is_sparse_vector_field(data_type)) {\n              if (self.query_sparse_indices_.empty()) {\n                return py::dict();\n              }\n\n              // Deserialize indices: stored as uint32_t[]\n              size_t indices_byte_size = self.query_sparse_indices_.size();\n              if (indices_byte_size % sizeof(uint32_t) != 0) {\n                throw std::runtime_error(\n                    \"Sparse indices buffer size not aligned to uint32_t\");\n              }\n              size_t n = indices_byte_size / sizeof(uint32_t);\n              const uint32_t *indices = reinterpret_cast<const uint32_t *>(\n                  self.query_sparse_indices_.data());\n\n              // Deserialize values\n              switch (data_type) {\n                case DataType::SPARSE_VECTOR_FP32: {\n                  if (self.query_sparse_values_.size() != n * sizeof(float)) {\n                    throw std::runtime_error(\n                        \"Sparse FP32 values buffer size mismatch\");\n                  }\n                  const float *values = reinterpret_cast<const float *>(\n                      self.query_sparse_values_.data());\n                  py::dict result;\n                  for (size_t i = 0; i < n; ++i) {\n                    result[py::int_(indices[i])] = py::float_(values[i]);\n                  }\n                  return result;\n                }\n                case DataType::SPARSE_VECTOR_FP16: {\n                  if (self.query_sparse_values_.size() !=\n                      n * sizeof(uint16_t)) {\n                    throw std::runtime_error(\n                        \"Sparse FP16 values buffer size mismatch\");\n                  }\n                  const 
uint16_t *raw_bits = reinterpret_cast<const uint16_t *>(\n                      self.query_sparse_values_.data());\n                  py::dict result;\n                  for (size_t i = 0; i < n; ++i) {\n                    float f = ailego::FloatHelper::ToFP32(raw_bits[i]);\n                    result[py::int_(indices[i])] = py::float_(f);\n                  }\n                  return result;\n                }\n                default:\n                  throw py::type_error(\"Unsupported sparse vector type...\");\n              }\n            }\n\n\n            throw py::type_error(\"Unsupported vector field type: \" +\n                                 field_schema.name());\n          },\n          py::arg(\"field_schema\"))\n      .def(py::pickle(\n          [](const VectorQuery &self) {\n            return py::make_tuple(\n                self.topk_, self.field_name_, self.query_vector_,\n                self.query_sparse_indices_, self.query_sparse_values_,\n                self.filter_, self.include_vector_, self.output_fields_,\n                self.query_params_ ? py::cast(self.query_params_) : py::none());\n          },\n          [](py::tuple t) {\n            if (t.size() != 9)\n              throw std::runtime_error(\"Invalid pickle data for VectorQuery\");\n\n            VectorQuery obj{};\n            obj.topk_ = t[0].cast<int>();\n            obj.field_name_ = t[1].cast<std::string>();\n            obj.query_vector_ = t[2].cast<std::string>();\n            obj.query_sparse_indices_ = t[3].cast<std::string>();\n            obj.query_sparse_values_ = t[4].cast<std::string>();\n            obj.filter_ = t[5].cast<std::string>();\n            obj.include_vector_ = t[6].cast<bool>();\n            obj.output_fields_ = t[7].cast<std::vector<std::string>>();\n\n            if (!t[8].is_none()) {\n              obj.query_params_ = t[8].cast<QueryParams::Ptr>();\n            }\n            return obj;\n          }));\n}\n}  // namespace zvec"
  },
  {
    "path": "src/binding/python/model/python_collection.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_collection.h\"\n#include <pybind11/stl.h>\n#include <zvec/db/collection.h>\n\nnamespace zvec {\n\ninline void throw_if_error(const Status &status) {\n  switch (status.code()) {\n    case StatusCode::OK:\n      return;\n    case StatusCode::NOT_FOUND:\n      throw py::key_error(status.message());\n    case StatusCode::INVALID_ARGUMENT:\n      throw py::value_error(status.message());\n    case StatusCode::INTERNAL_ERROR:\n    case StatusCode::ALREADY_EXISTS:\n    case StatusCode::NOT_SUPPORTED:\n    case StatusCode::PERMISSION_DENIED:\n    case StatusCode::FAILED_PRECONDITION:\n    case StatusCode::UNKNOWN:\n    default:\n      throw std::runtime_error(status.message());\n  }\n}\n\n\ntemplate <typename T>\nT unwrap_expected(const tl::expected<T, Status> &exp) {\n  if (exp.has_value()) {\n    return exp.value();\n  }\n  throw_if_error(exp.error());\n  return T{};\n}\n\nvoid ZVecPyCollection::Initialize(pybind11::module_ &m) {\n  py::class_<Collection, Collection::Ptr> collection(m, \"_Collection\");\n  bind_db_methods(collection);\n  bind_ddl_methods(collection);\n  bind_dml_methods(collection);\n  bind_dql_methods(collection);\n  collection.def(py::pickle(\n      [](const Collection &c) {\n        return py::make_tuple(c.Path(), c.Schema(), c.Options());\n      },\n      [](py::tuple t) {\n        if (t.size() != 3) {\n       
   throw std::runtime_error(\"Invalid tuple size for Collection pickle\");\n        }\n        std::string path = t[0].cast<std::string>();\n        auto schema = t[1].cast<CollectionSchema>();\n        CollectionOptions options = t[2].cast<CollectionOptions>();\n        auto result = Collection::Open(path, options);\n        // auto result = Collection::CreateAndOpen(path, schema, options);\n        return unwrap_expected(result);\n      }));\n}\n\nvoid ZVecPyCollection::bind_db_methods(\n    py::class_<Collection, Collection::Ptr> &col) {\n  col.def_static(\"CreateAndOpen\",\n                 [](const std::string &path, const CollectionSchema &schema,\n                    const CollectionOptions &options) {\n                   auto result =\n                       Collection::CreateAndOpen(path, schema, options);\n                   return unwrap_expected(result);\n                 })\n      .def_static(\"Open\", [](const std::string &path,\n                             const CollectionOptions &options) {\n        auto result = Collection::Open(path, options);\n        return unwrap_expected(result);\n      });\n}\n\n\nvoid ZVecPyCollection::bind_ddl_methods(\n    py::class_<Collection, Collection::Ptr> &col) {\n  // bind collection properties\n  col.def(\"Path\",\n          [](const Collection &self) {\n            auto ret = self.Path();\n            return unwrap_expected(ret);\n          })\n      .def(\"Options\",\n           [](const Collection &self) {\n             auto ret = self.Options();\n             return unwrap_expected(ret);\n           })\n      .def(\"Schema\",\n           [](const Collection &self) {\n             auto ret = self.Schema();\n             return unwrap_expected(ret);\n           })\n      .def(\"Stats\", [](const Collection &self) {\n        auto ret = self.Stats();\n        return unwrap_expected(ret);\n      });\n\n  // bind collection ddl methods\n  col.def(\"Destroy\",\n          [](Collection &self) {\n            const 
auto status = self.Destroy();\n            throw_if_error(status);\n          })\n      .def(\"Flush\", [](Collection &self) {\n        auto status = self.Flush();\n        throw_if_error(status);\n      });\n\n  // binding index ddl methods\n  col.def(\"CreateIndex\",\n          [](Collection &self, const std::string &column_name,\n             const IndexParams::Ptr &index_options,\n             const CreateIndexOptions &options) {\n            const auto status =\n                self.CreateIndex(column_name, index_options, options);\n            throw_if_error(status);\n          })\n      .def(\"DropIndex\",\n           [](Collection &self, const std::string &column_name) {\n             const auto status = self.DropIndex(column_name);\n             throw_if_error(status);\n           })\n      .def(\"Optimize\", [](Collection &self, const OptimizeOptions &options) {\n        const auto status = self.Optimize(options);\n        throw_if_error(status);\n      });\n\n  // binding column ddl methods\n  col.def(\"AddColumn\",\n          [](Collection &self, const FieldSchema::Ptr &column_schema,\n             const std::string &expression, const AddColumnOptions &options) {\n            const auto status =\n                self.AddColumn(column_schema, expression, options);\n            throw_if_error(status);\n          })\n      .def(\"DropColumn\",\n           [](Collection &self, std::string &column_name) {\n             auto status = self.DropColumn(column_name);\n             throw_if_error(status);\n           })\n      .def(\"AlterColumn\", [](Collection &self, std::string &column_name,\n                             const std::string &rename,\n                             const FieldSchema::Ptr &new_column_schema,\n                             const AlterColumnOptions &options) {\n        const auto status =\n            self.AlterColumn(column_name, rename, new_column_schema, options);\n        throw_if_error(status);\n      });\n}\n\nvoid 
ZVecPyCollection::bind_dml_methods(\n    py::class_<Collection, Collection::Ptr> &col) {\n  // bind collection upsert/insert/update/delete methods\n  col.def(\"Insert\",\n          [](Collection &self, std::vector<Doc> &docs) {\n            const auto result = self.Insert(docs);\n            return unwrap_expected(result);\n          })\n      .def(\"Update\",\n           [](Collection &self, std::vector<Doc> &docs) {\n             const auto result = self.Update(docs);\n             return unwrap_expected(result);\n           })\n      .def(\"Upsert\",\n           [](Collection &self, std::vector<Doc> &docs) {\n             const auto result = self.Upsert(docs);\n             return unwrap_expected(result);\n           })\n      .def(\"Delete\",\n           [](Collection &self, const std::vector<std::string> &pks) {\n             const auto result = self.Delete(pks);\n             return unwrap_expected(result);\n           })\n      .def(\"DeleteByFilter\", [](Collection &self, const std::string &filter) {\n        const auto status = self.DeleteByFilter(filter);\n        throw_if_error(status);\n      });\n}\n\nvoid ZVecPyCollection::bind_dql_methods(\n    py::class_<Collection, Collection::Ptr> &col) {\n  col.def(\"Query\",\n          [](const Collection &self, const VectorQuery &query) {\n            const auto result = self.Query(query);\n            // return DocPtrList\n            return unwrap_expected(result);\n          })\n      .def(\"GroupByQuery\",\n           [](const Collection &self, const GroupByVectorQuery &query) {\n             const auto result = self.GroupByQuery(query);\n             // return GroupResults\n             return unwrap_expected(result);\n           })\n      .def(\"Fetch\",\n           [](const Collection &self, const std::vector<std::string> &pks) {\n             const auto result = self.Fetch(pks);\n             // return DocPtrMap\n             return unwrap_expected(result);\n           });\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/binding/python/model/python_doc.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_doc.h\"\n#include <pybind11/numpy.h>\n#include <pybind11/stl.h>\n\nnamespace zvec {\n\ntemplate <typename T>\nT checked_cast(const py::object &obj, const std::string &field,\n               const std::string &expected_type) {\n  try {\n    return obj.cast<T>();\n  } catch (const py::cast_error &e) {\n    std::string actual_type = std::string(py::str(py::type::of(obj)));\n    std::string msg = \"Field '\" + field + \"': expected \" + expected_type +\n                      \", got \" + actual_type;\n    throw py::type_error(msg);\n  }\n}\n\nvoid ZVecPyDoc::Initialize(pybind11::module_ &m) {\n  bind_doc_operator(m);\n  bind_doc(m);\n}\n\n\nvoid ZVecPyDoc::bind_doc_operator(py::module_ &m) {\n  py::enum_<Operator>(m, \"_DocOp\")\n      .value(\"INSERT\", Operator::INSERT)\n      .value(\"UPDATE\", Operator::UPDATE)\n      .value(\"DELETE\", Operator::DELETE)\n      .value(\"UPSERT\", Operator::UPSERT);\n}\n\n\nvoid ZVecPyDoc::bind_doc(py::module_ &m) {\n  // binding doc\n  py::class_<Doc, Doc::Ptr> doc(m, \"_Doc\");\n\n  doc.def(py::init([]() { return std::make_shared<Doc>(); }))\n      .def(\"set_pk\", &Doc::set_pk)\n      .def(\"pk\", &Doc::pk)\n      .def(\"set_score\", &Doc::set_score)\n      .def(\"score\", &Doc::score)\n      .def(\"has_field\", &Doc::has)\n      .def(\"field_names\", &Doc::field_names)\n      
.def(py::pickle(\n          [](const Doc &d) {\n            std::vector<uint8_t> data = d.serialize();\n            return py::bytes(reinterpret_cast<const char *>(data.data()),\n                             data.size());\n          },\n          [](py::bytes b) {\n            py::buffer_info info(py::buffer(b).request());\n            const uint8_t *buf = reinterpret_cast<const uint8_t *>(info.ptr);\n            size_t size = static_cast<size_t>(info.size);\n            Doc::Ptr d = Doc::deserialize(buf, size);\n            if (!d) throw std::runtime_error(\"Failed to unpickle Doc\");\n            return d;\n          }));\n\n\n  // binding doc set field\n  doc.def(\n      \"set_any\",\n      [](Doc &self, const std::string &field, const FieldSchema &field_schema,\n         const py::object &obj) -> bool {\n        if (obj.is_none()) {\n          if (field_schema.nullable()) {\n            self.set_null(field);\n            return true;\n          }\n          throw py::value_error(\"Field '\" + field +\n                                \"': expected non-nullable type\");\n        }\n        switch (field_schema.data_type()) {\n          // base datatypes\n          case DataType::STRING:\n            return self.set(field,\n                            checked_cast<std::string>(obj, field, \"STRING\"));\n          case DataType::BOOL:\n            return self.set(field, checked_cast<bool>(obj, field, \"BOOL\"));\n          case DataType::INT32:\n            return self.set(field, checked_cast<int32_t>(obj, field, \"INT32\"));\n          case DataType::INT64:\n            return self.set(field, checked_cast<int64_t>(obj, field, \"INT64\"));\n          case DataType::UINT32:\n            return self.set(field,\n                            checked_cast<uint32_t>(obj, field, \"UINT32\"));\n          case DataType::UINT64:\n            return self.set(field,\n                            checked_cast<uint64_t>(obj, field, \"UINT64\"));\n          case DataType::FLOAT:\n  
          return self.set(field, checked_cast<float>(obj, field, \"FLOAT\"));\n          case DataType::DOUBLE:\n            return self.set(field, checked_cast<double>(obj, field, \"DOUBLE\"));\n\n          // array datatypes\n          case DataType::ARRAY_STRING:\n            return self.set(field, checked_cast<std::vector<std::string>>(\n                                       obj, field, \"ARRAY_STRING\"));\n          case DataType::ARRAY_BOOL:\n            return self.set(field, checked_cast<std::vector<bool>>(\n                                       obj, field, \"ARRAY_BOOL\"));\n          case DataType::ARRAY_INT32:\n            return self.set(field, checked_cast<std::vector<int32_t>>(\n                                       obj, field, \"ARRAY_INT32\"));\n          case DataType::ARRAY_UINT32:\n            return self.set(field, checked_cast<std::vector<uint32_t>>(\n                                       obj, field, \"ARRAY_UINT32\"));\n          case DataType::ARRAY_INT64:\n            return self.set(field, checked_cast<std::vector<int64_t>>(\n                                       obj, field, \"ARRAY_INT64\"));\n          case DataType::ARRAY_UINT64:\n            return self.set(field, checked_cast<std::vector<uint64_t>>(\n                                       obj, field, \"ARRAY_UINT64\"));\n          case DataType::ARRAY_FLOAT:\n            return self.set(field, checked_cast<std::vector<float>>(\n                                       obj, field, \"ARRAY_FLOAT\"));\n          case DataType::ARRAY_DOUBLE:\n            return self.set(field, checked_cast<std::vector<double>>(\n                                       obj, field, \"ARRAY_DOUBLE\"));\n\n          // dense vector datatypes\n          case DataType::VECTOR_FP16: {\n            const auto value = checked_cast<py::list>(\n                obj, field, \"VECTOR_FP16 (list of numbers)\");\n            std::vector<ailego::Float16> new_value;\n            new_value.reserve(value.size());\n          
  for (const auto &item : value) {\n              try {\n                new_value.emplace_back(item.cast<float>());\n              } catch (const py::cast_error &e) {\n                throw py::type_error(\"Vector '\" + field +\n                                     \"': expected VECTOR_FP16, got \" +\n                                     std::string(py::str(py::type::of(obj))));\n              }\n            }\n            return self.set(field, new_value);\n          }\n          case DataType::VECTOR_FP32:\n            return self.set(field, checked_cast<std::vector<float>>(\n                                       obj, field, \"VECTOR_FP32\"));\n          case DataType::VECTOR_FP64:\n            return self.set(field, checked_cast<std::vector<double>>(\n                                       obj, field, \"VECTOR_FP64\"));\n          case DataType::VECTOR_INT8:\n            return self.set(field, checked_cast<std::vector<int8_t>>(\n                                       obj, field, \"VECTOR_INT8\"));\n\n          // sparse vector datatypes\n          case DataType::SPARSE_VECTOR_FP32: {\n            const auto sparse_dict =\n                checked_cast<py::dict>(obj, field, \"SPARSE_VECTOR_FP32 (dict)\");\n            std::vector<uint32_t> indices;\n            std::vector<float> values;\n            for (const auto &item : sparse_dict) {\n              try {\n                indices.push_back(item.first.cast<uint32_t>());\n                values.push_back(item.second.cast<float>());\n              } catch (const py::cast_error &e) {\n                throw py::type_error(\n                    \"Vector '\" + field +\n                    \"': sparse vector key/value must be (uint32, float), \"\n                    \"got key=\" +\n                    std::string(py::str(py::type::of(item.first))) +\n                    \", value=\" +\n                    std::string(py::str(py::type::of(item.second))));\n              }\n            }\n            const 
std::pair<std::vector<uint32_t>, std::vector<float>>\n                sparse_vector{std::move(indices), std::move(values)};\n            return self.set(field, sparse_vector);\n          }\n          case DataType::SPARSE_VECTOR_FP16: {\n            const auto sparse_dict =\n                checked_cast<py::dict>(obj, field, \"SPARSE_VECTOR_FP16 (dict)\");\n            std::vector<uint32_t> indices;\n            std::vector<ailego::Float16> values;\n            for (const auto &item : sparse_dict) {\n              try {\n                indices.push_back(item.first.cast<uint32_t>());\n                values.push_back(ailego::Float16(item.second.cast<float>()));\n              } catch (const py::cast_error &e) {\n                throw py::type_error(\n                    \"Field '\" + field +\n                    \"': sparse vector key/value must be (uint32, float), \"\n                    \"got key=\" +\n                    std::string(py::str(py::type::of(item.first))) +\n                    \", value=\" +\n                    std::string(py::str(py::type::of(item.second))));\n              }\n            }\n            const std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>\n                sparse_vector{std::move(indices), std::move(values)};\n            return self.set(field, sparse_vector);\n          }\n          default:\n            throw py::type_error(\"Unsupported type for field: \" + field);\n        }\n      });\n\n  // binding doc get field\n  doc.def(\n      \"get_any\",\n      [](Doc &self, const std::string &field,\n         const DataType &type) -> py::object {\n        switch (type) {\n          // base datatypes\n          case DataType::STRING:\n            return py::cast(self.get<std::string>(field));\n          case DataType::BOOL:\n            return py::cast(self.get<bool>(field));\n          case DataType::INT32:\n            return py::cast(self.get<int32_t>(field));\n          case DataType::UINT32:\n            return 
py::cast(self.get<uint32_t>(field));\n          case DataType::INT64:\n            return py::cast(self.get<int64_t>(field));\n          case DataType::UINT64:\n            return py::cast(self.get<uint64_t>(field));\n          case DataType::FLOAT:\n            return py::cast(self.get<float>(field));\n          case DataType::DOUBLE:\n            return py::cast(self.get<double>(field));\n\n          // array datatypes\n          case DataType::ARRAY_STRING:\n            return py::cast(self.get<std::vector<std::string>>(field));\n          case DataType::ARRAY_INT32:\n            return py::cast(self.get<std::vector<int32_t>>(field));\n          case DataType::ARRAY_INT64:\n            return py::cast(self.get<std::vector<int64_t>>(field));\n          case DataType::ARRAY_UINT32:\n            return py::cast(self.get<std::vector<uint32_t>>(field));\n          case DataType::ARRAY_UINT64:\n            return py::cast(self.get<std::vector<uint64_t>>(field));\n          case DataType::ARRAY_FLOAT:\n            return py::cast(self.get<std::vector<float>>(field));\n          case DataType::ARRAY_DOUBLE:\n            return py::cast(self.get<std::vector<double>>(field));\n          case DataType::ARRAY_BOOL:\n            return py::cast(self.get<std::vector<bool>>(field));\n\n          // vector datatypes\n          case DataType::VECTOR_INT8:\n            return py::cast(self.get<std::vector<int8_t>>(field));\n          case DataType::VECTOR_FP16: {\n            auto value = self.get<std::vector<ailego::Float16>>(field);\n            if (value.has_value()) {\n              std::vector<float> new_value;\n              new_value.reserve(value.value().size());\n              for (auto &item : value.value()) {\n                new_value.push_back(static_cast<float>(item));\n              }\n              return py::cast(new_value);\n            }\n            return py::none();\n          }\n          case DataType::VECTOR_FP32:\n            return 
py::cast(self.get<std::vector<float>>(field));\n          case DataType::VECTOR_FP64:\n            return py::cast(self.get<std::vector<double>>(field));\n          case DataType::SPARSE_VECTOR_FP16: {\n            auto vector = self.get<\n                std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n                field);\n            const auto &indices = vector->first;\n            const auto &values = vector->second;\n            py::dict d;\n            for (size_t i = 0; i < indices.size(); ++i) {\n              d[py::int_(indices[i])] =\n                  py::float_(static_cast<float>(values[i]));\n            }\n            return std::move(d);\n          }\n          case DataType::SPARSE_VECTOR_FP32: {\n            auto vector =\n                self.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                    field);\n            const auto &indices = vector->first;\n            const auto &values = vector->second;\n            py::dict d;\n            for (size_t i = 0; i < indices.size(); ++i) {\n              d[py::int_(indices[i])] = py::float_(values[i]);\n            }\n            return std::move(d);\n          }\n          default:\n            throw py::type_error(\"Unsupported type for field: \" + field);\n        }\n      });\n  doc.def(\n      \"get_all\",\n      [](Doc &self, const CollectionSchema &schema) -> py::tuple {\n        py::tuple result(4);\n        // 1. set doc id and score\n        result[0] = py::str(self.pk());\n        result[1] = py::float_(self.score());\n\n        if (self.is_empty()) {\n          result[2] = py::none();\n          result[3] = py::none();\n          return result;\n        }\n        // 2. 
set scalar fields\n        py::dict fields;\n        for (const auto &field_meta : schema.forward_fields()) {\n          const std::string &field = field_meta->name();\n          if (!self.has_value(field)) {\n            continue;\n          }\n\n          try {\n            auto val = [&]() -> py::object {\n              switch (field_meta->data_type()) {\n                // base datatypes\n                case DataType::STRING:\n                  return py::str(self.get<std::string>(field).value());\n                case DataType::BOOL:\n                  return py::cast(self.get<bool>(field));\n                case DataType::INT32:\n                  return py::cast(self.get<int32_t>(field));\n                case DataType::UINT32:\n                  return py::cast(self.get<uint32_t>(field));\n                case DataType::INT64:\n                  return py::cast(self.get<int64_t>(field));\n                case DataType::UINT64:\n                  return py::cast(self.get<uint64_t>(field));\n                case DataType::FLOAT:\n                  return py::cast(self.get<float>(field));\n                case DataType::DOUBLE:\n                  return py::cast(self.get<double>(field));\n\n                // array datatypes\n                case DataType::ARRAY_STRING:\n                  return py::cast(self.get<std::vector<std::string>>(field));\n                case DataType::ARRAY_INT32:\n                  return py::cast(self.get<std::vector<int32_t>>(field));\n                case DataType::ARRAY_INT64:\n                  return py::cast(self.get<std::vector<int64_t>>(field));\n                case DataType::ARRAY_UINT32:\n                  return py::cast(self.get<std::vector<uint32_t>>(field));\n                case DataType::ARRAY_UINT64:\n                  return py::cast(self.get<std::vector<uint64_t>>(field));\n                case DataType::ARRAY_FLOAT:\n                  return py::cast(self.get<std::vector<float>>(field));\n                case 
DataType::ARRAY_DOUBLE:\n                  return py::cast(self.get<std::vector<double>>(field));\n                case DataType::ARRAY_BOOL:\n                  return py::cast(self.get<std::vector<bool>>(field));\n                default:\n                  throw py::type_error(\"Unsupported type for field: \" + field);\n              }\n            }();\n            fields[py::str(field)] = val;\n          } catch (const std::exception &e) {\n            fields[py::str(field)] = py::none();\n          }\n        }\n        if (!fields.empty()) {\n          result[2] = fields;\n        } else {\n          result[2] = py::none();\n        }\n        // 3. set vector fields\n        py::dict vectors;\n        for (const auto &vec_meta : schema.vector_fields()) {\n          const std::string &vec = vec_meta->name();\n          if (!self.has_value(vec)) continue;\n\n          try {\n            auto array = [&]() -> py::object {\n              switch (vec_meta->data_type()) {\n                case DataType::VECTOR_INT8:\n                  return py::cast(self.get<std::vector<int8_t>>(vec));\n                case DataType::VECTOR_FP16: {\n                  auto value = self.get<std::vector<ailego::Float16>>(vec);\n                  if (value.has_value()) {\n                    std::vector<float> new_value;\n                    new_value.reserve(value.value().size());\n                    for (auto &item : value.value()) {\n                      new_value.push_back(static_cast<float>(item));\n                    }\n                    return py::cast(new_value);\n                  }\n                  return py::none();\n                }\n                case DataType::VECTOR_FP32:\n                  return py::cast(self.get<std::vector<float>>(vec));\n                case DataType::VECTOR_FP64:\n                  return py::cast(self.get<std::vector<double>>(vec));\n                case DataType::SPARSE_VECTOR_FP16: {\n                  auto vector =\n                 
     self.get<std::pair<std::vector<uint32_t>,\n                                         std::vector<ailego::Float16>>>(vec);\n                  const auto &indices = vector->first;\n                  const auto &values = vector->second;\n                  py::dict d;\n                  for (size_t i = 0; i < indices.size(); ++i) {\n                    d[py::int_(indices[i])] =\n                        py::float_(static_cast<float>(values[i]));\n                  }\n                  return std::move(d);\n                }\n                case DataType::SPARSE_VECTOR_FP32: {\n                  auto vector = self.get<\n                      std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                      vec);\n                  const auto &indices = vector->first;\n                  const auto &values = vector->second;\n                  py::dict d;\n                  for (size_t i = 0; i < indices.size(); ++i) {\n                    d[py::int_(indices[i])] = py::float_(values[i]);\n                  }\n                  return std::move(d);\n                }\n                default:\n                  throw py::type_error(\"Unsupported type for field: \" + vec);\n              }\n            }();\n            vectors[py::str(vec)] = array;\n          } catch (const std::exception &e) {\n            vectors[py::str(vec)] = py::none();\n          }\n        }\n        if (!vectors.empty()) {\n          result[3] = vectors;\n        } else {\n          result[3] = py::none();\n        }\n        return result;\n      },\n      py::arg(\"schema\"),\n      \"Get all fields and vectors as a tuple: (id, score, fields, vectors). \"\n      \"Dense vectors are returned as lists of numbers and sparse vectors as \"\n      \"{index: value} dicts.\");\n}\n}  // namespace zvec"
  },
  {
    "path": "src/binding/python/model/schema/python_schema.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_schema.h\"\n#include <pybind11/stl.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/stats.h>\n\nnamespace zvec {\n\nvoid ZVecPySchemas::Initialize(pybind11::module_ &parent) {\n  auto m =\n      parent.def_submodule(\"schema\", \"This module contains the schema of Zvec\");\n\n  bind_field_schema(m);\n  bind_collection_schema(m);\n  bind_collection_stats(m);\n}\n\nvoid ZVecPySchemas::bind_field_schema(py::module_ &m) {\n  py::class_<FieldSchema, FieldSchema::Ptr>(m, \"_FieldSchema\")\n      .def(py::init<const std::string &, DataType, uint32_t, bool,\n                    const IndexParams::Ptr &>(),\n           py::arg(\"name\"), py::arg(\"data_type\"), py::arg(\"dimension\") = 0,\n           py::arg(\"nullable\") = false, py::arg(\"index_param\") = nullptr)\n      .def_property_readonly(\"name\", &FieldSchema::name)\n      .def_property_readonly(\"data_type\", &FieldSchema::data_type)\n      .def_property_readonly(\"nullable\", &FieldSchema::nullable)\n      .def_property_readonly(\"dimension\", &FieldSchema::dimension)\n      .def_property_readonly(\"is_dense_vector\", &FieldSchema::is_dense_vector)\n      .def_property_readonly(\"is_sparse_vector\", &FieldSchema::is_sparse_vector)\n      .def_property_readonly(\"index_type\",\n                             [](const FieldSchema &self) {\n                               
return self.index_params()\n                                          ? self.index_type()\n                                          : IndexType::UNDEFINED;\n                             })\n      .def_property_readonly(\"index_param\",\n                             [](const FieldSchema &self) -> py::object {\n                               if (self.index_params()) {\n                                 return py::cast(self.index_params());\n                               }\n                               return py::none();\n                             })\n      .def(\"__eq__\", &FieldSchema::operator==)\n      .def(\"__ne__\", &FieldSchema::operator!=)\n      .def(py::pickle(\n          [](const FieldSchema &self) {\n            return py::make_tuple(self.name(), self.data_type(),\n                                  self.dimension(), self.nullable(),\n                                  self.index_params()\n                                      ? py::cast(self.index_params())\n                                      : py::none());\n          },\n          [](py::tuple t) {\n            if (t.size() != 5) {\n              throw std::runtime_error(\n                  \"Invalid tuple size for FieldSchema pickle\");\n            }\n            std::string name = t[0].cast<std::string>();\n            DataType dtype = t[1].cast<DataType>();\n            uint32_t dim = t[2].cast<uint32_t>();\n            bool nullable = t[3].cast<bool>();\n\n            IndexParams::Ptr idx_params = nullptr;\n            if (!t[4].is_none()) {\n              idx_params = t[4].cast<IndexParams::Ptr>();\n            }\n\n            return std::make_shared<FieldSchema>(name, dtype, dim, nullable,\n                                                 idx_params);\n          }));\n}\n\nvoid ZVecPySchemas::bind_collection_schema(py::module_ &m) {\n  py::class_<CollectionSchema, CollectionSchema::Ptr>(m, \"_CollectionSchema\")\n      .def(py::init<const std::string &, const FieldSchemaPtrList &>(),\n    
       py::arg(\"name\"), py::arg(\"fields\"),\n           \"Construct with name and list of fields\")\n      .def_property_readonly(\"name\", &CollectionSchema::name)\n      .def(\"has_field\", &CollectionSchema::has_field, py::arg(\"field_name\"),\n           \"Check if a field exists.\")\n      .def(\n          \"get_field\",\n          [](const CollectionSchema &self, const std::string &name)\n              -> const FieldSchema * { return self.get_field(name); },\n          py::arg(\"field_name\"), py::return_value_policy::reference_internal,\n          \"Get field by name (const pointer), returns None if not found.\")\n      .def(\n          \"get_forward_field\",\n          [](const CollectionSchema &self, const std::string &name)\n              -> const FieldSchema * { return self.get_forward_field(name); },\n          py::arg(\"field_name\"), py::return_value_policy::reference_internal,\n          \"Get forward field (used for filtering).\")\n      .def(\n          \"get_vector_field\",\n          [](const CollectionSchema &self, const std::string &name)\n              -> const FieldSchema * { return self.get_vector_field(name); },\n          py::arg(\"field_name\"), py::return_value_policy::reference_internal,\n          \"Get vector field by name.\")\n      .def(\"fields\", &CollectionSchema::fields,\n           \"Return list of all field schemas.\", py::return_value_policy::copy)\n      .def(\"forward_fields\", &CollectionSchema::forward_fields,\n           \"Return list of forward-indexed fields.\",\n           py::return_value_policy::copy)\n      .def(\"vector_fields\", &CollectionSchema::vector_fields,\n           \"Return list of vector fields.\", py::return_value_policy::copy)\n      .def(\"__eq__\", &CollectionSchema::operator==)\n      .def(\"__ne__\", &CollectionSchema::operator!=)\n      .def(py::pickle(\n          [](const CollectionSchema &cs) {\n            return py::make_tuple(cs.name(), cs.fields(),\n                                  
cs.max_doc_count_per_segment());\n          },\n          [](py::tuple t) {\n            if (t.size() != 3)\n              throw std::runtime_error(\"Invalid state for CollectionSchema!\");\n\n            auto name = t[0].cast<std::string>();\n            auto fields = t[1].cast<FieldSchemaPtrList>();\n            auto max_docs = t[2].cast<uint64_t>();\n\n            auto cs = std::make_shared<CollectionSchema>(name, fields);\n            cs->set_max_doc_count_per_segment(max_docs);\n            return cs;\n          }));\n}\n\nvoid ZVecPySchemas::bind_collection_stats(py::module_ &m) {\n  pybind11::class_<CollectionStats>(m, \"CollectionStats\")\n      .def(pybind11::init<>())\n      .def_property_readonly(\n          \"doc_count\", [](const CollectionStats &c) { return c.doc_count; })\n      .def_property_readonly(\n          \"index_completeness\",\n          [](const CollectionStats &c) { return c.index_completeness; })\n      .def(\"__repr__\", [](const CollectionStats &c) {\n        std::string map_str = \"{\";\n        bool first = true;\n        for (const auto &[k, v] : c.index_completeness) {\n          if (!first) map_str += \", \";\n          map_str += \"\\\"\" + k + \"\\\":\" + std::to_string(v);\n          first = false;\n        }\n        map_str += \"}\";\n        return \"{\\\"doc_count\\\":\" + std::to_string(c.doc_count) +\n               \", \\\"index_completeness\\\":\" + map_str + \"}\";\n      });\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/binding/python/typing/python_type.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"python_type.h\"\n\nnamespace zvec {\n\nvoid ZVecPyTyping::Initialize(pybind11::module_ &parent) {\n  auto m = parent.def_submodule(\n      \"typing\", \"This module contains the basic data types of Zvec\");\n  // binding base types\n  bind_datatypes(m);\n  bind_index_types(m);\n  bind_metric_types(m);\n  bind_quantize_types(m);\n  bind_status(m);\n}\n\nvoid ZVecPyTyping::bind_datatypes(pybind11::module_ &m) {\n  py::enum_<DataType>(m, \"DataType\", R\"pbdoc(\nEnumeration of supported data types in Zvec.\n\nIncludes scalar types, dense/sparse vector types, and array types.\n\nExamples:\n    >>> from zvec.typing import DataType\n    >>> print(DataType.FLOAT)\n    DataType.FLOAT\n    >>> print(DataType.VECTOR_FP32)\n    DataType.VECTOR_FP32\n)pbdoc\")\n      // field type\n      .value(\"STRING\", DataType::STRING)\n      .value(\"BOOL\", DataType::BOOL)\n      .value(\"INT32\", DataType::INT32)\n      .value(\"INT64\", DataType::INT64)\n      .value(\"FLOAT\", DataType::FLOAT)\n      .value(\"DOUBLE\", DataType::DOUBLE)\n      .value(\"UINT32\", DataType::UINT32)\n      .value(\"UINT64\", DataType::UINT64)\n\n\n      // dense vector type\n      .value(\"VECTOR_FP16\", DataType::VECTOR_FP16)\n      .value(\"VECTOR_FP32\", DataType::VECTOR_FP32)\n      .value(\"VECTOR_FP64\", DataType::VECTOR_FP64)\n      .value(\"VECTOR_INT8\", 
DataType::VECTOR_INT8)\n\n\n      // sparse vector type\n      .value(\"SPARSE_VECTOR_FP32\", DataType::SPARSE_VECTOR_FP32)\n      .value(\"SPARSE_VECTOR_FP16\", DataType::SPARSE_VECTOR_FP16)\n\n\n      // array type [not support bool/bytes]\n      .value(\"ARRAY_STRING\", DataType::ARRAY_STRING)\n      .value(\"ARRAY_INT32\", DataType::ARRAY_INT32)\n      .value(\"ARRAY_INT64\", DataType::ARRAY_INT64)\n      .value(\"ARRAY_FLOAT\", DataType::ARRAY_FLOAT)\n      .value(\"ARRAY_DOUBLE\", DataType::ARRAY_DOUBLE)\n      .value(\"ARRAY_BOOL\", DataType::ARRAY_BOOL)\n      .value(\"ARRAY_UINT32\", DataType::ARRAY_UINT32)\n      .value(\"ARRAY_UINT64\", DataType::ARRAY_UINT64)\n\n\n      // non support\n      // .value(\"BINARY\",    DataType::BINARY)\n      // .value(\"ARRAY_BINARY\", DataType::ARRAY_BINARY)\n      // .value(\"VECTOR_INT4\",    DataType::VECTOR_INT4)\n      // .value(\"VECTOR_INT16\",   DataType::VECTOR_INT16)\n      // .value(\"VECTOR_BINARY32\", DataType::VECTOR_BINARY32)\n      // .value(\"VECTOR_BINARY64\", DataType::VECTOR_BINARY64)\n      // .value(\"UNDEFINED\", DataType::UNDEFINED)\n      ;\n}\n\nvoid ZVecPyTyping::bind_index_types(pybind11::module_ &m) {\n  py::enum_<IndexType>(m, \"IndexType\", R\"pbdoc(\nEnumeration of supported index types in Zvec.\n\nExamples:\n    >>> from zvec.typing import IndexType\n    >>> print(IndexType.HNSW)\n    IndexType.HNSW\n)pbdoc\")\n      .value(\"UNDEFINED\", IndexType::UNDEFINED)\n      .value(\"HNSW\", IndexType::HNSW)\n      .value(\"HNSW_RABITQ\", IndexType::HNSW_RABITQ)\n      .value(\"IVF\", IndexType::IVF)\n      .value(\"FLAT\", IndexType::FLAT)\n      .value(\"INVERT\", IndexType::INVERT);\n}\n\nvoid ZVecPyTyping::bind_metric_types(pybind11::module_ &m) {\n  py::enum_<MetricType>(m, \"MetricType\", R\"pbdoc(\nEnumeration of supported distance/similarity metrics.\n\n- COSINE: Cosine similarity.\n- IP: Inner product (dot product).\n- L2: Euclidean distance (L2 norm).\n\nExamples:\n    >>> from 
zvec.typing import MetricType\n    >>> print(MetricType.COSINE)\n    MetricType.COSINE\n)pbdoc\")\n      .value(\"COSINE\", MetricType::COSINE)\n      .value(\"IP\", MetricType::IP)\n      .value(\"L2\", MetricType::L2);\n}\n\nvoid ZVecPyTyping::bind_quantize_types(py::module_ &m) {\n  py::enum_<QuantizeType>(m, \"QuantizeType\", R\"pbdoc(\nEnumeration of supported quantization types for vector compression.\n\nExamples:\n    >>> from zvec.typing import QuantizeType\n    >>> print(QuantizeType.INT8)\n    QuantizeType.INT8\n)pbdoc\")\n      .value(\"UNDEFINED\", QuantizeType::UNDEFINED)\n      .value(\"FP16\", QuantizeType::FP16)\n      .value(\"INT8\", QuantizeType::INT8)\n      .value(\"INT4\", QuantizeType::INT4)\n      .value(\"RABITQ\", QuantizeType::RABITQ);\n}\n\nvoid ZVecPyTyping::bind_status(py::module_ &m) {\n  // bind status code\n  py::enum_<StatusCode>(m, \"StatusCode\", R\"pbdoc(\nEnumeration of possible status codes for Zvec operations.\n\nUsed by the `Status` class to indicate success or failure reason.\n)pbdoc\")\n      .value(\"OK\", StatusCode::OK)\n      .value(\"NOT_FOUND\", StatusCode::NOT_FOUND)\n      .value(\"ALREADY_EXISTS\", StatusCode::ALREADY_EXISTS)\n      .value(\"INVALID_ARGUMENT\", StatusCode::INVALID_ARGUMENT)\n      .value(\"PERMISSION_DENIED\", StatusCode::PERMISSION_DENIED)\n      .value(\"FAILED_PRECONDITION\", StatusCode::FAILED_PRECONDITION)\n      .value(\"RESOURCE_EXHAUSTED\", StatusCode::RESOURCE_EXHAUSTED)\n      .value(\"UNAVAILABLE\", StatusCode::UNAVAILABLE)\n      .value(\"INTERNAL_ERROR\", StatusCode::INTERNAL_ERROR)\n      .value(\"NOT_SUPPORTED\", StatusCode::NOT_SUPPORTED)\n      .value(\"UNKNOWN\", StatusCode::UNKNOWN);\n\n  // bind status\n  py::class_<Status>(m, \"Status\", R\"pbdoc(\nRepresents the outcome of a Zvec operation.\n\nA `Status` object is either OK (success) or carries an error code and message.\n\nExamples:\n    >>> from zvec.typing import Status, StatusCode\n    >>> s = Status()\n    >>> 
print(s.ok())\n    True\n    >>> s = Status(StatusCode.INVALID_ARGUMENT, \"Field not found\")\n    >>> print(s.code() == StatusCode.INVALID_ARGUMENT)\n    True\n    >>> print(s.message())\n    Field not found\n)pbdoc\")\n      .def(py::init<>())\n      .def(py::init<StatusCode, const std::string &>(), py::arg(\"code\"),\n           py::arg(\"message\") = \"\", R\"pbdoc(\nConstruct a status with the given code and optional message.\n\nArgs:\n    code (StatusCode): The status code.\n    message (str, optional): Error message. Defaults to empty string.\n)pbdoc\")\n      .def(\"ok\", &Status::ok, \"bool: Returns True if the status is OK.\")\n      .def(\"code\", &Status::code, \"StatusCode: Returns the status code.\")\n      .def(\"message\", &Status::message,\n           \"str: Returns the error message (may be empty).\")\n      .def_static(\"OK\", &Status::OK, \"Create an OK status.\")\n      .def_static(\n          \"InvalidArgument\",\n          [](const std::string &msg) { return Status::InvalidArgument(msg); },\n          py::arg(\"message\"))\n      .def_static(\n          \"NotFound\",\n          [](const std::string &msg) { return Status::NotFound(msg); },\n          py::arg(\"message\"))\n      .def_static(\n          \"AlreadyExists\",\n          [](const std::string &msg) { return Status::AlreadyExists(msg); },\n          py::arg(\"message\"))\n      .def_static(\n          \"InternalError\",\n          [](const std::string &msg) { return Status::InternalError(msg); },\n          py::arg(\"message\"))\n      .def_static(\n          \"PermissionDenied\",\n          [](const std::string &msg) { return Status::PermissionDenied(msg); },\n          py::arg(\"message\"))\n      .def(\"__eq__\", [](const Status &self,\n                        const Status &other) { return self == other; })\n      .def(\"__ne__\", [](const Status &self,\n                        const Status &other) { return self != other; })\n      .def(\"__repr__\", [](const Status &self) {\n      
  std::string result =\n            \"{\"\n            \"\\\"code\\\":\" +\n            std::to_string(static_cast<int>(self.code()));\n\n        if (!self.message().empty()) {\n          result += \", \\\"message\\\":\\\"\" + self.message() + \"\\\"\";\n        }\n\n        result += \"}\";\n        return result;\n      });\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/core/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(RABITQ_SUPPORTED AND AUTO_DETECT_ARCH)\n  set(HNSW_RABITQ_FILES\n      hnsw_rabitq_query_algorithm.cc\n      hnsw_rabitq_streamer.cc\n      hnsw_rabitq_searcher.cc\n      hnsw_rabitq_entity.cc\n      rabitq_reformer.cc\n      rabitq_converter.cc\n  )\n  set(HNSW_RABITQ_FILES_FULL ${HNSW_RABITQ_FILES})\n  list(TRANSFORM HNSW_RABITQ_FILES_FULL PREPEND \"algorithm/hnsw_rabitq/\")\n  foreach(FILE ${HNSW_RABITQ_FILES_FULL})\n      set_source_files_properties(\n          ${FILE}\n          PROPERTIES\n          COMPILE_FLAGS \"${RABITQ_ARCH_FLAG}\"\n      )\n  endforeach()\nendif()\n\ncc_directory(framework)\ncc_directory(algorithm)\ncc_directory(metric)\ncc_directory(quantizer)\ncc_directory(utility)\ncc_directory(interface)\ncc_directory(mixed_reducer)\n\ngit_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR})\nfile(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h)\n\n# Remove algorithm/hnsw_rabitq implementation files if not supported.\n# interface/indexes/hnsw_rabitq_index.cc is kept because it provides the vtable\n# for HNSWRabitqIndex and guards rabitqlib usage with #if RABITQ_SUPPORTED.\nif(NOT RABITQ_SUPPORTED)\n  list(FILTER ALL_CORE_SRCS EXCLUDE REGEX \".*/algorithm/hnsw_rabitq/.*\")\nendif()\n\ncc_library(\n    NAME zvec_core STATIC STRICT PACKED\n    SRCS ${ALL_CORE_SRCS}\n    LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib\n    INCS . ${PROJECT_ROOT_DIR}/src/core\n    VERSION \"${GIT_SRCS_VER}\"\n)\n"
  },
  {
    "path": "src/core/algorithm/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_directory(cluster)\ncc_directory(flat)\ncc_directory(flat_sparse)\ncc_directory(ivf)\ncc_directory(hnsw)\ncc_directory(hnsw_sparse)\nif(RABITQ_SUPPORTED)\n  message(STATUS \"BUILD RABITQ\")\n  cc_directory(hnsw_rabitq)\nelse()\n  message(STATUS \"NOT BUILD RABITQ\")\n  # Empty stub library for unsupported platforms\n  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/rabitq_stub.cc\n    \"// Stub implementation for unsupported platforms\\n\"\n    \"// RaBitQ only supports Linux x86_64\\n\"\n    \"namespace zvec { namespace core { /* empty namespace for compatibility */ } }\\n\"\n  )\n\n  cc_library(\n      NAME core_knn_hnsw_rabitq\n      STATIC SHARED STRICT ALWAYS_LINK\n      SRCS ${CMAKE_CURRENT_BINARY_DIR}/rabitq_stub.cc\n      LIBS core_framework\n      INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n      VERSION \"${PROXIMA_ZVEC_VERSION}\"\n    )\nendif()\n"
  },
  {
    "path": "src/core/algorithm/cluster/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_knn_cluster STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS zvec_ailego core_framework \n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/cluster\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/cluster/cluster_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n//! General\nstatic const std::string GENERAL_CLUSTER_COUNT =\n    \"proxima.general.cluster.count\";\nstatic const std::string GENERAL_THREAD_COUNT =\n    \"proxima.general.cluster.thread_count\";\n\n//! Optimize K-means\nstatic const std::string OPTKMEANS_CLUSTER_COUNT =\n    \"proxima.optkmeans.cluster.count\";\nstatic const std::string OPTKMEANS_CLUSTER_MAX_ITERATIONS =\n    \"proxima.optkmeans.cluster.max_iterations\";\nstatic const std::string OPTKMEANS_CLUSTER_EPSILON =\n    \"proxima.optkmeans.cluster.epsilon\";\nstatic const std::string OPTKMEANS_CLUSTER_SHARD_FACTOR =\n    \"proxima.optkmeans.cluster.shard_factor\";\nstatic const std::string OPTKMEANS_CLUSTER_PURGE_EMPTY =\n    \"proxima.optkmeans.cluster.purge_empty\";\nstatic const std::string OPTKMEANS_CLUSTER_MARKOV_CHAIN_LENGTH =\n    \"proxima.optkmeans.cluster.markov_chain_length\";\nstatic const std::string OPTKMEANS_CLUSTER_ASSUMPTION_FREE =\n    \"proxima.optkmeans.cluster.assumption_free\";\n\n//! 
K-means\nstatic const std::string KMEANS_CLUSTER_COUNT = \"proxima.kmeans.cluster.count\";\nstatic const std::string KMEANS_CLUSTER_SHARD_FACTOR =\n    \"proxima.kmeans.cluster.shard_factor\";\nstatic const std::string KMEANS_CLUSTER_EPSILON =\n    \"proxima.kmeans.cluster.epsilon\";\nstatic const std::string KMEANS_CLUSTER_MAX_ITERATIONS =\n    \"proxima.kmeans.cluster.max_iterations\";\nstatic const std::string KMEANS_CLUSTER_PURGE_EMPTY =\n    \"proxima.kmeans.cluster.purge_empty\";\nstatic const std::string KMEANS_CLUSTER_BATCH = \"proxima.kmeans.cluster.batch\";\nstatic const std::string KMEANS_CLUSTER_SEEKER_CLASS =\n    \"proxima.kmeans.cluster.seeker_class\";\nstatic const std::string KMEANS_CLUSTER_SEEKER_PARAMS =\n    \"proxima.kmeans.cluster.seeker_params\";\n\n//! Mini Batch K-means\nstatic const std::string MINIBATCHKMEANS_CLUSTER_COUNT =\n    \"proxima.minibatchkmeans.cluster.count\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_SHARD_FACTOR =\n    \"proxima.minibatchkmeans.cluster.shard_factor\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_EPSILON =\n    \"proxima.minibatchkmeans.cluster.epsilon\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_MAX_ITERATIONS =\n    \"proxima.minibatchkmeans.cluster.max_iterations\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_PURGE_EMPTY =\n    \"proxima.minibatchkmeans.cluster.purge_empty\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_TRY_COUNT =\n    \"proxima.minibatchkmeans.cluster.try_count\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_BATCH_COUNT =\n    \"proxima.minibatchkmeans.cluster.batch_count\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_SEEKER_CLASS =\n    \"proxima.minibatchkmeans.cluster.seeker_class\";\nstatic const std::string MINIBATCHKMEANS_CLUSTER_SEEKER_PARAMS =\n    \"proxima.minibatchkmeans.cluster.seeker_params\";\n\n//! 
K-means++\nstatic const std::string KMEANSPP_CLUSTER_COUNT =\n    \"proxima.kmeanspp.cluster.count\";\nstatic const std::string KMEANSPP_CLUSTER_SHARD_FACTOR =\n    \"proxima.kmeanspp.cluster.shard_factor\";\nstatic const std::string KMEANSPP_CLUSTER_CLASS =\n    \"proxima.kmeanspp.cluster.class\";\nstatic const std::string KMEANSPP_CLUSTER_PARAMS =\n    \"proxima.kmeanspp.cluster.params\";\n\n//! K-MC2\nstatic const std::string KMC2_CLUSTER_COUNT = \"proxima.kmc2.cluster.count\";\nstatic const std::string KMC2_CLUSTER_SHARD_FACTOR =\n    \"proxima.kmc2.cluster.shard_factor\";\nstatic const std::string KMC2_CLUSTER_MARKOV_CHAIN_LENGTH =\n    \"proxima.kmc2.cluster.markov_chain_length\";\nstatic const std::string KMC2_CLUSTER_ASSUMPTION_FREE =\n    \"proxima.kmc2.cluster.assumption_free\";\nstatic const std::string KMC2_CLUSTER_CLASS = \"proxima.kmc2.cluster.class\";\nstatic const std::string KMC2_CLUSTER_PARAMS = \"proxima.kmc2.cluster.params\";\n\n//! Bisecting K-means\nstatic const std::string BIKMEANS_CLUSTER_COUNT =\n    \"proxima.bikmeans.cluster.count\";\nstatic const std::string BIKMEANS_CLUSTER_INIT_COUNT =\n    \"proxima.bikmeans.cluster.init_count\";\nstatic const std::string BIKMEANS_CLUSTER_PURGE_EMPTY =\n    \"proxima.bikmeans.cluster.purge_empty\";\nstatic const std::string BIKMEANS_CLUSTER_FIRST_CLASS =\n    \"proxima.bikmeans.cluster.first_class\";\nstatic const std::string BIKMEANS_CLUSTER_SECOND_CLASS =\n    \"proxima.bikmeans.cluster.second_class\";\nstatic const std::string BIKMEANS_CLUSTER_FIRST_PARAMS =\n    \"proxima.bikmeans.cluster.first_params\";\nstatic const std::string BIKMEANS_CLUSTER_SECOND_PARAMS =\n    \"proxima.bikmeans.cluster.second_params\";\n\n//! 
K-medoids\nstatic const std::string KMEDOIDS_CLUSTER_COUNT =\n    \"proxima.kmedoids.cluster.count\";\nstatic const std::string KMEDOIDS_CLUSTER_SHARD_FACTOR =\n    \"proxima.kmedoids.cluster.shard_factor\";\nstatic const std::string KMEDOIDS_CLUSTER_EPSILON =\n    \"proxima.kmedoids.cluster.epsilon\";\nstatic const std::string KMEDOIDS_CLUSTER_MAX_ITERATIONS =\n    \"proxima.kmedoids.cluster.max_iterations\";\nstatic const std::string KMEDOIDS_CLUSTER_PURGE_EMPTY =\n    \"proxima.kmedoids.cluster.purge_empty\";\nstatic const std::string KMEDOIDS_CLUSTER_BENCH_RATIO =\n    \"proxima.kmedoids.cluster.bench_ratio\";\nstatic const std::string KMEDOIDS_CLUSTER_ONLY_MEANS =\n    \"proxima.kmedoids.cluster.only_means\";\nstatic const std::string KMEDOIDS_CLUSTER_WITHOUT_MEANS =\n    \"proxima.kmedoids.cluster.without_means\";\nstatic const std::string KMEDOIDS_CLUSTER_SEEKER_CLASS =\n    \"proxima.kmedoids.cluster.seeker_class\";\nstatic const std::string KMEDOIDS_CLUSTER_SEEKER_PARAMS =\n    \"proxima.kmedoids.cluster.seeker_params\";\n\n//! 
Stratified\nstatic const std::string STRATIFIED_CLUSTER_COUNT =\n    \"proxima.stratified.cluster.count\";\nstatic const std::string STRATIFIED_CLUSTER_FIRST_CLASS =\n    \"proxima.stratified.cluster.first_class\";\nstatic const std::string STRATIFIED_CLUSTER_SECOND_CLASS =\n    \"proxima.stratified.cluster.second_class\";\nstatic const std::string STRATIFIED_CLUSTER_FIRST_COUNT =\n    \"proxima.stratified.cluster.first_count\";\nstatic const std::string STRATIFIED_CLUSTER_SECOND_COUNT =\n    \"proxima.stratified.cluster.second_count\";\nstatic const std::string STRATIFIED_CLUSTER_FIRST_PARAMS =\n    \"proxima.stratified.cluster.first_params\";\nstatic const std::string STRATIFIED_CLUSTER_SECOND_PARAMS =\n    \"proxima.stratified.cluster.second_params\";\nstatic const std::string STRATIFIED_CLUSTER_AUTO_TUNING =\n    \"proxima.stratified.cluster.auto_tuning\";\nstatic const std::string STRATIFIED_CLUSTER_SECOND_POOL_COUNT =\n    \"proxima.stratified.cluster.second_pool_count\";\n\n//! Gap Statistics\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MIN =\n    \"proxima.gapstats.cluster_estimater.k_min\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MAX =\n    \"proxima.gapstats.cluster_estimater.k_max\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MIN_STEP =\n    \"proxima.gapstats.cluster_estimater.k_min_step\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MAX_STEP =\n    \"proxima.gapstats.cluster_estimater.k_max_step\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_TRY_COUNT =\n    \"proxima.gapstats.cluster_estimater.try_count\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_SHARD_FACTOR =\n    \"proxima.gapstats.cluster_estimater.shard_factor\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_ENABLE_MC2 =\n    \"proxima.gapstats.cluster_estimater.enable_mc2\";\nstatic const std::string GAPSTATS_CLUSTER_ESTIMATER_MARKOV_CHAIN_LENGTH =\n    \"proxima.gapstats.cluster_estimater.markov_chain_length\";\nstatic 
const std::string GAPSTATS_CLUSTER_ESTIMATER_CLUSTER_CLASS =\n    \"proxima.gapstats.cluster_estimater.cluster_class\";\n\nstatic const std::string CLUSTER_TRAINER_SAMPLE_COUNT =\n    \"proxima.cluster.trainer.sample_count\";\nstatic const std::string CLUSTER_TRAINER_SAMPLE_RATIO =\n    \"proxima.cluster.trainer.sample_ratio\";\nstatic const std::string CLUSTER_TRAINER_THREAD_COUNT =\n    \"proxima.cluster.trainer.thread_count\";\nstatic const std::string CLUSTER_TRAINER_FILE_NAME =\n    \"proxima.cluster.trainer.file_name\";\nstatic const std::string CLUSTER_TRAINER_CLASS_NAME =\n    \"proxima.cluster.trainer.class_name\";\n\nstatic const std::string STRATIFIED_TRAINER_SAMPLE_COUNT =\n    \"proxima.stratified.trainer.sample_count\";\nstatic const std::string STRATIFIED_TRAINER_SAMPLE_RATIO =\n    \"proxima.stratified.trainer.sample_ratio\";\nstatic const std::string STRATIFIED_TRAINER_THREAD_COUNT =\n    \"proxima.stratified.trainer.thread_count\";\nstatic const std::string STRATIFIED_TRAINER_FILE_NAME =\n    \"proxima.stratified.trainer.file_name\";\nstatic const std::string STRATIFIED_TRAINER_CLASS_NAME =\n    \"proxima.stratified.trainer.class_name\";\nstatic const std::string STRATIFIED_TRAINER_CLUSTER_COUNT =\n    \"proxima.stratified.trainer.cluster_count\";\nstatic const std::string STRATIFIED_TRAINER_AUTOAUNE =\n    \"proxima.stratified.trainer.autotune\";\nstatic const std::string STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX =\n    \"proxima.stratified.trainer.cluster_params_in_level_\";\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/kmeans_cluster.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/container/reservoir.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"cluster_params.h\"\n#include \"linear_seeker.h\"\n#include \"vector_mean.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Kmeans Cluster\n */\nclass KmeansCluster : public IndexCluster {\n public:\n  //! Constructor\n  KmeansCluster(void) {}\n\n  //! Constructor\n  KmeansCluster(size_t iters, bool batch)\n      : max_iterations_(iters), batch_(batch) {}\n\n  //! Constructor\n  KmeansCluster(bool batch) : batch_(batch) {}\n\n  //! Destructor\n  virtual ~KmeansCluster(void) {}\n\n  //! Initialize Cluster\n  virtual int init(const IndexMeta &meta, const ailego::Params &params);\n\n  //! Cleanup Cluster\n  virtual int cleanup(void);\n\n  //! Reset Cluster\n  virtual int reset(void);\n\n  //! Update Cluster\n  virtual int update(const ailego::Params &params);\n\n  //! Suggest dividing to K clusters\n  virtual void suggest(uint32_t k);\n\n  //! Mount features\n  virtual int mount(IndexFeatures::Pointer feats);\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n  //! 
Classify\n  virtual int classify(IndexThreads::Pointer threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Label\n  virtual int label(IndexThreads::Pointer threads,\n                    const IndexCluster::CentroidList &cents,\n                    std::vector<uint32_t> *out);\n\n protected:\n  //! Test if it is valid\n  bool is_valid(void) const;\n\n  //! Cluster once\n  int clustering(IndexThreads *threads, IndexCluster::CentroidList &cents,\n                 double *cost);\n\n  //! Update parameters\n  void update_params(const ailego::Params &params);\n\n  //! Init seeker\n  int init_seeker(void);\n\n  //! Build seeker\n  int build_seeker(const IndexCluster::CentroidList &cents);\n\n  //! Check Centroids\n  bool check_centroids(const IndexCluster::CentroidList &cents);\n\n  //! Initialize Centroids\n  void init_centroids(size_t count, IndexCluster::CentroidList *out);\n\n  //! Initialize Shard Containers\n  void init_containers(size_t shard_count);\n\n  //! Initialize Shard Features Containers\n  void init_features_containers(size_t shard_count);\n\n  //! Split Clusters\n  void split_clusters(IndexThreads *threads,\n                      const IndexCluster::CentroidList &cents);\n\n  //! Update Centroids\n  void update_centroids(IndexThreads *threads,\n                        IndexCluster::CentroidList &cents);\n\n  //! Update Clusters\n  void update_clusters(IndexThreads *threads,\n                       const IndexCluster::CentroidList &cents);\n\n  //! Update Clusters' Features\n  void update_features(IndexThreads *threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Update Labels\n  void update_labels(IndexThreads *threads, std::vector<uint32_t> *labels);\n\n  //! Split Clusters in Thread\n  void split_clusters_thread(size_t index_begin, size_t index_end,\n                             const IndexThreads *threads);\n\n  //! 
Update Centroid in Thread\n  void update_centroid_thread(size_t column, IndexCluster::CentroidList *out);\n\n  //! Update Cluster in Thread\n  void update_cluster_thread(size_t index_begin, size_t index_end,\n                             const IndexThreads *threads);\n\n  //! Update Cluster's Features in Thread\n  void update_features_thread(size_t column, IndexCluster::CentroidList *out);\n\n  //! Update Labels in Thread\n  void update_labels_thread(size_t index_begin, size_t index_end,\n                            std::vector<uint32_t> *labels);\n\n protected:\n  //! Members\n  IndexMeta meta_{};\n  IndexFeatures::Pointer features_{};\n  LinearSeeker::Pointer seeker_{};\n  std::vector<double> shard_cluster_scores_{};\n  std::vector<std::vector<const void *>> shard_cluster_features_{};\n  std::shared_ptr<VectorMeanArray> shard_cluster_means_{};\n  std::shared_ptr<VectorMeanArray> batch_means_{};\n  std::vector<double> batch_scores_{};\n  double epsilon_{std::numeric_limits<float>::epsilon()};\n  float shard_factor_{16.0f};\n  uint32_t max_iterations_{20u};\n  uint32_t cluster_count_{0u};\n  uint32_t thread_count_{0u};\n  bool batch_{false};\n  bool purge_empty_{false};\n};\n\n/*! Centroid Features\n */\nclass KmeansCentroidFeatures : public IndexFeatures {\n public:\n  //! 
Constructor\n  KmeansCentroidFeatures(const IndexMeta &meta,\n                         const IndexCluster::CentroidList &cents)\n      : centroids_(cents),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  virtual size_t count(void) const {\n    return centroids_.size();\n  }\n\n  virtual size_t dimension(void) const {\n    return feature_dimension_;\n  }\n\n  virtual const void *element(size_t i) const {\n    return centroids_[i].feature();\n  }\n\n  virtual IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  virtual size_t element_size(void) const {\n    return feature_size_;\n  }\n\n private:\n  const IndexCluster::CentroidList &centroids_;\n  size_t feature_size_;\n  size_t feature_dimension_;\n  IndexMeta::DataType data_type_;\n};\n\nstatic inline std::shared_ptr<VectorMean> NewVectorMean(const IndexMeta &meta) {\n  switch (meta.data_type()) {\n    case IndexMeta::DataType::DT_FP16:\n      return std::make_shared<NumericalVectorMean<ailego::Float16>>(\n          meta.dimension());\n\n    case IndexMeta::DataType::DT_FP32:\n      return std::make_shared<NumericalVectorMean<float>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_FP64:\n      return std::make_shared<NumericalVectorMean<double>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_INT8:\n      return std::make_shared<NumericalVectorMean<int8_t>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_INT4:\n      return std::make_shared<NibbleVectorMean<uint8_t>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_INT16:\n      return std::make_shared<NumericalVectorMean<int16_t>>(meta.dimension());\n\n    default:\n      break;\n  }\n  // As binary default\n  return std::make_shared<BinaryVectorMean>(meta.dimension());\n}\n\nstatic inline std::shared_ptr<VectorMeanArray> NewVectorMeanArray(\n    const IndexMeta &meta) {\n  switch (meta.data_type()) {\n    case 
IndexMeta::DataType::DT_FP16:\n      return std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<ailego::Float16>>>(\n          meta.dimension());\n\n    case IndexMeta::DataType::DT_FP32:\n      return std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<float>>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_FP64:\n      return std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<double>>>(\n          meta.dimension());\n\n    case IndexMeta::DataType::DT_INT8:\n      return std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<int8_t>>>(\n          meta.dimension());\n\n    case IndexMeta::DataType::DT_INT4:\n      return std::make_shared<\n          GeneralVectorMeanArray<NibbleVectorMean<uint8_t>>>(meta.dimension());\n\n    case IndexMeta::DataType::DT_INT16:\n      return std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<int16_t>>>(\n          meta.dimension());\n\n    default:\n      break;\n  }\n  // As binary default\n  return std::make_shared<GeneralVectorMeanArray<BinaryVectorMean>>(\n      meta.dimension());\n}\n\nstatic inline std::shared_ptr<VectorMeanArray> NewVectorMeanArray(\n    const IndexMeta &meta, const IndexCluster::CentroidList &cents) {\n  switch (meta.data_type()) {\n    case IndexMeta::DataType::DT_FP16: {\n      auto ptr = std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<ailego::Float16>>>(\n          meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const ailego::Float16 *>(it.feature()),\n                     meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    case IndexMeta::DataType::DT_FP32: {\n      auto ptr =\n          std::make_shared<GeneralVectorMeanArray<NumericalVectorMean<float>>>(\n              meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const float *>(it.feature()),\n      
               meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    case IndexMeta::DataType::DT_FP64: {\n      auto ptr =\n          std::make_shared<GeneralVectorMeanArray<NumericalVectorMean<double>>>(\n              meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const double *>(it.feature()),\n                     meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    case IndexMeta::DataType::DT_INT8: {\n      auto ptr =\n          std::make_shared<GeneralVectorMeanArray<NumericalVectorMean<int8_t>>>(\n              meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const int8_t *>(it.feature()),\n                     meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    case IndexMeta::DataType::DT_INT4: {\n      auto ptr =\n          std::make_shared<GeneralVectorMeanArray<NibbleVectorMean<uint8_t>>>(\n              meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const uint8_t *>(it.feature()),\n                     meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    case IndexMeta::DataType::DT_INT16: {\n      auto ptr = std::make_shared<\n          GeneralVectorMeanArray<NumericalVectorMean<int16_t>>>(\n          meta.dimension());\n\n      for (const auto &it : cents) {\n        ptr->emplace(reinterpret_cast<const int16_t *>(it.feature()),\n                     meta.dimension(), it.follows());\n      }\n      return ptr;\n    }\n\n    default:\n      break;\n  }\n\n  // As binary default\n  auto ptr = std::make_shared<GeneralVectorMeanArray<BinaryVectorMean>>(\n      meta.dimension());\n\n  for (const auto &it : cents) {\n    ptr->emplace(it.feature(), meta.dimension(), it.follows());\n  }\n  return ptr;\n}\n\nstatic inline double CalculateSSE(const IndexCluster::CentroidList &cents) {\n  double accum = 0.0;\n  for (const 
auto &it : cents) {\n    accum += it.score();\n  }\n  return accum;\n}\n\nstatic inline void PurgeCentroids(IndexCluster::CentroidList &cents,\n                                  bool cutting) {\n  size_t index = 0;\n  size_t tamp = cents.size();\n\n  while (index < tamp) {\n    if (cents[index].follows() == 0) {\n      size_t last_index = tamp - 1;\n\n      if (index != last_index) {\n        std::swap(cents[index], cents[last_index]);\n      }\n      tamp = last_index;\n      continue;\n    }\n    ++index;\n  }\n  if (cutting) {\n    cents.resize(tamp);\n  }\n}\n\nint KmeansCluster::init(const IndexMeta &meta, const ailego::Params &params) {\n  meta_ = meta;\n  this->update_params(params);\n\n  return this->init_seeker();\n}\n\nint KmeansCluster::cleanup(void) {\n  features_.reset();\n  shard_cluster_scores_.clear();\n  shard_cluster_features_.clear();\n  shard_cluster_means_.reset();\n  batch_means_.reset();\n  batch_scores_.clear();\n  seeker_->cleanup();\n  return 0;\n}\n\nint KmeansCluster::reset(void) {\n  features_.reset();\n  shard_cluster_scores_.clear();\n  shard_cluster_features_.clear();\n  shard_cluster_means_->clear();\n  batch_means_->clear();\n  batch_scores_.clear();\n  seeker_->reset();\n  return 0;\n}\n\nint KmeansCluster::update(const ailego::Params &params) {\n  this->update_params(params);\n  return 0;\n}\n\nvoid KmeansCluster::suggest(uint32_t k) {\n  cluster_count_ = k;\n}\n\nint KmeansCluster::mount(IndexFeatures::Pointer feats) {\n  if (!feats) {\n    return IndexError_InvalidArgument;\n  }\n  if (!feats->is_matched(meta_)) {\n    return IndexError_Mismatch;\n  }\n\n  // Check dimension\n  auto data_type = meta_.data_type();\n  switch (data_type) {\n    case IndexMeta::DataType::DT_INT4:\n      if (feats->dimension() % 2 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of int4 \"\n            \"must be an integer multiple of 2).\",\n            feats->dimension());\n        return 
IndexError_Mismatch;\n      }\n      break;\n    case IndexMeta::DataType::DT_BINARY32:\n      if (feats->dimension() % 32 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of binary32 \"\n            \"must be an integer multiple of 32).\",\n            feats->dimension());\n        return IndexError_Mismatch;\n      }\n      break;\n    case IndexMeta::DataType::DT_BINARY64:\n      if (feats->dimension() % 64 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of binary64 \"\n            \"must be an integer multiple of 64).\",\n            feats->dimension());\n        return IndexError_Mismatch;\n      }\n      break;\n    default:\n      break;\n  }\n\n  features_ = std::move(feats);\n  return 0;\n}\n\nint KmeansCluster::cluster(IndexThreads::Pointer threads,\n                           IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  if (cents.empty()) {\n    if (cluster_count_ == 0) {\n      LOG_ERROR(\"The count of cluster is unknown.\");\n      return IndexError_NoReady;\n    }\n    this->init_centroids(cluster_count_, &cents);\n  }\n\n  if (batch_) {\n    batch_means_ = NewVectorMeanArray(meta_, cents);\n    batch_scores_.clear();\n    for (const auto &it : cents) {\n      batch_scores_.push_back(it.score());\n    }\n  }\n\n  double cost = 0.0;\n\n  // we need to do clustering and update the centroids' follows, even if\n  // cents.size() == 1. 
Otherwise, the centroid with empty follows will be\n  // removed if purge_empty enabled\n  for (uint32_t i = 0; (i < max_iterations_) && (cents.size() > 0); ++i) {\n    double new_cost, new_epsilon;\n\n    int result = this->clustering(threads.get(), cents, &new_cost);\n    if (result != 0) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return result;\n    }\n\n    new_epsilon = new_cost - cost;\n    LOG_DEBUG(\"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f\",\n              i, cents.size(), features_->count(),\n              (size_t)stamp.milli_seconds(), cost, new_cost, new_epsilon);\n    stamp.reset();\n\n    new_epsilon = std::abs(new_epsilon);\n    if (new_epsilon < epsilon_) {\n      break;\n    }\n    cost = new_cost;\n  }\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 0;\n}\n\nint KmeansCluster::classify(IndexThreads::Pointer threads,\n                            IndexCluster::CentroidList &cents) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  int result = this->build_seeker(cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to build the seeker.\");\n    return result;\n  }\n\n  this->update_clusters(threads.get(), cents);\n  this->update_features(threads.get(), cents);\n  return 0;\n}\n\nint KmeansCluster::label(IndexThreads::Pointer threads,\n                         const IndexCluster::CentroidList &cents,\n                         std::vector<uint32_t> *out) 
{\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  int result = this->build_seeker(cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to build the seeker.\");\n    return result;\n  }\n\n  this->update_labels(threads.get(), out);\n  return 0;\n}\n\nbool KmeansCluster::is_valid(void) const {\n  if (!seeker_ || !features_ || !features_->count()) {\n    return false;\n  }\n  return true;\n}\n\nint KmeansCluster::clustering(IndexThreads *threads,\n                              IndexCluster::CentroidList &cents, double *cost) {\n  int result = this->build_seeker(cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to build the seeker.\");\n    return result;\n  }\n\n  this->split_clusters(threads, cents);\n  this->update_centroids(threads, cents);\n  *cost = CalculateSSE(cents);\n  return 0;\n}\n\nvoid KmeansCluster::update_params(const ailego::Params &params) {\n  params.get(GENERAL_THREAD_COUNT, &thread_count_);\n  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);\n  params.get(KMEANS_CLUSTER_COUNT, &cluster_count_);\n  params.get(KMEANS_CLUSTER_SHARD_FACTOR, &shard_factor_);\n  params.get(KMEANS_CLUSTER_EPSILON, &epsilon_);\n  params.get(KMEANS_CLUSTER_MAX_ITERATIONS, &max_iterations_);\n  params.get(KMEANS_CLUSTER_BATCH, &batch_);\n  params.get(KMEANS_CLUSTER_PURGE_EMPTY, &purge_empty_);\n}\n\nint KmeansCluster::init_seeker(void) {\n  seeker_.reset(new (std::nothrow) LinearSeeker);\n  if (!seeker_) {\n    LOG_ERROR(\"Failed to create linear 
seeker.\");\n    return IndexError_NoMemory;\n  }\n\n  int result = seeker_->init(meta_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize linear seeker.\");\n    return result;\n  }\n\n  return 0;\n}\n\nint KmeansCluster::build_seeker(const IndexCluster::CentroidList &cents) {\n  int result =\n      seeker_->mount(std::make_shared<KmeansCentroidFeatures>(meta_, cents));\n  if (result != 0) {\n    LOG_ERROR(\"Failed to mount features for linear seeker.\");\n    return result;\n  }\n\n  return 0;\n}\n\nbool KmeansCluster::check_centroids(const IndexCluster::CentroidList &cents) {\n  for (const auto &it : cents) {\n    if (it.size() != meta_.element_size()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nvoid KmeansCluster::init_centroids(size_t count,\n                                   IndexCluster::CentroidList *out) {\n  size_t feature_size = features_->element_size();\n  size_t features_count = features_->count();\n  size_t sample_count = std::min<size_t>(count, features_count);\n\n  ailego::Reservoir<size_t> sampler(sample_count);\n  for (size_t i = 0; i < features_count; ++i) {\n    sampler.fill(i);\n  }\n\n  // Save centroids\n  out->reserve(sampler.pool().size());\n  for (auto i : sampler.pool()) {\n    out->emplace_back(features_->element(i), feature_size);\n  }\n}\n\nvoid KmeansCluster::init_containers(size_t shard_count) {\n  if (!shard_cluster_means_) {\n    shard_cluster_means_ = NewVectorMeanArray(meta_);\n  }\n  shard_cluster_means_->clear();\n  shard_cluster_means_->resize(shard_count);\n  shard_cluster_scores_.clear();\n  shard_cluster_scores_.resize(shard_count);\n}\n\nvoid KmeansCluster::init_features_containers(size_t shard_count) {\n  shard_cluster_features_.resize(shard_count);\n  for (auto &features : shard_cluster_features_) {\n    features.clear();\n  }\n}\n\nvoid KmeansCluster::split_clusters(IndexThreads *threads,\n                                   const IndexCluster::CentroidList &cents) {\n  // Initilize containers\n 
 this->init_containers(threads->count() * cents.size());\n  auto task_group = threads->make_group();\n\n  // Initilize base information\n  size_t features_count = features_->count();\n  size_t shard_count = std::max<size_t>(\n      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);\n  size_t fregment_count = (features_count + shard_count - 1) / shard_count;\n\n  for (size_t i = 0, index = 0; (i != shard_count) && (index < features_count);\n       ++i) {\n    size_t next_index = index + fregment_count;\n    if (next_index > features_count) {\n      next_index = features_count;\n    }\n\n    // Process in work thread\n    task_group->submit(\n        ailego::Closure::New(this, &KmeansCluster::split_clusters_thread, index,\n                             next_index, threads));\n\n    // Next index\n    index = next_index;\n  }\n  task_group->wait_finish();\n}\n\nvoid KmeansCluster::update_centroids(IndexThreads *threads,\n                                     IndexCluster::CentroidList &cents) {\n  auto task_group = threads->make_group();\n  for (size_t i = 0; i < cents.size(); ++i) {\n    task_group->submit(ailego::Closure::New(\n        this, &KmeansCluster::update_centroid_thread, i, &cents));\n  }\n  task_group->wait_finish();\n}\n\nvoid KmeansCluster::update_clusters(IndexThreads *threads,\n                                    const IndexCluster::CentroidList &cents) {\n  // Initilize containers\n  this->init_features_containers(threads->count() * cents.size());\n  auto task_group = threads->make_group();\n\n  size_t features_count = features_->count();\n  size_t shard_count = std::max<size_t>(\n      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);\n  size_t fregment_count = (features_count + shard_count - 1) / shard_count;\n\n  for (size_t i = 0, index = 0; (i != shard_count) && (index < features_count);\n       ++i) {\n    size_t next_index = index + fregment_count;\n    if (next_index > features_count) {\n      next_index 
= features_count;\n    }\n    // Process in work thread\n    task_group->submit(\n        ailego::Closure::New(this, &KmeansCluster::update_cluster_thread, index,\n                             next_index, threads));\n\n    // Next index\n    index = next_index;\n  }\n  task_group->wait_finish();\n}\n\nvoid KmeansCluster::update_features(IndexThreads *threads,\n                                    IndexCluster::CentroidList &cents) {\n  auto task_group = threads->make_group();\n  for (size_t i = 0; i < cents.size(); ++i) {\n    // Process in work thread\n    task_group->submit(ailego::Closure::New(\n        this, &KmeansCluster::update_features_thread, i, &cents));\n  }\n  task_group->wait_finish();\n}\n\nvoid KmeansCluster::update_labels(IndexThreads *threads,\n                                  std::vector<uint32_t> *labels) {\n  size_t features_count = features_->count();\n  size_t shard_count = std::max<size_t>(\n      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);\n  size_t fregment_count = (features_count + shard_count - 1) / shard_count;\n  auto task_group = threads->make_group();\n\n  // Prepare buffer\n  labels->resize(features_count);\n\n  for (size_t i = 0, index = 0; (i != shard_count) && (index < features_count);\n       ++i) {\n    size_t next_index = index + fregment_count;\n    if (next_index > features_count) {\n      next_index = features_count;\n    }\n\n    // Process in work thread\n    task_group->submit(ailego::Closure::New(\n        this, &KmeansCluster::update_labels_thread, index, next_index, labels));\n\n    // Next index\n    index = next_index;\n  }\n  task_group->wait_finish();\n}\n\nvoid KmeansCluster::split_clusters_thread(size_t index_begin, size_t index_end,\n                                          const IndexThreads *threads) {\n  size_t feature_size = features_->element_size();\n  size_t thread_offset = threads->indexof_this() * seeker_->original()->count();\n\n  for (size_t i = index_begin; i != index_end; 
++i) {\n    const void *feat = features_->element(i);\n    LinearSeeker::Document result(0, std::numeric_limits<float>::max());\n\n    // ignore error\n    seeker_->seek(feat, meta_.element_size(), &result);\n\n    size_t sel_column = thread_offset + result.index;\n    shard_cluster_scores_[sel_column] += result.score;\n    shard_cluster_means_->at(sel_column).plus(feat, feature_size);\n  }\n}\n\nvoid KmeansCluster::update_centroid_thread(size_t column,\n                                           IndexCluster::CentroidList *out) {\n  size_t cluster_count = out->size();\n  double cluster_score = 0.0;\n\n  // Create Accumulator\n  std::shared_ptr<VectorMean> accum = NewVectorMean(meta_);\n  if (batch_) {\n    cluster_score += batch_scores_[column];\n    accum->merge(batch_means_->at(column));\n  }\n\n  // Compute the score of centroid\n  for (size_t i = column; i < shard_cluster_scores_.size();\n       i += cluster_count) {\n    cluster_score += shard_cluster_scores_[i];\n    accum->merge(shard_cluster_means_->at(i));\n  }\n\n  // Update centroid\n  IndexCluster::Centroid *centroid = &(out->at(column));\n  centroid->set_score(cluster_score);\n  centroid->set_follows(accum->count());\n  accum->mean(centroid->mutable_buffer());\n}\n\nvoid KmeansCluster::update_cluster_thread(size_t index_begin, size_t index_end,\n                                          const IndexThreads *threads) {\n  size_t thread_offset = threads->indexof_this() * seeker_->original()->count();\n\n  for (size_t i = index_begin; i != index_end; ++i) {\n    const void *feat = features_->element(i);\n    LinearSeeker::Document result(0, std::numeric_limits<float>::max());\n\n    // ignore error\n    seeker_->seek(feat, meta_.element_size(), &result);\n\n    size_t sel_column = thread_offset + result.index;\n    shard_cluster_features_[sel_column].emplace_back(feat);\n  }\n}\n\nvoid KmeansCluster::update_features_thread(size_t column,\n                                           
IndexCluster::CentroidList *out) {\n  size_t cluster_count = out->size();\n  size_t cluster_follows = 0u;\n\n  // Compute the follows of cluster\n  for (size_t i = column; i < shard_cluster_features_.size();\n       i += cluster_count) {\n    cluster_follows += shard_cluster_features_[i].size();\n  }\n\n  // Merge all features in cluster\n  std::vector<const void *> &cluster_features =\n      *(out->at(column).mutable_similars());\n  cluster_features.resize(cluster_follows);\n\n  for (size_t i = column, j = 0; i < shard_cluster_features_.size();\n       i += cluster_count) {\n    const std::vector<const void *> &it = shard_cluster_features_[i];\n    std::memcpy(&cluster_features[j], it.data(), it.size() * sizeof(void *));\n    j += it.size();\n  }\n}\n\nvoid KmeansCluster::update_labels_thread(size_t index_begin, size_t index_end,\n                                         std::vector<uint32_t> *labels) {\n  for (size_t i = index_begin; i != index_end; ++i) {\n    const void *feat = features_->element(i);\n    LinearSeeker::Document result(0, std::numeric_limits<float>::max());\n\n    // ignore error\n    seeker_->seek(feat, meta_.element_size(), &result);\n    (*labels)[i] = static_cast<uint32_t>(result.index);\n  }\n}\n\nINDEX_FACTORY_REGISTER_CLUSTER_ALIAS(KmeansCluster, KmeansCluster, false);\nINDEX_FACTORY_REGISTER_CLUSTER_ALIAS(BatchKmeansCluster, KmeansCluster, true);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/linear_seeker.cc",
    "content": "\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"linear_seeker.h\"\n\nnamespace zvec {\nnamespace core {\n\nint LinearSeeker::seek(const void *query, size_t len, Document *out) {\n  if (ailego_unlikely(!query || !out || meta_.element_size() != len)) {\n    return IndexError_InvalidArgument;\n  }\n\n  float sel_score = std::numeric_limits<float>::max();\n  uint32_t sel_column = 0;\n  uint32_t total = static_cast<uint32_t>(features_->count());\n\n  for (uint32_t i = 0; i < total; ++i) {\n    float score = 0.0f;\n\n    distance_func_(features_->element(i), query, meta_.dimension(), &score);\n    if (score < sel_score) {\n      sel_score = score;\n      sel_column = i;\n    }\n  }\n\n  out->index = sel_column;\n  out->score = sel_score;\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/linear_seeker.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"seeker.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Linear Seeker\n */\nclass LinearSeeker : public Seeker {\n public:\n  typedef std::shared_ptr<LinearSeeker> Pointer;\n\n  //! Constructor\n  LinearSeeker(void) : meta_(), metric_(), features_() {}\n\n  //! Destructor\n  ~LinearSeeker(void) {}\n\n  //! Initialize Seeker\n  int init(const IndexMeta &meta) override {\n    meta_ = meta;\n\n    metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n    if (!metric_) {\n      LOG_ERROR(\"Create Metric %s failed.\", meta_.metric_name().c_str());\n\n      return IndexError_Unsupported;\n    }\n    int ret = metric_->init(meta_, meta_.metric_params());\n    if (ret != 0) {\n      LOG_ERROR(\"IndexMetric init failed wit ret %d.\", ret);\n\n      return ret;\n    }\n    distance_func_ = metric_->distance_matrix(1, 1);\n    if (!distance_func_) {\n      LOG_ERROR(\"DistanceMatrix function is nullptr.\");\n\n      return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Cleanup Seeker\n  int cleanup(void) override {\n    features_.reset();\n    return 0;\n  }\n\n  //! Reset Seeker\n  int reset(void) override {\n    features_.reset();\n    return 0;\n  }\n\n  //! 
Mount features\n  int mount(IndexFeatures::Pointer feats) override {\n    if (!feats) {\n      return IndexError_InvalidArgument;\n    }\n    if (!feats->is_matched(meta_)) {\n      return IndexError_Mismatch;\n    }\n    features_ = std::move(feats);\n    return 0;\n  }\n\n  //! Seek (TOP 1 Document)\n  int seek(const void *query, size_t len, Document *out) override;\n\n  //! Retrieve the original features\n  IndexFeatures::Pointer original(void) const override {\n    return features_;\n  }\n\n private:\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n  IndexFeatures::Pointer features_{};\n  IndexMetric::MatrixDistance distance_func_{nullptr};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/opt_kmeans_cluster.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/algorithm/kmeans.h>\n#include <ailego/container/reservoir.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"cluster_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Optimize K-Means cluster algorithm\n */\nclass OptKmeansAlgorithm : public IndexCluster {\n public:\n  //! Constructor\n  OptKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~OptKmeansAlgorithm(void) {}\n\n  //! Initialize Cluster\n  int init(const IndexMeta &meta, const ailego::Params &params);\n\n  //! Mount features\n  virtual int mount(IndexFeatures::Pointer feats);\n\n  //! Suggest dividing to K clusters\n  virtual void suggest(uint32_t k);\n\n  //! Classify\n  virtual int classify(IndexThreads::Pointer threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Label\n  virtual int label(IndexThreads::Pointer threads,\n                    const IndexCluster::CentroidList &cents,\n                    std::vector<uint32_t> *out);\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents) = 0;\n\n  //! Cleanup Cluster\n  virtual int cleanup(void);\n\n  //! Reset Cluster\n  virtual int reset(void);\n\n  //! 
Update Cluster\n  virtual int update(const ailego::Params &params);\n\n protected:\n  //! Update parameters\n  void update_params(const ailego::Params &params);\n\n  //! Init Kmeans Algorithm\n  int init_algorithm();\n\n  //! Init Distance function\n  int init_distance_func();\n\n  //! Check Centroids\n  bool check_centroids(const IndexCluster::CentroidList &cents);\n\n  //! Test if it is valid\n  bool is_valid(void) const;\n\n  //! Update Clusters\n  void update_clusters(IndexThreads *threads,\n                       const IndexCluster::CentroidList &cents);\n\n  //! Update Cluster in Thread\n  void update_cluster_thread(size_t index_begin, size_t index_end,\n                             const IndexThreads *threads,\n                             const IndexCluster::CentroidList &cents);\n\n  //! Initialize Shard Features Containers\n  void init_features_containers(size_t shard_count);\n\n  //! Update Clusters' Features\n  void update_features(IndexThreads *threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Update Cluster's Features in Thread\n  void update_features_thread(size_t column, IndexCluster::CentroidList *out);\n\n  //! Update Labels\n  void update_labels(IndexThreads *threads, std::vector<uint32_t> *labels,\n                     const IndexCluster::CentroidList &cents);\n\n  //! Update Labels in Thread\n  void update_labels_thread(size_t index_begin, size_t index_end,\n                            std::vector<uint32_t> *labels,\n                            const IndexCluster::CentroidList &cents);\n\n  //! 
Initialize Centroids\n  void init_centroids(size_t count, IndexCluster::CentroidList *out);\n\n protected:\n  uint32_t cluster_count_{0u};\n  uint32_t thread_count_{0u};\n  uint32_t max_iterations_{20u};\n  double epsilon_{std::numeric_limits<float>::epsilon()};\n  float shard_factor_{16.0f};\n  bool purge_empty_{false};\n  bool assumption_free_{false};\n  uint32_t markov_chain_length_{32};\n  IndexMeta meta_{};\n  IndexFeatures::Pointer features_{};\n  std::vector<std::vector<const void *>> shard_cluster_features_{};\n  IndexMetric::MatrixDistance distance_func_{nullptr};\n};\n\nbool OptKmeansAlgorithm::is_valid(void) const {\n  if (!features_ || !features_->count()) {\n    return false;\n  }\n  return true;\n}\n\nbool OptKmeansAlgorithm::check_centroids(\n    const IndexCluster::CentroidList &cents) {\n  for (const auto &it : cents) {\n    if (it.size() != meta_.element_size()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nvoid OptKmeansAlgorithm::update_params(const ailego::Params &params) {\n  params.get(GENERAL_THREAD_COUNT, &thread_count_);\n  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);\n  params.get(OPTKMEANS_CLUSTER_COUNT, &cluster_count_);\n  params.get(OPTKMEANS_CLUSTER_SHARD_FACTOR, &shard_factor_);\n  params.get(OPTKMEANS_CLUSTER_EPSILON, &epsilon_);\n  params.get(OPTKMEANS_CLUSTER_MAX_ITERATIONS, &max_iterations_);\n  params.get(OPTKMEANS_CLUSTER_PURGE_EMPTY, &purge_empty_);\n  params.get(OPTKMEANS_CLUSTER_MARKOV_CHAIN_LENGTH, &markov_chain_length_);\n  params.get(OPTKMEANS_CLUSTER_ASSUMPTION_FREE, &assumption_free_);\n}\n\nint OptKmeansAlgorithm::init_distance_func() {\n  IndexMetric::Pointer metric_{};\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"Create Metric %s failed.\", meta_.metric_name().c_str());\n    return IndexError_Unsupported;\n  }\n  int ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"IndexMetric init failed wit ret %d.\", 
ret);\n    return ret;\n  }\n  distance_func_ = metric_->distance_matrix(1, 1);\n  if (!distance_func_) {\n    LOG_ERROR(\"DistanceMatrix function is nullptr.\");\n    return IndexError_Unsupported;\n  }\n  return 0;\n}\n\nvoid OptKmeansAlgorithm::update_clusters(\n    IndexThreads *threads, const IndexCluster::CentroidList &cents) {\n  // Initilize containers\n  this->init_features_containers(threads->count() * cents.size());\n  auto task_group = threads->make_group();\n\n  size_t features_count = features_->count();\n  size_t shard_count = std::max<size_t>(\n      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);\n  size_t fregment_count = (features_count + shard_count - 1) / shard_count;\n\n  for (size_t i = 0, index = 0; (i != shard_count) && (index < features_count);\n       ++i) {\n    size_t next_index = index + fregment_count;\n    if (next_index > features_count) {\n      next_index = features_count;\n    }\n\n    // Process in work thread·\n    task_group->submit(\n        ailego::Closure::New(this, &OptKmeansAlgorithm::update_cluster_thread,\n                             index, next_index, threads, cents));\n\n    // Next index\n    index = next_index;\n  }\n  task_group->wait_finish();\n}\n\nvoid OptKmeansAlgorithm::update_cluster_thread(\n    size_t index_begin, size_t index_end, const IndexThreads *threads,\n    const IndexCluster::CentroidList &cents) {\n  size_t thread_offset = threads->indexof_this() * cents.size();\n\n  for (size_t i = index_begin; i != index_end; ++i) {\n    const void *feat = features_->element(i);\n    uint32_t sel_index = 0;\n    float sel_score = std::numeric_limits<float>::max();\n\n    // todo: get min distance\n    uint32_t total = static_cast<uint32_t>(cents.size());\n    for (uint32_t j = 0; j < total; ++j) {\n      float score = 0.0f;\n\n      distance_func_(cents[j].feature(), feat, meta_.dimension(), &score);\n      if (score < sel_score) {\n        sel_score = score;\n        sel_index = j;\n     
 }\n    }\n\n    size_t sel_column = thread_offset + sel_index;\n    shard_cluster_features_[sel_column].emplace_back(feat);\n  }\n}\n\nvoid OptKmeansAlgorithm::init_features_containers(size_t shard_count) {\n  shard_cluster_features_.resize(shard_count);\n  for (auto &features : shard_cluster_features_) {\n    features.clear();\n  }\n}\n\nvoid OptKmeansAlgorithm::update_features(IndexThreads *threads,\n                                         IndexCluster::CentroidList &cents) {\n  auto task_group = threads->make_group();\n  for (size_t i = 0; i < cents.size(); ++i) {\n    // Process in work thread\n    task_group->submit(ailego::Closure::New(\n        this, &OptKmeansAlgorithm::update_features_thread, i, &cents));\n  }\n  task_group->wait_finish();\n}\n\nvoid OptKmeansAlgorithm::update_labels(\n    IndexThreads *threads, std::vector<uint32_t> *labels,\n    const IndexCluster::CentroidList &cents) {\n  size_t features_count = features_->count();\n  size_t shard_count = std::max<size_t>(\n      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);\n  size_t fregment_count = (features_count + shard_count - 1) / shard_count;\n  auto task_group = threads->make_group();\n\n  // Prepare buffer\n  labels->resize(features_count);\n\n  for (size_t i = 0, index = 0; (i != shard_count) && (index < features_count);\n       ++i) {\n    size_t next_index = index + fregment_count;\n    if (next_index > features_count) {\n      next_index = features_count;\n    }\n\n    // Process in work thread\n    task_group->submit(\n        ailego::Closure::New(this, &OptKmeansAlgorithm::update_labels_thread,\n                             index, next_index, labels, cents));\n\n    // Next index\n    index = next_index;\n  }\n  task_group->wait_finish();\n}\n\nvoid OptKmeansAlgorithm::update_labels_thread(\n    size_t index_begin, size_t index_end, std::vector<uint32_t> *labels,\n    const IndexCluster::CentroidList &cents) {\n  for (size_t i = index_begin; i != index_end; 
++i) {\n    const void *feat = features_->element(i);\n\n    uint32_t sel_index = 0;\n    float sel_score = std::numeric_limits<float>::max();\n\n    // todo: get min distance\n    uint32_t total = static_cast<uint32_t>(cents.size());\n    for (uint32_t j = 0; j < total; ++j) {\n      float score = 0.0f;\n\n      distance_func_(cents[j].feature(), feat, meta_.dimension(), &score);\n      if (score < sel_score) {\n        sel_score = score;\n        sel_index = j;\n      }\n    }\n\n    (*labels)[i] = static_cast<uint32_t>(sel_index);\n  }\n}\n\nvoid OptKmeansAlgorithm::init_centroids(size_t count,\n                                        IndexCluster::CentroidList *out) {\n  // Just resize, because the get random centroid step is done by cluster_once\n  out->resize(count);\n}\n\nvoid OptKmeansAlgorithm::update_features_thread(\n    size_t column, IndexCluster::CentroidList *out) {\n  size_t cluster_count = out->size();\n  size_t cluster_follows = 0u;\n\n  // Compute the follows of cluster\n  for (size_t i = column; i < shard_cluster_features_.size();\n       i += cluster_count) {\n    cluster_follows += shard_cluster_features_[i].size();\n  }\n\n  // Merge all features in cluster\n  std::vector<const void *> &cluster_features =\n      *(out->at(column).mutable_similars());\n  cluster_features.resize(cluster_follows);\n\n  for (size_t i = column, j = 0; i < shard_cluster_features_.size();\n       i += cluster_count) {\n    const std::vector<const void *> &it = shard_cluster_features_[i];\n    std::memcpy(&cluster_features[j], it.data(), it.size() * sizeof(void *));\n    j += it.size();\n  }\n}\n\nstatic inline void PurgeCentroids(IndexCluster::CentroidList &cents,\n                                  bool cutting) {\n  size_t index = 0;\n  size_t tamp = cents.size();\n\n  while (index < tamp) {\n    if (cents[index].follows() == 0) {\n      size_t last_index = tamp - 1;\n\n      if (index != last_index) {\n        std::swap(cents[index], cents[last_index]);\n      }\n 
     tamp = last_index;\n      continue;\n    }\n    ++index;\n  }\n  if (cutting) {\n    cents.resize(tamp);\n  }\n}\n\nint OptKmeansAlgorithm::init(const IndexMeta &meta,\n                             const ailego::Params &params) {\n  meta_ = meta;\n  this->update_params(params);\n\n  return init_distance_func();\n}\n\nint OptKmeansAlgorithm::mount(IndexFeatures::Pointer feats) {\n  if (!feats) {\n    return IndexError_InvalidArgument;\n  }\n  if (!feats->is_matched(meta_)) {\n    return IndexError_Mismatch;\n  }\n\n  // Check dimension\n  auto type_ = meta_.data_type();\n  switch (type_) {\n    case IndexMeta::DataType::DT_INT4:\n      if (feats->dimension() % 8 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of int4 \"\n            \"must be an integer multiple of 8).\",\n            feats->dimension());\n        return IndexError_Mismatch;\n      }\n      break;\n    case IndexMeta::DataType::DT_INT8:\n      if (feats->dimension() % 4 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of int8 \"\n            \"must be an integer multiple of 4).\",\n            feats->dimension());\n        return IndexError_Mismatch;\n      }\n      break;\n    case IndexMeta::DataType::DT_BINARY32:\n    case IndexMeta::DataType::DT_BINARY64:\n      if (feats->dimension() % 32 != 0) {\n        LOG_ERROR(\n            \"Unsupported feature dimension %zu (dimension of binary \"\n            \"must be an integer multiple of 32).\",\n            feats->dimension());\n        return IndexError_Mismatch;\n      }\n      break;\n    default:\n      break;\n  }\n\n  features_ = std::move(feats);\n  return 0;\n}\n\nvoid OptKmeansAlgorithm::suggest(uint32_t k) {\n  cluster_count_ = k;\n}\n\nint OptKmeansAlgorithm::classify(IndexThreads::Pointer threads,\n                                 IndexCluster::CentroidList &cents) {\n  if (!threads) {\n    threads = 
std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  this->update_clusters(threads.get(), cents);\n  this->update_features(threads.get(), cents);\n  return 0;\n}\n\nint OptKmeansAlgorithm::label(IndexThreads::Pointer threads,\n                              const IndexCluster::CentroidList &cents,\n                              std::vector<uint32_t> *out) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  this->update_labels(threads.get(), out, cents);\n  return 0;\n}\n\nint OptKmeansAlgorithm::update(const ailego::Params &params) {\n  this->update_params(params);\n  // algorithm_->reset(cluster_count_);\n  return 0;\n}\n\nint OptKmeansAlgorithm::reset(void) {\n  features_.reset();\n  shard_cluster_features_.clear();\n\n  return 0;\n}\n\nint OptKmeansAlgorithm::cleanup(void) {\n  features_.reset();\n  shard_cluster_features_.clear();\n\n  return 0;\n}\n\n\n/*! 
Numerical K-Means cluster algorithm\n */\ntemplate <typename T>\nclass NumericalKmeansAlgorithm : public OptKmeansAlgorithm {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(ailego::IsArithmetic<ValueType>::value,\n                \"ValueType must be arithmetic\");\n\n  //! Constructor\n  NumericalKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~NumericalKmeansAlgorithm(void) {}\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n protected:\n  void update_centroids(\n      IndexCluster::CentroidList &cents,\n      const ailego::NumericalKmeans<T, IndexThreads> &algorithm);\n};\n\ntemplate <typename T>\nvoid NumericalKmeansAlgorithm<T>::update_centroids(\n    IndexCluster::CentroidList &cents,\n    const ailego::NumericalKmeans<T, IndexThreads> &algorithm) {\n  this->init_centroids(algorithm.centroids().count(), &cents);\n  for (size_t i = 0; i < cents.size(); ++i) {\n    IndexCluster::Centroid *centroid = &(cents.at(i));\n    centroid->set_score(algorithm.context().clusters()[i].cost());\n    centroid->set_follows(algorithm.context().clusters()[i].count());\n    centroid->set_feature(algorithm.centroids()[i],\n                          meta_.dimension() * sizeof(T));\n  }\n}\n\ntemplate <typename T>\nint NumericalKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,\n                                         IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return 
IndexError_NoReady;\n  }\n\n  // get cluster algorithm\n  size_t centroid_count =\n      cents.empty()\n          ? std::min(cluster_count_, static_cast<uint32_t>(features_->count()))\n          : cents.size();\n  if (centroid_count == 0) {\n    LOG_ERROR(\"The count of cluster is unknown.\");\n    return IndexError_NoReady;\n  }\n  ailego::NumericalKmeans<T, IndexThreads> algorithm(centroid_count,\n                                                     meta_.dimension());\n\n  // mount features into algorithm\n  auto features_count = features_->count();\n  auto dim = meta_.dimension();\n\n  algorithm.feature_matrix_reserve(features_count);\n\n  for (size_t i = 0; i < features_count; ++i) {\n    auto vec = reinterpret_cast<const T *>(features_->element(i));\n    algorithm.append(vec, dim);\n  }\n\n  if (!cents.empty()) {\n    auto centroids = algorithm.mutable_centroids();\n    centroids->reserve(cents.size());\n    for (const auto &it : cents) {\n      centroids->append(reinterpret_cast<const T *>(it.feature()),\n                        meta_.dimension());\n    }\n  } else {\n    ailego::Kmc2CentroidsGenerator<\n        ailego::NumericalKmeans<ValueType, IndexThreads>, IndexThreads>\n        g;\n    g.set_chain_length(markov_chain_length_);\n    g.set_assumption_free(assumption_free_);\n    algorithm.init_centroids(*threads, g);\n  }\n\n  double cost = 0.0;\n\n  for (uint32_t i = 0; i < max_iterations_; ++i) {\n    double old_cost, new_epsilon;\n    old_cost = cost;\n\n    bool result = algorithm.cluster_once(*threads, &cost);\n    if (result != true) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return -1;\n    }\n\n    new_epsilon = std::abs(cost - old_cost);\n    LOG_DEBUG(\"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f\",\n              i, algorithm.centroids().count(), features_->count(),\n              (size_t)stamp.milli_seconds(), old_cost, cost, new_epsilon);\n    stamp.reset();\n\n    if (new_epsilon < epsilon_) {\n      
break;\n    }\n  }\n\n  // update_centroids(cents);\n  update_centroids(cents, algorithm);\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 0;\n}\n\n/*! Nibble K-Means cluster algorithm\n */\ntemplate <typename T>\nclass NibbleKmeansAlgorithm : public OptKmeansAlgorithm {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(ailego::IsArithmetic<ValueType>::value,\n                \"ValueType must be arithmetic\");\n\n  //! Constructor\n  NibbleKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~NibbleKmeansAlgorithm(void) {}\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n protected:\n  //! update centroids\n  void update_centroids(IndexCluster::CentroidList &cents,\n                        const ailego::NibbleKmeans<T, IndexThreads> &algorithm);\n};\n\ntemplate <typename T>\nvoid NibbleKmeansAlgorithm<T>::update_centroids(\n    IndexCluster::CentroidList &cents,\n    const ailego::NibbleKmeans<T, IndexThreads> &algorithm) {\n  this->init_centroids(algorithm.centroids().count(), &cents);\n  for (size_t i = 0; i < cents.size(); ++i) {\n    IndexCluster::Centroid *centroid = &(cents.at(i));\n    centroid->set_score(algorithm.context().clusters()[i].cost());\n    centroid->set_follows(algorithm.context().clusters()[i].count());\n    centroid->set_feature(algorithm.centroids()[i], (meta_.dimension() >> 1));\n  }\n}\n\ntemplate <typename T>\nint NibbleKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,\n                                      IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes 
some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  // get cluster algorithm\n  size_t centroid_count =\n      cents.empty()\n          ? std::min(cluster_count_, static_cast<uint32_t>(features_->count()))\n          : cents.size();\n  if (centroid_count == 0) {\n    LOG_ERROR(\"The count of cluster is unknown.\");\n    return IndexError_NoReady;\n  }\n  ailego::NibbleKmeans<T, IndexThreads> algorithm(centroid_count,\n                                                  meta_.dimension());\n\n  // mount features into algorithm\n  auto features_count = features_->count();\n  auto dim = meta_.dimension();\n  for (size_t i = 0; i < features_count; ++i) {\n    auto vec = reinterpret_cast<const typename std::make_unsigned<T>::type *>(\n        features_->element(i));\n    algorithm.append(vec, dim);\n  }\n\n  if (!cents.empty()) {\n    auto centroids = algorithm.mutable_centroids();\n    centroids->reserve(cents.size());\n    for (const auto &it : cents) {\n      centroids->append(\n          reinterpret_cast<const typename std::make_unsigned<T>::type *>(\n              it.feature()),\n          size_t(meta_.dimension()));\n    }\n  } else {\n    ailego::Kmc2CentroidsGenerator<\n        ailego::NibbleKmeans<ValueType, IndexThreads>, IndexThreads>\n        g;\n    g.set_chain_length(markov_chain_length_);\n    g.set_assumption_free(assumption_free_);\n    algorithm.init_centroids(*threads, g);\n  }\n\n  double cost = 0.0;\n\n  for (uint32_t i = 0; i < max_iterations_; ++i) {\n    double old_cost, new_epsilon;\n    old_cost = cost;\n\n    bool result = algorithm.cluster_once(*threads, &cost);\n    if (result != true) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return -1;\n    }\n\n    new_epsilon = std::abs(cost - old_cost);\n    LOG_DEBUG(\n        \"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> \"\n        
\"%f = %f\",\n        i, algorithm.centroids().count(), features_->count(),\n        (size_t)stamp.milli_seconds(), old_cost, cost, new_epsilon);\n    stamp.reset();\n\n    if (new_epsilon < epsilon_) {\n      break;\n    }\n  }\n\n  // update centroids\n  update_centroids(cents, algorithm);\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 0;\n}\n\n/*! Binary K-Means cluster algorithm\n */\ntemplate <typename T>\nclass BinaryKmeansAlgorithm : public OptKmeansAlgorithm {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(ailego::IsArithmetic<ValueType>::value,\n                \"ValueType must be arithmetic\");\n\n  //! Constructor\n  BinaryKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~BinaryKmeansAlgorithm(void) {}\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n protected:\n  //! 
update centroids\n  void update_centroids(IndexCluster::CentroidList &cents,\n                        const ailego::BinaryKmeans<T, IndexThreads> &algorithm);\n};\n\ntemplate <typename T>\nvoid BinaryKmeansAlgorithm<T>::update_centroids(\n    IndexCluster::CentroidList &cents,\n    const ailego::BinaryKmeans<T, IndexThreads> &algorithm) {\n  this->init_centroids(algorithm.centroids().count(), &cents);\n  for (size_t i = 0; i < cents.size(); ++i) {\n    IndexCluster::Centroid *centroid = &(cents.at(i));\n    centroid->set_score(algorithm.context().clusters()[i].cost());\n    centroid->set_follows(algorithm.context().clusters()[i].count());\n    centroid->set_feature(algorithm.centroids()[i], (meta_.dimension() >> 3));\n  }\n}\n\ntemplate <typename T>\nint BinaryKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,\n                                      IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  // get cluster algorithm\n  size_t centroid_count =\n      cents.empty()\n          ? 
std::min(cluster_count_, static_cast<uint32_t>(features_->count()))\n          : cents.size();\n  if (centroid_count == 0) {\n    LOG_ERROR(\"The count of cluster is unknown.\");\n    return IndexError_NoReady;\n  }\n  ailego::BinaryKmeans<T, IndexThreads> algorithm(centroid_count,\n                                                  meta_.dimension());\n\n  // mount features into algorithm\n  auto features_count = features_->count();\n  auto dim = meta_.dimension();\n  for (size_t i = 0; i < features_count; ++i) {\n    auto vec = reinterpret_cast<const T *>(features_->element(i));\n    algorithm.append(vec, dim);\n  }\n\n  if (!cents.empty()) {\n    auto centroids = algorithm.mutable_centroids();\n    centroids->reserve(cents.size());\n    for (const auto &it : cents) {\n      centroids->append(reinterpret_cast<const T *>(it.feature()),\n                        meta_.dimension());\n    }\n  } else {\n    ailego::Kmc2CentroidsGenerator<\n        ailego::BinaryKmeans<ValueType, IndexThreads>, IndexThreads>\n        g;\n    g.set_chain_length(markov_chain_length_);\n    g.set_assumption_free(assumption_free_);\n    algorithm.init_centroids(*threads, g);\n  }\n\n  double cost = 0.0;\n\n  for (uint32_t i = 0; i < max_iterations_; ++i) {\n    double old_cost, new_epsilon;\n    old_cost = cost;\n\n    bool result = algorithm.cluster_once(*threads, &cost);\n    if (result != true) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return -1;\n    }\n\n    new_epsilon = std::abs(cost - old_cost);\n    LOG_DEBUG(\n        \"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> \"\n        \"%f = %f\",\n        i, algorithm.centroids().count(), features_->count(),\n        (size_t)stamp.milli_seconds(), old_cost, cost, new_epsilon);\n    stamp.reset();\n\n    if (new_epsilon < epsilon_) {\n      break;\n    }\n  }\n\n  // update centroids\n  update_centroids(cents, algorithm);\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 
0;\n}\n\n/*! Numerical K-Means cluster algorithm\n */\ntemplate <typename T>\nclass NumericalInnerProductKmeansAlgorithm : public OptKmeansAlgorithm {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(ailego::IsArithmetic<ValueType>::value,\n                \"ValueType must be arithmetic\");\n\n  //! Constructor\n  NumericalInnerProductKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~NumericalInnerProductKmeansAlgorithm(void) {}\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n protected:\n  void update_centroids(\n      IndexCluster::CentroidList &cents,\n      const ailego::NumericalInnerProductKmeans<T, IndexThreads> &algorithm);\n};\n\ntemplate <typename T>\nvoid NumericalInnerProductKmeansAlgorithm<T>::update_centroids(\n    IndexCluster::CentroidList &cents,\n    const ailego::NumericalInnerProductKmeans<T, IndexThreads> &algorithm) {\n  this->init_centroids(algorithm.centroids().count(), &cents);\n  for (size_t i = 0; i < cents.size(); ++i) {\n    IndexCluster::Centroid *centroid = &(cents.at(i));\n    centroid->set_score(algorithm.context().clusters()[i].cost());\n    centroid->set_follows(algorithm.context().clusters()[i].count());\n    centroid->set_feature(algorithm.centroids()[i],\n                          meta_.dimension() * sizeof(T));\n  }\n}\n\ntemplate <typename T>\nint NumericalInnerProductKmeansAlgorithm<T>::cluster(\n    IndexThreads::Pointer threads, IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) 
{\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  // get cluster algorithm\n  size_t centroid_count =\n      cents.empty()\n          ? std::min(cluster_count_, static_cast<uint32_t>(features_->count()))\n          : cents.size();\n  if (centroid_count == 0) {\n    LOG_ERROR(\"The count of cluster is unknown.\");\n    return IndexError_NoReady;\n  }\n  ailego::NumericalInnerProductKmeans<T, IndexThreads> algorithm(\n      centroid_count, meta_.dimension(), true);\n\n  // mount features into algorithm\n  auto features_count = features_->count();\n  auto dim = meta_.dimension();\n\n  algorithm.feature_matrix_reserve(features_count);\n\n  for (size_t i = 0; i < features_count; ++i) {\n    auto vec = reinterpret_cast<const T *>(features_->element(i));\n    algorithm.append(vec, dim);\n  }\n\n  if (!cents.empty()) {\n    auto centroids = algorithm.mutable_centroids();\n    centroids->reserve(cents.size());\n    for (const auto &it : cents) {\n      centroids->append(reinterpret_cast<const T *>(it.feature()),\n                        meta_.dimension());\n    }\n  } else {\n    ailego::Kmc2CentroidsGenerator<\n        ailego::NumericalInnerProductKmeans<ValueType, IndexThreads>,\n        IndexThreads>\n        g;\n    g.set_chain_length(markov_chain_length_);\n    g.set_assumption_free(assumption_free_);\n    algorithm.init_centroids(*threads, g);\n  }\n\n  double cost = 0.0;\n\n  for (uint32_t i = 0; i < max_iterations_; ++i) {\n    double old_cost, new_epsilon;\n    old_cost = cost;\n\n    bool result = algorithm.cluster_once(*threads, &cost);\n    if (result != true) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return -1;\n    }\n\n    new_epsilon = std::abs(cost - old_cost);\n    LOG_DEBUG(\"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f\",\n              i, algorithm.centroids().count(), features_->count(),\n              (size_t)stamp.milli_seconds(), old_cost, cost, new_epsilon);\n    
stamp.reset();\n\n    if (new_epsilon < epsilon_) {\n      break;\n    }\n  }\n\n  // update_centroids(cents);\n  update_centroids(cents, algorithm);\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 0;\n}\n\n/*! Nibble Inner Product K-Means cluster algorithm\n */\ntemplate <typename T>\nclass NibbleInnerProductKmeansAlgorithm : public OptKmeansAlgorithm {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  // Check supporting type\n  static_assert(ailego::IsArithmetic<ValueType>::value,\n                \"ValueType must be arithmetic\");\n\n  //! Constructor\n  NibbleInnerProductKmeansAlgorithm(void) {}\n\n  //! Destructor\n  virtual ~NibbleInnerProductKmeansAlgorithm(void) {}\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n protected:\n  //! update centroids\n  void update_centroids(\n      IndexCluster::CentroidList &cents,\n      const ailego::NibbleInnerProductKmeans<T, IndexThreads> &algorithm);\n};\n\ntemplate <typename T>\nvoid NibbleInnerProductKmeansAlgorithm<T>::update_centroids(\n    IndexCluster::CentroidList &cents,\n    const ailego::NibbleInnerProductKmeans<T, IndexThreads> &algorithm) {\n  this->init_centroids(algorithm.centroids().count(), &cents);\n  for (size_t i = 0; i < cents.size(); ++i) {\n    IndexCluster::Centroid *centroid = &(cents.at(i));\n    centroid->set_score(algorithm.context().clusters()[i].cost());\n    centroid->set_follows(algorithm.context().clusters()[i].count());\n    centroid->set_feature(algorithm.centroids()[i], (meta_.dimension() >> 1));\n  }\n}\n\ntemplate <typename T>\nint NibbleInnerProductKmeansAlgorithm<T>::cluster(\n    IndexThreads::Pointer threads, IndexCluster::CentroidList &cents) {\n  ailego::ElapsedTime stamp;\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return 
IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  // get cluster algorithm\n  size_t centroid_count =\n      cents.empty()\n          ? std::min(cluster_count_, static_cast<uint32_t>(features_->count()))\n          : cents.size();\n  if (centroid_count == 0) {\n    LOG_ERROR(\"The count of cluster is unknown.\");\n    return IndexError_NoReady;\n  }\n  ailego::NibbleInnerProductKmeans<T, IndexThreads> algorithm(\n      centroid_count, meta_.dimension());\n\n  // mount features into algorithm\n  auto features_count = features_->count();\n  auto dim = meta_.dimension();\n  for (size_t i = 0; i < features_count; ++i) {\n    auto vec = reinterpret_cast<const typename std::make_unsigned<T>::type *>(\n        features_->element(i));\n    algorithm.append(vec, dim);\n  }\n\n  if (!cents.empty()) {\n    auto centroids = algorithm.mutable_centroids();\n    centroids->reserve(cents.size());\n    for (const auto &it : cents) {\n      centroids->append(\n          reinterpret_cast<const typename std::make_unsigned<T>::type *>(\n              it.feature()),\n          size_t(meta_.dimension()));\n    }\n  } else {\n    ailego::Kmc2CentroidsGenerator<\n        ailego::NibbleInnerProductKmeans<ValueType, IndexThreads>, IndexThreads>\n        g;\n    g.set_chain_length(markov_chain_length_);\n    g.set_assumption_free(assumption_free_);\n    algorithm.init_centroids(*threads, g);\n  }\n\n  double cost = 0.0;\n\n  for (uint32_t i = 0; i < max_iterations_; ++i) {\n    double old_cost, new_epsilon;\n    old_cost = cost;\n\n    bool result = algorithm.cluster_once(*threads, &cost);\n    if (result != true) {\n      LOG_ERROR(\"(%u) Failed to cluster.\", i + 1);\n      return -1;\n    }\n\n    new_epsilon = std::abs(cost - 
old_cost);\n    LOG_DEBUG(\n        \"(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> \"\n        \"%f = %f\",\n        i, algorithm.centroids().count(), features_->count(),\n        (size_t)stamp.milli_seconds(), old_cost, cost, new_epsilon);\n    stamp.reset();\n\n    if (new_epsilon < epsilon_) {\n      break;\n    }\n  }\n\n  // update centroids\n  update_centroids(cents, algorithm);\n\n  // Purge the empty centroids\n  PurgeCentroids(cents, purge_empty_);\n  return 0;\n}\n\n/*! Kmeans Cluster\n */\nclass OptKmeansCluster : public IndexCluster {\n public:\n  //! Constructor\n  OptKmeansCluster(void) {}\n\n  //! Destructor\n  virtual ~OptKmeansCluster(void) {}\n\n  //! Initialize Cluster\n  virtual int init(const IndexMeta &meta, const ailego::Params &params);\n\n  //! Cleanup Cluster\n  virtual int cleanup(void);\n\n  //! Reset Cluster\n  virtual int reset(void);\n\n  //! Update Cluster\n  virtual int update(const ailego::Params &params);\n\n  //! Suggest dividing to K clusters\n  virtual void suggest(uint32_t k);\n\n  //! Mount features\n  virtual int mount(IndexFeatures::Pointer feats);\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n  //! Classify\n  virtual int classify(IndexThreads::Pointer threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Label\n  virtual int label(IndexThreads::Pointer threads,\n                    const IndexCluster::CentroidList &cents,\n                    std::vector<uint32_t> *out);\n\n protected:\n  //! Members\n  IndexCluster::Pointer algorithm_{};\n};\n\n//! Cluster\nint OptKmeansCluster::cluster(IndexThreads::Pointer threads,\n                              IndexCluster::CentroidList &cents) {\n  return algorithm_->cluster(std::move(threads), cents);\n}\n\n//! 
Classify\nint OptKmeansCluster::classify(IndexThreads::Pointer threads,\n                               IndexCluster::CentroidList &cents) {\n  return algorithm_->classify(std::move(threads), cents);\n}\n\n//! Label\nint OptKmeansCluster::label(IndexThreads::Pointer threads,\n                            const IndexCluster::CentroidList &cents,\n                            std::vector<uint32_t> *out) {\n  return algorithm_->label(std::move(threads), cents, out);\n}\n\n//! Update Cluster\nint OptKmeansCluster::update(const ailego::Params &params) {\n  return algorithm_->update(params);\n}\n\n//! Reset Cluster\nint OptKmeansCluster::reset(void) {\n  return algorithm_->reset();\n}\n\n//! Cleanup Cluster\nint OptKmeansCluster::cleanup(void) {\n  return algorithm_->cleanup();\n}\n\n//! Suggest dividing to K clusters\nvoid OptKmeansCluster::suggest(uint32_t k) {\n  algorithm_->suggest(k);\n}\n\nint OptKmeansCluster::mount(IndexFeatures::Pointer feats) {\n  return algorithm_->mount(feats);\n}\n\nint OptKmeansCluster::init(const IndexMeta &meta,\n                           const ailego::Params &params) {\n  auto type_ = meta.data_type();\n\n  if (meta.metric_name() == \"InnerProduct\" || meta.metric_name() == \"Cosine\") {\n    switch (type_) {\n      case IndexMeta::DataType::DT_FP16: {\n        algorithm_.reset(\n            new (std::nothrow)\n                NumericalInnerProductKmeansAlgorithm<ailego::Float16>);\n        break;\n      }\n      case IndexMeta::DataType::DT_FP32: {\n        algorithm_.reset(new (std::nothrow)\n                             NumericalInnerProductKmeansAlgorithm<float>);\n        break;\n      }\n      case IndexMeta::DataType::DT_FP64: {\n        algorithm_.reset(new (std::nothrow)\n                             NumericalInnerProductKmeansAlgorithm<double>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT8: {\n        algorithm_.reset(new (std::nothrow)\n                             
NumericalInnerProductKmeansAlgorithm<int8_t>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT16: {\n        algorithm_.reset(new (std::nothrow)\n                             NumericalInnerProductKmeansAlgorithm<int16_t>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT4: {\n        algorithm_.reset(new (std::nothrow)\n                             NibbleInnerProductKmeansAlgorithm<int32_t>);\n        break;\n      }\n      default: {\n        LOG_ERROR(\"Unsupported feature types %d.\", type_);\n        return IndexError_Mismatch;\n      }\n    }\n  } else {\n    switch (type_) {\n      case IndexMeta::DataType::DT_FP16: {\n        algorithm_.reset(new (std::nothrow)\n                             NumericalKmeansAlgorithm<ailego::Float16>);\n        break;\n      }\n      case IndexMeta::DataType::DT_FP32: {\n        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<float>);\n        break;\n      }\n      case IndexMeta::DataType::DT_FP64: {\n        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<double>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT8: {\n        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<int8_t>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT16: {\n        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<int16_t>);\n        break;\n      }\n      case IndexMeta::DataType::DT_INT4: {\n        algorithm_.reset(new (std::nothrow) NibbleKmeansAlgorithm<int32_t>);\n        break;\n      }\n      // TODO\n      case IndexMeta::DataType::DT_BINARY32: {\n        algorithm_.reset(new (std::nothrow) BinaryKmeansAlgorithm<uint32_t>);\n        break;\n      }\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64: {\n        algorithm_.reset(new (std::nothrow) BinaryKmeansAlgorithm<uint64_t>);\n        break;\n      }\n#endif  // AILEGO_M64\n\n      default: {\n        LOG_ERROR(\"Unsupported feature types %d.\", 
type_);\n        return IndexError_Mismatch;\n      }\n    }\n  }\n\n  algorithm_->init(meta, params);\n\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_CLUSTER(OptKmeansCluster);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/seeker.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_framework.h>\n\nnamespace zvec {\nnamespace core {\n\nclass Seeker {\n public:\n  struct Document {\n    uint32_t index;\n    float score;\n\n    //! Constructor\n    Document(void) : index(0), score(0.0f) {}\n\n    //! Constructor\n    Document(uint32_t i, float v) : index(i), score(v) {}\n\n    //! Constructor\n    Document(const Document &rhs) : index(rhs.index), score(rhs.score) {}\n\n    //! Assignment\n    Document &operator=(const Document &rhs) {\n      index = rhs.index;\n      score = rhs.score;\n      return *this;\n    }\n\n    //! Less than\n    bool operator<(const Document &rhs) const {\n      return (this->score < rhs.score);\n    }\n\n    //! Greater than\n    bool operator>(const Document &rhs) const {\n      return (this->score > rhs.score);\n    }\n  };\n\n public:\n  //! Destructor\n  virtual ~Seeker(void) {}\n\n  virtual int init(const IndexMeta &meta) = 0;\n\n  virtual int cleanup(void) = 0;\n\n  virtual int reset(void) = 0;\n\n  virtual int mount(IndexFeatures::Pointer feats) = 0;\n\n  virtual int seek(const void *query, size_t len, Document *out) = 0;\n\n  virtual IndexFeatures::Pointer original(void) const = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/stratified_cluster.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"cluster_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Stratified Cluster\n */\nclass StratifiedCluster : public IndexCluster {\n public:\n  //! Constructor\n  StratifiedCluster(void) {}\n\n  //! Destructor\n  virtual ~StratifiedCluster(void) {}\n\n  //! Initialize Cluster\n  virtual int init(const IndexMeta &meta, const ailego::Params &params) {\n    meta_ = meta;\n    this->update_params(params);\n    return 0;\n  }\n\n  //! Cleanup Cluster\n  virtual int cleanup(void) {\n    features_.reset();\n    return 0;\n  }\n\n  //! Reset Cluster\n  virtual int reset(void) {\n    features_.reset();\n    return 0;\n  }\n\n  //! Update Cluster\n  virtual int update(const ailego::Params &params) {\n    this->update_params(params);\n    return 0;\n  }\n\n  //! Suggest dividing to K clusters\n  virtual void suggest(uint32_t k) {\n    cluster_count_ = k;\n  }\n\n  //! Mount features\n  virtual int mount(IndexFeatures::Pointer feats) {\n    if (!feats) {\n      return IndexError_InvalidArgument;\n    }\n    if (!feats->is_matched(meta_)) {\n      return IndexError_Mismatch;\n    }\n    features_ = std::move(feats);\n    return 0;\n  }\n\n  //! 
Cluster\n  virtual int cluster(IndexThreads::Pointer threads,\n                      IndexCluster::CentroidList &cents);\n\n  //! Classify\n  virtual int classify(IndexThreads::Pointer threads,\n                       IndexCluster::CentroidList &cents);\n\n  //! Label\n  virtual int label(IndexThreads::Pointer threads,\n                    const IndexCluster::CentroidList &cents,\n                    std::vector<uint32_t> *out);\n\n protected:\n  //! Test if it is valid\n  bool is_valid(void) const {\n    if (!features_ || !features_->count()) {\n      return false;\n    }\n    return true;\n  }\n\n  //! Update parameters\n  void update_params(const ailego::Params &params);\n\n  //! Check Centroids\n  bool check_centroids(const IndexCluster::CentroidList &cents);\n\n  //! Initialize Sub Clusters\n  int init_sub_clusters(IndexCluster::Pointer *first,\n                        IndexCluster::Pointer *second);\n\n  //! Initialize First Cluster\n  int init_first_cluster(IndexCluster::Pointer *first);\n\n  //! Initialize Second Cluster\n  int init_second_cluster(IndexCluster::Pointer *second,\n                          IndexFeatures::Pointer features);\n\n private:\n  //! 
Members\n  IndexMeta meta_{};\n  IndexFeatures::Pointer features_{};\n  uint32_t cluster_count_{0u};\n  uint32_t thread_count_{0u};\n  uint32_t first_cluster_count_{0u};\n  uint32_t second_cluster_count_{0u};\n  bool auto_tuning_{false};\n  std::string first_cluster_class_{\"OptKmeansCluster\"};\n  std::string second_cluster_class_{\"OptKmeansCluster\"};\n  ailego::Params first_cluster_params_{};\n  ailego::Params second_cluster_params_{};\n\n  // TODO: Maybe optimize later\n  uint32_t second_threads_count_{10u};  // todo\n};\n\nint StratifiedCluster::cluster(IndexThreads::Pointer threads,\n                               IndexCluster::CentroidList &cents) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  IndexCluster::Pointer first_cluster;\n  int result = init_first_cluster(&first_cluster);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize the first cluster.\");\n    return result;\n  }\n\n  if (first_cluster_count_) {\n    first_cluster->suggest(first_cluster_count_);\n  }\n\n  // The first clustering\n  LOG_DEBUG(\"Clustering with first cluster: %s.\", first_cluster_class_.c_str());\n  result = first_cluster->cluster(threads, cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to cluster in first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  result = first_cluster->classify(threads, cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to classify in first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  // Cleanup for saving memory\n  first_cluster.reset();\n\n  // 
Calculate the total cluster count\n  uint32_t total_cluster_count = cents.size() * second_cluster_count_;\n  if (cluster_count_) {\n    total_cluster_count = cluster_count_;\n  }\n\n  // Use a group of thread pools (threads_cluster) instead\n  uint32_t tail_threads = threads->count() % second_threads_count_;\n  std::vector<std::shared_ptr<IndexThreads>> threads_cluster;\n\n  // TODO: reusing threads pool?\n  // In case the thread count is less than the second threads count\n  if (threads->count() / second_threads_count_ == 0) {\n    for (size_t threads_idx = 0; threads_idx < tail_threads; threads_idx++) {\n      std::shared_ptr<IndexThreads> curr_threads =\n          std::make_shared<SingleQueueIndexThreads>(1, false);\n      threads_cluster.push_back(curr_threads);\n    }\n  } else {\n    for (size_t threads_idx = 0; threads_idx < second_threads_count_;\n         threads_idx++) {\n      uint32_t curr_threads_count = threads->count() / second_threads_count_;\n      if (threads_idx >= second_threads_count_ - tail_threads) {\n        curr_threads_count++;\n      }\n      std::shared_ptr<IndexThreads> curr_threads =\n          std::make_shared<SingleQueueIndexThreads>(curr_threads_count, false);\n      threads_cluster.push_back(curr_threads);\n    }\n  }\n\n  auto task_group = threads->make_group();\n  // The second clustering\n  for (size_t i = 0; i < cents.size(); ++i) {\n    if (cents[i].similars().empty()) {\n      continue;\n    }\n\n    IndexThreads::Pointer &curr_threads =\n        threads_cluster[i % (threads_cluster.size())];\n\n    task_group->submit(ailego::Closure::New(\n        [this, &curr_threads, &total_cluster_count, &cents](size_t index) {\n          auto &it = cents[index];\n          IndexCluster::Pointer second_cluster;\n          std::shared_ptr<FlexibleIndexFeatures> features =\n              std::make_shared<FlexibleIndexFeatures>(\n                  meta_, it.similars().data(), it.similars().size());\n\n          int ret = this->init_second_cluster(&second_cluster, 
features);\n          if (ret != 0) {\n            LOG_ERROR(\"Failed to initialize the second cluster.\");\n            return;\n          }\n\n          if (auto_tuning_) {\n            if (total_cluster_count) {\n              double factor = static_cast<double>(it.similars().size()) /\n                              static_cast<double>(this->features_->count());\n              second_cluster->suggest(\n                  std::max(static_cast<uint32_t>(\n                               std::floor(total_cluster_count * factor)),\n                           1u));\n            }\n          } else if (second_cluster_count_) {\n            second_cluster->suggest(second_cluster_count_);\n          }\n\n          LOG_DEBUG(\"Clustering with second cluster: %s.\",\n                    second_cluster_class_.c_str());\n          ret = second_cluster->cluster(curr_threads, *(it.mutable_subitems()));\n          if (ret != 0) {\n            LOG_ERROR(\"Failed to cluster in second cluster: %s.\",\n                      second_cluster_class_.c_str());\n          }\n        },\n        i));\n  }\n  task_group->wait_finish();\n  return 0;\n}\n\nint StratifiedCluster::classify(IndexThreads::Pointer threads,\n                                IndexCluster::CentroidList &cents) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  IndexCluster::Pointer first_cluster, second_cluster;\n  int result = init_sub_clusters(&first_cluster, &second_cluster);\n  if (result != 0) 
{\n    LOG_ERROR(\"Failed to initialize the subclusters.\");\n    return result;\n  }\n\n  // The first classifying\n  result = first_cluster->classify(threads, cents);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to classify in first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  // Cleanup for saving memory\n  first_cluster.reset();\n\n  std::shared_ptr<FlexibleIndexFeatures> shell =\n      std::make_shared<FlexibleIndexFeatures>(meta_);\n\n  // The second classifying\n  for (IndexCluster::Centroid &it : cents) {\n    const auto &feats = it.similars();\n\n    if (feats.empty()) {\n      continue;\n    }\n\n    shell->mount(feats.data(), feats.size());\n    result = second_cluster->mount(shell);\n    if (result != 0) {\n      LOG_ERROR(\"Failed to mount features for second cluster: %s.\",\n                second_cluster_class_.c_str());\n      return result;\n    }\n\n    result = second_cluster->classify(threads, *it.mutable_subitems());\n    if (result != 0) {\n      LOG_ERROR(\"Failed to classify in second cluster: %s.\",\n                second_cluster_class_.c_str());\n      return result;\n    }\n  }\n  return 0;\n}\n\nint StratifiedCluster::label(IndexThreads::Pointer threads,\n                             const IndexCluster::CentroidList &cents,\n                             std::vector<uint32_t> *out) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  if (cents.empty()) {\n    LOG_ERROR(\"The input centroid's list is empty.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->check_centroids(cents)) {\n    LOG_ERROR(\"The input centroid's list includes some invalid centroids.\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!this->is_valid()) {\n    LOG_ERROR(\"The cluster is not ready.\");\n    return IndexError_NoReady;\n  }\n\n  IndexCluster::Pointer first_cluster;\n  
int result = init_first_cluster(&first_cluster);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize the subclusters.\");\n    return result;\n  }\n\n  result = first_cluster->label(threads, cents, out);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to label in first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n  return 0;\n}\n\nvoid StratifiedCluster::update_params(const ailego::Params &params) {\n  params.get(GENERAL_THREAD_COUNT, &thread_count_);\n  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);\n  params.get(STRATIFIED_CLUSTER_COUNT, &cluster_count_);\n  params.get(STRATIFIED_CLUSTER_FIRST_COUNT, &first_cluster_count_);\n  params.get(STRATIFIED_CLUSTER_SECOND_COUNT, &second_cluster_count_);\n  params.get(STRATIFIED_CLUSTER_FIRST_CLASS, &first_cluster_class_);\n  params.get(STRATIFIED_CLUSTER_SECOND_CLASS, &second_cluster_class_);\n  params.get(STRATIFIED_CLUSTER_FIRST_PARAMS, &first_cluster_params_);\n  params.get(STRATIFIED_CLUSTER_SECOND_PARAMS, &second_cluster_params_);\n  params.get(STRATIFIED_CLUSTER_AUTO_TUNING, &auto_tuning_);\n  params.get(STRATIFIED_CLUSTER_SECOND_POOL_COUNT, &second_threads_count_);\n}\n\nbool StratifiedCluster::check_centroids(\n    const IndexCluster::CentroidList &cents) {\n  for (const auto &it : cents) {\n    if (it.size() != meta_.element_size()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nint StratifiedCluster::init_sub_clusters(IndexCluster::Pointer *first,\n                                         IndexCluster::Pointer *second) {\n  IndexCluster::Pointer first_cluster =\n      IndexFactory::CreateCluster(first_cluster_class_);\n\n  if (!first_cluster) {\n    LOG_ERROR(\"Failed to create first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return IndexError_NoExist;\n  }\n\n  IndexCluster::Pointer second_cluster =\n      IndexFactory::CreateCluster(second_cluster_class_);\n\n  if (!second_cluster) {\n    LOG_ERROR(\"Failed to create second 
cluster: %s.\",\n              second_cluster_class_.c_str());\n    return IndexError_NoExist;\n  }\n\n  int result = first_cluster->init(meta_, first_cluster_params_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  result = second_cluster->init(meta_, second_cluster_params_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize second cluster: %s.\",\n              second_cluster_class_.c_str());\n    return result;\n  }\n\n  result = first_cluster->mount(features_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to mount features for first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  *first = std::move(first_cluster);\n  *second = std::move(second_cluster);\n  return 0;\n}\n\nint StratifiedCluster::init_first_cluster(IndexCluster::Pointer *first) {\n  IndexCluster::Pointer first_cluster =\n      IndexFactory::CreateCluster(first_cluster_class_);\n\n  if (!first_cluster) {\n    LOG_ERROR(\"Failed to create first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return IndexError_NoExist;\n  }\n\n  int result = first_cluster->init(meta_, first_cluster_params_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  result = first_cluster->mount(features_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to mount features for first cluster: %s.\",\n              first_cluster_class_.c_str());\n    return result;\n  }\n\n  *first = std::move(first_cluster);\n  return 0;\n}\n\nint StratifiedCluster::init_second_cluster(IndexCluster::Pointer *second,\n                                           IndexFeatures::Pointer features) {\n  IndexCluster::Pointer second_cluster =\n      IndexFactory::CreateCluster(second_cluster_class_);\n\n  if (!second_cluster) {\n    LOG_ERROR(\"Failed to create 
second cluster: %s.\",\n              second_cluster_class_.c_str());\n    return IndexError_NoExist;\n  }\n\n  int result = second_cluster->init(meta_, second_cluster_params_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize second cluster: %s.\",\n              second_cluster_class_.c_str());\n    return result;\n  }\n\n  result = second_cluster->mount(features);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to mount features for second cluster: %s.\",\n              second_cluster_class_.c_str());\n    return result;\n  }\n\n  *second = std::move(second_cluster);\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_CLUSTER(StratifiedCluster);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/stratified_cluster_trainer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"stratified_cluster_trainer.h\"\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_helper.h>\n#include \"cluster_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst std::string StratifiedClusterTrainer::SEP_TOKEN = \"*\";\nconst std::string StratifiedClusterTrainer::DEFAULT_CLUSTER_CLASS =\n    \"OptKmeansCluster\";\n\nint StratifiedClusterTrainer::init_params(const ailego::Params &params) {\n  params.get(STRATIFIED_TRAINER_SAMPLE_COUNT, &sample_count_);\n  params.get(STRATIFIED_TRAINER_SAMPLE_RATIO, &sample_ratio_);\n  params.get(STRATIFIED_TRAINER_THREAD_COUNT, &thread_count_);\n  cluster_auto_tuning_ = params.get_as_bool(STRATIFIED_TRAINER_AUTOAUNE);\n\n  std::string centroids_num =\n      params.get_as_string(STRATIFIED_TRAINER_CLUSTER_COUNT);\n  if (!centroids_num.empty()) {\n    ailego::StringHelper::Split(centroids_num, SEP_TOKEN, &centroid_num_vec_);\n    for (size_t i = 0; i < centroid_num_vec_.size(); ++i) {\n      if (centroid_num_vec_[i] == 0) {\n        LOG_ERROR(\"Invalid centroid num %s\", centroids_num.c_str());\n        return IndexError_InvalidArgument;\n      }\n    }\n  } else {\n    LOG_ERROR(\"No centroids_num configed.\");\n    return 
IndexError_InvalidArgument;\n  }\n\n  size_t level_cnt = centroid_num_vec_.size();\n  for (size_t i = 1; i <= level_cnt; ++i) {\n    std::string level_params_key =\n        STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX + std::to_string(i);\n    ailego::Params level_params;\n    params.get(level_params_key, &level_params);\n    cluster_params_.push_back(level_params);\n  }\n\n  std::string cluster_class(DEFAULT_CLUSTER_CLASS);\n  params.get(STRATIFIED_TRAINER_CLASS_NAME, &cluster_class);\n  ailego::StringHelper::Split(cluster_class, SEP_TOKEN, &cluster_class_);\n  if (cluster_class_.size() == 1) {\n    // repeat the cluster class to level_cnt\n    for (size_t i = 1; i < level_cnt; ++i) {\n      cluster_class_.push_back(cluster_class_[0]);\n    }\n  } else if (cluster_class_.size() != level_cnt) {\n    LOG_ERROR(\"Cluster class should be equal to level count\");\n    return IndexError_InvalidArgument;\n  }\n  return 0;\n}\n\nint StratifiedClusterTrainer::init(const IndexMeta &index_meta,\n                                   const ailego::Params &params) {\n  int err = init_params(params);\n  if (err != 0) {\n    LOG_ERROR(\"init params failed, errno:%d,%s\", err, IndexError::What(err));\n    return err;\n  }\n\n  meta_ = index_meta;\n  ailego::Params cluster_params;\n  if (centroid_num_vec_.size() == 0) {\n    LOG_ERROR(\"invalid centroid num\");\n    return IndexError_InvalidArgument;\n  } else if (centroid_num_vec_.size() == 1) {\n    // one level clustering\n    class_name_ = cluster_class_[0];\n    cluster_params = cluster_params_[0];\n    suggest_centriod_cnt_ = centroid_num_vec_[0];\n  } else if (centroid_num_vec_.size() == 2) {\n    // cluster level > 1\n    class_name_ = \"StratifiedCluster\";\n    int level_cnt = centroid_num_vec_.size();\n    cluster_params.set(STRATIFIED_CLUSTER_FIRST_CLASS,\n                       cluster_class_[level_cnt - 2]);\n    cluster_params.set(STRATIFIED_CLUSTER_SECOND_CLASS,\n                       cluster_class_[level_cnt - 1]);\n   
 cluster_params.set(STRATIFIED_CLUSTER_FIRST_COUNT,\n                       centroid_num_vec_[level_cnt - 2]);\n    cluster_params.set(STRATIFIED_CLUSTER_SECOND_COUNT,\n                       centroid_num_vec_[level_cnt - 1]);\n    cluster_params.set(STRATIFIED_CLUSTER_FIRST_PARAMS,\n                       cluster_params_[level_cnt - 2]);\n    cluster_params.set(STRATIFIED_CLUSTER_SECOND_PARAMS,\n                       cluster_params_[level_cnt - 1]);\n    cluster_params.set(STRATIFIED_CLUSTER_AUTO_TUNING, cluster_auto_tuning_);\n    suggest_centriod_cnt_ =\n        centroid_num_vec_[level_cnt - 1] * centroid_num_vec_[level_cnt - 2];\n  } else {\n    LOG_ERROR(\"Unsupported more than 2 level clustering.\");\n    return IndexError_Unsupported;\n  }\n\n  cluster_ = IndexFactory::CreateCluster(class_name_);\n  if (!cluster_) {\n    LOG_ERROR(\"Failed to create cluster[%s]\", class_name_.c_str());\n    return IndexError_InvalidArgument;\n  }\n  int result = cluster_->init(meta_, cluster_params);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to initialize of cluster[%s], error: %d, %s\",\n              class_name_.c_str(), result, IndexError::What(result));\n    return result;\n  }\n  if (suggest_centriod_cnt_ > 0) {\n    cluster_->suggest(suggest_centriod_cnt_);\n  }\n\n  return 0;\n}\n\nint StratifiedClusterTrainer::cleanup(void) {\n  cluster_ = nullptr;\n  centroids_.clear();\n  return 0;\n}\n\nint StratifiedClusterTrainer::train(IndexThreads::Pointer threads,\n                                    IndexHolder::Pointer holder) {\n  ailego::ElapsedTime timer;\n  if (!holder) {\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    return IndexError_Mismatch;\n  }\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n\n  size_t train_sample_count = std::max(\n      sample_count_, static_cast<uint32_t>(sample_ratio_ * 
holder->count()));\n\n  IndexFeatures::Pointer features;\n  if (train_sample_count > 0) {\n    LOG_INFO(\n        \"Train sampling, SampleCount=%u, SampleRatio=%f, HolderCount=%lu, \"\n        \"TrainCount=%lu\",\n        sample_count_, sample_ratio_, holder->count(), train_sample_count);\n\n    auto sampler = std::make_shared<SampleIndexFeatures<CompactIndexFeatures>>(\n        meta_, train_sample_count);\n    size_t pre_reserve = train_sample_count < holder->count()\n                             ? train_sample_count\n                             : holder->count();\n    sampler->reserve(pre_reserve);\n    for (auto iter = holder->create_iterator(); iter && iter->is_valid();\n         iter->next()) {\n      sampler->emplace(iter->data());\n    }\n    features = sampler;\n    stats_.set_trained_count(train_sample_count);\n  } else {\n    LOG_INFO(\n        \"Do no sampling, SampleCount=%u, SampleRatio=%f, \"\n        \"HolderCount=%lu, TrainCount=%lu\",\n        sample_count_, sample_ratio_, holder->count(), holder->count());\n\n    auto no_sampler = std::make_shared<CompactIndexFeatures>(meta_);\n    for (auto iter = holder->create_iterator(); iter && iter->is_valid();\n         iter->next()) {\n      no_sampler->emplace(iter->data());\n    }\n\n    features = no_sampler;\n    stats_.set_trained_count(holder->count());\n  }\n  stats_.set_discarded_count(0);\n\n  // Holder is not needed, cleanup it.\n  holder.reset();\n\n  int result = cluster_->mount(features);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to mount features of cluster[%s], error: %d, %s\",\n              class_name_.c_str(), result, IndexError::What(result));\n    return result;\n  }\n\n  centroids_.clear();\n  result = cluster_->cluster(std::move(threads), centroids_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to cluster features of cluster[%s], error: %d, %s\",\n              class_name_.c_str(), result, IndexError::What(result));\n    return result;\n  }\n\n  // check build result\n  
std::vector<size_t> level_size;\n  std::function<void(const IndexCluster::CentroidList &, size_t)>\n      cal_centroid_cnt =\n          [&cal_centroid_cnt, &level_size](\n              const IndexCluster::CentroidList &cents, size_t level) {\n            if (level > level_size.size()) {\n              level_size.resize(level);\n            }\n            level_size[level - 1] += cents.size();\n            for (const auto &it : cents) {\n              if (!it.subitems().empty()) {\n                cal_centroid_cnt(it.subitems(), level + 1);\n              }\n            }\n          };\n  cal_centroid_cnt(centroids_, 1);\n\n  size_t centroids_num = level_size[level_size.size() - 1];\n  if (centroids_num > suggest_centriod_cnt_) {\n    LOG_WARN(\n        \"Built centroid(%zd level) count[%zd] bigger than expected \"\n        \"count[%d]\",\n        level_size.size(), centroids_num, suggest_centriod_cnt_);\n  } else {\n    LOG_INFO(\"Built centroid(%zd level) count[%zd], expected count[%d]\",\n             level_size.size(), centroids_num, suggest_centriod_cnt_);\n  }\n\n  stats_.set_trained_costtime(timer.milli_seconds());\n\n  return 0;\n}\n\nint StratifiedClusterTrainer::load(IndexStorage::Pointer cntr) {\n  if (!cntr) {\n    LOG_ERROR(\"IndexStorage is nullptr.\");\n    return IndexError_InvalidArgument;\n  }\n  std::shared_ptr<MemoryIndexBundle> bundle =\n      std::make_shared<MemoryIndexBundle>();\n  if (!bundle) {\n    LOG_ERROR(\"New MemoryIndexBundle failed.\");\n    return IndexError_NoMemory;\n  }\n\n  auto results = cntr->get_all();\n  for (auto &it : results) {\n    IndexStorage::Segment::Pointer &seg = it.second;\n    if (!seg) {\n      LOG_ERROR(\"Get Segment %s failed.\", it.first.c_str());\n      return IndexError_InvalidArgument;\n    }\n    size_t data_size = seg->data_size();\n    const void *data = nullptr;\n    size_t actual_size = seg->read(0, &data, data_size);\n    if (actual_size != data_size) {\n      LOG_ERROR(\"Read data failed expect 
%zu, actual %zu.\", data_size,\n                actual_size);\n      return IndexError_ReadData;\n    }\n    bundle->set(it.first, data, data_size);\n  }\n\n  int result = IndexHelper::DeserializeFromStorage(cntr.get(), &meta_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return result;\n  }\n\n  result = IndexCluster::Deserialize(meta_, std::move(bundle), &centroids_);\n  if (result != 0) {\n    LOG_ERROR(\"Failed to deserialize index: %d\", result);\n    return result;\n  }\n  return 0;\n}\n\nint StratifiedClusterTrainer::dump(const IndexDumper::Pointer &dumper) {\n  IndexBundle::Pointer bundle;\n  int result = IndexCluster::Serialize(meta_, centroids_, &bundle);\n  if (result != 0) {\n    LOG_ERROR(\"IndexCluster Serialize failed with ret %d.\", result);\n    return result;\n  }\n\n  result = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (result != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return result;\n  }\n\n  for (const auto &it : bundle->all()) {\n    size_t data_size = it.second.size();\n    result = dumper->append(it.first, data_size, 0, 0);\n    if (result != 0) {\n      LOG_ERROR(\"Dumper append meta %s %zu failed.\", it.first.c_str(),\n                data_size);\n      return IndexError_PackIndex;\n    }\n    size_t actual_size = dumper->write(it.second.buffer(), data_size);\n    if (actual_size != data_size) {\n      LOG_ERROR(\"Dumper segment %s expect %zu, actual %zu.\", it.first.c_str(),\n                data_size, actual_size);\n      return IndexError_PackIndex;\n    }\n  }\n  return 0;\n}\n\nconst IndexMeta &StratifiedClusterTrainer::meta(void) const {\n  return meta_;\n}\n\nconst IndexTrainer::Stats &StratifiedClusterTrainer::stats(void) const {\n  return stats_;\n}\n\nIndexBundle::Pointer StratifiedClusterTrainer::indexes(void) const {\n  IndexBundle::Pointer bundle;\n  IndexCluster::Serialize(meta_, centroids_, &bundle);\n  return bundle;\n}\n\n//! 
Register Cluster Trainer in Factory\nINDEX_FACTORY_REGISTER_TRAINER(StratifiedClusterTrainer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/stratified_cluster_trainer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_trainer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Cluster Trainer\n */\nclass StratifiedClusterTrainer : public IndexTrainer {\n public:\n  typedef std::shared_ptr<StratifiedClusterTrainer> Pointer;\n\n  //! Constructor\n  StratifiedClusterTrainer(void) {}\n\n  //! Destructor\n  ~StratifiedClusterTrainer(void) {}\n\n protected:\n  //! Initialize Trainer\n  virtual int init(const IndexMeta &meta, const ailego::Params &params);\n\n  //! Cleanup Trainer\n  virtual int cleanup(void);\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer threads, IndexHolder::Pointer holder);\n\n  //! Load index from file path or dir\n  virtual int load(IndexStorage::Pointer cntr);\n\n  //! Dump index into file path or dir\n  virtual int dump(const IndexDumper::Pointer &dumper);\n\n  //! Retrieve Index Meta\n  virtual const IndexMeta &meta(void) const;\n\n  //! Retrieve statistics\n  virtual const IndexTrainer::Stats &stats(void) const;\n\n  //! 
Retrieve the output indexes\n  virtual IndexBundle::Pointer indexes(void) const;\n\n private:\n  int init_params(const ailego::Params &params);\n\n private:\n  IndexMeta meta_{};\n  uint32_t sample_count_{0u};\n  float sample_ratio_{0.0};\n  uint32_t thread_count_{0u};\n  bool cluster_auto_tuning_{false};\n  IndexCluster::Pointer cluster_{};\n  IndexCluster::CentroidList centroids_{};\n\n  uint32_t suggest_centriod_cnt_{0u};\n  std::string class_name_;\n  std::vector<std::string> cluster_class_;\n  std::vector<uint64_t> centroid_num_vec_;\n  std::vector<ailego::Params> cluster_params_;\n  IndexTrainer::Stats stats_{};\n\n private:\n  static const std::string SEP_TOKEN;\n  static const std::string DEFAULT_CLUSTER_CLASS;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/cluster/vector_mean.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cmath>\n#include <cstring>\n#include <type_traits>\n#include <vector>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Vector Mean\n */\nstruct VectorMean {\n  //! Destructor\n  virtual ~VectorMean(void) {}\n\n  //! Reset accumulator\n  virtual void reset(void) = 0;\n\n  //! Plus a vector\n  virtual bool plus(const void *vec, size_t len) = 0;\n\n  //! Retrieve the mean of vectors\n  virtual bool mean(void *out, size_t len) const = 0;\n\n  //! Retrieve the mean of vectors\n  virtual void mean(std::string *out) const = 0;\n\n  //! Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) = 0;\n\n  //! Retrieve the count of vectors\n  virtual size_t count(void) const = 0;\n\n  //! Retrieve the dimension of vectors\n  virtual size_t dimension(void) const = 0;\n};\n\n/*! Vector Mean Array\n */\nstruct VectorMeanArray {\n  //! Destructor\n  virtual ~VectorMeanArray(void) {}\n\n  //! Operator []\n  VectorMean &operator[](size_t i) {\n    return this->at(i);\n  }\n\n  //! Operator []\n  const VectorMean &operator[](size_t i) const {\n    return this->at(i);\n  }\n\n  //! Resize accumulators\n  virtual void resize(size_t cnt) = 0;\n\n  //! 
Clear accumulators\n  virtual void clear(void) = 0;\n\n  //! Retrieve an accumulator\n  virtual VectorMean &at(size_t i) = 0;\n\n  //! Retrieve an accumulator\n  virtual const VectorMean &at(size_t i) const = 0;\n\n  //! Retrieve the count of accumulators\n  virtual size_t count(void) const = 0;\n\n  //! Retrieve the dimension of accumulators\n  virtual size_t dimension(void) const = 0;\n};\n\n/*! General Vector Mean Array\n */\ntemplate <typename T, typename = typename std::is_base_of<VectorMean, T>::type>\nclass GeneralVectorMeanArray : public VectorMeanArray {\n public:\n  //! Constructor\n  GeneralVectorMeanArray(size_t dim) : dimension_(dim), array_() {}\n\n  //! Constructor\n  GeneralVectorMeanArray(const GeneralVectorMeanArray &rhs)\n      : dimension_(rhs.dimension_), array_(rhs.array_) {}\n\n  //! Constructor\n  GeneralVectorMeanArray(GeneralVectorMeanArray &&rhs)\n      : dimension_(rhs.dimension_), array_(std::move(rhs.array_)) {}\n\n  //! Emplace an accumulator\n  template <typename... TArgs>\n  bool emplace(TArgs &&...args) {\n    T accum(std::forward<TArgs>(args)...);\n    if (accum.dimension() != dimension_) {\n      return false;\n    }\n    array_.push_back(std::move(accum));\n    return true;\n  }\n\n  //! Resize accumulators\n  virtual void resize(size_t cnt) {\n    if (array_.size() < cnt) {\n      for (size_t i = array_.size(); i < cnt; ++i) {\n        array_.emplace_back(dimension_);\n      }\n    } else {\n      array_.resize(cnt);\n    }\n  }\n\n  //! Clear accumulators\n  virtual void clear(void) {\n    array_.clear();\n  }\n\n  //! Retrieve an accumulator\n  virtual VectorMean &at(size_t i) {\n    return array_[i];\n  }\n\n  //! Retrieve an accumulator\n  virtual const VectorMean &at(size_t i) const {\n    return array_[i];\n  }\n\n  //! Retrieve the count of accumulators\n  virtual size_t count(void) const {\n    return array_.size();\n  }\n\n  //! 
Retrieve the dimension of accumulators\n  virtual size_t dimension(void) const {\n    return dimension_;\n  }\n\n private:\n  //! Disable them\n  GeneralVectorMeanArray(void) = delete;\n\n  //! Members\n  size_t dimension_;\n  std::vector<T> array_;\n};\n\n/*! Numerical Vector Mean\n */\ntemplate <typename T,\n          typename =\n              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>\nclass NumericalVectorMean : public VectorMean {\n public:\n  //! Constructor\n  NumericalVectorMean(void) : count_(0), accums_() {}\n\n  //! Constructor\n  NumericalVectorMean(const NumericalVectorMean &rhs)\n      : count_(rhs.count_), accums_(rhs.accums_) {}\n\n  //! Constructor\n  NumericalVectorMean(NumericalVectorMean &&rhs)\n      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}\n\n  //! Constructor\n  NumericalVectorMean(size_t dim) : count_(0), accums_(dim) {}\n\n  //! Constructor\n  NumericalVectorMean(const T *means, size_t dim, size_t cnt)\n      : count_(cnt), accums_(dim) {\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] = static_cast<double>(means[i]) * count_;\n    }\n  }\n\n  //! Reset accumulator\n  void reset(size_t dim) {\n    count_ = 0u;\n    accums_.clear();\n    accums_.resize(dim, 0.0);\n  }\n\n  //! Reset accumulator\n  virtual void reset(void) {\n    this->reset(accums_.size());\n  }\n\n  //! Plus a vector\n  virtual bool plus(const void *vec, size_t len) {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += *(static_cast<const T *>(vec) + i);\n    }\n    ++count_;\n    return true;\n  }\n\n  //! 
Retrieve the mean of vectors\n  virtual bool mean(void *out, size_t len) const {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      *(static_cast<T *>(out) + i) = FloatCast<T>(accums_[i] / count_);\n    }\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual void mean(std::string *out) const {\n    ailego::NumericalVector<T> &vec =\n        *static_cast<ailego::NumericalVector<T> *>(out);\n\n    size_t dim = accums_.size();\n    vec.resize(dim);\n    for (size_t i = 0; i < dim; ++i) {\n      vec[i] = FloatCast<T>(accums_[i] / count_);\n    }\n  }\n\n  //! Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) {\n    const NumericalVectorMean<T> &src =\n        dynamic_cast<const NumericalVectorMean<T> &>(rhs);\n\n    size_t dim = accums_.size();\n    if (dim != src.accums_.size()) {\n      return false;\n    }\n    count_ += src.count_;\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += src.accums_[i];\n    }\n    return true;\n  }\n\n  //! Retrieve the count of vectors\n  virtual size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve dimension of accumulator\n  virtual size_t dimension(void) const {\n    return accums_.size();\n  }\n\n protected:\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<!std::is_integral<U>::value, U>::type {\n    return static_cast<U>(val);\n  }\n\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<std::is_integral<U>::value, U>::type {\n    return static_cast<U>(std::round(val));\n  }\n\n private:\n  //! Members\n  size_t count_;\n  std::vector<double> accums_;\n};\n\n/*! 
Numerical Vector Harmonic Mean\n */\ntemplate <typename T,\n          typename =\n              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>\nclass NumericalVectorHarmonicMean : public VectorMean {\n public:\n  //! Constructor\n  NumericalVectorHarmonicMean(void) : count_(0), accums_() {}\n\n  //! Constructor\n  NumericalVectorHarmonicMean(const NumericalVectorHarmonicMean &rhs)\n      : count_(rhs.count_), accums_(rhs.accums_) {}\n\n  //! Constructor\n  NumericalVectorHarmonicMean(NumericalVectorHarmonicMean &&rhs)\n      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}\n\n  //! Constructor\n  NumericalVectorHarmonicMean(size_t dim) : count_(0), accums_(dim) {}\n\n  //! Constructor\n  NumericalVectorHarmonicMean(const T *means, size_t dim, size_t cnt)\n      : count_(cnt), accums_(dim) {\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] = static_cast<double>(count_) / static_cast<double>(means[i]);\n    }\n  }\n\n  //! Reset accumulator\n  void reset(size_t dim) {\n    count_ = 0u;\n    accums_.clear();\n    accums_.resize(dim, 0.0);\n  }\n\n  //! Reset accumulator\n  virtual void reset(void) {\n    this->reset(accums_.size());\n  }\n\n  //! Plus a vector (harmonic)\n  virtual bool plus(const void *vec, size_t len) {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += 1.0 / *(static_cast<const T *>(vec) + i);\n    }\n    ++count_;\n    return true;\n  }\n\n  //! Retrieve the mean of vectors (harmonic)\n  virtual bool mean(void *out, size_t len) const {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      *(static_cast<T *>(out) + i) = FloatCast<T>(count_ / accums_[i]);\n    }\n    return true;\n  }\n\n  //! 
Retrieve the mean of vectors\n  virtual void mean(std::string *out) const {\n    ailego::NumericalVector<T> &vec =\n        *static_cast<ailego::NumericalVector<T> *>(out);\n\n    size_t dim = accums_.size();\n    vec.resize(dim);\n    for (size_t i = 0; i < dim; ++i) {\n      vec[i] = FloatCast<T>(count_ / accums_[i]);\n    }\n  }\n\n  //! Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) {\n    const NumericalVectorHarmonicMean<T> &src =\n        dynamic_cast<const NumericalVectorHarmonicMean<T> &>(rhs);\n\n    size_t dim = accums_.size();\n    if (dim != src.accums_.size()) {\n      return false;\n    }\n    count_ += src.count_;\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += src.accums_[i];\n    }\n    return true;\n  }\n\n  //! Retrieve the count of vectors\n  virtual size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve dimension of accumulator\n  virtual size_t dimension(void) const {\n    return accums_.size();\n  }\n\n protected:\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<!std::is_integral<U>::value, U>::type {\n    return static_cast<U>(val);\n  }\n\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<std::is_integral<U>::value, U>::type {\n    return static_cast<U>(std::round(val));\n  }\n\n private:\n  //! Members\n  size_t count_;\n  std::vector<double> accums_;\n};\n\n/*! Numerical Vector Geometric Mean\n */\ntemplate <typename T,\n          typename =\n              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>\nclass NumericalVectorGeometricMean : public VectorMean {\n public:\n  //! Constructor\n  NumericalVectorGeometricMean(void) : count_(0), accums_() {}\n\n  //! 
Constructor\n  NumericalVectorGeometricMean(const NumericalVectorGeometricMean &rhs)\n      : count_(rhs.count_), accums_(rhs.accums_) {}\n\n  //! Constructor\n  NumericalVectorGeometricMean(NumericalVectorGeometricMean &&rhs)\n      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}\n\n  //! Constructor\n  NumericalVectorGeometricMean(size_t dim) : count_(0), accums_(dim, 1.0) {}\n\n  //! Constructor\n  NumericalVectorGeometricMean(const T *means, size_t dim, size_t cnt)\n      : count_(cnt), accums_(dim, 1.0) {\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] = std::pow(static_cast<double>(means[i]), count_);\n    }\n  }\n\n  //! Reset accumulator\n  void reset(size_t dim) {\n    count_ = 0u;\n    accums_.clear();\n    accums_.resize(dim, 1.0);\n  }\n\n  //! Reset accumulator\n  virtual void reset(void) {\n    this->reset(accums_.size());\n  }\n\n  //! Plus a vector (geometric)\n  virtual bool plus(const void *vec, size_t len) {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] *= *(static_cast<const T *>(vec) + i);\n    }\n    ++count_;\n    return true;\n  }\n\n  //! Retrieve the mean of vectors (geometric)\n  virtual bool mean(void *out, size_t len) const {\n    size_t dim = accums_.size();\n    if (dim * sizeof(T) != len) {\n      return false;\n    }\n    for (size_t i = 0; i < dim; ++i) {\n      *(static_cast<T *>(out) + i) =\n          FloatCast<T>(std::pow(accums_[i], 1.0 / count_));\n    }\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual void mean(std::string *out) const {\n    ailego::NumericalVector<T> &vec =\n        *static_cast<ailego::NumericalVector<T> *>(out);\n\n    size_t dim = accums_.size();\n    vec.resize(dim);\n    for (size_t i = 0; i < dim; ++i) {\n      vec[i] = FloatCast<T>(std::pow(accums_[i], 1.0 / count_));\n    }\n  }\n\n  //! 
Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) {\n    const NumericalVectorGeometricMean<T> &src =\n        dynamic_cast<const NumericalVectorGeometricMean<T> &>(rhs);\n\n    size_t dim = accums_.size();\n    if (dim != src.accums_.size()) {\n      return false;\n    }\n    count_ += src.count_;\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] *= src.accums_[i];\n    }\n    return true;\n  }\n\n  //! Retrieve the count of vectors\n  virtual size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve dimension of accumulator\n  virtual size_t dimension(void) const {\n    return accums_.size();\n  }\n\n protected:\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<!std::is_integral<U>::value, U>::type {\n    return static_cast<U>(val);\n  }\n\n  //! Convert float type to another type\n  template <typename U>\n  static auto FloatCast(const double &val) ->\n      typename std::enable_if<std::is_integral<U>::value, U>::type {\n    return static_cast<U>(std::round(val));\n  }\n\n private:\n  //! Members\n  size_t count_;\n  std::vector<double> accums_;\n};\n\n/*! Binary Vector Mean\n */\nclass BinaryVectorMean : public VectorMean {\n public:\n  //! Constructor\n  BinaryVectorMean(void) : count_(0), accums_() {}\n\n  //! Constructor\n  BinaryVectorMean(const BinaryVectorMean &rhs)\n      : count_(rhs.count_), accums_(rhs.accums_) {}\n\n  //! Constructor\n  BinaryVectorMean(BinaryVectorMean &&rhs)\n      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}\n\n  //! Constructor\n  BinaryVectorMean(size_t dim) : count_(0), accums_(((dim + 7) >> 3) << 3) {}\n\n  //! 
Constructor\n  BinaryVectorMean(const void *means, size_t dim, size_t cnt)\n      : count_(cnt), accums_(((dim + 7) >> 3) << 3) {\n    const uint8_t *bits = reinterpret_cast<const uint8_t *>(means);\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] = (count_ >> 1);\n\n      if (bits[i >> 3] & static_cast<uint8_t>(1 << (i & 0x7))) {\n        accums_[i] += 1;\n      }\n    }\n  }\n\n  //! Reset accumulator\n  void reset(size_t dim) {\n    count_ = 0u;\n    accums_.clear();\n    accums_.resize(dim);\n  }\n\n  //! Reset accumulator\n  virtual void reset(void) {\n    this->reset(accums_.size());\n  }\n\n  //! Plus a vector\n  virtual bool plus(const void *vec, size_t len) {\n    size_t dim = accums_.size();\n    if (dim != (len << 3)) {\n      return false;\n    }\n\n    const uint8_t *bits = reinterpret_cast<const uint8_t *>(vec);\n    for (size_t i = 0; i < dim; ++i) {\n      if (bits[i >> 3] & static_cast<uint8_t>(1 << (i & 0x7))) {\n        accums_[i] += 1;\n      }\n    }\n    ++count_;\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual bool mean(void *out, size_t len) const {\n    size_t dim = accums_.size();\n    if (dim != (len << 3)) {\n      return false;\n    }\n    memset(out, 0, len);\n\n    uint8_t *bits = reinterpret_cast<uint8_t *>(out);\n    size_t half_count = count_ >> 1;\n    for (size_t i = 0; i < dim; ++i) {\n      if (accums_[i] > half_count) {\n        bits[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));\n      }\n    }\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual void mean(std::string *out) const {\n    size_t dim = accums_.size();\n    out->clear();\n    out->resize((dim + 7) / 8);\n\n    uint8_t *bits =\n        reinterpret_cast<uint8_t *>(const_cast<char *>(out->data()));\n    size_t half_count = count_ >> 1;\n    for (size_t i = 0; i < dim; ++i) {\n      if (accums_[i] > half_count) {\n        bits[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));\n      }\n    }\n  }\n\n  //! 
Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) {\n    const BinaryVectorMean &src = dynamic_cast<const BinaryVectorMean &>(rhs);\n\n    size_t dim = accums_.size();\n    if (dim != src.accums_.size()) {\n      return false;\n    }\n    count_ += src.count_;\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += src.accums_[i];\n    }\n    return true;\n  }\n\n  //! Retrieve the count of vectors\n  virtual size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve dimension of accumulator\n  virtual size_t dimension(void) const {\n    return accums_.size();\n  }\n\n private:\n  //! Members\n  size_t count_;\n  std::vector<size_t> accums_;\n};\n\n/*! Nibble Vector Mean\n */\ntemplate <typename T,\n          typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass NibbleVectorMean : public VectorMean {\n public:\n  //! Constructor\n  NibbleVectorMean(void) : count_(0), accums_() {}\n\n  //! Constructor\n  NibbleVectorMean(const NibbleVectorMean &rhs)\n      : count_(rhs.count_), accums_(rhs.accums_) {}\n\n  //! Constructor\n  NibbleVectorMean(NibbleVectorMean &&rhs)\n      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}\n\n  //! Constructor\n  NibbleVectorMean(size_t dim) : count_(0), accums_(dim) {}\n\n  //! Constructor\n  NibbleVectorMean(const void *means, size_t dim, size_t cnt)\n      : count_(cnt), accums_(dim) {\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(means);\n    for (size_t i = 0; i != dim; i += 2) {\n      uint8_t val = arr[i >> 1];\n      int lo = ((int8_t)(val << 4) >> 4);\n      int hi = ((int8_t)(val) >> 4);\n      accums_[i] = static_cast<double>(lo) * count_;\n      accums_[i + 1] = static_cast<double>(hi) * count_;\n    }\n  }\n\n  //! Reset accumulator\n  void reset(size_t dim) {\n    count_ = 0u;\n    accums_.clear();\n    accums_.resize(dim, 0.0);\n  }\n\n  //! Reset accumulator\n  virtual void reset(void) {\n    this->reset(accums_.size());\n  }\n\n  //! 
Plus a vector\n  virtual bool plus(const void *vec, size_t len) {\n    size_t dim = accums_.size();\n    if (dim != (len << 1)) {\n      return false;\n    }\n\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);\n    for (size_t i = 0; i != dim; i += 2) {\n      uint8_t val = arr[i >> 1];\n      accums_[i] += ((int8_t)(val << 4) >> 4);\n      accums_[i + 1] += ((int8_t)(val) >> 4);\n    }\n    ++count_;\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual bool mean(void *out, size_t len) const {\n    size_t dim = accums_.size();\n    if (dim != (len << 1)) {\n      return false;\n    }\n    memset(out, 0, len);\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(out);\n\n    for (size_t i = 0; i != dim; i += 2) {\n      int lo = static_cast<int>(std::round(accums_[i] / count_));\n      int hi = static_cast<int>(std::round(accums_[i + 1] / count_));\n      arr[i >> 1] = (uint8_t)((hi << 4) & 0xf0) | (uint8_t)(lo & 0xf);\n    }\n\n    return true;\n  }\n\n  //! Retrieve the mean of vectors\n  virtual void mean(std::string *out) const {\n    size_t dim = accums_.size();\n    out->clear();\n    out->resize(dim >> 1);\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(const_cast<char *>(out->data()));\n\n    for (size_t i = 0; i != dim; i += 2) {\n      int lo = static_cast<int>(std::round(accums_[i] / count_));\n      int hi = static_cast<int>(std::round(accums_[i + 1] / count_));\n      arr[i >> 1] = (uint8_t)((hi << 4) & 0xf0) | (uint8_t)(lo & 0xf);\n    }\n  }\n\n  //! Merge another vector mean\n  virtual bool merge(const VectorMean &rhs) {\n    const NibbleVectorMean &src = dynamic_cast<const NibbleVectorMean &>(rhs);\n\n    size_t dim = accums_.size();\n    if (dim != src.accums_.size()) {\n      return false;\n    }\n    count_ += src.count_;\n    for (size_t i = 0; i < dim; ++i) {\n      accums_[i] += src.accums_[i];\n    }\n    return true;\n  }\n\n  //! 
Retrieve the count of vectors\n  virtual size_t count(void) const {\n    return count_;\n  }\n\n  //! Retrieve dimension of accumulator\n  virtual size_t dimension(void) const {\n    return accums_.size();\n  }\n\n private:\n  //! Members\n  size_t count_;\n  std::vector<double> accums_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n#message(STATUS \"PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}\")\ncc_library(\n    NAME core_knn_flat \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS core_framework \n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/src/core/framework\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_builder.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::init(const IndexMeta &meta,\n                                  const ailego::Params &params) {\n  meta_ = meta;\n\n  // Set the major order\n  bool column_major_order = false;\n  if (params.get(PARAM_FLAT_COLUMN_MAJOR_ORDER, &column_major_order)) {\n    meta_.set_major_order(column_major_order ? 
IndexMeta::MO_COLUMN\n                                             : IndexMeta::MO_ROW);\n  }\n\n  // Verify column major order\n  if (meta_.major_order() != IndexMeta::MO_ROW) {\n    IndexMeta::DataType dt = meta_.data_type();\n\n    bool support_column_major = false;\n    if ((dt != IndexMeta::DataType::DT_FP32 &&\n         dt != IndexMeta::DataType::DT_FP16 &&\n         dt != IndexMeta::DataType::DT_INT8 && dt != IndexMeta::DT_INT4 &&\n         dt != IndexMeta::DT_BINARY32 && dt != IndexMeta::DT_BINARY64) ||\n        (meta_.unit_size() != IndexMeta::UnitSizeof(dt))) {\n      if (meta_.major_order() == IndexMeta::MO_COLUMN) {\n        LOG_ERROR(\"Unsupported type %d with unit size %u.\", dt,\n                  meta_.unit_size());\n        return IndexError_Unsupported;\n      } else {\n        support_column_major = false;\n      }\n    }\n    if (meta_.element_size() % IndexMeta::AlignSizeof(dt) != 0) {\n      if (meta_.major_order() == IndexMeta::MO_COLUMN) {\n        LOG_ERROR(\"Unsupported type %d with dimension %u.\", dt,\n                  meta_.dimension());\n        return IndexError_Unsupported;\n      } else {\n        support_column_major = false;\n      }\n    }\n\n    if (meta_.major_order() == IndexMeta::MO_UNDEFINED &&\n        support_column_major) {\n      meta_.set_major_order(IndexMeta::MO_COLUMN);\n    }\n  }\n\n  if (!VerifyMetric(meta_)) {\n    LOG_ERROR(\"Invalid index measure %s.\", meta_.metric_name().c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  std::string tag = std::to_string(BATCH_SIZE);\n  ailego::Params searcher_params;\n  searcher_params.set(PARAM_FLAT_BATCH_SIZE, BATCH_SIZE);\n  meta_.set_searcher(\"FlatSearcher\" + tag, 0, searcher_params);\n  meta_.set_builder(\"FlatBuilder\" + tag, 0, params);\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::build(IndexThreads::Pointer,\n                                   IndexHolder::Pointer holder) {\n  ailego::ElapsedTime stamp;\n  if 
(!holder->is_matched(meta_)) {\n    LOG_ERROR(\"The holder is unmatched with initialized meta.\");\n    return IndexError_Mismatch;\n  }\n\n  holder_ = std::move(holder);\n  stats_.set_built_count(holder_->count());\n  stats_.set_built_costtime(stamp.milli_seconds());\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::dump(const IndexDumper::Pointer &dumper) {\n  ailego::ElapsedTime stamp;\n  if (!holder_) {\n    return IndexError_NoReady;\n  }\n\n  std::vector<uint64_t> keys;\n  if (meta_.major_order() == IndexMeta::MO_COLUMN) {\n    int error_code = this->write_column_index(dumper.get(), &keys);\n    if (error_code != 0) {\n      return error_code;\n    }\n  } else {\n    int error_code = this->write_row_index(dumper.get(), &keys);\n    if (error_code != 0) {\n      return error_code;\n    }\n  }\n\n  int error_code = this->write_keys(keys, dumper.get());\n  if (error_code != 0) {\n    return error_code;\n  }\n\n  error_code = this->write_mapping(keys, dumper.get());\n  if (error_code != 0) {\n    return error_code;\n  }\n\n  error_code = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (error_code != 0) {\n    return error_code;\n  }\n\n  stats_.set_dumped_count(keys.size());\n  stats_.set_dumped_costtime(stamp.milli_seconds());\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::write_keys(const std::vector<uint64_t> &keys,\n                                        IndexDumper *dumper) {\n  size_t keys_size = keys.size() * sizeof(uint64_t);\n  size_t keys_padding_size = ailego_align(keys_size, 32) - keys_size;\n  if (dumper->write(keys.data(), keys_size) != keys_size) {\n    LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n\n  // Write the padding if need\n  if (keys_padding_size) {\n    std::string padding(keys_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed 
to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(FLAT_SEGMENT_KEYS_SEG_ID, keys_size, keys_padding_size,\n                        0);\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::write_mapping(const std::vector<uint64_t> &keys,\n                                           IndexDumper *dumper) {\n  std::vector<uint32_t> mapping(keys.size());\n  std::iota(mapping.begin(), mapping.end(), 0);\n  std::sort(\n      mapping.begin(), mapping.end(),\n      [&keys](uint32_t lhs, uint32_t rhs) { return (keys[lhs] < keys[rhs]); });\n\n  size_t mapping_size = mapping.size() * sizeof(uint32_t);\n  size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;\n  if (dumper->write(mapping.data(), mapping_size) != mapping_size) {\n    LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n\n  // Write the padding if need\n  if (mapping_padding_size) {\n    std::string padding(mapping_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(FLAT_SEGMENT_MAPPING_SEG_ID, mapping_size,\n                        mapping_padding_size, 0);\n}\n\ntemplate <size_t BATCH_SIZE>\ntemplate <typename T>\nint FlatBuilder<BATCH_SIZE>::write_column_index(IndexDumper *dumper,\n                                                std::vector<uint64_t> *keys) {\n  auto iter = holder_->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Failed to create iterator of holder\");\n    return IndexError_Runtime;\n  }\n\n  // Write features\n  size_t element_size = holder_->element_size();\n  size_t block_size = element_size * BATCH_SIZE;\n  std::string block1, block2;\n  block1.reserve(block_size);\n  block2.reserve(block_size);\n\n  for (; 
iter->is_valid(); iter->next()) {\n    block1.append(reinterpret_cast<const char *>(iter->data()), element_size);\n    keys->emplace_back(iter->key());\n\n    if (block1.size() == block_size) {\n      ailego::MatrixHelper::Transpose<T, BATCH_SIZE>(\n          block1.data(), element_size / sizeof(T), (void *)block2.data());\n\n      if (dumper->write(block2.data(), block_size) != block_size) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      block1.clear();\n    }\n  }\n\n  if (!block1.empty()) {\n    if (dumper->write(block1.data(), block1.size()) != block1.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n\n  // Write the padding if need\n  size_t features_size = keys->size() * element_size;\n  size_t features_padding_size =\n      ailego_align(features_size, 32) - features_size;\n  if (features_padding_size) {\n    std::string padding(features_padding_size, '\\0');\n\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,\n                        features_padding_size, 0);\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatBuilder<BATCH_SIZE>::write_row_index(IndexDumper *dumper,\n                                             std::vector<uint64_t> *keys) {\n  auto iter = holder_->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Failed to create iterator of holder\");\n    return IndexError_Runtime;\n  }\n\n  // Write features\n  size_t element_size = holder_->element_size();\n  for (; iter->is_valid(); iter->next()) {\n    if (dumper->write(iter->data(), element_size) != element_size) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", 
dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n    keys->emplace_back(iter->key());\n  }\n\n  // Write the padding if need\n  size_t features_size = keys->size() * element_size;\n  size_t features_padding_size =\n      ailego_align(features_size, 32) - features_size;\n  if (features_padding_size) {\n    std::string padding(features_padding_size, '\\0');\n\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,\n                        features_padding_size, 0);\n}\n\nINDEX_FACTORY_REGISTER_BUILDER_ALIAS(LinearBuilder, FlatBuilder<32>);\nINDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder, FlatBuilder<32>);\nINDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder16, FlatBuilder<16>);\nINDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder32, FlatBuilder<32>);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_helper.h>\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Flat Builder\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatBuilder : public IndexBuilder {\n public:\n  //! Destructor\n  virtual ~FlatBuilder(void) {}\n\n  //! Initialize the builder\n  int init(const IndexMeta &meta, const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  int cleanup(void) override {\n    holder_ = nullptr;\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexThreads::Pointer, IndexHolder::Pointer) override {\n    stats_.set_trained_count(0u);\n    stats_.set_trained_costtime(0u);\n    return 0;\n  }\n\n  //! Train the data\n  int train(const IndexTrainer::Pointer &) override {\n    stats_.set_trained_count(0u);\n    stats_.set_trained_costtime(0u);\n    return 0;\n  }\n\n  //! Build the index\n  int build(IndexThreads::Pointer, IndexHolder::Pointer holder) override;\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  const IndexBuilder::Stats &stats(void) const override {\n    return stats_;\n  }\n\n protected:\n  //! Dump index keys\n  int write_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  //! 
Dump index keys mapping\n  int write_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  //! Dump index using column-major-order format\n  template <typename T>\n  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys);\n\n  //! Dump index using column-major-order format\n  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {\n    switch (IndexMeta::AlignSizeof(meta_.data_type())) {\n      case 2:\n        return this->write_column_index<uint16_t>(dumper, keys);\n      case 4:\n        return this->write_column_index<uint32_t>(dumper, keys);\n      case 8:\n        return this->write_column_index<uint64_t>(dumper, keys);\n      default:\n        ailego_check_with(0, \"BAD CASE\");\n    }\n    return IndexError_Runtime;\n  }\n\n  //! Dump index using row-major-order format\n  int write_row_index(IndexDumper *dumper, std::vector<uint64_t> *keys);\n\n private:\n  IndexMeta meta_{};\n  IndexBuilder::Stats stats_{};\n  IndexHolder::Pointer holder_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_distance_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Distance Tuple\n */\ntemplate <size_t K, typename = void>\nclass FlatDistanceTuple;\n\n/*! Brute Force Distance Tuple\n */\ntemplate <>\nclass FlatDistanceTuple<1> {\n public:\n  //! Retrieve non-zero if all distances are valid.\n  bool is_valid(void) const {\n    return !!distance_;\n  }\n\n  //! Retrieve non-zero if a distance is valid.\n  bool is_valid(size_t m) const {\n    return m == 1 && !!distance_;\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure) {\n    distance_ = measure.distance_matrix(1, 1);\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure, size_t m) {\n    distance_ = measure.distance_matrix(m, 1);\n  }\n\n  //! Compute the distance between matrix and query\n  template <size_t M>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<M == 1>::type {\n    distance_(m, q, dim, out);\n  }\n\n private:\n  IndexMetric::MatrixDistance distance_{};\n};\n\n/*! Brute Force Distance Tuple\n */\ntemplate <size_t K>\nclass FlatDistanceTuple<\n    K, typename std::enable_if<IsEqualPowerofTwo<K>::value>::type> {\n public:\n  //! 
Retrieve non-zero if all distances are valid.\n  bool is_valid(void) const {\n    return (distance_tuple_.is_valid() && !!distance_);\n  }\n\n  //! Retrieve non-zero if a distance is valid.\n  bool is_valid(size_t m) const {\n    return (m == K ? (!!distance_)\n                   : (m < K ? distance_tuple_.is_valid(m) : false));\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure) {\n    distance_tuple_.initialize(measure);\n    distance_ = measure.distance_matrix(K, 1);\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure, size_t m) {\n    distance_tuple_.initialize(measure, m);\n    distance_ = measure.distance_matrix(m, K);\n  }\n\n  //! Compute the distance between matrix and query\n  template <size_t M>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<K == M>::type {\n    distance_(m, q, dim, out);\n  }\n\n  //! Compute the distance between matrix and query\n  template <size_t M>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<(K > M) && IsEqualPowerofTwo<M>::value>::type {\n    distance_tuple_.template distance<M>(m, q, dim, out);\n  }\n\n private:\n  FlatDistanceTuple<(K >> 1)> distance_tuple_{};\n  IndexMetric::MatrixDistance distance_{};\n};\n\n/*! Brute Force Distance Matrix\n */\ntemplate <size_t K, typename = void>\nclass FlatDistanceMatrix;\n\n/*! Brute Force Distance Matrix\n */\ntemplate <>\nclass FlatDistanceMatrix<1> {\n public:\n  //! Retrieve non-zero if all distances are valid.\n  bool is_valid(void) const {\n    return (!!distance_);\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure) {\n    distance_ = measure.distance_matrix(1, 1);\n  }\n\n  //! 
Compute the distance between matrix and query\n  template <size_t M, size_t N = 1u>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<M == 1u && N == 1u>::type {\n    distance_(m, q, dim, out);\n  }\n\n private:\n  IndexMetric::MatrixDistance distance_{};\n};\n\n/*! Brute Force Distance Matrix\n */\ntemplate <size_t K>\nclass FlatDistanceMatrix<\n    K, typename std::enable_if<IsEqualPowerofTwo<K>::value>::type> {\n public:\n  //! Retrieve non-zero if all distances are valid.\n  bool is_valid(void) const {\n    return (tuple_h_.is_valid() && tuple_v_.is_valid());\n  }\n\n  //! Retrieve non-zero if a distance is valid.\n  bool is_valid(size_t m, size_t n) const {\n    return (m == K ? tuple_h_.is_valid(n)\n                   : (m < K && n == 1 ? tuple_v_.is_valid(m) : false));\n  }\n\n  //! Initialize the distance tuple\n  void initialize(const IndexMetric &measure) {\n    tuple_h_.initialize(measure, K);\n    tuple_v_.initialize(measure);\n  }\n\n  //! Compute the distance between matrix and query\n  template <size_t M, size_t N>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<(K == M) && (K >= N)>::type {\n    tuple_h_.template distance<N>(m, q, dim, out);\n  }\n\n  //! Compute the distance between matrix and query\n  template <size_t M, size_t N = 1u>\n  auto distance(const void *m, const void *q, size_t dim, float *out) const ->\n      typename std::enable_if<(K > M) && (N == 1u)>::type {\n    tuple_v_.template distance<M>(m, q, dim, out);\n  }\n\n private:\n  FlatDistanceTuple<K> tuple_h_{};\n  FlatDistanceTuple<(K >> 1)> tuple_v_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_index_format.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <ailego/container/bitmap.h>\n\nnamespace zvec {\nnamespace core {\n\nusing node_id_t = uint32_t;\nusing key_t = uint64_t;\nusing level_t = int32_t;\nusing dist_t = float;\nusing TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;\nusing CandidateHeap =\n    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;\nconstexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);\nconstexpr key_t kInvalidKey = static_cast<key_t>(-1);\n\n/*! Index Format of Linear Index Header\n */\nstruct LinearIndexHeader {\n  LinearIndexHeader()\n      : header_size(0),\n        total_vector_count(0),\n        linear_body_size(0),\n        linear_list_count(0),\n        block_vector_count(0),\n        block_size(0),\n        block_count(0),\n        index_meta_size(0) {\n    memset(reserved_, 0, sizeof(reserved_));\n    memset(index_meta, 0, sizeof(index_meta));\n  }\n  uint32_t header_size{0};\n  uint32_t total_vector_count{0};\n  uint64_t linear_body_size{0};\n  uint32_t linear_list_count{0};\n  uint32_t block_vector_count{0};\n  uint32_t block_size{0};\n  uint32_t block_count{0};\n  uint32_t index_meta_size{0};\n  char reserved_[28] = {0};\n  char index_meta[0];\n};\n\n/*! 
Index Format of Linear Index Meta for each Linear list\n */\nstruct LinearListMeta {\n  LinearListMeta() : offset(0), block_count(0), vector_count(0), id_offset(0) {\n    memset(reserved_, 0, sizeof(reserved_));\n  }\n  uint64_t offset{0};\n  uint32_t block_count{0};\n  uint32_t vector_count{0};\n  uint32_t id_offset{0};\n  char reserved_[16] = {0};\n};\n\n/*! Index Format of Location in Linear Index for each vector\n */\nstruct LinearVecLocation {\n  LinearVecLocation(size_t off, bool col)\n      : offset(off), column_major(col), reserved(0u) {}\n\n  uint64_t offset : 48;       // feature offset in posting block segment\n  uint64_t column_major : 1;  // column major if true\n  uint64_t reserved : 15;\n};\n\n/*! Index Format of Integer Quantizer params for each linear list\n */\nstruct LinearIntegerQuantizerParams {\n  float scale{1.0};\n  float bias{0.0};\n};\n\n/*! Location of Vectors Block in Storage Segment\n */\nstruct BlockLocation {\n  uint32_t segment_id{0};\n  uint32_t block_index{0};\n};\n\n/*! The Header of a Block in Storage Segment\n */\nstruct BlockHeader {\n  BlockHeader() : vector_count(0u), column_major(0u), reserved(0u) {}\n  BlockLocation next;\n  uint16_t vector_count{0};\n  uint16_t column_major : 1;\n  uint16_t reserved : 15;\n};\n\nstruct DeletionMap {\n  void set(uint32_t index) {\n    bitset.set(index);\n  }\n\n  void reset(uint32_t index) {\n    bitset.reset(index);\n  }\n\n  bool test(uint32_t index) const {\n    return bitset.test(index);\n  }\n\n  bool is_dirty() const {\n    return bitset.test_any();\n  }\n\n  ailego::FixedBitset<32> bitset{};\n};\n\nstatic_assert(sizeof(DeletionMap) == 4, \"DeletionMap must be 4 bytes\");\n\n/*! 
Meta Information of Streamer Entity\n */\nstruct StreamerLinearMeta {\n  StreamerLinearMeta()\n      : create_time(0),\n        update_time(0),\n        revision_id(0),\n        segment_count(0),\n        segment_size(0) {\n    memset(reserved_, 0, sizeof(reserved_));\n  }\n  uint64_t create_time{0};\n  uint64_t update_time{0};\n  uint64_t revision_id{0};\n  uint32_t segment_count{0};\n  uint32_t segment_size{0};\n  uint8_t reserved_[32] = {0};\n  LinearIndexHeader header;\n};\n\n/*! Location of Vector in Storage Segment\n */\nstruct VectorLocation {\n  //! Constructor\n  VectorLocation(void)\n      : segment_id(0u), column_major(0u), reserved(0u), offset(0u) {}\n\n  //! Constructor\n  VectorLocation(uint32_t id, bool col, uint32_t off)\n      : segment_id(id), column_major(col), reserved(0u), offset(off) {}\n\n  uint32_t segment_id{0};\n  uint16_t column_major : 1;\n  uint16_t reserved : 15;\n  uint32_t offset{0};\n\n public:\n  bool operator==(const VectorLocation &other) const {\n    return segment_id == other.segment_id &&\n           column_major == other.column_major && offset == other.offset;\n  }\n};\n\n// static_assert(sizeof(VectorLocation) == sizeof(uint64_t),\n//               \"VectorLocation must be size of 8 bytes\");\n\nstruct KeyInfo {\n  KeyInfo(void) : centroid_idx(0u) {}\n  KeyInfo(uint32_t idx, const VectorLocation &loc)\n      : centroid_idx(idx), location(loc) {}\n  KeyInfo(VectorLocation loc) : location(loc) {}\n  uint32_t centroid_idx{0};\n  VectorLocation location;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"flat_searcher.h\"\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_searcher.h>\n#include \"flat_distance_matrix.h\"\n#include \"flat_searcher_context.h\"\n#include \"flat_searcher_provider.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\ntemplate <size_t BATCH_SIZE>\nIndexProvider::Pointer FlatSearcher<BATCH_SIZE>::create_provider(void) const {\n  std::lock_guard<std::mutex> lock(mapping_mutex_);\n\n  if (mapping_.empty()) {\n    auto mapping_segment = container_->get(FLAT_SEGMENT_MAPPING_SEG_ID);\n    if (!mapping_segment) {\n      LOG_ERROR(\"Failed to fetch segment %s\",\n                FLAT_SEGMENT_MAPPING_SEG_ID.c_str());\n      return nullptr;\n    }\n\n    if (mapping_segment->data_size() % sizeof(uint32_t) != 0) {\n      LOG_ERROR(\"Invalid data size %zu of mapping segment\",\n                mapping_segment->data_size());\n      return nullptr;\n    }\n\n    size_t mapping_count = mapping_segment->data_size() / sizeof(uint32_t);\n    if (mapping_count * meta_.element_size() !=\n        features_segment_->data_size()) {\n      LOG_ERROR(\"Invalid data size %zd of mapping segment\",\n                features_segment_->data_size());\n      return nullptr;\n    }\n\n    const uint32_t *mapping = nullptr;\n    if (mapping_segment->read(0, reinterpret_cast<const void 
**>(&mapping),\n                              mapping_segment->data_size()) !=\n        mapping_segment->data_size()) {\n      LOG_ERROR(\"Failed to read data (%zu bytes) from mapping segment\",\n                mapping_segment->data_size());\n      return nullptr;\n    }\n    mapping_.clear();\n    mapping_.reserve(mapping_count);\n    std::copy(mapping, mapping + mapping_count, std::back_inserter(mapping_));\n  }\n  return IndexProvider::Pointer(new (std::nothrow)\n                                    FlatSearcherProvider<BATCH_SIZE>(this));\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcher<BATCH_SIZE>::load(IndexStorage::Pointer cntr,\n                                   IndexMetric::Pointer measure) {\n  ailego::ElapsedTime stamp;\n  if (!cntr) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = IndexHelper::DeserializeFromStorage(cntr.get(), &meta_);\n  if (error_code != 0) {\n    LOG_ERROR(\n        \"Failed to deserialize index meta from container %s, error=%d, %s\",\n        cntr->name().c_str(), error_code, IndexError::What(error_code));\n    return error_code;\n  }\n\n  if (!measure) {\n    error_code = InitializeMetric(meta_, &measure_);\n    if (error_code != 0) {\n      LOG_ERROR(\"Failed to initialize index measure %s, error=%d, %s\",\n                meta_.metric_name().c_str(), error_code,\n                IndexError::What(error_code));\n      return error_code;\n    }\n    if (measure_->query_metric()) {\n      measure_ = measure_->query_metric();\n    }\n  } else {\n    if (!measure->is_matched(meta_)) {\n      LOG_ERROR(\n          \"The index measure is unmatched with index meta from container.\");\n      return IndexError_Mismatch;\n    }\n    measure_ = std::move(measure);\n  }\n\n  column_major_order_ = (meta_.major_order() == IndexMeta::MO_COLUMN);\n  distance_matrix_.initialize(*measure_);\n\n  if (column_major_order_) {\n    if (!distance_matrix_.is_valid()) {\n      LOG_ERROR(\"Lack of distance functions to support 
column index.\");\n      return IndexError_Unsupported;\n    }\n  } else {\n    if (!distance_matrix_.is_valid(1, 1)) {\n      LOG_ERROR(\"Lack of distance functions to support row index.\");\n      return IndexError_Unsupported;\n    }\n  }\n\n  auto keys_segment = cntr->get(FLAT_SEGMENT_KEYS_SEG_ID);\n  if (!keys_segment) {\n    LOG_ERROR(\"Failed to fetch segment %s\", FLAT_SEGMENT_KEYS_SEG_ID.c_str());\n    return IndexError_NoExist;\n  }\n  features_segment_ = cntr->get(FLAT_SEGMENT_FEATURES_SEG_ID);\n  if (!features_segment_) {\n    LOG_ERROR(\"Failed to fetch segment %s\", FLAT_SEGMENT_KEYS_SEG_ID.c_str());\n    return IndexError_NoExist;\n  }\n\n  if (keys_segment->data_size() % sizeof(uint64_t) != 0) {\n    LOG_ERROR(\"Invalid data size %zu of keys segment\",\n              keys_segment->data_size());\n    return IndexError_InvalidLength;\n  }\n\n  size_t keys_count = keys_segment->data_size() / sizeof(uint64_t);\n  if (keys_count * meta_.element_size() != features_segment_->data_size()) {\n    LOG_ERROR(\"Invalid data size %zd of features segment\",\n              features_segment_->data_size());\n    return IndexError_Mismatch;\n  }\n\n  if (keys_segment->read(0, reinterpret_cast<const void **>(&keys_),\n                         keys_segment->data_size()) !=\n      keys_segment->data_size()) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from keys segment\",\n              keys_segment->data_size());\n    return IndexError_ReadData;\n  }\n\n  for (size_t i = 0; i < keys_count; i++) {\n    key_id_mapping_[keys_[i]] = i;\n  }\n\n  container_ = cntr;\n  magic_ = IndexContext::GenerateMagic();\n  stats_.set_loaded_count(keys_count);\n  stats_.set_loaded_costtime(stamp.milli_seconds());\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcher<BATCH_SIZE>::search_impl(const void *query,\n                                          const IndexQueryMeta &qmeta,\n                                          Context::Pointer &context) const {\n  
ailego_assert(query && !!context);\n  ailego_assert(measure_->is_matched(meta_, qmeta));\n\n  FlatSearcherContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force searcher context\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->magic() != magic_) {\n    bf_context->reset(this);\n  }\n  if (bf_context->group_by_search()) {\n    return bf_context->group_by_search_impl(query, qmeta, 1);\n  } else {\n    return (column_major_order_ ? bf_context->search_column(query, qmeta)\n                                : bf_context->search_row(query, qmeta));\n  }\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcher<BATCH_SIZE>::search_impl(const void *query,\n                                          const IndexQueryMeta &qmeta,\n                                          uint32_t count,\n                                          Context::Pointer &context) const {\n  ailego_assert(query && count && !!context);\n  ailego_assert(measure_->is_matched(meta_, qmeta));\n\n  FlatSearcherContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force searcher context\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->magic() != magic_) {\n    bf_context->reset(this);\n  }\n\n  if (bf_context->group_by_search()) {\n    return bf_context->group_by_search_impl(query, qmeta, count);\n  } else {\n    return (column_major_order_ ? 
bf_context->search_column(query, qmeta, count)\n                                : bf_context->search_row(query, qmeta, count));\n  }\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcher<BATCH_SIZE>::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  ailego_assert(query && count && !!context);\n  ailego_assert(measure_->is_matched(meta_, qmeta));\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  FlatSearcherContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force searcher context\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->magic() != magic_) {\n    bf_context->reset(this);\n  }\n\n  return bf_context->search_bf_by_p_keys_impl(query, p_keys, qmeta, count);\n}\n\ntemplate <size_t BATCH_SIZE>\nIndexSearcher::Context::Pointer FlatSearcher<BATCH_SIZE>::create_context(\n    void) const {\n  return IndexSearcher::Context::Pointer(\n      new FlatSearcherContext<BATCH_SIZE>(this));\n}\n\nINDEX_FACTORY_REGISTER_SEARCHER_ALIAS(LinearSearcher, FlatSearcher<32>);\nINDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher, FlatSearcher<32>);\nINDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher16, FlatSearcher<16>);\nINDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher32, FlatSearcher<32>);\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n#include <unordered_map>\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_searcher.h>\n#include \"flat_distance_matrix.h\"\n#include \"flat_index_format.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Flat Searcher\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatSearcher : public IndexSearcher {\n public:\n  //! Destructor\n  virtual ~FlatSearcher(void) = default;\n\n  //! Initialize Searcher\n  int init(const ailego::Params &index_params) override {\n    params_ = index_params;\n    read_block_size_ = FLAT_DEFAULT_READ_BLOCK_SIZE;\n    index_params.get(PARAM_FLAT_READ_BLOCK_SIZE, &read_block_size_);\n    return 0;\n  }\n\n  //! Cleanup Searcher\n  int cleanup(void) override {\n    return this->unload();\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer cntr, IndexMetric::Pointer measure) override;\n\n  //! Unload index\n  int unload(void) override {\n    container_ = nullptr;\n    measure_ = nullptr;\n    features_segment_ = nullptr;\n    keys_ = nullptr;\n    key_id_mapping_.clear();\n    return 0;\n  }\n\n  //! Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const override {\n    return this->search_impl(query, qmeta, context);\n  }\n\n  //! 
Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     uint32_t count, Context::Pointer &context) const override {\n    return this->search_impl(query, qmeta, count, context);\n  }\n\n  //! Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const override;\n\n  //! Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const void *query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               Context::Pointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const void *query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               Context::Pointer &context) const override;\n\n  //! Retrieve statistics\n  const IndexSearcher::Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Create a searcher context\n  IndexSearcher::Context::Pointer create_context(void) const override;\n\n  //! Create a searcher provider\n  IndexProvider::Pointer create_provider(void) const override;\n\n  //! Retrieve magic number\n  uint32_t magic(void) const {\n    return magic_;\n  }\n\n  //! 
Retrieve block size of data read\n  uint32_t read_block_size(void) const {\n    return read_block_size_;\n  }\n\n  //! Retrieve primary key via index id\n  uint64_t key(size_t i) const {\n    return keys_[i];\n  }\n\n  //! Retrieve index id via primary key\n  node_id_t get_id(key_t key) const {\n    auto it = key_id_mapping_.find(key);\n    if (it != key_id_mapping_.end()) {\n      return it->second;\n    } else {\n      return kInvalidNodeId;\n    }\n  }\n\n  //! Retrieve local index via index id\n  uint32_t local_index(size_t i) const {\n    return mapping_[i];\n  }\n\n  //! Check whether the index is in column-major order\n  inline bool column_major_order(void) const {\n    return column_major_order_;\n  }\n\n  //! Retrieve the distance matrix\n  const FlatDistanceMatrix<BATCH_SIZE> &distance_matrix(void) const {\n    return distance_matrix_;\n  }\n\n  //! Clone a features segment\n  IndexStorage::Segment::Pointer clone_features_segment(void) const {\n    return features_segment_->clone();\n  }\n\n  const void *get_vector(key_t key) const override {\n    auto provider = this->create_provider();\n    return provider->get_vector(key);\n  }\n\n private:\n  //! Members\n  const uint64_t *keys_{nullptr};\n  std::unordered_map<key_t, node_id_t> key_id_mapping_;\n  uint32_t magic_{IndexContext::GenerateMagic()};\n  uint32_t read_block_size_{FLAT_DEFAULT_READ_BLOCK_SIZE};\n  bool column_major_order_{false};\n  IndexMeta meta_{};\n  IndexStorage::Pointer container_{};\n  IndexMetric::Pointer measure_{};\n  ailego::Params params_{};\n  IndexStorage::Segment::Pointer features_segment_{};\n  mutable std::vector<uint32_t> mapping_{};\n  mutable std::mutex mapping_mutex_{};\n  FlatDistanceMatrix<BATCH_SIZE> distance_matrix_{};\n  IndexSearcher::Stats stats_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_searcher_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_document.h>\n#include <zvec/core/framework/index_error.h>\n#include \"flat_index_format.h\"\n#include \"flat_searcher.h\"\n#include \"flat_utility.h\"\n\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Searcher Context\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatSearcherContext : public IndexSearcher::Context {\n public:\n  //! Constructor\n  FlatSearcherContext(const FlatSearcher<BATCH_SIZE> *owner) {\n    this->reset(owner);\n  }\n\n  //! Destructor\n  virtual ~FlatSearcherContext(void) {}\n\n  //! Set topk of search result\n  void set_topk(uint32_t topk) override {\n    topk_ = topk;\n  }\n\n  //! Retrieve search result\n  const IndexDocumentList &result(void) const override {\n    return result_heaps_.at(0);\n  }\n\n  //! Retrieve search result with index\n  const IndexDocumentList &result(size_t index) const override {\n    return result_heaps_.at(index);\n  }\n\n  //! Retrieve result object for output\n  IndexDocumentList *mutable_result(size_t idx) override {\n    return &result_heaps_.at(idx);\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n\n  //! 
Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(\n      size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  //! Update the parameters of context\n  int update(const ailego::Params & /*params*/) override {\n    return 0;\n  }\n\n  //! Retrieve magic number\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  //! Get group topk\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n\n  //! Get group num\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n\n  inline std::map<std::string, TopkHeap> &group_topk_heaps() {\n    return group_topk_heaps_;\n  }\n\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n\n  inline void resize_group_results(size_t size) {\n    if (group_by_search()) {\n      group_results_.resize(size);\n    }\n  }\n\n  void topk_to_group_result(uint32_t idx) {\n    ailego_assert_with(idx < group_results_.size(), \"invalid idx\");\n\n    group_results_[idx].clear();\n\n    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;\n    std::vector<std::pair<std::string, float>> best_score_in_groups;\n    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();\n         itr++) {\n      const std::string &group_id = (*itr).first;\n      auto &heap = (*itr).second;\n      heap.sort();\n\n      if (heap.size() > 0) {\n        float best_score = heap[0].second;\n        best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n      }\n    }\n\n    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n              [](const std::pair<std::string, float> &a,\n                 const std::pair<std::string, float> &b) -> int {\n                return a.second < b.second;\n              });\n\n    // truncate to group num\n    for (uint32_t i = 0; i < group_num() && i < 
best_score_in_groups.size();\n         ++i) {\n      const std::string &group_id = best_score_in_groups[i].first;\n\n      group_topk_list.emplace_back(\n          std::make_pair(group_id, group_topk_heaps_[group_id]));\n    }\n\n    group_results_[idx].resize(group_topk_list.size());\n\n    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {\n      const std::string &group_id = group_topk_list[i].first;\n      group_results_[idx][i].set_group_id(group_id);\n\n      uint32_t size = std::min(\n          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));\n\n      for (uint32_t j = 0; j < size; ++j) {\n        auto score = group_topk_list[i].second[j].second;\n        if (score > this->threshold()) {\n          break;\n        }\n\n        node_id_t id = group_topk_list[i].second[j].first;\n\n        auto provider = owner_->create_provider();\n\n        if (fetch_vector_) {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n              id, score, id, provider->get_vector(id));\n        } else {\n          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id);\n        }\n      }\n    }\n  }\n\n  //! Get if group by search\n  bool group_by_search() {\n    return group_num_ > 0;\n  }\n\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n    group_topk_heaps_.clear();\n  }\n\n  void reset() override {}\n\n  //! Reset the context\n  void reset(const FlatSearcher<BATCH_SIZE> *owner) {\n    magic_ = owner->magic();\n    feature_size_ = owner->meta().element_size();\n\n    uint32_t block_size = feature_size_ * BATCH_SIZE;\n    actual_read_size_ =\n        (owner->read_block_size() + block_size - 1) / block_size * block_size;\n    features_segment_ = owner->clone_features_segment();\n    owner_ = owner;\n  }\n\n  //! 
Similarity search\n  int search_row(const void *query, const IndexQueryMeta &qmeta) {\n    return (this->filter().is_valid()\n                ? this->search_row_filter(query, qmeta)\n                : this->search_row_nofilter(query, qmeta));\n  }\n\n  //! Similarity search\n  int search_row(const void *query, const IndexQueryMeta &qmeta, size_t count) {\n    return (this->filter().is_valid()\n                ? this->batch_search_row_filter(query, qmeta, count)\n                : this->batch_search_row_nofilter(query, qmeta, count));\n  }\n\n  //! Similarity search\n  int search_column(const void *query, const IndexQueryMeta &qmeta) {\n    return (this->filter().is_valid()\n                ? this->search_column_filter(query, qmeta)\n                : this->search_column_nofilter(query, qmeta));\n  }\n\n  //! Similarity search\n  int search_column(const void *query, const IndexQueryMeta &qmeta,\n                    size_t count) {\n    return (this->filter().is_valid()\n                ? this->batch_search_column_filter(query, qmeta, count)\n                : this->batch_search_column_nofilter(query, qmeta, count));\n  }\n\n  int group_by_search_impl(const void *query, const IndexQueryMeta &qmeta,\n                           uint32_t count);\n\n  int search_bf_by_p_keys_impl(const void *query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count);\n\n protected:\n  //! 
Enqueue items into the search heaps (without filter)\n  template <size_t K>\n  auto batch_enqueue_nofilter(const void *block, size_t block_index,\n                              size_t query_index, const IndexQueryMeta &qmeta,\n                              size_t query_count) ->\n      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {\n    size_t query_batch_count = query_count / K;\n\n    for (size_t i = 0; i != query_batch_count; ++i) {\n      owner_->distance_matrix().template distance<BATCH_SIZE, K>(\n          block, &batch_queries_[query_index * qmeta.element_size()],\n          qmeta.dimension(), scores_);\n\n      for (size_t k = 0; k != K; ++k) {\n        IndexDocumentHeap *heap = &result_heaps_[query_index++];\n        for (size_t j = 0; j != BATCH_SIZE; ++j) {\n          heap->emplace(0, scores_[k * BATCH_SIZE + j], block_index + j);\n        }\n      }  // end of for\n    }  // end of for\n\n    size_t query_left_count = query_count % K;\n    if (query_left_count != 0) {\n      this->batch_enqueue_nofilter<(K >> 1)>(block, block_index, query_index,\n                                             qmeta, query_left_count);\n    }\n  }\n\n  //! Enqueue items into the search heaps (without filter)\n  template <size_t K>\n  auto batch_enqueue_nofilter(const void *block, size_t block_index,\n                              size_t query_index, const IndexQueryMeta &qmeta,\n                              size_t query_count) ->\n      typename std::enable_if<K == 1>::type {\n    ailego_assert(query_count == 1);\n    (void)query_count;\n\n    owner_->distance_matrix().template distance<BATCH_SIZE, 1>(\n        block, &batch_queries_[query_index * qmeta.element_size()],\n        qmeta.dimension(), scores_);\n\n    IndexDocumentHeap *heap = &result_heaps_[query_index];\n    for (size_t i = 0; i != BATCH_SIZE; ++i) {\n      heap->emplace(0, scores_[i], block_index + i);\n    }\n  }\n\n  //! 
Enqueue items into the search heaps (with filter)\n  template <size_t K>\n  auto batch_enqueue_filter(const void *block, size_t block_index,\n                            size_t block_mask, size_t query_index,\n                            const IndexQueryMeta &qmeta, size_t query_count) ->\n      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {\n    size_t query_batch_count = query_count / K;\n\n    for (size_t i = 0; i != query_batch_count; ++i) {\n      owner_->distance_matrix().template distance<BATCH_SIZE, K>(\n          block, &batch_queries_[query_index * qmeta.element_size()],\n          qmeta.dimension(), scores_);\n\n      for (size_t k = 0; k != K; ++k) {\n        IndexDocumentHeap *heap = &result_heaps_[query_index++];\n        for (size_t j = 0; j != BATCH_SIZE; ++j) {\n          if ((block_mask & (1 << j)) != 0) {\n            heap->emplace(0, scores_[k * BATCH_SIZE + j], block_index + j);\n          }\n        }\n      }  // end of for\n    }  // end of for\n\n    size_t query_left_count = query_count % K;\n    if (query_left_count != 0) {\n      this->batch_enqueue_filter<(K >> 1)>(\n          block, block_index, block_mask, query_index, qmeta, query_left_count);\n    }\n  }\n\n  //! 
Enqueue items into the search heaps (with filter)\n  template <size_t K>\n  auto batch_enqueue_filter(const void *block, size_t block_index,\n                            size_t block_mask, size_t query_index,\n                            const IndexQueryMeta &qmeta, size_t query_count) ->\n      typename std::enable_if<K == 1>::type {\n    ailego_assert(query_count == 1);\n    (void)query_count;\n\n    owner_->distance_matrix().template distance<BATCH_SIZE, 1>(\n        block, &batch_queries_[query_index * qmeta.element_size()],\n        qmeta.dimension(), scores_);\n\n    IndexDocumentHeap *heap = &result_heaps_[query_index];\n    for (size_t i = 0; i != BATCH_SIZE; ++i) {\n      if ((block_mask & (1 << i)) != 0) {\n        heap->emplace(0, scores_[i], block_index + i);\n      }\n    }\n  }\n\n  //! Enqueue items into the search heaps (without filter)\n  template <size_t K>\n  auto single_enqueue_nofilter(const void *feature, size_t feature_index,\n                               size_t query_index, const IndexQueryMeta &qmeta,\n                               size_t query_count) ->\n      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {\n    size_t query_batch_count = query_count / K;\n\n    for (size_t i = 0; i != query_batch_count; ++i) {\n      owner_->distance_matrix().template distance<K, 1>(\n          &batch_queries_[query_index * qmeta.element_size()], feature,\n          qmeta.dimension(), scores_);\n\n      for (size_t k = 0; k != K; ++k) {\n        result_heaps_[query_index++].emplace(0, scores_[k], feature_index);\n      }\n    }\n    size_t query_left_count = query_count % K;\n    if (query_left_count != 0) {\n      this->single_enqueue_nofilter<(K >> 1)>(\n          feature, feature_index, query_index, qmeta, query_left_count);\n    }\n  }\n\n  //! 
Enqueue items into the search heaps (without filter)\n  template <size_t K>\n  auto single_enqueue_nofilter(const void *feature, size_t feature_index,\n                               size_t query_index, const IndexQueryMeta &qmeta,\n                               size_t query_count) ->\n      typename std::enable_if<K == 1>::type {\n    ailego_assert(query_count == 1);\n    (void)query_count;\n\n    owner_->distance_matrix().template distance<1>(\n        feature, &batch_queries_[query_index * qmeta.element_size()],\n        qmeta.dimension(), scores_);\n    result_heaps_[query_index].emplace(0, scores_[0], feature_index);\n  }\n\n protected:\n  //! Similarity search (1 column without filter)\n  int search_column_nofilter(const void *query, const IndexQueryMeta &qmeta);\n\n  //! Similarity search (1 column with filter)\n  int search_column_filter(const void *query, const IndexQueryMeta &qmeta);\n\n  //! Similarity search (1 row without filter)\n  int search_row_nofilter(const void *query, const IndexQueryMeta &qmeta);\n\n  //! Similarity search (1 row with filter)\n  int search_row_filter(const void *query, const IndexQueryMeta &qmeta);\n\n  //! Similarity search (n columns without filter)\n  int batch_search_column_nofilter(const void *query,\n                                   const IndexQueryMeta &qmeta,\n                                   size_t query_count);\n\n  //! Similarity search (n columns with filter)\n  int batch_search_column_filter(const void *query, const IndexQueryMeta &qmeta,\n                                 size_t query_count);\n\n  //! Similarity search (n rows without filter)\n  int batch_search_row_nofilter(const void *query, const IndexQueryMeta &qmeta,\n                                size_t query_count);\n\n  //! 
Similarity search (n rows with filter)\n  int batch_search_row_filter(const void *query, const IndexQueryMeta &qmeta,\n                              size_t query_count);\n\n private:\n  const FlatSearcher<BATCH_SIZE> *owner_{nullptr};\n  uint32_t magic_{0};\n  uint32_t topk_{0};\n  uint32_t feature_size_{0};\n  uint32_t actual_read_size_{0};\n  IndexStorage::Segment::Pointer features_segment_{};\n  std::vector<IndexDocumentHeap> result_heaps_{1};\n  std::string batch_queries_{};\n  float scores_[BATCH_SIZE * BATCH_SIZE];\n  bool fetch_vector_{false};\n\n  // group\n  uint32_t group_num_{0}, group_topk_{0};\n  std::map<std::string, TopkHeap> group_topk_heaps_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n};\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::search_column_nofilter(\n    const void *query, const IndexQueryMeta &qmeta) {\n  IndexDocumentHeap *heap = &result_heaps_[0];\n  heap->clear();\n  heap->limit(topk_);\n  heap->set_threshold(this->threshold());\n\n  size_t left_size = features_segment_->data_size();\n  size_t block_size = feature_size_ * BATCH_SIZE;\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = this->owner_->distance_matrix();\n\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_; offset += block_size) {\n      matrix.template distance<BATCH_SIZE, 1>(\n          (const char *)data + offset, query, qmeta.dimension(), scores_);\n\n      for (size_t i = 0; i != BATCH_SIZE; ++i) {\n        heap->emplace(0, scores_[i], feature_index++);\n      }\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = 
nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left block features\n  size_t left_size_aligned = left_size / block_size * block_size;\n  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {\n    matrix.template distance<BATCH_SIZE, 1>((const char *)data + offset, query,\n                                            qmeta.dimension(), scores_);\n\n    for (size_t i = 0; i != BATCH_SIZE; ++i) {\n      heap->emplace(0, scores_[i], feature_index++);\n    }\n  }\n\n  // Process left single features\n  for (size_t offset = left_size_aligned; offset < left_size;\n       offset += feature_size_) {\n    float score;\n    matrix.template distance<1>((const char *)data + offset, query,\n                                qmeta.dimension(), &score);\n    heap->emplace(0, score, feature_index++);\n  }\n\n  for (auto &it : *heap) {\n    it.set_key(owner_->key(it.index()));\n  }\n  heap->sort();\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::search_column_filter(\n    const void *query, const IndexQueryMeta &qmeta) {\n  IndexDocumentHeap *heap = &result_heaps_[0];\n  heap->clear();\n  heap->limit(topk_);\n  heap->set_threshold(this->threshold());\n\n  size_t left_size = features_segment_->data_size();\n  size_t block_size = feature_size_ * BATCH_SIZE;\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = owner_->distance_matrix();\n\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < 
actual_read_size_; offset += block_size) {\n      matrix.template distance<BATCH_SIZE, 1>(\n          (const char *)data + offset, query, qmeta.dimension(), scores_);\n\n      for (size_t i = 0; i != BATCH_SIZE; ++i) {\n        uint64_t feature_key = owner_->key(feature_index);\n\n        if (!this->filter()(feature_key)) {\n          if (group_by_search()) {\n          }\n          heap->emplace(feature_key, scores_[i], feature_index);\n        }\n        feature_index += 1;\n      }\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left block features\n  size_t left_size_aligned = left_size / block_size * block_size;\n  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {\n    matrix.template distance<BATCH_SIZE, 1>((const char *)data + offset, query,\n                                            qmeta.dimension(), scores_);\n\n    for (size_t i = 0; i != BATCH_SIZE; ++i) {\n      uint64_t feature_key = owner_->key(feature_index);\n\n      if (!this->filter()(feature_key)) {\n        heap->emplace(feature_key, scores_[i], feature_index);\n      }\n      feature_index += 1;\n    }\n  }\n\n  // Process left single features\n  for (size_t offset = left_size_aligned; offset < left_size;\n       offset += feature_size_) {\n    uint64_t feature_key = owner_->key(feature_index);\n    if (!this->filter()(feature_key)) {\n      float score;\n      matrix.template distance<1>((const char *)data + offset, query,\n                                  qmeta.dimension(), &score);\n      heap->emplace(feature_key, score, feature_index);\n    }\n    feature_index += 1;\n  }\n  heap->sort();\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint 
FlatSearcherContext<BATCH_SIZE>::search_row_nofilter(\n    const void *query, const IndexQueryMeta &qmeta) {\n  IndexDocumentHeap *heap = &result_heaps_[0];\n  heap->clear();\n  heap->limit(topk_);\n  heap->set_threshold(this->threshold());\n\n  size_t left_size = features_segment_->data_size();\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = owner_->distance_matrix();\n\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_;\n         offset += feature_size_) {\n      float score;\n      matrix.template distance<1>((const char *)data + offset, query,\n                                  qmeta.dimension(), &score);\n      heap->emplace(0, score, feature_index++);\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  for (size_t offset = 0; offset < left_size; offset += feature_size_) {\n    float score;\n    matrix.template distance<1>((const char *)data + offset, query,\n                                qmeta.dimension(), &score);\n    heap->emplace(0, score, feature_index++);\n  }\n  for (auto &it : *heap) {\n    it.set_key(owner_->key(it.index()));\n  }\n  heap->sort();\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::search_row_filter(\n    const void *query, const IndexQueryMeta &qmeta) {\n  IndexDocumentHeap *heap = &result_heaps_[0];\n  heap->clear();\n  heap->limit(topk_);\n  
heap->set_threshold(this->threshold());\n\n  size_t left_size = features_segment_->data_size();\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = owner_->distance_matrix();\n\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_;\n         offset += feature_size_) {\n      uint64_t feature_key = owner_->key(feature_index);\n      if (!this->filter()(feature_key)) {\n        float score;\n        matrix.template distance<1>((const char *)data + offset, query,\n                                    qmeta.dimension(), &score);\n        heap->emplace(feature_key, score, feature_index);\n      }\n      feature_index += 1;\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  for (size_t offset = 0; offset < left_size; offset += feature_size_) {\n    uint64_t feature_key = owner_->key(feature_index);\n    if (!this->filter()(feature_key)) {\n      float score;\n      matrix.template distance<1>((const char *)data + offset, query,\n                                  qmeta.dimension(), &score);\n      heap->emplace(feature_key, score, feature_index);\n    }\n    feature_index += 1;\n  }\n  heap->sort();\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::batch_search_column_nofilter(\n    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {\n  // Initialize resources\n  
result_heaps_.resize(query_count);\n  for (auto &heap : result_heaps_) {\n    heap.clear();\n    heap.limit(topk_);\n    heap.set_threshold(this->threshold());\n  }\n\n  // Transpose queries\n  batch_queries_.clear();\n  batch_queries_.reserve(query_count * qmeta.element_size());\n  TransposeQueries<BATCH_SIZE>(query, qmeta, query_count, &batch_queries_);\n\n  size_t left_size = features_segment_->data_size();\n  size_t block_size = feature_size_ * BATCH_SIZE;\n  size_t read_offset = 0;\n  size_t block_index = 0;\n\n  // Process feature blocks\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_; offset += block_size) {\n      this->batch_enqueue_nofilter<BATCH_SIZE>(\n          (const char *)data + offset, block_index, 0, qmeta, query_count);\n      block_index += BATCH_SIZE;\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left block features\n  size_t left_size_aligned = left_size / block_size * block_size;\n  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {\n    this->batch_enqueue_nofilter<BATCH_SIZE>(\n        (const char *)data + offset, block_index, 0, qmeta, query_count);\n    block_index += BATCH_SIZE;\n  }\n\n  // Process left single features\n  for (size_t offset = left_size_aligned; offset < left_size;\n       offset += feature_size_) {\n    this->single_enqueue_nofilter<BATCH_SIZE>(\n       
 (const char *)data + offset, block_index, 0, qmeta, query_count);\n    block_index += 1;\n  }\n\n  // Normalize results\n  for (auto &heap : result_heaps_) {\n    for (auto &it : heap) {\n      it.set_key(owner_->key(it.index()));\n    }\n    heap.sort();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::batch_search_column_filter(\n    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {\n  // Initialize resources\n  result_heaps_.resize(query_count);\n  for (auto &heap : result_heaps_) {\n    heap.clear();\n    heap.limit(topk_);\n    heap.set_threshold(this->threshold());\n  }\n\n  // Transpose queries\n  batch_queries_.clear();\n  batch_queries_.reserve(query_count * qmeta.element_size());\n  TransposeQueries<BATCH_SIZE>(query, qmeta, query_count, &batch_queries_);\n\n  size_t left_size = features_segment_->data_size();\n  size_t block_size = feature_size_ * BATCH_SIZE;\n  size_t read_offset = 0;\n  size_t block_index = 0;\n\n  // Process feature blocks\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_; offset += block_size) {\n      size_t block_mask = 0;\n      for (size_t i = 0; i != BATCH_SIZE; ++i) {\n        if (!this->filter()(this->owner_->key(block_index + i))) {\n          block_mask |= (1 << i);\n        }\n      }\n      if (block_mask != 0) {\n        this->batch_enqueue_filter<BATCH_SIZE>((const char *)data + offset,\n                                               block_index, block_mask, 0,\n                                               qmeta, query_count);\n      }\n      block_index += BATCH_SIZE;\n    }\n    read_offset += actual_read_size_;\n    
left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left block features\n  size_t left_size_aligned = left_size / block_size * block_size;\n  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {\n    size_t block_mask = 0;\n    for (size_t i = 0; i != BATCH_SIZE; ++i) {\n      if (!this->filter()(this->owner_->key(block_index + i))) {\n        block_mask |= (1 << i);\n      }\n    }\n    if (block_mask != 0) {\n      this->batch_enqueue_filter<BATCH_SIZE>((const char *)data + offset,\n                                             block_index, block_mask, 0, qmeta,\n                                             query_count);\n    }\n    block_index += BATCH_SIZE;\n  }\n\n  // Process left single features\n  for (size_t offset = left_size_aligned; offset < left_size;\n       offset += feature_size_) {\n    if (!this->filter()(owner_->key(block_index))) {\n      this->single_enqueue_nofilter<BATCH_SIZE>(\n          (const char *)data + offset, block_index, 0, qmeta, query_count);\n    }\n    block_index += 1;\n  }\n\n  // Normalize results\n  for (auto &heap : result_heaps_) {\n    for (auto &it : heap) {\n      it.set_key(owner_->key(it.index()));\n    }\n    heap.sort();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::batch_search_row_nofilter(\n    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {\n  // Initialize resources\n  result_heaps_.resize(query_count);\n  for (auto &heap : result_heaps_) {\n    heap.clear();\n    heap.limit(topk_);\n    heap.set_threshold(this->threshold());\n  }\n\n  size_t left_size = features_segment_->data_size();\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = 
owner_->distance_matrix();\n\n  // Process feature blocks\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_;\n         offset += feature_size_) {\n      size_t query_offset = 0;\n      const void *feature = (const char *)data + offset;\n\n      for (auto &heap : result_heaps_) {\n        float score;\n        matrix.template distance<1>(feature, (const char *)query + query_offset,\n                                    qmeta.dimension(), &score);\n        heap.emplace(0, score, feature_index);\n        query_offset += qmeta.element_size();\n      }\n      feature_index += 1;\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left features\n  for (size_t offset = 0; offset < left_size; offset += feature_size_) {\n    size_t query_offset = 0;\n    const void *feature = (const char *)data + offset;\n\n    for (auto &heap : result_heaps_) {\n      float score;\n      matrix.template distance<1>(feature, (const char *)query + query_offset,\n                                  qmeta.dimension(), &score);\n      heap.emplace(0, score, feature_index);\n      query_offset += qmeta.element_size();\n    }\n    feature_index += 1;\n  }\n\n  // Normalize results\n  for (auto &heap : result_heaps_) {\n    for (auto &it : heap) {\n      it.set_key(owner_->key(it.index()));\n    }\n    heap.sort();\n  }\n  return 0;\n}\n\ntemplate <size_t 
BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::batch_search_row_filter(\n    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {\n  // Initialize resources\n  result_heaps_.resize(query_count);\n  for (auto &heap : result_heaps_) {\n    heap.clear();\n    heap.limit(topk_);\n    heap.set_threshold(this->threshold());\n  }\n\n  size_t left_size = features_segment_->data_size();\n  size_t read_offset = 0;\n  size_t feature_index = 0;\n  auto matrix = owner_->distance_matrix();\n\n  // Process feature blocks\n  while (left_size >= actual_read_size_) {\n    const void *data = nullptr;\n    if (features_segment_->read(read_offset, &data, actual_read_size_) !=\n        actual_read_size_) {\n      LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                actual_read_size_);\n      return IndexError_ReadData;\n    }\n\n    for (size_t offset = 0; offset < actual_read_size_;\n         offset += feature_size_) {\n      uint64_t feature_key = owner_->key(feature_index);\n\n      if (!this->filter()(feature_key)) {\n        size_t query_offset = 0;\n        const void *feature = (const char *)data + offset;\n\n        for (auto &heap : result_heaps_) {\n          float score;\n          matrix.template distance<1>(feature,\n                                      (const char *)query + query_offset,\n                                      qmeta.dimension(), &score);\n          heap.emplace(feature_key, score, feature_index);\n          query_offset += qmeta.element_size();\n        }\n      }\n      feature_index += 1;\n    }\n    read_offset += actual_read_size_;\n    left_size -= actual_read_size_;\n  }\n\n  const void *data = nullptr;\n  if (features_segment_->read(read_offset, &data, left_size) != left_size) {\n    LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n              left_size);\n    return IndexError_ReadData;\n  }\n\n  // Process left features\n  for (size_t offset = 0; offset < left_size; offset 
+= feature_size_) {\n    uint64_t feature_key = owner_->key(feature_index);\n\n    if (!this->filter()(feature_key)) {\n      size_t query_offset = 0;\n      const void *feature = (const char *)data + offset;\n\n      for (auto &heap : result_heaps_) {\n        float score;\n        matrix.template distance<1>(feature, (const char *)query + query_offset,\n                                    qmeta.dimension(), &score);\n        heap.emplace(feature_key, score, feature_index);\n        query_offset += qmeta.element_size();\n      }\n    }\n    feature_index += 1;\n  }\n\n  // Normalize results\n  for (auto &heap : result_heaps_) {\n    heap.sort();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::group_by_search_impl(\n    const void *query, const IndexQueryMeta &qmeta, uint32_t count) {\n  this->resize_group_results(count);\n  if (!this->group_by().is_valid()) {\n    LOG_ERROR(\"Invalid group-by function\");\n    return IndexError_InvalidArgument;\n  }\n\n  std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {\n    return this->group_by()(key);\n  };\n\n  auto provider = owner_->create_provider();\n\n  for (size_t q = 0; q < count; ++q) {\n    this->group_topk_heaps().clear();\n\n    for (node_id_t id = 0; id < provider->count(); ++id) {\n      if (!this->filter().is_valid() || !this->filter()(owner_->key(id))) {\n        dist_t dist = 0;\n        owner_->distance_matrix().template distance<1>(\n            query, provider->get_vector(owner_->key(id)), provider->dimension(),\n            &dist);\n\n        std::string group_id = group_by(owner_->key(id));\n        auto &topk_heap = this->group_topk_heaps()[group_id];\n        if (topk_heap.empty()) {\n          topk_heap.limit(this->group_topk());\n        }\n        topk_heap.emplace(id, dist);\n      }\n    }\n    this->topk_to_group_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n  return 0;\n}\n\ntemplate <size_t 
BATCH_SIZE>\nint FlatSearcherContext<BATCH_SIZE>::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count) {\n  auto provider = owner_->create_provider();\n  if (this->group_by_search()) {\n    this->resize_group_results(count);\n    if (!this->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {\n      return this->group_by()(key);\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      this->group_topk_heaps().clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!this->filter().is_valid() || !this->filter()(pk)) {\n          dist_t dist = 0;\n          owner_->distance_matrix().template distance<1>(\n              query, provider->get_vector(pk), provider->dimension(), &dist);\n\n          std::string group_id = group_by(pk);\n          auto &topk_heap = this->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(this->group_topk());\n          }\n          topk_heap.emplace(owner_->get_id(pk), dist);\n        }\n      }\n      this->topk_to_group_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    result_heaps_.resize(count);\n    for (auto &heap : result_heaps_) {\n      heap.clear();\n      heap.limit(topk_);\n      heap.set_threshold(this->threshold());\n    }\n    for (size_t q = 0; q < count; ++q) {\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!this->filter().is_valid() || !this->filter()(pk)) {\n          dist_t dist = 0;\n          owner_->distance_matrix().template distance<1>(\n              query, provider->get_vector(pk), provider->dimension(), &dist);\n          
result_heaps_[q].emplace(pk, dist, owner_->get_id(pk));\n        }\n      }\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n    for (auto &heap : result_heaps_) {\n      heap.sort();\n    }\n  }\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_searcher_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include \"flat_distance_matrix.h\"\n#include \"flat_searcher.h\"\n// #include \"flat_streamer.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Searcher Provider\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatSearcherProvider : public IndexProvider {\n public:\n  //! Constructor\n  FlatSearcherProvider(const FlatSearcher<BATCH_SIZE> *owner) {\n    feature_size_ = owner->meta().element_size();\n    features_segment_ = owner->clone_features_segment();\n    total_vector_count_ =\n        features_segment_->data_size() / owner->meta().element_size();\n    owner_ = owner;\n    block_buffer_.resize(BATCH_SIZE * feature_size_);\n  }\n\n  //! Create a new iterator\n  IndexProvider::Iterator::Pointer create_iterator(void) override {\n    return IndexProvider::Iterator::Pointer(\n        new (std::nothrow) FlatSearcherProvider::Iterator(owner_));\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return total_vector_count_;\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const override {\n    return owner_->meta().dimension();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return owner_->meta().data_type();\n  }\n\n  //! 
Retrieve vector size in bytes\n  size_t element_size(void) const override {\n    return owner_->meta().element_size();\n  }\n\n  //! Retrieve a vector using a primary key\n  const void *get_vector(uint64_t key) const override {\n    return this->get_vector_by_index(owner_->get_id(key));\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_->name();\n  }\n\n protected:\n  /*! Brute Force Provider Iterator\n   */\n  class Iterator : public IndexProvider::Iterator {\n   public:\n    //! Constructor\n    Iterator(const FlatSearcher<BATCH_SIZE> *owner) {\n      block_buffer_.resize(BATCH_SIZE * owner->meta().element_size());\n      feature_size_ = owner->meta().element_size();\n      features_segment_ = owner->clone_features_segment();\n      total_vector_count_ =\n          features_segment_->data_size() / owner->meta().element_size();\n      owner_ = owner;\n      cursor_index_ = 0;\n      offset_ = 0;\n      this->next_block();\n    }\n\n    //! Retrieve pointer of data\n    //! NOTICE: the vec feature will be changed after iterating to next, so\n    //! the caller need to keep a copy of it before iterator to next vector\n    const void *data(void) const override {\n      return data_;\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (!invalid_ && cursor_index_ < total_vector_count_);\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return owner_->key(cursor_index_);\n    }\n\n    //! Next iterator\n    void next(void) override {\n      ++cursor_index_;\n\n      if ((cursor_index_ % BATCH_SIZE) != 0) {\n        data_ += feature_size_;\n      } else {\n        this->next_block();\n      }\n    }\n\n   protected:\n    //! 
Read a block of data\n    void next_block(void) {\n      const void *read_data = nullptr;\n      size_t read_size = 0;\n\n      if (cursor_index_ >= total_vector_count_) {\n        invalid_ = true;\n        return;\n      }\n\n      if (cursor_index_ + BATCH_SIZE < total_vector_count_) {\n        read_size = BATCH_SIZE * feature_size_;\n      } else {\n        read_size = (total_vector_count_ - cursor_index_) * feature_size_;\n      }\n      if (features_segment_->read(offset_, &read_data, read_size) !=\n          read_size) {\n        LOG_ERROR(\"Failed to read data (%zu bytes) from features segment\",\n                  read_size);\n        invalid_ = true;\n        return;\n      }\n      offset_ += read_size;\n\n      // The order of data may be a column format, convert it to the row format.\n      if (owner_->column_major_order() &&\n          read_size == BATCH_SIZE * feature_size_) {\n        uint32_t align_size =\n            IndexMeta::AlignSizeof(owner_->meta().data_type());\n        ReverseTranspose<BATCH_SIZE>(align_size, read_data,\n                                     feature_size_ / align_size,\n                                     &block_buffer_[0]);\n        data_ = block_buffer_.data();\n      } else {\n        data_ = reinterpret_cast<const uint8_t *>(read_data);\n      }\n    }\n\n   private:\n    const FlatSearcher<BATCH_SIZE> *owner_{nullptr};\n    IndexStorage::Segment::Pointer features_segment_{};\n    uint32_t total_vector_count_{0};\n    uint32_t feature_size_{0};\n    std::vector<uint8_t> block_buffer_{};\n    const uint8_t *data_{nullptr};\n    uint64_t offset_{0};\n    uint32_t cursor_index_{0};\n    bool invalid_{false};\n  };\n\n  //! 
Retrieve a vector via local index\n  const void *get_vector_by_index(uint32_t index) const {\n    const void *read_data = nullptr;\n    if (index == kInvalidNodeId) {\n      LOG_ERROR(\"Failed to get vector by Invalid Id.\");\n      return nullptr;\n    }\n\n    if (owner_->column_major_order() &&\n        index < (total_vector_count_ - (total_vector_count_ % BATCH_SIZE))) {\n      uint32_t block_size = feature_size_ * BATCH_SIZE;\n      uint64_t offset = (index - (index % BATCH_SIZE)) * feature_size_;\n\n      if (features_segment_->read(offset, &read_data, block_size) !=\n          block_size) {\n        LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                  block_size);\n        return nullptr;\n      }\n\n      uint32_t align_size = IndexMeta::AlignSizeof(owner_->meta().data_type());\n      ReverseTranspose<BATCH_SIZE>(\n          align_size, read_data, feature_size_ / align_size, &block_buffer_[0]);\n      read_data = block_buffer_.data() + ((index % BATCH_SIZE) * feature_size_);\n\n    } else {\n      if (features_segment_->read(index * feature_size_, &read_data,\n                                  feature_size_) != feature_size_) {\n        LOG_ERROR(\"Failed to read data (%u bytes) from features segment\",\n                  feature_size_);\n        return nullptr;\n      }\n    }\n    return read_data;\n  }\n\n private:\n  //! Members\n  const FlatSearcher<BATCH_SIZE> *owner_{nullptr};\n  IndexStorage::Segment::Pointer features_segment_{};\n  uint32_t feature_size_{0};\n  uint32_t total_vector_count_{0};\n  mutable std::vector<uint8_t> block_buffer_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_streamer.h\"\n#include <zvec/core/framework/index_factory.h>\n#include \"flat_streamer_context.h\"\n#include \"flat_streamer_dumper.h\"\n#include \"flat_streamer_provider.h\"\n\nnamespace zvec {\nnamespace core {\n\n#define WRITE_LOCK_GUARD(MUTEX, LOCK_NAME) \\\n  ailego::WriteLock write_lock(MUTEX);     \\\n  std::unique_lock<ailego::WriteLock> LOCK_NAME(write_lock);\n\n#define READ_LOCK_GUARD_DEFER(MUTEX, LOCK_NAME) \\\n  ailego::ReadLock read_lock(MUTEX);            \\\n  std::unique_lock<ailego::ReadLock> LOCK_NAME(read_lock, std::defer_lock);\n\ntemplate <size_t BATCH_SIZE>\nFlatStreamer<BATCH_SIZE>::FlatStreamer() : entity_(stats_) {}\n\ntemplate <size_t BATCH_SIZE>\nFlatStreamer<BATCH_SIZE>::~FlatStreamer() {\n  if (state_ == STATE_INITED) {\n    this->cleanup();\n  }\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::init(const IndexMeta &imeta,\n                                   const ailego::Params &params) {\n  meta_ = imeta;\n  meta_.set_streamer(\"FlatStreamer\", 0U, params);\n\n  int error_code = InitializeMetric(meta_, &metric_);\n  if (error_code != 0) {\n    LOG_ERROR(\"Failed to initialize index metric %s, error=%d, %s\",\n              meta_.metric_name().c_str(), error_code,\n              IndexError::What(error_code));\n    return error_code;\n  }\n  if (metric_->query_metric()) {\n    
metric_ = metric_->query_metric();\n  }\n\n  // Parameter settings\n  if (params.get(PARAM_FLAT_COLUMN_MAJOR_ORDER, &column_major_order_)) {\n    meta_.set_major_order(column_major_order_ ? IndexMeta::MO_COLUMN\n                                              : IndexMeta::MO_ROW);\n  }\n  // Verify column major order\n  if (meta_.major_order() != IndexMeta::MO_ROW) {\n    IndexMeta::DataType ft = meta_.data_type();\n\n    bool support_column_major = true;\n    if ((ft != IndexMeta::DT_FP32 && ft != IndexMeta::DT_FP16 &&\n         ft != IndexMeta::DT_INT8 && ft != IndexMeta::DT_INT4 &&\n         ft != IndexMeta::DT_BINARY32 && ft != IndexMeta::DT_BINARY64) ||\n        (meta_.unit_size() != IndexMeta::UnitSizeof(ft))) {\n      if (meta_.major_order() == IndexMeta::MO_COLUMN) {\n        LOG_ERROR(\"Unsupported type %d with unit size %u.\", ft,\n                  meta_.unit_size());\n        return IndexError_Unsupported;\n      } else {\n        support_column_major = false;\n      }\n    }\n    if (meta_.element_size() % IndexMeta::AlignSizeof(ft) != 0) {\n      if (meta_.major_order() == IndexMeta::MO_COLUMN) {\n        LOG_ERROR(\"Unsupported type %d with dimension %u.\", ft,\n                  meta_.dimension());\n        return IndexError_Unsupported;\n      } else {\n        support_column_major = false;\n      }\n    }\n\n    if (meta_.major_order() == IndexMeta::MO_UNDEFINED &&\n        support_column_major) {\n      meta_.set_major_order(IndexMeta::MO_ROW);\n    }\n  }\n\n  if (!VerifyMetric(meta_)) {\n    LOG_ERROR(\"Invalid index metric %s.\", meta_.metric_name().c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  read_block_size_ = FLAT_DEFAULT_READ_BLOCK_SIZE;\n  params.get(PARAM_FLAT_READ_BLOCK_SIZE, &read_block_size_);\n  params.get(PARAM_FLAT_USE_ID_MAP, &use_key_info_map_);\n\n  // entity init\n  uint32_t block_vector_count = kDefaultBlockVecCount;\n  uint32_t segment_size = kDefaultSegmentSize;\n  bool filter_same_key = true;\n  
entity_.set_block_vector_count(block_vector_count);\n  entity_.set_segment_size(segment_size);\n  entity_.enable_filter_same_key(filter_same_key);\n  entity_.set_linear_list_count(1);\n  entity_.set_use_key_info_map(use_key_info_map_);\n  *entity_.mutable_meta() = meta_;\n\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::cleanup() {\n  if (state_ == STATE_OPENED) {\n    this->close();\n  }\n\n  LOG_DEBUG(\"FlatStreamer cleanup\");\n  state_ = STATE_INIT;\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::open(IndexStorage::Pointer stg) {\n  if (!stg) {\n    LOG_ERROR(\"Failed to open for invalid storage\");\n    return IndexError_InvalidArgument;\n  }\n  if (ailego_unlikely(state_ != STATE_INITED)) {\n    LOG_ERROR(\"Open storage failed, init streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  LOG_DEBUG(\"FlatStreamer open with %s\", stg->name().c_str());\n\n  int ret = entity_.open(std::move(stg), meta_);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to open storage\");\n    return ret;\n  }\n  magic_ = IndexContext::GenerateMagic();\n\n  state_ = STATE_OPENED;\n\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::close(void) {\n  LOG_DEBUG(\"FlatStreamer close\");\n\n  entity_.flush_linear_meta();\n\n  stats_.clear();\n\n  int ret = entity_.close();\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  state_ = STATE_INITED;\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::flush(uint64_t checkpoint) {\n  LOG_INFO(\"FlatStreamer flush with checkpoint %zu\", (size_t)checkpoint);\n  return entity_.flush(checkpoint);\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::dump(const IndexDumper::Pointer &dumper) {\n  std::string searcher_name = \"FlatSearcher\";\n  if (BATCH_SIZE == 16) {\n    searcher_name = \"FlatSearcher16\";\n  }\n  meta_.set_searcher(searcher_name, 0U, 
ailego::Params());\n  WRITE_LOCK_GUARD(dump_mutex_, dump_lock);\n  std::shared_ptr<FlatStreamerDumper<BATCH_SIZE>> bf_dumper =\n      std::make_shared<FlatStreamerDumper<BATCH_SIZE>>(this);\n  int ret = bf_dumper->dump(dumper);\n  *(stats_.mutable_dumped_size()) += bf_dumper->dump_size();\n  return ret;\n}\n\ntemplate <size_t BATCH_SIZE>\nIndexStreamer::Context::UPointer FlatStreamer<BATCH_SIZE>::create_context(\n    void) const {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to create Context, open storage first!\");\n    return Context::UPointer();\n  }\n  return IndexStreamer::Context::Pointer(\n      new FlatStreamerContext<BATCH_SIZE>(this));\n}\n\ntemplate <size_t BATCH_SIZE>\nIndexProvider::Pointer FlatStreamer<BATCH_SIZE>::create_provider(void) const {\n  return IndexProvider::Pointer(new (std::nothrow)\n                                    FlatStreamerProvider<BATCH_SIZE>(this));\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::add_impl(uint64_t pkey, const void *query,\n                                       const IndexQueryMeta &qmeta,\n                                       Context::UPointer &context) {\n  if (!query || qmeta.dimension() != meta_.dimension() ||\n      qmeta.data_type() != meta_.data_type() ||\n      qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\n        \"Failed to add for invalid arguments, query=%p, qmeta(type=%u \"\n        \"dim=%u size=%u) vs meta(type=%u dim=%u size=%u)\",\n        query, qmeta.data_type(), qmeta.dimension(), qmeta.element_size(),\n        meta_.data_type(), meta_.dimension(), meta_.element_size());\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_InvalidArgument;\n  }\n\n  auto *ctx = dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!ctx) {\n    LOG_ERROR(\"Failed to cast FlatStreamerContext\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Cast;\n  }\n\n  READ_LOCK_GUARD_DEFER(dump_mutex_, dump_lock);\n\n  
if (!dump_lock.try_lock()) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n\n  // IndexQueryMeta iv_qmeta;\n  // int ret = entity_.convert(query, qmeta, &query, &iv_qmeta);\n  // if (ret != 0) {\n  //   LOG_ERROR(\"Failed to convert record for %s\",\n  //             IndexError::What(ret));\n  //   (*stats_.mutable_discarded_count())++;\n  //   return ret;\n  // }\n\n  int ret = entity_.add(pkey, query, qmeta.element_size());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to add record for %s\", IndexError::What(ret));\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::add_with_id_impl(uint32_t id, const void *query,\n                                               const IndexQueryMeta &qmeta,\n                                               Context::Pointer &context) {\n  if (!query || qmeta.dimension() != meta_.dimension() ||\n      qmeta.data_type() != meta_.data_type() ||\n      qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\n        \"Failed to add for invalid arguments, query=%p, qmeta(type=%u \"\n        \"dim=%u size=%u) vs meta(type=%u dim=%u size=%u)\",\n        query, qmeta.data_type(), qmeta.dimension(), qmeta.element_size(),\n        meta_.data_type(), meta_.dimension(), meta_.element_size());\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_InvalidArgument;\n  }\n\n  auto *ctx = dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!ctx) {\n    LOG_ERROR(\"Failed to cast FlatStreamerContext\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Cast;\n  }\n\n  READ_LOCK_GUARD_DEFER(dump_mutex_, dump_lock);\n\n  if (!dump_lock.try_lock()) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n\n  int ret = 
entity_.add_vector_with_id(id, query, qmeta.element_size());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to add record for %s\", IndexError::What(ret));\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::search_bf_impl(const void *query,\n                                             const IndexQueryMeta &qmeta,\n                                             uint32_t count,\n                                             Context::Pointer &context) const {\n  ailego_assert(query && count && !!context);\n  ailego_assert(metric_->is_matched(meta_, qmeta));\n\n  FlatStreamerContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force streamer context\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->magic() != magic_) {\n    bf_context->reset(this);\n  }\n\n  if (bf_context->group_by_search()) {\n    return group_by_search_impl(query, qmeta, count, context);\n  }\n\n  bf_context->reset_results(count);\n  auto &filter = bf_context->filter();\n\n  for (size_t q = 0; q < count; ++q) {\n    auto *heap = bf_context->result_heap();\n    auto *context_stats = bf_context->mutable_stats(q);\n    uint32_t scan_count = 0;\n    int ret = entity_.search(query, filter, &scan_count, heap, context_stats);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to search for %s\", IndexError::What(ret));\n      return ret;\n    }\n    heap->sort();\n    bf_context->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  ailego_assert(query && count && 
!!context);\n  ailego_assert(metric_->is_matched(meta_, qmeta));\n\n  FlatStreamerContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force streamer context\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->magic() != magic_) {\n    bf_context->reset(this);\n  }\n\n  // Every query indexes its own key list below, so reject short input\n  // instead of reading past the end of p_keys.\n  if (p_keys.size() < count) {\n    LOG_ERROR(\"Invalid primary keys, expect %u lists but got %zu\", count,\n              p_keys.size());\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_context->group_by_search()) {\n    return group_by_search_p_keys_impl(query, p_keys, qmeta, count, context);\n  }\n\n  bf_context->reset_results(count);\n  auto &filter = bf_context->filter();\n\n  for (size_t q = 0; q < count; ++q) {\n    auto *heap = bf_context->result_heap();\n    // The context owns a single heap; drop the previous query's (already\n    // sorted) candidates so they cannot leak into this query's result.\n    heap->clear();\n    for (node_id_t idx = 0; idx < p_keys[q].size(); ++idx) {\n      uint64_t key = p_keys[q][idx];\n      if (!filter.is_valid() || !filter(key)) {\n        dist_t dist = 0;\n        IndexStorage::MemoryBlock block;\n        if (entity_.get_vector_by_key(key, block) != 0) continue;\n        entity_.row_major_distance(query, block.data(), 1, &dist);\n        heap->emplace(key, dist);\n      }\n    }\n    heap->sort();\n    bf_context->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::group_by_search_impl(\n    const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  FlatStreamerContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force streamer context\");\n    return IndexError_InvalidArgument;\n  }\n\n  bf_context->resize_group_results(count);\n  if (!bf_context->group_by().is_valid()) {\n    LOG_ERROR(\"Invalid group-by function\");\n    return IndexError_InvalidArgument;\n  }\n\n  std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {\n    return bf_context->group_by()(key);\n  };\n\n  auto iterator = 
entity_.creater_iterator();\n\n  for (size_t q = 0; q < count; ++q) {\n    bf_context->group_topk_heaps().clear();\n    for (node_id_t id = 0; id < entity_.vector_count(); ++id) {\n      uint64_t key = entity_.key(id);\n      if (!bf_context->filter().is_valid() || !bf_context->filter()(key)) {\n        dist_t dist = 0;\n        IndexStorage::MemoryBlock block;\n        if (entity_.get_vector_by_key(key, block) != 0) continue;\n        entity_.row_major_distance(query, block.data(), 1, &dist);\n\n        std::string group_id = group_by(key);\n        auto &topk_heap = bf_context->group_topk_heaps()[group_id];\n        if (topk_heap.empty()) {\n          topk_heap.limit(bf_context->group_topk());\n        }\n        topk_heap.emplace(key, dist);\n      }\n    }\n    bf_context->topk_to_group_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n  return 0;\n}\n\ntemplate <size_t BATCH_SIZE>\nint FlatStreamer<BATCH_SIZE>::group_by_search_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  FlatStreamerContext<BATCH_SIZE> *bf_context =\n      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());\n  if (!bf_context) {\n    LOG_ERROR(\"Invalid brute-force streamer context\");\n    return IndexError_InvalidArgument;\n  }\n\n  bf_context->resize_group_results(count);\n  if (!bf_context->group_by().is_valid()) {\n    LOG_ERROR(\"Invalid group-by function\");\n    return IndexError_InvalidArgument;\n  }\n\n  std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {\n    return bf_context->group_by()(key);\n  };\n\n  auto iterator = entity_.creater_iterator();\n\n  for (size_t q = 0; q < count; ++q) {\n    bf_context->group_topk_heaps().clear();\n    for (node_id_t idx = 0; idx < p_keys[q].size(); ++idx) {\n      uint64_t key = p_keys[q][idx];\n      if (!bf_context->filter().is_valid() || 
!bf_context->filter()(key)) {\n        dist_t dist = 0;\n        IndexStorage::MemoryBlock block;\n        if (entity_.get_vector_by_key(key, block) != 0) continue;\n        entity_.row_major_distance(query, block.data(), 1, &dist);\n\n        std::string group_id = group_by(key);\n        auto &topk_heap = bf_context->group_topk_heaps()[group_id];\n        if (topk_heap.empty()) {\n          topk_heap.limit(bf_context->group_topk());\n        }\n        topk_heap.emplace(key, dist);\n      }\n    }\n    bf_context->topk_to_group_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_STREAMER_ALIAS(LinearStreamer, FlatStreamer<32>);\nINDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer, FlatStreamer<32>);\nINDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer16, FlatStreamer<16>);\nINDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer32, FlatStreamer<32>);\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <ailego/parallel/lock.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"flat_streamer_entity.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Flat Streamer\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatStreamer : public IndexStreamer {\n public:\n  using ContextPointer = IndexStreamer::Context::UPointer;\n\n  FlatStreamer(void);\n  virtual ~FlatStreamer(void);\n\n  FlatStreamer(const FlatStreamer &streamer) = delete;\n  FlatStreamer &operator=(const FlatStreamer &streamer) = delete;\n\n public:\n  //! Initialize Streamer\n  int init(const IndexMeta &, const ailego::Params &) override;\n\n  //! Cleanup Streamer\n  int cleanup(void) override;\n\n  //! Create a context\n  IndexStreamer::Context::UPointer create_context(void) const override;\n\n  //! Create a new iterator\n  IndexProvider::Pointer create_provider(void) const override;\n\n  //! Add a vector into index\n  int add_impl(uint64_t pkey, const void *query, const IndexQueryMeta &qmeta,\n               Context::UPointer &context) override;\n\n  int add_with_id_impl(uint32_t id, const void *query,\n                       const IndexQueryMeta &qmeta,\n                       Context::Pointer &context) override;\n\n  //! 
Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  Context::UPointer &context) const override {\n    return search_bf_impl(query, qmeta, 1, context);\n  }\n\n  //! Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::UPointer &context) const override {\n    return search_bf_impl(query, qmeta, count, context);\n  }\n\n  //! Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     Context::UPointer &context) const override {\n    return search_bf_impl(query, qmeta, 1, context);\n  }\n\n  //! Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     uint32_t count, Context::UPointer &context) const override;\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const void *query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               Context::UPointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const void *query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               Context::UPointer &context) const override;\n\n  int group_by_search_impl(const void *query, const IndexQueryMeta &qmeta,\n                           uint32_t count, Context::UPointer &context) const;\n\n  int group_by_search_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, uint32_t count,\n      Context::Pointer &context) const;\n\n  //! 
Open index from file path\n  int open(IndexStorage::Pointer stg) override;\n\n  //! Close file\n  int close(void) override;\n\n  //! flush file\n  int flush(uint64_t checkpoint) override;\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  const FlatStreamerEntity &entity(void) const {\n    return entity_;\n  }\n\n  virtual const void *get_vector(uint64_t key) const override {\n    return this->get_vector_by_key(key);\n  }\n\n  virtual int get_vector(const uint64_t key,\n                         IndexStorage::MemoryBlock &block) const override {\n    return this->get_vector_by_key(key, block);\n  }\n\n  const void *get_vector_by_key(uint64_t key) const {\n    return entity_.get_vector_by_key(key);\n  }\n\n  int get_vector_by_key(const uint64_t key,\n                        IndexStorage::MemoryBlock &block) const override {\n    return entity_.get_vector_by_key(key, block);\n  }\n  const void *get_vector_by_id(uint32_t id) const override {\n    return get_vector_by_key(id);\n  }\n\n  int get_vector_by_id(const uint32_t id,\n                       IndexStorage::MemoryBlock &block) const override {\n    return get_vector_by_key(id, block);\n  }\n\n  uint32_t magic(void) const {\n    return magic_;\n  }\n\n  //! Retrieve block size of data read\n  uint32_t read_block_size(void) const {\n    return read_block_size_;\n  }\n\n private:\n  //! Constants\n  static constexpr uint32_t kDefaultBlockVecCount = 32u;\n  static constexpr uint32_t kDefaultSegmentSize = 4 * 1024 * 1024u;\n  static constexpr float kDefaultDocsSoftLimitRatio = 0.9f;\n\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };\n\n  //! 
Members\n  uint32_t magic_{0};\n  uint32_t docs_hard_limit_{std::numeric_limits<uint32_t>::max()};\n  uint32_t docs_soft_limit_{0};\n  IndexMeta meta_{};\n  std::vector<std::vector<std::string>> data_;\n  IndexStreamer::Stats stats_{};\n  IndexMetric::Pointer metric_{};\n  State state_{STATE_INIT};\n  mutable std::mutex mapping_mutex_{};\n  ailego::SharedMutex dump_mutex_{};\n  FlatStreamerEntity entity_;\n  bool column_major_order_{false};\n  bool use_key_info_map_{true};\n  uint32_t read_block_size_{0};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"flat_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Streamer Context\n */\ntemplate <size_t BATCH_SIZE>\nclass FlatStreamerContext : public IndexStreamer::Context {\n public:\n  //! Constructor\n  FlatStreamerContext(const FlatStreamer<BATCH_SIZE> *owner) {\n    this->reset(owner);\n  }\n\n  //! Destructor\n  virtual ~FlatStreamerContext(void) = default;\n\n  //! Set topk of search result\n  void set_topk(uint32_t topk) override {\n    topk_ = topk;\n    result_heap_.limit(topk);\n  }\n\n  //! Retrieve search result\n  const IndexDocumentList &result(void) const override {\n    return results_[0];\n  }\n\n  //! Retrieve search result with index\n  const IndexDocumentList &result(size_t idx) const override {\n    return results_[idx];\n  }\n\n  //! Retrieve result object for output\n  IndexDocumentList *mutable_result(size_t idx) override {\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    return &results_[idx];\n  }\n\n  inline IndexDocumentHeap *result_heap() {\n    return &result_heap_;\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n  //! 
Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(\n      size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  //! Update the parameters of context\n  int update(const ailego::Params & /*params*/) override {\n    return 0;\n  }\n\n  //! Retrieve magic number\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  //! Get group topk\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n  //! Get group num\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n  inline std::map<std::string, TopkHeap> &group_topk_heaps() {\n    return group_topk_heaps_;\n  }\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n  bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n  inline void resize_group_results(size_t size) {\n    if (group_by_search()) {\n      group_results_.resize(size);\n    }\n  }\n\n  void topk_to_result(uint32_t idx) {\n    if (ailego_unlikely(result_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(result_heap_.size()));\n    result_heap_.sort();\n    results_[idx].clear();\n    for (int i = 0; i < size; ++i) {\n      auto score = result_heap_[i].score();\n      if (score > this->threshold()) {\n        break;\n      }\n\n      key_t key = result_heap_[i].key();\n      if (fetch_vector_) {\n        IndexStorage::MemoryBlock block;\n        owner_->entity().get_vector_by_key(key, block);\n        results_[idx].emplace_back(key, score, key, block);\n      } else {\n        results_[idx].emplace_back(key, score, key);\n      }\n    }\n  }\n\n  void topk_to_group_result(uint32_t idx) {\n    ailego_assert_with(idx < group_results_.size(), \"invalid idx\");\n    group_results_[idx].clear();\n    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;\n    std::vector<std::pair<std::string, float>> 
best_score_in_groups;\n    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();\n         itr++) {\n      const std::string &group_id = (*itr).first;\n      auto &heap = (*itr).second;\n      heap.sort();\n      if (heap.size() > 0) {\n        float best_score = heap[0].second;\n        best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n      }\n    }\n    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n              [](const std::pair<std::string, float> &a,\n                 const std::pair<std::string, float> &b) -> int {\n                return a.second < b.second;\n              });\n    // truncate to group num\n    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();\n         ++i) {\n      const std::string &group_id = best_score_in_groups[i].first;\n      group_topk_list.emplace_back(\n          std::make_pair(group_id, group_topk_heaps_[group_id]));\n    }\n    group_results_[idx].resize(group_topk_list.size());\n    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {\n      const std::string &group_id = group_topk_list[i].first;\n      group_results_[idx][i].set_group_id(group_id);\n      uint32_t size = std::min(\n          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));\n      for (uint32_t j = 0; j < size; ++j) {\n        auto score = group_topk_list[i].second[j].second;\n        if (score > this->threshold()) {\n          break;\n        }\n        node_id_t id = group_topk_list[i].second[j].first;\n        auto provider = owner_->create_provider();\n        if (fetch_vector_) {\n          IndexStorage::MemoryBlock block;\n          provider->get_vector(id, block);\n          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id,\n                                                              block);\n        } else {\n          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id);\n        }\n      }\n    }\n  }\n\n  
//! Get if group by search\n  bool group_by_search() {\n    return group_num_ > 0;\n  }\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n    group_topk_heaps_.clear();\n  }\n\n  void reset() override {\n    for (auto &it : results_) {\n      it.clear();\n    }\n    for (auto &it : group_results_) {\n      it.clear();\n    }\n  }\n\n  //! Reset the context\n  void reset(const FlatStreamer<BATCH_SIZE> *owner) {\n    this->reset();\n    magic_ = owner->magic();\n    feature_size_ = owner->meta().element_size();\n\n    uint32_t block_size = feature_size_ * BATCH_SIZE;\n    actual_read_size_ =\n        (owner->read_block_size() + block_size - 1) / block_size * block_size;\n    owner_ = owner;\n  }\n\n  //! Reset all the query results\n  void reset_results(size_t qnum) {\n    results_.resize(qnum);\n    stats_vec_.resize(qnum);\n    for (size_t i = 0; i < qnum; ++i) {\n      results_[i].clear();\n      stats_vec_[i].clear();\n    }\n    result_heap_.clear();\n    result_heap_.limit(topk_);\n    result_heap_.set_threshold(this->threshold());\n  }\n\n  Stats *mutable_stats(size_t idx = 0) {\n    ailego_assert_with(stats_vec_.size() > idx, \"invalid index\");\n    return &stats_vec_[idx];\n  }\n\n private:\n  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};\n  std::vector<Stats> stats_vec_{};\n  uint32_t magic_{0};\n  uint32_t topk_{0};\n  uint32_t feature_size_{0};\n  uint32_t actual_read_size_{0};\n  IndexDocumentHeap result_heap_;\n  std::vector<IndexDocumentList> results_{};\n  std::string batch_queries_{};\n  float scores_[BATCH_SIZE * BATCH_SIZE];\n  bool fetch_vector_{false};\n  // group\n  uint32_t group_num_{0};\n  uint32_t group_topk_{0};\n  std::map<std::string, TopkHeap> group_topk_heaps_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer_dumper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <memory>\n#include <numeric>\n#include <string>\n#include <vector>\n#include \"flat_streamer.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! Dumper that serializes a FlatStreamer (features, keys, mapping, meta)\n//! through an IndexDumper\ntemplate <size_t BATCH_SIZE>\nclass FlatStreamerDumper {\n public:\n  typedef std::unique_ptr<FlatStreamerDumper> Pointer;\n\n  //! Constructor; owner is the streamer whose entity and meta are dumped\n  FlatStreamerDumper(const FlatStreamer<BATCH_SIZE> *owner) {\n    owner_ = owner;\n    dump_size_ = 0;\n  }\n\n  //! Dump the index: features first, then keys, mapping and the index meta.\n  //! Returns 0 on success, otherwise the first non-zero error code.\n  int dump(const IndexDumper::Pointer &dumper) {\n    ailego::ElapsedTime stamp;\n\n    std::vector<uint64_t> keys;\n    if (owner_->meta().major_order() == IndexMeta::MO_COLUMN) {\n      int error_code = this->write_column_index(dumper.get(), &keys);\n      if (error_code != 0) {\n        return error_code;\n      }\n    } else {\n      int error_code = this->write_row_index(dumper.get(), &keys);\n      if (error_code != 0) {\n        return error_code;\n      }\n    }\n\n    int error_code = this->write_keys(keys, dumper.get());\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    error_code = this->write_mapping(keys, dumper.get());\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    error_code = IndexHelper::SerializeToDumper(owner_->meta(), dumper.get());\n    if (error_code != 0) {\n      return error_code;\n    }\n    LOG_DEBUG(\"dumped_count: %zu, costtime: %zu\", keys.size(),\n              (size_t)stamp.milli_seconds());\n    return 0;\n  }\n\n  //! Retrieve the number of bytes written by the write_* steps so far\n  size_t dump_size() {\n    return dump_size_;\n  }\n\n private:\n  //! Pick the transpose unit type from the aligned size of the data type\n  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {\n    switch (IndexMeta::AlignSizeof(owner_->meta().data_type())) {\n      case 2:\n        return this->write_column_index<uint16_t>(dumper, keys);\n      case 4:\n        return this->write_column_index<uint32_t>(dumper, keys);\n      case 8:\n        return this->write_column_index<uint64_t>(dumper, keys);\n      default:\n        ailego_check_with(0, \"BAD CASE\");\n    }\n    return IndexError_Runtime;\n  }\n\n  //! Write features in column-major form: every full group of BATCH_SIZE\n  //! elements is transposed before writing; a trailing partial group is\n  //! written as it was read. Keys are collected in iteration order.\n  template <typename T>\n  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {\n    auto iter = owner_->entity().creater_iterator();\n    if (!iter) {\n      LOG_ERROR(\"Failed to create iterator\");\n      return IndexError_Runtime;\n    }\n\n    // Write features\n    size_t element_size = owner_->meta().element_size();\n    size_t block_size = element_size * BATCH_SIZE;\n    std::string block1;\n    block1.reserve(block_size);\n    // block2 is filled through data() and block_size bytes are read back from\n    // it, so it needs that many real characters; reserve() alone would leave\n    // size() at 0.\n    std::string block2(block_size, '\\0');\n\n    for (; iter->is_valid(); iter->next()) {\n      block1.append(reinterpret_cast<const char *>(iter->data()), element_size);\n      keys->emplace_back(iter->key());\n\n      if (block1.size() == block_size) {\n        ailego::MatrixHelper::Transpose<T, BATCH_SIZE>(\n            block1.data(), element_size / sizeof(T), (void *)block2.data());\n\n        if (dumper->write(block2.data(), block_size) != block_size) {\n          LOG_ERROR(\"Failed to write data into dumper %s\",\n                    dumper->name().c_str());\n          return IndexError_WriteData;\n        }\n        block1.clear();\n        dump_size_ += block_size;\n      }\n    }\n\n    if (!block1.empty()) {\n      if (dumper->write(block1.data(), block1.size()) != block1.size()) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += block1.size();\n    }\n\n    // Write the padding if needed\n    size_t features_size = keys->size() * element_size;\n    size_t features_padding_size =\n        ailego_align(features_size, 32) - features_size;\n    if (features_padding_size) {\n      std::string padding(features_padding_size, '\\0');\n\n      if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += padding.size();\n    }\n    return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,\n                          features_padding_size, 0);\n  }\n\n  //! Write features one element after another (row-major) and collect the\n  //! keys in iteration order\n  int write_row_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {\n    auto iter = owner_->entity().creater_iterator();\n    if (!iter) {\n      LOG_ERROR(\"Failed to create iterator\");\n      return IndexError_Runtime;\n    }\n\n    // Write features\n    size_t element_size = owner_->meta().element_size();\n    for (; iter->is_valid(); iter->next()) {\n      if (dumper->write(iter->data(), element_size) != element_size) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += element_size;\n      keys->emplace_back(iter->key());\n    }\n\n    // Write the padding if needed\n    size_t features_size = keys->size() * element_size;\n    size_t features_padding_size =\n        ailego_align(features_size, 32) - features_size;\n    if (features_padding_size) {\n      std::string padding(features_padding_size, '\\0');\n\n      if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += padding.size();\n    }\n    return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,\n                          features_padding_size, 0);\n  }\n\n  //! Write the keys segment followed by zero padding computed with\n  //! ailego_align(size, 32)\n  int write_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper) {\n    size_t keys_size = keys.size() * sizeof(uint64_t);\n    size_t keys_padding_size = ailego_align(keys_size, 32) - keys_size;\n    if (dumper->write(keys.data(), keys_size) != keys_size) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n    dump_size_ += keys_size;\n\n    // Write the padding if needed\n    if (keys_padding_size) {\n      std::string padding(keys_padding_size, '\\0');\n      if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += padding.size();\n    }\n    return dumper->append(FLAT_SEGMENT_KEYS_SEG_ID, keys_size,\n                          keys_padding_size, 0);\n  }\n\n  //! Write the mapping segment: the positions 0..keys.size()-1 ordered by\n  //! ascending key, followed by zero padding\n  int write_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper) {\n    std::vector<uint32_t> mapping(keys.size());\n    std::iota(mapping.begin(), mapping.end(), 0);\n    std::sort(mapping.begin(), mapping.end(),\n              [&keys](uint32_t lhs, uint32_t rhs) {\n                return (keys[lhs] < keys[rhs]);\n              });\n\n    size_t mapping_size = mapping.size() * sizeof(uint32_t);\n    size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;\n    if (dumper->write(mapping.data(), mapping_size) != mapping_size) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n    dump_size_ += mapping_size;\n\n    // Write the padding if needed\n    if (mapping_padding_size) {\n      std::string padding(mapping_padding_size, '\\0');\n      if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n        LOG_ERROR(\"Failed to write data into dumper %s\",\n                  dumper->name().c_str());\n        return IndexError_WriteData;\n      }\n      dump_size_ += padding.size();\n    }\n    return dumper->append(FLAT_SEGMENT_MAPPING_SEG_ID, mapping_size,\n                          mapping_padding_size, 0);\n  }\n\n private:\n  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};\n  size_t dump_size_{0};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_streamer_entity.h\"\n#include <cstdint>\n#include <zvec/core/framework/index_error.h>\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nFlatStreamerEntity::FlatStreamerEntity(IndexStreamer::Stats &stats)\n    : stats_(stats) {}\n\nint FlatStreamerEntity::open(IndexStorage::Pointer storage,\n                             const IndexMeta & /*mt*/) {\n  if (storage_) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n  // segments_[0] store the meta information of the linear list\n  ailego_assert_with(segments_.size() == 0, \"Invalid Size\");\n\n  key_info_map_lock_ = std::make_shared<ailego::SharedMutex>();\n  key_info_map_.clear();\n  id_key_vector_.clear();\n  withid_key_info_map_.clear();\n  withid_key_map_.clear();\n\n  vec_unit_size_ = IndexMeta::AlignSizeof(index_meta_.data_type());\n  vec_cols_ = index_meta_.element_size() / vec_unit_size_;\n  meta_.header.block_size =\n      ailego_align(sizeof(BlockHeader) + sizeof(DeletionMap) +\n                       (index_meta_.element_size() + sizeof(uint64_t)) *\n                           meta_.header.block_vector_count,\n                   32);\n\n  if (storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID) ||\n      storage->get(FLAT_LINEAR_META_SEG_ID)) {\n    int ret = this->load_storage(storage);\n    if 
(ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to load storage index\");\n      return ret;\n    }\n  } else {\n    int ret = this->init_storage(storage);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to init storage\");\n      return ret;\n    }\n  }\n\n  storage_ = storage;\n\n  //! Create the distance calculator\n  auto metric = IndexFactory::CreateMetric(index_meta_.metric_name());\n  if (!metric) {\n    LOG_ERROR(\"Failed to create metric %s\", index_meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric->init(index_meta_, index_meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize metric %s\",\n              index_meta_.metric_name().c_str());\n    return ret;\n  }\n  row_distance_ = metric->distance();\n  column_distance_ =\n      metric->distance_matrix(meta_.header.block_vector_count, 1);\n\n  LOG_DEBUG(\"Open storage %s done, metric=%s\", storage_->name().c_str(),\n            index_meta_.metric_name().c_str());\n\n  return 0;\n}\n\nint FlatStreamerEntity::close(void) {\n  segments_.clear();\n  storage_.reset();\n  key_info_map_lock_.reset();\n  key_info_map_.clear();\n  withid_key_info_map_.clear();\n  withid_key_map_.clear();\n  id_key_vector_.clear();\n  meta_.create_time = 0;\n  meta_.update_time = 0;\n  meta_.segment_count = 0;\n  meta_.header.total_vector_count = 0;\n  meta_.header.block_count = 0;\n  meta_.header.block_size = 0;\n  meta_.header.linear_body_size = 0;\n\n  return 0;\n}\n\nint FlatStreamerEntity::flush_linear_meta(void) {\n  if (!storage_) {\n    return 0;\n  }\n\n  meta_.update_time = ailego::Realtime::Seconds();\n  meta_.revision_id = stats_.revision_id();\n  stats_.set_update_time(meta_.update_time);\n  auto segment = storage_->get(FLAT_LINEAR_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get segment %s\", FLAT_LINEAR_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &meta_, 
sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"Failed to write segment %s\", FLAT_LINEAR_META_SEG_ID.c_str());\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint FlatStreamerEntity::flush(uint64_t checkpoint) {\n  int ret = this->flush_linear_meta();\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (checkpoint != 0) {\n    storage_->refresh(checkpoint);\n  }\n  ret = storage_->flush();\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to refresh storage for %s\", IndexError::What(ret));\n    return ret;\n  }\n  if (checkpoint != 0) {\n    stats_.set_check_point(checkpoint);\n  }\n\n  return 0;\n}\n\nint FlatStreamerEntity::add(uint64_t key, const void *vec, size_t size) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  if (filter_same_key_) {\n    key_info_map_lock_->lock_shared();\n    if (key_info_map_.find(key) != key_info_map_.end()) {\n      key_info_map_lock_->unlock_shared();\n      LOG_WARN(\"Try to add duplicate key, drop it\");\n      return IndexError_Duplicate;\n    }\n    key_info_map_lock_->unlock_shared();\n  }\n  if (size != static_cast<size_t>(index_meta_.element_size())) {\n    LOG_ERROR(\"Failed to add, mismatch size %zu vs elemsize %u\", size,\n              index_meta_.element_size());\n    return IndexError_Mismatch;\n  }\n\n  IndexStorage::MemoryBlock head_block;\n  this->get_head_block(head_block);\n  const BlockLocation *bl =\n      reinterpret_cast<const BlockLocation *>(head_block.data());\n  if (ailego_unlikely(bl == nullptr)) {\n    LOG_ERROR(\"Failed to get block loc\");\n    return IndexError_ReadData;\n  }\n  BlockLocation block = *bl;\n\n  if (!this->is_valid_block(block)) {\n    int ret = this->alloc_block(block, &block);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n    ret = this->update_head_block(block);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n  }\n\n  int ret = this->add_to_block(block, key, vec, size);\n  if (ret == IndexError_IndexFull) 
{\n    ret = this->alloc_block(block, &block);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n    ret = this->update_head_block(block);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n    ret = this->add_to_block(block, key, vec, size);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n  }\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  (*stats_.mutable_added_count())++;\n  stats_.set_revision_id(meta_.revision_id + 1);\n\n  return 0;\n}\n\nint FlatStreamerEntity::search(const void *query, const IndexFilter &filter,\n                               uint32_t *scan_count, IndexDocumentHeap *heap,\n                               IndexContext::Stats *context_stats) const {\n  IndexStorage::MemoryBlock head_block;\n  this->get_head_block(head_block);\n  const BlockLocation *bl =\n      reinterpret_cast<const BlockLocation *>(head_block.data());\n  if (ailego_unlikely(bl == nullptr)) {\n    LOG_ERROR(\"Failed to get block loc\");\n    return IndexError_ReadData;\n  }\n\n  BlockLocation block = *bl;\n\n  while (this->is_valid_block(block)) {\n    IndexStorage::MemoryBlock block_header_block;\n    this->get_block_header(block, block_header_block);\n    const BlockHeader *hd =\n        reinterpret_cast<const BlockHeader *>(block_header_block.data());\n    if (ailego_unlikely(hd == nullptr)) {\n      LOG_ERROR(\"Failed to get block header\");\n      return IndexError_ReadData;\n    }\n\n    if (hd->vector_count > 0) {\n      *scan_count += hd->vector_count;\n      IndexStorage::MemoryBlock deletion_map_block;\n      this->get_block_deletion_map(block, deletion_map_block);\n      const DeletionMap *deletion_map =\n          reinterpret_cast<const DeletionMap *>(deletion_map_block.data());\n      if (filter.is_valid() || deletion_map->is_dirty()) {\n        this->search_block(query, block, hd, 1.0, filter, deletion_map, heap,\n                           context_stats);\n      } else {\n        
*(context_stats->mutable_dist_calced_count()) += hd->vector_count;\n        this->search_block(query, block, hd, 1.0, heap);\n      }\n    }\n    block = hd->next;\n  }\n  return 0;\n}\n\n//! Search in a block\nvoid FlatStreamerEntity::search_block(const void *query,\n                                      const BlockLocation &bl,\n                                      const BlockHeader *hd, float norm_val,\n                                      IndexDocumentHeap *heap) const {\n  std::vector<float> distances(block_vector_count());\n  IndexStorage::MemoryBlock vecs_block;\n  this->get_block_vectors(bl, vecs_block);\n  const char *vecs = reinterpret_cast<const char *>(vecs_block.data());\n  IndexStorage::MemoryBlock keys_block;\n  this->get_block_keys(bl, keys_block);\n  const uint64_t *keys = reinterpret_cast<const uint64_t *>(keys_block.data());\n  row_major_distance(query, vecs, hd->vector_count, distances.data());\n  for (size_t k = 0; k < hd->vector_count; ++k) {\n    if (keys[k] != kInvalidKey) {\n      heap->emplace(keys[k], distances[k] * norm_val);\n    }\n  }\n}\n\n//! Search in a block with filter\nvoid FlatStreamerEntity::search_block(\n    const void *query, const BlockLocation &bl, const BlockHeader *hd,\n    float norm_val, const IndexFilter &filter, const DeletionMap *deletion_map,\n    IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const {\n  std::vector<float> distances(block_vector_count());\n\n  IndexStorage::MemoryBlock vecs_block;\n  this->get_block_vectors(bl, vecs_block);\n  const char *vecs = reinterpret_cast<const char *>(vecs_block.data());\n  IndexStorage::MemoryBlock keys_block;\n  this->get_block_keys(bl, keys_block);\n  const uint64_t *keys = reinterpret_cast<const uint64_t *>(keys_block.data());\n\n  DeletionMap keeps;\n  for (size_t k = 0; k < hd->vector_count; ++k) {\n    const bool condition1 = !deletion_map->test(k);\n    const bool condition2 = filter.is_valid() ? 
!filter(keys[k]) : true;\n    const bool condition3 = keys[k] != kInvalidKey;\n    if (condition1 && condition2 && condition3) {\n      keeps.set(k);\n    }\n  }\n  if (!keeps.is_dirty()) {\n    (*context_stats->mutable_filtered_count()) += hd->vector_count;\n    return;\n  }\n  for (size_t k = 0; k < hd->vector_count; ++k) {\n    if (keeps.test(k)) {\n      auto cur_vec = vecs + index_meta_.element_size() * k;\n      row_major_distance(query, cur_vec, 1, distances.data() + k);\n      ++(*context_stats->mutable_dist_calced_count());\n    }\n  }\n  for (size_t k = 0; k < hd->vector_count; ++k) {\n    if (keeps.test(k)) {\n      heap->emplace(keys[k], distances[k] * norm_val);\n    } else {\n      ++(*context_stats->mutable_filtered_count());\n    }\n  }\n}\n\n//! Same as search(), but the scan count is not reported to the caller\nint FlatStreamerEntity::search_bf(const void *query, const IndexFilter &filter,\n                                  IndexDocumentHeap *heap,\n                                  IndexContext::Stats *context_stats) const {\n  // search() accumulates with `*scan_count += ...`, so start from zero\n  uint32_t scan_count = 0;\n  return this->search(query, filter, &scan_count, heap, context_stats);\n}\n\nFlatStreamerEntity::Pointer FlatStreamerEntity::clone(void) const {\n  std::vector<IndexStorage::Segment::Pointer> segments;\n  segments.reserve(segments_.size());\n  for (size_t i = 0; i < segments_.size(); ++i) {\n    segments.emplace_back(segments_[i]->clone());\n    if (!segments[i]) {\n      LOG_ERROR(\"Failed to clone segment, index=%zu\", i);\n      return nullptr;\n    }\n  }\n  auto entity = new (std::nothrow) FlatStreamerEntity(stats_);\n  if (!entity) {\n    LOG_ERROR(\"Failed to New FlatStreamerEntity object\");\n    return nullptr;\n  }\n  entity->index_meta_ = this->index_meta_;\n  entity->storage_ = this->storage_;\n  // entity->reformer_ = this->reformer_;\n  entity->segments_ = segments;\n  entity->meta_ = this->meta_;\n  entity->key_info_map_lock_ = this->key_info_map_lock_;\n  entity->key_info_map_ = this->key_info_map_;\n  entity->id_key_vector_ = this->id_key_vector_;\n  
entity->withid_key_info_map_ = this->withid_key_info_map_;\n  entity->withid_key_map_ = this->withid_key_map_;\n  entity->filter_same_key_ = this->filter_same_key_;\n  entity->vec_unit_size_ = this->vec_unit_size_;\n  entity->vec_cols_ = this->vec_cols_;\n  return FlatStreamerEntity::Pointer(entity);\n}\n\nconst void *FlatStreamerEntity::get_vector_by_key(uint64_t key) const {\n  VectorLocation loc{};\n  key_info_map_lock_->lock_shared();\n  if (use_key_info_map_) {\n    auto iterator = key_info_map_.find(key);\n    if (iterator == key_info_map_.end()) {\n      key_info_map_lock_->unlock_shared();\n      return nullptr;\n    }\n    loc = iterator->second;\n  } else {\n    if (key < withid_key_info_map_.size()) {\n      loc = withid_key_info_map_[key];\n    } else {\n      key_info_map_lock_->unlock_shared();\n      return nullptr;\n    }\n  }\n  key_info_map_lock_->unlock_shared();\n\n  auto segment = this->get_segment(loc.segment_id);\n  const void *data = nullptr;\n  if (segment->read(loc.offset, &data, index_meta_.element_size()) !=\n      index_meta_.element_size()) {\n    LOG_ERROR(\"Failed to read segment, size=%u\", index_meta_.element_size());\n    return nullptr;\n  }\n  return data;\n}\n\nint FlatStreamerEntity::get_vector_by_key(\n    const uint64_t key, IndexStorage::MemoryBlock &block) const {\n  VectorLocation loc{};\n  key_info_map_lock_->lock_shared();\n  if (use_key_info_map_) {\n    auto iterator = key_info_map_.find(key);\n    if (iterator == key_info_map_.end()) {\n      key_info_map_lock_->unlock_shared();\n      return -1;\n    }\n    loc = iterator->second;\n  } else {\n    if (key < withid_key_info_map_.size()) {\n      loc = withid_key_info_map_[key];\n    } else {\n      key_info_map_lock_->unlock_shared();\n      return -1;\n    }\n  }\n  key_info_map_lock_->unlock_shared();\n\n  auto segment = this->get_segment(loc.segment_id);\n  if (segment->read(loc.offset, block, index_meta_.element_size()) !=\n      index_meta_.element_size()) {\n   
 LOG_ERROR(\"Failed to read segment, size=%u\", index_meta_.element_size());\n    return -1;\n  }\n  return 0;\n}\n\nIndexProvider::Iterator::Pointer FlatStreamerEntity::creater_iterator(\n    void) const {\n  auto entity = this->clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone entity\");\n    return nullptr;\n  }\n\n  return Iterator::Pointer(new (std::nothrow)\n                               FlatStreamerEntity::Iterator(std::move(entity)));\n}\n\nvoid FlatStreamerEntity::Iterator::read_next_block(void) {\n  auto block_size = entity_->linear_block_size();\n  while (segment_id_ < entity_->segments_.size()) {\n    auto &segment = entity_->segments_[segment_id_];\n    size_t off = block_index_ * block_size;\n    if (off + block_size > segment->data_size()) {\n      ++segment_id_;\n      block_index_ = 0;\n      continue;\n    }\n    if (segment->read(off, block_, block_size) != block_size) {\n      LOG_ERROR(\"Failed to read block, off=%zu\", off);\n      break;\n    }\n    data_ = block_.data();\n    auto hd = reinterpret_cast<const BlockHeader *>(\n        static_cast<const char *>(data_) + block_size - sizeof(BlockHeader));\n    if (hd->vector_count == 0) {\n      ++block_index_;\n      continue;\n    }\n\n    block_vector_count_ = hd->vector_count;\n    block_vector_index_ = 0;\n    size_t elemsize = entity_->index_meta_.element_size();\n    keys_ = reinterpret_cast<const uint64_t *>(\n        reinterpret_cast<const char *>(data_) +\n        elemsize * entity_->block_vector_count());\n    return;\n  }\n\n  is_valid_ = false;\n}\n\nint FlatStreamerEntity::init_storage(IndexStorage::Pointer storage) {\n  // Init Linear Meta Segment\n  meta_.create_time = ailego::Realtime::Seconds();\n  stats_.set_create_time(meta_.create_time);\n  meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(meta_.update_time);\n  meta_.segment_count = 0;\n  meta_.revision_id = 0;\n\n  std::string str;\n  index_meta_.serialize(&str);\n  const size_t page = 
ailego::MemoryHelper::PageSize();\n\n  meta_.header.header_size = sizeof(LinearIndexHeader) + str.size();\n  meta_.header.total_vector_count = 0;\n  meta_.header.linear_body_size = 0;\n  meta_.header.block_count = 0;\n  meta_.header.index_meta_size = str.size();\n  meta_.header.linear_list_count = 1;\n\n  AdjustSegmentSize(&meta_);\n\n  LOG_DEBUG(\n      \"Create Streamer Index, VecSize=%u, BlockSize=%u SegmentSize=%u \"\n      \"LinearListCount=%u\",\n      index_meta_.element_size(), meta_.header.block_size, meta_.segment_size,\n      meta_.header.linear_list_count);\n\n  size_t size = ailego_align(sizeof(meta_) + str.size(), page);\n  int ret = storage->append(FLAT_LINEAR_META_SEG_ID, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to append segment %s\", FLAT_LINEAR_META_SEG_ID.c_str());\n    return ret;\n  }\n  auto segment = storage->get(FLAT_LINEAR_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get segment %s\", FLAT_LINEAR_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"Failed to write segment data\");\n    return IndexError_WriteData;\n  }\n  if (segment->write(sizeof(meta_), str.data(), str.size()) != str.size()) {\n    LOG_ERROR(\"Failed to write segment data, size=%zu\", str.size());\n    return IndexError_WriteData;\n  }\n\n  ret = storage->append(\"IndexMeta\", str.size());\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to append segment IndexMeta, code: %d\", ret);\n    return ret;\n  }\n  auto index_meta_segment = storage->get(\"IndexMeta\");\n  if (index_meta_segment->write(0, str.data(), str.size()) != str.size()) {\n    LOG_ERROR(\"Failed to write segment data, size=%zu\", str.size());\n    return IndexError_WriteData;\n  }\n  *stats_.mutable_index_size() += size;\n\n  // Init Linear List Head Segment\n  size = ailego_align(sizeof(BlockLocation) * linear_list_count(), page);\n  ret = 
storage->append(FLAT_LINEAR_LIST_HEAD_SEG_ID, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to append segment %s for %s, size=%zu\",\n              FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str(), IndexError::What(ret),\n              size);\n    return ret;\n  }\n  segment = storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get segment %s\", FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->resize(size) != size) {\n    LOG_ERROR(\"Failed to resize segment, size=%zu\", size);\n    return IndexError_WriteData;\n  }\n  segments_.emplace_back(std::move(segment));\n\n  *stats_.mutable_index_size() += size;\n\n  return 0;\n}\n\nint FlatStreamerEntity::load_linear_meta(IndexStorage::Pointer storage) {\n  AdjustSegmentSize(&meta_);\n\n  // Load Meta Segment\n  auto segment = storage->get(FLAT_LINEAR_META_SEG_ID);\n  if (!segment || segment->data_size() < sizeof(meta_)) {\n    LOG_ERROR(\"Missing segment %s, or invalid segment size\",\n              FLAT_LINEAR_META_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::MemoryBlock data_block;\n  if (segment->read(0, data_block, segment->data_size()) !=\n      segment->data_size()) {\n    LOG_ERROR(\"Failed to read storage, size=%zu\", segment->data_size());\n    return IndexError_InvalidFormat;\n  }\n  auto *mt = reinterpret_cast<const decltype(meta_) *>(data_block.data());\n  if (mt->header.block_vector_count != meta_.header.block_vector_count) {\n    LOG_ERROR(\"Unmatched BlockVecCount Setting, Index %u vs Setting %u\",\n              mt->header.block_vector_count, meta_.header.block_vector_count);\n    return IndexError_Mismatch;\n  }\n  if (mt->header.block_size != meta_.header.block_size) {\n    LOG_ERROR(\"Unmatched BlockSize Setting, Index %u vs Setting %u\",\n              mt->header.block_size, meta_.header.block_size);\n    return IndexError_Mismatch;\n  }\n  if 
(mt->header.index_meta_size + sizeof(meta_) > segment->data_size()) {\n    LOG_ERROR(\"Invalid format, IndexMetaSize %u, SegmentSize %zu\",\n              mt->header.index_meta_size, segment->data_size());\n    return IndexError_InvalidFormat;\n  }\n  if (mt->header.linear_list_count != meta_.header.linear_list_count) {\n    LOG_ERROR(\"Unmatch LinearListCount, Index size %u vs Setting %u\",\n              mt->header.linear_list_count, meta_.header.linear_list_count);\n    return IndexError_InvalidFormat;\n  }\n  IndexMeta index_meta;\n  if (!index_meta.deserialize(mt->header.index_meta,\n                              mt->header.index_meta_size)) {\n    LOG_ERROR(\"Failed to deserialize IndexMeta, size=%u\",\n              mt->header.index_meta_size);\n    return IndexError_InvalidFormat;\n  }\n  if (index_meta.data_type() != index_meta_.data_type() ||\n      index_meta.dimension() != index_meta_.dimension() ||\n      index_meta.element_size() != index_meta_.element_size() ||\n      index_meta.metric_name() != index_meta_.metric_name()) {\n    LOG_ERROR(\n        \"Unmatch IndexMeta, Index(type=%u dim=%u elemsize=%u \"\n        \"metric=%s) Setting(type=%u dim=%u elemsize=%u metric=%s)\",\n        index_meta.data_type(), index_meta.dimension(),\n        index_meta.element_size(), index_meta.metric_name().c_str(),\n        index_meta_.data_type(), index_meta_.dimension(),\n        index_meta_.element_size(), index_meta_.metric_name().c_str());\n    return IndexError_Mismatch;\n  }\n  // Segment Size can be reconfigurable\n  auto segment_size = meta_.segment_size;\n  std::memcpy(&meta_, mt, sizeof(meta_));\n  meta_.segment_size = segment_size;\n  return 0;\n}\n\nint FlatStreamerEntity::load_segment_keys_to_map(BlockLocation block) {\n  while (this->is_valid_block(block)) {\n    auto segment = this->get_segment(block.segment_id);\n\n    IndexStorage::MemoryBlock block_header_block;\n    this->get_block_header(block, block_header_block);\n    const BlockHeader *hd =\n  
      reinterpret_cast<const BlockHeader *>(block_header_block.data());\n    if (ailego_unlikely(hd == nullptr)) {\n      LOG_ERROR(\"Failed to get block header\");\n      return IndexError_ReadData;\n    }\n    IndexStorage::MemoryBlock keys_block;\n    this->get_block_keys(block, keys_block);\n    const uint64_t *keys =\n        reinterpret_cast<const uint64_t *>(keys_block.data());\n    IndexStorage::MemoryBlock deletion_map_block;\n    this->get_block_deletion_map(block, deletion_map_block);\n    const DeletionMap *deletion_map =\n        reinterpret_cast<const DeletionMap *>(deletion_map_block.data());\n\n    for (uint32_t vector_index = 0; vector_index < hd->vector_count;\n         ++vector_index) {\n      if (deletion_map->test(vector_index)) {\n        continue;\n      }\n      size_t vector_off =\n          this->get_block_vector_offset(block.block_index, vector_index);\n      key_info_map_[keys[vector_index]] =\n          VectorLocation(block.segment_id, false, vector_off);\n      id_key_vector_.push_back(keys[vector_index]);\n    }\n    block = hd->next;\n  }\n  return 0;\n}\n\nint FlatStreamerEntity::load_segment_keys_to_vector() {\n  for (uint32_t i = 0; i < meta_.header.total_vector_count; i++) {\n    size_t block_id = i / block_vector_count();\n    uint32_t vector_index = i % block_vector_count();\n\n    ailego_assert(segments_.size() > 1);\n    size_t segment_block_count =\n        segments_[1]->data_size() / linear_block_size();\n    size_t segment_id = block_id / segment_block_count + 1;\n    size_t real_block_id = block_id % segment_block_count;\n    size_t vector_off =\n        this->get_block_vector_offset(real_block_id, vector_index);\n\n    withid_key_info_map_.push_back(\n        VectorLocation(segment_id, false, vector_off));\n    size_t key_off = get_block_key_offset(real_block_id, vector_index);\n    withid_key_map_.push_back(key_off);\n  }\n  return 0;\n}\n\nint FlatStreamerEntity::load_storage(IndexStorage::Pointer storage) {\n  int ret 
= this->load_linear_meta(storage);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  // Load Linear List\n  auto hd_segment = storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID);\n  if (ailego_unlikely(!hd_segment)) {\n    LOG_ERROR(\"Failed to get segment %s\", FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (hd_segment->data_size() < linear_list_count() * sizeof(BlockLocation)) {\n    LOG_ERROR(\"Invalid segment size, LinearListCount=%zu, size=%zu\",\n              linear_list_count(), hd_segment->data_size());\n    return IndexError_InvalidFormat;\n  }\n  segments_.emplace_back(hd_segment);\n\n  size_t index_size = hd_segment->capacity();\n  for (size_t i = 1; i <= meta_.segment_count; ++i) {\n    std::string segment_id =\n        ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, i);\n    auto seg = storage->get(segment_id);\n    if (!seg || seg->data_size() < meta_.header.block_size) {\n      LOG_ERROR(\"Failed to get segment %s, or invalid segment size\",\n                segment_id.c_str());\n      return IndexError_InvalidFormat;\n    }\n    index_size += seg->capacity();\n    segments_.emplace_back(std::move(seg));\n  }\n\n  for (size_t i = 0; i < linear_list_count(); i++) {\n    IndexStorage::MemoryBlock head_block;\n    this->get_head_block(head_block);\n    const BlockLocation *bl =\n        reinterpret_cast<const BlockLocation *>(head_block.data());\n    if (ailego_unlikely(bl == nullptr)) {\n      LOG_ERROR(\"Failed to get block loc\");\n      return IndexError_ReadData;\n    }\n    BlockLocation block = *bl;\n    if (use_key_info_map_) {\n      ret = this->load_segment_keys_to_map(block);\n    } else {\n      ret = this->load_segment_keys_to_vector();\n    }\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n  }\n\n  char create_time[32];\n  char update_time[32];\n  ailego::Realtime::Gmtime(meta_.create_time, \"%Y-%m-%d %H:%M:%S\", create_time,\n                           
sizeof(create_time));\n  ailego::Realtime::Gmtime(meta_.update_time, \"%Y-%m-%d %H:%M:%S\", update_time,\n                           sizeof(update_time));\n  LOG_DEBUG(\n      \"Load Index, IndexSize=%zu SegmentCount=%u SegmentSize=%u \"\n      \"RevisionId=%zu BlockCount=%u BlockSize=%u \"\n      \"BlockVectorCount=%u LinearListCount=%u TotalVecCount=%zu \"\n      \"CreateTime=%s UpdateTime=%s\",\n      index_size, meta_.segment_count, meta_.segment_size,\n      static_cast<size_t>(meta_.revision_id), meta_.header.block_count,\n      meta_.header.block_size, meta_.header.block_vector_count,\n      meta_.header.linear_list_count,\n      static_cast<size_t>(meta_.header.total_vector_count), create_time,\n      update_time);\n\n  stats_.set_index_size(index_size);\n  stats_.set_check_point(storage->check_point());\n  stats_.set_create_time(meta_.create_time);\n  stats_.set_revision_id(meta_.revision_id);\n  stats_.set_update_time(meta_.update_time);\n  stats_.set_loaded_count(meta_.header.total_vector_count);\n\n  return 0;\n}\n\nint FlatStreamerEntity::alloc_segment(void) {\n  size_t index = segments_.size();\n  if (index == kMaxSegmentId) {\n    LOG_ERROR(\"Failed to alloc new segment, exceed max count %zu\",\n              kMaxSegmentId);\n    return IndexError_IndexFull;\n  }\n\n  std::string segment_id =\n      ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, index);\n  size_t size =\n      ailego_align(meta_.segment_size, ailego::MemoryHelper::PageSize());\n  auto segment = storage_->get(segment_id);\n  if (segment) {\n    if (segment->padding_size() < linear_block_size()) {\n      LOG_ERROR(\n          \"Unexpect segment, index=%zu, data_size=%zu \"\n          \"padding_size=%zu block_size=%zu\",\n          index, segment->data_size(), segment->padding_size(),\n          linear_block_size());\n      return IndexError_Runtime;\n    }\n    LOG_WARN(\"Alloc an existing segment=%s capacity=%zu\", segment_id.c_str(),\n             segment->capacity());\n  
} else {\n    int ret = storage_->append(segment_id, size);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to alloc segment from storage\");\n      return ret;\n    }\n    segment = storage_->get(segment_id);\n    if (ailego_unlikely(!segment)) {\n      LOG_ERROR(\"Failed to get segment %s\", segment_id.c_str());\n      return IndexError_Runtime;\n    }\n  }\n  meta_.segment_count += 1;\n  meta_.header.linear_body_size += size;\n  segments_.emplace_back(std::move(segment));\n  *stats_.mutable_index_size() += size;\n\n  // Update meta information\n  auto meta_segment = storage_->get(FLAT_LINEAR_META_SEG_ID);\n  if (ailego_unlikely(!meta_segment)) {\n    LOG_ERROR(\"Failed to get segment %s\", FLAT_LINEAR_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (meta_segment->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"Failed to write meta segment\");\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint FlatStreamerEntity::alloc_block(const BlockLocation &next,\n                                    BlockLocation *block) {\n  if (segments_.size() <= 1 ||\n      segments_.back()->padding_size() < linear_block_size()) {\n    int ret = this->alloc_segment();\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n  }\n\n  auto &segment = segments_.back();\n  size_t block_index = segment->data_size() / linear_block_size();\n  if (block_index == kMaxBlockId) {\n    LOG_ERROR(\"Failed to alloc block, exceed max count %zu per segment\",\n              kMaxBlockId);\n    return IndexError_IndexFull;\n  }\n\n  BlockHeader header;\n  header.next = next;\n  header.vector_count = 0;\n  header.column_major = false;\n\n  size_t hd_off = segment->data_size() + linear_block_size() - sizeof(header);\n  if (segment->write(hd_off, &header, sizeof(header)) != sizeof(header)) {\n    LOG_ERROR(\"Failed to write block header\");\n    return IndexError_WriteData;\n  }\n\n  size_t del_off = hd_off - sizeof(DeletionMap);\n  
DeletionMap reset_del_map{};\n  if (segment->write(del_off, &reset_del_map, sizeof(reset_del_map)) !=\n      sizeof(reset_del_map)) {\n    LOG_ERROR(\"Failed to write block deletion map\");\n    return IndexError_WriteData;\n  }\n\n  ++meta_.header.block_count;\n  block->segment_id = segments_.size() - 1;\n  block->block_index = (segment->data_size() / linear_block_size()) - 1;\n\n  return 0;\n}\n\nint FlatStreamerEntity::add_to_block(const BlockLocation &block, uint64_t key,\n                                     const void *data, size_t size) {\n  IndexStorage::MemoryBlock block_header_block;\n  this->get_block_header(block, block_header_block);\n  const BlockHeader *header =\n      reinterpret_cast<const BlockHeader *>(block_header_block.data());\n  if (ailego_unlikely(header == nullptr)) {\n    LOG_ERROR(\"Failed to get header\");\n    return IndexError_ReadData;\n  }\n\n  if (header->vector_count == block_vector_count()) {\n    return IndexError_IndexFull;\n  }\n\n  auto &segment = segments_[block.segment_id];\n\n  size_t vector_off =\n      get_block_vector_offset(block.block_index, header->vector_count);\n  if (segment->write(vector_off, data, size) != size) {\n    LOG_ERROR(\"Failed to write vector, off=%zu size=%zu\", vector_off, size);\n    return IndexError_WriteData;\n  }\n\n  size_t key_off =\n      get_block_key_offset(block.block_index, header->vector_count);\n  if (segment->write(key_off, &key, sizeof(key)) != sizeof(key)) {\n    LOG_ERROR(\"Failed to write key, off=%zu\", key_off);\n    return IndexError_WriteData;\n  }\n\n  BlockHeader hd = *header;\n  hd.vector_count += 1;\n  size_t hd_off = get_block_header_offset(block.block_index);\n  if (segment->write(hd_off, &hd, sizeof(hd)) != sizeof(hd)) {\n    LOG_ERROR(\"Failed to write block header, off=%zu\", hd_off);\n    return IndexError_WriteData;\n  }\n\n  VectorLocation loc(block.segment_id, false, vector_off);\n  key_info_map_lock_->lock();\n  key_info_map_[key] = loc;\n  
id_key_vector_.push_back(key);\n  withid_key_info_map_.push_back(loc);\n  withid_key_map_.push_back(key_off);\n  key_info_map_lock_->unlock();\n\n  ++meta_.header.total_vector_count;\n  return 0;\n}\n\nint FlatStreamerEntity::add_vector_with_id(const uint32_t id, const void *query,\n                                           const uint32_t size) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  // if (filter_same_key_) {\n  //   key_info_map_lock_->lock_shared();\n  //   if (key_info_map_.find(id) != key_info_map_.end()) {\n  //     key_info_map_lock_->unlock_shared();\n  //     LOG_WARN(\"Try to add duplicate key, drop it\");\n  //     return IndexError_Duplicate;\n  //   }\n  //   key_info_map_lock_->unlock_shared();\n  // }\n\n  if (size != static_cast<size_t>(index_meta_.element_size())) {\n    LOG_ERROR(\"Failed to add, mismatch size %u vs elemsize %u\", size,\n              index_meta_.element_size());\n    return IndexError_Mismatch;\n  }\n\n\n  if (id >= vector_count()) {\n    IndexStorage::MemoryBlock head_block;\n    this->get_head_block(head_block);\n    BlockLocation block =\n        *reinterpret_cast<const BlockLocation *>(head_block.data());\n    if (!this->is_valid_block(block)) {\n      int ret = this->alloc_block(block, &block);\n      if (ailego_unlikely(ret != 0)) {\n        return ret;\n      }\n      ret = this->update_head_block(block);\n      if (ailego_unlikely(ret != 0)) {\n        return ret;\n      }\n    }\n    for (size_t start_id = vector_count(); start_id < id; ++start_id) {\n      std::vector<char> vec(size);\n      int ret = this->add_to_block(block, kInvalidKey, vec.data(), size);\n      if (ret == IndexError_IndexFull) {\n        ret = this->alloc_block(block, &block);\n        if (ailego_unlikely(ret != 0)) {\n          return ret;\n        }\n        ret = this->update_head_block(block);\n        if (ailego_unlikely(ret != 0)) {\n          return ret;\n        }\n        ret = this->add_to_block(block, kInvalidKey, vec.data(), 
size);\n        if (ailego_unlikely(ret != 0)) {\n          return ret;\n        }\n      }\n    }\n\n    int ret = this->add_to_block(block, id, query, size);\n    if (ret == IndexError_IndexFull) {\n      ret = this->alloc_block(block, &block);\n      if (ailego_unlikely(ret != 0)) {\n        return ret;\n      }\n      ret = this->update_head_block(block);\n      if (ailego_unlikely(ret != 0)) {\n        return ret;\n      }\n      ret = this->add_to_block(block, id, query, size);\n      if (ailego_unlikely(ret != 0)) {\n        return ret;\n      }\n    }\n  } else {\n    VectorLocation vector_loc = withid_key_info_map_[id];\n    auto segment = this->get_segment(vector_loc.segment_id);\n    size_t vector_off = vector_loc.offset;\n    if (segment->write(vector_off, query, size) != size) {\n      LOG_ERROR(\"Failed to write vector, off=%zu size=%u\", vector_off, size);\n      return IndexError_WriteData;\n    }\n    size_t key_off = withid_key_map_[id];\n    uint64_t key = id;\n    if (segment->write(key_off, &key, sizeof(key)) != sizeof(key)) {\n      LOG_ERROR(\"Failed to write key, off=%zu\", key_off);\n      return IndexError_WriteData;\n    }\n    key_info_map_lock_->lock();\n    key_info_map_[key] = vector_loc;\n    key_info_map_lock_->unlock();\n  }\n  (*stats_.mutable_added_count())++;\n  stats_.set_revision_id(meta_.revision_id + 1);\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <unordered_map>\n#include <ailego/parallel/lock.h>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"flat_index_format.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Flat Streamer Entity\n */\nclass FlatStreamerEntity {\n public:\n  typedef std::shared_ptr<FlatStreamerEntity> Pointer;\n\n  //! Constructor\n  explicit FlatStreamerEntity(IndexStreamer::Stats &stats);\n\n  //! Destructor\n  virtual ~FlatStreamerEntity(void) = default;\n\n  //! Open the entity with storage\n  int open(IndexStorage::Pointer storage, const IndexMeta &mt);\n\n  //! Close the entity\n  int close(void);\n\n  //! Flush Linear Meta information to storage\n  int flush_linear_meta(void);\n\n  //! Flush linear index to storage\n  int flush(uint64_t checkpoint);\n\n  //! Add vector to linear index\n  int add(uint64_t key, const void *vec, size_t size);\n\n  //! Search in linear list with filter\n  int search(const void *query, const IndexFilter &filter, uint32_t *scan_count,\n             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;\n\n  //! 
Search in a block\n  void search_block(const void *query, const BlockLocation &bl,\n                    const BlockHeader *hd, float norm_val,\n                    IndexDocumentHeap *heap) const;\n\n  //! Search in a block with filter\n  void search_block(const void *query, const BlockLocation &bl,\n                    const BlockHeader *hd, float norm_val,\n                    const IndexFilter &filter, const DeletionMap *deletion_map,\n                    IndexDocumentHeap *heap,\n                    IndexContext::Stats *context_stats) const;\n\n  //! Flat Search with filter\n  int search_bf(const void *query, const IndexFilter &filter,\n                IndexDocumentHeap *heap,\n                IndexContext::Stats *context_stats) const;\n\n  //! Clone the entity\n  virtual FlatStreamerEntity::Pointer clone(void) const;\n\n  //! Retrieve the total vectors in the index\n  size_t vector_count(void) const {\n    return meta_.header.total_vector_count;\n  }\n\n  //! Retrieve the linear list count\n  size_t linear_list_count(void) const {\n    return meta_.header.linear_list_count;\n  }\n\n  //! Retrieve block size of the linear vector\n  size_t linear_block_size(void) const {\n    return meta_.header.block_size;\n  }\n\n  //! Retrieve the vectors count in one block\n  size_t block_vector_count(void) const {\n    // assert(meta_.header.block_vector_count == 32);\n    return meta_.header.block_vector_count;\n  }\n\n  //! Retrieve IndexMeta of the linear index\n  const IndexMeta &meta(void) const {\n    return index_meta_;\n  }\n\n  //! Retrieve mutable IndexMeta of the linear index\n  IndexMeta *mutable_meta(void) {\n    return &index_meta_;\n  }\n\n  //! Retrieve vector by local id\n  const void *get_vector_by_key(uint64_t key) const;\n\n  int get_vector_by_key(const uint64_t key,\n                        IndexStorage::MemoryBlock &block) const;\n\n  //! Create a new iterator\n  IndexProvider::Iterator::Pointer creater_iterator(void) const;\n\n\n  //! 
Set params\n  void set_block_vector_count(uint32_t count) {\n    meta_.header.block_vector_count = count;\n  }\n\n  void set_use_key_info_map(bool use_id_map) {\n    use_key_info_map_ = use_id_map;\n    LOG_DEBUG(\"use_key_info_map_: %d\", (int)use_key_info_map_);\n  }\n\n  //! Set params\n  void set_segment_size(uint32_t size) {\n    meta_.segment_size = size;\n  }\n\n  //! Set params\n  void set_linear_list_count(uint32_t count) {\n    meta_.header.linear_list_count = count;\n  }\n\n  //! Set params\n  void enable_filter_same_key(bool enabled) {\n    filter_same_key_ = enabled;\n  }\n\n  inline uint64_t key(uint32_t id) const {\n    if (id < id_key_vector_.size()) {\n      return id_key_vector_[id];\n    } else {\n      return kInvalidKey;\n    }\n  }\n\n  inline void row_major_distance(const void *query, const void *feature,\n                                 size_t fnum, float *out) const {\n    const uint8_t *cur_feature = reinterpret_cast<const uint8_t *>(feature);\n    for (size_t f = 0; f < fnum; ++f) {\n      row_distance_(query, cur_feature, index_meta_.dimension(), out + f);\n      cur_feature += index_meta_.element_size();\n    }\n  }\n\n  int add_vector_with_id(const uint32_t id, const void *query,\n                         const uint32_t element_size);\n\n private:\n  //! Disable them\n  FlatStreamerEntity(const FlatStreamerEntity &) = delete;\n  FlatStreamerEntity &operator=(const FlatStreamerEntity &) = delete;\n\n  /*! Iterator of all the linear list\n   */\n  class Iterator : public IndexProvider::Iterator {\n   public:\n    //! Constructor\n    Iterator(const FlatStreamerEntity::Pointer &entity) : entity_(entity) {\n      this->read_next_block();\n    }\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return reinterpret_cast<const char *>(data_) +\n             block_vector_index_ * entity_->index_meta_.element_size();\n    }\n    //! 
Test if the iterator is valid\n    bool is_valid(void) const override {\n      return is_valid_;\n    }\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return keys_[block_vector_index_];\n    }\n    //! Next iterator\n    void next(void) override {\n      if (++block_vector_index_ == block_vector_count_) {\n        ++block_index_;\n        this->read_next_block();\n      }\n    }\n\n   private:\n    //! Read next non-empty block\n    void read_next_block(void);\n\n    //! Members\n    std::string buffer_{};\n    const FlatStreamerEntity::Pointer entity_;\n    IndexStorage::MemoryBlock block_;\n    const void *data_{nullptr};\n    const uint64_t *keys_{nullptr};\n    uint32_t segment_id_{1u};  // The first segment is header info\n    uint32_t block_index_{0u};\n    uint32_t block_vector_index_{0u};\n    uint32_t block_vector_count_{0u};\n    bool is_valid_{true};\n  };\n\n  //! Retrieve storage segment by index\n  const IndexStorage::Segment::Pointer get_segment(size_t index) const {\n    for (size_t i = segments_.size(); i <= index; ++i) {\n      auto segment_id =\n          ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, i);\n      auto segment = storage_->get(segment_id);\n      if (!segment) {\n        LOG_ERROR(\"Failed to get segment %s\", segment_id.c_str());\n        return IndexStorage::Segment::Pointer();\n      }\n      segments_.emplace_back(std::move(segment));\n    }\n    return segments_[index];\n  }\n\n  //! Adjust the segment size so that it is aligned to the page size\n  void AdjustSegmentSize(StreamerLinearMeta *mt) {\n    if (mt->segment_size < mt->header.block_size) {\n      mt->segment_size = mt->header.block_size;\n    }\n    mt->segment_size = ailego_align(\n        mt->segment_size / mt->header.block_size * mt->header.block_size,\n        ailego::MemoryHelper::PageSize());\n  }\n\n  //! Init with an empty storage\n  int init_storage(IndexStorage::Pointer storage);\n\n  //! 
Load linear meta information from storage\n  int load_linear_meta(IndexStorage::Pointer storage);\n\n  //! Load keys to keys map\n  int load_segment_keys_to_map(BlockLocation block);\n\n  //! Load vector locations and key offsets to vectors\n  int load_segment_keys_to_vector(void);\n\n  //! Load index from storage\n  int load_storage(IndexStorage::Pointer storage);\n\n  //! Check whether the block location is valid (non-zero segment id)\n  bool is_valid_block(const BlockLocation &block) const {\n    return block.segment_id != 0;\n  }\n\n  //! Update header block of a linear list\n  int update_head_block(const BlockLocation &block) {\n    ailego_assert_with(segments_.size() != 0, \"Invalid Segments\");\n\n    auto &hd_segment = segments_[0];\n    if (hd_segment->write(0, &block, sizeof(block)) != sizeof(block)) {\n      LOG_ERROR(\"Failed to write head block location\");\n      return IndexError_WriteData;\n    }\n\n    return 0;\n  }\n\n  //! Alloc a new segment\n  int alloc_segment(void);\n\n  //! Alloc a new block\n  int alloc_block(const BlockLocation &next, BlockLocation *block);\n\n  //! 
Add a record to a block\n  int add_to_block(const BlockLocation &block, uint64_t key, const void *data,\n                   size_t size);\n\n private:\n  size_t get_block_offset(uint32_t block_index) const {\n    return block_index * linear_block_size();\n  }\n\n  size_t get_block_header_offset(uint32_t block_index) const {\n    return get_block_offset(block_index) + linear_block_size() -\n           sizeof(BlockHeader);\n  }\n\n  size_t get_block_deletion_map_offset(uint32_t block_index) const {\n    return get_block_header_offset(block_index) - sizeof(DeletionMap);\n  }\n\n  size_t get_block_key_offset(uint32_t block_index,\n                              uint32_t vector_index) const {\n    return get_block_offset(block_index) +\n           block_vector_count() * index_meta_.element_size() +\n           sizeof(uint64_t) * vector_index;\n  }\n\n  size_t get_block_vector_offset(uint32_t block_index,\n                                 uint32_t vector_index) const {\n    return this->get_block_offset(block_index) +\n           vector_index * index_meta_.element_size();\n  }\n\n  //! Get header block of an linear list\n  int get_head_block(IndexStorage::MemoryBlock &header_block) const {\n    ailego_assert_with(segments_.size() != 0, \"Invalid Segments\");\n    auto &hd_segment = segments_[0];\n    if (hd_segment->read(0, header_block, sizeof(BlockLocation)) !=\n        sizeof(BlockLocation)) {\n      LOG_ERROR(\"Failed to read head block location\");\n      return -1;\n    }\n    return 0;\n  }\n\n  //! 
Get BlockHeader of the block\n  int get_block_header(const BlockLocation &block,\n                       IndexStorage::MemoryBlock &header_block) const {\n    // The header is located in the end of a block to align features\n    auto &segment = this->get_segment(block.segment_id);\n    ailego_assert_with(segment != nullptr, \"Index Overflow\");\n    size_t off = this->get_block_header_offset(block.block_index);\n    if (segment->read(off, header_block, sizeof(BlockHeader)) !=\n        sizeof(BlockHeader)) {\n      LOG_ERROR(\"Failed to read block header, off=%zu\", off);\n      return -1;\n    }\n    return 0;\n  }\n  int get_block_deletion_map(\n      const BlockLocation &block,\n      IndexStorage::MemoryBlock &deletion_map_block) const {\n    auto &segment = this->get_segment(block.segment_id);\n    ailego_assert_with(segment != nullptr, \"Index Overflow\");\n    size_t off = this->get_block_deletion_map_offset(block.block_index);\n    if (segment->read(off, deletion_map_block, sizeof(DeletionMap)) !=\n        sizeof(DeletionMap)) {\n      LOG_ERROR(\"Failed to read deletion map, off=%zu\", off);\n      return -1;\n    }\n    return 0;\n  }\n\n  int get_block_keys(const BlockLocation &block,\n                     IndexStorage::MemoryBlock &keys_block) const {\n    auto &segment = this->get_segment(block.segment_id);\n    ailego_assert_with(segment != nullptr, \"Index Overflow\");\n    size_t off = this->get_block_key_offset(block.block_index, 0);\n    if (segment->read(off, keys_block,\n                      block_vector_count() * sizeof(uint64_t)) !=\n        block_vector_count() * sizeof(uint64_t)) {\n      LOG_ERROR(\"Failed to read block header, off=%zu\", off);\n      return -1;\n    }\n    return 0;\n  }\n\n  int get_block_vectors(const BlockLocation &block,\n                        IndexStorage::MemoryBlock &vector_block) const {\n    auto &segment = this->get_segment(block.segment_id);\n    ailego_assert_with(segment != nullptr, \"Index Overflow\");\n    
size_t off = this->get_block_vector_offset(block.block_index, 0);\n    if (segment->read(off, vector_block,\n                      block_vector_count() * index_meta_.element_size()) !=\n        block_vector_count() * index_meta_.element_size()) {\n      LOG_ERROR(\"Failed to read block header, off=%zu\", off);\n      return -1;\n    }\n    return 0;\n  }\n\n private:\n  //! Constants\n  static constexpr size_t kMaxSegmentId = std::numeric_limits<uint32_t>::max();\n  static constexpr size_t kMaxBlockId = std::numeric_limits<uint32_t>::max();\n\n  //! Members\n  std::mutex mutex_{};\n  IndexMeta index_meta_{};\n  IndexStorage::Pointer storage_{};\n  IndexMetric::MatrixDistance row_distance_{}, column_distance_{};\n  mutable std::vector<IndexStorage::Segment::Pointer> segments_{};\n  StreamerLinearMeta meta_{};\n  IndexStreamer::Stats &stats_;\n  mutable std::shared_ptr<ailego::SharedMutex> key_info_map_lock_{};\n  std::unordered_map<uint64_t, VectorLocation> key_info_map_{};\n  std::vector<VectorLocation> withid_key_info_map_{};\n  std::vector<uint32_t> withid_key_map_{};\n  std::vector<uint64_t> id_key_vector_{};\n  bool filter_same_key_{false};\n  bool use_key_info_map_{true};\n  uint32_t vec_unit_size_{0};\n  uint32_t vec_cols_{0};\n  mutable std::string vec_buf_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_streamer_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"flat_distance_matrix.h\"\n#include \"flat_searcher.h\"\n#include \"flat_streamer.h\"\n#include \"flat_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Streamer Provider\n */\n\ntemplate <size_t BATCH_SIZE>\nclass FlatStreamerProvider : public IndexProvider {\n public:\n  //! Constructor\n  FlatStreamerProvider(const FlatStreamer<BATCH_SIZE> *owner) {\n    feature_size_ = owner->meta().element_size();\n    total_vector_count_ = owner->entity().vector_count();\n    owner_ = owner;\n    block_buffer_.resize(BATCH_SIZE * feature_size_);\n  }\n\n  //! Create a new iterator\n  IndexProvider::Iterator::Pointer create_iterator(void) override {\n    return owner_->entity().creater_iterator();\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return total_vector_count_;\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const override {\n    return owner_->meta().dimension();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return owner_->meta().data_type();\n  }\n\n  //! Retrieve vector size in bytes\n  size_t element_size(void) const override {\n    return owner_->meta().element_size();\n  }\n\n  //! 
Retrieve a vector using a primary key\n  const void *get_vector(uint64_t key) const override {\n    return this->get_vector_by_key(key);\n  }\n\n  int get_vector(const uint64_t key,\n                 IndexStorage::MemoryBlock &block) const override {\n    return this->get_vector_by_key(key, block);\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_->name();\n  }\n\n protected:\n  //! Retrieve a vector via primary key\n  const void *get_vector_by_key(uint64_t key) const {\n    return owner_->get_vector_by_key(key);\n  }\n\n  int get_vector_by_key(const uint64_t key,\n                        IndexStorage::MemoryBlock &block) const {\n    return owner_->get_vector_by_key(key, block);\n  }\n\n private:\n  //! Members\n  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};\n  IndexStorage::Segment::Pointer features_segment_{};\n  uint32_t feature_size_{0};\n  uint32_t total_vector_count_{0};\n  mutable std::vector<uint8_t> block_buffer_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat/flat_utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <mutex>\n#include <ailego/utility/matrix_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_metric.h>\n\nnamespace zvec {\nnamespace core {\n\n//! The default size of reading a block\nstatic constexpr uint32_t FLAT_DEFAULT_READ_BLOCK_SIZE = 4 * 1024 * 1024;\nstatic const std::string FLAT_LINEAR_META_SEG_ID = \"flat.linear_meta\";\nstatic const std::string FLAT_LINEAR_LIST_HEAD_SEG_ID = \"flat.linear_list_head\";\n\nstatic const std::string FLAT_SEGMENT_KEYS_SEG_ID(\"flat.keys\");\nstatic const std::string FLAT_SEGMENT_FEATURES_SEG_ID(\"flat.features\");\nstatic const std::string FLAT_SEGMENT_MAPPING_SEG_ID(\"flat.mapping\");\n\n// index params\nstatic const std::string PARAM_FLAT_COLUMN_MAJOR_ORDER(\n    \"proxima.flat.column_major_order\");\nstatic const std::string PARAM_FLAT_BATCH_SIZE(\"proxima.flat.batch_size\");\nstatic const std::string PARAM_FLAT_READ_BLOCK_SIZE(\n    \"proxima.flat.read_block_size\");\nstatic const std::string PARAM_FLAT_USE_ID_MAP(\"proxima.flat.use_id_map\");\n\n//! 
Determines if a number is equal to two to the power of n.\ntemplate <size_t K>\nstruct IsEqualPowerofTwo\n    : std::integral_constant<bool, K != 0 && (K ^ (K - 1)) == (K | (K - 1))> {};\n\n//! Transpose a block\ntemplate <size_t M>\nstatic inline void ReverseTranspose(size_t align_size, const void *src,\n                                    size_t dim, void *dst) {\n  switch (align_size) {\n    case 2:\n      ailego::MatrixHelper::ReverseTranspose<uint16_t, M>(src, dim, dst);\n      break;\n    case 4:\n      ailego::MatrixHelper::ReverseTranspose<uint32_t, M>(src, dim, dst);\n      break;\n    case 8:\n      ailego::MatrixHelper::ReverseTranspose<uint64_t, M>(src, dim, dst);\n      break;\n  }\n}\n\nstatic inline void ReverseTranspose(size_t align_size, const void *src,\n                                    size_t m, size_t dim, void *dst) {\n  switch (align_size) {\n    case 2:\n      ailego::MatrixHelper::ReverseTranspose<uint16_t>(src, m, dim, dst);\n      break;\n    case 4:\n      ailego::MatrixHelper::ReverseTranspose<uint32_t>(src, m, dim, dst);\n      break;\n    case 8:\n      ailego::MatrixHelper::ReverseTranspose<uint64_t>(src, m, dim, dst);\n      break;\n  }\n}\n\ntemplate <typename T>\nstatic inline void TransposeOne(const void *src, size_t M, size_t N,\n                                void *dst) {\n  for (size_t i = 0; i < N; ++i) {\n    reinterpret_cast<T *>(dst)[i] = reinterpret_cast<const T *>(src)[i * M];\n  }\n}\n\nstatic inline void Transpose(size_t align_size, const void *src, size_t m,\n                             size_t dim, void *dst) {\n  switch (align_size) {\n    case 2:\n      ailego::MatrixHelper::Transpose<uint16_t>(src, m, dim, dst);\n      break;\n    case 4:\n      ailego::MatrixHelper::Transpose<uint32_t>(src, m, dim, dst);\n      break;\n    case 8:\n      ailego::MatrixHelper::Transpose<uint64_t>(src, m, dim, dst);\n      break;\n  }\n}\n\n//! 
Transpose queries\ntemplate <size_t K>\nvoid TransposeQueries(const void *query, const IndexQueryMeta &qmeta,\n                      size_t query_count, std::string *out) {\n  if (K <= 1) {\n    ailego_assert(query_count == 1);\n    (void)query_count;\n    out->append(reinterpret_cast<const char *>(query) + out->size(),\n                qmeta.element_size());\n  } else {\n    ailego_assert_with(IsEqualPowerofTwo<K>::value,\n                       \"K must be equal to two to the power of n.\");\n\n    size_t query_batch_count = query_count / K;\n    size_t query_offset = out->size();\n    out->resize(query_offset + query_batch_count * K * qmeta.element_size());\n\n    switch (IndexMeta::AlignSizeof(qmeta.data_type())) {\n      case 2:\n        for (size_t i = 0; i != query_batch_count; ++i) {\n          ailego::MatrixHelper::Transpose<uint16_t, K>(\n              (const char *)query + query_offset,\n              qmeta.element_size() / sizeof(uint16_t), &((*out)[query_offset]));\n          query_offset += qmeta.element_size() * K;\n        }\n        break;\n\n      case 4:\n        for (size_t i = 0; i != query_batch_count; ++i) {\n          ailego::MatrixHelper::Transpose<uint32_t, K>(\n              (const char *)query + query_offset,\n              qmeta.element_size() / sizeof(uint32_t), &((*out)[query_offset]));\n\n          query_offset += qmeta.element_size() * K;\n        }\n        break;\n\n      case 8:\n        for (size_t i = 0; i != query_batch_count; ++i) {\n          ailego::MatrixHelper::Transpose<uint64_t, K>(\n              (const char *)query + query_offset,\n              qmeta.element_size() / sizeof(uint64_t), &((*out)[query_offset]));\n          query_offset += qmeta.element_size() * K;\n        }\n        break;\n\n      default:\n        ailego_check_with(0, \"BAD CASE\");\n    }\n    size_t query_left_count = query_count % K;\n    if (query_left_count != 0) {\n      TransposeQueries<(K >> 1)>(query, qmeta, query_left_count, out);\n    }\n 
 }\n}\n\n//! Create and initialize measure\nstatic inline int InitializeMetric(const IndexMeta &mt,\n                                   IndexMetric::Pointer *out) {\n  IndexMetric::Pointer measure = IndexFactory::CreateMetric(mt.metric_name());\n  if (!measure) {\n    return IndexError_NoExist;\n  }\n\n  int error_code = measure->init(mt, mt.metric_params());\n  if (error_code != 0) {\n    return error_code;\n  }\n  *out = measure;\n  return 0;\n}\n\n//! Verify measure\nstatic inline bool VerifyMetric(const IndexMeta &meta) {\n  IndexMetric::Pointer measure = IndexFactory::CreateMetric(meta.metric_name());\n  if (!measure) {\n    return false;\n  }\n  int error_code = measure->init(meta, meta.metric_params());\n  if (error_code != 0) {\n    return false;\n  }\n  return true;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_knn_flat_sparse \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS core_framework \n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_builder.h\"\n#include <cstddef>\n#include <cstdint>\n#include <utility>\n#include <utility/sparse_utility.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"flat_sparse_index_format.h\"\n#include \"flat_sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nFlatSparseBuilder::FlatSparseBuilder() {}\n\nint FlatSparseBuilder::init(const IndexMeta &meta,\n                            const ailego::Params & /*params*/) {\n  LOG_INFO(\"Begin FlatSparseBuilder::init\");\n\n  meta_ = meta;\n\n  state_ = BUILD_STATE_INITED;\n  LOG_INFO(\"End FlatSparseBuilder::init\");\n  return 0;\n}\n\nint FlatSparseBuilder::cleanup(void) {\n  LOG_INFO(\"Begin FlatSparseBuilder::cleanup\");\n\n  stats_.clear_attributes();\n  stats_.set_trained_count(0UL);\n  stats_.set_built_count(0UL);\n  stats_.set_dumped_count(0UL);\n  stats_.set_discarded_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  stats_.set_built_costtime(0UL);\n  stats_.set_dumped_costtime(0UL);\n  state_ = BUILD_STATE_INIT;\n\n  LOG_INFO(\"End FlatSparseBuilder::cleanup\");\n\n  return 0;\n}\n\nint FlatSparseBuilder::train(IndexThreads::Pointer,\n                             IndexSparseHolder::Pointer /*holder*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    
LOG_ERROR(\"Init the builder before FlatSparseBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin FlatSparseBuilder::train\");\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End FlatSparseBuilder::train\");\n\n  return 0;\n}\n\nint FlatSparseBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before FlatSparseBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin FlatSparseBuilder::train by trainer\");\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End FlatSparseBuilder::train by trainer\");\n\n  return 0;\n}\n\nint FlatSparseBuilder::build(IndexThreads::Pointer,\n                             IndexSparseHolder::Pointer holder) {\n  LOG_INFO(\"Begin FlatSparseBuilder::build\");\n\n  ailego::ElapsedTime stamp;\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while building index\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while building index\");\n    return IndexError_Mismatch;\n  }\n\n  holder_ = std::move(holder);\n\n  stats_.set_built_count(holder_->count());\n  stats_.set_built_costtime(stamp.milli_seconds());\n  state_ = BUILD_STATE_BUILT;\n\n  LOG_INFO(\"End FlatSparseBuilder::build\");\n  return 0;\n}\n\nint FlatSparseBuilder::dump(const IndexDumper::Pointer &dumper) {\n  if (state_ != BUILD_STATE_BUILT || !holder_) {\n    LOG_INFO(\"Build the index before FlatSparseBuilder::dump\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin FlatSparseBuilder::dump\");\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    
return ret;\n  }\n\n  uint32_t dump_count;\n  ret = do_dump(dumper, &dump_count);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump index\");\n    return ret;\n  }\n\n  holder_ = nullptr;\n  stats_.set_dumped_count(dump_count);\n  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);\n\n  LOG_INFO(\"End FlatSparseBuilder::dump\");\n  return 0;\n}\n\nint FlatSparseBuilder::do_dump(const IndexDumper::Pointer &dumper,\n                               uint32_t *dump_count) {\n  // bf meta\n  int ret = dump_meta(dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump meta\");\n    return ret;\n  }\n\n  std::vector<uint64_t> keys;\n  ret = dump_vector_and_offset(dumper.get(), &keys);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump offset data\");\n    return ret;\n  }\n\n  ret = dump_keys(keys, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump keys\");\n    return ret;\n  }\n\n  ret = dump_mapping(keys, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump mapping\");\n    return ret;\n  }\n\n  *dump_count = keys.size();\n\n  return 0;\n}\n\nint FlatSparseBuilder::dump_meta(IndexDumper *dumper) {\n  FlatSparseMeta meta;\n  meta.create_time = ailego::Realtime::Seconds();\n  meta.update_time = ailego::Realtime::Seconds();\n  meta.doc_cnt = holder_->count();\n\n  if (dumper->write(&meta, sizeof(meta)) != sizeof(meta)) {\n    LOG_ERROR(\"Failed to write meta\");\n    return IndexError_WriteData;\n  }\n\n  size_t meta_padding_size = ailego_align(sizeof(meta), 32) - sizeof(meta);\n  if (meta_padding_size) {\n    std::string padding(meta_padding_size, '\\0');\n    if (dumper->write(padding.data(), meta_padding_size) != meta_padding_size) {\n      LOG_ERROR(\"Failed to write meta padding\");\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_META_SEG_ID, sizeof(meta),\n                        meta_padding_size, 0);\n}\n\nint FlatSparseBuilder::dump_vector_and_offset(IndexDumper 
*dumper,\n                                              std::vector<uint64_t> *keys) {\n  // iterate the holder\n  auto iter = holder_->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Failed to create iterator\");\n    return IndexError_Runtime;\n  }\n\n  uint64_t written_length{0U};\n\n  std::vector<std::pair<uint64_t, uint32_t>> offset_lens;\n  while (iter->is_valid()) {\n    keys->push_back(iter->key());\n\n    uint32_t length;\n    if (write_vector_data(iter->sparse_count(), iter->sparse_indices(),\n                          iter->sparse_data(), dumper, &length) != 0) {\n      return IndexError_WriteData;\n    }\n\n    offset_lens.push_back({written_length, length});\n    written_length += length;\n    iter->next();\n  }\n\n  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID, written_length, 0,\n                     0) != 0) {\n    LOG_ERROR(\"Failed to append offset data\");\n    return IndexError_WriteData;\n  }\n\n  LOG_DEBUG(\"Data total written: %zu\", (size_t)written_length);\n\n  for (auto &offset_len : offset_lens) {\n    if (dumper->write(&offset_len.first, sizeof(offset_len.first)) !=\n        sizeof(offset_len.first)) {\n      LOG_ERROR(\"Failed to write offset\");\n      return IndexError_WriteData;\n    }\n\n    if (dumper->write(&offset_len.second, sizeof(offset_len.second)) !=\n        sizeof(offset_len.second)) {\n      LOG_ERROR(\"Failed to write length\");\n      return IndexError_WriteData;\n    }\n  }\n\n  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID,\n                     offset_lens.size() * (sizeof(uint64_t) + sizeof(uint32_t)),\n                     0, 0) != 0) {\n    LOG_ERROR(\"Failed to append offset data\");\n    return IndexError_WriteData;\n  }\n\n  LOG_DEBUG(\"Offset total written: %zu\",\n            offset_lens.size() * (sizeof(uint64_t) + sizeof(uint32_t)));\n\n  return 0;\n}\n\nint FlatSparseBuilder::write_vector_data(const uint32_t sparse_count,\n                                         const uint32_t 
*sparse_indices,\n                                         const void *sparse_vec,\n                                         IndexDumper *dumper,\n                                         uint32_t *length) {\n  std::string sparse_buffer;\n\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_vec,\n                                   meta_.unit_size(), sparse_buffer);\n\n  if (dumper->write(sparse_buffer.data(), sparse_buffer.size()) !=\n      sparse_buffer.size()) {\n    LOG_ERROR(\"Failed to write sparse data\");\n    return IndexError_WriteData;\n  }\n\n  *length = sparse_buffer.size();\n\n  return 0;\n}\n\nint FlatSparseBuilder::dump_keys(const std::vector<uint64_t> &keys,\n                                 IndexDumper *dumper) {\n  size_t keys_size = keys.size() * sizeof(uint64_t);\n  if (dumper->write(keys.data(), keys_size) != keys_size) {\n    LOG_ERROR(\"Failed to write keys to dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n  size_t keys_padding_size = ailego_align(keys_size, 32) - keys_size;\n  if (keys_padding_size) {\n    std::string padding(keys_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write padding to dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID, keys_size,\n                        keys_padding_size, 0);\n}\n\nint FlatSparseBuilder::dump_mapping(const std::vector<uint64_t> &keys,\n                                    IndexDumper *dumper) {\n  std::vector<uint32_t> mapping(keys.size());\n  std::iota(mapping.begin(), mapping.end(), 0);\n  std::sort(\n      mapping.begin(), mapping.end(),\n      [&keys](uint32_t lhs, uint32_t rhs) { return (keys[lhs] < keys[rhs]); });\n\n  size_t mapping_size = mapping.size() * sizeof(uint32_t);\n  size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;\n  if 
(dumper->write(mapping.data(), mapping_size) != mapping_size) {\n    LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n\n  // Write the padding if need\n  if (mapping_padding_size) {\n    std::string padding(mapping_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID, mapping_size,\n                        mapping_padding_size, 0);\n}\n\nINDEX_FACTORY_REGISTER_BUILDER(FlatSparseBuilder);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_holder.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Sparse Builder\n */\nclass FlatSparseBuilder : public IndexBuilder {\n public:\n  //! Constructor\n  FlatSparseBuilder();\n\n  //! Initialize the builder\n  int init(const IndexMeta &meta, const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  int cleanup(void) override;\n\n  //! Train the data\n  int train(IndexThreads::Pointer, IndexSparseHolder::Pointer holder) override;\n\n  //! Train the data\n  int train(const IndexTrainer::Pointer &trainer) override;\n\n  int train(IndexThreads::Pointer /*threads*/,\n            IndexHolder::Pointer /*holder*/) override {\n    return IndexError_NotImplemented;\n  }\n\n  int build(IndexThreads::Pointer /*threads*/,\n            IndexHolder::Pointer /*holder*/) override {\n    return IndexError_NotImplemented;\n  }\n\n  //! Build the index\n  int build(IndexThreads::Pointer threads,\n            IndexSparseHolder::Pointer holder) override;\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! 
Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n private:\n  int do_dump(const IndexDumper::Pointer &dumper, uint32_t *dump_count);\n\n  int dump_meta(IndexDumper *dumper);\n\n  int dump_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  int dump_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  int dump_vector_and_offset(IndexDumper *dumper, std::vector<uint64_t> *keys);\n\n  int write_vector_data(const uint32_t sparse_count,\n                        const uint32_t *sparse_indices, const void *sparse_vec,\n                        IndexDumper *dumper, uint32_t *length);\n\n private:\n  enum BUILD_STATE {\n    BUILD_STATE_INIT = 0,\n    BUILD_STATE_INITED = 1,\n    BUILD_STATE_TRAINED = 2,\n    BUILD_STATE_BUILT = 3\n  };\n\n  IndexSparseHolder::Pointer holder_{};\n\n  std::atomic_bool error_{false};\n  IndexMeta meta_{};\n  IndexMetric::Pointer measure_{};\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n  Stats stats_{};\n\n  BUILD_STATE state_{BUILD_STATE_INIT};\n};\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_context.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst FlatSparseEntity *FlatSparseContext::entity() const {\n  if (context_type_ == kStreamerContext) {\n    return &streamer_owner_->entity();\n  } else if (context_type_ == kSearcherContext) {\n    return &searcher_owner_->entity();\n  }\n  return nullptr;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <utility/sparse_utility.h>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_document.h>\n#include \"flat_sparse_entity.h\"\n#include \"flat_sparse_searcher.h\"\n#include \"flat_sparse_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass FlatSparseStreamer;\nclass FlatSparseSearcher;\n\n/*! Brute Force Sparse Streamer Context\n */\nclass FlatSparseContext : public IndexContext {\n public:\n  //! Constructor\n  enum ContextType {\n    kUnknownContext = 0,\n    kSearcherContext = 1,\n    kStreamerContext = 3\n  };\n  FlatSparseContext(const FlatSparseStreamer *streamer_ptr)\n      : streamer_owner_(streamer_ptr), context_type_(kStreamerContext) {}\n\n  FlatSparseContext(const FlatSparseSearcher *searcher_ptr)\n      : searcher_owner_(searcher_ptr), context_type_(kSearcherContext) {}\n\n  //! Destructor\n  virtual ~FlatSparseContext(void) = default;\n\n  //! Set topk of search result\n  void set_topk(uint32_t topk) override {\n    topk_ = topk;\n    result_heap_.limit(topk_);\n    result_heap_.set_threshold(this->threshold());\n  }\n\n  //! Retrieve search result\n  const IndexDocumentList &result(void) const override {\n    return results_.at(0);\n  }\n\n  //! 
Retrieve search result with index\n  const IndexDocumentList &result(size_t index) const override {\n    return results_.at(index);\n  }\n\n  //! Retrieve result object for output\n  IndexDocumentList *mutable_result(size_t idx) override {\n    return &results_.at(idx);\n  }\n\n  inline IndexDocumentHeap *result_heap() {\n    return &result_heap_;\n  }\n\n  //! Update the parameters of context\n  int update(const ailego::Params & /*params*/) override {\n    return 0;\n  }\n\n  //! Retrieve magic number\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n\n  //! Retrieve search group result with index\n  const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n\n  //! Retrieve search group result with index\n  const IndexGroupDocumentList &group_result(size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  IndexGroupDocumentList *mutable_group_result(size_t idx) {\n    return &group_results_[idx];\n  }\n\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n    result_group_heap_.clear();\n  }\n\n  //! Get if group by search\n  inline bool group_by_search() {\n    return group_num_ > 0;\n  }\n\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n\n  void reset() override {}\n\n  //! 
Reset the context\n  void reset(const FlatSparseStreamer *streamer_ptr) {\n    magic_ = streamer_ptr->magic();\n    streamer_owner_ = streamer_ptr;\n    context_type_ = kStreamerContext;\n  }\n\n  void reset(const FlatSparseSearcher *searcher_ptr) {\n    magic_ = searcher_ptr->magic();\n    searcher_owner_ = searcher_ptr;\n    context_type_ = kSearcherContext;\n  }\n\n  //! Reset all the query results\n  void reset_results(size_t qnum) {\n    if (group_by_search()) {\n      group_results_.resize(qnum);\n    } else {\n      result_heap_.clear();\n      result_heap_.limit(topk_);\n      result_heap_.set_threshold(this->threshold());\n      results_.resize(qnum);\n      stats_vec_.resize(qnum);\n      for (size_t i = 0; i < results_.size(); ++i) {\n        results_[i].clear();\n        stats_vec_[i].clear();\n      }\n    }\n  }\n\n  Stats *mutable_stats(size_t idx = 0) {\n    ailego_assert_with(stats_vec_.size() > idx, \"invalid index\");\n    return &stats_vec_[idx];\n  }\n\n  inline void topk_to_result(uint32_t idx) {\n    if (ailego_unlikely(result_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(result_heap_.size()));\n    result_heap_.sort();\n    results_[idx].clear();\n    for (int i = 0; i < size; ++i) {\n      auto score = result_heap_[i].score();\n      if (score > this->threshold()) {\n        break;\n      }\n\n      key_t key = result_heap_[i].key();\n      if (fetch_vector_) {\n        node_id_t id = entity()->get_id(key);\n        IndexStorage::MemoryBlock vec_block;\n        entity()->get_sparse_vector(id, vec_block);\n        const void *sparse_data = vec_block.data();\n        IndexSparseDocument sparse_doc;\n        if (sparse_data != nullptr) {\n          SparseUtility::ReverseSparseFormat(sparse_data, sparse_doc,\n                                             entity()->sparse_unit_size());\n        }\n        
results_[idx].emplace_back(key, score, id, nullptr, sparse_doc);\n      } else {\n        results_[idx].emplace_back(key, score);\n      }\n    }\n  }\n\n private:\n  const FlatSparseEntity *entity() const;\n\n private:\n  const FlatSparseStreamer *streamer_owner_{nullptr};\n  const FlatSparseSearcher *searcher_owner_{nullptr};\n  ContextType context_type_{kUnknownContext};\n  std::vector<Stats> stats_vec_{};\n  uint32_t magic_{0};\n  uint32_t topk_{0};\n  IndexDocumentHeap result_heap_;\n  // std::string batch_queries_{};\n  bool fetch_vector_{false};\n\n  // group\n  uint32_t group_num_{0};\n  uint32_t group_topk_{0};\n  std::map<std::string, IndexDocumentHeap> result_group_heap_{};\n  std::vector<IndexDocumentList> results_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <unordered_map>\n#include <zvec/core/framework/index_framework.h>\n#include \"flat_sparse_index_format.h\"\n\nnamespace zvec {\nnamespace core {\n\nusing node_id_t = uint32_t;\nconstexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);\n\n/*! Flat Sparse Entity\n */\nclass FlatSparseEntity {\n public:\n  typedef std::shared_ptr<FlatSparseEntity> Pointer;\n\n  //! Constructor\n  explicit FlatSparseEntity() {}\n\n  //! Destructor\n  ~FlatSparseEntity() = default;\n\n  //! Disable them\n  FlatSparseEntity(const FlatSparseEntity &) = delete;\n  FlatSparseEntity &operator=(const FlatSparseEntity &) = delete;\n\n  //! Search in linear list with filter\n  int search(const std::string &sparse_vector, const IndexFilter &filter,\n             IndexDocumentHeap *heap) const {\n    for (node_id_t i = 0; i < doc_cnt(); i++) {\n      uint64_t key = get_key(i);\n      if (ailego_unlikely(key == kInvalidKey)) {\n        // LOG_ERROR(\"The key of node_id[%u] not found in keys map\", i);\n        // return IndexError_Runtime;\n        continue;\n      }\n      if (!filter.is_valid() || !filter(key)) {\n        float dist = get_search_distance(sparse_vector, i);\n        heap->emplace(key, dist);\n      }\n    }\n\n    return 0;\n  }\n\n  //! 
Search in linear list with filter and target pkeys\n  int search_p_keys(const std::string &sparse_vector,\n                    const std::vector<uint64_t> &p_keys,\n                    const IndexFilter &filter, IndexDocumentHeap *heap) const {\n    for (auto p_key : p_keys) {\n      if (!filter.is_valid() || !filter(p_key)) {\n        auto node_id = get_id(p_key);\n        if (node_id != kInvalidNodeId) {\n          float dist = get_search_distance(sparse_vector, node_id);\n          heap->emplace(p_key, dist);\n        }\n      }\n    }\n\n    return 0;\n  }\n\n  //! Group search in linear list with filter\n  int search_group(\n      const std::string &sparse_vector, const IndexFilter &filter,\n      const std::function<std::string(uint64_t)> &group_by_func, uint32_t topk,\n      std::unordered_map<std::string, IndexDocumentHeap> *heap) const {\n    for (node_id_t i = 0; i < doc_cnt(); i++) {\n      uint64_t key = get_key(i);\n      if (ailego_unlikely(key == kInvalidKey)) {\n        LOG_ERROR(\"The key of node_id[%u] not found in keys map\", i);\n        return IndexError_Runtime;\n      }\n      if (!filter.is_valid() || !filter(key)) {\n        float dist = get_search_distance(sparse_vector, i);\n\n        std::string group_id = group_by_func(key);\n\n        auto &group_heap = (*heap)[group_id];\n        if (group_heap.empty()) {\n          group_heap.limit(topk);\n        }\n        group_heap.emplace(key, dist);\n      }\n    }\n\n    return 0;\n  }\n\n  //! 
Group search in linear list with filter and target pkeys\n  int search_group_p_keys(\n      const std::string &sparse_vector, const std::vector<uint64_t> &p_keys,\n      const IndexFilter &filter,\n      const std::function<std::string(uint64_t)> &group_by_func, uint32_t topk,\n      std::unordered_map<std::string, IndexDocumentHeap> *heap) const {\n    for (auto p_key : p_keys) {\n      if (!filter.is_valid() || !filter(p_key)) {\n        auto node_id = get_id(p_key);\n        if (node_id != kInvalidNodeId) {\n          float dist = get_search_distance(sparse_vector, node_id);\n\n          std::string group_id = group_by_func(p_key);\n\n          auto &group_heap = (*heap)[group_id];\n          if (group_heap.empty()) {\n            group_heap.limit(topk);\n          }\n          group_heap.emplace(p_key, dist);\n        }\n      }\n    }\n\n    return 0;\n  }\n\n  //! Get sparse vector by key\n  int get_sparse_vector(uint64_t key, std::string *sparse_vector) const {\n    const void *sparse_vector_ptr;\n    uint32_t sparse_vector_len;\n    int ret = get_sparse_vector_ptr_by_key(key, &sparse_vector_ptr,\n                                           &sparse_vector_len);\n    if (ret != 0) {\n      return ret;\n    }\n    *sparse_vector = std::string(static_cast<const char *>(sparse_vector_ptr),\n                                 sparse_vector_len);\n    return 0;\n  }\n\n  //! 
Get sparse vector by node id\n  const void *get_sparse_vector(node_id_t id) const {\n    const void *sparse_vector_ptr;\n    uint32_t sparse_vector_len;\n    int ret =\n        get_sparse_vector_ptr_by_id(id, &sparse_vector_ptr, &sparse_vector_len);\n    if (ret != 0) {\n      return nullptr;\n    }\n    return sparse_vector_ptr;\n  }\n\n  int get_sparse_vector_by_key(const uint64_t key,\n                               std::string *sparse_vector) const {\n    uint32_t sparse_vector_len;\n    IndexStorage::MemoryBlock sparse_vector_block;\n    int ret = get_sparse_vector_ptr_by_key(key, sparse_vector_block,\n                                           &sparse_vector_len);\n    if (ret != 0) {\n      return ret;\n    }\n    *sparse_vector =\n        std::string(static_cast<const char *>(sparse_vector_block.data()),\n                    sparse_vector_len);\n    return 0;\n  }\n\n  int get_sparse_vector(node_id_t id,\n                        IndexStorage::MemoryBlock &sparse_vector_block) const {\n    uint32_t sparse_vector_len;\n    return get_sparse_vector_ptr_by_id(id, sparse_vector_block,\n                                       &sparse_vector_len);\n  }\n\n  int get_sparse_vector_ptr_by_key(uint64_t key, const void **sparse_vector_ptr,\n                                   uint32_t *sparse_vector_len_ptr) const {\n    auto node_id = get_id(key);\n    if (node_id == kInvalidNodeId) {\n      return IndexError_NoExist;\n    }\n\n    return get_sparse_vector_ptr_by_id(node_id, sparse_vector_ptr,\n                                       sparse_vector_len_ptr);\n  }\n\n  int get_sparse_vector_ptr_by_key(\n      const uint64_t key, IndexStorage::MemoryBlock &sparse_vector_block,\n      uint32_t *sparse_vector_len_ptr) const {\n    auto node_id = get_id(key);\n    if (node_id == kInvalidNodeId) {\n      return IndexError_NoExist;\n    }\n\n    return get_sparse_vector_ptr_by_id(node_id, sparse_vector_block,\n                                       sparse_vector_len_ptr);\n  
}\n\n  std::vector<uint64_t> get_keys() const {\n    std::vector<uint64_t> keys;\n    node_id_t doc_total_cnt = doc_cnt();\n    for (node_id_t node_id = 0; node_id < doc_total_cnt; ++node_id) {\n      uint64_t key = get_key(node_id);\n      if (key == kInvalidKey) {\n        return {kInvalidKey};\n      } else {\n        keys.push_back(key);\n      }\n    }\n\n    return keys;\n  }\n\n\n public:\n  virtual uint32_t doc_cnt() const = 0;\n\n  virtual uint32_t total_sparse_count() const = 0;\n\n  virtual node_id_t get_id(uint64_t key) const = 0;\n\n  virtual uint64_t get_key(node_id_t id) const = 0;\n\n  virtual int get_sparse_vector_ptr_by_id(\n      node_id_t id, const void **sparse_vector,\n      uint32_t *sparse_vector_len) const = 0;\n\n  virtual int get_sparse_vector_ptr_by_id(\n      const node_id_t /*id*/,\n      IndexStorage::MemoryBlock & /*sparse_vector_block*/,\n      uint32_t * /*sparse_vector_len*/) const {\n    return IndexError_NotImplemented;\n  }\n\n\n  virtual float get_search_distance(const std::string &vector,\n                                    node_id_t target_node_id) const = 0;\n  virtual size_t sparse_unit_size() const = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_index_format.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_framework.h>\n\nnamespace zvec {\nnamespace core {\n\nstatic constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();\nstatic constexpr uint32_t kDefaultOffsetChunkSize = 1024 * 1024;    // 1MB\nstatic constexpr uint32_t kDefaultDataChunkSize = 8 * 1024 * 1024;  // 8MB\n\nstruct FlatSparseMeta {\n  uint64_t create_time{0};\n  uint64_t update_time{0};\n  uint32_t doc_cnt{0};\n  uint32_t total_sparse_count{0};\n  uint8_t reserved[8] = {0};\n};\n\nstatic_assert(sizeof(FlatSparseMeta) % 32 == 0,\n              \"FlatSparseMeta must be aligned with 32 bytes\");\n\nstruct FlatSparseStreamerMeta {\n  uint32_t offset_chunk_count{0};\n  uint32_t offset_chunk_size{kDefaultOffsetChunkSize};\n  uint32_t data_chunk_count{0};\n  uint32_t data_chunk_size{kDefaultDataChunkSize};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <utility/sparse_utility.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_meta.h>\n#include \"flat_sparse_streamer_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Brute Force Sparse Streamer Provider\n */\n// FlatSparseStreamerEntity or FlatSparseSearcherEntity\ntemplate <typename FlatSparseEntityType>\nclass FlatSparseIndexProvider : public IndexSparseProvider {\n public:\n  //! Constructor\n  FlatSparseIndexProvider(const std::shared_ptr<FlatSparseEntityType> entity,\n                          const IndexMeta &meta, const std::string &owner)\n      : entity_(entity), meta_(meta), owner_class_(owner) {}\n\n  //! Create a new iterator\n  IndexSparseProvider::Iterator::Pointer create_iterator(void) override {\n    return IndexSparseProvider::Iterator::Pointer(new (std::nothrow)\n                                                      Iterator(entity_, meta_));\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return entity_->doc_cnt();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return meta_.data_type();\n  }\n\n  //! 
Retrieve a vector using a primary key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override {\n    std::string sparse_data;\n\n    int ret = entity_->get_sparse_vector_by_key(key, &sparse_data);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to get sparse vector, key=%zu, ret=%s\", (size_t)key,\n                IndexError::What(ret));\n      return ret;\n    }\n\n    SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,\n                                       sparse_indices_buffer,\n                                       sparse_values_buffer, meta_.unit_size());\n    return 0;\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n  size_t total_sparse_count() const override {\n    return entity_->total_sparse_count();\n  }\n\n private:\n  class Iterator : public IndexSparseProvider::Iterator {\n   public:\n    Iterator(const std::shared_ptr<FlatSparseEntityType> &entity,\n             const IndexMeta &meta)\n        : entity_(entity), meta_(meta), cur_id_(0U), valid_(false) {\n      IndexStorage::MemoryBlock sparse_data_block;\n      entity_->get_sparse_vector(cur_id_, sparse_data_block);\n      const void *sparse_data = sparse_data_block.data();\n      if (sparse_data != nullptr) {\n        valid_ = true;\n\n        sparse_indices_buffer_.clear();\n        sparse_data_buffer_.clear();\n\n        SparseUtility::ReverseSparseFormat(\n            sparse_data, &sparse_count_, &sparse_indices_buffer_,\n            &sparse_data_buffer_, meta.unit_size());\n      }\n    }\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const override {\n      return sparse_count_;\n    }\n\n    //! 
Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const override {\n      return reinterpret_cast<const uint32_t *>(sparse_indices_buffer_.data());\n    }\n\n    //! Retrieve sparse data\n    virtual const void *sparse_data() const override {\n      return reinterpret_cast<const void *>(sparse_data_buffer_.data());\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return cur_id_ < entity_->doc_cnt() && valid_;\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      // std::cout << \"iter key=\" << cur_id_ << std::endl;\n      return entity_->get_key(cur_id_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      cur_id_ = get_next_valid_id(cur_id_ + 1);\n\n      if (cur_id_ < entity_->doc_cnt()) {\n        IndexStorage::MemoryBlock sparse_data_block;\n        entity_->get_sparse_vector(cur_id_, sparse_data_block);\n        const void *sparse_data = sparse_data_block.data();\n        if (sparse_data != nullptr) {\n          valid_ = true;\n\n          sparse_indices_buffer_.clear();\n          sparse_data_buffer_.clear();\n\n          SparseUtility::ReverseSparseFormat(\n              sparse_data, &sparse_count_, &sparse_indices_buffer_,\n              &sparse_data_buffer_, meta_.unit_size());\n        } else {\n          valid_ = false;\n        }\n      }\n    }\n\n    //! 
Reset the iterator\n    void reset(void) {\n      cur_id_ = get_next_valid_id(0);\n      IndexStorage::MemoryBlock sparse_data_block;\n      entity_->get_sparse_vector(cur_id_, sparse_data_block);\n      const void *sparse_data = sparse_data_block.data();\n      if (sparse_data != nullptr) {\n        valid_ = true;\n\n        SparseUtility::ReverseSparseFormat(\n            sparse_data, &sparse_count_, &sparse_indices_buffer_,\n            &sparse_data_buffer_, meta_.unit_size());\n      }\n    }\n\n   private:\n    node_id_t get_next_valid_id(node_id_t start_id) {\n      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {\n        if (entity_->get_key(i) != kInvalidNodeId) {\n          return i;\n        }\n      }\n      return kInvalidNodeId;\n    }\n\n   private:\n    const std::shared_ptr<FlatSparseEntityType> entity_{nullptr};\n    const IndexMeta &meta_;\n    node_id_t cur_id_;\n    uint32_t sparse_count_;\n    std::string sparse_indices_buffer_;\n    std::string sparse_data_buffer_;\n    bool valid_{false};\n  };\n\n private:\n  const std::shared_ptr<FlatSparseEntityType> entity_{nullptr};\n  const IndexMeta &meta_;\n  const std::string owner_class_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_search.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <ailego/math/inner_product_matrix.h>\n#include \"flat_sparse_context.h\"\n\nnamespace zvec {\nnamespace core {\n\nstatic inline IndexGroupDocumentList ConvertGroupMapToResult(\n    std::unordered_map<std::string, IndexDocumentHeap> group_map,\n    uint32_t group_num) {\n  IndexGroupDocumentList result;\n\n  std::vector<std::pair<std::string, float>> best_score_in_groups;\n  for (auto itr = group_map.begin(); itr != group_map.end(); itr++) {\n    const std::string &group_id = (*itr).first;\n    auto &heap = (*itr).second;\n\n    if (heap.size() > 0) {\n      float best_score = heap[0].score();\n      best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n    }\n  }\n\n  std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n            [](const std::pair<std::string, float> &a,\n               const std::pair<std::string, float> &b) -> int {\n              return a.second < b.second;\n            });\n\n  // truncate to group num\n  for (uint32_t i = 0; i < group_num && i < best_score_in_groups.size(); ++i) {\n    const std::string &group_id = best_score_in_groups[i].first;\n\n    result.emplace_back(\n        GroupIndexDocument(group_id, std::move(group_map[group_id])));\n  }\n\n  return result;\n}\n\nstatic inline int FlatSearch(const uint32_t *sparse_count,\n                            
 const uint32_t *sparse_indices,\n                             const void *sparse_query, bool with_p_keys,\n                             const std::vector<std::vector<uint64_t>> &p_keys,\n                             const IndexQueryMeta &qmeta, uint32_t count,\n                             const IndexMeta, IndexContext::Pointer &context,\n                             FlatSparseEntity *entity) {\n  int ret;\n\n  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to FlatSparseContext failed\");\n    return IndexError_Cast;\n  }\n\n  // reset context results\n  ctx->reset_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {\n      return ctx->group_by()(key);\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      ailego::MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          sparse_query_buffer);\n\n      std::unordered_map<std::string, IndexDocumentHeap> group_heap{};\n\n      if (with_p_keys) {\n        ret = entity->search_group_p_keys(sparse_query_buffer, p_keys[q],\n                                          ctx->filter(), group_by,\n                                          ctx->group_topk(), &group_heap);\n      } else {\n        ret = entity->search_group(sparse_query_buffer, ctx->filter(), group_by,\n                                   ctx->group_topk(), &group_heap);\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Failed to search group, ret=%s\", IndexError::What(ret));\n        return ret;\n      }\n\n      // sort 
group heap\n      for (auto &group : group_heap) {\n        group.second.sort();\n      }\n\n      auto group_result =\n          ConvertGroupMapToResult(std::move(group_heap), ctx->group_num());\n      ctx->mutable_group_result(q)->swap(group_result);\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      ailego::MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          sparse_query_buffer);\n\n      auto heap = ctx->result_heap();\n\n      if (with_p_keys) {\n        ret = entity->search_p_keys(sparse_query_buffer, p_keys[q],\n                                    ctx->filter(), heap);\n      } else {\n        ret = entity->search(sparse_query_buffer, ctx->filter(), heap);\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Failed to search, ret=%s\", IndexError::What(ret));\n        return ret;\n      }\n\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  }\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_searcher.h\"\n#include <utility/sparse_utility.h>\n#include <zvec/core/framework/index_error.h>\n#include \"flat_sparse_context.h\"\n#include \"flat_sparse_provider.h\"\n#include \"flat_sparse_search.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst uint32_t FlatSparseSearcher::VERSION = 0U;\n\nFlatSparseSearcher::FlatSparseSearcher(void) {}\n\nFlatSparseSearcher::~FlatSparseSearcher(void) {}\n\nint FlatSparseSearcher::init(const ailego::Params & /*params*/) {\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint FlatSparseSearcher::cleanup(void) {\n  this->unload();\n  return 0;\n}\n\nint FlatSparseSearcher::load(IndexStorage::Pointer container,\n                             IndexMetric::Pointer /*measure*/) {\n  if (state_ != STATE_INITED) {\n    LOG_ERROR(\"Init the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  LOG_INFO(\"Begin FlatSparseSearcher::load\");\n\n  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  if (meta_.searcher_revision() != VERSION) {\n    LOG_ERROR(\"Unsupported searcher revision %u\", meta_.searcher_revision());\n    return IndexError_Unsupported;\n  }\n\n  ret = entity_.load(container, meta_);\n  if (ret != 0) {\n    
LOG_ERROR(\"FlatSparseSearcher load index failed\");\n    return ret;\n  }\n\n  state_ = STATE_LOADED;\n  magic_ = IndexContext::GenerateMagic();\n\n  LOG_INFO(\"End FlatSparseSearcher::load\");\n\n  return 0;\n}\n\nint FlatSparseSearcher::unload(void) {\n  LOG_INFO(\"Begin FlatSparseSearcher::unload\");\n\n  meta_.clear();\n  entity_.unload();\n  state_ = STATE_INITED;\n\n  LOG_INFO(\"End FlatSparseSearcher::unload\");\n\n  return 0;\n}\n\nint FlatSparseSearcher::search_bf_impl(const uint32_t *sparse_count,\n                                       const uint32_t *sparse_indices,\n                                       const void *sparse_query,\n                                       const IndexQueryMeta &qmeta,\n                                       uint32_t count,\n                                       Context::Pointer &context) const {\n  return do_search(sparse_count, sparse_indices, sparse_query, false, {}, qmeta,\n                   count, context);\n}\n\nint FlatSparseSearcher::search_bf_by_p_keys_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    ContextPointer &context) const {\n  return do_search(sparse_count, sparse_indices, sparse_query, true, p_keys,\n                   qmeta, count, context);\n}\n\nint FlatSparseSearcher::get_sparse_vector(\n    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  if (state_ != STATE_LOADED) {\n    LOG_ERROR(\"Failed to get sparse vector, load container first!\");\n    return IndexError_NoIndexLoaded;\n  }\n\n  std::string sparse_data;\n\n  int ret = entity_.get_sparse_vector(key, &sparse_data);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to get sparse vector, key=%zu, ret=%s\", (size_t)key,\n              IndexError::What(ret));\n    return ret;\n  }\n\n  
SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,\n                                     sparse_indices_buffer,\n                                     sparse_values_buffer, meta_.unit_size());\n\n  return 0;\n}\n\nFlatSparseSearcher::ContextPointer FlatSparseSearcher::create_context() const {\n  if (state_ != STATE_LOADED) {\n    LOG_ERROR(\"Failed to create Context, load container first!\");\n    return Context::UPointer();\n  }\n  FlatSparseSearcherEntity::Pointer entity = entity_.clone();\n  return FlatSparseSearcher::ContextPointer(new FlatSparseContext(this));\n}\n\n//! Create a new iterator\nIndexSearcher::SparseProvider::Pointer\nFlatSparseSearcher::create_sparse_provider(void) const {\n  if (state_ != STATE_LOADED) {\n    LOG_ERROR(\"Failed to create provider, load container first!\");\n    return SparseProvider::Pointer();\n  }\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone entity failed\");\n    return SparseProvider::Pointer();\n  }\n  return SparseProvider::Pointer(\n      new FlatSparseIndexProvider<FlatSparseSearcherEntity>(\n          entity, meta_, \"FlatSparseSearcher\"));\n}\n\nint FlatSparseSearcher::do_search(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, bool with_p_keys,\n    const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    ContextPointer &context) const {\n  if (state_ != STATE_LOADED) {\n    LOG_ERROR(\"Failed to do search, load container first!\");\n    return IndexError_NoIndexLoaded;\n  }\n\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  return FlatSearch(sparse_count, sparse_indices, sparse_query, with_p_keys,\n                    p_keys, qmeta, count, meta_, context,\n                    (FlatSparseEntity *)&entity_);\n}\n\nINDEX_FACTORY_REGISTER_SEARCHER(FlatSparseSearcher);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"flat_sparse_searcher_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass FlatSparseSearcher : public IndexSearcher {\n public:\n  static const uint32_t VERSION;\n\n public:\n  using ContextPointer = IndexSearcher::Context::Pointer;\n\n public:\n  FlatSparseSearcher(void);\n  virtual ~FlatSparseSearcher(void);\n\n  FlatSparseSearcher(const FlatSparseSearcher &) = delete;\n  FlatSparseSearcher &operator=(const FlatSparseSearcher &) = delete;\n\n public:\n  //! Initialize Searcher\n  int init(const ailego::Params &params) override;\n\n  //! Cleanup Searcher\n  int cleanup(void) override;\n\n  //! Load Index from storage\n  int load(IndexStorage::Pointer container,\n           IndexMetric::Pointer /*measure*/) override;\n\n  //! 
Unload index from storage\n  int unload(void) override;\n\n  int search_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                  Context::Pointer & /*context*/) const override {\n    return IndexError_NotImplemented;\n  }\n\n  int search_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                  uint32_t /*count*/,\n                  Context::Pointer & /*context*/) const override {\n    return IndexError_NotImplemented;\n  }\n\n  int search_bf_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                     Context::Pointer & /*context*/) const override {\n    return IndexError_NotImplemented;\n  }\n\n  int search_bf_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                     uint32_t /*count*/,\n                     Context::Pointer & /*context*/) const override {\n    return IndexError_NotImplemented;\n  }\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const override {\n    return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                       context);\n  }\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const override {\n    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,\n                          count, context);\n  }\n\n  //! 
Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const override {\n    return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                          context);\n  }\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t *sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta, uint32_t count,\n                     Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,\n                                    p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               ContextPointer &context) const override;\n\n  //! 
Fetch sparse vector by key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override;\n\n  //! Create a searcher context\n  ContextPointer create_context() const override;\n\n  //! Create a new sparse provider\n  IndexSearcher::SparseProvider::Pointer create_sparse_provider(\n      void) const override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  const FlatSparseSearcherEntity &entity(void) const {\n    return entity_;\n  }\n\n  uint32_t magic(void) const {\n    return magic_;\n  }\n\n private:\n  inline int check_params(const IndexQueryMeta &qmeta) const {\n    if (ailego_unlikely(qmeta.data_type() != meta_.data_type())) {\n      LOG_ERROR(\"Unsupported query meta\");\n      return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  int do_search(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                const void *sparse_query, bool with_p_keys,\n                const std::vector<std::vector<uint64_t>> &p_keys,\n                const IndexQueryMeta &qmeta, uint32_t count,\n                ContextPointer &context) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  FlatSparseSearcherEntity entity_{};\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  uint32_t magic_{0U};\n\n  Stats stats_;\n  State state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_searcher_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_searcher_entity.h\"\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"flat_sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nFlatSparseSearcherEntity::FlatSparseSearcherEntity() {}\n\nint FlatSparseSearcherEntity::load(const IndexStorage::Pointer &container,\n                                   const IndexMeta &index_meta) {\n  if (container_) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n\n  int ret = this->load_container(container);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to load storage index\");\n    return ret;\n  }\n\n  if (init_measure(index_meta) != 0) {\n    LOG_ERROR(\"Failed to init measure\");\n    return IndexError_InvalidFormat;\n  }\n\n  container_ = container;\n  return 0;\n}\n\nint FlatSparseSearcherEntity::init_measure(const IndexMeta &meta) {\n  measure_ = IndexFactory::CreateMetric(meta.metric_name());\n  if (!measure_) {\n    LOG_ERROR(\"Failed to create measure %s\", meta.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = measure_->init(meta, meta.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failled to init measure, ret=%d\", ret);\n    return ret;\n  }\n\n  if (!measure_->sparse_distance()) {\n    LOG_ERROR(\"Invalid measure 
distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  search_sparse_distance_ = measure_->sparse_distance();\n\n  if (measure_->query_metric() && measure_->query_metric()->distance()) {\n    search_sparse_distance_ = measure_->query_metric()->sparse_distance();\n  }\n  sparse_unit_size_ = meta.unit_size();\n\n  return 0;\n}\n\nint FlatSparseSearcherEntity::load_container(\n    const IndexStorage::Pointer &container) {\n  // meta\n  auto segment = container->get(PARAM_FLAT_SPARSE_META_SEG_ID);\n  if (!segment || segment->data_size() < sizeof(meta_)) {\n    LOG_ERROR(\"Missing segment %s, or invalid segment size\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  const void *data;\n  if (ailego_unlikely(segment->read(0, &data, sizeof(meta_)) !=\n                      sizeof(meta_))) {\n    LOG_ERROR(\"Failed to read meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_ReadData;\n  }\n  meta_ = *(reinterpret_cast<const decltype(meta_) *>(data));\n\n  // keys segment\n  keys_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID);\n  if (!keys_chunk_) {\n    LOG_ERROR(\"Missing segment %s\", PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  // mapping segment\n  mapping_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID);\n  if (!mapping_chunk_) {\n    LOG_ERROR(\"Missing segment %s\",\n              PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  // offset segment\n  sparse_offset_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID);\n  if (!sparse_offset_chunk_) {\n    LOG_ERROR(\"Missing segment %s\",\n              PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  // data segment\n  sparse_data_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID);\n  if (!sparse_data_chunk_) {\n    
LOG_ERROR(\"Missing segment %s\", PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  return 0;\n}\n\nint FlatSparseSearcherEntity::unload() {\n  container_.reset();\n  sparse_data_chunk_.reset();\n  sparse_offset_chunk_.reset();\n  keys_chunk_.reset();\n  mapping_chunk_.reset();\n\n  return 0;\n}\n\nFlatSparseSearcherEntity::Pointer FlatSparseSearcherEntity::clone() const {\n  auto entity = new (std::nothrow)\n      FlatSparseSearcherEntity(meta_, sparse_data_chunk_, sparse_offset_chunk_,\n                               keys_chunk_, mapping_chunk_);\n  return FlatSparseSearcherEntity::Pointer(entity);\n}\n\nint FlatSparseSearcherEntity::get_sparse_vector_ptr_by_id(\n    node_id_t id, const void **sparse_vector_ptr,\n    uint32_t *sparse_vector_len_ptr) const {\n  uint32_t offset_chunk_offset = id * offset_size_per_node();\n\n  const void *offset_info = nullptr;\n  if (ailego_unlikely(sparse_offset_chunk_->read(\n                          offset_chunk_offset, &offset_info,\n                          offset_size_per_node()) != offset_size_per_node())) {\n    LOG_ERROR(\"Read offset info failed, offset=%u\", offset_chunk_offset);\n    return IndexError_ReadData;\n  };\n\n  // sparse offset\n  uint64_t sparse_offset = *(uint64_t *)offset_info;\n  uint32_t sparse_vector_len =\n      *(uint32_t *)((uint8_t *)offset_info + sizeof(uint64_t));\n\n  if (sparse_vector_len > 0) {\n    const void *sparse_data =\n        get_sparse_vector_data(sparse_offset, sparse_vector_len);\n    if (ailego_unlikely(sparse_data == nullptr)) {\n      LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)sparse_offset,\n                sparse_vector_len);\n\n      return IndexError_ReadData;\n    }\n    *sparse_vector_ptr = sparse_data;\n    *sparse_vector_len_ptr = sparse_vector_len;\n  }\n\n  return 0;\n}\n\nconst void *FlatSparseSearcherEntity::get_sparse_vector_data(\n    uint64_t offset, uint32_t length) const {\n  const void *data;\n  auto 
size = sparse_data_chunk_->read(offset, &data, length);\n  if (size != length) {\n    LOG_ERROR(\n        \"read sparse vector data failed: offset=%zu, \"\n        \"length=%u, size=%zu\",\n        (size_t)offset, length, size);\n    return nullptr;\n  }\n  return data;\n}\n\n\nnode_id_t FlatSparseSearcherEntity::get_id(uint64_t key) const {\n  if (ailego_unlikely(!mapping_chunk_)) {\n    LOG_ERROR(\"Index missing mapping segment\");\n    return kInvalidNodeId;\n  }\n\n  //! Do binary search\n  node_id_t start = 0UL;\n  node_id_t end = doc_cnt();\n  const void *data;\n  node_id_t idx = 0u;\n  while (start < end) {\n    idx = start + (end - start) / 2;\n    if (ailego_unlikely(mapping_chunk_->read(idx * sizeof(node_id_t), &data,\n                                             sizeof(node_id_t)) !=\n                        sizeof(node_id_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    const uint64_t *mkey;\n    node_id_t local_id = *reinterpret_cast<const node_id_t *>(data);\n    if (ailego_unlikely(keys_chunk_->read(\n                            local_id * sizeof(uint64_t), (const void **)(&mkey),\n                            sizeof(uint64_t)) != sizeof(uint64_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    if (*mkey < key) {\n      start = idx + 1;\n    } else if (*mkey > key) {\n      end = idx;\n    } else {\n      return local_id;\n    }\n  }\n  return kInvalidNodeId;\n}\n\nuint64_t FlatSparseSearcherEntity::get_key(node_id_t id) const {\n  const void *key;\n  if (ailego_unlikely(\n          keys_chunk_->read(id * sizeof(uint64_t), &key, sizeof(uint64_t)) !=\n          sizeof(uint64_t))) {\n    LOG_ERROR(\"Read key from segment failed\");\n    return kInvalidKey;\n  }\n  return *(reinterpret_cast<const uint64_t *>(key));\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_searcher_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_framework.h>\n#include \"flat_sparse_entity.h\"\n#include \"flat_sparse_index_format.h\"\n\nnamespace zvec {\nnamespace core {\n\n\n/*! Flat Sparse Searcher Entity\n */\nclass FlatSparseSearcherEntity : public FlatSparseEntity {\n public:\n  typedef std::shared_ptr<FlatSparseSearcherEntity> Pointer;\n\n  using Chunk = IndexStorage::Segment;\n\n  //! Constructor\n  explicit FlatSparseSearcherEntity();\n\n  //! Destructor\n  virtual ~FlatSparseSearcherEntity() = default;\n\n  //! Disable them\n  FlatSparseSearcherEntity(const FlatSparseSearcherEntity &) = delete;\n  FlatSparseSearcherEntity &operator=(const FlatSparseSearcherEntity &) =\n      delete;\n\n  //! Load the entity with container\n  int load(const IndexStorage::Pointer &container, const IndexMeta &index_meta);\n\n  //! 
Unload the entity\n  int unload();\n\n public:\n  inline uint32_t doc_cnt() const override {\n    return meta_.doc_cnt;\n  }\n\n  inline uint32_t total_sparse_count() const override {\n    return meta_.total_sparse_count;\n  }\n\n  size_t sparse_unit_size() const override {\n    return sparse_unit_size_;\n  }\n\n  float get_search_distance(const std::string &vector,\n                            node_id_t target_node_id) const override {\n    float dist;\n    const void *target_vector;\n    uint32_t target_vector_len;\n    get_sparse_vector_ptr_by_id(target_node_id, &target_vector,\n                                &target_vector_len);\n    search_sparse_distance_(vector.c_str(), target_vector, &dist);\n    return dist;\n  }\n\n  FlatSparseSearcherEntity::Pointer clone() const;\n\n  node_id_t get_id(uint64_t key) const override;\n\n  uint64_t get_key(node_id_t id) const override;\n\n  int get_sparse_vector_ptr_by_id(node_id_t id, const void **sparse_vector,\n                                  uint32_t *sparse_vector_len) const override;\n\n private:\n  int load_container(const IndexStorage::Pointer &container);\n\n  int init_measure(const IndexMeta &meta);\n\n  inline uint32_t offset_size_per_node() const {\n    return sizeof(uint64_t) + sizeof(uint32_t);\n  }\n\n  const void *get_sparse_vector_data(uint64_t offset, uint32_t length) const;\n\n private:\n  FlatSparseSearcherEntity(const FlatSparseMeta &meta,\n                           Chunk::Pointer sparse_data_chunk,\n                           Chunk::Pointer sparse_offset_chunk,\n                           Chunk::Pointer keys_chunk,\n                           Chunk::Pointer mapping_chunk)\n      : meta_(meta),\n        sparse_data_chunk_(sparse_data_chunk),\n        sparse_offset_chunk_(sparse_offset_chunk),\n        keys_chunk_(keys_chunk),\n        mapping_chunk_(mapping_chunk) {}\n\n private:\n  IndexStorage::Pointer container_{};\n\n  // meta\n  FlatSparseMeta meta_;\n\n  // measure\n  IndexMetric::Pointer 
measure_{};\n  IndexMetric::MatrixSparseDistance search_sparse_distance_{};\n\n  // chunk\n  Chunk::Pointer sparse_data_chunk_;\n  Chunk::Pointer sparse_offset_chunk_;\n  Chunk::Pointer keys_chunk_;\n  Chunk::Pointer mapping_chunk_;\n\n  size_t sparse_unit_size_{0U};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_streamer.h\"\n#include <cstdint>\n#include <utility/sparse_utility.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_meta.h>\n#include \"flat_sparse_context.h\"\n#include \"flat_sparse_provider.h\"\n#include \"flat_sparse_search.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst uint32_t FlatSparseStreamer::VERSION = 0U;\n\nFlatSparseStreamer::FlatSparseStreamer() : entity_(stats_) {}\n\nFlatSparseStreamer::~FlatSparseStreamer() {\n  this->close();\n}\n\nint FlatSparseStreamer::init(const IndexMeta &imeta,\n                             const ailego::Params &params) {\n  LOG_DEBUG(\"FlatSparseStreamer init\");\n\n  meta_ = imeta;\n  meta_.set_streamer(\"FlatSparseStreamer\", VERSION, params);\n\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint FlatSparseStreamer::cleanup() {\n  LOG_DEBUG(\"FlatSparseStreamer cleanup\");\n\n  this->close();\n\n  meta_.clear();\n\n  return 0;\n}\n\nint FlatSparseStreamer::open(IndexStorage::Pointer stg) {\n  LOG_DEBUG(\"FlatSparseStreamer open\");\n\n  if (ailego_unlikely(state_ != STATE_INITED)) {\n    LOG_ERROR(\"Open storage failed, init streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  int ret = entity_.open(std::move(stg), meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"FlatSparseStreamer 
entity failed to open storage\");\n    return ret;\n  }\n\n  IndexMeta index_meta;\n  ret = entity_.get_index_sparse_meta(&index_meta);\n  if (ret == IndexError_NoExist) {\n    // Set IndexMeta for the new index\n    ret = entity_.set_index_sparse_meta(meta_);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to set index meta for %s\", IndexError::What(ret));\n      return ret;\n    }\n  } else {\n    if (index_meta.streamer_revision() != meta_.streamer_revision()) {\n      LOG_ERROR(\"Streamer revision mismatch, expect=%u, actual=%u\",\n                meta_.streamer_revision(), index_meta.streamer_revision());\n      return IndexError_Mismatch;\n    }\n    if (index_meta.metric_name() != meta_.metric_name() ||\n        index_meta.data_type() != meta_.data_type()) {\n      LOG_ERROR(\"IndexMeta mismatch from the previous in index\");\n      return IndexError_Mismatch;\n    }\n    // The IndexMeasure Params may be updated like MipsSquaredEuclidean\n    auto metric_params = index_meta.metric_params();\n    metric_params.merge(meta_.metric_params());\n    meta_.set_metric(index_meta.metric_name(), 0, metric_params);\n  }\n\n  state_ = STATE_OPENED;\n  magic_ = IndexContext::GenerateMagic();\n\n  return 0;\n}\n\nint FlatSparseStreamer::close() {\n  if (state_ != STATE_OPENED) {\n    return 0;\n  }\n\n  LOG_DEBUG(\"FlatSparseStreamer close\");\n\n  stats_.clear();\n  int ret = entity_.close();\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to close entity %s\", IndexError::What(ret));\n    return ret;\n  }\n  state_ = STATE_INITED;\n  return 0;\n}\n\nint FlatSparseStreamer::flush(uint64_t checkpoint) {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to flush, open streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"FlatSparseStreamer flush, checkpoint=%zu\", (size_t)checkpoint);\n\n  return entity_.flush(checkpoint);\n}\n\nint FlatSparseStreamer::dump(const IndexDumper::Pointer &dumper) {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed 
to dump, open streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"FlatSparseStreamer dump\");\n\n  shared_mutex_.lock();\n  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });\n\n  meta_.set_searcher(\"FlatSparseSearcher\", VERSION, ailego::Params());\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n\n  return entity_.dump(dumper);\n}\n\nFlatSparseStreamer::ContextPointer FlatSparseStreamer::create_context() const {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to create Context, open streamer first!\");\n    return Context::UPointer();\n  }\n  FlatSparseStreamerEntity::Pointer entity = entity_.clone();\n  return FlatSparseStreamer::ContextPointer(new FlatSparseContext(this));\n}\n\nIndexStreamer::SparseProvider::Pointer\nFlatSparseStreamer::create_sparse_provider(void) const {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to create provider, open streamer first!\");\n    return SparseProvider::Pointer();\n  }\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone entity failed\");\n    return SparseProvider::Pointer();\n  }\n  return SparseProvider::Pointer(\n      new FlatSparseIndexProvider<FlatSparseStreamerEntity>(\n          entity, meta_, \"FlatSparseStreamerProvider\"));\n}\n\nint FlatSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count,\n                                 const uint32_t *sparse_indices,\n                                 const void *sparse_query,\n                                 const IndexQueryMeta &qmeta,\n                                 Context::Pointer &context) {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to add_impl, open streamer first!\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_NoReady;\n  }\n\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    
(*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) {\n    LOG_ERROR(\n        \"Failed to add sparse vector: number of non-zero elements (%u) exceeds \"\n        \"maximum allowed (%u), key=%zu\",\n        sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey);\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_InvalidValue;\n  }\n\n  // context is trivial here\n  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to FlatSparseContext failed\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Cast;\n  }\n\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  // convert to sparse format and add to entity\n  std::string sparse_query_buffer;\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,\n                                   meta_.unit_size(), sparse_query_buffer);\n\n  ret = entity_.add(pkey, sparse_query_buffer, sparse_count);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to add sparse vector, key=%zu, ret=%s\", (size_t)pkey,\n              IndexError::What(ret));\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  (*stats_.mutable_added_count())++;\n  return 0;\n}\n\nint FlatSparseStreamer::add_with_id_impl(uint32_t pkey,\n                                         const uint32_t sparse_count,\n                                         const uint32_t *sparse_indices,\n                                         const void *sparse_query,\n                                         const IndexQueryMeta &qmeta,\n                                         Context::Pointer &context) {\n  if (state_ != STATE_OPENED) {\n    
LOG_ERROR(\"Failed to add_with_id_impl, open streamer first!\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_NoReady;\n  }\n\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) {\n    LOG_ERROR(\n        \"Failed to add sparse vector: number of non-zero elements (%u) exceeds \"\n        \"maximum allowed (%u), key=%zu\",\n        sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey);\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_InvalidValue;\n  }\n\n  // context is trivial here\n  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to FlatSparseContext failed\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Cast;\n  }\n\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  // convert to sparse format and add to entity\n  std::string sparse_query_buffer;\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,\n                                   meta_.unit_size(), sparse_query_buffer);\n\n  ret = entity_.add_vector_with_id(pkey, sparse_query_buffer, sparse_count);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to add sparse vector, key=%zu, ret=%s\", (size_t)pkey,\n              IndexError::What(ret));\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  (*stats_.mutable_added_count())++;\n  return 0;\n}\n\n//! 
Similarity search with sparse inputs\nint FlatSparseStreamer::search_impl(const uint32_t sparse_count,\n                                    const uint32_t *sparse_indices,\n                                    const void *sparse_query,\n                                    const IndexQueryMeta &qmeta,\n                                    Context::Pointer &context) const {\n  return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                     context);\n}\n\n//! Similarity search with sparse inputs\nint FlatSparseStreamer::search_impl(const uint32_t *sparse_count,\n                                    const uint32_t *sparse_indices,\n                                    const void *sparse_query,\n                                    const IndexQueryMeta &qmeta, uint32_t count,\n                                    Context::Pointer &context) const {\n  return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,\n                        count, context);\n}\n\n//! Similarity brute force search with sparse inputs\nint FlatSparseStreamer::search_bf_impl(const uint32_t sparse_count,\n                                       const uint32_t *sparse_indices,\n                                       const void *sparse_query,\n                                       const IndexQueryMeta &qmeta,\n                                       Context::Pointer &context) const {\n  return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                        context);\n}\n\n//! Linear search by primary keys\nint FlatSparseStreamer::search_bf_by_p_keys_impl(\n    const uint32_t sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, ContextPointer &context) const {\n  return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,\n                                  p_keys, qmeta, 1, context);\n}\n\n//! 
Similarity brute force search with sparse inputs\nint FlatSparseStreamer::search_bf_impl(const uint32_t *sparse_count,\n                                       const uint32_t *sparse_indices,\n                                       const void *sparse_query,\n                                       const IndexQueryMeta &qmeta,\n                                       uint32_t count,\n                                       Context::Pointer &context) const {\n  return do_search(sparse_count, sparse_indices, sparse_query, false, {}, qmeta,\n                   count, context);\n}\n\n//! Linear search by primary keys with sparse inputs\nint FlatSparseStreamer::search_bf_by_p_keys_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    ContextPointer &context) const {\n  return do_search(sparse_count, sparse_indices, sparse_query, true, p_keys,\n                   qmeta, count, context);\n}\n\n//! 
Fetch sparse vector by key\nint FlatSparseStreamer::get_sparse_vector(\n    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to get_sparse_vector, open streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  std::string sparse_data;\n\n  int ret = entity_.get_sparse_vector_by_key(key, &sparse_data);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to get sparse vector, key=%zu, ret=%s\", (size_t)key,\n              IndexError::What(ret));\n    return ret;\n  }\n\n  SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,\n                                     sparse_indices_buffer,\n                                     sparse_values_buffer, meta_.unit_size());\n\n  return 0;\n}\n\nint FlatSparseStreamer::do_search(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, bool with_p_keys,\n    const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    ContextPointer &context) const {\n  if (state_ != STATE_OPENED) {\n    LOG_ERROR(\"Failed to do_search, open streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());\n  if (ctx->magic() != magic_) {\n    ctx->reset(this);\n  }\n\n  return FlatSearch(sparse_count, sparse_indices, sparse_query, with_p_keys,\n                    p_keys, qmeta, count, meta_, context,\n                    (FlatSparseEntity *)&entity_);\n}\n\nINDEX_FACTORY_REGISTER_STREAMER(FlatSparseStreamer);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <ailego/parallel/lock.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"flat_sparse_streamer_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Flat Sparse Streamer\n */\nclass FlatSparseStreamer : public IndexStreamer {\n public:\n  static const uint32_t VERSION;\n\n public:\n  using ContextPointer = IndexStreamer::Context::Pointer;\n\n  FlatSparseStreamer(void);\n  virtual ~FlatSparseStreamer(void);\n\n  FlatSparseStreamer(const FlatSparseStreamer &streamer) = delete;\n  FlatSparseStreamer &operator=(const FlatSparseStreamer &streamer) = delete;\n\n public:\n  //! Initialize Streamer\n  int init(const IndexMeta &, const ailego::Params &) override;\n\n  //! Cleanup Streamer\n  int cleanup(void) override;\n\n  //! Open index from file path\n  int open(IndexStorage::Pointer stg) override;\n\n  //! Close file\n  int close(void) override;\n\n  //! flush file\n  int flush(uint64_t checkpoint) override;\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Create a context\n  ContextPointer create_context(void) const override;\n\n  //! 
Create a new iterator\n  IndexStreamer::SparseProvider::Pointer create_sparse_provider(\n      void) const override;\n\n  int add_impl(uint64_t pkey, const uint32_t sparse_count,\n               const uint32_t *sparse_indices, const void *sparse_query,\n               const IndexQueryMeta &qmeta, Context::Pointer &context) override;\n\n  int add_with_id_impl(uint32_t pkey, const uint32_t sparse_count,\n                       const uint32_t *sparse_indices, const void *sparse_query,\n                       const IndexQueryMeta &qmeta,\n                       Context::Pointer &context) override;\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const override;\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const override;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const override;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t *sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta, uint32_t count,\n                     Context::Pointer &context) const override;\n\n  //! 
Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               ContextPointer &context) const override;\n\n  //! Linear search by primary keys with sparse inputs\n  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               ContextPointer &context) const override;\n\n  //! Fetch sparse vector by key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override;\n\n  int get_sparse_vector_by_id(\n      uint32_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n      std::string *sparse_values_buffer) const override {\n    return get_sparse_vector(id, sparse_count, sparse_indices_buffer,\n                             sparse_values_buffer);\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! 
Retrieve meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n  const FlatSparseStreamerEntity &entity(void) const {\n    return entity_;\n  }\n\n  uint32_t magic(void) const {\n    return magic_;\n  }\n\n private:\n  inline int check_params(const IndexQueryMeta &qmeta) const {\n    if (ailego_unlikely(qmeta.data_type() != meta_.data_type())) {\n      LOG_ERROR(\"Unsupported query meta, type=%d, expected=%d\",\n                qmeta.data_type(), meta_.data_type());\n      return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  int do_search(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                const void *sparse_query, bool with_p_keys,\n                const std::vector<std::vector<uint64_t>> &p_keys,\n                const IndexQueryMeta &qmeta, uint32_t count,\n                ContextPointer &context) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };\n\n  IndexMeta meta_{};\n  FlatSparseStreamerEntity entity_;\n\n  uint32_t magic_{0U};\n  Stats stats_{};\n  State state_{STATE_INIT};\n\n  //! avoid add vector while dumping index\n  ailego::SharedMutex shared_mutex_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_streamer_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse_streamer_entity.h\"\n#include <cstdint>\n#include <memory>\n#include <utility>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"flat_sparse_index_format.h\"\n#include \"flat_sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nFlatSparseStreamerEntity::FlatSparseStreamerEntity(IndexStreamer::Stats &stats)\n    : stats_(stats) {}\n\nint FlatSparseStreamerEntity::open(IndexStorage::Pointer storage,\n                                   const IndexMeta &meta) {\n  if (storage_) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n\n  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();\n  if (!keys_map_lock_) {\n    LOG_ERROR(\"FlatSparseStreamerEntity new object failed\");\n    return IndexError_NoMemory;\n  }\n  keys_map_ = std::make_shared<std::map<uint64_t, node_id_t>>();\n\n  if (storage->get(PARAM_FLAT_SPARSE_META_SEG_ID) ||\n      storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID)) {\n    int ret = this->load_storage(storage, meta);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to load storage index\");\n      return ret;\n    }\n  } else {\n    int ret 
= this->init_storage(storage, meta);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Failed to load storage index\");\n      return ret;\n    }\n  }\n\n  if (init_metric(meta) != 0) {\n    LOG_ERROR(\"Failed to init metric\");\n    return IndexError_InvalidFormat;\n  }\n\n  // reserve data chunk\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT,\n                             &max_data_chunk_cnt_);\n  sparse_data_chunks_.reserve(max_data_chunk_cnt_);\n\n  // reserve offset chunk\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT,\n                             &max_doc_cnt_);\n  sparse_offset_chunks_.reserve(max_doc_cnt_ / doc_cnt_per_offset_chunk() + 1);\n  sparse_unit_size_ = meta.unit_size();\n\n  LOG_DEBUG(\n      \"FlatSparseStreamerEntity open success, doc_count[%u], \"\n      \"data_chunk_size[%u], offset_chunk_size[%u], data_chunk_count[%zu], \"\n      \"offset_chunk_count[%zu]\",\n      meta_.doc_cnt, streamer_meta_.data_chunk_size,\n      streamer_meta_.offset_chunk_size, sparse_data_chunks_.size(),\n      sparse_offset_chunks_.size());\n\n  storage_ = storage;\n  return 0;\n}\n\nint FlatSparseStreamerEntity::init_metric(const IndexMeta &meta) {\n  metric_ = IndexFactory::CreateMetric(meta.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create metric %s\", meta.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta, meta.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failled to init metric, ret=%d\", ret);\n    return ret;\n  }\n\n  if (!metric_->sparse_distance()) {\n    LOG_ERROR(\"Invalid metric distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  search_sparse_distance_ = metric_->sparse_distance();\n\n  if (metric_->query_metric() && metric_->query_metric()->distance()) {\n    search_sparse_distance_ = metric_->query_metric()->sparse_distance();\n  }\n\n  return 0;\n}\n\nint 
FlatSparseStreamerEntity::load_storage(IndexStorage::Pointer storage,\n                                           const IndexMeta &meta) {\n  size_t index_size{0};\n\n  // load meta\n  auto segment = storage->get(PARAM_FLAT_SPARSE_META_SEG_ID);\n\n  if (!segment || segment->data_size() < sizeof(meta_)) {\n    LOG_ERROR(\"Missing segment %s, or invalid segment size\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::MemoryBlock data_block;\n  if (ailego_unlikely(segment->read(0, data_block, sizeof(meta_)) !=\n                      sizeof(meta_))) {\n    LOG_ERROR(\"Failed to read meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_ReadData;\n  }\n  meta_ = *(reinterpret_cast<const decltype(meta_) *>(data_block.data()));\n  index_size += segment->capacity();\n\n  // load streamer meta\n  segment = storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);\n  if (!segment || segment->data_size() < sizeof(streamer_meta_)) {\n    LOG_ERROR(\"Missing segment %s, or invalid segment size\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (ailego_unlikely(segment->read(0, data_block, sizeof(streamer_meta_)) !=\n                      sizeof(streamer_meta_))) {\n    LOG_ERROR(\"Failed to read streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_ReadData;\n  }\n  streamer_meta_ =\n      *(reinterpret_cast<const decltype(streamer_meta_) *>(data_block.data()));\n  index_size += segment->capacity();\n\n  uint32_t meta_data_chunk_size{streamer_meta_.data_chunk_size};\n  uint32_t meta_offset_chunk_size{streamer_meta_.offset_chunk_size};\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE,\n                             &meta_data_chunk_size);\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE,\n                
             &meta_offset_chunk_size);\n  if (streamer_meta_.data_chunk_size != meta_data_chunk_size ||\n      streamer_meta_.offset_chunk_size != meta_offset_chunk_size) {\n    LOG_ERROR(\n        \"Invalid streamer meta chunk size data[%u] offset[%u], expect data[%u] \"\n        \"offset[%u]\",\n        streamer_meta_.data_chunk_size, streamer_meta_.offset_chunk_size,\n        meta_data_chunk_size, meta_offset_chunk_size);\n    return IndexError_InvalidFormat;\n  }\n\n  // check chunk cnt\n  if (streamer_meta_.data_chunk_count > max_data_chunk_cnt_ ||\n      meta_.doc_cnt > max_doc_cnt_) {\n    LOG_ERROR(\n        \"Invalid data chunk count[%u] doc count[%u], expect less than \"\n        \"chunk count[%u] doc count[%u]\",\n        streamer_meta_.data_chunk_count, meta_.doc_cnt, max_data_chunk_cnt_,\n        max_doc_cnt_);\n    return IndexError_InvalidFormat;\n  }\n\n  // load offset chunks\n  for (size_t i = 0; i < streamer_meta_.offset_chunk_count; ++i) {\n    std::string segment_id =\n        ailego::StringHelper::Concat(PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX, i);\n    segment = storage->get(segment_id);\n    if (!segment) {\n      LOG_ERROR(\"Missing segment %s\", segment_id.c_str());\n      return IndexError_InvalidFormat;\n    }\n    sparse_offset_chunks_.emplace_back(segment);\n    index_size += segment->capacity();\n  }\n  // load data chunks\n  for (size_t i = 0; i < streamer_meta_.data_chunk_count; ++i) {\n    std::string segment_id =\n        ailego::StringHelper::Concat(PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX, i);\n    segment = storage->get(segment_id);\n    if (!segment) {\n      LOG_ERROR(\"Missing segment %s\", segment_id.c_str());\n    }\n    sparse_data_chunks_.emplace_back(segment);\n    index_size += segment->capacity();\n  }\n\n  // load keys\n  for (node_id_t i = 0; i < meta_.doc_cnt; ++i) {\n    (*keys_map_)[get_key(i)] = i;\n  }\n\n  stats_.set_index_size(index_size);\n  stats_.set_check_point(storage->check_point());\n  
stats_.set_create_time(meta_.create_time);\n  stats_.set_update_time(meta_.update_time);\n  stats_.set_loaded_count(keys_map_->size());\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::init_storage(IndexStorage::Pointer storage,\n                                           const IndexMeta &meta) {\n  meta_.create_time = ailego::Realtime::Seconds();\n  stats_.set_create_time(meta_.create_time);\n  meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(meta_.update_time);\n  meta_.doc_cnt = 0;\n\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE,\n                             &streamer_meta_.data_chunk_size);\n  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE,\n                             &streamer_meta_.offset_chunk_size);\n\n  // append meta segment\n  size_t size = ailego_align(sizeof(meta_), ailego::MemoryHelper::PageSize());\n  int ret = storage->append(PARAM_FLAT_SPARSE_META_SEG_ID, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to append meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return ret;\n  }\n  auto segment = storage->get(PARAM_FLAT_SPARSE_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"Failed to write meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_WriteData;\n  }\n\n  *stats_.mutable_index_size() += size;\n\n  // append streamer meta segment\n  size = ailego_align(sizeof(streamer_meta_), ailego::MemoryHelper::PageSize());\n  ret = storage->append(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to append streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    
return ret;\n  }\n  segment = storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &streamer_meta_, sizeof(streamer_meta_)) !=\n      sizeof(streamer_meta_)) {\n    LOG_ERROR(\"Failed to write streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_WriteData;\n  }\n\n  *stats_.mutable_index_size() += size;\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::close() {\n  storage_.reset();\n  sparse_data_chunks_.clear();\n  sparse_offset_chunks_.clear();\n\n  keys_map_lock_.reset();\n  keys_map_.reset();\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::flush(uint64_t checkpoint) {\n  // flush meta\n  meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(meta_.update_time);\n  auto segment = storage_->get(PARAM_FLAT_SPARSE_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"Failed to write meta segment %s\",\n              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());\n    return IndexError_WriteData;\n  }\n\n  // flush streamer meta\n  streamer_meta_.data_chunk_count = sparse_data_chunks_.size();\n  streamer_meta_.offset_chunk_count = sparse_offset_chunks_.size();\n  segment = storage_->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);\n  if (ailego_unlikely(!segment)) {\n    LOG_ERROR(\"Failed to get streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_Runtime;\n  }\n  if (segment->write(0, &streamer_meta_, sizeof(streamer_meta_)) !=\n      sizeof(streamer_meta_)) {\n    LOG_ERROR(\"Failed to write 
streamer meta segment %s\",\n              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());\n    return IndexError_WriteData;\n  }\n\n  if (checkpoint != 0) {\n    storage_->refresh(checkpoint);\n  }\n  int ret = storage_->flush();\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Failed to flush storage for %s\", IndexError::What(ret));\n    return ret;\n  }\n  if (checkpoint != 0) {\n    stats_.set_check_point(checkpoint);\n  }\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::dump(const IndexDumper::Pointer &dumper) {\n  ailego::ElapsedTime stamp;\n\n  int ret;\n  // meta\n  ret = dump_meta(dumper.get());\n  if (ret != 0) {\n    return ret;\n  }\n\n  auto duration_dump_meta = stamp.milli_seconds();\n\n  // offset & data\n  ret = dump_offset_data(dumper.get());\n  if (ret != 0) {\n    return ret;\n  }\n\n  auto duration_dump_offset_data = stamp.milli_seconds() - duration_dump_meta;\n\n  // keys\n  std::vector<uint64_t> keys = get_keys();\n  ret = dump_keys(keys, dumper.get());\n  if (ret != 0) {\n    return ret;\n  }\n\n  auto duration_dump_keys =\n      stamp.milli_seconds() - duration_dump_offset_data - duration_dump_meta;\n\n  // mapping\n  ret = dump_mapping(keys, dumper.get());\n  if (ret != 0) {\n    return ret;\n  }\n\n  auto duration_dump_mapping = stamp.milli_seconds() -\n                               duration_dump_offset_data - duration_dump_meta -\n                               duration_dump_keys;\n\n  LOG_INFO(\n      \"Dump index meta: %zu ms, offset & data: %zu ms, keys: %zu ms, \"\n      \"mapping: %zu ms\",\n      (size_t)duration_dump_meta, (size_t)duration_dump_offset_data,\n      (size_t)duration_dump_keys, (size_t)duration_dump_mapping);\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::dump_offset_data(IndexDumper *dumper) {\n  ailego::ElapsedTime stamp;\n\n  uint64_t init_offset = dump_size_;\n  std::vector<std::pair<uint64_t, uint32_t>> offset_length;\n\n  // write data\n  int ret;\n  node_id_t total_doc_cnt = doc_cnt();\n  for 
(node_id_t node_id = 0; node_id < total_doc_cnt; node_id++) {\n    uint32_t target_vector_len;\n    IndexStorage::MemoryBlock target_vector_block;\n    ret = get_sparse_vector_ptr_by_id(node_id, target_vector_block,\n                                      &target_vector_len);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to get vector, node_id=%u, error: %s\", node_id,\n                IndexError::What(ret));\n      return ret;\n    }\n    const void *target_vector = target_vector_block.data();\n    ret = dump_sparse_vector_data(target_vector, target_vector_len, dumper);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to dump sparse vector data, node_id=%u, error: %s\",\n                node_id, IndexError::What(ret));\n      return ret;\n    }\n\n    offset_length.push_back({dump_size_ - init_offset, target_vector_len});\n    dump_size_ += target_vector_len;\n  }\n\n  // append data segment\n  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID,\n                     dump_size_ - init_offset, 0, 0) != 0) {\n    LOG_ERROR(\"append data segment failed\");\n    return IndexError_WriteData;\n  }\n\n  auto duration_dump_data = stamp.milli_seconds();\n\n  // write offset\n  for (auto &offset_length_pair : offset_length) {\n    if (dumper->write(&offset_length_pair.first,\n                      sizeof(offset_length_pair.first)) !=\n        sizeof(offset_length_pair.first)) {\n      return IndexError_WriteData;\n    }\n    if (dumper->write(&offset_length_pair.second,\n                      sizeof(offset_length_pair.second)) !=\n        sizeof(offset_length_pair.second)) {\n      return IndexError_WriteData;\n    }\n    dump_size_ +=\n        sizeof(offset_length_pair.first) + sizeof(offset_length_pair.second);\n  }\n\n  // append offset segment\n  if (dumper->append(\n          PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID,\n          offset_length.size() * (sizeof(uint64_t) + sizeof(uint32_t)), 0,\n          0) != 0) {\n    LOG_ERROR(\"append offset segment failed\");\n    
return IndexError_WriteData;\n  }\n\n  auto duration_dump_offset = stamp.milli_seconds() - duration_dump_data;\n\n  LOG_INFO(\"Dump offset: %zu ms, data: %zu ms\", (size_t)duration_dump_offset,\n           (size_t)duration_dump_data);\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::dump_sparse_vector_data(const void *data,\n                                                      uint32_t length,\n                                                      IndexDumper *dumper) {\n  if (dumper->write(data, length) != length) {\n    return IndexError_WriteData;\n  }\n  return 0;\n}\n\nint FlatSparseStreamerEntity::dump_meta(IndexDumper *dumper) {\n  if (dumper->write(&meta_, sizeof(meta_)) != sizeof(meta_)) {\n    LOG_ERROR(\"write meta failed\");\n    return IndexError_WriteData;\n  }\n\n  size_t meta_padding_size = ailego_align(sizeof(meta_), 32) - sizeof(meta_);\n  if (meta_padding_size) {\n    std::string padding(meta_padding_size, '\\0');\n    if (dumper->write(padding.data(), meta_padding_size) != meta_padding_size) {\n      LOG_ERROR(\"write meta padding failed\");\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_META_SEG_ID, sizeof(meta_),\n                        meta_padding_size, 0);\n}\n\nint FlatSparseStreamerEntity::dump_keys(const std::vector<uint64_t> &keys,\n                                        IndexDumper *dumper) {\n  if (keys.size() == 1 && keys.back() == kInvalidKey) {\n    return IndexError_Runtime;\n  }\n\n  size_t keys_size = keys.size() * sizeof(uint64_t);\n  if (dumper->write(keys.data(), keys_size) != keys_size) {\n    LOG_ERROR(\"Failed to write keys to dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n  size_t keys_padding_size = ailego_align(keys_size, 32) - keys_size;\n  if (keys_padding_size) {\n    std::string padding(keys_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write padding to 
dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID, keys_size,\n                        keys_padding_size, 0);\n}\n\nint FlatSparseStreamerEntity::dump_mapping(const std::vector<uint64_t> &keys,\n                                           IndexDumper *dumper) {\n  std::vector<uint32_t> mapping(keys.size());\n  std::iota(mapping.begin(), mapping.end(), 0);\n  std::sort(\n      mapping.begin(), mapping.end(),\n      [&keys](uint32_t lhs, uint32_t rhs) { return (keys[lhs] < keys[rhs]); });\n\n  size_t mapping_size = mapping.size() * sizeof(uint32_t);\n  size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;\n  if (dumper->write(mapping.data(), mapping_size) != mapping_size) {\n    LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n    return IndexError_WriteData;\n  }\n\n  // Write the padding if need\n  if (mapping_padding_size) {\n    std::string padding(mapping_padding_size, '\\0');\n    if (dumper->write(padding.data(), padding.size()) != padding.size()) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper->name().c_str());\n      return IndexError_WriteData;\n    }\n  }\n  return dumper->append(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID, mapping_size,\n                        mapping_padding_size, 0);\n}\n\nFlatSparseStreamerEntity::Pointer FlatSparseStreamerEntity::clone() const {\n  auto entity = new (std::nothrow) FlatSparseStreamerEntity(\n      stats_, meta_, streamer_meta_, keys_map_lock_, keys_map_,\n      sparse_data_chunks_, sparse_offset_chunks_);\n  return FlatSparseStreamerEntity::Pointer(entity);\n}\n\nint FlatSparseStreamerEntity::add(uint64_t key,\n                                  const std::string &sparse_vector,\n                                  const uint32_t sparse_count) {\n  uint32_t sparse_vector_len = sparse_vector.size();\n\n  sparse_vector_len = AlignSize(sparse_vector_len);\n\n  if 
(sparse_vector_len > streamer_meta_.data_chunk_size) {\n    LOG_ERROR(\n        \"Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk \"\n        \"size: %u\",\n        sparse_vector_len, streamer_meta_.data_chunk_size);\n    return IndexError_InvalidArgument;\n  }\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  node_id_t local_id = doc_cnt();\n\n  if (ailego_unlikely(local_id >= max_doc_cnt_)) {\n    LOG_ERROR(\"Add vector failed for exceed max doc count: %u\", max_doc_cnt_);\n    return IndexError_IndexFull;\n  }\n\n  // duplicate check\n  if (ailego_unlikely(get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  // get sparse data chunk and offset for write sparse vector\n  Chunk::Pointer sparse_data_chunk;\n  uint32_t sparse_data_chunk_offset = -1U;\n  uint32_t sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;\n  if (sparse_data_chunk_index == -1U ||\n      sparse_data_chunks_[sparse_data_chunk_index]->data_size() +\n              sparse_vector_len >\n          streamer_meta_.data_chunk_size) {\n    if (ailego_unlikely(sparse_data_chunks_.capacity() ==\n                        sparse_data_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      if (sparse_data_chunk_index != -1U) {\n        LOG_ERROR(\n            \"capacity: %zu, chunk used size: %zu, chunk size: %u, \"\n            \"sparse_vector_len: %u\",\n            sparse_data_chunks_.capacity(),\n            sparse_data_chunks_[sparse_data_chunk_index]->data_size(),\n            streamer_meta_.data_chunk_size, sparse_vector_len);\n      }\n      return IndexError_IndexFull;\n    }\n\n    sparse_data_chunk = alloc_new_data_chunk(sparse_data_chunks_.size());\n    if (ailego_unlikely(!sparse_data_chunk)) {\n      LOG_ERROR(\"allocate data chunk failed\");\n      return IndexError_NoMemory;\n    }\n    sparse_data_chunks_.emplace_back(sparse_data_chunk);\n    
sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;\n    sparse_data_chunk_offset = 0UL;\n  } else {\n    sparse_data_chunk = sparse_data_chunks_[sparse_data_chunk_index];\n    sparse_data_chunk_offset = sparse_data_chunk->data_size();\n  }\n\n  // write sparse vector\n  if (sparse_vector.size() > 0) {\n    if (ailego_unlikely(write_sparse_vector_data(\n                            sparse_data_chunk_index, sparse_data_chunk_offset,\n                            sparse_vector.data(), sparse_vector.size()) != 0)) {\n      LOG_ERROR(\"write sparse vector failed\");\n      return IndexError_NoMemory;\n    }\n  }\n\n  uint64_t sparse_offset = sparse_data_chunk_index;\n  sparse_offset = (sparse_offset << 32U) + sparse_data_chunk_offset;\n\n  // get sparse offset chunk and offset for write new info\n  Chunk::Pointer sparse_offset_chunk;\n  uint32_t sparse_offset_chunk_offset = -1U;\n  uint32_t sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;\n  if (sparse_offset_chunk_index == -1U ||\n      sparse_offset_chunks_[sparse_offset_chunk_index]->data_size() +\n              offset_size_per_node() >\n          streamer_meta_.offset_chunk_size) {\n    // no space left and need to allocate new offset chunk\n    if (ailego_unlikely(sparse_offset_chunks_.capacity() ==\n                        sparse_offset_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n\n    sparse_offset_chunk = alloc_new_offset_chunk(sparse_offset_chunks_.size());\n    if (ailego_unlikely(!sparse_offset_chunk)) {\n      LOG_ERROR(\"allocate offset chunk failed\");\n      return IndexError_NoMemory;\n    }\n    sparse_offset_chunks_.emplace_back(sparse_offset_chunk);\n    sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;\n    sparse_offset_chunk_offset = 0UL;\n  } else {\n    sparse_offset_chunk = sparse_offset_chunks_[sparse_offset_chunk_index];\n    sparse_offset_chunk_offset = 
sparse_offset_chunk->data_size();\n  }\n\n  // write offset\n  size_t size = sparse_offset_chunk->write(sparse_offset_chunk_offset,\n                                           &sparse_offset, sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"Chunk write sparse vec offset failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  // write length\n  size =\n      sparse_offset_chunk->write(sparse_offset_chunk_offset + sizeof(uint64_t),\n                                 &sparse_vector_len, sizeof(uint32_t));\n  if (ailego_unlikely(size != sizeof(uint32_t))) {\n    LOG_ERROR(\"Chunk write sparse vec len failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  // write key\n  size = sparse_offset_chunk->write(\n      sparse_offset_chunk_offset + 2 * sizeof(uint64_t), &key,\n      sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"Chunk write key failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  // LOG_INFO(\"Write sparse vector, key=%lu, offset chunk=%u, offset=%u,\n  // len=%u\",\n  //          key, sparse_offset_chunk_index, sparse_offset_chunk_offset,\n  //          offset_size_per_node());\n\n  // LOG_INFO(\"Write sparse vector, key=%lu, data chunk=%u, offset=%u, len=%u\",\n  //          key, sparse_data_chunk_index, sparse_data_chunk_offset,\n  //          sparse_vector_len);\n\n  // resize chunk\n  if (sparse_vector_len > 0) {\n    sparse_data_chunk_offset += sparse_vector_len;\n    if (ailego_unlikely(sparse_data_chunk->resize(sparse_data_chunk_offset) !=\n                        sparse_data_chunk_offset)) {\n      LOG_ERROR(\"Sparse Chunk resize to %u failed\", sparse_data_chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  // persist in keys_map\n  {\n    keys_map_lock_->lock();\n    (*keys_map_)[key] = local_id;\n    keys_map_lock_->unlock();\n  }\n\n  inc_doc_count();\n  inc_total_sparse_count(sparse_count);\n\n  return 
0;\n}\n\nint FlatSparseStreamerEntity::add_vector_with_id(\n    uint32_t id, const std::string &sparse_vector,\n    const uint32_t sparse_count) {\n  uint32_t sparse_vector_len = sparse_vector.size();\n\n  sparse_vector_len = AlignSize(sparse_vector_len);\n\n  if (sparse_vector_len > streamer_meta_.data_chunk_size) {\n    LOG_ERROR(\n        \"Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk \"\n        \"size: %u\",\n        sparse_vector_len, streamer_meta_.data_chunk_size);\n    return IndexError_InvalidArgument;\n  }\n\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (id >= doc_cnt()) {\n    for (auto i = doc_cnt(); i <= id; i++) {\n      node_id_t local_id = doc_cnt();\n      if (ailego_unlikely(local_id >= max_doc_cnt_)) {\n        LOG_ERROR(\"Add vector failed for exceed max doc count: %u\",\n                  max_doc_cnt_);\n        return IndexError_IndexFull;\n      }\n      uint32_t sparse_data_chunk_index, sparse_data_chunk_offset,\n          sparse_offset_chunk_index, sparse_offset_chunk_offset;\n      if (i < id) {\n        write_sparse_vector_to_chunk(\"\", 0, sparse_data_chunk_index,\n                                     sparse_data_chunk_offset);\n      } else {\n        write_sparse_vector_to_chunk(sparse_vector, sparse_vector_len,\n                                     sparse_data_chunk_index,\n                                     sparse_data_chunk_offset);\n      }\n      uint64_t sparse_offset =\n          ((uint64_t)sparse_data_chunk_index << 32U) + sparse_data_chunk_offset;\n      get_new_sparse_offset_chunk(sparse_offset_chunk_index,\n                                  sparse_offset_chunk_offset);\n      uint64_t written_key = kInvalidKey;\n      if (i == id) {\n        written_key = i;\n      }\n      write_sparse_offset_to_chunk(sparse_offset_chunk_index,\n                                   sparse_offset_chunk_offset, sparse_offset,\n                                   sparse_vector_len, written_key);\n      {\n       
 keys_map_lock_->lock();\n        (*keys_map_)[i] = written_key;\n        keys_map_lock_->unlock();\n      }\n      inc_doc_count();\n    }\n  } else {\n    uint32_t sparse_data_chunk_index, sparse_data_chunk_offset;\n    write_sparse_vector_to_chunk(sparse_vector, sparse_vector_len,\n                                 sparse_data_chunk_index,\n                                 sparse_data_chunk_offset);\n    uint64_t sparse_offset =\n        ((uint64_t)sparse_data_chunk_index << 32U) + sparse_data_chunk_offset;\n    uint32_t sparse_offset_chunk_index =\n        id / get_offset_info_number_per_chunk();\n    uint32_t sparse_offset_chunk_offset =\n        id % get_offset_info_number_per_chunk() * offset_size_per_node();\n    write_sparse_offset_to_chunk(sparse_offset_chunk_index,\n                                 sparse_offset_chunk_offset, sparse_offset,\n                                 sparse_vector_len, id);\n    {\n      keys_map_lock_->lock();\n      (*keys_map_)[id] = id;\n      keys_map_lock_->unlock();\n    }\n  }\n  inc_total_sparse_count(sparse_count);\n  return 0;\n}\n\nint FlatSparseStreamerEntity::write_sparse_vector_to_chunk(\n    const std::string &sparse_vector, const uint32_t sparse_vector_len,\n    uint32_t &sparse_data_chunk_index, uint32_t &sparse_data_chunk_offset) {\n  // get sparse data chunk and offset for write sparse vector\n  Chunk::Pointer sparse_data_chunk;\n  sparse_data_chunk_offset = -1U;\n  sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;\n  if (sparse_data_chunk_index == -1U ||\n      sparse_data_chunks_[sparse_data_chunk_index]->data_size() +\n              sparse_vector_len >\n          streamer_meta_.data_chunk_size) {\n    if (ailego_unlikely(sparse_data_chunks_.capacity() ==\n                        sparse_data_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      if (sparse_data_chunk_index != -1U) {\n        LOG_ERROR(\n            \"capacity: %zu, chunk used size: %zu, chunk size: %u, 
\"\n            \"sparse_vector_len: %u\",\n            sparse_data_chunks_.capacity(),\n            sparse_data_chunks_[sparse_data_chunk_index]->data_size(),\n            streamer_meta_.data_chunk_size, sparse_vector_len);\n      }\n      return IndexError_IndexFull;\n    }\n\n    sparse_data_chunk = alloc_new_data_chunk(sparse_data_chunks_.size());\n    if (ailego_unlikely(!sparse_data_chunk)) {\n      LOG_ERROR(\"allocate data chunk failed\");\n      return IndexError_NoMemory;\n    }\n    sparse_data_chunks_.emplace_back(sparse_data_chunk);\n    sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;\n    sparse_data_chunk_offset = 0UL;\n  } else {\n    sparse_data_chunk = sparse_data_chunks_[sparse_data_chunk_index];\n    sparse_data_chunk_offset = sparse_data_chunk->data_size();\n  }\n\n  // write sparse vector\n  if (sparse_vector.size() > 0) {\n    if (ailego_unlikely(write_sparse_vector_data(\n                            sparse_data_chunk_index, sparse_data_chunk_offset,\n                            sparse_vector.data(), sparse_vector.size()) != 0)) {\n      LOG_ERROR(\"write sparse vector failed\");\n      return IndexError_NoMemory;\n    }\n  }\n\n  // resize chunk\n  if (sparse_vector_len > 0) {\n    uint32_t sparse_data_chunk_size =\n        sparse_data_chunk_offset + sparse_vector_len;\n    if (ailego_unlikely(sparse_data_chunk->resize(sparse_data_chunk_size) !=\n                        sparse_data_chunk_size)) {\n      LOG_ERROR(\"Sparse Chunk resize to %u failed\", sparse_data_chunk_size);\n      return IndexError_Runtime;\n    }\n  }\n  return 0;\n}\n\nint FlatSparseStreamerEntity::get_new_sparse_offset_chunk(\n    uint32_t &sparse_offset_chunk_index, uint32_t &sparse_offset_chunk_offset) {\n  // get sparse offset chunk and offset for write new info\n  Chunk::Pointer sparse_offset_chunk;\n  sparse_offset_chunk_offset = -1U;\n  sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;\n  if (sparse_offset_chunk_index == -1U ||\n      
sparse_offset_chunks_[sparse_offset_chunk_index]->data_size() +\n              offset_size_per_node() >\n          streamer_meta_.offset_chunk_size) {\n    // no space left and need to allocate new offset chunk\n    if (ailego_unlikely(sparse_offset_chunks_.capacity() ==\n                        sparse_offset_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n\n    sparse_offset_chunk = alloc_new_offset_chunk(sparse_offset_chunks_.size());\n    if (ailego_unlikely(!sparse_offset_chunk)) {\n      LOG_ERROR(\"allocate offset chunk failed\");\n      return IndexError_NoMemory;\n    }\n    sparse_offset_chunks_.emplace_back(sparse_offset_chunk);\n    sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;\n    sparse_offset_chunk_offset = 0UL;\n  } else {\n    sparse_offset_chunk = sparse_offset_chunks_[sparse_offset_chunk_index];\n    sparse_offset_chunk_offset = sparse_offset_chunk->data_size();\n  }\n  return 0;\n}\n\nint FlatSparseStreamerEntity::write_sparse_offset_to_chunk(\n    const uint32_t sparse_offset_chunk_index,\n    const uint32_t sparse_offset_chunk_offset, const uint64_t sparse_offset,\n    const uint32_t sparse_vector_len, const uint64_t node_id) {\n  // write offset\n  Chunk::Pointer sparse_offset_chunk =\n      sparse_offset_chunks_[sparse_offset_chunk_index];\n  size_t size = sparse_offset_chunk->write(sparse_offset_chunk_offset,\n                                           &sparse_offset, sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"Chunk write sparse vec offset failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  // write length\n  size =\n      sparse_offset_chunk->write(sparse_offset_chunk_offset + sizeof(uint64_t),\n                                 &sparse_vector_len, sizeof(uint32_t));\n  if (ailego_unlikely(size != sizeof(uint32_t))) {\n    LOG_ERROR(\"Chunk write sparse vec len failed, ret=%zu\", 
size);\n    return IndexError_WriteData;\n  }\n\n  // write key\n  size = sparse_offset_chunk->write(\n      sparse_offset_chunk_offset + 2 * sizeof(uint64_t), &node_id,\n      sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"Chunk write key failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  return 0;\n}\n\nuint64_t FlatSparseStreamerEntity::get_key(node_id_t node_id) const {\n  uint32_t offset_chunk_index = node_id / get_offset_info_number_per_chunk();\n  uint32_t offset_chunk_key_offset =\n      node_id % get_offset_info_number_per_chunk() * offset_size_per_node() +\n      2 * sizeof(uint64_t);\n\n  IndexStorage::MemoryBlock block;\n  if (ailego_unlikely(sparse_offset_chunks_[offset_chunk_index]->read(\n                          offset_chunk_key_offset, block, sizeof(uint64_t)) !=\n                      sizeof(uint64_t))) {\n    LOG_ERROR(\"Read key failed, offset=%u, node_id=%u\", offset_chunk_key_offset,\n              node_id);\n    return kInvalidKey;\n  };\n\n  return *reinterpret_cast<const uint64_t *>(block.data());\n}\n\nint FlatSparseStreamerEntity::get_sparse_vector_ptr_by_id(\n    node_id_t node_id, const void **sparse_vector_ptr,\n    uint32_t *sparse_vector_len_ptr) const {\n  uint32_t offset_chunk_index = node_id / get_offset_info_number_per_chunk();\n  uint32_t offset_chunk_offset =\n      node_id % get_offset_info_number_per_chunk() * offset_size_per_node();\n\n  // LOG_DEBUG(\"Read sparse vector, offset chunk=%u, offset=%u, len=%u\",\n  //           offset_chunk_index, offset_chunk_offset, offset_size_per_node());\n\n  auto offset_chunk = sparse_offset_chunks_[offset_chunk_index];\n\n  const void *offset_info = nullptr;\n  size_t read_len = offset_chunk->read(offset_chunk_offset, &offset_info,\n                                       offset_size_per_node());\n  if (ailego_unlikely(read_len != offset_size_per_node())) {\n    LOG_ERROR(\"Read offset info failed, offset=%u, read_len=%zu, 
expect=%u\",\n              offset_chunk_offset, read_len, offset_size_per_node());\n    return IndexError_ReadData;\n  };\n\n  // sparse offset\n  uint64_t sparse_offset = *(uint64_t *)offset_info;\n  uint32_t sparse_vector_len =\n      *(uint32_t *)((uint8_t *)offset_info + sizeof(uint64_t));\n\n  uint32_t sparse_data_chunk_index =\n      static_cast<uint32_t>((sparse_offset >> 32) & 0xFFFFFFFF);\n  uint32_t sparse_data_chunk_offset =\n      static_cast<uint32_t>(sparse_offset & 0xFFFFFFFF);\n\n  if (sparse_vector_len > 0) {\n    const void *sparse_data = get_sparse_vector_data(\n        sparse_data_chunk_index, sparse_data_chunk_offset, sparse_vector_len);\n    if (ailego_unlikely(sparse_data == nullptr)) {\n      LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)sparse_offset,\n                sparse_vector_len);\n\n      return IndexError_ReadData;\n    }\n    *sparse_vector_ptr = sparse_data;\n    *sparse_vector_len_ptr = sparse_vector_len;\n  }\n\n  // LOG_DEBUG(\"Read sparse vector, data chunk=%u, offset=%u, len=%u\",\n  //           sparse_data_chunk_index, sparse_data_chunk_offset,\n  //           sparse_vector_len);\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::get_sparse_vector_ptr_by_id(\n    node_id_t node_id, IndexStorage::MemoryBlock &sparse_vector_block,\n    uint32_t *sparse_vector_len_ptr) const {\n  uint32_t offset_chunk_index = node_id / get_offset_info_number_per_chunk();\n  uint32_t offset_chunk_offset =\n      node_id % get_offset_info_number_per_chunk() * offset_size_per_node();\n\n  // LOG_DEBUG(\"Read sparse vector, offset chunk=%u, offset=%u, len=%u\",\n  //           offset_chunk_index, offset_chunk_offset, offset_size_per_node());\n\n  auto offset_chunk = sparse_offset_chunks_[offset_chunk_index];\n\n  const void *offset_info = nullptr;\n  IndexStorage::MemoryBlock offset_info_block;\n  size_t read_len = offset_chunk->read(offset_chunk_offset, offset_info_block,\n                                       
offset_size_per_node());\n  if (ailego_unlikely(read_len != offset_size_per_node())) {\n    LOG_ERROR(\"Read offset info failed, offset=%u, read_len=%zu, expect=%u\",\n              offset_chunk_offset, read_len, offset_size_per_node());\n    return IndexError_ReadData;\n  };\n  offset_info = offset_info_block.data();\n\n  // sparse offset\n  uint64_t sparse_offset = *(uint64_t *)offset_info;\n  uint32_t sparse_vector_len =\n      *(uint32_t *)((uint8_t *)offset_info + sizeof(uint64_t));\n\n  uint32_t sparse_data_chunk_index =\n      static_cast<uint32_t>((sparse_offset >> 32) & 0xFFFFFFFF);\n  uint32_t sparse_data_chunk_offset =\n      static_cast<uint32_t>(sparse_offset & 0xFFFFFFFF);\n\n  if (sparse_vector_len > 0) {\n    get_sparse_vector_data(sparse_data_chunk_index, sparse_data_chunk_offset,\n                           sparse_vector_len, sparse_vector_block);\n    if (ailego_unlikely(sparse_vector_block.data() == nullptr)) {\n      LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)sparse_offset,\n                sparse_vector_len);\n\n      return IndexError_ReadData;\n    }\n    *sparse_vector_len_ptr = sparse_vector_len;\n  }\n\n  return 0;\n}\n\nint FlatSparseStreamerEntity::write_sparse_vector_data(uint32_t chunk_index,\n                                                       uint64_t offset,\n                                                       const void *data,\n                                                       uint32_t length) {\n  auto size = sparse_data_chunks_[chunk_index]->write(offset, data, length);\n  if (size != length) {\n    LOG_ERROR(\n        \"write sparse vector data failed: chunk_index=%u, offset=%zu, \"\n        \"length=%u, size=%zu, chunk_data_size=%zu\",\n        chunk_index, (size_t)offset, length, size,\n        sparse_data_chunks_[chunk_index]->data_size());\n    return IndexError_WriteData;\n  }\n  // LOG_DEBUG(\n  //     \"write_sparse_vector_data: chunk_index=%u, offset=%lu, length=%u, \"\n  //     
\"data=%p\",\n  //     chunk_index, offset, length, data);\n  return 0;\n}\n\nconst void *FlatSparseStreamerEntity::get_sparse_vector_data(\n    uint32_t chunk_index, uint64_t offset, uint32_t length) const {\n  const void *data;\n  auto size = sparse_data_chunks_[chunk_index]->read(offset, &data, length);\n  if (size != length) {\n    LOG_ERROR(\n        \"read sparse vector data failed: chunk_index=%u, offset=%zu, \"\n        \"length=%u, size=%zu\",\n        chunk_index, (size_t)offset, length, size);\n    return nullptr;\n  }\n  // LOG_DEBUG(\n  //     \"get_sparse_vector_data: chunk_index=%u, offset=%lu, length=%u, \"\n  //     \"data=%p\",\n  //     chunk_index, offset, length, data);\n  return data;\n}\n\nint FlatSparseStreamerEntity::get_sparse_vector_data(\n    uint32_t chunk_index, uint64_t offset, uint32_t length,\n    IndexStorage::MemoryBlock &block) const {\n  auto size = sparse_data_chunks_[chunk_index]->read(offset, block, length);\n  if (size != length) {\n    LOG_ERROR(\n        \"read sparse vector data failed: chunk_index=%u, offset=%zu, \"\n        \"length=%u, size=%zu\",\n        chunk_index, (size_t)offset, length, size);\n    return IndexError_ReadData;\n  }\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_streamer_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <map>\n#include <memory>\n#include <string>\n#include <vector>\n#include <ailego/parallel/lock.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"flat_sparse_entity.h\"\n#include \"flat_sparse_index_format.h\"\n#include \"flat_sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n\n/*! Flat Sparse Streamer Entity\n */\nclass FlatSparseStreamerEntity : public FlatSparseEntity {\n public:\n  typedef std::shared_ptr<FlatSparseStreamerEntity> Pointer;\n\n  using Chunk = IndexStorage::Segment;\n\n  //! Constructor\n  explicit FlatSparseStreamerEntity(IndexStreamer::Stats &stats);\n\n  //! Destructor\n  virtual ~FlatSparseStreamerEntity() = default;\n\n  //! Disable them\n  FlatSparseStreamerEntity(const FlatSparseStreamerEntity &) = delete;\n  FlatSparseStreamerEntity &operator=(const FlatSparseStreamerEntity &) =\n      delete;\n\n  //! Open the entity with storage\n  int open(IndexStorage::Pointer storage, const IndexMeta &meta);\n\n  //! Close the entity\n  int close();\n\n  //! Flush linear index to storage\n  int flush(uint64_t checkpoint);\n\n  //! 
Dump index by dumper\n  int dump(const IndexDumper::Pointer &dumper);\n\n  //! Add sparse vector to linear index\n  int add(uint64_t key, const std::string &sparse_vector,\n          const uint32_t sparse_count);\n\n  //! Add sparse vector to linear index with id\n  int add_vector_with_id(uint32_t id, const std::string &sparse_vector,\n                         uint32_t sparse_count);\n\n  //! Clone entity\n  FlatSparseStreamerEntity::Pointer clone() const;\n\n  int get_index_sparse_meta(IndexMeta *meta) const {\n    return IndexHelper::DeserializeFromStorage(storage_.get(), meta);\n  }\n\n  int set_index_sparse_meta(const IndexMeta &meta) const {\n    return IndexHelper::SerializeToStorage(meta, storage_.get());\n  }\n\n public:\n  inline uint32_t doc_cnt() const override {\n    return meta_.doc_cnt;\n  }\n\n  inline uint32_t total_sparse_count() const override {\n    return meta_.total_sparse_count;\n  }\n\n  size_t sparse_unit_size() const override {\n    return sparse_unit_size_;\n  }\n\n  inline node_id_t get_id(uint64_t key) const override {\n    keys_map_lock_->lock_shared();\n    auto it = keys_map_->find(key);\n    keys_map_lock_->unlock_shared();\n    return it == keys_map_->end() ? 
kInvalidNodeId : it->second;\n  }\n\n  uint64_t get_key(node_id_t node_id) const override;\n\n  int get_sparse_vector_ptr_by_id(node_id_t id, const void **sparse_vector,\n                                  uint32_t *sparse_vector_len) const override;\n  int get_sparse_vector_ptr_by_id(\n      const node_id_t id, IndexStorage::MemoryBlock &sparse_vector_block,\n      uint32_t *sparse_vector_len) const override;\n\n  float get_search_distance(const std::string &vector,\n                            node_id_t target_node_id) const override {\n    float dist;\n    const void *target_vector;\n    uint32_t target_vector_len;\n    get_sparse_vector_ptr_by_id(target_node_id, &target_vector,\n                                &target_vector_len);\n    search_sparse_distance_(vector.c_str(), target_vector, &dist);\n    return dist;\n  }\n\n private:\n  void inc_doc_count() {\n    meta_.doc_cnt++;\n  }\n  void inc_total_sparse_count(uint32_t count) {\n    meta_.total_sparse_count += count;\n  }\n\n  int init_metric(const IndexMeta &meta);\n\n  int init_storage(IndexStorage::Pointer storage, const IndexMeta &meta);\n\n  int load_storage(IndexStorage::Pointer storage, const IndexMeta &meta);\n\n  static inline size_t AlignSize(size_t size) {\n    return (size + 0x1F) & (~0x1F);\n  }\n\n  inline uint32_t offset_size_per_node() const {\n    return 3 * sizeof(uint64_t);\n  }\n\n  inline uint32_t doc_cnt_per_offset_chunk() const {\n    return streamer_meta_.offset_chunk_size / offset_size_per_node();\n  }\n\n  Chunk::Pointer alloc_new_offset_chunk(uint32_t chunk_id) {\n    std::string segment_id = ailego::StringHelper::Concat(\n        PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX, chunk_id);\n    // LOG_INFO(\"Alloc new offset chunk %s\", segment_id.c_str());\n    return alloc_new_chunk(segment_id, streamer_meta_.offset_chunk_size);\n  }\n\n  Chunk::Pointer alloc_new_data_chunk(uint32_t chunk_id) {\n    std::string segment_id = ailego::StringHelper::Concat(\n        
PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX, chunk_id);\n    // LOG_INFO(\"Alloc new data chunk %s\", segment_id.c_str());\n    return alloc_new_chunk(segment_id, streamer_meta_.data_chunk_size);\n  }\n\n  Chunk::Pointer alloc_new_chunk(const std::string &segment_id, uint32_t size) {\n    int ret = storage_->append(segment_id, size);\n    if (ailego_unlikely(ret != 0)) {\n      return nullptr;\n    }\n    *stats_.mutable_index_size() += size;\n    return storage_->get(segment_id);\n  }\n\n  inline uint32_t get_offset_info_number_per_chunk() const {\n    return streamer_meta_.offset_chunk_size / offset_size_per_node();\n  }\n\n  int write_sparse_vector_to_chunk(const std::string &sparse_vector,\n                                   const uint32_t sparse_vector_len,\n                                   uint32_t &sparse_data_chunk_index,\n                                   uint32_t &sparse_data_chunk_offset);\n\n  int get_new_sparse_offset_chunk(uint32_t &sparse_offset_chunk_index,\n                                  uint32_t &sparse_offset_chunk_offset);\n\n  int write_sparse_offset_to_chunk(const uint32_t sparse_offset_chunk_index,\n                                   const uint32_t sparse_offset_chunk_offset,\n                                   const uint64_t sparse_offset,\n                                   const uint32_t sparse_vector_len,\n                                   const uint64_t node_id);\n\n  int write_sparse_vector_data(uint32_t chunk_index, uint64_t offset,\n                               const void *data, uint32_t length);\n\n  const void *get_sparse_vector_data(uint32_t chunk_index, uint64_t offset,\n                                     uint32_t length) const;\n\n  int get_sparse_vector_data(uint32_t chunk_index, uint64_t offset,\n                             uint32_t length,\n                             IndexStorage::MemoryBlock &block) const;\n\n  int dump_sparse_vector_data(const void *data, uint32_t length,\n                              IndexDumper 
*dumper);\n\n  int dump_meta(IndexDumper *dumper);\n\n  int dump_index_meta(IndexDumper *dumper);\n\n  int dump_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  int dump_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);\n\n  int dump_offset_data(IndexDumper *dumper);\n\n\n private:\n  FlatSparseStreamerEntity(\n      IndexStreamer::Stats &stats, const FlatSparseMeta &meta,\n      const FlatSparseStreamerMeta &streamer_meta,\n      std::shared_ptr<ailego::SharedMutex> keys_map_lock,\n      std::shared_ptr<std::map<uint64_t, node_id_t>> keys_map,\n      std::vector<Chunk::Pointer> sparse_data_chunks,\n      std::vector<Chunk::Pointer> sparse_offset_chunks)\n      : stats_(stats),\n        meta_(meta),\n        streamer_meta_(streamer_meta),\n        keys_map_lock_(keys_map_lock),\n        keys_map_(keys_map),\n        sparse_data_chunks_(std::move(sparse_data_chunks)),\n        sparse_offset_chunks_(std::move(sparse_offset_chunks)) {}\n\n private:\n  IndexStorage::Pointer storage_{};\n  IndexStreamer::Stats &stats_;\n\n  // meta\n  FlatSparseMeta meta_;\n  FlatSparseStreamerMeta streamer_meta_;\n\n  // metric\n  IndexMetric::Pointer metric_{};\n  IndexMetric::MatrixSparseDistance search_sparse_distance_{};\n\n  std::mutex mutex_{};\n\n  // keys map\n  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};\n  std::shared_ptr<std::map<uint64_t, node_id_t>> keys_map_{};\n\n  // chunks\n  mutable std::vector<Chunk::Pointer> sparse_data_chunks_{};\n  mutable std::vector<Chunk::Pointer> sparse_offset_chunks_{};\n\n  // config\n  uint32_t max_doc_cnt_{1 << 24U};  // 16 million\n  uint32_t max_data_chunk_cnt_{\n      1 << 10U};  // 1024, default single_data_chunk_size = 8M,\n                  // default_total_max = 1024 * 8M = 8G\n\n  uint64_t dump_size_{0U};\n  size_t sparse_unit_size_{0U};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/flat_sparse/flat_sparse_utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\nstatic constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE = 16384;\nstatic const std::string PARAM_FLAT_SPARSE_META_SEG_ID =\n    \"bruteforce_sparse_meta\";\n\n// streamer\nstatic const std::string PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID =\n    \"bruteforce_sparse_streamer_meta\";\nstatic const std::string PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX =\n    \"bruteforce_sparse_streamer_offset_\";\nstatic const std::string PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX =\n    \"bruteforce_sparse_streamer_data_\";\n\n// searcher\nstatic const std::string PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID =\n    \"bruteforce_sparse_searcher_offset_segment\";\nstatic const std::string PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID =\n    \"bruteforce_sparse_searcher_data_segment\";\nstatic const std::string PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID =\n    \"bruteforce_sparse_searcher_keys_segment\";\nstatic const std::string PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID =\n    \"bruteforce_sparse_searcher_mapping_segment\";\n\n// streamer\nstatic const std::string PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE(\n    \"proxima.bruteforce.sparse_streamer.offset_chunk_size\");\n\nstatic const std::string PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE(\n    \"proxima.bruteforce.sparse_streamer.data_chunk_size\");\n\nstatic const 
std::string PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT(\n    \"proxima.bruteforce.sparse_streamer.max_doc_cnt\");\n\nstatic const std::string PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT(\n    \"proxima.bruteforce.sparse_streamer.max_data_chunk_cnt\");\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_knn_hnsw \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS core_framework sparsehash\n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_algorithm.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_algorithm.h\"\n#include <chrono>\n#include <iostream>\n#include <vector>\n#include <ailego/internal/cpu_features.h>\n\nnamespace zvec {\nnamespace core {\n\nHnswAlgorithm::HnswAlgorithm(HnswEntity &entity)\n    : entity_(entity),\n      mt_(std::chrono::system_clock::now().time_since_epoch().count()),\n      lock_pool_(kLockCnt) {}\n\nint HnswAlgorithm::cleanup() {\n  return 0;\n}\n\nint HnswAlgorithm::add_node(node_id_t id, level_t level, HnswContext *ctx) {\n  spin_lock_.lock();\n\n  // std::cout << \"id: \" << id << \", level: \" << level << std::endl;\n\n  auto cur_max_level = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    return 0;\n  }\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    mutex_.lock();\n    // re-check max level\n    cur_max_level = entity_.cur_max_level();\n    entry_point = entity_.entry_point();\n    if (level <= cur_max_level) {\n      mutex_.unlock();\n    }\n  }\n\n  level_t cur_level = cur_max_level;\n  dist_t dist = ctx->dist_calculator()(entry_point);\n  for (; cur_level > level; --cur_level) {\n    select_entry_point(cur_level, &entry_point, &dist, ctx);\n  }\n\n  for (; cur_level >= 0; --cur_level) {\n    
search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),\n                     ctx);\n  }\n\n  // add neighbors from down level to top level, to avoid upper level visible\n  // to knn_search but the under layer level not ready\n  for (cur_level = 0; cur_level <= level; ++cur_level) {\n    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);\n    ctx->level_topk(cur_level).clear();\n  }\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    spin_lock_.lock();\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    mutex_.unlock();\n  }\n\n  return 0;\n}\n\nint HnswAlgorithm::search(HnswContext *ctx) const {\n  spin_lock_.lock();\n  auto maxLevel = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    return 0;\n  }\n\n  dist_t dist = ctx->dist_calculator().dist(entry_point);\n  for (level_t cur_level = maxLevel; cur_level >= 1; --cur_level) {\n    select_entry_point(cur_level, &entry_point, &dist, ctx);\n  }\n\n  auto &topk_heap = ctx->topk_heap();\n  topk_heap.clear();\n  search_neighbors(0, &entry_point, &dist, topk_heap, ctx);\n\n  if (ctx->group_by_search()) {\n    expand_neighbors_by_group(topk_heap, ctx);\n  }\n\n  return 0;\n}\n\n//! 
select_entry_point on hnsw level, ef = 1\nvoid HnswAlgorithm::select_entry_point(level_t level, node_id_t *entry_point,\n                                       dist_t *dist, HnswContext *ctx) const {\n  auto &entity = ctx->get_entity();\n  HnswDistCalculator &dc = ctx->dist_calculator();\n  while (true) {\n    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n    uint32_t size = neighbors.size();\n    if (size == 0) {\n      break;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;\n    int ret = entity.get_vector(&neighbors[0], size, neighbor_vec_blocks);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n\n    bool find_closer = false;\n\n    std::vector<float> dists(size);\n    std::vector<const void *> neighbor_vecs(size);\n    for (uint32_t i = 0; i < size; ++i) {\n      neighbor_vecs[i] = neighbor_vec_blocks[i].data();\n    }\n\n    dc.batch_dist(neighbor_vecs.data(), size, dists.data());\n\n    for (uint32_t i = 0; i < size; ++i) {\n      dist_t cur_dist = dists[i];\n\n      if (cur_dist < *dist) {\n        *entry_point = neighbors[i];\n        *dist = cur_dist;\n        find_closer = true;\n      }\n    }\n\n    if (!find_closer) {\n      break;\n    }\n  }\n\n  return;\n}\n\nvoid HnswAlgorithm::add_neighbors(node_id_t id, level_t level,\n                                  TopkHeap &topk_heap, HnswContext *ctx) {\n  if (ailego_unlikely(topk_heap.size() == 0)) {\n    return;\n  }\n\n  HnswDistCalculator &dc = ctx->dist_calculator();\n\n  update_neighbors(dc, id, level, topk_heap);\n\n  // reverse update neighbors\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    reverse_update_neighbors(dc, topk_heap[i].first, level, id,\n                             topk_heap[i].second, ctx->update_heap());\n  }\n\n  
return;\n}\n\nvoid HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point,\n                                     dist_t *dist, TopkHeap &topk,\n                                     HnswContext *ctx) const {\n  const auto &entity = ctx->get_entity();\n  HnswDistCalculator &dc = ctx->dist_calculator();\n  VisitFilter &visit = ctx->visit_filter();\n  CandidateHeap &candidates = ctx->candidates();\n  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n  if (ctx->filter().is_valid()) {\n    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n  }\n\n  candidates.clear();\n  visit.clear();\n  visit.set_visited(*entry_point);\n  if (!filter(*entry_point)) {\n    topk.emplace(*entry_point, *dist);\n  }\n\n  candidates.emplace(*entry_point, *dist);\n  while (!candidates.empty() && !ctx->reach_scan_limit()) {\n    auto top = candidates.begin();\n    node_id_t main_node = top->first;\n    dist_t main_dist = top->second;\n\n    if (topk.full() && main_dist > topk[0].second) {\n      break;\n    }\n\n    candidates.pop();\n    const Neighbors neighbors = entity.get_neighbors(level, main_node);\n    ailego_prefetch(neighbors.data);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n\n    std::vector<node_id_t> neighbor_ids(neighbors.size());\n    uint32_t size = 0;\n    for (uint32_t i = 0; i < neighbors.size(); ++i) {\n      node_id_t node = neighbors[i];\n      if (visit.visited(node)) {\n        if (ailego_unlikely(ctx->debugging())) {\n          (*ctx->mutable_stats_visit_dup_cnt())++;\n        }\n        continue;\n      }\n      visit.set_visited(node);\n      neighbor_ids[size++] = node;\n    }\n    if (size == 0) {\n      continue;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;\n    int ret = entity.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);\n    if (ailego_unlikely(ctx->debugging())) {\n      
(*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n\n    // do prefetch\n    static constexpr node_id_t BATCH_SIZE = 12;\n    static constexpr node_id_t PREFETCH_STEP = 2;\n    for (uint32_t i = 0; i < std::min(BATCH_SIZE * PREFETCH_STEP, size); ++i) {\n      ailego_prefetch(neighbor_vec_blocks[i].data());\n    }\n    // done\n\n    std::vector<float> dists(size);\n    std::vector<const void *> neighbor_vecs(size);\n\n    for (uint32_t i = 0; i < size; ++i) {\n      neighbor_vecs[i] = neighbor_vec_blocks[i].data();\n    }\n\n    dc.batch_dist(neighbor_vecs.data(), size, dists.data());\n\n    for (uint32_t i = 0; i < size; ++i) {\n      node_id_t node = neighbor_ids[i];\n      dist_t cur_dist = dists[i];\n\n      if ((!topk.full()) || cur_dist < topk[0].second) {\n        candidates.emplace(node, cur_dist);\n        // update entry_point for next level scan\n        if (cur_dist < *dist) {\n          *entry_point = node;\n          *dist = cur_dist;\n        }\n        if (!filter(node)) {\n          topk.emplace(node, cur_dist);\n        }\n      }  // end if\n    }  // end for\n  }  // while\n\n  return;\n}\n\nvoid HnswAlgorithm::expand_neighbors_by_group(TopkHeap &topk,\n                                              HnswContext *ctx) const {\n  if (!ctx->group_by().is_valid()) {\n    return;\n  }\n\n  const auto &entity = ctx->get_entity();\n  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n    return ctx->group_by()(entity.get_key(id));\n  };\n\n  // devide into groups\n  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();\n  for (uint32_t i = 0; i < topk.size(); ++i) {\n    node_id_t id = topk[i].first;\n    auto score = topk[i].second;\n\n    std::string group_id = group_by(id);\n\n    auto &topk_heap = group_topk_heaps[group_id];\n    if (topk_heap.empty()) {\n      topk_heap.limit(ctx->group_topk());\n    }\n    topk_heap.emplace_back(id, score);\n  }\n\n  // 
stage 2, expand to reach group num as possible\n  if (group_topk_heaps.size() < ctx->group_num()) {\n    VisitFilter &visit = ctx->visit_filter();\n    CandidateHeap &candidates = ctx->candidates();\n    HnswDistCalculator &dc = ctx->dist_calculator();\n\n    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n    if (ctx->filter().is_valid()) {\n      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n    }\n\n    // refill to get enough groups\n    candidates.clear();\n    visit.clear();\n    for (uint32_t i = 0; i < topk.size(); ++i) {\n      node_id_t id = topk[i].first;\n      float score = topk[i].second;\n\n      visit.set_visited(id);\n      candidates.emplace_back(id, score);\n    }\n\n    // do expand\n    while (!candidates.empty() && !ctx->reach_scan_limit()) {\n      auto top = candidates.begin();\n      node_id_t main_node = top->first;\n\n      candidates.pop();\n      const Neighbors neighbors = entity.get_neighbors(0, main_node);\n      if (ailego_unlikely(ctx->debugging())) {\n        (*ctx->mutable_stats_get_neighbors())++;\n      }\n\n      std::vector<node_id_t> neighbor_ids(neighbors.size());\n      uint32_t size = 0;\n      for (uint32_t i = 0; i < neighbors.size(); ++i) {\n        node_id_t node = neighbors[i];\n        if (visit.visited(node)) {\n          if (ailego_unlikely(ctx->debugging())) {\n            (*ctx->mutable_stats_visit_dup_cnt())++;\n          }\n          continue;\n        }\n        visit.set_visited(node);\n        neighbor_ids[size++] = node;\n      }\n      if (size == 0) {\n        continue;\n      }\n\n      std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;\n      int ret =\n          entity.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);\n      if (ailego_unlikely(ctx->debugging())) {\n        (*ctx->mutable_stats_get_vector())++;\n      }\n      if (ailego_unlikely(ret != 0)) {\n        break;\n      }\n\n      static constexpr node_id_t 
PREFETCH_STEP = 2;\n      for (uint32_t i = 0; i < size; ++i) {\n        node_id_t node = neighbor_ids[i];\n        node_id_t prefetch_id = i + PREFETCH_STEP;\n        if (prefetch_id < size) {\n          ailego_prefetch(neighbor_vec_blocks[prefetch_id].data());\n        }\n        dist_t cur_dist = dc.dist(neighbor_vec_blocks[i].data());\n\n        if (!filter(node)) {\n          std::string group_id = group_by(node);\n\n          auto &topk_heap = group_topk_heaps[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(node, cur_dist);\n\n          if (group_topk_heaps.size() >= ctx->group_num()) {\n            break;\n          }\n        }\n\n        candidates.emplace(node, cur_dist);\n      }  // end for\n    }  // end while\n  }  // end if\n}\n\nvoid HnswAlgorithm::update_neighbors(HnswDistCalculator &dc, node_id_t id,\n                                     level_t level, TopkHeap &topk_heap) {\n  topk_heap.sort();\n\n  uint32_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n  if (topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt())) {\n    if (topk_heap.size() <= static_cast<size_t>(max_neighbor_cnt)) {\n      entity_.update_neighbors(level, id, topk_heap);\n      return;\n    }\n  }\n\n  uint32_t cur_size = 0;\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    node_id_t cur_node = topk_heap[i].first;\n    dist_t cur_node_dist = topk_heap[i].second;\n    bool good = true;\n    for (uint32_t j = 0; j < cur_size; ++j) {\n      dist_t tmp_dist = dc.dist(cur_node, topk_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      topk_heap[cur_size].first = cur_node;\n      topk_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  // when after-prune neighbor count is too seldom,\n  // we use this 
strategy to make-up enough edges\n  // not only just make-up out-degrees\n  // we also make-up enough in-degrees\n  uint32_t min_neighbors = entity_.min_neighbor_cnt();\n  for (size_t k = cur_size; cur_size < min_neighbors && k < topk_heap.size();\n       ++k) {\n    bool exist = false;\n    for (size_t j = 0; j < cur_size; ++j) {\n      if (topk_heap[j].first == topk_heap[k].first) {\n        exist = true;\n        break;\n      }\n    }\n    if (!exist) {\n      topk_heap[cur_size].first = topk_heap[k].first;\n      topk_heap[cur_size].second = topk_heap[k].second;\n      cur_size++;\n    }\n  }\n\n  topk_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, topk_heap);\n\n  return;\n}\n\nvoid HnswAlgorithm::reverse_update_neighbors(HnswDistCalculator &dc,\n                                             node_id_t id, level_t level,\n                                             node_id_t link_id, dist_t dist,\n                                             TopkHeap &update_heap) {\n  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n\n  uint32_t lock_idx = id & kLockMask;\n  lock_pool_[lock_idx].lock();\n  const Neighbors neighbors = entity_.get_neighbors(level, id);\n  size_t size = neighbors.size();\n  ailego_assert_with(size <= max_neighbor_cnt, \"invalid neighbor size\");\n  if (size < max_neighbor_cnt) {\n    entity_.add_neighbor(level, id, size, link_id);\n    lock_pool_[lock_idx].unlock();\n    return;\n  }\n\n  update_heap.emplace(link_id, dist);\n\n  for (size_t i = 0; i < size; ++i) {\n    node_id_t node = neighbors[i];\n    dist_t cur_dist = dc.dist(id, node);\n    update_heap.emplace(node, cur_dist);\n  }\n\n  //! TODO: optimize prune\n  //! 
prune edges\n  update_heap.sort();\n  size_t cur_size = 0;\n  for (size_t i = 0; i < update_heap.size(); ++i) {\n    node_id_t cur_node = update_heap[i].first;\n    dist_t cur_node_dist = update_heap[i].second;\n    bool good = true;\n    for (size_t j = 0; j < cur_size; ++j) {\n      dist_t tmp_dist = dc.dist(cur_node, update_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      update_heap[cur_size].first = cur_node;\n      update_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  update_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, update_heap);\n\n  lock_pool_[lock_idx].unlock();\n\n  update_heap.clear();\n\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_algorithm.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <ailego/parallel/lock.h>\n#include \"hnsw_context.h\"\n#include \"hnsw_dist_calculator.h\"\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! hnsw graph algorithm implement\nclass HnswAlgorithm {\n public:\n  typedef std::unique_ptr<HnswAlgorithm> UPointer;\n\n public:\n  //! Constructor\n  explicit HnswAlgorithm(HnswEntity &entity);\n\n  //! Destructor\n  ~HnswAlgorithm() = default;\n\n  //! Cleanup HnswAlgorithm\n  int cleanup();\n\n  //! Add a node to hnsw graph\n  //! @id:     the node unique id\n  //! @level:  a node will be add to graph in each level [0, level]\n  //! return 0 on success, or errCode in failure\n  int add_node(node_id_t id, level_t level, HnswContext *ctx);\n\n  //! do knn search in graph\n  //! return 0 on success, or errCode in failure. results saved in ctx\n  int search(HnswContext *ctx) const;\n\n  //! 
Initiate HnswAlgorithm\n  int init() {\n    level_probas_.clear();\n    double level_mult =\n        1 / std::log(static_cast<double>(entity_.scaling_factor()));\n    for (int level = 0;; level++) {\n      // refers faiss get_random_level alg\n      double proba =\n          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));\n      if (proba < 1e-9) {\n        break;\n      }\n      level_probas_.push_back(proba);\n    }\n\n    return 0;\n  }\n\n  //! Generate a random level\n  //! return graph level\n  uint32_t get_random_level() const {\n    // gen rand float (0, 1)\n    double f = mt_() / static_cast<float>(mt_.max());\n    for (size_t level = 0; level < level_probas_.size(); level++) {\n      if (f < level_probas_[level]) {\n        return level;\n      }\n      f -= level_probas_[level];\n    }\n    return level_probas_.size() - 1;\n  }\n\n private:\n  //! Select in upper layer to get entry point for next layer search\n  void select_entry_point(level_t level, node_id_t *entry_point, dist_t *dist,\n                          HnswContext *ctx) const;\n\n  //! update node id neighbors from topkHeap, and reverse link is also updated\n  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,\n                     HnswContext *ctx);\n\n  //! Given a node id and level, search the nearest neighbors in graph\n  //! Note: the nearest neighbors result keeps in topk, and entry_point and\n  //! dist will be updated to current level nearest node id and distance\n  void search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist,\n                        TopkHeap &topk, HnswContext *ctx) const;\n\n  //! Update the node's neighbors\n  void update_neighbors(HnswDistCalculator &dc, node_id_t id, level_t level,\n                        TopkHeap &topk_heap);\n\n  //! Checking linkId could be id's new neighbor, and add as neighbor if true\n  //! @dc         distance calculator\n  //! 
@updateHeap temporary heap in updating neighbors\n  void reverse_update_neighbors(HnswDistCalculator &dc, node_id_t id,\n                                level_t level, node_id_t link_id, dist_t dist,\n                                TopkHeap &update_heap);\n\n  //! expand neighbors until group nums are reached\n  void expand_neighbors_by_group(TopkHeap &topk, HnswContext *ctx) const;\n\n private:\n  HnswAlgorithm(const HnswAlgorithm &) = delete;\n  HnswAlgorithm &operator=(const HnswAlgorithm &) = delete;\n\n private:\n  static constexpr uint32_t kLockCnt{1U << 8};\n  static constexpr uint32_t kLockMask{kLockCnt - 1U};\n\n  HnswEntity &entity_;\n  mutable std::mt19937 mt_{};\n  std::vector<double> level_probas_{};\n\n  mutable ailego::SpinMutex spin_lock_{};  // global spin lock\n  std::mutex mutex_{};                     // global mutex\n  // TODO: spin lock?\n  std::vector<std::mutex> lock_pool_{};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_builder.h\"\n#include <iostream>\n#include <thread>\n#include <ailego/pattern/defer.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"hnsw_algorithm.h\"\n#include \"hnsw_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswBuilder::HnswBuilder() = default;\n\nint HnswBuilder::init(const IndexMeta &meta, const ailego::Params &params) {\n  LOG_INFO(\"Begin HnswBuilder::init\");\n\n  meta_ = meta;\n  auto params_copy = params;\n  meta_.set_builder(\"HnswBuilder\", HnswEntity::kRevision,\n                    std::move(params_copy));\n\n  size_t memory_quota = 0UL;\n  params.get(PARAM_HNSW_BUILDER_MEMORY_QUOTA, &memory_quota);\n  params.get(PARAM_HNSW_BUILDER_THREAD_COUNT, &thread_cnt_);\n  params.get(PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);\n  params.get(PARAM_HNSW_BUILDER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_BUILDER_CHECK_INTERVAL_SECS, &check_interval_secs_);\n\n  params.get(PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT, &upper_max_neighbor_cnt_);\n  float multiplier = HnswEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER, &multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = 
upper_max_neighbor_cnt_;\n  params.get(PARAM_HNSW_BUILDER_SCALING_FACTOR, &scaling_factor_);\n\n  multiplier = HnswEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;\n\n  if (ef_construction_ == 0) {\n    ef_construction_ = HnswEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0) {\n    upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d]\",\n              PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT.c_str(), kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%d] must be <= [%s]-[%d]\",\n              PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT.c_str(), min_neighbor_cnt_,\n              PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),\n              upper_max_neighbor_cnt_);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0) {\n    l0_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"L0MaxNeighborCnt must be in range (0,%d)\",\n              HnswEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_BUILDER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n  if (thread_cnt_ == 0) {\n    thread_cnt_ = std::thread::hardware_concurrency();\n  }\n  if (thread_cnt_ > std::thread::hardware_concurrency()) {\n    LOG_WARN(\"[%s] greater than cpu cores %u\",\n             PARAM_HNSW_BUILDER_THREAD_COUNT.c_str(),\n             std::thread::hardware_concurrency());\n 
 }\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"CreateMetric failed, name: %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"IndexMetric init failed, ret=%d\", ret);\n    return ret;\n  }\n\n  entity_.set_vector_size(meta_.element_size());\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_memory_quota(memory_quota);\n  entity_.set_prune_cnt(prune_cnt);\n\n  ret = entity_.init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  state_ = BUILD_STATE_INITED;\n  LOG_INFO(\n      \"End HnswBuilder::init, params: vectorSize=%u efConstruction=%u \"\n      \"l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u \"\n      \"memoryQuota=%zu neighborPruneCnt=%zu metricName=%s \",\n      meta_.element_size(), ef_construction_, l0_max_neighbor_cnt_,\n      upper_max_neighbor_cnt_, scaling_factor_, memory_quota, prune_cnt,\n      meta_.metric_name().c_str());\n\n  return 0;\n}\n\nint HnswBuilder::cleanup(void) {\n  LOG_INFO(\"Begin HnswBuilder::cleanup\");\n\n  l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;\n  min_neighbor_cnt_ = 0;\n  upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;\n  ef_construction_ = HnswEntity::kDefaultEfConstruction;\n  scaling_factor_ = HnswEntity::kDefaultScalingFactor;\n  check_interval_secs_ = kDefaultLogIntervalSecs;\n  errcode_ = 0;\n  error_ = false;\n  entity_.cleanup();\n  alg_->cleanup();\n  meta_.clear();\n  
metric_.reset();\n  stats_.clear_attributes();\n  stats_.set_trained_count(0UL);\n  stats_.set_built_count(0UL);\n  stats_.set_dumped_count(0UL);\n  stats_.set_discarded_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  stats_.set_built_costtime(0UL);\n  stats_.set_dumped_costtime(0UL);\n  state_ = BUILD_STATE_INIT;\n\n  LOG_INFO(\"End HnswBuilder::cleanup\");\n\n  return 0;\n}\n\nint HnswBuilder::train(IndexThreads::Pointer, IndexHolder::Pointer holder) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before HnswBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while training index\");\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while training index\");\n    return IndexError_Mismatch;\n  }\n  LOG_INFO(\"Begin HnswBuilder::train\");\n  size_t trained_cost_time = 0;\n  size_t trained_count = 0;\n\n  if (metric_->support_train()) {\n    auto start_time = ailego::Monotime::MilliSeconds();\n    auto iter = holder->create_iterator();\n    if (!iter) {\n      LOG_ERROR(\"Create iterator for holder failed\");\n      return IndexError_Runtime;\n    }\n    while (iter->is_valid()) {\n      int ret = metric_->train(iter->data(), meta_.dimension());\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw build measure train failed, ret=%d\", ret);\n        return ret;\n      }\n      iter->next();\n      ++trained_count;\n    }\n    trained_cost_time = ailego::Monotime::MilliSeconds() - start_time;\n  }\n  stats_.set_trained_count(trained_count);\n  stats_.set_trained_costtime(trained_cost_time);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswBuilder::train\");\n\n  return 0;\n}\n\nint HnswBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before HnswBuilder::train\");\n    return 
IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswBuilder::train by trainer\");\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswBuilder::train by trainer\");\n\n  return 0;\n}\n\nint HnswBuilder::build(IndexThreads::Pointer threads,\n                       IndexHolder::Pointer holder) {\n  if (state_ != BUILD_STATE_TRAINED) {\n    LOG_ERROR(\"Train the index before HnswBuilder::build\");\n    return IndexError_NoReady;\n  }\n\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while building index\");\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while building index\");\n    return IndexError_Mismatch;\n  }\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n  LOG_INFO(\"Begin HnswBuilder::build\");\n\n  if (holder->count() != static_cast<size_t>(-1)) {\n    LOG_DEBUG(\"HnswBuilder holder documents count %lu\", holder->count());\n    int ret = entity_.reserve_space(holder->count());\n    if (ret != 0) {\n      LOG_ERROR(\"HnswBuilde reserver space failed\");\n      return ret;\n    }\n  }\n  auto iter = holder->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Create iterator for holder failed\");\n    return IndexError_Runtime;\n  }\n  int ret;\n  error_ = false;\n  while (iter->is_valid()) {\n    level_t level = alg_->get_random_level();\n    node_id_t id;\n\n    const void *vec = iter->data();\n    ret = entity_.add_vector(level, iter->key(), vec, &id);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n    iter->next();\n  }\n  // Holder is not needed, cleanup it.\n  holder.reset();\n\n  LOG_INFO(\"Finished save vector, start build graph...\");\n\n  auto task_group = threads->make_group();\n  if 
(!task_group) {\n    LOG_ERROR(\"Failed to create task group\");\n    return IndexError_Runtime;\n  }\n\n  std::atomic<node_id_t> finished{0};\n  for (size_t i = 0; i < threads->count(); ++i) {\n    task_group->submit(ailego::Closure ::New(this, &HnswBuilder::do_build, i,\n                                             threads->count(), &finished));\n  }\n\n  while (!task_group->is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex_);\n    cond_.wait_until(lk, std::chrono::system_clock::now() +\n                             std::chrono::seconds(check_interval_secs_));\n    if (error_.load(std::memory_order_acquire)) {\n      LOG_ERROR(\"Failed to build index while waiting finish\");\n      return errcode_;\n    }\n    LOG_INFO(\"Built cnt %u, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / entity_.doc_cnt());\n  }\n  if (error_.load(std::memory_order_acquire)) {\n    LOG_ERROR(\"Failed to build index while waiting finish\");\n    return errcode_;\n  }\n  task_group->wait_finish();\n\n  stats_.set_built_count(finished.load());\n  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = BUILD_STATE_BUILT;\n\n  LOG_INFO(\"End HnswBuilder::build\");\n  return 0;\n}\n\nvoid HnswBuilder::do_build(node_id_t idx, size_t step_size,\n                           std::atomic<node_id_t> *finished) {\n  AILEGO_DEFER([&]() {\n    std::lock_guard<std::mutex> latch(mutex_);\n    cond_.notify_one();\n  });\n  HnswContext *ctx = new (std::nothrow)\n      HnswContext(meta_.dimension(), metric_,\n                  std::shared_ptr<HnswEntity>(&entity_, [](HnswEntity *) {}));\n  if (ailego_unlikely(ctx == nullptr)) {\n    if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to create context\");\n      errcode_ = IndexError_NoMemory;\n    }\n    return;\n  }\n  HnswContext::Pointer auto_ptr(ctx);\n  ctx->set_max_scan_num(entity_.doc_cnt());\n  int ret = ctx->init(HnswContext::kBuilderContext);\n  if (ret != 0) {\n    
if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to init context\");\n      errcode_ = IndexError_Runtime;\n    }\n    return;\n  }\n\n  IndexQueryMeta qmeta(meta_.data_type(), meta_.dimension());\n  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {\n    ctx->reset_query(entity_.get_vector(id));\n    ret = alg_->add_node(id, entity_.get_level(id), ctx);\n    if (ailego_unlikely(ret != 0)) {\n      if (!error_.exchange(true)) {\n        LOG_ERROR(\"Hnsw graph add node failed\");\n        errcode_ = ret;\n      }\n      return;\n    }\n    ctx->clear();\n    (*finished)++;\n  }\n}\n\nint HnswBuilder::dump(const IndexDumper::Pointer &dumper) {\n  if (state_ != BUILD_STATE_BUILT) {\n    LOG_INFO(\"Build the index before HnswBuilder::dump\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswBuilder::dump\");\n\n  meta_.set_searcher(\"HnswSearcher\", HnswEntity::kRevision, ailego::Params());\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n\n  ret = entity_.dump(dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswBuilder dump index failed\");\n    return ret;\n  }\n\n  stats_.set_dumped_count(entity_.doc_cnt());\n  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);\n\n  LOG_INFO(\"EndHnswBuilder::dump\");\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_BUILDER(HnswBuilder);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_builder.h>\n#include \"hnsw_algorithm.h\"\n#include \"hnsw_builder_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswBuilder : public IndexBuilder {\n public:\n  //! Constructor\n  HnswBuilder();\n\n  //! Initialize the builder\n  virtual int init(const IndexMeta &meta,\n                   const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  virtual int cleanup(void) override;\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer,\n                    IndexHolder::Pointer holder) override;\n\n  //! Train the data\n  virtual int train(const IndexTrainer::Pointer &trainer) override;\n\n\n  //! Build the index\n  virtual int build(IndexThreads::Pointer threads,\n                    IndexHolder::Pointer holder) override;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! 
Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n private:\n  void do_build(node_id_t idx, size_t step_size,\n                std::atomic<node_id_t> *finished);\n\n  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n\n private:\n  enum BUILD_STATE {\n    BUILD_STATE_INIT = 0,\n    BUILD_STATE_INITED = 1,\n    BUILD_STATE_TRAINED = 2,\n    BUILD_STATE_BUILT = 3\n  };\n\n  HnswBuilderEntity entity_{};\n  HnswAlgorithm::UPointer alg_;  // impl graph algorithm\n  uint32_t thread_cnt_{0};\n  uint32_t min_neighbor_cnt_{0};\n  uint32_t upper_max_neighbor_cnt_{HnswEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t l0_max_neighbor_cnt_{HnswEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t ef_construction_{HnswEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswEntity::kDefaultScalingFactor};\n  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};\n\n  int errcode_{0};\n  std::atomic_bool error_{false};\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n  Stats stats_{};\n\n  BUILD_STATE state_{BUILD_STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_builder_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_builder_entity.h\"\n#include <iostream>\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswBuilderEntity::HnswBuilderEntity() {\n  update_ep_and_level(kInvalidNodeId, 0U);\n}\n\nint HnswBuilderEntity::cleanup() {\n  memory_quota_ = 0UL;\n  neighbors_size_ = 0U;\n  upper_neighbors_size_ = 0U;\n  padding_size_ = 0U;\n  vectors_buffer_.clear();\n  keys_buffer_.clear();\n  neighbors_buffer_.clear();\n  upper_neighbors_buffer_.clear();\n  neighbors_index_.clear();\n\n  vectors_buffer_.shrink_to_fit();\n  keys_buffer_.shrink_to_fit();\n  neighbors_buffer_.shrink_to_fit();\n  upper_neighbors_buffer_.shrink_to_fit();\n  neighbors_index_.shrink_to_fit();\n\n  this->HnswEntity::cleanup();\n\n  return 0;\n}\n\nint HnswBuilderEntity::init() {\n  size_t size = vector_size();\n\n  //! aligned size to 32\n  set_node_size(AlignSize(size));\n  //! 
if node size is aligned to 1k, the build performance will downgrade\n  if (node_size() % 1024 == 0) {\n    set_node_size(AlignSize(node_size() + 1));\n  }\n\n  padding_size_ = node_size() - size;\n\n  neighbors_size_ = neighbors_size();\n  upper_neighbors_size_ = upper_neighbors_size();\n\n  return 0;\n}\n\nint HnswBuilderEntity::reserve_space(size_t docs) {\n  if (memory_quota_ > 0 && (node_size() * docs + neighbors_size_ * docs +\n                                sizeof(NeighborIndex) * docs >\n                            memory_quota_)) {\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.reserve(node_size() * docs);\n  keys_buffer_.reserve(sizeof(key_t) * docs);\n  neighbors_buffer_.reserve(neighbors_size_ * docs);\n  neighbors_index_.reserve(docs);\n\n  return 0;\n}\n\nint HnswBuilderEntity::add_vector(level_t level, key_t key, const void *vec,\n                                  node_id_t *id) {\n  if (memory_quota_ > 0 &&\n      (vectors_buffer_.capacity() + keys_buffer_.capacity() +\n       neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +\n       neighbors_index_.capacity() * sizeof(NeighborIndex)) > memory_quota_) {\n    LOG_ERROR(\"Add vector failed, used memory exceed quota, cur_doc=%u\",\n              doc_cnt());\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.append(reinterpret_cast<const char *>(vec), vector_size());\n  vectors_buffer_.append(padding_size_, '\\0');\n  keys_buffer_.append(reinterpret_cast<const char *>(&key), sizeof(key));\n\n  // init level 0 neighbors\n  neighbors_buffer_.append(neighbors_size_, '\\0');\n\n  neighbors_index_.emplace_back(upper_neighbors_buffer_.size(), level);\n\n  // init upper layer neighbors\n  for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n    upper_neighbors_buffer_.append(upper_neighbors_size_, '\\0');\n  }\n\n  *id = (*mutable_doc_cnt())++;\n\n  return 0;\n}\n\nkey_t HnswBuilderEntity::get_key(node_id_t id) const {\n  return *(reinterpret_cast<const key_t 
*>(keys_buffer_.data() +\n                                           id * sizeof(key_t)));\n}\n\nconst void *HnswBuilderEntity::get_vector(node_id_t id) const {\n  return vectors_buffer_.data() + id * node_size();\n}\n\nint HnswBuilderEntity::get_vector(const node_id_t id,\n                                  IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nint HnswBuilderEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                  const void **vecs) const {\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = vectors_buffer_.data() + ids[i] * node_size();\n  }\n\n  return 0;\n}\n\nint HnswBuilderEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  const void *vecs[count];\n  get_vector(ids, count, vecs);\n  for (uint32_t i = 0; i < count; ++i) {\n    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\nconst Neighbors HnswBuilderEntity::get_neighbors(level_t level,\n                                                 node_id_t id) const {\n  const NeighborsHeader *hd = get_neighbor_header(level, id);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswBuilderEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  for (size_t i = 0; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n  hd->neighbor_cnt = neighbors.size();\n\n  // std::cout << \"id: \" << id << \", neighbour, id: \";\n  // for (size_t i = 0; i < neighbors.size(); ++i) {\n  //   if (i == neighbors.size()-1)\n  //     std::cout << neighbors[i].first << \", score:\" << neighbors[i].second <<\n  //     std::endl;\n  //   else\n  //     std::cout << neighbors[i].first << \", score:\" 
<< neighbors[i].second <<\n  //     \", id: \";\n  // }\n\n  return 0;\n}\n\nvoid HnswBuilderEntity::add_neighbor(level_t level, node_id_t id,\n                                     uint32_t /*size*/, node_id_t neighbor_id) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  hd->neighbors[hd->neighbor_cnt++] = neighbor_id;\n\n  return;\n}\n\nint HnswBuilderEntity::dump(const IndexDumper::Pointer &dumper) {\n  key_t *keys =\n      reinterpret_cast<key_t *>(const_cast<char *>(keys_buffer_.data()));\n  auto ret =\n      dump_segments(dumper, keys, [&](node_id_t id) { return get_level(id); });\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_builder_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswBuilderEntity : public HnswEntity {\n public:\n  //! Add vector and key to hnsw entity, and local id will be saved to id\n  virtual int add_vector(level_t level, key_t key, const void *vec,\n                         node_id_t *id) override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! Batch get vectors feature data by keys\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! 
Get the node id's neighbors on graph level\n  const NeighborsHeader *get_neighbor_header(level_t level,\n                                             node_id_t id) const {\n    if (level == 0) {\n      return reinterpret_cast<const NeighborsHeader *>(\n          neighbors_buffer_.data() + neighbors_size_ * id);\n    } else {\n      size_t offset = neighbors_index_[id].offset;\n      return reinterpret_cast<const NeighborsHeader *>(\n          upper_neighbors_buffer_.data() + offset +\n          (level - 1) * upper_neighbors_size_);\n    }\n  }\n\n  //! Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  //! Replace node id in level's neighbors\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;\n\n  //! add a neighbor to id in graph level\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Dump the hnsw graph to dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Cleanup the entity\n  virtual int cleanup(void) override;\n\n public:\n  //! Constructor\n  HnswBuilderEntity();\n\n  //! Get the node graph level by id\n  level_t get_level(node_id_t id) const {\n    return neighbors_index_[id].level;\n  }\n\n  //! Init builerEntity\n  int init();\n\n  //! reserve buffer space for documents\n  //! @param  docs    number of documents\n  int reserve_space(size_t docs);\n\n  //! Set memory quota params\n  inline void set_memory_quota(size_t memory_quota) {\n    memory_quota_ = memory_quota;\n  }\n\n  //! Get neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! 
Get upper neighbors size\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n public:\n  HnswBuilderEntity(const HnswBuilderEntity &) = delete;\n  HnswBuilderEntity &operator=(const HnswBuilderEntity &) = delete;\n\n private:\n  friend class HnswSearcherEntity;\n  //! class internal used only\n  struct NeighborIndex {\n    NeighborIndex(size_t off, level_t l) : offset(off), level(l) {}\n    uint64_t offset : 48;\n    uint64_t level : 16;\n  };\n\n  std::string vectors_buffer_{};          // aligned vectors\n  std::string keys_buffer_{};             // primary keys, one key_t per node\n  std::string neighbors_buffer_{};        // level 0 neighbors buffer\n  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer\n\n  std::string sparse_data_buffer_{};  // aligned sparse data buffer\n  size_t sparse_data_offset_{0};      //\n\n  // upper layer offset + level in upper_neighbors_buffer_\n  std::vector<NeighborIndex> neighbors_index_{};\n  size_t memory_quota_{0UL};\n  size_t neighbors_size_{0U};        // level 0 neighbors size\n  size_t upper_neighbors_size_{0U};  // upper layer neighbors size\n  size_t padding_size_{};            // padding size for each vector element\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_chunk.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_chunk.h\"\n#include <chrono>\n#include <random>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\nint ChunkBroker::init_storage(size_t chunk_size) {\n  chunk_meta_.clear();\n  chunk_meta_.chunk_size = chunk_size;\n  chunk_meta_.create_time = ailego::Realtime::Seconds();\n  stats_.set_create_time(chunk_meta_.create_time);\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  //! alloc meta chunk\n  size_t size = sizeof(HnswChunkMeta);\n  size = (size + page_mask_) & (~page_mask_);\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  if (ailego_unlikely(!chunk_meta_segment_)) {\n    LOG_ERROR(\"Get meta segment failed\");\n    return IndexError_Runtime;\n  }\n\n  //! 
update meta info and write to storage\n  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint ChunkBroker::load_storage(size_t chunk_size) {\n  IndexStorage::MemoryBlock data_block;\n  size_t size = chunk_meta_segment_->read(0UL, data_block,\n                                          chunk_meta_segment_->data_size());\n  if (size != sizeof(HnswChunkMeta)) {\n    LOG_ERROR(\"Invalid hnsw meta chunk, read size=%zu chunk size=%zu\", size,\n              chunk_meta_segment_->data_size());\n    return IndexError_InvalidFormat;\n  }\n  std::memcpy(&chunk_meta_, data_block.data(), size);\n  if (chunk_meta_.chunk_size != chunk_size) {\n    LOG_ERROR(\n        \"Params hnsw chunk size=%zu mismatch from previous %zu \"\n        \"in index\",\n        chunk_size, (size_t)chunk_meta_.chunk_size);\n    return IndexError_Mismatch;\n  }\n\n  *stats_.mutable_check_point() = stg_->check_point();\n  stats_.set_revision_id(chunk_meta_.revision_id);\n  stats_.set_update_time(chunk_meta_.update_time);\n  stats_.set_create_time(chunk_meta_.create_time);\n\n  char create_time[32];\n  char update_time[32];\n  ailego::Realtime::Gmtime(chunk_meta_.create_time, \"%Y-%m-%d %H:%M:%S\",\n                           create_time, sizeof(create_time));\n  ailego::Realtime::Gmtime(chunk_meta_.update_time, \"%Y-%m-%d %H:%M:%S\",\n                           update_time, sizeof(update_time));\n  LOG_DEBUG(\n      \"Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu \"\n      \"upperNeighborChunks=%zu revisionId=%zu \"\n      \"createTime=%s updateTime=%s\",\n      (size_t)chunk_meta_.total_size, (size_t)chunk_meta_.chunk_size,\n      
(size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE],\n      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR],\n      (size_t)chunk_meta_.revision_id, create_time, update_time);\n\n  return 0;\n}\n\nint ChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,\n                      size_t chunk_size, bool check_crc) {\n  if (ailego_unlikely(stg_)) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n  stg_ = std::move(stg);\n  if (stg_->isHugePage()) {\n    page_mask_ = ailego::MemoryHelper::HugePageSize() - 1;\n  } else {\n    page_mask_ = ailego::MemoryHelper::PageSize() - 1;\n  }\n  check_crc_ = check_crc;\n  max_chunks_size_ = max_index_size;\n  dirty_ = false;\n\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  chunk_meta_segment_ = stg_->get(segment_id);\n  if (!chunk_meta_segment_) {\n    LOG_DEBUG(\"Create new index\");\n    return init_storage(chunk_size);\n  }\n\n  return load_storage(chunk_size);\n}\n\nint ChunkBroker::close(void) {\n  flush(0UL);\n\n  stg_.reset();\n  check_crc_ = false;\n  dirty_ = false;\n\n  return 0;\n}\n\nint ChunkBroker::flush(uint64_t checkpoint) {\n  ailego_assert_with(chunk_meta_segment_, \"invalid meta segment\");\n\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  size_t size =\n      chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n  }\n\n  stg_->refresh(checkpoint);\n  int ret = stg_->flush();\n  if (ret == 0) {\n    (*stats_.mutable_check_point()) = checkpoint;\n  } else {\n    LOG_ERROR(\"Storage flush failed for %s\", IndexError::What(ret));\n  }\n  return ret;\n}\n\nstd::pair<int, Chunk::Pointer> ChunkBroker::alloc_chunk(int type,\n                                                        uint64_t seq_id,\n   
                                                     size_t size) {\n  ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n\n  Chunk::Pointer chunk;\n  if (ailego_unlikely(!stg_)) {\n    LOG_ERROR(\"Init storage first\");\n    return std::make_pair(IndexError_Uninitialized, chunk);\n  }\n\n  //! check exist a empty chunk with the same name\n  chunk = get_chunk(type, seq_id);\n  if (chunk) {\n    if (ailego_unlikely(chunk->capacity() == size &&\n                        chunk->data_size() == 0UL)) {\n      LOG_ERROR(\"Exist invalid chunk size %zu, expect size %zu\",\n                chunk->capacity(), size);\n      chunk.reset();\n      return std::make_pair(IndexError_Runtime, chunk);\n    }\n    return std::make_pair(0, chunk);\n  }\n  //! align to page size\n  size = (size + page_mask_) & (~page_mask_);\n  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {\n    LOG_ERROR(\"No space to new a chunk, curIndexSize=%zu allocSize=%zu\",\n              (size_t)chunk_meta_.total_size, size);\n    return std::make_pair(IndexError_IndexFull, chunk);\n  }\n\n  std::string segment_id = make_segment_id(type, seq_id);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return std::make_pair(ret, chunk);\n  }\n  chunk_meta_.chunk_cnts[type] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage append segment failed, wsize=%zu\", size);\n  }\n\n  chunk = get_chunk(type, seq_id);\n  return std::make_pair(chunk ? 
0 : IndexError_NoMemory, chunk);\n}\n\nChunk::Pointer ChunkBroker::get_chunk(int type, uint64_t seq_id) const {\n  std::string segment_id = make_segment_id(type, seq_id);\n  return stg_->get(segment_id);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_chunk.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <string.h>\n#include <unistd.h>\n#include <atomic>\n#include <cstddef>\n#include <mutex>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\nusing Chunk = IndexStorage::Segment;\n\nclass ChunkBroker {\n public:\n  typedef std::shared_ptr<ChunkBroker> Pointer;\n\n  enum CHUNK_TYPE {\n    CHUNK_TYPE_HEADER = 1,\n    CHUNK_TYPE_META = 2,\n    CHUNK_TYPE_NODE = 3,\n    CHUNK_TYPE_UPPER_NEIGHBOR = 4,\n    CHUNK_TYPE_NEIGHBOR_INDEX = 5,\n    CHUNK_TYPE_SPARSE_NODE = 6,\n    CHUNK_TYPE_MAX = 8\n  };\n  static constexpr size_t kDefaultChunkSeqId = 0UL;\n\n  ChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {}\n\n  //! Open storage\n  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,\n           bool check_crc);\n\n  int close(void);\n\n  int flush(uint64_t checkpoint);\n\n  //! alloc a new chunk with size, not thread-safe\n  std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id,\n                                             size_t size);\n\n  //! 
alloc a new chunk with chunk size\n  inline std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id) {\n    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);\n  }\n\n  Chunk::Pointer get_chunk(int type, uint64_t seq_id) const;\n\n  inline size_t get_chunk_cnt(int type) const {\n    ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n    return chunk_meta_.chunk_cnts[type];\n  }\n\n  inline bool dirty(void) const {\n    return dirty_;\n  }\n\n  inline void mark_dirty(void) {\n    if (!dirty_) {\n      dirty_ = true;\n      chunk_meta_.revision_id += 1;\n      stats_.set_revision_id(chunk_meta_.revision_id);\n    }\n  }\n\n  const IndexStorage::Pointer storage(void) const {\n    return stg_;\n  }\n\n private:\n  ChunkBroker(const ChunkBroker &) = delete;\n  ChunkBroker &operator=(const ChunkBroker &) = delete;\n\n  struct HnswChunkMeta {\n    HnswChunkMeta(void) {\n      memset(this, 0, sizeof(HnswChunkMeta));\n    }\n    void clear() {\n      memset(this, 0, sizeof(HnswChunkMeta));\n    }\n\n    uint64_t chunk_cnts[CHUNK_TYPE_MAX];\n    uint64_t chunk_size;   // size of per chunk\n    uint64_t total_size;   // total size of allocated chunk\n    uint64_t revision_id;  // index revision\n    uint64_t create_time;\n    uint64_t update_time;\n    uint64_t reserved[3];\n  };\n\n  static_assert(sizeof(HnswChunkMeta) % 32 == 0,\n                \"HnswChunkMeta must be aligned with 32 bytes\");\n\n  //! Init the storage after open an empty index\n  int init_storage(size_t chunk_size);\n\n  //! 
Load index from storage\n  int load_storage(size_t chunk_size);\n\n  static inline const std::string make_segment_id(int type, uint64_t seq_id) {\n    return \"HnswT\" + ailego::StringHelper::ToString(type) + \"S\" +\n           ailego::StringHelper::ToString(seq_id);\n  }\n\n private:\n  IndexStreamer::Stats &stats_;\n  HnswChunkMeta chunk_meta_{};\n  size_t page_mask_{0UL};\n  size_t max_chunks_size_{0UL};\n  IndexStorage::Pointer stg_{};\n  IndexStorage::Segment::Pointer chunk_meta_segment_{};\n  bool check_crc_{false};\n  bool dirty_{false};  // set as true if index is modified , the flag\n                       // will not be cleared even if flushed\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_context.h\"\n#include <chrono>\n#include \"hnsw_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswContext::HnswContext(size_t dimension, const IndexMetric::Pointer &metric,\n                         const HnswEntity::Pointer &entity)\n    : IndexContext(metric),\n      entity_(entity),\n      dc_(entity_.get(), metric, dimension) {}\n\nHnswContext::HnswContext(const IndexMetric::Pointer &metric,\n                         const HnswEntity::Pointer &entity)\n    : IndexContext(metric), entity_(entity), dc_(entity_.get(), metric) {}\n\nHnswContext::~HnswContext() {\n  visit_filter_.destroy();\n}\n\nint HnswContext::init(ContextType type) {\n  int ret;\n  uint32_t doc_cnt;\n\n  type_ = type;\n\n  switch (type) {\n    case kBuilderContext:\n      ret = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      break;\n\n    case kSearcherContext:\n      ret = visit_filter_.init(filter_mode_, entity_->doc_cnt(), max_scan_num_,\n                               negative_probability_);\n      if (ret != 0) {\n        
LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      break;\n\n    case kStreamerContext:\n      // maxScanNum is unknown if inited from streamer, so the docCnt may\n      // change. we need to compute maxScanNum by scan ratio, and preserve\n      // max_doc_cnt space from visit filter\n      doc_cnt = entity_->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      ret = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n\n      check_need_adjuct_ctx();\n      break;\n\n    default:\n      LOG_ERROR(\"Init context failed\");\n      return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswContext::update(const ailego::Params &params) {\n  auto update_visit_filter_param = [&]() {\n    bool need_update = false;\n    std::string p;\n    switch (type_) {\n      case kSearcherContext:\n        p = PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n      case kStreamerContext:\n        p = PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n    }\n\n    if (params.has(p)) {\n      bool bf_enabled;\n      params.get(p, &bf_enabled);\n      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {\n        need_update = true;\n        filter_mode_ =\n            bf_enabled ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap;\n      }\n    }\n\n    float prob = negative_probability_;\n    p.clear();\n    switch (type_) {\n      case kSearcherContext:\n        p = PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n      case kStreamerContext:\n        p = PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n    }\n    params.get(p, &prob);\n    if (filter_mode_ == VisitFilter::BloomFilter &&\n        std::abs(prob - negative_probability_) > 1e-6) {\n      need_update = true;\n    }\n    if (need_update) {\n      visit_filter_.destroy();\n      int max_doc_cnt = 0;\n      if (type_ == kSearcherContext) {\n        max_doc_cnt = entity_->doc_cnt();\n      } else {\n        max_doc_cnt = reserve_max_doc_cnt_;\n      }\n      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,\n                                   negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n    }\n    return 0;\n  };\n\n  switch (type_) {\n    case kSearcherContext:\n      if (params.has(PARAM_HNSW_SEARCHER_EF)) {\n        params.get(PARAM_HNSW_SEARCHER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n      }\n\n      if (params.has(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO)) {\n        params.get(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n        max_scan_num_ =\n            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());\n        max_scan_num_ = std::max(10000U, max_scan_num_);\n      }\n\n      if (params.has(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    case kStreamerContext:\n      if (params.has(PARAM_HNSW_STREAMER_EF)) {\n        params.get(PARAM_HNSW_STREAMER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n 
     }\n      params.get(PARAM_HNSW_STREAMER_EF, &ef_);\n      params.get(PARAM_HNSW_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n      params.get(PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n      params.get(PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n      if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n        LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n                  PARAM_HNSW_STREAMER_MAX_SCAN_RATIO.c_str());\n        return IndexError_InvalidArgument;\n      }\n      if (max_scan_limit_ < min_scan_limit_) {\n        LOG_ERROR(\"[%s] must be >= [%s]\",\n                  PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT.c_str(),\n                  PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT.c_str());\n        return IndexError_InvalidArgument;\n      }\n\n      if (params.has(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    default:\n      LOG_ERROR(\"update context failed, type=%u\", type_);\n      return IndexError_Runtime;\n  }\n}\n\nint HnswContext::update_context(ContextType type, const IndexMeta &meta,\n                                const IndexMetric::Pointer &metric,\n                                const HnswEntity::Pointer &entity,\n                                uint32_t magic_num) {\n  uint32_t doc_cnt;\n\n  if (ailego_unlikely(type != type_)) {\n    LOG_ERROR(\n        \"HnswContext doesn't support shared by different type, \"\n        \"src=%u dst=%u\",\n        type_, type);\n    return IndexError_Unsupported;\n  }\n\n  magic_ = kInvalidMgic;\n\n  // TODO: support change filter mode?\n  switch (type) {\n    case kBuilderContext:\n      LOG_ERROR(\"BuildContext doesn't support update\");\n      return IndexError_NotImplemented;\n\n    case kSearcherContext:\n      if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {\n        LOG_ERROR(\"Reset filter 
failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    case kStreamerContext:\n      doc_cnt = entity->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {\n        LOG_ERROR(\"Reset filter failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      update_heap_.limit(entity->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    default:\n      LOG_ERROR(\"update context failed\");\n      return IndexError_Runtime;\n  }\n\n  entity_ = entity;\n  dc_.update(entity_.get(), metric, meta.dimension());\n  magic_ = magic_num;\n  level_topks_.clear();\n\n  return 0;\n}\n\nvoid HnswContext::fill_random_to_topk_full(void) {\n  static std::mt19937 mt(\n      std::chrono::system_clock::now().time_since_epoch().count());\n  std::uniform_int_distribution<node_id_t> dt(0, entity_->doc_cnt() - 1);\n  std::function<node_id_t()> gen;\n  node_id_t seqid;\n  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };\n  if (this->filter().is_valid()) {\n    myfilter = [&](node_id_t id) {\n      return this->filter()(entity_->get_key(id));\n    };\n  }\n\n  if (topk_heap_.limit() < entity_->doc_cnt() / 2) {\n    gen = [&](void) { return dt(mt); };\n  } else {\n    // If topk limit is big value, gen sequential id from an random initial\n    seqid = dt(mt);\n    gen = [&](void) {\n      seqid = seqid == (entity_->doc_cnt() - 1) ? 
0 : (seqid + 1);\n      return seqid;\n    };\n  }\n\n  for (size_t i = 0; !topk_heap_.full() && i < entity_->doc_cnt(); ++i) {\n    const auto id = gen();\n    if (!visit_filter_.visited(id) && !myfilter(id)) {\n      visit_filter_.set_visited(id);\n      topk_heap_.emplace(id, dc_.dist(id));\n    }\n  }\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_context.h>\n#include \"utility/sparse_utility.h\"\n#include \"utility/visit_filter.h\"\n#include \"hnsw_dist_calculator.h\"\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswContext : public IndexContext {\n public:\n  //! Index Context Pointer\n  typedef std::unique_ptr<HnswContext> Pointer;\n\n  enum ContextType {\n    kUnknownContext = 0,\n    kSearcherContext = 1,\n    kBuilderContext = 2,\n    kStreamerContext = 3\n  };\n\n  //! Construct\n  HnswContext(size_t dimension, const IndexMetric::Pointer &metric,\n              const HnswEntity::Pointer &entity);\n\n  //! Construct\n  HnswContext(const IndexMetric::Pointer &metric,\n              const HnswEntity::Pointer &entity);\n\n  //! Destructor\n  virtual ~HnswContext();\n\n public:\n  //! Set topk of search result\n  virtual void set_topk(uint32_t val) override {\n    topk_ = val;\n    topk_heap_.limit(std::max(val, ef_));\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(void) const override {\n    return results_[0];\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(size_t idx) const override {\n    return results_[idx];\n  }\n\n  //! 
Retrieve result object for output\n  virtual IndexDocumentList *mutable_result(size_t idx) override {\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    return &results_[idx];\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(\n      size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  virtual uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  //! Set mode of debug\n  virtual void set_debug_mode(bool enable) override {\n    debug_mode_ = enable;\n  }\n\n  //! Retrieve mode of debug\n  virtual bool debug_mode(void) const override {\n    return this->debugging();\n  }\n\n  //! Retrieve string of debug\n  virtual std::string debug_string(void) const override {\n    char buf[4096];\n    size_t size = snprintf(\n        buf, sizeof(buf),\n        \"scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u\",\n        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,\n        stats_visit_dup_cnt_);\n    return std::string(buf, size);\n  }\n\n  //! Update the parameters of context\n  virtual int update(const ailego::Params &params) override;\n\n public:\n  //! Init context\n  int init(ContextType type);\n\n  //! 
Update context, the context may be shared by different searcher/streamer\n  int update_context(ContextType type, const IndexMeta &meta,\n                     const IndexMetric::Pointer &metric,\n                     const HnswEntity::Pointer &entity, uint32_t magic_num);\n\n  inline const HnswEntity &get_entity() const {\n    return *entity_;\n  }\n\n  inline void resize_results(size_t size) {\n    if (group_by_search()) {\n      group_results_.resize(size);\n    } else {\n      results_.resize(size);\n    }\n  }\n\n  inline void topk_to_result() {\n    return topk_to_result(0);\n  }\n\n  //! Construct result from topk heap, result will be normalized\n  inline void topk_to_result(uint32_t idx) {\n    if (group_by_search()) {\n      topk_to_group_result(idx);\n    } else {\n      topk_to_single_result(idx);\n    }\n  }\n\n  inline void recal_topk_dist() {\n    TopkHeap heap(topk_heap_);\n    topk_heap_.clear();\n\n    for (size_t i = 0; i < heap.size(); ++i) {\n      node_id_t id = heap[i].first;\n      dist_t dist = dc_.dist(id);\n      topk_heap_.emplace_back(id, dist);\n    }\n  }\n\n  inline void topk_to_single_result(uint32_t idx) {\n    if (force_padding_topk_ && !topk_heap_.full() &&\n        topk_heap_.size() < entity_->doc_cnt()) {\n      this->fill_random_to_topk_full();\n    }\n    if (ailego_unlikely(topk_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));\n    topk_heap_.sort();\n    results_[idx].clear();\n\n    for (int i = 0; i < size; ++i) {\n      auto score = topk_heap_[i].second;\n      if (score > this->threshold()) {\n        break;\n      }\n\n      node_id_t id = topk_heap_[i].first;\n      if (fetch_vector_) {\n        results_[idx].emplace_back(entity_->get_key(id), score, id,\n                                   entity_->get_vector(id));\n      } else {\n        
results_[idx].emplace_back(entity_->get_key(id), score, id);\n      }\n    }\n\n    return;\n  }\n\n  //! Construct result from topk heap, result will be normalized\n  inline void topk_to_group_result(uint32_t idx) {\n    ailego_assert_with(idx < group_results_.size(), \"invalid idx\");\n\n    group_results_[idx].clear();\n\n    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;\n    std::vector<std::pair<std::string, float>> best_score_in_groups;\n    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();\n         itr++) {\n      const std::string &group_id = (*itr).first;\n      auto &heap = (*itr).second;\n      heap.sort();\n\n      if (heap.size() > 0) {\n        float best_score = heap[0].second;\n        best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n      }\n    }\n\n    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n              [](const std::pair<std::string, float> &a,\n                 const std::pair<std::string, float> &b) -> int {\n                return a.second < b.second;\n              });\n\n    // truncate to group num\n    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();\n         ++i) {\n      const std::string &group_id = best_score_in_groups[i].first;\n\n      group_topk_list.emplace_back(\n          std::make_pair(group_id, group_topk_heaps_[group_id]));\n    }\n\n    group_results_[idx].resize(group_topk_list.size());\n\n    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {\n      const std::string &group_id = group_topk_list[i].first;\n      group_results_[idx][i].set_group_id(group_id);\n\n      uint32_t size = std::min(\n          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));\n\n      for (uint32_t j = 0; j < size; ++j) {\n        auto score = group_topk_list[i].second[j].second;\n        if (score > this->threshold()) {\n          break;\n        }\n\n        node_id_t id = 
group_topk_list[i].second[j].first;\n\n        if (fetch_vector_) {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n              entity_->get_key(id), score, id, entity_->get_vector(id));\n        } else {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n              entity_->get_key(id), score, id);\n        }\n      }\n    }\n  }\n\n  inline void reset_query(const void *query) {\n    if (auto query_preprocess_func = index_metric_->get_query_preprocess_func();\n        query_preprocess_func != nullptr) {\n      size_t dim = dc_.dimension();\n      preprocess_buffer_.resize(dim);\n      memcpy(preprocess_buffer_.data(), query, dim);\n      query_preprocess_func(preprocess_buffer_.data(), dim);\n      query = preprocess_buffer_.data();\n    }\n\n    dc_.reset_query(query);\n    dc_.clear_compare_cnt();\n  }\n\n  inline HnswDistCalculator &dist_calculator() {\n    return dc_;\n  }\n\n  inline TopkHeap &topk_heap() {\n    return topk_heap_;\n  }\n\n  inline TopkHeap &update_heap() {\n    return update_heap_;\n  }\n\n  inline VisitFilter &visit_filter() {\n    return visit_filter_;\n  }\n\n  inline CandidateHeap &candidates() {\n    return candidates_;\n  }\n\n  inline void set_max_scan_num(uint32_t max_scan_num) {\n    max_scan_num_ = max_scan_num;\n  }\n\n  inline void set_max_scan_limit(uint32_t max_scan_limit) {\n    max_scan_limit_ = max_scan_limit;\n  }\n\n  inline void set_min_scan_limit(uint32_t min_scan_limit) {\n    min_scan_limit_ = min_scan_limit;\n  }\n\n  inline void set_ef(uint32_t v) {\n    ef_ = v;\n  }\n\n  inline void set_filter_mode(uint32_t v) {\n    filter_mode_ = v;\n  }\n\n  inline void set_filter_negative_probability(float v) {\n    negative_probability_ = v;\n  }\n\n  inline void set_max_scan_ratio(float v) {\n    max_scan_ratio_ = v;\n  }\n\n  virtual void set_magic(uint32_t v) {\n    magic_ = v;\n  }\n\n  virtual void set_force_padding_topk(bool v) {\n    force_padding_topk_ = v;\n  }\n\n  void 
set_bruteforce_threshold(uint32_t v) override {\n    bruteforce_threshold_ = v;\n  }\n\n  inline uint32_t get_bruteforce_threshold() const {\n    return bruteforce_threshold_;\n  }\n\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n\n  //! Reset context\n  void reset(void) override {\n    this->clear();\n    set_filter(nullptr);\n    reset_threshold();\n    set_fetch_vector(false);\n    set_group_params(0, 0);\n    reset_group_by();\n  }\n\n  inline std::map<std::string, TopkHeap> &group_topk_heaps() {\n    return group_topk_heaps_;\n  }\n\n  inline TopkHeap &level_topk(int level) {\n    if (ailego_unlikely(level_topks_.size() <= static_cast<size_t>(level))) {\n      int cur_level = level_topks_.size();\n      level_topks_.resize(level + 1);\n      for (; cur_level <= level; ++cur_level) {\n        size_t heap_size = std::max(entity_->neighbor_cnt(cur_level),\n                                    entity_->ef_construction());\n        level_topks_[cur_level].clear();\n        level_topks_[cur_level].limit(heap_size);\n      }\n    }\n\n    return level_topks_[level];\n  }\n\n  inline void check_need_adjuct_ctx(void) {\n    check_need_adjuct_ctx(entity_->doc_cnt());\n  }\n\n  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {\n    if (cur_doc > kMaxReserveDocCnt) {\n      return kMaxReserveDocCnt;\n    } else if (cur_doc < kMinReserveDocCnt) {\n      return kMinReserveDocCnt;\n    }\n    return cur_doc;\n  }\n\n  //! 
candidates heap and visitfilter need to resize as doc cnt growing up\n  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {\n    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {\n      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {\n        reserve_max_doc_cnt_ =\n            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);\n      }\n      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);\n      max_scan_num_ = max_scan_cnt;\n      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);\n      candidates_.clear();\n      candidates_.limit(max_scan_num_);\n    }\n  }\n\n  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {\n    uint32_t max_scan = max_doc_cnt * max_scan_ratio_;\n    if (max_scan < min_scan_limit_) {\n      max_scan = min_scan_limit_;\n    } else if (max_scan > max_scan_limit_) {\n      max_scan = max_scan_limit_;\n    }\n    return max_scan;\n  }\n\n  inline size_t get_scan_num() const {\n    return dc_.compare_cnt();\n  }\n\n  inline uint64_t reach_scan_limit() const {\n    return dc_.compare_cnt() >= max_scan_num_;\n  }\n\n  inline bool error() const {\n    return dc_.error();\n  }\n\n  inline void clear() {\n    dc_.clear();\n    if (ailego_unlikely(this->debugging())) {\n      stats_get_neighbors_cnt_ = 0u;\n      stats_get_vector_cnt_ = 0u;\n      stats_visit_dup_cnt_ = 0u;\n    }\n    // do not clear results_ for the next query will need it\n    for (auto &it : results_) {\n      it.clear();\n    }\n    for (auto &it : group_results_) {\n      it.clear();\n    }\n  }\n\n  uint32_t *mutable_stats_get_neighbors() {\n    return &stats_get_neighbors_cnt_;\n  }\n\n  uint32_t *mutable_stats_get_vector() {\n    return &stats_get_vector_cnt_;\n  }\n\n  uint32_t *mutable_stats_visit_dup_cnt() {\n    return &stats_visit_dup_cnt_;\n  }\n\n  inline bool debugging(void) const {\n    return debug_mode_;\n  }\n\n  inline void update_dist_caculator_distance(\n      
const IndexMetric::MatrixDistance &distance,\n      const IndexMetric::MatrixBatchDistance &batch_distance) {\n    dc_.update_distance(distance, batch_distance);\n  }\n\n  //! Get topk\n  inline uint32_t topk() const override {\n    return topk_;\n  }\n\n  //! Get group topk\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n\n  //! Get group num\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n\n  //! Get if group by search\n  inline bool group_by_search() {\n    return group_num_ > 0;\n  }\n\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n\n    topk_ = group_topk_ * group_num_;\n\n    topk_heap_.limit(std::max(topk_, ef_));\n\n    group_topk_heaps_.clear();\n  }\n\n private:\n  // Filling random nodes if topk not full\n  void fill_random_to_topk_full(void);\n\n  constexpr static uint32_t kTriggerReserveCnt = 4096UL;\n  constexpr static uint32_t kMinReserveDocCnt = 4096UL;\n  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;\n  constexpr static uint32_t kInvalidMgic = -1U;\n\n private:\n  HnswEntity::Pointer entity_;\n  HnswDistCalculator dc_;\n  IndexMetric::Pointer metric_;\n\n  bool debug_mode_{false};\n  bool force_padding_topk_{false};\n  uint32_t max_scan_num_{0};\n  uint32_t max_scan_limit_{0};\n  uint32_t min_scan_limit_{0};\n  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};\n  uint32_t topk_{0};\n  uint32_t group_topk_{0};\n  uint32_t filter_mode_{VisitFilter::ByteMap};\n  float negative_probability_{HnswEntity::kDefaultBFNegativeProbability};\n  uint32_t ef_{HnswEntity::kDefaultEf};\n  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};\n  uint32_t magic_{0U};\n  std::vector<IndexDocumentList> results_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n  TopkHeap topk_heap_{};\n  TopkHeap update_heap_{};\n  std::vector<TopkHeap> level_topks_{};\n  CandidateHeap candidates_{};\n  
VisitFilter visit_filter_{};\n  uint32_t bruteforce_threshold_{};\n  bool fetch_vector_{false};\n\n  uint32_t group_num_{0};\n  std::map<std::string, TopkHeap> group_topk_heaps_{};\n\n  uint32_t type_{kUnknownContext};\n  //! debug stats info\n  uint32_t stats_get_neighbors_cnt_{0u};\n  uint32_t stats_get_vector_cnt_{0u};\n  uint32_t stats_visit_dup_cnt_{0u};\n  std::string preprocess_buffer_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_dist_calculator.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_meta.h>\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswDistCalculator {\n public:\n  typedef std::shared_ptr<HnswDistCalculator> Pointer;\n\n public:\n  enum DistType {\n    DIST_NONE = 0,\n    DIST_DENSE = 1,\n    DIST_HYBRID = 2,\n    DIST_SPARSE = 3\n  };\n\n public:\n  //! Constructor\n  HnswDistCalculator(const HnswEntity *entity,\n                     const IndexMetric::Pointer &metric, uint32_t dim)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(nullptr),\n        dim_(dim),\n        compare_cnt_(0) {}\n\n  //! Constructor\n  HnswDistCalculator(const HnswEntity *entity,\n                     const IndexMetric::Pointer &metric, uint32_t dim,\n                     const void *query)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(query),\n        dim_(dim),\n        compare_cnt_(0) {}\n\n  //! 
Constructor\n  HnswDistCalculator(const HnswEntity *entity,\n                     const IndexMetric::Pointer &metric)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(nullptr),\n        dim_(0),\n        compare_cnt_(0) {}\n\n  void update(const HnswEntity *entity, const IndexMetric::Pointer &metric) {\n    entity_ = entity;\n    distance_ = metric->distance();\n    batch_distance_ = metric->batch_distance();\n  }\n\n  void update(const HnswEntity *entity, const IndexMetric::Pointer &metric,\n              uint32_t dim) {\n    entity_ = entity;\n    distance_ = metric->distance();\n    batch_distance_ = metric->batch_distance();\n    dim_ = dim;\n  }\n\n  inline void update_distance(\n      const IndexMetric::MatrixDistance &distance,\n      const IndexMetric::MatrixBatchDistance &batch_distance) {\n    distance_ = distance;\n    batch_distance_ = batch_distance;\n  }\n\n  //! Reset query vector data\n  inline void reset_query(const void *query) {\n    error_ = false;\n    query_ = query;\n  }\n\n  //! Returns distance\n  inline dist_t dist(const void *vec_lhs, const void *vec_rhs) {\n    if (ailego_unlikely(vec_lhs == nullptr || vec_rhs == nullptr)) {\n      LOG_ERROR(\"Nullptr of dense vector\");\n      error_ = true;\n      return 0.0f;\n    }\n\n    float score{0.0f};\n\n    distance_(vec_lhs, vec_rhs, dim_, &score);\n\n    return score;\n  }\n\n  //! Returns distance between query and vec.\n  inline dist_t dist(const void *vec) {\n    compare_cnt_++;\n\n    return dist(vec, query_);\n  }\n\n  //! Return distance between query and node id.\n  inline dist_t dist(node_id_t id) {\n    compare_cnt_++;\n\n    const void *feat = entity_->get_vector(id);\n    if (ailego_unlikely(feat == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector, id=%u\", id);\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(feat, query_);\n  }\n\n  //! 
Return dist node lhs between node rhs\n  inline dist_t dist(node_id_t lhs, node_id_t rhs) {\n    compare_cnt_++;\n\n    const void *feat = entity_->get_vector(lhs);\n    const void *query = entity_->get_vector(rhs);\n    if (ailego_unlikely(feat == nullptr || query == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector\");\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(feat, query);\n  }\n\n  dist_t operator()(const void *vec) {\n    return dist(vec);\n  }\n\n  dist_t operator()(id_t i) {\n    return dist(i);\n  }\n\n  dist_t operator()(id_t lhs, id_t rhs) {\n    return dist(lhs, rhs);\n  }\n\n  void batch_dist(const void **vecs, size_t num, dist_t *distances) {\n    compare_cnt_++;\n\n    batch_distance_(vecs, query_, num, dim_, distances);\n  }\n\n  inline dist_t batch_dist(node_id_t id) {\n    compare_cnt_++;\n\n    const void *feat = entity_->get_vector(id);\n    if (ailego_unlikely(feat == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector, id=%u\", id);\n      error_ = true;\n      return 0.0f;\n    }\n    dist_t score = 0;\n    batch_distance_(&feat, query_, 1, dim_, &score);\n\n    return score;\n  }\n\n  inline void clear() {\n    compare_cnt_ = 0;\n    error_ = false;\n  }\n\n  inline void clear_compare_cnt() {\n    compare_cnt_ = 0;\n  }\n\n  inline bool error() const {\n    return error_;\n  }\n\n  //! 
Get distances compute times\n  inline uint32_t compare_cnt() const {\n    return compare_cnt_;\n  }\n\n  inline uint32_t dimension() const {\n    return dim_;\n  }\n\n private:\n  HnswDistCalculator(const HnswDistCalculator &) = delete;\n  HnswDistCalculator &operator=(const HnswDistCalculator &) = delete;\n\n private:\n  const HnswEntity *entity_;\n\n  IndexMetric::MatrixDistance distance_;\n  IndexMetric::MatrixBatchDistance batch_distance_;\n\n  const void *query_;\n  uint32_t dim_;\n\n  uint32_t compare_cnt_;  // record distance compute times\n  // uint32_t compare_cnt_batch_;  // record batch distance compute time\n  bool error_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_entity.h\"\n#include <zvec/core/framework/index_stats.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst std::string HnswEntity::kGraphHeaderSegmentId = \"graph.header\";\nconst std::string HnswEntity::kGraphFeaturesSegmentId = \"graph.features\";\nconst std::string HnswEntity::kGraphKeysSegmentId = \"graph.keys\";\nconst std::string HnswEntity::kGraphNeighborsSegmentId = \"graph.neighbors\";\nconst std::string HnswEntity::kGraphOffsetsSegmentId = \"graph.offsets\";\nconst std::string HnswEntity::kGraphMappingSegmentId = \"graph.mapping\";\nconst std::string HnswEntity::kHnswHeaderSegmentId = \"hnsw.header\";\nconst std::string HnswEntity::kHnswNeighborsSegmentId = \"hnsw.neighbors\";\nconst std::string HnswEntity::kHnswOffsetsSegmentId = \"hnsw.offsets\";\n\nint HnswEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                                  size_t data_size, size_t *padding_size) {\n  *padding_size = AlignSize(data_size) - data_size;\n  if (*padding_size == 0) {\n    return 0;\n  }\n\n  std::string padding(*padding_size, '\\0');\n  if (dumper->write(padding.data(), *padding_size) != *padding_size) {\n    LOG_ERROR(\"Append padding failed, size %lu\", *padding_size);\n    return IndexError_WriteData;\n  }\n  return 0;\n}\n\nint64_t HnswEntity::dump_segment(const 
IndexDumper::Pointer &dumper,\n                                 const std::string &segment_id,\n                                 const void *data, size_t size) const {\n  size_t len = dumper->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect: %lu, actual: %lu\",\n              segment_id.c_str(), size, len);\n    return IndexError_WriteData;\n  }\n\n  size_t padding_size = AlignSize(size) - size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, '\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      LOG_ERROR(\"Append padding failed, size %lu\", padding_size);\n      return IndexError_WriteData;\n    }\n  }\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  int ret = dumper->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return ret;\n  }\n\n  return len + padding_size;\n}\n\nint64_t HnswEntity::dump_header(const IndexDumper::Pointer &dumper,\n                                const HNSWHeader &hd) const {\n  //! dump basic graph header. header is aligned and does not need padding\n  int64_t graph_hd_size =\n      dump_segment(dumper, kGraphHeaderSegmentId, &hd.graph, hd.graph.size);\n  if (graph_hd_size < 0) {\n    return graph_hd_size;\n  }\n\n  //! dump basic graph header. 
header is aligned and does not need padding\n  int64_t hnsw_hd_size =\n      dump_segment(dumper, kHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);\n  if (hnsw_hd_size < 0) {\n    return hnsw_hd_size;\n  }\n\n  return graph_hd_size + hnsw_hd_size;\n}\n\nvoid HnswEntity::reshuffle_vectors(\n    const std::function<level_t(node_id_t)> & /*get_level*/,\n    std::vector<node_id_t> * /*n2o_mapping*/,\n    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {\n  // TODO\n  return;\n}\n\nint64_t HnswEntity::dump_mapping_segment(const IndexDumper::Pointer &dumper,\n                                         const key_t *keys) const {\n  std::vector<node_id_t> mapping(doc_cnt());\n\n  std::iota(mapping.begin(), mapping.end(), 0U);\n  std::sort(mapping.begin(), mapping.end(),\n            [&](node_id_t i, node_id_t j) { return keys[i] < keys[j]; });\n\n  size_t size = mapping.size() * sizeof(node_id_t);\n\n  return dump_segment(dumper, kGraphMappingSegmentId, mapping.data(), size);\n}\n\nint64_t HnswEntity::dump_segments(\n    const IndexDumper::Pointer &dumper, key_t *keys,\n    const std::function<level_t(node_id_t)> &get_level) const {\n  HNSWHeader dump_hd(header());\n\n  dump_hd.graph.node_size = AlignSize(vector_size());\n\n  std::vector<node_id_t> n2o_mapping;  // map new id to origin id\n  std::vector<node_id_t> o2n_mapping;  // map origin id to new id\n  reshuffle_vectors(get_level, &n2o_mapping, &o2n_mapping, keys);\n  if (!o2n_mapping.empty()) {\n    dump_hd.hnsw.entry_point = o2n_mapping[entry_point()];\n  }\n\n  //! Dump header\n  int64_t hd_size = dump_header(dumper, dump_hd);\n  if (hd_size < 0) {\n    return hd_size;\n  }\n\n  //! Dump vectors\n  int64_t vecs_size = dump_vectors(dumper, n2o_mapping);\n  if (vecs_size < 0) {\n    return vecs_size;\n  }\n\n  //! Dump neighbors\n  auto neighbors_size =\n      dump_neighbors(dumper, get_level, n2o_mapping, o2n_mapping);\n  if (neighbors_size < 0) {\n    return neighbors_size;\n  }\n  //! 
free memory\n  n2o_mapping = std::vector<node_id_t>();\n  o2n_mapping = std::vector<node_id_t>();\n\n  //! Dump keys\n  size_t key_segment_size = doc_cnt() * sizeof(key_t);\n  int64_t keys_size =\n      dump_segment(dumper, kGraphKeysSegmentId, keys, key_segment_size);\n  if (keys_size < 0) {\n    return keys_size;\n  }\n\n  //! Dump mapping\n  int64_t mapping_size = dump_mapping_segment(dumper, keys);\n  if (mapping_size < 0) {\n    return mapping_size;\n  }\n\n  return hd_size + keys_size + vecs_size + neighbors_size + mapping_size;\n}\n\nint64_t HnswEntity::dump_vectors(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping) const {\n  size_t vector_dump_size = vector_size();\n\n  size_t padding_size = AlignSize(vector_dump_size) - vector_dump_size;\n\n  std::vector<char> padding(padding_size);\n  memset(padding.data(), 0, sizeof(char) * padding_size);\n  const void *data = nullptr;\n  uint32_t crc = 0U;\n  size_t vecs_size = 0UL;\n\n  //! dump vectors\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    data = get_vector(reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    if (ailego_unlikely(!data)) {\n      return IndexError_ReadData;\n    }\n    size_t len = dumper->write(data, vector_size());\n    if (len != vector_size()) {\n      LOG_ERROR(\"Dump vectors failed, write=%zu expect=%zu\", len,\n                vector_size());\n      return IndexError_WriteData;\n    }\n\n    crc = ailego::Crc32c::Hash(data, vector_size(), crc);\n    vecs_size += vector_size();\n\n    if (padding_size == 0) {\n      continue;\n    }\n\n    len = dumper->write(padding.data(), padding_size);\n    if (len != padding_size) {\n      LOG_ERROR(\"Dump vectors failed, write=%zu expect=%zu\", len, padding_size);\n      return IndexError_WriteData;\n    }\n    crc = ailego::Crc32c::Hash(padding.data(), padding_size, crc);\n    vecs_size += padding_size;\n  }\n\n  int ret = dumper->append(kGraphFeaturesSegmentId, vecs_size, 0UL, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump vectors segment meta failed, ret %d\", ret);\n    return ret;\n  }\n\n  return vecs_size;\n}\n\nint64_t HnswEntity::dump_graph_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<GraphNeighborMeta> graph_meta;\n  graph_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  std::vector<node_id_t> mapping(l0_neighbor_cnt());\n\n  uint32_t min_neighbor_count = 10000;\n  uint32_t max_neighbor_count = 0;\n  size_t sum_neighbor_count = 0;\n\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    const Neighbors neighbors =\n        get_neighbors(0, reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),\n                       \"invalid neighbors\");\n\n    uint32_t neighbor_count = neighbors.size();\n    if (neighbor_count < min_neighbor_count) {\n      min_neighbor_count = neighbor_count;\n    }\n    if (neighbor_count > max_neighbor_count) {\n      max_neighbor_count = neighbor_count;\n    }\n    sum_neighbor_count += neighbor_count;\n\n    graph_meta.emplace_back(offset, neighbor_count);\n    size_t size = neighbors.size() * sizeof(node_id_t);\n    const node_id_t *data = &neighbors[0];\n    if (!neighbor_mapping.empty()) {\n      for (node_id_t i = 0; i < neighbors.size(); ++i) {\n        mapping[i] = neighbor_mapping[neighbors[i]];\n      }\n      data = mapping.data();\n    }\n    if (dumper->write(data, size) != size) {\n      LOG_ERROR(\"Dump graph neighbor id=%u failed, size %lu\", id, size);\n      return IndexError_WriteData;\n    }\n    crc = ailego::Crc32c::Hash(data, size, crc);\n    offset += size;\n  }\n\n  uint32_t average_neighbor_count = 0;\n  if (doc_cnt() > 0) {\n    average_neighbor_count = sum_neighbor_count / doc_cnt();\n  }\n  LOG_INFO(\n      \"Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] \"\n      \"average_neighbor_count[%u]\",\n      min_neighbor_count, max_neighbor_count, average_neighbor_count);\n\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n  ret = dumper->append(kGraphNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\",\n              kGraphNeighborsSegmentId.c_str(), ret);\n    return ret;\n  }\n\n  //! 
dump level 0 neighbors meta\n  auto len = dump_segment(dumper, kGraphOffsetsSegmentId, graph_meta.data(),\n                          graph_meta.size() * sizeof(GraphNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\nint64_t HnswEntity::dump_upper_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::function<level_t(node_id_t)> &get_level,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<HnswNeighborMeta> hnsw_meta;\n  hnsw_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  std::vector<node_id_t> buffer(upper_neighbor_cnt() + 1);\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    node_id_t new_id = reorder_mapping.empty() ? id : reorder_mapping[id];\n    auto level = get_level(new_id);\n    if (level == 0) {\n      hnsw_meta.emplace_back(0U, 0U);\n      continue;\n    }\n    hnsw_meta.emplace_back(offset, level);\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n      const Neighbors neighbors = get_neighbors(cur_level, new_id);\n      ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),\n                         \"invalid neighbors\");\n      memset(buffer.data(), 0, sizeof(node_id_t) * buffer.size());\n      buffer[0] = neighbors.size();\n      if (neighbor_mapping.empty()) {\n        memcpy(&buffer[1], &neighbors[0], neighbors.size() * sizeof(node_id_t));\n      } else {\n        for (node_id_t i = 0; i < neighbors.size(); ++i) {\n          buffer[i + 1] = neighbor_mapping[neighbors[i]];\n        }\n      }\n      if (dumper->write(buffer.data(), sizeof(node_id_t) * buffer.size()) !=\n          sizeof(node_id_t) * buffer.size()) {\n        LOG_ERROR(\"Dump graph neighbor id=%u failed, size %lu\", id,\n                  
sizeof(node_id_t) * buffer.size());\n        return IndexError_WriteData;\n      }\n      crc = ailego::Crc32c::Hash(buffer.data(),\n                                 sizeof(node_id_t) * buffer.size(), crc);\n      offset += sizeof(node_id_t) * buffer.size();\n    }\n  }\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n\n  ret = dumper->append(kHnswNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\", kHnswNeighborsSegmentId.c_str(),\n              ret);\n    return ret;\n  }\n\n  //! dump level 0 neighbors meta\n  auto len = dump_segment(dumper, kHnswOffsetsSegmentId, hnsw_meta.data(),\n                          hnsw_meta.size() * sizeof(HnswNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string.h>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_storage.h>\n\nnamespace zvec {\nnamespace core {\n\nusing node_id_t = uint32_t;\nusing key_t = uint64_t;\nusing level_t = int32_t;\nusing dist_t = float;\nusing TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;\nusing CandidateHeap =\n    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;\nconstexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);\nconstexpr key_t kInvalidKey = static_cast<key_t>(-1);\nclass DistCalculator;\n\nstruct GraphHeader {\n  uint32_t size;\n  uint32_t version;\n  uint32_t graph_type;\n  uint32_t doc_count;\n  uint32_t vector_size;\n  uint32_t node_size;\n  uint32_t l0_neighbor_count;\n  uint32_t prune_type;\n  uint32_t prune_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t options;\n  uint32_t min_neighbor_count;\n  uint8_t reserved_[4080];\n};\n\nstatic_assert(sizeof(GraphHeader) % 32 == 0,\n              \"GraphHeader must be aligned with 32 bytes\");\n\n//! 
Hnsw upper neighbor header\nstruct HnswHeader {\n  uint32_t size;      // header size\n  uint32_t revision;  // current total docs of the graph\n  uint32_t upper_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t scaling_factor;\n  uint32_t max_level;\n  uint32_t entry_point;\n  uint32_t options;\n  uint8_t reserved_[30];\n};\n\nstatic_assert(sizeof(HnswHeader) % 32 == 0,\n              \"GraphHeader must be aligned with 32 bytes\");\n\n//! Hnsw common header and upper neighbor header\nstruct HNSWHeader {\n  HNSWHeader() {\n    clear();\n  }\n\n  HNSWHeader(const HNSWHeader &header) {\n    memcpy(this, &header, sizeof(header));\n  }\n\n  HNSWHeader &operator=(const HNSWHeader &header) {\n    memcpy(this, &header, sizeof(header));\n    return *this;\n  }\n\n  //! Reset state to zero, and the params is untouched\n  void inline reset() {\n    graph.doc_count = 0U;\n    hnsw.entry_point = kInvalidNodeId;\n    hnsw.max_level = 0;\n  }\n\n  //! Clear all fields to init value\n  void inline clear() {\n    memset(this, 0, sizeof(HNSWHeader));\n    hnsw.entry_point = kInvalidNodeId;\n    graph.size = sizeof(GraphHeader);\n    hnsw.size = sizeof(HnswHeader);\n  }\n\n  size_t l0_neighbor_cnt() const {\n    return graph.l0_neighbor_count;\n  }\n\n  size_t upper_neighbor_cnt() const {\n    return hnsw.upper_neighbor_count;\n  }\n\n  size_t vector_size() const {\n    return graph.vector_size;\n  }\n\n  size_t ef_construction() const {\n    return graph.ef_construction;\n  }\n\n  size_t scaling_factor() const {\n    return hnsw.scaling_factor;\n  }\n\n  size_t neighbor_prune_cnt() const {\n    return graph.prune_neighbor_count;\n  }\n\n  node_id_t entry_point() const {\n    return hnsw.entry_point;\n  }\n\n  node_id_t doc_cnt() const {\n    return graph.doc_count;\n  }\n\n  GraphHeader graph;\n  HnswHeader hnsw;\n};\n\nstruct NeighborsHeader {\n  uint32_t neighbor_cnt;\n  node_id_t neighbors[0];\n};\n\nstruct Neighbors {\n  Neighbors() : cnt{0}, data{nullptr} {}\n\n  
Neighbors(uint32_t cnt_in, const node_id_t *data_in)\n      : cnt{cnt_in}, data{data_in} {}\n\n  Neighbors(const IndexStorage::MemoryBlock &mem_block)\n      : neighbor_block{mem_block} {\n    auto hd = reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());\n    cnt = hd->neighbor_cnt;\n    data = hd->neighbors;\n  }\n\n  size_t size(void) const {\n    return cnt;\n  }\n\n  const node_id_t &operator[](size_t idx) const {\n    return data[idx];\n  }\n\n  uint32_t cnt;\n  const node_id_t *data;\n  IndexStorage::MemoryBlock neighbor_block;\n};\n\n//! level 0 neighbors offset\nstruct GraphNeighborMeta {\n  GraphNeighborMeta(size_t o, size_t cnt) : offset(o), neighbor_cnt(cnt) {}\n\n  uint64_t offset : 48;\n  uint64_t neighbor_cnt : 16;\n};\n\n//! hnsw upper neighbors meta\nstruct HnswNeighborMeta {\n  HnswNeighborMeta(size_t o, size_t l) : offset(o), level(l) {}\n\n  uint64_t offset : 48;  // offset = idx * upper neighors size\n  uint64_t level : 16;\n};\n\nclass HnswEntity {\n public:\n  //! Constructor\n  HnswEntity() {}\n\n  //! Constructor\n  HnswEntity(const HNSWHeader &hd) {\n    header_ = hd;\n  }\n\n  //! Destructor\n  virtual ~HnswEntity() {}\n\n  //! HnswEntity Pointerd;\n  typedef std::shared_ptr<HnswEntity> Pointer;\n\n  //! Get max neighbor size of graph level\n  inline size_t neighbor_cnt(level_t level) const {\n    return level == 0 ? header_.graph.l0_neighbor_count\n                      : header_.hnsw.upper_neighbor_count;\n  }\n\n  //! get max neighbor size of graph level 0\n  inline size_t l0_neighbor_cnt() const {\n    return header_.graph.l0_neighbor_count;\n  }\n\n  //! get min neighbor size of graph\n  inline size_t min_neighbor_cnt() const {\n    return header_.graph.min_neighbor_count;\n  }\n\n  //! get upper neighbor size of graph level other than 0\n  inline size_t upper_neighbor_cnt() const {\n    return header_.hnsw.upper_neighbor_count;\n  }\n\n  //! 
Get current total doc of the hnsw graph\n  inline node_id_t *mutable_doc_cnt() {\n    return &header_.graph.doc_count;\n  }\n\n  inline node_id_t doc_cnt() const {\n    return header_.graph.doc_count;\n  }\n\n  //! Get hnsw graph scaling params\n  inline size_t scaling_factor() const {\n    return header_.hnsw.scaling_factor;\n  }\n\n  //! Get prune_size\n  inline size_t prune_cnt() const {\n    return header_.graph.prune_neighbor_count;\n  }\n\n  //! Current entity of top level graph\n  inline node_id_t entry_point() const {\n    return header_.hnsw.entry_point;\n  }\n\n  //! Current max graph level\n  inline level_t cur_max_level() const {\n    return header_.hnsw.max_level;\n  }\n\n  //! Retrieve index vector size\n  size_t vector_size() const {\n    return header_.graph.vector_size;\n  }\n\n  //! Retrieve node size\n  size_t node_size() const {\n    return header_.graph.node_size;\n  }\n\n  //! Retrieve ef constuction\n  size_t ef_construction() const {\n    return header_.graph.ef_construction;\n  }\n\n  void set_vector_size(size_t size) {\n    header_.graph.vector_size = size;\n  }\n\n  void set_prune_cnt(size_t v) {\n    header_.graph.prune_neighbor_count = v;\n  }\n\n  void set_scaling_factor(size_t val) {\n    header_.hnsw.scaling_factor = val;\n  }\n\n  void set_l0_neighbor_cnt(size_t cnt) {\n    header_.graph.l0_neighbor_count = cnt;\n  }\n\n  void set_min_neighbor_cnt(size_t cnt) {\n    header_.graph.min_neighbor_count = cnt;\n  }\n\n  void set_upper_neighbor_cnt(size_t cnt) {\n    header_.hnsw.upper_neighbor_count = cnt;\n  }\n\n  void set_ef_construction(size_t ef) {\n    header_.graph.ef_construction = ef;\n  }\n\n protected:\n  inline const HNSWHeader &header() const {\n    return header_;\n  }\n\n  inline HNSWHeader *mutable_header() {\n    return &header_;\n  }\n\n  inline size_t header_size() const {\n    return sizeof(header_);\n  }\n\n  void set_node_size(size_t size) {\n    header_.graph.node_size = size;\n  }\n\n  //! 
Dump all segment by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_segments(\n      const IndexDumper::Pointer &dumper, key_t *keys,\n      const std::function<level_t(node_id_t)> &get_level) const;\n\n private:\n  //! dump mapping segment, for get_vector_by_key in provider\n  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,\n                               const key_t *keys) const;\n\n  //! dump hnsw head by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_header(const IndexDumper::Pointer &dumper,\n                      const HNSWHeader &hd) const;\n\n  //! dump vectors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_vectors(const IndexDumper::Pointer &dumper,\n                       const std::vector<node_id_t> &reorder_mapping) const;\n\n  //! dump hnsw neighbors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,\n                         const std::function<level_t(node_id_t)> &get_level,\n                         const std::vector<node_id_t> &reorder_mapping,\n                         const std::vector<node_id_t> &neighbor_mapping) const {\n    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);\n    if (len1 < 0) {\n      return len1;\n    }\n    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,\n                                     neighbor_mapping);\n    if (len2 < 0) {\n      return len2;\n    }\n\n    return len1 + len2;\n  }\n\n  //! dump segment by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_segment(const IndexDumper::Pointer &dumper,\n                       const std::string &segment_id, const void *data,\n                       size_t size) const;\n\n  //! Dump level 0 neighbors\n  //! 
Return dump size if success, errno(<0) in failure\n  int64_t dump_graph_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n  //! Dump upper level neighbors\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_upper_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::function<level_t(node_id_t)> &get_level,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n public:\n  //! Cleanup the entity\n  virtual int cleanup(void) {\n    header_.clear();\n    return 0;\n  }\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswEntity::Pointer clone() const {\n    LOG_ERROR(\"Update neighbors not implemented\");\n    return HnswEntity::Pointer();\n  }\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const = 0;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const = 0;\n\n  //! Get vectors feature data by keys\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const = 0;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const = 0;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const = 0;\n\n  //! Retrieve a vector using a primary key\n  virtual const void *get_vector_by_key(uint64_t /*key*/) const {\n    LOG_ERROR(\"get vector not implemented\");\n    return nullptr;\n  }\n\n  virtual int get_vector_by_key(const key_t /*key*/,\n                                IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Get the node id's neighbors on graph level\n  //! Note: the neighbors cannot be modified, using the following\n  //! method to get WritableNeighbors if want to\n  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;\n\n  //! Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t /*level*/, key_t /*key*/, const void * /*vec*/,\n                         node_id_t * /*id*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add vector and id to hnsw entity\n  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,\n                                 const void * /*vec*/) {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int update_neighbors(\n      level_t /*level*/, node_id_t /*id*/,\n      const std::vector<std::pair<node_id_t, dist_t>> & /*neighbors*/) {\n    LOG_ERROR(\"Update neighbors dense not implemented\");\n\n    return 0;\n  }\n\n  //! Append neighbor_id to node id neighbors on level, size is the current\n  //! neighbors size. Notice: the caller must be ensure the neighbors not full\n  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,\n                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {\n    LOG_ERROR(\"Add neighbor not implemented\");\n  }\n\n  //! 
Update entry point and max level\n  virtual void update_ep_and_level(node_id_t ep, level_t level) {\n    header_.hnsw.entry_point = ep;\n    header_.hnsw.max_level = level;\n  }\n\n  virtual int load(const IndexStorage::Pointer & /*container*/,\n                   bool /*check_crc*/) {\n    LOG_ERROR(\"Load not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {\n    LOG_ERROR(\"Dump not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                               size_t data_size, size_t *padding_size);\n\n protected:\n  static inline size_t AlignSize(size_t size) {\n    return (size + 0x1F) & (~0x1F);\n  }\n\n  static inline size_t AlignPageSize(size_t size) {\n    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;\n    return (size + page_mask) & (~page_mask);\n  }\n\n  static inline size_t AlignHugePageSize(size_t size) {\n    size_t page_mask = ailego::MemoryHelper::HugePageSize() - 1;\n    return (size + page_mask) & (~page_mask);\n  }\n\n  //! 
rearrange vectors to improve cache locality\n  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,\n                         std::vector<node_id_t> *n2o_mapping,\n                         std::vector<node_id_t> *o2n_mapping,\n                         key_t *keys) const;\n\n public:\n  const static std::string kGraphHeaderSegmentId;\n  const static std::string kGraphFeaturesSegmentId;\n  const static std::string kGraphKeysSegmentId;\n  const static std::string kGraphNeighborsSegmentId;\n  const static std::string kGraphOffsetsSegmentId;\n  const static std::string kGraphMappingSegmentId;\n  const static std::string kHnswHeaderSegmentId;\n  const static std::string kHnswNeighborsSegmentId;\n  const static std::string kHnswOffsetsSegmentId;\n\n  constexpr static uint32_t kRevision = 0U;\n  constexpr static size_t kMaxGraphLayers = 15;\n  constexpr static uint32_t kDefaultEfConstruction = 500;\n  constexpr static uint32_t kDefaultEf = 500;\n  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW\n  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n  constexpr static float kDefaultScanRatio = 0.1f;\n  constexpr static uint32_t kDefaultMinScanLimit = 10000;\n  constexpr static uint32_t kDefaultMaxScanLimit =\n      std::numeric_limits<uint32_t>::max();\n  constexpr static float kDefaultBFNegativeProbability = 0.001f;\n  constexpr static uint32_t kDefaultScalingFactor = 50U;\n  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;\n  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion\n  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;\n  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;\n  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;\n  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;\n  constexpr static float kDefaultNeighborPruneMultiplier =\n      1.0f;  // prune_cnt = upper_max_neighbor_cnt * 
multiplier\n  constexpr static float kDefaultL0MaxNeighborCntMultiplier =\n      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier\n\n protected:\n  HNSWHeader header_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_index_hash.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_chunk.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! Persistent hashmap implement through open addressing algorithm\ntemplate <class Key, class Val, Val EmptyVal = 0U,\n          typename =\n              typename std::enable_if<std::is_integral<Key>::value>::type>\nclass HnswIndexHashMap {\n  using key_type = Key;\n  using val_type = Val;\n\n  struct Iterator {\n    key_type first;\n    val_type second;\n  };\n  typedef Iterator *iterator;\n  typedef Iterator Item;\n  typedef const Iterator *const_iterator;\n\n  class Slot {\n   public:\n    Slot(Chunk::Pointer &&chunk, const void *data)\n        : chunk_(std::move(chunk)),\n          items_(reinterpret_cast<const Item *>(data)) {}\n    //! 
Return a empty loc or the key item loc\n\n    Slot(Chunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)\n        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {\n      items_ = reinterpret_cast<const Item *>(items_block_.data());\n    }\n    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {\n      auto it = &items_[key & mask];\n      for (auto i = 0U; i < max_items; ++i) {\n        if (it->first == key || it->second == EmptyVal) {\n          // LOG_DEBUG(\"i=%u\", i);\n          return it;\n        }\n        ++it;\n        if (it == &items_[max_items]) {\n          it = &items_[0];\n        }\n      }\n      return nullptr;\n    }\n\n    bool update(const_iterator it) {\n      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -\n                        reinterpret_cast<const uint8_t *>(&items_[0]);\n      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=\n                          sizeof(Item))) {\n        LOG_ERROR(\"Chunk write failed\");\n        return false;\n      }\n      return true;\n    }\n\n   private:\n    Chunk::Pointer chunk_{};\n    const Item *items_{nullptr};  // point to chunk data\n    IndexStorage::MemoryBlock items_block_{};\n  };\n\n public:\n  //! Init the hash\n  //! broker      the index allocator\n  //! chunk_size  the size of per chunk allocated, actual size may greater\n  //! factor      factor = 1/ratio, ratio is the probability of a squence\n  //! number inserted to this container\n  //! max         the max number key can be inserted\n  //! 
expansion_ratio   memory expansion ratio\n  int init(ChunkBroker::Pointer &broker, uint32_t chunk_size, uint32_t factor,\n           size_t max, float expansion_ratio) {\n    ailego_assert_with(expansion_ratio > 1.0f, \"ratio must > 1.0f\");\n    broker_ = broker;\n\n    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));\n    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));\n    size_t range = slot_items_ * factor / expansion_ratio;\n    mask_bits_ = std::floor(std::log2(range));\n    range = 1UL << mask_bits_;\n    size_t max_slots = std::ceil(max * 1.0f / range);\n    slots_.reserve(max_slots);\n    slot_loc_mask_ = slot_items_ - 1U;\n    int ret = load();\n    if (ret != 0) {\n      return ret;\n    }\n\n    LOG_DEBUG(\n        \"HnswIndexHash init, chunkSize=%u factor=%u max=%zu \"\n        \"ratio=%f slotItems=%u maxSlots=%zu maskBits=%u \"\n        \"range=%zu\",\n        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,\n        mask_bits_, range);\n\n    return 0;\n  }\n\n  int cleanup(void) {\n    broker_.reset();\n    slots_.clear();\n    slots_.shrink_to_fit();\n    mask_bits_ = 0U;\n    slot_items_ = 0U;\n    slot_loc_mask_ = 0U;\n\n    return 0;\n  }\n\n  const_iterator end(void) const {\n    return nullptr;\n  }\n\n  const_iterator find(const key_type key) const {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      return end();\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    return it && it->second != EmptyVal ? 
it : nullptr;\n  }\n\n  bool insert(key_type key, val_type val) {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      if (ailego_unlikely(idx >= slots_.capacity())) {\n        LOG_ERROR(\"no space to insert\");\n        return false;\n      }\n      for (auto i = slots_.size(); i <= idx; ++i) {\n        if (ailego_unlikely(!alloc_slot(i))) {\n          return false;\n        }\n      }\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    if (ailego_unlikely(it == nullptr)) {\n      LOG_ERROR(\"no space to insert\");\n      return false;\n    }\n\n    //! TODO: write memory is ok?\n    const_cast<iterator>(it)->first = key;\n    const_cast<iterator>(it)->second = val;\n\n    return slots_[idx].update(it);\n  }\n\n private:\n  bool alloc_slot(size_t idx) {\n    ailego_assert_with(idx == slots_.size(), \"invalid idx\");\n\n    size_t size = slot_items_ * sizeof(Item);\n    auto p =\n        broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, idx, size);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return false;\n    }\n    Chunk::Pointer chunk = p.second;\n    if (ailego_unlikely(chunk->resize(size) != size)) {\n      LOG_ERROR(\"Chunk resize failed, size=%zu\", size);\n      return false;\n    }\n    //! 
Read the whole data to memory\n    IndexStorage::MemoryBlock data_block;\n    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n      LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n      return false;\n    }\n\n    slots_.emplace_back(std::move(chunk), std::move(data_block));\n    return true;\n  }\n\n  int load(void) {\n    size_t slots_cnt =\n        broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);\n    for (size_t i = 0UL; i < slots_cnt; ++i) {\n      auto chunk =\n          broker_->get_chunk(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);\n      if (!chunk) {\n        LOG_ERROR(\"Get chunk failed, seq=%zu\", i);\n        return IndexError_InvalidFormat;\n      }\n      size_t size = sizeof(Item) * slot_items_;\n      if (chunk->data_size() < size) {\n        LOG_ERROR(\n            \"Hash params may be mismatch, seq=%zu, data_size=%zu \"\n            \"expect=%zu\",\n            i, chunk->data_size(), size);\n        return IndexError_InvalidFormat;\n      }\n      //! Read the whole data to memory\n      IndexStorage::MemoryBlock data_block;\n      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n        LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n        return false;\n      }\n      slots_.emplace_back(std::move(chunk), std::move(data_block));\n    }\n    return 0;\n  }\n\n private:\n  ChunkBroker::Pointer broker_{};  // chunk broker\n  std::vector<Slot> slots_{};\n  uint32_t mask_bits_{0U};\n  uint32_t slot_items_{};  // must be a power of 2\n  uint32_t slot_loc_mask_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_index_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_provider.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswIndexProvider : public IndexProvider {\n public:\n  HnswIndexProvider(const IndexMeta &meta, const HnswEntity::Pointer &entity,\n                    const std::string &owner)\n      : meta_(meta), entity_(entity), owner_class_(owner) {}\n\n  HnswIndexProvider(const HnswIndexProvider &) = delete;\n  HnswIndexProvider &operator=(const HnswIndexProvider &) = delete;\n\n public:  // holder interface\n  //! Create a new iterator\n  IndexProvider::Iterator::Pointer create_iterator() override {\n    return HnswIndexProvider::Iterator::Pointer(new (std::nothrow)\n                                                    Iterator(entity_));\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return entity_->doc_cnt();\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const override {\n    return meta_.dimension();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return meta_.data_type();\n  }\n\n  //! 
Retrieve vector size in bytes\n  size_t element_size(void) const override {\n    return meta_.element_size();\n  }\n\n public:  // provider's unique interface\n  //! Retrieve a vector using a primary key\n  const void *get_vector(uint64_t key) const override {\n    return entity_->get_vector_by_key(key);\n  }\n\n  int get_vector(const uint64_t key,\n                 IndexStorage::MemoryBlock &block) const override {\n    return entity_->get_vector_by_key(key, block);\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n private:\n  class Iterator : public IndexProvider::Iterator {\n   public:\n    Iterator(const HnswEntity::Pointer &entity)\n        : entity_(entity), cur_id_(0U) {}\n\n    //! Retrieve pointer of data\n    //! NOTICE: the vec feature will be changed after iterating to next, so\n    //! the caller need to keep a copy of it before iterator to next vector\n    virtual const void *data(void) const override {\n      return entity_->get_vector(cur_id_);\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return cur_id_ < entity_->doc_cnt();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return entity_->get_key(cur_id_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      // cur_id_ += 1;\n      cur_id_ = get_next_valid_id(cur_id_ + 1);\n    }\n\n    //! 
Reset the iterator\n    void reset(void) {\n      cur_id_ = get_next_valid_id(0);\n    }\n\n   private:\n    node_id_t get_next_valid_id(node_id_t start_id) {\n      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {\n        if (entity_->get_key(i) != kInvalidNodeId) {\n          cur_id_ = i;\n          return i;\n        }\n      }\n      return kInvalidNodeId;\n    }\n\n   private:\n    const HnswEntity::Pointer entity_;\n    node_id_t cur_id_;\n  };\n\n private:\n  const IndexMeta &meta_;\n  const HnswEntity::Pointer entity_;\n  const std::string owner_class_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\nstatic const std::string PARAM_HNSW_BUILDER_THREAD_COUNT(\n    \"proxima.hnsw.builder.thread_count\");\nstatic const std::string PARAM_HNSW_BUILDER_MEMORY_QUOTA(\n    \"proxima.hnsw.builder.memory_quota\");\nstatic const std::string PARAM_HNSW_BUILDER_EFCONSTRUCTION(\n    \"proxima.hnsw.builder.efconstruction\");\nstatic const std::string PARAM_HNSW_BUILDER_SCALING_FACTOR(\n    \"proxima.hnsw.builder.scaling_factor\");\nstatic const std::string PARAM_HNSW_BUILDER_CHECK_INTERVAL_SECS(\n    \"proxima.hnsw.builder.check_interval_secs\");\nstatic const std::string PARAM_HNSW_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw.builder.neighbor_prune_multiplier\");\nstatic const std::string PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw.builder.min_neighbor_count\");\nstatic const std::string PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw.builder.max_neighbor_count\");\nstatic const std::string PARAM_HNSW_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n    \"proxima.hnsw.builder.l0_max_neighbor_count_multiplier\");\n\nstatic const std::string PARAM_HNSW_SEARCHER_EF(\"proxima.hnsw.searcher.ef\");\nstatic const std::string PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw.searcher.brute_force_threshold\");\nstatic const std::string 
PARAM_HNSW_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE(\n    \"proxima.hnsw.searcher.neighbors_in_memory_enable\");\nstatic const std::string PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO(\n    \"proxima.hnsw.searcher.max_scan_ratio\");\nstatic const std::string PARAM_HNSW_SEARCHER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw.searcher.check_crc_enable\");\nstatic const std::string PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw.searcher.visit_bloomfilter_enable\");\nstatic const std::string PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n    \"proxima.hnsw.searcher.visit_bloomfilter_negative_prob\");\nstatic const std::string PARAM_HNSW_SEARCHER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw.searcher.force_padding_result_enable\");\n\nstatic const std::string PARAM_HNSW_STREAMER_MAX_SCAN_RATIO(\n    \"proxima.hnsw.streamer.max_scan_ratio\");\nstatic const std::string PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT(\n    \"proxima.hnsw.streamer.min_scan_limit\");\nstatic const std::string PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT(\n    \"proxima.hnsw.streamer.max_scan_limit\");\nstatic const std::string PARAM_HNSW_STREAMER_EF(\"proxima.hnsw.streamer.ef\");\nstatic const std::string PARAM_HNSW_STREAMER_EFCONSTRUCTION(\n    \"proxima.hnsw.streamer.efconstruction\");\nstatic const std::string PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw.streamer.max_neighbor_count\");\nstatic const std::string PARAM_HNSW_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n    \"proxima.hnsw.streamer.l0_max_neighbor_count_multiplier\");\nstatic const std::string PARAM_HNSW_STREAMER_SCALING_FACTOR(\n    \"proxima.hnsw.streamer.scaling_factor\");\nstatic const std::string PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw.streamer.brute_force_threshold\");\nstatic const std::string PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT(\n    \"proxima.hnsw.streamer.docs_hard_limit\");\nstatic const std::string PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT(\n    \"proxima.hnsw.streamer.docs_soft_limit\");\nstatic const 
std::string PARAM_HNSW_STREAMER_MAX_INDEX_SIZE(\n    \"proxima.hnsw.streamer.max_index_size\");\nstatic const std::string PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw.streamer.visit_bloomfilter_enable\");\nstatic const std::string PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n    \"proxima.hnsw.streamer.visit_bloomfilter_negative_prob\");\nstatic const std::string PARAM_HNSW_STREAMER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw.streamer.check_crc_enable\");\nstatic const std::string PARAM_HNSW_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw.streamer.neighbor_prune_multiplier\");\nstatic const std::string PARAM_HNSW_STREAMER_CHUNK_SIZE(\n    \"proxima.hnsw.streamer.chunk_size\");\nstatic const std::string PARAM_HNSW_STREAMER_FILTER_SAME_KEY(\n    \"proxima.hnsw.streamer.filter_same_key\");\nstatic const std::string PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE(\n    \"proxima.hnsw.streamer.get_vector_enable\");\nstatic const std::string PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw.streamer.min_neighbor_count\");\nstatic const std::string PARAM_HNSW_STREAMER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw.streamer.force_padding_result_enable\");\nstatic const std::string PARAM_HNSW_STREAMER_ESTIMATE_DOC_COUNT(\n    \"proxima.hnsw.streamer.estimate_doc_count\");\nstatic const std::string PARAM_HNSW_STREAMER_USE_ID_MAP(\n    \"proxima.hnsw.streamer.use_id_map\");\n\nstatic const std::string PARAM_HNSW_REDUCER_WORKING_PATH(\n    \"proxima.hnsw.reducer.working_path\");\nstatic const std::string PARAM_HNSW_REDUCER_NUM_OF_ADD_THREADS(\n    \"proxima.hnsw.reducer.num_of_add_threads\");\nstatic const std::string PARAM_HNSW_REDUCER_INDEX_NAME(\n    \"proxima.hnsw.reducer.index_name\");\nstatic const std::string PARAM_HNSW_REDUCER_EFCONSTRUCTION(\n    \"proxima.hnsw.reducer.efconstruction\");\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_searcher.h\"\n#include \"hnsw_algorithm.h\"\n#include \"hnsw_index_provider.h\"\n#include \"hnsw_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSearcher::HnswSearcher() = default;\n\nHnswSearcher::~HnswSearcher() = default;\n\nint HnswSearcher::init(const ailego::Params &search_params) {\n  params_ = search_params;\n  params_.get(PARAM_HNSW_SEARCHER_EF, &ef_);\n  params_.get(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params_.get(PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);\n  params_.get(PARAM_HNSW_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n  params_.get(PARAM_HNSW_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,\n              &neighbors_in_memory_enabled_);\n  params_.get(PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n              &bf_negative_probability_);\n  params_.get(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD,\n              &bruteforce_threshold_);\n  params_.get(PARAM_HNSW_SEARCHER_FORCE_PADDING_RESULT_ENABLE,\n              &force_padding_topk_enabled_);\n\n  if (ef_ == 0) {\n    ef_ = HnswEntity::kDefaultEf;\n  }\n  if (bf_negative_probability_ <= 0.0f || bf_negative_probability_ >= 1.0f) {\n    LOG_ERROR(\"[%s] must be in range (0,1)\",\n              PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  
entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);\n\n  state_ = STATE_INITED;\n\n  LOG_DEBUG(\n      \"Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u \"\n      \"neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u \"\n      \"forcePadding=%u\",\n      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,\n      neighbors_in_memory_enabled_, bf_negative_probability_,\n      bruteforce_threshold_, force_padding_topk_enabled_);\n\n  return 0;\n}\n\nvoid HnswSearcher::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; \";\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n}\n\nint HnswSearcher::cleanup() {\n  LOG_INFO(\"Begin HnswSearcher:cleanup\");\n\n  metric_.reset();\n  meta_.clear();\n  stats_.clear_attributes();\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  max_scan_ratio_ = HnswEntity::kDefaultScanRatio;\n  max_scan_num_ = 0U;\n  ef_ = HnswEntity::kDefaultEf;\n  bf_enabled_ = false;\n  bf_negative_probability_ = HnswEntity::kDefaultBFNegativeProbability;\n  bruteforce_threshold_ = HnswEntity::kDefaultBruteForceThreshold;\n  check_crc_enabled_ = false;\n  neighbors_in_memory_enabled_ = false;\n  entity_.cleanup();\n  state_ = STATE_INIT;\n\n  LOG_INFO(\"End HnswSearcher:cleanup\");\n\n  return 0;\n}\n\nint HnswSearcher::load(IndexStorage::Pointer container,\n                       IndexMetric::Pointer metric) {\n  if (state_ != STATE_INITED) {\n    LOG_ERROR(\"Init the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  LOG_INFO(\"Begin HnswSearcher:load\");\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = 
IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  ret = entity_.load(container, check_crc_enabled_);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswSearcher load index failed\");\n    return ret;\n  }\n\n  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));\n\n  if (metric) {\n    metric_ = metric;\n  } else {\n    metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n    if (!metric_) {\n      LOG_ERROR(\"CreateMetric failed, name: %s\", meta_.metric_name().c_str());\n      return IndexError_NoExist;\n    }\n    ret = metric_->init(meta_, meta_.metric_params());\n    if (ret != 0) {\n      LOG_ERROR(\"IndexMetric init failed, ret=%d\", ret);\n      return ret;\n    }\n    if (metric_->query_metric()) {\n      metric_ = metric_->query_metric();\n    }\n  }\n\n  if (!metric_->is_matched(meta_)) {\n    LOG_ERROR(\"IndexMetric not match index meta\");\n    return IndexError_Mismatch;\n  }\n\n  max_scan_num_ = static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt());\n  max_scan_num_ = std::max(4096U, max_scan_num_);\n\n  stats_.set_loaded_count(entity_.doc_cnt());\n  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = STATE_LOADED;\n  magic_ = IndexContext::GenerateMagic();\n\n  LOG_INFO(\"End HnswSearcher::load\");\n\n  return 0;\n}\n\nint HnswSearcher::unload() {\n  LOG_INFO(\"HnswSearcher unload index\");\n\n  meta_.clear();\n  entity_.cleanup();\n  metric_.reset();\n  max_scan_num_ = 0;\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswSearcher::update_context(HnswContext *ctx) const {\n  const HnswEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_num(max_scan_num_);\n  
ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  return ctx->update_context(HnswContext::kSearcherContext, meta_, metric_,\n                             entity, magic_);\n}\n\nint HnswSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                              uint32_t count, Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(query, qmeta, count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n  for (size_t q = 0; q < count; ++q) {\n    ctx->reset_query(query);\n    int ret = alg_->search(ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                                 uint32_t count,\n                                 Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n 
 }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().batch_dist(id);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->topk_heap().clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().batch_dist(id);\n          ctx->topk_heap().emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return 
IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswSearcher::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().batch_dist(id);\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n        
  }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->topk_heap().clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().batch_dist(id);\n            ctx->topk_heap().emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nIndexSearcher::Context::Pointer HnswSearcher::create_context() const {\n  if (ailego_unlikely(state_ != STATE_LOADED)) {\n    LOG_ERROR(\"Load the index first before create context\");\n    return Context::Pointer();\n  }\n  const HnswEntity::Pointer search_ctx_entity = entity_.clone();\n  if (!search_ctx_entity) {\n    LOG_ERROR(\"Failed to create search context entity\");\n    return Context::Pointer();\n  }\n  HnswContext *ctx = new (std::nothrow)\n      HnswContext(meta_.dimension(), metric_, search_ctx_entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_num(max_scan_num_);\n  uint32_t filter_mode =\n      bf_enabled_ ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap;\n  ctx->set_filter_mode(filter_mode);\n  ctx->set_filter_negative_probability(bf_negative_probability_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  if (ailego_unlikely(ctx->init(HnswContext::kSearcherContext)) != 0) {\n    LOG_ERROR(\"Init HnswContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n\n  return Context::Pointer(ctx);\n}\n\nIndexProvider::Pointer HnswSearcher::create_provider(void) const {\n  LOG_DEBUG(\"HnswSearcher create provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswEntity failed\");\n    return Provider::Pointer();\n  }\n  return Provider::Pointer(\n      new (std::nothrow) HnswIndexProvider(meta_, entity, \"HnswSearcher\"));\n}\n\nconst void *HnswSearcher::get_vector(uint64_t key) const {\n  return entity_.get_vector_by_key(key);\n}\n\nINDEX_FACTORY_REGISTER_SEARCHER(HnswSearcher);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_searcher_entity.h\"\n#include \"hnsw_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSearcher : public IndexSearcher {\n public:\n  using ContextPointer = IndexSearcher::Context::Pointer;\n\n public:\n  HnswSearcher(void);\n  ~HnswSearcher(void);\n\n  HnswSearcher(const HnswSearcher &) = delete;\n  HnswSearcher &operator=(const HnswSearcher &) = delete;\n\n protected:\n  //! Initialize Searcher\n  virtual int init(const ailego::Params &params) override;\n\n  //! Cleanup Searcher\n  virtual int cleanup(void) override;\n\n  //! Load Index from storage\n  virtual int load(IndexStorage::Pointer container,\n                   IndexMetric::Pointer metric) override;\n\n  //! Unload index from storage\n  virtual int unload(void) override;\n\n  //! KNN Search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          ContextPointer &context) const override {\n    return search_impl(query, qmeta, 1, context);\n  }\n\n  //! KNN Search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          ContextPointer &context) const override;\n\n  //! 
Linear Search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             ContextPointer &context) const override {\n    return search_bf_impl(query, qmeta, 1, context);\n  }\n\n  //! Linear Search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             ContextPointer &context) const override;\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, uint32_t count,\n      ContextPointer &context) const override;\n\n  //! Fetch vector by key\n  virtual const void *get_vector(uint64_t key) const override;\n\n  //! Create a searcher context\n  virtual ContextPointer create_context() const override;\n\n  //! Create a new iterator\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  virtual const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  virtual void print_debug_info() override;\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! 
current streamer/searcher\n  int update_context(HnswContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  HnswSearcherEntity entity_{};\n  HnswAlgorithm::UPointer alg_;  // impl graph algorithm\n\n  IndexMetric::Pointer metric_{};\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  Stats stats_;\n  uint32_t ef_{HnswEntity::kDefaultEf};\n  uint32_t max_scan_num_{0U};\n  uint32_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold};\n  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool neighbors_in_memory_enabled_{false};\n  bool force_padding_topk_enabled_{false};\n  float bf_negative_probability_{HnswEntity::kDefaultBFNegativeProbability};\n  uint32_t magic_{0U};\n\n  State state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_searcher_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_searcher_entity.h\"\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSearcherEntity::HnswSearcherEntity() {}\n\nint HnswSearcherEntity::cleanup(void) {\n  storage_.reset();\n  vectors_.reset();\n  keys_.reset();\n  neighbors_.reset();\n  neighbors_meta_.reset();\n  neighbors_in_memory_enabled_ = false;\n  loaded_ = false;\n\n  this->HnswEntity::cleanup();\n\n  return 0;\n}\n\nkey_t HnswSearcherEntity::get_key(node_id_t id) const {\n  const void *key;\n  if (ailego_unlikely(keys_->read(id * sizeof(key_t), &key, sizeof(key_t)) !=\n                      sizeof(key_t))) {\n    LOG_ERROR(\"Read key from segment failed\");\n    return kInvalidKey;\n  }\n  return *(reinterpret_cast<const key_t *>(key));\n}\n\n//! Get vector local id by key\nnode_id_t HnswSearcherEntity::get_id(key_t key) const {\n  if (ailego_unlikely(!mapping_)) {\n    LOG_ERROR(\"Index missing mapping segment\");\n    return kInvalidNodeId;\n  }\n\n  //! 
Do binary search\n  node_id_t start = 0UL;\n  node_id_t end = doc_cnt();\n  const void *data;\n  node_id_t idx = 0u;\n  while (start < end) {\n    idx = start + (end - start) / 2;\n    if (ailego_unlikely(\n            mapping_->read(idx * sizeof(node_id_t), &data, sizeof(node_id_t)) !=\n            sizeof(node_id_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    const key_t *mkey;\n    node_id_t local_id = *reinterpret_cast<const node_id_t *>(data);\n    if (ailego_unlikely(keys_->read(local_id * sizeof(key_t),\n                                    (const void **)(&mkey),\n                                    sizeof(key_t)) != sizeof(key_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    if (*mkey < key) {\n      start = idx + 1;\n    } else if (*mkey > key) {\n      end = idx;\n    } else {\n      return local_id;\n    }\n  }\n  return kInvalidNodeId;\n}\n\nconst void *HnswSearcherEntity::get_vector_by_key(key_t key) const {\n  node_id_t local_id = get_id(key);\n  if (ailego_unlikely(local_id == kInvalidNodeId)) {\n    return nullptr;\n  }\n\n  return get_vector(local_id);\n}\n\nconst void *HnswSearcherEntity::get_vector(node_id_t id) const {\n  size_t read_size = vector_size();\n  size_t offset = node_size() * id;\n\n  const void *vec;\n  if (ailego_unlikely(vectors_->read(offset, &vec, read_size) != read_size)) {\n    LOG_ERROR(\"Read vector from segment failed\");\n    return nullptr;\n  }\n  return vec;\n}\n\nint HnswSearcherEntity::get_vector(const node_id_t id,\n                                   IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nconst void *HnswSearcherEntity::get_vectors() const {\n  const void *vec;\n  size_t len = node_size() * doc_cnt();\n  if (vectors_->read(0, &vec, len) != len) {\n    LOG_ERROR(\"Read vectors from segment failed\");\n    return nullptr;\n  }\n  
return vec;\n}\n\nint HnswSearcherEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                   const void **vecs) const {\n  ailego_assert_with(count <= segment_datas_.size(), \"invalid count\");\n\n  size_t read_size = vector_size();\n\n  for (uint32_t i = 0; i < count; ++i) {\n    segment_datas_[i].offset = node_size() * ids[i];\n    segment_datas_[i].length = read_size;\n\n    ailego_assert_with(segment_datas_[i].offset < vectors_->data_size(),\n                       \"invalid offset\");\n  }\n  if (ailego_unlikely(!vectors_->read(&segment_datas_[0], count))) {\n    LOG_ERROR(\"Read vectors from segment failed\");\n    return IndexError_ReadData;\n  }\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = segment_datas_[i].data;\n  }\n\n  return 0;\n}\n\nint HnswSearcherEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  const void *vecs[count];\n  get_vector(ids, count, vecs);\n  for (uint32_t i = 0; i < count; ++i) {\n    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\nconst Neighbors HnswSearcherEntity::get_neighbors(level_t level,\n                                                  node_id_t id) const {\n  if (level == 0) {\n    if (neighbors_in_memory_enabled_) {\n      auto hd = reinterpret_cast<const NeighborsHeader *>(\n          fixed_neighbors_.get() + neighbors_size() * id);\n      return {hd->neighbor_cnt, hd->neighbors};\n    }\n\n    const GraphNeighborMeta *m;\n    if (ailego_unlikely(neighbors_meta_->read(id * sizeof(GraphNeighborMeta),\n                                              (const void **)(&m),\n                                              sizeof(GraphNeighborMeta)) !=\n                        sizeof(GraphNeighborMeta))) {\n      LOG_ERROR(\"Read neighbors meta from segment failed\");\n      return {0, nullptr};\n    }\n\n    const void *data;\n    if 
(ailego_unlikely(neighbors_->read(m->offset, &data,\n                                         m->neighbor_cnt * sizeof(node_id_t)) !=\n                        m->neighbor_cnt * sizeof(node_id_t))) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return {0, nullptr};\n    }\n    return {static_cast<uint32_t>(m->neighbor_cnt),\n            reinterpret_cast<const node_id_t *>(data)};\n  }\n\n  //! Read level > 0 neighbors\n  const HnswNeighborMeta *m;\n  if (ailego_unlikely(upper_neighbors_meta_->read(id * sizeof(HnswNeighborMeta),\n                                                  (const void **)(&m),\n                                                  sizeof(HnswNeighborMeta)) !=\n                      sizeof(HnswNeighborMeta))) {\n    LOG_ERROR(\"Read neighbors meta from segment failed\");\n    return {0, nullptr};\n  }\n\n  ailego_assert_with(level <= m->level, \"invalid level\");\n  size_t offset = m->offset + (level - 1) * upper_neighbors_size();\n  ailego_assert_with(offset <= upper_neighbors_->data_size(), \"invalid offset\");\n  const void *data;\n  if (ailego_unlikely(\n          upper_neighbors_->read(offset, &data, upper_neighbors_size()) !=\n          upper_neighbors_size())) {\n    LOG_ERROR(\"Read neighbors from segment failed\");\n    return {0, nullptr};\n  }\n\n  auto hd = reinterpret_cast<const NeighborsHeader *>(data);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswSearcherEntity::load(const IndexStorage::Pointer &container,\n                             bool check_crc) {\n  storage_ = container;\n\n  int ret = load_segments(check_crc);\n  if (ret != 0) {\n    return ret;\n  }\n\n  loaded_ = true;\n\n  LOG_INFO(\n      \"Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu \"\n      \"l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu \"\n      \"vectorSize=%zu nodeSize=%zu vectorSegmentSize=%zu keySegmentSize=%zu \"\n      \"neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu \",\n      doc_cnt(), 
entry_point(), cur_max_level(), ef_construction(),\n      l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(), vector_size(),\n      node_size(), vectors_->data_size(), keys_->data_size(),\n      neighbors_ == nullptr ? 0 : neighbors_->data_size(),\n      neighbors_meta_ == nullptr ? 0 : neighbors_meta_->data_size());\n\n  return 0;\n}\n\nint HnswSearcherEntity::load_segments(bool check_crc) {\n  //! load header\n  const void *data = nullptr;\n  HNSWHeader hd;\n  auto graph_hd_segment = storage_->get(kGraphHeaderSegmentId);\n  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {\n    LOG_ERROR(\"Miss or invalid segment %s\", kGraphHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                             sizeof(hd.graph)) != sizeof(hd.graph)) {\n    LOG_ERROR(\"Read segment %s failed\", kGraphHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.graph, data, sizeof(hd.graph));\n\n  auto hnsw_hd_segment = storage_->get(kHnswHeaderSegmentId);\n  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Miss or invalid segment %s\", kHnswHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Read segment %s failed\", kHnswHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.hnsw, data, sizeof(hd.hnsw));\n  *mutable_header() = hd;\n  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));\n\n  vectors_ = storage_->get(kGraphFeaturesSegmentId);\n  if (!vectors_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kGraphFeaturesSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  keys_ = storage_->get(kGraphKeysSegmentId);\n  if (!keys_) {\n    
LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kGraphKeysSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  neighbors_ = storage_->get(kGraphNeighborsSegmentId);\n  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kGraphNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n  neighbors_meta_ = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta_ ||\n      neighbors_meta_->data_size() < sizeof(GraphNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_ = storage_->get(kHnswNeighborsSegmentId);\n  if (!upper_neighbors_ ||\n      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kHnswNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_meta_ = storage_->get(kHnswOffsetsSegmentId);\n  if (!upper_neighbors_meta_ || upper_neighbors_meta_->data_size() <\n                                    sizeof(HnswNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kHnswOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  mapping_ = storage_->get(kGraphMappingSegmentId);\n  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kGraphMappingSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (check_crc) {\n    std::vector<SegmentPointer> segments;\n    segments.emplace_back(graph_hd_segment);\n    segments.emplace_back(hnsw_hd_segment);\n    segments.emplace_back(vectors_);\n    segments.emplace_back(keys_);\n\n    
segments.emplace_back(neighbors_);\n    segments.emplace_back(neighbors_meta_);\n    segments.emplace_back(upper_neighbors_);\n    segments.emplace_back(upper_neighbors_meta_);\n\n    if (!do_crc_check(segments)) {\n      LOG_ERROR(\"Check index crc failed, the index may broken\");\n      return IndexError_Runtime;\n    }\n  }\n\n  if (neighbors_in_memory_enabled_) {\n    int ret = load_and_flat_neighbors();\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  return 0;\n}\n\nint HnswSearcherEntity::load_and_flat_neighbors() {\n  fixed_neighbors_.reset(\n      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},\n      std::default_delete<char[]>());\n  if (!fixed_neighbors_) {\n    LOG_ERROR(\"Malloc memory failed\");\n    return IndexError_NoMemory;\n  }\n\n  //! Get a new segemnt to release the buffer after loading neighbors\n  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const GraphNeighborMeta *neighbors_index = nullptr;\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           neighbors_meta->data_size()) !=\n      neighbors_meta->data_size()) {\n    LOG_ERROR(\"Read segment %s data failed\", kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  const char *neighbor_data;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);\n    if (ailego_unlikely(\n            neighbors_->read(neighbors_index[id].offset,\n                             reinterpret_cast<const void **>(&neighbor_data),\n                             rd_size) != rd_size)) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n    // copy level 0 neighbors to fixed size neighbors memory\n    char *dst = fixed_neighbors_.get() + 
neighbors_size() * id;\n    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;\n    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);\n  }\n\n  return 0;\n}\n\nint HnswSearcherEntity::get_fixed_neighbors(\n    std::vector<uint32_t> *fixed_neighbors) const {\n  //! Get a new segemnt to release the buffer after loading neighbors\n  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const GraphNeighborMeta *neighbors_index = nullptr;\n  size_t meta_size = neighbors_meta->data_size();\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           meta_size) != meta_size) {\n    LOG_ERROR(\"Read segment %s data failed\", kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  size_t fixed_neighbor_cnt = l0_neighbor_cnt();\n  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);\n\n  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();\n  size_t total_neighbor_cnt = 0;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;\n    if (cur_neighbor_cnt == 0) {\n      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;\n      continue;\n    }\n    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);\n    const uint32_t *neighbors;\n    if (neighbors_->read(neighbors_index[id].offset,\n                         reinterpret_cast<const void **>(&neighbors),\n                         rd_size) != rd_size) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n\n    // copy level 0 neighbors to fixed size neighbors memory\n    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;\n    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);\n\n    (*fixed_neighbors)[neighbors_cnt_offset 
+ id] = cur_neighbor_cnt;\n    total_neighbor_cnt += cur_neighbor_cnt;\n  }\n  LOG_INFO(\"total neighbor cnt: %zu, average neighbor cnt: %zu\",\n           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());\n\n  return 0;\n}\n\nbool HnswSearcherEntity::do_crc_check(\n    std::vector<SegmentPointer> &segments) const {\n  constexpr size_t blk_size = 4096;\n  const void *data;\n  for (auto &segment : segments) {\n    size_t offset = 0;\n    size_t rd_size;\n    uint32_t crc = 0;\n    while (offset < segment->data_size()) {\n      size_t size = std::min(blk_size, segment->data_size() - offset);\n      if ((rd_size = segment->read(offset, &data, size)) <= 0) {\n        break;\n      }\n      offset += rd_size;\n      crc = ailego::Crc32c::Hash(data, rd_size, crc);\n    }\n    if (crc != segment->data_crc()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nconst HnswEntity::Pointer HnswSearcherEntity::clone() const {\n  auto vectors = vectors_->clone();\n  if (ailego_unlikely(!vectors)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphFeaturesSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n  auto keys = keys_->clone();\n  if (ailego_unlikely(!keys)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphKeysSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n\n  auto mapping = mapping_->clone();\n  if (ailego_unlikely(!mapping)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphMappingSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n\n  auto neighbors = neighbors_->clone();\n  if (ailego_unlikely(!neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphNeighborsSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n  auto upper_neighbors = upper_neighbors_->clone();\n  if (ailego_unlikely(!neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\", kHnswNeighborsSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n  auto neighbors_meta = neighbors_meta_->clone();\n  if (ailego_unlikely(!neighbors_meta)) 
{\n    LOG_ERROR(\"clone segment %s failed\", kGraphOffsetsSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n  auto upper_neighbors_meta = upper_neighbors_meta_->clone();\n  if (ailego_unlikely(!upper_neighbors_meta)) {\n    LOG_ERROR(\"clone segment %s failed\", kHnswOffsetsSegmentId.c_str());\n    return HnswEntity::Pointer();\n  }\n\n  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,\n                                   upper_neighbors_meta};\n\n  HnswSearcherEntity *entity = new (std::nothrow)\n      HnswSearcherEntity(header(), vectors, keys, mapping, neighbor_group,\n                         fixed_neighbors_, neighbors_in_memory_enabled_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswSearcherEntity new failed\");\n  }\n\n  return HnswEntity::Pointer(entity);\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_searcher_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_builder_entity.h\"\n#include \"hnsw_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSearcherEntity : public HnswEntity {\n public:\n  using Pointer = std::shared_ptr<HnswSearcherEntity>;\n  using SegmentPointer = IndexStorage::Segment::Pointer;\n\n public:\n  struct SegmentGroupParam {\n    SegmentGroupParam(SegmentPointer neighbors_in,\n                      SegmentPointer neighbors_meta_in,\n                      SegmentPointer upper_neighbors_in,\n                      SegmentPointer upper_neighbors_meta_in)\n        : neighbors{neighbors_in},\n          neighbors_meta{neighbors_meta_in},\n          upper_neighbors{upper_neighbors_in},\n          upper_neighbors_meta{upper_neighbors_meta_in} {}\n\n    SegmentPointer neighbors{nullptr};\n    SegmentPointer neighbors_meta{nullptr};\n    SegmentPointer upper_neighbors{nullptr};\n    SegmentPointer upper_neighbors_meta{nullptr};\n  };\n\n  //! Constructor\n  HnswSearcherEntity();\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector local id by key\n  node_id_t get_id(key_t key) const;\n\n  //! 
Get vector feature data by key\n  virtual const void *get_vector_by_key(key_t key) const override;\n\n  //! Get vector feature data by id\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! Get vector feature data by id\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! Get all vectors\n  const void *get_vectors() const;\n\n  //! Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  virtual int load(const IndexStorage::Pointer &container,\n                   bool check_crc) override;\n\n  int load_segments(bool check_crc);\n\n  virtual int cleanup(void) override;\n\n public:\n  bool is_loaded() const {\n    return loaded_;\n  }\n\n  void set_neighbors_in_memory(bool enabled) {\n    neighbors_in_memory_enabled_ = enabled;\n  }\n\n  //! get fixed length neighbors data\n  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;\n\n private:\n  //! 
Constructor\n  HnswSearcherEntity(const HNSWHeader &hd, const SegmentPointer &vectors,\n                     const SegmentPointer &keys, const SegmentPointer &mapping,\n                     const SegmentGroupParam &neighbor_group,\n                     const std::shared_ptr<char> &fixed_neighbors,\n                     bool neighbors_in_memory_enabled)\n      : HnswEntity(hd),\n        vectors_(vectors),\n        keys_(keys),\n        mapping_(mapping),\n        neighbors_(neighbor_group.neighbors),\n        neighbors_meta_(neighbor_group.neighbors_meta),\n        upper_neighbors_(neighbor_group.upper_neighbors),\n        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),\n        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {\n    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),\n                          IndexStorage::SegmentData(0U, 0U));\n    fixed_neighbors_ = fixed_neighbors;\n  }\n\n  bool do_crc_check(std::vector<SegmentPointer> &segments) const;\n\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! 
If neighbors_in_memory_enabled, load the level0 neighbors to memory\n  int load_and_flat_neighbors(void);\n\n public:\n  HnswSearcherEntity(const HnswSearcherEntity &) = delete;\n  HnswSearcherEntity &operator=(const HnswSearcherEntity &) = delete;\n\n private:\n  IndexStorage::Pointer storage_{};\n\n  SegmentPointer vectors_{};\n  SegmentPointer keys_{};\n  SegmentPointer mapping_{};\n\n  SegmentPointer neighbors_{};\n  SegmentPointer neighbors_meta_{};\n  SegmentPointer upper_neighbors_{};\n  SegmentPointer upper_neighbors_meta_{};\n\n  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};\n  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors\n  bool neighbors_in_memory_enabled_{false};\n  bool loaded_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_streamer.h\"\n#include <iostream>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/pattern/defer.h>\n#include <ailego/utility/memory_helper.h>\n#include \"utility/sparse_utility.h\"\n#include \"hnsw_algorithm.h\"\n#include \"hnsw_context.h\"\n#include \"hnsw_dist_calculator.h\"\n#include \"hnsw_index_provider.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswStreamer::HnswStreamer() : entity_(stats_) {}\n\nHnswStreamer::~HnswStreamer() {\n  if (state_ == STATE_INITED) {\n    this->cleanup();\n  }\n}\n\nint HnswStreamer::init(const IndexMeta &imeta, const ailego::Params &params) {\n  meta_ = imeta;\n  meta_.set_streamer(\"HnswStreamer\", HnswEntity::kRevision, params);\n\n  params.get(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, &max_index_size_);\n\n  params.get(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, &upper_max_neighbor_cnt_);\n  float multiplier = HnswEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER, &multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n\n  multiplier = HnswEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = upper_max_neighbor_cnt_;\n  
params.get(PARAM_HNSW_STREAMER_SCALING_FACTOR, &scaling_factor_);\n\n  params.get(PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);\n  params.get(PARAM_HNSW_STREAMER_EF, &ef_);\n  params.get(PARAM_HNSW_STREAMER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);\n  params.get(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n             &bf_negative_prob_);\n  params.get(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, &bruteforce_threshold_);\n  params.get(PARAM_HNSW_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params.get(PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n  params.get(PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n  params.get(PARAM_HNSW_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n  params.get(PARAM_HNSW_STREAMER_CHUNK_SIZE, &chunk_size_);\n  params.get(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, &filter_same_key_);\n  params.get(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, &get_vector_enabled_);\n  params.get(PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);\n  params.get(PARAM_HNSW_STREAMER_FORCE_PADDING_RESULT_ENABLE,\n             &force_padding_topk_enabled_);\n  params.get(PARAM_HNSW_STREAMER_USE_ID_MAP, &use_id_map_);\n  entity_.set_use_key_info_map(use_id_map_);\n\n  params.get(PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);\n  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str(),\n              PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    return IndexError_InvalidArgument;\n  } else if (docs_soft_limit_ == 0UL) {\n    docs_soft_limit_ =\n        docs_hard_limit_ * HnswEntity::kDefaultDocsSoftLimitRatio;\n  }\n\n  if (ef_ == 0U) {\n    ef_ = HnswEntity::kDefaultEf;\n  }\n  if (ef_construction_ == 0U) {\n    ef_construction_ = HnswEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0U) {\n    
upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d)\",\n              PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              HnswEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0U) {\n    l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"MaxL0NeighborCnt must be in range (0,%d)\",\n              HnswEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%u] must be <= [%s]-[%u]\",\n              PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT.c_str(), min_neighbor_cnt_,\n              PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              upper_max_neighbor_cnt_);\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {\n    LOG_ERROR(\"[%s] must be in range (0,1)\",\n              PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_STREAMER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n    LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n              PARAM_HNSW_STREAMER_MAX_SCAN_RATIO.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_limit_ < min_scan_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT.c_str(),\n              PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT.c_str());\n    return 
IndexError_InvalidArgument;\n  }\n\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n  if (chunk_size_ == 0UL) {\n    chunk_size_ = HnswEntity::kDefaultChunkSize;\n  }\n  if (chunk_size_ > HnswEntity::kMaxChunkSize) {\n    LOG_ERROR(\"[%s] must be < %zu\", PARAM_HNSW_STREAMER_CHUNK_SIZE.c_str(),\n              HnswEntity::kMaxChunkSize);\n    return IndexError_InvalidArgument;\n  }\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_prune_cnt(prune_cnt);\n\n  entity_.set_vector_size(meta_.element_size());\n\n  entity_.set_chunk_size(chunk_size_);\n  entity_.set_filter_same_key(filter_same_key_);\n  entity_.set_get_vector(get_vector_enabled_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n\n  int ret = entity_.init(docs_hard_limit_);\n  if (ret != 0) {\n    LOG_ERROR(\"Hnsw entity init failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n\n  LOG_DEBUG(\n      \"Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu \"\n      \"efConstruction=%u ef=%u upperMaxNeighborCnt=%u l0MaxNeighborCnt=%u \"\n      \"scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu \"\n      \"bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f \"\n      \"checkCrcEnabled=%d pruneSize=%zu vectorSize=%u chunkSize=%zu \"\n      \"filterSameKey=%u getVectorEnabled=%u minNeighborCount=%u \"\n      \"forcePadding=%u \",\n      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,\n      ef_, upper_max_neighbor_cnt_, l0_max_neighbor_cnt_, scaling_factor_,\n      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,\n      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,\n      meta_.element_size(), chunk_size_, filter_same_key_, get_vector_enabled_,\n      min_neighbor_cnt_, 
force_padding_topk_enabled_);\n\n  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswStreamer::cleanup(void) {\n  if (state_ == STATE_OPENED) {\n    this->close();\n  }\n\n  LOG_INFO(\"HnswStreamer cleanup\");\n\n  meta_.clear();\n  metric_.reset();\n  stats_.clear();\n  entity_.cleanup();\n\n  if (alg_) {\n    alg_->cleanup();\n  }\n\n  max_index_size_ = 0UL;\n  docs_hard_limit_ = HnswEntity::kDefaultDocsHardLimit;\n  docs_soft_limit_ = 0UL;\n  upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;\n  l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;\n  ef_ = HnswEntity::kDefaultEf;\n  ef_construction_ = HnswEntity::kDefaultEfConstruction;\n  bf_enabled_ = false;\n  scaling_factor_ = HnswEntity::kDefaultScalingFactor;\n  bruteforce_threshold_ = HnswEntity::kDefaultBruteForceThreshold;\n  max_scan_limit_ = HnswEntity::kDefaultMaxScanLimit;\n  min_scan_limit_ = HnswEntity::kDefaultMinScanLimit;\n  chunk_size_ = HnswEntity::kDefaultChunkSize;\n  bf_negative_prob_ = HnswEntity::kDefaultBFNegativeProbability;\n  max_scan_ratio_ = HnswEntity::kDefaultScanRatio;\n  state_ = STATE_INIT;\n  check_crc_enabled_ = false;\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n\n  return 0;\n}\n\nint HnswStreamer::open(IndexStorage::Pointer stg) {\n  LOG_INFO(\"HnswStreamer open\");\n\n  if (ailego_unlikely(state_ != STATE_INITED)) {\n    LOG_ERROR(\"Open storage failed, init streamer first!\");\n    return IndexError_NoReady;\n  }\n  int ret = entity_.open(std::move(stg), max_index_size_, check_crc_enabled_);\n  if (ret != 0) {\n    return ret;\n  }\n  IndexMeta index_meta;\n  ret = entity_.get_index_meta(&index_meta);\n  if (ret == IndexError_NoExist) {\n    // Set IndexMeta for the new index\n    ret = entity_.set_index_meta(meta_);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to set index meta for %s\", 
IndexError::What(ret));\n      return ret;\n    }\n  } else if (ret != 0) {\n    LOG_ERROR(\"Failed to get index meta for %s\", IndexError::What(ret));\n    return ret;\n  } else {\n    if (index_meta.dimension() != meta_.dimension() ||\n        index_meta.element_size() != meta_.element_size() ||\n        index_meta.metric_name() != meta_.metric_name() ||\n        index_meta.data_type() != meta_.data_type()) {\n      LOG_ERROR(\"IndexMeta mismatch from the previous in index\");\n      return IndexError_Mismatch;\n    }\n    // The IndexMetric Params may be updated like MipsSquaredEuclidean\n    auto metric_params = index_meta.metric_params();\n    metric_params.merge(meta_.metric_params());\n    meta_.set_metric(index_meta.metric_name(), 0, metric_params);\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create metric %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init metric, ret=%d\", ret);\n    return ret;\n  }\n\n  if (!metric_->distance()) {\n    LOG_ERROR(\"Invalid metric distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!metric_->batch_distance()) {\n    LOG_ERROR(\"Invalid metric batch distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  add_distance_ = metric_->distance();\n  add_batch_distance_ = metric_->batch_distance();\n\n  search_distance_ = add_distance_;\n  search_batch_distance_ = add_batch_distance_;\n\n  if (metric_->query_metric() && metric_->query_metric()->distance() &&\n      metric_->query_metric()->batch_distance()) {\n    search_distance_ = metric_->query_metric()->distance();\n    search_batch_distance_ = metric_->query_metric()->batch_distance();\n  }\n\n  state_ = STATE_OPENED;\n  magic_ = IndexContext::GenerateMagic();\n\n  return 0;\n}\n\nint HnswStreamer::close(void) {\n  LOG_INFO(\"HnswStreamer close\");\n\n  
stats_.clear();\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  int ret = entity_.close();\n  if (ret != 0) {\n    return ret;\n  }\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswStreamer::flush(uint64_t checkpoint) {\n  LOG_INFO(\"HnswStreamer flush checkpoint=%zu\", (size_t)checkpoint);\n\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  return entity_.flush(checkpoint);\n}\n\nint HnswStreamer::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"HnswStreamer dump\");\n\n  shared_mutex_.lock();\n  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });\n\n  meta_.set_searcher(\"HnswSearcher\", HnswEntity::kRevision, ailego::Params());\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n  return entity_.dump(dumper);\n}\n\nIndexStreamer::Context::Pointer HnswStreamer::create_context(void) const {\n  if (ailego_unlikely(state_ != STATE_OPENED)) {\n    LOG_ERROR(\"Create context failed, open storage first!\");\n    return Context::Pointer();\n  }\n\n  HnswEntity::Pointer entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"CreateContext clone init failed\");\n    return Context::Pointer();\n  }\n  HnswContext *ctx =\n      new (std::nothrow) HnswContext(meta_.dimension(), metric_, entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_filter_mode(bf_enabled_ ? 
VisitFilter::BloomFilter\n                                   : VisitFilter::ByteMap);\n  ctx->set_filter_negative_probability(bf_negative_prob_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  if (ailego_unlikely(ctx->init(HnswContext::kStreamerContext)) != 0) {\n    LOG_ERROR(\"Init HnswContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n  uint32_t estimate_doc_count = 0;\n  if (meta_.streamer_params().get(PARAM_HNSW_STREAMER_ESTIMATE_DOC_COUNT,\n                                  &estimate_doc_count)) {\n    LOG_DEBUG(\"HnswStreamer doc_count[%zu] estimate[%zu]\",\n              (size_t)entity_.doc_cnt(), (size_t)estimate_doc_count);\n  }\n  ctx->check_need_adjuct_ctx(std::max(entity_.doc_cnt(), estimate_doc_count));\n\n  return Context::Pointer(ctx);\n}\n\nIndexProvider::Pointer HnswStreamer::create_provider(void) const {\n  LOG_DEBUG(\"HnswStreamer create provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswEntity failed\");\n    return nullptr;\n  }\n  return Provider::Pointer(\n      new HnswIndexProvider(meta_, entity, \"HnswStreamer\"));\n}\n\nint HnswStreamer::update_context(HnswContext *ctx) const {\n  const HnswEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  return ctx->update_context(HnswContext::kStreamerContext, meta_, metric_,\n                             entity, magic_);\n}\n\n//! 
Add a vector with id into index\nint HnswStreamer::add_with_id_impl(uint32_t id, const void *query,\n                                   const IndexQueryMeta &qmeta,\n                                   IndexStreamer::Context::Pointer &context) {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n                PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n               PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);\n  ctx->reset_query(query);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n\n  if (metric_->support_train()) {\n    const std::lock_guard<std::mutex> lk(mutex_);\n    ret = metric_->train(query, meta_.dimension());\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw streamer metric train failed\");\n      (*stats_.mutable_discarded_count())++;\n      return ret;\n    }\n  
}\n\n  level_t level = alg_->get_random_level();\n  ret = entity_.add_vector_with_id(level, id, query);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw steamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Runtime;\n  }\n  (*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n//! Add a vector into index\nint HnswStreamer::add_impl(uint64_t pkey, const void *query,\n                           const IndexQueryMeta &qmeta,\n                           IndexStreamer::Context::Pointer &context) {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n                PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n               PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);\n  ctx->reset_query(query);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n\n  if (metric_->support_train()) {\n    const std::lock_guard<std::mutex> lk(mutex_);\n    ret = metric_->train(query, meta_.dimension());\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw streamer metric train failed\");\n      (*stats_.mutable_discarded_count())++;\n      return ret;\n    }\n  }\n\n  level_t level = alg_->get_random_level();\n  node_id_t id;\n  ret = entity_.add_vector(level, pkey, query, &id);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw steamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return 
IndexError_Runtime;\n  }\n  (*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n\nint HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                              IndexStreamer::Context::Pointer &context) const {\n  return search_impl(query, qmeta, 1, context);\n}\n\n//! Similarity search\nint HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                              uint32_t count,\n                              IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(query, qmeta, count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n  for (size_t q = 0; q < count; ++q) {\n    ctx->reset_query(query);\n    ret = alg_->search(ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nvoid HnswStreamer::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    if (entity_.get_key(id) == kInvalidKey) {\n      continue;\n    }\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; 
\";\n    if (neighbours.size() == 0) std::cout << std::endl;\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n\n  // entity_.print_key_map();\n}\n\nint HnswStreamer::search_bf_impl(\n    const void *query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) const {\n  return search_bf_impl(query, qmeta, 1, context);\n}\n\nint HnswStreamer::search_bf_impl(\n    const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().batch_dist(id);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    auto &filter = ctx->filter();\n    auto &topk = ctx->topk_heap();\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      topk.clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!filter.is_valid() || !filter(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().batch_dist(id);\n          topk.emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n 
 }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswStreamer::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().batch_dist(id);\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              
topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    auto &filter = ctx->filter();\n    auto &topk = ctx->topk_heap();\n\n    for (size_t q = 0; q < count; ++q) {\n      ctx->reset_query(query);\n      topk.clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        key_t pk = p_keys[q][idx];\n        if (!filter.is_valid() || !filter(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().batch_dist(id);\n            topk.emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\n\nINDEX_FACTORY_REGISTER_STREAMER(HnswStreamer);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <ailego/parallel/lock.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_algorithm.h\"\n#include \"hnsw_streamer_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswStreamer : public IndexStreamer {\n public:\n  using ContextPointer = IndexStreamer::Context::Pointer;\n\n  HnswStreamer(void);\n  virtual ~HnswStreamer(void);\n\n  HnswStreamer(const HnswStreamer &streamer) = delete;\n  HnswStreamer &operator=(const HnswStreamer &streamer) = delete;\n\n protected:\n  //! Initialize Streamer\n  virtual int init(const IndexMeta &imeta,\n                   const ailego::Params &params) override;\n\n  //! Cleanup Streamer\n  virtual int cleanup(void) override;\n\n  //! Create a context\n  virtual Context::Pointer create_context(void) const override;\n\n  //! Create a new iterator\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Add a vector into index\n  virtual int add_impl(uint64_t pkey, const void *query,\n                       const IndexQueryMeta &qmeta,\n                       Context::Pointer &context) override;\n\n  //! Add a vector with id into index\n  virtual int add_with_id_impl(uint32_t id, const void *query,\n                               const IndexQueryMeta &qmeta,\n                               Context::Pointer &context) override;\n\n  //! 
Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          Context::Pointer &context) const override;\n\n  //! Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, uint32_t count,\n      ContextPointer &context) const override;\n\n  //! Fetch vector by key\n  virtual const void *get_vector(uint64_t key) const override {\n    return entity_.get_vector_by_key(key);\n  }\n\n  virtual int get_vector(const uint64_t key,\n                         IndexStorage::MemoryBlock &block) const override {\n    return entity_.get_vector_by_key(key, block);\n  }\n\n  //! 
Fetch vector by id\n  virtual const void *get_vector_by_id(uint32_t id) const override {\n    return entity_.get_vector(id);\n  }\n\n  virtual int get_vector_by_id(\n      const uint32_t id, IndexStorage::MemoryBlock &block) const override {\n    return entity_.get_vector(id, block);\n  }\n\n  //! Open index from file path\n  virtual int open(IndexStorage::Pointer stg) override;\n\n  //! Close file\n  virtual int close(void) override;\n\n  //! flush file\n  virtual int flush(uint64_t checkpoint) override;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  virtual void print_debug_info() override;\n\n private:\n  inline int check_params(const void *query,\n                          const IndexQueryMeta &qmeta) const {\n    if (ailego_unlikely(!query)) {\n      LOG_ERROR(\"null query\");\n      return IndexError_InvalidArgument;\n    }\n    if (ailego_unlikely(qmeta.dimension() != meta_.dimension() ||\n                        qmeta.data_type() != meta_.data_type() ||\n                        qmeta.element_size() != meta_.element_size())) {\n      LOG_ERROR(\"Unsupported query meta\");\n      return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,\n                                        uint32_t count) const {\n    for (uint32_t i = 0; i < count; ++i) {\n      if (sparse_count[i] != 0)\n        LOG_ERROR(\"Sparse cout is not empty. Index: %u, Sparse Count: %u\", i,\n                  sparse_count[i]);\n      return IndexError_InvalidArgument;\n    }\n\n    return 0;\n  }\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! 
current streamer/searcher\n  int update_context(HnswContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };\n  class Stats : public IndexStreamer::Stats {\n   public:\n    void clear(void) {\n      set_revision_id(0u);\n      set_loaded_count(0u);\n      set_added_count(0u);\n      set_discarded_count(0u);\n      set_index_size(0u);\n      set_dumped_size(0u);\n      set_check_point(0u);\n      set_create_time(0u);\n      set_update_time(0u);\n      clear_attributes();\n    }\n  };\n\n  HnswStreamerEntity entity_;\n  HnswAlgorithm::UPointer alg_;\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n\n  IndexMetric::MatrixDistance add_distance_{};\n  IndexMetric::MatrixDistance search_distance_{};\n\n  IndexMetric::MatrixBatchDistance add_batch_distance_{};\n  IndexMetric::MatrixBatchDistance search_batch_distance_{};\n\n  Stats stats_{};\n  std::mutex mutex_{};\n\n  size_t max_index_size_{0UL};\n  size_t chunk_size_{HnswEntity::kDefaultChunkSize};\n  size_t docs_hard_limit_{HnswEntity::kDefaultDocsHardLimit};\n  size_t docs_soft_limit_{0UL};\n  uint32_t min_neighbor_cnt_{0u};\n  uint32_t upper_max_neighbor_cnt_{HnswEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t l0_max_neighbor_cnt_{HnswEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t ef_{HnswEntity::kDefaultEf};\n  uint32_t ef_construction_{HnswEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswEntity::kDefaultScalingFactor};\n  size_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold};\n  size_t max_scan_limit_{HnswEntity::kDefaultMaxScanLimit};\n  size_t min_scan_limit_{HnswEntity::kDefaultMinScanLimit};\n  float bf_negative_prob_{HnswEntity::kDefaultBFNegativeProbability};\n  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};\n\n  uint32_t magic_{0U};\n  State state_{STATE_INIT};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n  bool 
force_padding_topk_enabled_{false};\n  bool use_id_map_{true};\n\n  //! avoid add vector while dumping index\n  ailego::SharedMutex shared_mutex_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_streamer_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_streamer_entity.h\"\n#include <ailego/utility/memory_helper.h>\n\n// #define DEBUG_PRINT\n\nnamespace zvec {\nnamespace core {\n\nHnswStreamerEntity::HnswStreamerEntity(IndexStreamer::Stats &stats)\n    : stats_(stats) {}\n\nHnswStreamerEntity::~HnswStreamerEntity() {}\n\nint HnswStreamerEntity::init(size_t max_doc_cnt) {\n  if (std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt) {\n    LOG_ERROR(\"scalingFactor=%zu is too small\", scaling_factor());\n    return IndexError_InvalidArgument;\n  }\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  broker_ = std::make_shared<ChunkBroker>(stats_);\n  upper_neighbor_index_ = std::make_shared<NIHashMap>();\n  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();\n  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();\n  if (!keys_map_ || !upper_neighbor_index_ || !broker_ || !keys_map_lock_) {\n    LOG_ERROR(\"HnswStreamerEntity new object failed\");\n    return IndexError_NoMemory;\n  }\n  keys_map_->set_empty_key(kInvalidKey);\n\n  neighbor_size_ = neighbors_size();\n  upper_neighbor_size_ = upper_neighbors_size();\n\n  //! 
vector + key + level 0 neighbors\n  size_t size = vector_size() + sizeof(key_t) + neighbor_size_;\n\n  size = AlignSize(size);\n  set_node_size(size);\n  return 0;\n}\n\nint HnswStreamerEntity::cleanup() {\n  std::lock_guard<std::mutex> lock(mutex_);\n  mutable_header()->clear();\n  chunk_size_ = kDefaultChunkSize;\n  node_index_mask_bits_ = 0U;\n  node_index_mask_ = 0U;\n  node_cnt_per_chunk_ = 0U;\n  neighbor_size_ = 0U;\n  upper_neighbor_size_ = 0U;\n  if (upper_neighbor_index_) {\n    upper_neighbor_index_->cleanup();\n  }\n  if (keys_map_) {\n    keys_map_->clear();\n  }\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n  broker_.reset();\n\n  return 0;\n}\n\nint HnswStreamerEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {\n  std::vector<char> buffer(neighbor_size_);\n  NeighborsHeader *hd = reinterpret_cast<NeighborsHeader *>(buffer.data());\n  hd->neighbor_cnt = neighbors.size();\n  size_t i = 0;\n  for (; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t size = reinterpret_cast<char *>(&hd->neighbors[i]) - &buffer[0];\n  size_t ret = loc.first->write(loc.second, hd, size);\n  if (ailego_unlikely(ret != size)) {\n    LOG_ERROR(\"Write neighbor header failed, ret=%zu\", ret);\n\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nconst Neighbors HnswStreamerEntity::get_neighbors(level_t level,\n                                                  node_id_t id) const {\n  Chunk *chunk = nullptr;\n  size_t offset = 0UL;\n  size_t neighbor_size = neighbor_size_;\n  if (level == 0UL) {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    offset =\n        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);\n\n    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    
ailego_assert_with(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n    chunk = node_chunks_[chunk_idx].get();\n  } else {\n    auto p = get_upper_neighbor_chunk_loc(level, id);\n    chunk = upper_neighbor_chunks_[p.first].get();\n    offset = p.second;\n    neighbor_size = upper_neighbor_size_;\n  }\n\n  ailego_assert_with(offset < chunk->data_size(), \"invalid chunk offset\");\n  IndexStorage::MemoryBlock neighbor_block;\n  size_t size = chunk->read(offset, neighbor_block, neighbor_size);\n  if (ailego_unlikely(size != neighbor_size)) {\n    LOG_ERROR(\"Read neighbor header failed, ret=%zu\", size);\n    return Neighbors();\n  }\n  return Neighbors(neighbor_block);\n}\n\n//! Get vector data by key\nconst void *HnswStreamerEntity::get_vector(node_id_t id) const {\n  auto loc = get_vector_chunk_loc(id);\n  const void *vec = nullptr;\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = vector_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, &vec, read_size);\n  if (ailego_unlikely(ret != read_size)) {\n    LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n              loc.second, read_size, ret);\n  }\n\n  return vec;\n}\n\nint HnswStreamerEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                   const void **vecs) const {\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = vector_size();\n\n    size_t ret = node_chunks_[loc.first]->read(loc.second, &vecs[i], read_size);\n    if (ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, 
offset=%u, read size=%zu, ret=%zu\",\n                loc.second, read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n  return 0;\n}\n\nint HnswStreamerEntity::get_vector(const node_id_t id,\n                                   IndexStorage::MemoryBlock &block) const {\n  auto loc = get_vector_chunk_loc(id);\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = vector_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);\n  if (ailego_unlikely(ret != read_size)) {\n    LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n              loc.second, read_size, ret);\n    return IndexError_ReadData;\n  }\n  return 0;\n}\n\nint HnswStreamerEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  vec_blocks.resize(count);\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = vector_size();\n\n    size_t ret =\n        node_chunks_[loc.first]->read(loc.second, vec_blocks[i], read_size);\n    if (ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n                loc.second, read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n  return 0;\n}\n\nkey_t HnswStreamerEntity::get_key(node_id_t id) const {\n  if (use_key_info_map_) {\n    auto loc = get_key_chunk_loc(id);\n    IndexStorage::MemoryBlock key_block;\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < 
node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n    size_t ret =\n        node_chunks_[loc.first]->read(loc.second, key_block, sizeof(key_t));\n    if (ailego_unlikely(ret != sizeof(key_t))) {\n      LOG_ERROR(\"Read vector failed, ret=%zu\", ret);\n      return kInvalidKey;\n    }\n\n    return *reinterpret_cast<const key_t *>(key_block.data());\n  } else {\n    return id;\n  }\n}\n\nvoid HnswStreamerEntity::add_neighbor(level_t level, node_id_t id,\n                                      uint32_t size, node_id_t neighbor_id) {\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t offset =\n      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);\n  ailego_assert_with(size < neighbor_cnt(level), \"invalid neighbor size\");\n  ailego_assert_with(offset < loc.first->data_size(), \"invalid chunk offset\");\n  size_t ret = loc.first->write(offset, &neighbor_id, sizeof(node_id_t));\n  if (ailego_unlikely(ret != sizeof(node_id_t))) {\n    LOG_ERROR(\"Write neighbor id failed, ret=%zu\", ret);\n    return;\n  }\n\n  uint32_t neighbors = size + 1;\n  ret = loc.first->write(loc.second, &neighbors, sizeof(uint32_t));\n  if (ailego_unlikely(ret != sizeof(uint32_t))) {\n    LOG_ERROR(\"Write neighbor cnt failed, ret=%zu\", ret);\n  }\n\n  return;\n}\n\nint HnswStreamerEntity::init_chunks(const Chunk::Pointer &header_chunk) {\n  if (header_chunk->data_size() < header_size()) {\n    LOG_ERROR(\"Invalid header chunk size\");\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::MemoryBlock header_block;\n  size_t size = header_chunk->read(0UL, header_block, header_size());\n  if (ailego_unlikely(size != header_size())) {\n    LOG_ERROR(\"Read header chunk failed\");\n    return IndexError_ReadData;\n  }\n  *mutable_header() =\n      *reinterpret_cast<const HNSWHeader *>(header_block.data());\n\n  int ret = check_hnsw_index(&header());\n  if (ret != 0) {\n    broker_->close();\n    return ret;\n  }\n\n  
node_chunks_.resize(broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NODE));\n  for (auto seq = 0UL; seq < node_chunks_.size(); ++seq) {\n    node_chunks_[seq] = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_NODE, seq);\n    if (!node_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer data chunk %zu th of %zu\", seq,\n                node_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  upper_neighbor_chunks_.resize(\n      broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR));\n  for (auto seq = 0UL; seq < upper_neighbor_chunks_.size(); ++seq) {\n    upper_neighbor_chunks_[seq] =\n        broker_->get_chunk(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, seq);\n    if (!upper_neighbor_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer index chunk %zu th of %zu\", seq,\n                upper_neighbor_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  return 0;\n}\n\nint HnswStreamerEntity::open(IndexStorage::Pointer stg, uint64_t max_index_size,\n                             bool check_crc) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  bool huge_page = stg->isHugePage();\n  LOG_DEBUG(\"huge_page: %d\", (int)huge_page);\n  int ret = init_chunk_params(max_index_size, huge_page);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"init_chunk_params failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Open index failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,\n                                    scaling_factor(), estimate_doc_capacity(),\n                                    kUpperHashMemoryInflateRatio);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Init neighbor hash map failed\");\n    return ret;\n  }\n\n  //! 
init header\n  auto header_chunk = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_HEADER,\n                                         ChunkBroker::kDefaultChunkSeqId);\n  if (!header_chunk) {  // open empty index, create one\n    auto p =\n        broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_HEADER,\n                             ChunkBroker::kDefaultChunkSeqId, header_size());\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc header chunk failed\");\n      return p.first;\n    }\n    size_t size = p.second->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n    return 0;\n  }\n\n  //! Open an existing hnsw index\n  ret = init_chunks(header_chunk);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  //! total docs, including vectors already written to the index whose\n  //! neighbors may not be ready yet\n  node_id_t total_vecs = 0;\n  if (node_chunks_.size() > 0) {\n    size_t last_idx = node_chunks_.size() - 1;\n    auto last_chunk = node_chunks_[last_idx];\n    if (last_chunk->data_size() % node_size()) {\n      LOG_WARN(\"The index may broken\");\n      return IndexError_InvalidFormat;\n    }\n    total_vecs = last_idx * node_cnt_per_chunk_ +\n                 node_chunks_[last_idx]->data_size() / node_size();\n  }\n\n  LOG_INFO(\n      \"Open index, l0NeighborCnt=%zu upperNeighborCnt=%zu \"\n      \"efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u\",\n      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),\n      total_vecs, cur_max_level());\n  //! 
try to correct the docCnt if index not fully flushed\n  if (doc_cnt() != total_vecs) {\n    LOG_WARN(\"Index closed abnormally, using totalVecs as curDocCnt\");\n    *mutable_doc_cnt() = total_vecs;\n  }\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      for (node_id_t id = 0U; id < doc_cnt(); ++id) {\n        if (get_key(id) == kInvalidKey) {\n          continue;\n        }\n        (*keys_map_)[get_key(id)] = id;\n      }\n    }\n  }\n\n  stats_.set_loaded_count(doc_cnt());\n\n  return 0;\n}\n\nint HnswStreamerEntity::close() {\n  LOG_DEBUG(\"close index\");\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  mutable_header()->reset();\n  upper_neighbor_index_->cleanup();\n  keys_map_->clear();\n  header_.clear();\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n\n  return broker_->close();\n}\n\nint HnswStreamerEntity::flush(uint64_t checkpoint) {\n  LOG_INFO(\"Flush index, curDocs=%u\", doc_cnt());\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  int ret = broker_->flush(checkpoint);\n  if (ret != 0) {\n    return ret;\n  }\n\n  return 0;\n}\n\nint HnswStreamerEntity::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"Dump index, curDocs=%u\", doc_cnt());\n\n  //! sort by keys, to support get_vector by key in searcher\n  std::vector<key_t> keys(doc_cnt());\n  for (node_id_t i = 0; i < doc_cnt(); ++i) {\n    keys[i] = get_key(i);\n  }\n\n  //! 
dump neighbors\n  auto get_level = [&](node_id_t id) {\n    auto it = upper_neighbor_index_->find(id);\n    if (it == upper_neighbor_index_->end()) {\n      return 0U;\n    };\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    return meta->level;\n  };\n  auto ret = dump_segments(dumper, keys.data(), get_level);\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n  *stats_.mutable_dumped_size() += ret;\n\n  return 0;\n}\n\nint HnswStreamerEntity::check_hnsw_index(const HNSWHeader *hd) const {\n  if (l0_neighbor_cnt() != hd->l0_neighbor_cnt() ||\n      upper_neighbor_cnt() != hd->upper_neighbor_cnt()) {\n    LOG_ERROR(\"Param neighbor cnt: %zu:%zu mismatch index previous %zu:%zu\",\n              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->l0_neighbor_cnt(),\n              hd->upper_neighbor_cnt());\n    return IndexError_Mismatch;\n  }\n  if (vector_size() != hd->vector_size()) {\n    LOG_ERROR(\"vector size %zu mismatch index previous %zu\", vector_size(),\n              hd->vector_size());\n    return IndexError_Mismatch;\n  }\n  if (ef_construction() != hd->ef_construction()) {\n    LOG_WARN(\"Param efConstruction %zu mismatch index previous %zu\",\n             ef_construction(), hd->ef_construction());\n  }\n  if (scaling_factor() != hd->scaling_factor()) {\n    LOG_WARN(\"Param scalingFactor %zu mismatch index previous %zu\",\n             scaling_factor(), hd->scaling_factor());\n    return IndexError_Mismatch;\n  }\n  if (prune_cnt() != hd->neighbor_prune_cnt()) {\n    LOG_WARN(\"Param pruneCnt %zu mismatch index previous %zu\", prune_cnt(),\n             hd->neighbor_prune_cnt());\n    return IndexError_Mismatch;\n  }\n  if ((hd->entry_point() != kInvalidNodeId &&\n       hd->entry_point() >= hd->doc_cnt()) ||\n      (hd->entry_point() == kInvalidNodeId && hd->doc_cnt() > 0U)) {\n    LOG_WARN(\"Invalid entryPoint %u, docCnt %u\", hd->entry_point(),\n             hd->doc_cnt());\n    return 
IndexError_InvalidFormat;\n  }\n  if (hd->entry_point() == kInvalidNodeId &&\n      broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NODE) > 0) {\n    LOG_WARN(\"The index is broken, maybe it haven't flush\");\n    return IndexError_InvalidFormat;\n  }\n\n  return 0;\n}\n\nint HnswStreamerEntity::add_vector(level_t level, key_t key, const void *vec,\n                                   node_id_t *id) {\n  Chunk::Pointer node_chunk;\n  size_t chunk_offset = -1UL;\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  node_id_t local_id = static_cast<node_id_t>(doc_cnt());\n  uint32_t chunk_index = node_chunks_.size() - 1U;\n  if (chunk_index == -1U ||\n      (node_chunks_[chunk_index]->data_size() >=\n       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc\n    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n    chunk_index++;\n    auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NODE, chunk_index,\n                                  chunk_size_);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return p.first;\n    }\n    node_chunk = p.second;\n    chunk_offset = 0UL;\n    node_chunks_.emplace_back(node_chunk);\n  } else {\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = node_chunk->data_size();\n  }\n\n  size_t size = node_chunk->write(chunk_offset, vec, vector_size());\n  if (ailego_unlikely(size != vector_size())) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"Chunk 
write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  //! level 0 neighbors is inited to zero by default\n\n  int ret = add_upper_neighbor(level, local_id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  chunk_offset += node_size();\n  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n    LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n    return IndexError_Runtime;\n  }\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock();\n      (*keys_map_)[key] = local_id;\n      keys_map_lock_->unlock();\n    }\n  }\n\n  *mutable_doc_cnt() += 1;\n  broker_->mark_dirty();\n  *id = local_id;\n\n  return 0;\n}\n\nint HnswStreamerEntity::add_vector_with_id(level_t level, node_id_t id,\n                                           const void *vec) {\n  Chunk::Pointer node_chunk;\n  size_t chunk_offset = -1UL;\n  key_t key = id;\n\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  // set node_chunk & chunk_offset if succeed\n  auto func_get_node_chunk_and_offset = [&](node_id_t node_id) -> int {\n    uint32_t chunk_index = node_id >> node_index_mask_bits_;\n    ailego_assert_with(chunk_index <= node_chunks_.size(), \"invalid chunk idx\");\n    // belongs to next chunk\n    if (chunk_index == node_chunks_.size()) {\n      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n        LOG_ERROR(\"add vector failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NODE, chunk_index,\n                                    chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n        LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      node_chunk = 
p.second;\n      node_chunks_.emplace_back(node_chunk);\n    }\n\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = (node_id & node_index_mask_) * node_size();\n    return 0;\n  };\n\n  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {\n    if (auto ret = func_get_node_chunk_and_offset(start_id); ret != 0) {\n      LOG_ERROR(\"func_get_node_chunk_and_offset failed\");\n      return ret;\n    }\n    size_t size = node_chunk->write(chunk_offset + vector_size(), &kInvalidKey,\n                                    sizeof(key_t));\n    if (ailego_unlikely(size != sizeof(key_t))) {\n      LOG_ERROR(\"Chunk write key failed, ret=%zu\", size);\n      return IndexError_WriteData;\n    }\n\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (auto ret = func_get_node_chunk_and_offset(id); ret != 0) {\n    LOG_ERROR(\"func_get_node_chunk_and_offset failed\");\n    return ret;\n  }\n\n  size_t size = node_chunk->write(chunk_offset, vec, vector_size());\n  if (ailego_unlikely(size != vector_size())) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  //! 
level 0 neighbors is inited to zero by default\n\n  int ret = add_upper_neighbor(level, id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  if (*mutable_doc_cnt() <= id) {\n    *mutable_doc_cnt() = id + 1;\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock();\n      (*keys_map_)[key] = id;\n      keys_map_lock_->unlock();\n    }\n  }\n\n  broker_->mark_dirty();\n\n  return 0;\n}\n\nvoid HnswStreamerEntity::update_ep_and_level(node_id_t ep, level_t level) {\n  HnswEntity::update_ep_and_level(ep, level);\n  flush_header();\n\n  return;\n}\n\nconst HnswEntity::Pointer HnswStreamerEntity::clone() const {\n  std::vector<Chunk::Pointer> node_chunks;\n  node_chunks.reserve(node_chunks_.size());\n  for (size_t i = 0UL; i < node_chunks_.size(); ++i) {\n    node_chunks.emplace_back(node_chunks_[i]->clone());\n    if (ailego_unlikely(!node_chunks[i])) {\n      LOG_ERROR(\"HnswStreamerEntity get chunk failed in clone\");\n      return HnswEntity::Pointer();\n    }\n  }\n\n  std::vector<Chunk::Pointer> upper_neighbor_chunks;\n  upper_neighbor_chunks.reserve(upper_neighbor_chunks_.size());\n  for (size_t i = 0UL; i < upper_neighbor_chunks_.size(); ++i) {\n    upper_neighbor_chunks.emplace_back(upper_neighbor_chunks_[i]->clone());\n    if (ailego_unlikely(!upper_neighbor_chunks[i])) {\n      LOG_ERROR(\"HnswStreamerEntity get chunk failed in clone\");\n      return HnswEntity::Pointer();\n    }\n  }\n\n  HnswStreamerEntity *entity = new (std::nothrow) HnswStreamerEntity(\n      stats_, header(), chunk_size_, node_index_mask_bits_,\n      upper_neighbor_mask_bits_, filter_same_key_, get_vector_enabled_,\n      upper_neighbor_index_, keys_map_lock_, keys_map_, use_key_info_map_,\n      
std::move(node_chunks), std::move(upper_neighbor_chunks), broker_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswStreamerEntity new failed\");\n  }\n  return HnswEntity::Pointer(entity);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw/hnsw_streamer_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <iostream>\n#include <ailego/parallel/lock.h>\n#include <sparsehash/dense_hash_map>\n#include <sparsehash/dense_hash_set>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_chunk.h\"\n#include \"hnsw_entity.h\"\n#include \"hnsw_index_hash.h\"\n#include \"hnsw_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! HnswStreamerEntity manage vector data, pkey, and node's neighbors\nclass HnswStreamerEntity : public HnswEntity {\n public:\n  //! Cleanup\n  //! return 0 on success, or errCode in failure\n  virtual int cleanup() override;\n\n  //! Make a copy of streamer entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! 
Get vectors feature data by local ids\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! Get the node id's neighbors on graph level\n  //! Note: the returned neighbors cannot be modified; use the following\n  //! method to get WritableNeighbors if you want to modify them\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  //! Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t level, key_t key, const void *vec,\n                         node_id_t *id) override;\n\n  //! Add vector and id to hnsw entity\n  virtual int add_vector_with_id(level_t level, node_id_t id,\n                                 const void *vec) override;\n\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;\n\n  //! Append neighbor_id to node id neighbors on level\n  //! Notice: the caller must ensure the neighbors are not full\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Dump index by dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  virtual void update_ep_and_level(node_id_t ep, level_t level) override;\n\n  void set_use_key_info_map(bool use_id_map) {\n    use_key_info_map_ = use_id_map;\n    LOG_DEBUG(\"use_key_info_map_: %d\", (int)use_key_info_map_);\n  }\n\n public:\n  //! Constructor\n  HnswStreamerEntity(IndexStreamer::Stats &stats);\n\n  //! Destructor\n  ~HnswStreamerEntity();\n\n  //! 
Get vector feature data by key\n  virtual const void *get_vector_by_key(key_t key) const override {\n    auto id = get_id(key);\n    return id == kInvalidNodeId ? nullptr : get_vector(id);\n  }\n\n  virtual int get_vector_by_key(\n      const key_t key, IndexStorage::MemoryBlock &block) const override {\n    auto id = get_id(key);\n    if (id != kInvalidNodeId) {\n      return get_vector(id, block);\n    } else {\n      return IndexError_InvalidArgument;\n    }\n  }\n\n  //! Init entity\n  int init(size_t max_doc_cnt);\n\n  //! Flush graph entity to disk\n  //! return 0 on success, or errCode in failure\n  int flush(uint64_t checkpoint);\n\n  //! Open entity from storage\n  //! return 0 on success, or errCode in failure\n  int open(IndexStorage::Pointer stg, uint64_t max_index_size, bool check_crc);\n\n  //! Close entity\n  //! return 0 on success, or errCode in failure\n  int close();\n\n  //! Set meta information from entity\n  int set_index_meta(const IndexMeta &meta) const {\n    return IndexHelper::SerializeToStorage(meta, broker_->storage().get());\n  }\n\n  //! Get meta information from entity\n  int get_index_meta(IndexMeta *meta) const {\n    return IndexHelper::DeserializeFromStorage(broker_->storage().get(), meta);\n  }\n\n  //! Set params: chunk size\n  inline void set_chunk_size(size_t val) {\n    chunk_size_ = val;\n  }\n\n  //! Set params\n  inline void set_filter_same_key(bool val) {\n    filter_same_key_ = val;\n  }\n\n  //! Set params\n  inline void set_get_vector(bool val) {\n    get_vector_enabled_ = val;\n  }\n\n  //! Get vector local id by key\n  inline node_id_t get_id(key_t key) const {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock_shared();\n      auto it = keys_map_->find(key);\n      keys_map_lock_->unlock_shared();\n      return it == keys_map_->end() ? 
kInvalidNodeId : it->second;\n    } else {\n      return key;\n    }\n  }\n\n  void print_key_map() const {\n    std::cout << \"key map begins\" << std::endl;\n\n    auto iter = keys_map_->begin();\n    while (iter != keys_map_->end()) {\n      std::cout << \"key: \" << iter->first << \", id: \" << iter->second\n                << std::endl;\n      ;\n      iter++;\n    }\n\n    std::cout << \"key map ends\" << std::endl;\n  }\n\n  //! Get l0 neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! Get neighbors size for level > 0\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n\n private:\n  union UpperNeighborIndexMeta {\n    struct {\n      uint32_t level : 4;\n      uint32_t index : 28;  // index is composite type: chunk idx, and the\n                            // N th neighbors in chunk, they two composite\n                            // the 28 bits location\n    };\n    uint32_t data;\n  };\n\n  template <class Key, class T>\n  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;\n  template <class Key, class T>\n  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;\n\n  template <class Key>\n  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;\n  template <class Key>\n  using HashSetPointer = std::shared_ptr<HashSet<Key>>;\n\n  //! upper neighbor index hashmap\n  using NIHashMap = HnswIndexHashMap<node_id_t, uint32_t>;\n  using NIHashMapPointer = std::shared_ptr<NIHashMap>;\n\n  //! 
Private constructor, only called by the clone method\n  HnswStreamerEntity(IndexStreamer::Stats &stats, const HNSWHeader &hd,\n                     size_t chunk_size, uint32_t node_index_mask_bits,\n                     uint32_t upper_neighbor_mask_bits, bool filter_same_key,\n                     bool get_vector_enabled,\n                     const NIHashMapPointer &upper_neighbor_index,\n                     std::shared_ptr<ailego::SharedMutex> &keys_map_lock,\n                     const HashMapPointer<key_t, node_id_t> &keys_map,\n                     bool use_key_info_map,\n                     std::vector<Chunk::Pointer> &&node_chunks,\n                     std::vector<Chunk::Pointer> &&upper_neighbor_chunks,\n                     const ChunkBroker::Pointer &broker)\n      : stats_(stats),\n        chunk_size_(chunk_size),\n        node_index_mask_bits_(node_index_mask_bits),\n        node_cnt_per_chunk_(1UL << node_index_mask_bits_),\n        node_index_mask_(node_cnt_per_chunk_ - 1),\n        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),\n        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),\n        filter_same_key_(filter_same_key),\n        get_vector_enabled_(get_vector_enabled),\n        use_key_info_map_(use_key_info_map),\n        upper_neighbor_index_(upper_neighbor_index),\n        keys_map_lock_(keys_map_lock),\n        keys_map_(keys_map),\n        node_chunks_(std::move(node_chunks)),\n        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),\n        broker_(broker) {\n    *mutable_header() = hd;\n\n    neighbor_size_ = neighbors_size();\n    upper_neighbor_size_ = upper_neighbors_size();\n  }\n\n  //! 
Called only in searching procedure per context, so no need to lock\n  void sync_chunks(ChunkBroker::CHUNK_TYPE type, size_t idx,\n                   std::vector<Chunk::Pointer> *chunks) const {\n    if (ailego_likely(idx < chunks->size())) {\n      return;\n    }\n    for (size_t i = chunks->size(); i <= idx; ++i) {\n      auto chunk = broker_->get_chunk(type, i);\n      // the storage can ensure get chunk will success after the first get\n      ailego_assert_with(!!chunk, \"get chunk failed\");\n      chunks->emplace_back(std::move(chunk));\n    }\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(\n      node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size();\n\n    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size() + vector_size();\n\n    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(\n      level_t level, node_id_t id) const {\n    auto it = upper_neighbor_index_->find(id);\n    ailego_assert_abort(it != upper_neighbor_index_->end(),\n                        \"Get upper neighbor header failed\");\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    uint32_t chunk_idx = (meta->index) >> upper_neighbor_mask_bits_;\n    uint32_t offset = (((meta->index) & upper_neighbor_mask_) + level - 1) *\n                      upper_neighbor_size_;\n    sync_chunks(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_idx,\n                
&upper_neighbor_chunks_);\n    ailego_assert_abort(chunk_idx < upper_neighbor_chunks_.size(),\n                        \"invalid chunk idx\");\n    ailego_assert_abort(offset < upper_neighbor_chunks_[chunk_idx]->data_size(),\n                        \"invalid chunk offset\");\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! return pair: chunk + chunk offset\n  inline std::pair<Chunk *, size_t> get_neighbor_chunk_loc(level_t level,\n                                                           node_id_t id) const {\n    if (level == 0UL) {\n      uint32_t chunk_idx = id >> node_index_mask_bits_;\n      uint32_t offset =\n          (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);\n\n      sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n      ailego_assert_abort(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n      ailego_assert_abort(offset < node_chunks_[chunk_idx]->data_size(),\n                          \"invalid chunk offset\");\n      return std::make_pair(node_chunks_[chunk_idx].get(), offset);\n    } else {\n      auto p = get_upper_neighbor_chunk_loc(level, id);\n      return std::make_pair(upper_neighbor_chunks_[p.first].get(), p.second);\n    }\n  }\n\n  //! Check that the hnsw index is valid\n  int check_hnsw_index(const HNSWHeader *hd) const;\n\n  size_t get_total_upper_neighbors_size(level_t level) const {\n    return level * upper_neighbor_size_;\n  }\n\n  //! 
Add upper neighbor header and reserve space for upper neighbor\n  int add_upper_neighbor(level_t level, node_id_t id) {\n    if (level == 0) {\n      return 0;\n    }\n    Chunk::Pointer chunk;\n    uint64_t chunk_offset = -1UL;\n    size_t neighbors_size = get_total_upper_neighbors_size(level);\n    uint64_t chunk_index = upper_neighbor_chunks_.size() - 1UL;\n    if (chunk_index == -1UL ||\n        (upper_neighbor_chunks_[chunk_index]->padding_size() <\n         neighbors_size)) {  // no space left and need to alloc\n      chunk_index++;\n      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==\n                          upper_neighbor_chunks_.size())) {\n        LOG_ERROR(\"add upper neighbor failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,\n                                    chunk_index, upper_neighbor_chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n        LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      chunk = p.second;\n      chunk_offset = 0UL;\n      upper_neighbor_chunks_.emplace_back(chunk);\n    } else {\n      chunk = upper_neighbor_chunks_[chunk_index];\n      chunk_offset = chunk->data_size();\n    }\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,\n                       \"invalid offset\");\n    ailego_assert_with((chunk_offset / upper_neighbor_size_) <\n                           (1U << upper_neighbor_mask_bits_),\n                       \"invalid offset\");\n    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),\n                       \"invalid chunk index\");\n    UpperNeighborIndexMeta meta;\n    meta.level = level;\n    meta.index = (chunk_index << upper_neighbor_mask_bits_) |\n                 (chunk_offset / upper_neighbor_size_);\n    chunk_offset += upper_neighbor_size_ 
* level;\n    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {\n      LOG_ERROR(\"HashMap insert value failed\");\n      return IndexError_Runtime;\n    }\n\n    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", (size_t)chunk_offset);\n      return IndexError_Runtime;\n    }\n\n    return 0;\n  }\n\n  size_t estimate_doc_capacity() const {\n    return node_chunks_.capacity() * node_cnt_per_chunk_;\n  }\n\n  int init_chunk_params(size_t max_index_size, bool huge_page) {\n    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());\n    //! align node cnt per chunk to pow of 2\n    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));\n    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;\n    if (huge_page) {\n      chunk_size_ = AlignHugePageSize(node_cnt_per_chunk_ * node_size());\n    } else {\n      chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());\n    }\n    node_index_mask_ = node_cnt_per_chunk_ - 1;\n\n    if (max_index_size == 0UL) {\n      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;\n    } else {\n      max_index_size_ = max_index_size;\n    }\n\n    //! To get a balanced upper neighbor chunk size.\n    //! If the upper chunk size is equal to node chunk size, it may waste\n    //! upper neighbor chunk space; if the upper neighbor chunk size is too\n    //! small, the will need large upper neighbor chunks index space. So to\n    //! 
get a balanced ratio be sqrt of the node/neighbor size ratio\n    float ratio =\n        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);\n    if (huge_page) {\n      upper_neighbor_chunk_size_ = AlignHugePageSize(\n          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),\n                   static_cast<size_t>(chunk_size_ / ratio)));\n    } else {\n      upper_neighbor_chunk_size_ = AlignPageSize(\n          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),\n                   static_cast<size_t>(chunk_size_ / ratio)));\n    }\n    upper_neighbor_mask_bits_ =\n        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;\n\n    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);\n    size_t max_upper_chunk_cnt = std::ceil(\n        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /\n        (upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    max_upper_chunk_cnt =\n        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());\n\n    //! reserve space to avoid memmove in chunks vector emplace chunk, so\n    //! as to lock-free in reading chunk\n    node_chunks_.reserve(max_node_chunk_cnt);\n    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);\n\n    LOG_DEBUG(\n        \"Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u \"\n        \"upperNeighborChunkSize=%u \"\n        \"nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu \"\n        \"maxIndexSize=%zu ratio=%.3f\",\n        node_size(), chunk_size_, upper_neighbor_size_,\n        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,\n        max_upper_chunk_cnt, max_index_size_, ratio);\n\n    return 0;\n  }\n\n  //! 
Init node chunk and neighbor chunks\n  int init_chunks(const Chunk::Pointer &header_chunk);\n\n  int flush_header(void) {\n    if (!broker_->dirty()) {\n      // do not need to flush\n      return 0;\n    }\n    auto header_chunk = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_HEADER,\n                                           ChunkBroker::kDefaultChunkSeqId);\n    if (ailego_unlikely(!header_chunk)) {\n      LOG_ERROR(\"get header chunk failed\");\n      return IndexError_Runtime;\n    }\n    size_t size = header_chunk->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n\n    return 0;\n  }\n\n private:\n  HnswStreamerEntity(const HnswStreamerEntity &) = delete;\n  HnswStreamerEntity &operator=(const HnswStreamerEntity &) = delete;\n  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;\n\n private:\n  IndexStreamer::Stats &stats_;\n  HNSWHeader header_{};\n  std::mutex mutex_{};\n  size_t max_index_size_{0UL};\n  uint32_t chunk_size_{kDefaultChunkSize};\n  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};\n  uint32_t node_index_mask_bits_{0U};\n  uint32_t node_cnt_per_chunk_{0U};\n  uint32_t node_index_mask_{0U};\n  uint32_t neighbor_size_{0U};\n  uint32_t upper_neighbor_size_{0U};\n  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the\n  //! following mask\n  uint32_t upper_neighbor_mask_bits_{0U};\n  uint32_t upper_neighbor_mask_{0U};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n  bool use_key_info_map_{true};\n\n  NIHashMapPointer upper_neighbor_index_{};\n\n  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};\n  HashMapPointer<key_t, node_id_t> keys_map_{};\n\n  //! the chunks will be changed in searcher, so need mutable\n  //! data chunk include: vector, key, level 0 neighbors\n  mutable std::vector<Chunk::Pointer> node_chunks_{};\n\n  //! 
upper neighbor chunk include: UpperNeighborHeader + (1~level) neighbors\n  mutable std::vector<Chunk::Pointer> upper_neighbor_chunks_{};\n\n  ChunkBroker::Pointer broker_{};  // chunk broker\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(AUTO_DETECT_ARCH)\n  foreach(FILE ${HNSW_RABITQ_FILES})\n      set_source_files_properties(\n          ${FILE}\n          PROPERTIES\n          COMPILE_FLAGS \"${RABITQ_ARCH_FLAG}\"\n      )\n  endforeach()\nendif()\n\ncc_library(\n    NAME core_knn_hnsw_rabitq\n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS core_framework rabitqlib sparsehash\n    INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_algorithm.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_algorithm.h\"\n#include <chrono>\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqAlgorithm::HnswRabitqAlgorithm(HnswRabitqEntity &entity)\n    : entity_(entity),\n      mt_(std::chrono::system_clock::now().time_since_epoch().count()),\n      lock_pool_(kLockCnt) {}\n\nint HnswRabitqAlgorithm::cleanup() {\n  return 0;\n}\n\nint HnswRabitqAlgorithm::add_node(node_id_t id, level_t level,\n                                  HnswRabitqContext *ctx) {\n  spin_lock_.lock();\n\n  // std::cout << \"id: \" << id << \", level: \" << level << std::endl;\n\n  auto cur_max_level = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    return 0;\n  }\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    mutex_.lock();\n    // re-check max level\n    cur_max_level = entity_.cur_max_level();\n    entry_point = entity_.entry_point();\n    if (level <= cur_max_level) {\n      mutex_.unlock();\n    }\n  }\n\n  level_t cur_level = cur_max_level;\n  ResultRecord dist = ctx->dist_calculator()(entry_point);\n  for (; cur_level > level; --cur_level) {\n    select_entry_point(cur_level, &entry_point, &dist, ctx);\n  }\n\n  for (; cur_level 
>= 0; --cur_level) {\n    search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),\n                     ctx);\n  }\n\n  // add neighbors from down level to top level, to avoid upper level visible\n  // to knn_search but the under layer level not ready\n  for (cur_level = 0; cur_level <= level; ++cur_level) {\n    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);\n    ctx->level_topk(cur_level).clear();\n  }\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    spin_lock_.lock();\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    mutex_.unlock();\n  }\n\n  return 0;\n}\n\n//! select_entry_point on hnsw level, ef = 1\nvoid HnswRabitqAlgorithm::select_entry_point(level_t level,\n                                             node_id_t *entry_point,\n                                             ResultRecord *dist,\n                                             HnswRabitqContext *ctx) const {\n  auto &entity = ctx->get_entity();\n  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();\n  while (true) {\n    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n    uint32_t size = neighbors.size();\n    if (size == 0) {\n      break;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;\n    int ret = dc.get_vector(&neighbors[0], size, neighbor_vec_blocks);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n\n    bool find_closer = false;\n\n    std::vector<float> dists(size);\n    std::vector<const void *> neighbor_vecs(size);\n    for (uint32_t i = 0; i < size; ++i) {\n      neighbor_vecs[i] = neighbor_vec_blocks[i].data();\n    }\n\n    dc.batch_dist(neighbor_vecs.data(), size, dists.data());\n\n    for (uint32_t i = 0; i < size; ++i) {\n      ResultRecord cur_dist 
= dists[i];\n\n      if (cur_dist < *dist) {\n        *entry_point = neighbors[i];\n        *dist = cur_dist;\n        find_closer = true;\n      }\n    }\n\n    if (!find_closer) {\n      break;\n    }\n  }\n\n  return;\n}\n\nvoid HnswRabitqAlgorithm::add_neighbors(node_id_t id, level_t level,\n                                        TopkHeap &topk_heap,\n                                        HnswRabitqContext *ctx) {\n  if (ailego_unlikely(topk_heap.size() == 0)) {\n    return;\n  }\n\n  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();\n\n  update_neighbors(dc, id, level, topk_heap);\n\n  // reverse update neighbors\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    reverse_update_neighbors(dc, topk_heap[i].first, level, id,\n                             topk_heap[i].second, ctx->update_heap());\n  }\n\n  return;\n}\n\nvoid HnswRabitqAlgorithm::search_neighbors(level_t level,\n                                           node_id_t *entry_point,\n                                           ResultRecord *dist, TopkHeap &topk,\n                                           HnswRabitqContext *ctx) const {\n  const auto &entity = ctx->get_entity();\n  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();\n  VisitFilter &visit = ctx->visit_filter();\n  CandidateHeap &candidates = ctx->candidates();\n  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n  if (ctx->filter().is_valid()) {\n    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n  }\n\n  candidates.clear();\n  visit.clear();\n  visit.set_visited(*entry_point);\n  if (!filter(*entry_point)) {\n    topk.emplace(*entry_point, *dist);\n  }\n\n  candidates.emplace(*entry_point, *dist);\n  while (!candidates.empty() && !ctx->reach_scan_limit()) {\n    auto top = candidates.begin();\n    node_id_t main_node = top->first;\n    ResultRecord main_dist = top->second;\n\n    if (topk.full() && main_dist > topk[0].second) {\n      break;\n    }\n\n    
candidates.pop();\n    const Neighbors neighbors = entity.get_neighbors(level, main_node);\n    ailego_prefetch(neighbors.data);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n\n    std::vector<node_id_t> neighbor_ids(neighbors.size());\n    uint32_t size = 0;\n    for (uint32_t i = 0; i < neighbors.size(); ++i) {\n      node_id_t node = neighbors[i];\n      if (visit.visited(node)) {\n        if (ailego_unlikely(ctx->debugging())) {\n          (*ctx->mutable_stats_visit_dup_cnt())++;\n        }\n        continue;\n      }\n      visit.set_visited(node);\n      neighbor_ids[size++] = node;\n    }\n    if (size == 0) {\n      continue;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;\n    int ret = dc.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n\n    // do prefetch\n    static constexpr node_id_t BATCH_SIZE = 12;\n    static constexpr node_id_t PREFETCH_STEP = 2;\n    for (uint32_t i = 0; i < std::min(BATCH_SIZE * PREFETCH_STEP, size); ++i) {\n      ailego_prefetch(neighbor_vec_blocks[i].data());\n    }\n    // done\n\n    std::vector<float> dists(size);\n    std::vector<const void *> neighbor_vecs(size);\n\n    for (uint32_t i = 0; i < size; ++i) {\n      neighbor_vecs[i] = neighbor_vec_blocks[i].data();\n    }\n\n    dc.batch_dist(neighbor_vecs.data(), size, dists.data());\n\n    for (uint32_t i = 0; i < size; ++i) {\n      node_id_t node = neighbor_ids[i];\n      ResultRecord cur_dist = dists[i];\n\n      if ((!topk.full()) || cur_dist < topk[0].second) {\n        candidates.emplace(node, cur_dist);\n        // update entry_point for next level scan\n        if (cur_dist < *dist) {\n          *entry_point = node;\n          *dist = cur_dist;\n        }\n        if (!filter(node)) {\n          topk.emplace(node, 
cur_dist);\n        }\n      }  // end if\n    }  // end for\n  }  // while\n\n  return;\n}\n\nvoid HnswRabitqAlgorithm::update_neighbors(HnswRabitqAddDistCalculator &dc,\n                                           node_id_t id, level_t level,\n                                           TopkHeap &topk_heap) {\n  topk_heap.sort();\n\n  uint32_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n  if (topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt())) {\n    if (topk_heap.size() <= static_cast<size_t>(max_neighbor_cnt)) {\n      entity_.update_neighbors(level, id, topk_heap);\n      return;\n    }\n  }\n\n  uint32_t cur_size = 0;\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    node_id_t cur_node = topk_heap[i].first;\n    ResultRecord cur_node_dist = topk_heap[i].second;\n    bool good = true;\n    for (uint32_t j = 0; j < cur_size; ++j) {\n      ResultRecord tmp_dist = dc.dist(cur_node, topk_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      topk_heap[cur_size].first = cur_node;\n      topk_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  // when after-prune neighbor count is too seldom,\n  // we use this strategy to make-up enough edges\n  // not only just make-up out-degrees\n  // we also make-up enough in-degrees\n  uint32_t min_neighbors = entity_.min_neighbor_cnt();\n  for (size_t k = cur_size; cur_size < min_neighbors && k < topk_heap.size();\n       ++k) {\n    bool exist = false;\n    for (size_t j = 0; j < cur_size; ++j) {\n      if (topk_heap[j].first == topk_heap[k].first) {\n        exist = true;\n        break;\n      }\n    }\n    if (!exist) {\n      topk_heap[cur_size].first = topk_heap[k].first;\n      topk_heap[cur_size].second = topk_heap[k].second;\n      cur_size++;\n    }\n  }\n\n  topk_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, 
topk_heap);\n\n  return;\n}\n\nvoid HnswRabitqAlgorithm::reverse_update_neighbors(\n    HnswRabitqAddDistCalculator &dc, node_id_t id, level_t level,\n    node_id_t link_id, ResultRecord dist, TopkHeap &update_heap) {\n  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n\n  uint32_t lock_idx = id & kLockMask;\n  lock_pool_[lock_idx].lock();\n  const Neighbors neighbors = entity_.get_neighbors(level, id);\n  size_t size = neighbors.size();\n  ailego_assert_with(size <= max_neighbor_cnt, \"invalid neighbor size\");\n  if (size < max_neighbor_cnt) {\n    entity_.add_neighbor(level, id, size, link_id);\n    lock_pool_[lock_idx].unlock();\n    return;\n  }\n\n  update_heap.emplace(link_id, dist);\n\n  for (size_t i = 0; i < size; ++i) {\n    node_id_t node = neighbors[i];\n    ResultRecord cur_dist = dc.dist(id, node);\n    update_heap.emplace(node, cur_dist);\n  }\n\n  //! TODO: optimize prune\n  //! prune edges\n  update_heap.sort();\n  size_t cur_size = 0;\n  for (size_t i = 0; i < update_heap.size(); ++i) {\n    node_id_t cur_node = update_heap[i].first;\n    ResultRecord cur_node_dist = update_heap[i].second;\n    bool good = true;\n    for (size_t j = 0; j < cur_size; ++j) {\n      ResultRecord tmp_dist = dc.dist(cur_node, update_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      update_heap[cur_size].first = cur_node;\n      update_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  update_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, update_heap);\n\n  lock_pool_[lock_idx].unlock();\n\n  update_heap.clear();\n\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_algorithm.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <ailego/parallel/lock.h>\n#include \"hnsw_rabitq_context.h\"\n#include \"hnsw_rabitq_dist_calculator.h\"\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! hnsw graph algorithm implement\nclass HnswRabitqAlgorithm {\n public:\n  typedef std::unique_ptr<HnswRabitqAlgorithm> UPointer;\n\n public:\n  //! Constructor\n  explicit HnswRabitqAlgorithm(HnswRabitqEntity &entity);\n\n  //! Destructor\n  ~HnswRabitqAlgorithm() = default;\n\n  //! Cleanup HnswRabitqAlgorithm\n  int cleanup();\n\n  //! Add a node to hnsw graph\n  //! @id:     the node unique id\n  //! @level:  a node will be add to graph in each level [0, level]\n  //! return 0 on success, or errCode in failure\n  int add_node(node_id_t id, level_t level, HnswRabitqContext *ctx);\n\n  //! Initiate HnswRabitqAlgorithm\n  int init() {\n    level_probas_.clear();\n    double level_mult =\n        1 / std::log(static_cast<double>(entity_.scaling_factor()));\n    for (int level = 0;; level++) {\n      // refers faiss get_random_level alg\n      double proba =\n          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));\n      if (proba < 1e-9) {\n        break;\n      }\n      level_probas_.push_back(proba);\n    }\n\n    return 0;\n  }\n\n  //! Generate a random level\n  //! 
return graph level\n  uint32_t get_random_level() const {\n    // gen rand float (0, 1)\n    double f = mt_() / static_cast<float>(mt_.max());\n    for (size_t level = 0; level < level_probas_.size(); level++) {\n      if (f < level_probas_[level]) {\n        return level;\n      }\n      f -= level_probas_[level];\n    }\n    return level_probas_.size() - 1;\n  }\n\n private:\n  //! Select in upper layer to get entry point for next layer search\n  void select_entry_point(level_t level, node_id_t *entry_point,\n                          ResultRecord *dist, HnswRabitqContext *ctx) const;\n\n  //! update node id neighbors from topkHeap, and reverse link is also updated\n  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,\n                     HnswRabitqContext *ctx);\n\n  //! Given a node id and level, search the nearest neighbors in graph\n  //! Note: the nearest neighbors result keeps in topk, and entry_point and\n  //! dist will be updated to current level nearest node id and distance\n  void search_neighbors(level_t level, node_id_t *entry_point,\n                        ResultRecord *dist, TopkHeap &topk,\n                        HnswRabitqContext *ctx) const;\n\n  //! Update the node's neighbors\n  void update_neighbors(HnswRabitqAddDistCalculator &dc, node_id_t id,\n                        level_t level, TopkHeap &topk_heap);\n\n  //! Checking linkId could be id's new neighbor, and add as neighbor if true\n  //! @dc         distance calculator\n  //! 
@updateHeap temporary heap in updating neighbors\n  void reverse_update_neighbors(HnswRabitqAddDistCalculator &dc, node_id_t id,\n                                level_t level, node_id_t link_id,\n                                ResultRecord dist, TopkHeap &update_heap);\n\n private:\n  HnswRabitqAlgorithm(const HnswRabitqAlgorithm &) = delete;\n  HnswRabitqAlgorithm &operator=(const HnswRabitqAlgorithm &) = delete;\n\n private:\n  static constexpr uint32_t kLockCnt{1U << 8};\n  static constexpr uint32_t kLockMask{kLockCnt - 1U};\n\n  HnswRabitqEntity &entity_;\n  mutable std::mt19937 mt_{};\n  std::vector<double> level_probas_{};\n\n  mutable ailego::SpinMutex spin_lock_{};  // global spin lock\n  std::mutex mutex_{};                     // global mutex\n  // TODO: spin lock?\n  std::vector<std::mutex> lock_pool_{};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_builder.h\"\n#include <cstdlib>\n#include <iostream>\n#include <thread>\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_memory.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"hnsw_rabitq_algorithm.h\"\n#include \"hnsw_rabitq_entity.h\"\n#include \"hnsw_rabitq_params.h\"\n#include \"rabitq_converter.h\"\n#include \"rabitq_params.h\"\n#include \"rabitq_reformer.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqBuilder::HnswRabitqBuilder() {}\n\nint HnswRabitqBuilder::init(const IndexMeta &meta,\n                            const ailego::Params &params) {\n  LOG_INFO(\"Begin HnswRabitqBuilder::init\");\n\n  meta_ = meta;\n  auto params_copy = params;\n  meta_.set_builder(\"HnswRabitqBuilder\", HnswRabitqEntity::kRevision,\n                    std::move(params_copy));\n\n  size_t memory_quota = 0UL;\n  params.get(PARAM_HNSW_RABITQ_BUILDER_MEMORY_QUOTA, &memory_quota);\n  params.get(PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT, &thread_cnt_);\n  
params.get(PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);\n  params.get(PARAM_HNSW_RABITQ_BUILDER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_RABITQ_BUILDER_CHECK_INTERVAL_SECS,\n             &check_interval_secs_);\n\n  params.get(PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT,\n             &upper_max_neighbor_cnt_);\n  float multiplier = HnswRabitqEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_RABITQ_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,\n             &multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = upper_max_neighbor_cnt_;\n  params.get(PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR, &scaling_factor_);\n\n  multiplier = HnswRabitqEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_RABITQ_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;\n\n  if (ef_construction_ == 0) {\n    ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0) {\n    upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d]\",\n              PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),\n              kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%d] must be <= [%s]-[%d]\",\n              PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT.c_str(),\n              min_neighbor_cnt_,\n              PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),\n              upper_max_neighbor_cnt_);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0) {\n    l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"L0MaxNeighborCnt must be in 
range (0,%d)\",\n              HnswRabitqEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n  if (thread_cnt_ == 0) {\n    thread_cnt_ = std::thread::hardware_concurrency();\n  }\n  if (thread_cnt_ > std::thread::hardware_concurrency()) {\n    LOG_WARN(\"[%s] greater than cpu cores %zu\",\n             PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT.c_str(),\n             static_cast<size_t>(std::thread::hardware_concurrency()));\n  }\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"CreateMetric failed, name: %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"IndexMetric init failed, ret=%d\", ret);\n    return ret;\n  }\n\n  uint32_t total_bits = 0;\n  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);\n  if (total_bits == 0) {\n    total_bits = kDefaultRabitqTotalBits;\n  }\n  if (total_bits < 1 || total_bits > 9) {\n    LOG_ERROR(\"Invalid total_bits: %zu, must be in [1, 9]\", (size_t)total_bits);\n    return IndexError_InvalidArgument;\n  }\n  uint8_t ex_bits = total_bits - 1;\n  entity_.set_ex_bits(ex_bits);\n\n  uint32_t dimension = 0;\n  params.get(PARAM_HNSW_RABITQ_GENERAL_DIMENSION, &dimension);\n  if (dimension == 0) {\n    LOG_ERROR(\"%s not set\", PARAM_HNSW_RABITQ_GENERAL_DIMENSION.c_str());\n    return IndexError_InvalidArgument;\n  }\n  if (dimension < kMinRabitqDimSize || dimension > kMaxRabitqDimSize) {\n    LOG_ERROR(\"Invalid dimension: %u, must be in [%d, %d]\", dimension,\n            
  kMinRabitqDimSize, kMaxRabitqDimSize);\n    return IndexError_InvalidArgument;\n  }\n  entity_.update_rabitq_params_and_vector_size(dimension);\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_memory_quota(memory_quota);\n  entity_.set_prune_cnt(prune_cnt);\n\n  ret = entity_.init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  alg_ = HnswRabitqAlgorithm::UPointer(new HnswRabitqAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  // Create and initialize RaBitQ converter\n  converter_ = std::make_shared<RabitqConverter>();\n\n  IndexMeta converter_meta = meta_;\n  converter_meta.set_dimension(dimension);\n  ret = converter_->init(converter_meta, params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize RabitqConverter: %d\", ret);\n    return ret;\n  }\n\n  state_ = BUILD_STATE_INITED;\n  LOG_INFO(\n      \"End HnswRabitqBuilder::init, params: rawVectorSize=%u vectorSize=%zu \"\n      \"efConstruction=%u \"\n      \"l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u \"\n      \"memoryQuota=%zu neighborPruneCnt=%zu metricName=%s \",\n      meta_.element_size(), entity_.vector_size(), ef_construction_,\n      l0_max_neighbor_cnt_, upper_max_neighbor_cnt_, scaling_factor_,\n      memory_quota, prune_cnt, meta_.metric_name().c_str());\n\n  return 0;\n}\n\nint HnswRabitqBuilder::cleanup(void) {\n  LOG_INFO(\"Begin HnswRabitqBuilder::cleanup\");\n\n  l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;\n  min_neighbor_cnt_ = 0;\n  upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;\n  ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;\n  scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;\n  check_interval_secs_ = kDefaultLogIntervalSecs;\n 
 errcode_ = 0;\n  error_ = false;\n  entity_.cleanup();\n  if (alg_) {\n    alg_->cleanup();\n  }\n  meta_.clear();\n  metric_.reset();\n  stats_.clear_attributes();\n  stats_.set_trained_count(0UL);\n  stats_.set_built_count(0UL);\n  stats_.set_dumped_count(0UL);\n  stats_.set_discarded_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  stats_.set_built_costtime(0UL);\n  stats_.set_dumped_costtime(0UL);\n  state_ = BUILD_STATE_INIT;\n\n  LOG_INFO(\"End HnswRabitqBuilder::cleanup\");\n\n  return 0;\n}\n\nint HnswRabitqBuilder::train(IndexThreads::Pointer,\n                             IndexHolder::Pointer holder) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before HnswRabitqBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while training index\");\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while training index\");\n    return IndexError_Mismatch;\n  }\n  LOG_INFO(\"Begin HnswRabitqBuilder::train\");\n  size_t trained_cost_time = 0;\n  size_t trained_count = 0;\n\n  int ret = train_converter_and_load_reformer(holder);\n  if (ret != 0) {\n    return ret;\n  }\n\n  if (metric_->support_train()) {\n    auto start_time = ailego::Monotime::MilliSeconds();\n    auto iter = holder->create_iterator();\n    if (!iter) {\n      LOG_ERROR(\"Create iterator for holder failed\");\n      return IndexError_Runtime;\n    }\n    while (iter->is_valid()) {\n      ret = metric_->train(iter->data(), meta_.dimension());\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw build measure train failed, ret=%d\", ret);\n        return ret;\n      }\n      iter->next();\n      ++trained_count;\n    }\n    trained_cost_time = ailego::Monotime::MilliSeconds() - start_time;\n  }\n  stats_.set_trained_count(trained_count);\n  stats_.set_trained_costtime(trained_cost_time);\n  state_ = 
BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswRabitqBuilder::train\");\n\n  return 0;\n}\n\nint HnswRabitqBuilder::train_converter_and_load_reformer(\n    IndexHolder::Pointer holder) {\n  // Train converter (KMeans clustering)\n  int ret = converter_->train(holder);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to train RabitqConverter: %d\", ret);\n    return ret;\n  }\n  auto memory_dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  memory_dumper->init(ailego::Params());\n  std::string file_id = ailego::StringHelper::Concat(\n      \"rabitq_converter_\", ailego::Monotime::MilliSeconds(), rand());\n  ret = memory_dumper->create(file_id);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to create memory dumper: %d\", ret);\n    return ret;\n  }\n  // Release memory\n  AILEGO_DEFER([&file_id]() { IndexMemory::Instance()->remove(file_id); });\n  ret = converter_->dump(memory_dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump RabitqConverter: %d\", ret);\n    return ret;\n  }\n  ret = memory_dumper->close();\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to close memory dumper: %d\", ret);\n    return ret;\n  }\n\n  reformer_ = std::make_shared<RabitqReformer>();\n  ailego::Params reformer_params;\n  reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());\n  ret = reformer_->init(reformer_params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize RabitqReformer: %d\", ret);\n    return ret;\n  }\n  auto memory_storage = IndexFactory::CreateStorage(\"MemoryReadStorage\");\n  ret = memory_storage->open(file_id, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open memory storage: %d\", ret);\n    return ret;\n  }\n  ret = reformer_->load(memory_storage);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load RabitqReformer: %d\", ret);\n    return ret;\n  }\n  return 0;\n}\n\nint HnswRabitqBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before 
HnswRabitqBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswRabitqBuilder::train by trainer\");\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswRabitqBuilder::train by trainer\");\n\n  return 0;\n}\n\nint HnswRabitqBuilder::build(IndexThreads::Pointer threads,\n                             IndexHolder::Pointer holder) {\n  if (state_ != BUILD_STATE_TRAINED) {\n    LOG_ERROR(\"Train the index before HnswRabitqBuilder::build\");\n    return IndexError_NoReady;\n  }\n\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while building index\");\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while building index\");\n    return IndexError_Mismatch;\n  }\n  IndexProvider::Pointer provider =\n      std::dynamic_pointer_cast<IndexProvider>(holder);\n  if (!provider) {\n    LOG_ERROR(\"Rabitq builder expect IndexProvider\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);\n  }\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n  LOG_INFO(\"Begin HnswRabitqBuilder::build\");\n\n  if (holder->count() != static_cast<size_t>(-1)) {\n    LOG_DEBUG(\"HnswRabitqBuilder holder documents count %lu\", holder->count());\n    int ret = entity_.reserve_space(holder->count());\n    if (ret != 0) {\n      LOG_ERROR(\"HnswBuilde reserver space failed\");\n      return ret;\n    }\n  }\n  auto iter = holder->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Create iterator for holder failed\");\n    return IndexError_Runtime;\n  }\n  int ret;\n  error_ = false;\n  IndexQueryMeta ometa;\n  ometa.set_meta(holder->data_type(), holder->dimension());\n  while (iter->is_valid()) {\n    const void *vec = iter->data();\n    // quantize vector\n    std::string converted_vector;\n    
IndexQueryMeta converted_meta;\n    ret = reformer_->convert(vec, ometa, &converted_vector, &converted_meta);\n    if (ret != 0) {\n      LOG_ERROR(\"Rabitq hnsw convert failed, ret=%d\", ret);\n      return ret;\n    }\n\n\n    level_t level = alg_->get_random_level();\n    node_id_t id;\n\n    if (converted_vector.size() != entity_.vector_size()) {\n      LOG_ERROR(\n          \"Converted vector size %zu is not equal to entity vector size %zu\",\n          converted_vector.size(), entity_.vector_size());\n      return IndexError_InvalidArgument;\n    }\n    ret = entity_.add_vector(level, iter->key(), converted_vector.data(), &id);\n    if (ailego_unlikely(ret != 0)) {\n      return ret;\n    }\n    iter->next();\n  }\n\n  LOG_INFO(\"Finished save vector, start build graph...\");\n\n  auto task_group = threads->make_group();\n  if (!task_group) {\n    LOG_ERROR(\"Failed to create task group\");\n    return IndexError_Runtime;\n  }\n\n  std::atomic<node_id_t> finished{0};\n  for (size_t i = 0; i < threads->count(); ++i) {\n    task_group->submit(ailego::Closure ::New(this, &HnswRabitqBuilder::do_build,\n                                             i, threads->count(), provider,\n                                             &finished));\n  }\n\n  while (!task_group->is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex_);\n    cond_.wait_until(lk, std::chrono::system_clock::now() +\n                             std::chrono::seconds(check_interval_secs_));\n    if (error_.load(std::memory_order_acquire)) {\n      LOG_ERROR(\"Failed to build index while waiting finish\");\n      return errcode_;\n    }\n    LOG_INFO(\"Built cnt %zu, finished percent %.3f%%\",\n             static_cast<size_t>(finished.load()),\n             finished.load() * 100.0f / entity_.doc_cnt());\n  }\n  if (error_.load(std::memory_order_acquire)) {\n    LOG_ERROR(\"Failed to build index while waiting finish\");\n    return errcode_;\n  }\n  task_group->wait_finish();\n\n  
stats_.set_built_count(finished.load());\n  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);\n\n  state_ = BUILD_STATE_BUILT;\n  LOG_INFO(\"End HnswRabitqBuilder::build with RaBitQ quantization\");\n  return 0;\n}\n\nvoid HnswRabitqBuilder::do_build(node_id_t idx, size_t step_size,\n                                 IndexProvider::Pointer provider,\n                                 std::atomic<node_id_t> *finished) {\n  AILEGO_DEFER([&]() {\n    std::lock_guard<std::mutex> latch(mutex_);\n    cond_.notify_one();\n  });\n  HnswRabitqContext *ctx = new (std::nothrow) HnswRabitqContext(\n      meta_.dimension(), metric_,\n      std::shared_ptr<HnswRabitqEntity>(&entity_, [](HnswRabitqEntity *) {}));\n  if (ailego_unlikely(ctx == nullptr)) {\n    if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to create context\");\n      errcode_ = IndexError_NoMemory;\n    }\n    return;\n  }\n  HnswRabitqContext::Pointer auto_ptr(ctx);\n  ctx->set_provider(std::move(provider));\n  ctx->set_max_scan_num(entity_.doc_cnt());\n  int ret = ctx->init(HnswRabitqContext::kBuilderContext);\n  if (ret != 0) {\n    if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to init context\");\n      errcode_ = IndexError_Runtime;\n    }\n    return;\n  }\n\n  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {\n    ctx->reset_query(ctx->dist_calculator().get_vector(id));\n    ret = alg_->add_node(id, entity_.get_level(id), ctx);\n    if (ailego_unlikely(ret != 0)) {\n      if (!error_.exchange(true)) {\n        LOG_ERROR(\"Hnsw graph add node failed\");\n        errcode_ = ret;\n      }\n      return;\n    }\n    ctx->clear();\n    (*finished)++;\n  }\n}\n\nint HnswRabitqBuilder::dump(const IndexDumper::Pointer &dumper) {\n  if (state_ != BUILD_STATE_BUILT) {\n    LOG_INFO(\"Build the index before HnswRabitqBuilder::dump\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswRabitqBuilder::dump\");\n\n  
meta_.set_searcher(\"HnswRabitqSearcher\", HnswRabitqEntity::kRevision,\n                     ailego::Params());\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n\n  // Dump RaBitQ centroids first\n  if (converter_) {\n    ret = converter_->dump(dumper);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to dump RabitqConverter: %d\", ret);\n      return ret;\n    }\n    LOG_INFO(\"RaBitQ centroids dumped: %zu bytes, cost %zu ms\",\n             converter_->stats().dumped_size(),\n             static_cast<size_t>(converter_->stats().dumped_costtime()));\n  }\n\n  ret = entity_.dump(dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswRabitqBuilder dump index failed\");\n    return ret;\n  }\n\n  stats_.set_dumped_count(entity_.doc_cnt());\n  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);\n\n  LOG_INFO(\"End HnswRabitqBuilder::dump\");\n  return 0;\n}\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/parallel/thread_pool.h>\n#include \"zvec/core/framework/index_builder.h\"\n#include \"zvec/core/framework/index_converter.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"hnsw_rabitq_algorithm.h\"\n#include \"hnsw_rabitq_builder_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqBuilder : public IndexBuilder {\n public:\n  //! Constructor\n  HnswRabitqBuilder();\n\n  //! Initialize the builder\n  virtual int init(const IndexMeta &meta,\n                   const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  virtual int cleanup(void) override;\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer,\n                    IndexHolder::Pointer holder) override;\n\n  //! Train the data\n  virtual int train(const IndexTrainer::Pointer &trainer) override;\n\n\n  //! Build the index\n  virtual int build(IndexThreads::Pointer threads,\n                    IndexHolder::Pointer holder) override;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! 
Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n private:\n  void do_build(node_id_t idx, size_t step_size,\n                IndexProvider::Pointer provider,\n                std::atomic<node_id_t> *finished);\n\n  int train_converter_and_load_reformer(IndexHolder::Pointer holder);\n\n  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n\n private:\n  enum BUILD_STATE {\n    BUILD_STATE_INIT = 0,\n    BUILD_STATE_INITED = 1,\n    BUILD_STATE_TRAINED = 2,\n    BUILD_STATE_BUILT = 3\n  };\n\n  HnswRabitqBuilderEntity entity_{};\n  HnswRabitqAlgorithm::UPointer alg_;  // impl graph algorithm\n  uint32_t thread_cnt_{0};\n  uint32_t min_neighbor_cnt_{0};\n  uint32_t upper_max_neighbor_cnt_{\n      HnswRabitqEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t l0_max_neighbor_cnt_{HnswRabitqEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t ef_construction_{HnswRabitqEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswRabitqEntity::kDefaultScalingFactor};\n  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};\n\n  int errcode_{0};\n  std::atomic_bool error_{false};\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n  IndexConverter::Pointer converter_{};  // RaBitQ converter\n  IndexReformer::Pointer reformer_{};    // RaBitQ reformer\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n  Stats stats_{};\n\n  BUILD_STATE state_{BUILD_STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_builder_entity.h\"\n#include <iostream>\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqBuilderEntity::HnswRabitqBuilderEntity() {\n  update_ep_and_level(kInvalidNodeId, 0U);\n}\n\nint HnswRabitqBuilderEntity::cleanup() {\n  memory_quota_ = 0UL;\n  neighbors_size_ = 0U;\n  upper_neighbors_size_ = 0U;\n  padding_size_ = 0U;\n  vectors_buffer_.clear();\n  keys_buffer_.clear();\n  neighbors_buffer_.clear();\n  upper_neighbors_buffer_.clear();\n  neighbors_index_.clear();\n\n  vectors_buffer_.shrink_to_fit();\n  keys_buffer_.shrink_to_fit();\n  neighbors_buffer_.shrink_to_fit();\n  upper_neighbors_buffer_.shrink_to_fit();\n  neighbors_index_.shrink_to_fit();\n\n  this->HnswRabitqEntity::cleanup();\n\n  return 0;\n}\n\nint HnswRabitqBuilderEntity::init() {\n  size_t size = vector_size();\n\n  //! aligned size to 32\n  set_node_size(AlignSize(size));\n  //! 
if node size is aligned to 1k, the build performance will downgrade\n  if (node_size() % 1024 == 0) {\n    set_node_size(AlignSize(node_size() + 1));\n  }\n\n  padding_size_ = node_size() - size;\n\n  neighbors_size_ = neighbors_size();\n  upper_neighbors_size_ = upper_neighbors_size();\n\n  return 0;\n}\n\nint HnswRabitqBuilderEntity::reserve_space(size_t docs) {\n  if (memory_quota_ > 0 && (node_size() * docs + neighbors_size_ * docs +\n                                sizeof(NeighborIndex) * docs >\n                            memory_quota_)) {\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.reserve(node_size() * docs);\n  keys_buffer_.reserve(sizeof(key_t) * docs);\n  neighbors_buffer_.reserve(neighbors_size_ * docs);\n  neighbors_index_.reserve(docs);\n\n  return 0;\n}\n\nint HnswRabitqBuilderEntity::add_vector(level_t level, key_t key,\n                                        const void *vec, node_id_t *id) {\n  if (memory_quota_ > 0 &&\n      (vectors_buffer_.capacity() + keys_buffer_.capacity() +\n       neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +\n       neighbors_index_.capacity() * sizeof(NeighborIndex)) > memory_quota_) {\n    LOG_ERROR(\"Add vector failed, used memory exceed quota, cur_doc=%zu\",\n              static_cast<size_t>(doc_cnt()));\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.append(reinterpret_cast<const char *>(vec), vector_size());\n  vectors_buffer_.append(padding_size_, '\\0');\n  keys_buffer_.append(reinterpret_cast<const char *>(&key), sizeof(key));\n\n  // init level 0 neighbors\n  neighbors_buffer_.append(neighbors_size_, '\\0');\n\n  neighbors_index_.emplace_back(upper_neighbors_buffer_.size(), level);\n\n  // init upper layer neighbors\n  for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n    upper_neighbors_buffer_.append(upper_neighbors_size_, '\\0');\n  }\n\n  *id = (*mutable_doc_cnt())++;\n\n  return 0;\n}\n\nkey_t HnswRabitqBuilderEntity::get_key(node_id_t id) 
const {\n  return *(reinterpret_cast<const key_t *>(keys_buffer_.data() +\n                                           id * sizeof(key_t)));\n}\n\nconst void *HnswRabitqBuilderEntity::get_vector(node_id_t id) const {\n  return vectors_buffer_.data() + id * node_size();\n}\n\nint HnswRabitqBuilderEntity::get_vector(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nint HnswRabitqBuilderEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                        const void **vecs) const {\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = vectors_buffer_.data() + ids[i] * node_size();\n  }\n\n  return 0;\n}\n\nint HnswRabitqBuilderEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  std::vector<const void *> vecs(count);\n  get_vector(ids, count, vecs.data());\n  for (uint32_t i = 0; i < count; ++i) {\n    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\nconst Neighbors HnswRabitqBuilderEntity::get_neighbors(level_t level,\n                                                       node_id_t id) const {\n  const NeighborsHeader *hd = get_neighbor_header(level, id);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswRabitqBuilderEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  for (size_t i = 0; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n  hd->neighbor_cnt = neighbors.size();\n\n  // std::cout << \"id: \" << id << \", neighbour, id: \";\n  // for (size_t i = 0; i < neighbors.size(); ++i) {\n  //   if (i == neighbors.size()-1)\n  //     std::cout << neighbors[i].first << \", score:\" << neighbors[i].second 
<<\n  //     std::endl;\n  //   else\n  //     std::cout << neighbors[i].first << \", score:\" << neighbors[i].second <<\n  //     \", id: \";\n  // }\n\n  return 0;\n}\n\nvoid HnswRabitqBuilderEntity::add_neighbor(level_t level, node_id_t id,\n                                           uint32_t /*size*/,\n                                           node_id_t neighbor_id) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  hd->neighbors[hd->neighbor_cnt++] = neighbor_id;\n\n  return;\n}\n\nint HnswRabitqBuilderEntity::dump(const IndexDumper::Pointer &dumper) {\n  key_t *keys =\n      reinterpret_cast<key_t *>(const_cast<char *>(keys_buffer_.data()));\n  auto ret =\n      dump_segments(dumper, keys, [&](node_id_t id) { return get_level(id); });\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqBuilderEntity : public HnswRabitqEntity {\n public:\n  //! Add vector and key to hnsw entity, and local id will be saved to id\n  virtual int add_vector(level_t level, key_t key, const void *vec,\n                         node_id_t *id) override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! Batch get vectors feature data by keys\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! 
Get the node id's neighbors on graph level\n  const NeighborsHeader *get_neighbor_header(level_t level,\n                                             node_id_t id) const {\n    if (level == 0) {\n      return reinterpret_cast<const NeighborsHeader *>(\n          neighbors_buffer_.data() + neighbors_size_ * id);\n    } else {\n      size_t offset = neighbors_index_[id].offset;\n      return reinterpret_cast<const NeighborsHeader *>(\n          upper_neighbors_buffer_.data() + offset +\n          (level - 1) * upper_neighbors_size_);\n    }\n  }\n\n  //! Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  //! Replace node id in level's neighbors\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors)\n      override;\n\n  //! add a neighbor to id in graph level\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Dump the hnsw graph to dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Cleanup the entity\n  virtual int cleanup(void) override;\n\n public:\n  //! Constructor\n  HnswRabitqBuilderEntity();\n\n  //! Get the node graph level by id\n  level_t get_level(node_id_t id) const {\n    return neighbors_index_[id].level;\n  }\n\n  //! Init builerEntity\n  int init();\n\n  //! reserve buffer space for documents\n  //! @param  docs    number of documents\n  int reserve_space(size_t docs);\n\n  //! Set memory quota params\n  inline void set_memory_quota(size_t memory_quota) {\n    memory_quota_ = memory_quota;\n  }\n\n  //! Get neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! 
Get upper neighbors size\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n public:\n  HnswRabitqBuilderEntity(const HnswRabitqBuilderEntity &) = delete;\n  HnswRabitqBuilderEntity &operator=(const HnswRabitqBuilderEntity &) = delete;\n\n private:\n  friend class HnswRabitqSearcherEntity;\n  //! class internal used only\n  struct NeighborIndex {\n    NeighborIndex(size_t off, level_t l) : offset(off), level(l) {}\n    uint64_t offset : 48;\n    uint64_t level : 16;\n  };\n\n  std::string vectors_buffer_{};          // aligned vectors\n  std::string keys_buffer_{};             // keys buffer\n  std::string neighbors_buffer_{};        // level 0 neighbors buffer\n  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer\n\n  std::string sparse_data_buffer_{};  // aligned sparse data buffer\n  size_t sparse_data_offset_{0};      //\n\n  // upper layer offset + level in upper_neighbors_buffer_\n  std::vector<NeighborIndex> neighbors_index_{};\n  size_t memory_quota_{0UL};\n  size_t neighbors_size_{0U};        // level 0 neighbors size\n  size_t upper_neighbors_size_{0U};  // upper level neighbors size\n  size_t padding_size_{};            // padding size for each vector element\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_chunk.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_chunk.h\"\n#include <chrono>\n#include <random>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_helper.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\nint HnswRabitqChunkBroker::init_storage(size_t chunk_size) {\n  chunk_meta_.clear();\n  chunk_meta_.chunk_size = chunk_size;\n  chunk_meta_.create_time = ailego::Realtime::Seconds();\n  stats_.set_create_time(chunk_meta_.create_time);\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  //! alloc meta chunk\n  size_t size = sizeof(HnswChunkMeta);\n  size = (size + page_mask_) & (~page_mask_);\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  if (ailego_unlikely(!chunk_meta_segment_)) {\n    LOG_ERROR(\"Get meta segment failed\");\n    return IndexError_Runtime;\n  }\n\n  //! 
update meta info and write to storage\n  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint HnswRabitqChunkBroker::load_storage(size_t chunk_size) {\n  IndexStorage::MemoryBlock data_block;\n  size_t size = chunk_meta_segment_->read(0UL, data_block,\n                                          chunk_meta_segment_->data_size());\n  if (size != sizeof(HnswChunkMeta)) {\n    LOG_ERROR(\"Invalid hnsw meta chunk, read size=%zu chunk size=%zu\", size,\n              chunk_meta_segment_->data_size());\n    return IndexError_InvalidFormat;\n  }\n  std::memcpy(&chunk_meta_, data_block.data(), size);\n  if (chunk_meta_.chunk_size != chunk_size) {\n    LOG_ERROR(\n        \"Params hnsw chunk size=%zu mismatch from previous %zu \"\n        \"in index\",\n        chunk_size, (size_t)chunk_meta_.chunk_size);\n    return IndexError_Mismatch;\n  }\n\n  *stats_.mutable_check_point() = stg_->check_point();\n  stats_.set_revision_id(chunk_meta_.revision_id);\n  stats_.set_update_time(chunk_meta_.update_time);\n  stats_.set_create_time(chunk_meta_.create_time);\n\n  char create_time[32];\n  char update_time[32];\n  ailego::Realtime::Gmtime(chunk_meta_.create_time, \"%Y-%m-%d %H:%M:%S\",\n                           create_time, sizeof(create_time));\n  ailego::Realtime::Gmtime(chunk_meta_.update_time, \"%Y-%m-%d %H:%M:%S\",\n                           update_time, sizeof(update_time));\n  LOG_DEBUG(\n      \"Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu \"\n      \"upperNeighborChunks=%zu revisionId=%zu \"\n      \"createTime=%s updateTime=%s\",\n      (size_t)chunk_meta_.total_size, (size_t)chunk_meta_.chunk_size,\n      
(size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE],\n      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR],\n      (size_t)chunk_meta_.revision_id, create_time, update_time);\n\n  return 0;\n}\n\nint HnswRabitqChunkBroker::open(IndexStorage::Pointer stg,\n                                size_t max_index_size, size_t chunk_size,\n                                bool check_crc) {\n  if (ailego_unlikely(stg_)) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n  stg_ = std::move(stg);\n  if (stg_->isHugePage()) {\n    page_mask_ = ailego::MemoryHelper::HugePageSize() - 1;\n  } else {\n    page_mask_ = ailego::MemoryHelper::PageSize() - 1;\n  }\n  check_crc_ = check_crc;\n  max_chunks_size_ = max_index_size;\n  dirty_ = false;\n\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  chunk_meta_segment_ = stg_->get(segment_id);\n  if (!chunk_meta_segment_) {\n    LOG_DEBUG(\"Create new index\");\n    return init_storage(chunk_size);\n  }\n\n  return load_storage(chunk_size);\n}\n\nint HnswRabitqChunkBroker::close(void) {\n  flush(0UL);\n\n  stg_.reset();\n  check_crc_ = false;\n  dirty_ = false;\n\n  return 0;\n}\n\nint HnswRabitqChunkBroker::flush(uint64_t checkpoint) {\n  ailego_assert_with(chunk_meta_segment_, \"invalid meta segment\");\n\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  size_t size =\n      chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n  }\n\n  stg_->refresh(checkpoint);\n  int ret = stg_->flush();\n  if (ret == 0) {\n    (*stats_.mutable_check_point()) = checkpoint;\n  } else {\n    LOG_ERROR(\"Storage flush failed for %s\", IndexError::What(ret));\n  }\n  return ret;\n}\n\nstd::pair<int, Chunk::Pointer> HnswRabitqChunkBroker::alloc_chunk(\n   
 int type, uint64_t seq_id, size_t size) {\n  ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n\n  Chunk::Pointer chunk;\n  if (ailego_unlikely(!stg_)) {\n    LOG_ERROR(\"Init storage first\");\n    return std::make_pair(IndexError_Uninitialized, chunk);\n  }\n\n  //! check exist a empty chunk with the same name\n  chunk = get_chunk(type, seq_id);\n  if (chunk) {\n    if (ailego_unlikely(chunk->capacity() == size &&\n                        chunk->data_size() == 0UL)) {\n      LOG_ERROR(\"Exist invalid chunk size %zu, expect size %zu\",\n                chunk->capacity(), size);\n      chunk.reset();\n      return std::make_pair(IndexError_Runtime, chunk);\n    }\n    return std::make_pair(0, chunk);\n  }\n  //! align to page size\n  size = (size + page_mask_) & (~page_mask_);\n  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {\n    LOG_ERROR(\"No space to new a chunk, curIndexSize=%zu allocSize=%zu\",\n              (size_t)chunk_meta_.total_size, size);\n    return std::make_pair(IndexError_IndexFull, chunk);\n  }\n\n  std::string segment_id = make_segment_id(type, seq_id);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return std::make_pair(ret, chunk);\n  }\n  chunk_meta_.chunk_cnts[type] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {\n    LOG_ERROR(\"Storage append segment failed, wsize=%zu\", size);\n  }\n\n  chunk = get_chunk(type, seq_id);\n  return std::make_pair(chunk ? 
0 : IndexError_NoMemory, chunk);\n}\n\nChunk::Pointer HnswRabitqChunkBroker::get_chunk(int type,\n                                                uint64_t seq_id) const {\n  std::string segment_id = make_segment_id(type, seq_id);\n  return stg_->get(segment_id);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_chunk.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <string.h>\n#include <unistd.h>\n#include <atomic>\n#include <cstddef>\n#include <mutex>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_storage.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\nusing Chunk = IndexStorage::Segment;\n\nclass HnswRabitqChunkBroker {\n public:\n  typedef std::shared_ptr<HnswRabitqChunkBroker> Pointer;\n\n  enum CHUNK_TYPE {\n    CHUNK_TYPE_HEADER = 1,\n    CHUNK_TYPE_META = 2,\n    CHUNK_TYPE_NODE = 3,\n    CHUNK_TYPE_UPPER_NEIGHBOR = 4,\n    CHUNK_TYPE_NEIGHBOR_INDEX = 5,\n    CHUNK_TYPE_SPARSE_NODE = 6,\n    CHUNK_TYPE_MAX = 8\n  };\n  static constexpr size_t kDefaultChunkSeqId = 0UL;\n\n  HnswRabitqChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {}\n\n  //! Open storage\n  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,\n           bool check_crc);\n\n  int close(void);\n\n  int flush(uint64_t checkpoint);\n\n  //! 
alloc a new chunk with size, not thread-safe\n  std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id,\n                                             size_t size);\n\n  //! alloc a new chunk with chunk size\n  inline std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id) {\n    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);\n  }\n\n  Chunk::Pointer get_chunk(int type, uint64_t seq_id) const;\n\n  inline size_t get_chunk_cnt(int type) const {\n    ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n    return chunk_meta_.chunk_cnts[type];\n  }\n\n  inline bool dirty(void) const {\n    return dirty_;\n  }\n\n  inline void mark_dirty(void) {\n    if (!dirty_) {\n      dirty_ = true;\n      chunk_meta_.revision_id += 1;\n      stats_.set_revision_id(chunk_meta_.revision_id);\n    }\n  }\n\n  const IndexStorage::Pointer storage(void) const {\n    return stg_;\n  }\n\n private:\n  HnswRabitqChunkBroker(const HnswRabitqChunkBroker &) = delete;\n  HnswRabitqChunkBroker &operator=(const HnswRabitqChunkBroker &) = delete;\n\n  struct HnswChunkMeta {\n    HnswChunkMeta(void) {\n      memset(this, 0, sizeof(HnswChunkMeta));\n    }\n    void clear() {\n      memset(this, 0, sizeof(HnswChunkMeta));\n    }\n\n    uint64_t chunk_cnts[CHUNK_TYPE_MAX];\n    uint64_t chunk_size;   // size of per chunk\n    uint64_t total_size;   // total size of allocated chunk\n    uint64_t revision_id;  // index revision\n    uint64_t create_time;\n    uint64_t update_time;\n    uint64_t reserved[3];\n  };\n\n  static_assert(sizeof(HnswChunkMeta) % 32 == 0,\n                \"HnswChunkMeta must be aligned with 32 bytes\");\n\n  //! Init the storage after open an empty index\n  int init_storage(size_t chunk_size);\n\n  //! 
Load index from storage\n  int load_storage(size_t chunk_size);\n\n  static inline const std::string make_segment_id(int type, uint64_t seq_id) {\n    return \"HnswT\" + ailego::StringHelper::ToString(type) + \"S\" +\n           ailego::StringHelper::ToString(seq_id);\n  }\n\n private:\n  IndexStreamer::Stats &stats_;\n  HnswChunkMeta chunk_meta_{};\n  size_t page_mask_{0UL};\n  size_t max_chunks_size_{0UL};\n  IndexStorage::Pointer stg_{};\n  IndexStorage::Segment::Pointer chunk_meta_segment_{};\n  bool check_crc_{false};\n  bool dirty_{false};  // set to true if the index is modified; the flag\n                       // will not be cleared even if flushed\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_context.h\"\n#include <chrono>\n#include \"hnsw_rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqContext::HnswRabitqContext(size_t dimension,\n                                     const IndexMetric::Pointer &metric,\n                                     const HnswRabitqEntity::Pointer &entity)\n    : IndexContext(metric),\n      entity_(entity),\n      add_dc_(entity_.get(), metric, dimension) {}\n\nHnswRabitqContext::HnswRabitqContext(const IndexMetric::Pointer &metric,\n                                     const HnswRabitqEntity::Pointer &entity)\n    : IndexContext(metric), entity_(entity), add_dc_(entity_.get(), metric) {}\n\nHnswRabitqContext::~HnswRabitqContext() {\n  visit_filter_.destroy();\n}\n\nint HnswRabitqContext::init(ContextType type) {\n  int ret;\n  uint32_t doc_cnt;\n\n  type_ = type;\n\n  switch (type) {\n    case kBuilderContext:\n      ret = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      break;\n\n    case kSearcherContext:\n      ret = visit_filter_.init(filter_mode_, 
entity_->doc_cnt(), max_scan_num_,\n                               negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      break;\n\n    case kStreamerContext:\n      // maxScanNum is unknown if inited from streamer, so the docCnt may\n      // change. we need to compute maxScanNum by scan ratio, and preserve\n      // max_doc_cnt space from visit filter\n      doc_cnt = entity_->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      ret = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n\n      check_need_adjuct_ctx();\n      break;\n\n    default:\n      LOG_ERROR(\"Init context failed\");\n      return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswRabitqContext::update(const ailego::Params &params) {\n  auto update_visit_filter_param = [&]() {\n    bool need_update = false;\n    std::string p;\n    switch (type_) {\n      case kSearcherContext:\n        p = PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n      case kStreamerContext:\n        p = PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n    }\n\n    if (params.has(p)) {\n      bool bf_enabled;\n      params.get(p, &bf_enabled);\n      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {\n        need_update = true;\n        filter_mode_ =\n            bf_enabled ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap;\n      }\n    }\n\n    float prob = negative_probability_;\n    p.clear();\n    switch (type_) {\n      case kSearcherContext:\n        p = PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n      case kStreamerContext:\n        p = PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n    }\n    params.get(p, &prob);\n    if (filter_mode_ == VisitFilter::BloomFilter &&\n        std::abs(prob - negative_probability_) > 1e-6) {\n      need_update = true;\n    }\n    if (need_update) {\n      visit_filter_.destroy();\n      int max_doc_cnt = 0;\n      if (type_ == kSearcherContext) {\n        max_doc_cnt = entity_->doc_cnt();\n      } else {\n        max_doc_cnt = reserve_max_doc_cnt_;\n      }\n      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,\n                                   negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n    }\n    return 0;\n  };\n\n  switch (type_) {\n    case kSearcherContext:\n      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_EF)) {\n        params.get(PARAM_HNSW_RABITQ_SEARCHER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n      }\n\n      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO)) {\n        params.get(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n        max_scan_num_ =\n            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());\n        max_scan_num_ = std::max(10000U, max_scan_num_);\n      }\n\n      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    case kStreamerContext:\n      if (params.has(PARAM_HNSW_RABITQ_STREAMER_EF)) {\n        
params.get(PARAM_HNSW_RABITQ_STREAMER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n      }\n      params.get(PARAM_HNSW_RABITQ_STREAMER_EF, &ef_);\n      params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n      params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n      params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n      if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n        LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n                  PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO.c_str());\n        return IndexError_InvalidArgument;\n      }\n      if (max_scan_limit_ < min_scan_limit_) {\n        LOG_ERROR(\"[%s] must be >= [%s]\",\n                  PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT.c_str(),\n                  PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT.c_str());\n        return IndexError_InvalidArgument;\n      }\n\n      if (params.has(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    default:\n      LOG_ERROR(\"update context failed, type=%zu\", static_cast<size_t>(type_));\n      return IndexError_Runtime;\n  }\n}\n\nint HnswRabitqContext::update_context(ContextType type, const IndexMeta &meta,\n                                      const IndexMetric::Pointer &metric,\n                                      const HnswRabitqEntity::Pointer &entity,\n                                      uint32_t magic_num) {\n  uint32_t doc_cnt;\n\n  if (ailego_unlikely(type != type_)) {\n    LOG_ERROR(\n        \"HnswRabitqContext doesn't support shared by different type, \"\n        \"src=%u dst=%u\",\n        type_, type);\n    return IndexError_Unsupported;\n  }\n\n  magic_ = kInvalidMgic;\n\n  // TODO: support change filter mode?\n  switch (type) {\n    case kBuilderContext:\n      
LOG_ERROR(\"BuildContext doesn't support update\");\n      return IndexError_NotImplemented;\n\n    case kSearcherContext:\n      if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {\n        LOG_ERROR(\"Reset filter failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    case kStreamerContext:\n      doc_cnt = entity->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {\n        LOG_ERROR(\"Reset filter failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      update_heap_.limit(entity->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    default:\n      LOG_ERROR(\"update context failed\");\n      return IndexError_Runtime;\n  }\n\n  entity_ = entity;\n  dc().update(entity_.get(), metric, meta.dimension());\n  magic_ = magic_num;\n  level_topks_.clear();\n\n  return 0;\n}\n\nvoid HnswRabitqContext::fill_random_to_topk_full(void) {\n  static std::mt19937 mt(\n      std::chrono::system_clock::now().time_since_epoch().count());\n  std::uniform_int_distribution<node_id_t> dt(0, entity_->doc_cnt() - 1);\n  std::function<node_id_t()> gen;\n  node_id_t seqid;\n  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };\n  if (this->filter().is_valid()) {\n    myfilter = [&](node_id_t id) {\n      return this->filter()(entity_->get_key(id));\n    };\n  }\n\n  if (topk_heap_.limit() < entity_->doc_cnt() / 2) {\n    gen = [&](void) { return dt(mt); };\n  } else {\n    // If topk limit is big value, gen sequential id from an random initial\n    seqid = dt(mt);\n    gen = [&](void) {\n      seqid = seqid == (entity_->doc_cnt() - 1) ? 
0 : (seqid + 1);\n      return seqid;\n    };\n  }\n\n  for (size_t i = 0; !topk_heap_.full() && i < entity_->doc_cnt(); ++i) {\n    const auto id = gen();\n    if (!visit_filter_.visited(id) && !myfilter(id)) {\n      visit_filter_.set_visited(id);\n      topk_heap_.emplace(id, dc().dist(id));\n    }\n  }\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/logger/logger.h>\n#include \"utility/visit_filter.h\"\n#include \"zvec/core/framework/index_context.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"hnsw_rabitq_dist_calculator.h\"\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqContext : public IndexContext {\n public:\n  //! Index Context Pointer\n  typedef std::unique_ptr<HnswRabitqContext> Pointer;\n\n  enum ContextType {\n    kUnknownContext = 0,\n    kSearcherContext = 1,\n    kBuilderContext = 2,\n    kStreamerContext = 3\n  };\n\n  //! Construct\n  HnswRabitqContext(size_t dimension, const IndexMetric::Pointer &metric,\n                    const HnswRabitqEntity::Pointer &entity);\n\n  //! Construct\n  HnswRabitqContext(const IndexMetric::Pointer &metric,\n                    const HnswRabitqEntity::Pointer &entity);\n\n  //! Destructor\n  virtual ~HnswRabitqContext();\n\n public:\n  //! Set topk of search result\n  virtual void set_topk(uint32_t val) override {\n    topk_ = val;\n    topk_heap_.limit(std::max(val, ef_));\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(void) const override {\n    return results_[0];\n  }\n\n  //! 
Retrieve search result\n  virtual const IndexDocumentList &result(size_t idx) const override {\n    return results_[idx];\n  }\n\n  //! Retrieve result object for output\n  virtual IndexDocumentList *mutable_result(size_t idx) override {\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    return &results_[idx];\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(\n      size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  virtual uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  //! Set mode of debug\n  virtual void set_debug_mode(bool enable) override {\n    debug_mode_ = enable;\n  }\n\n  //! Retrieve mode of debug\n  virtual bool debug_mode(void) const override {\n    return this->debugging();\n  }\n\n  //! Retrieve string of debug\n  virtual std::string debug_string(void) const override {\n    char buf[4096];\n    size_t size = snprintf(\n        buf, sizeof(buf),\n        \"scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u\",\n        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,\n        stats_visit_dup_cnt_);\n    return std::string(buf, size);\n  }\n\n  //! Update the parameters of context\n  virtual int update(const ailego::Params &params) override;\n\n public:\n  //! Init context\n  int init(ContextType type);\n\n  //! 
Update context, the context may be shared by different searcher/streamer\n  int update_context(ContextType type, const IndexMeta &meta,\n                     const IndexMetric::Pointer &metric,\n                     const HnswRabitqEntity::Pointer &entity,\n                     uint32_t magic_num);\n\n  inline const HnswRabitqEntity &get_entity() const {\n    return *entity_;\n  }\n\n  inline void resize_results(size_t size) {\n    if (group_by_search()) {\n      group_results_.resize(size);\n    } else {\n      results_.resize(size);\n    }\n  }\n\n  inline void topk_to_result() {\n    return topk_to_result(0);\n  }\n\n  //! Construct result from topk heap, result will be normalized\n  inline void topk_to_result(uint32_t idx) {\n    if (group_by_search()) {\n      topk_to_group_result(idx);\n    } else {\n      topk_to_single_result(idx);\n    }\n  }\n\n  inline void topk_to_single_result(uint32_t idx) {\n    if (force_padding_topk_ && !topk_heap_.full() &&\n        topk_heap_.size() < entity_->doc_cnt()) {\n      this->fill_random_to_topk_full();\n    }\n    if (ailego_unlikely(topk_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));\n    topk_heap_.sort();\n    results_[idx].clear();\n\n    for (int i = 0; i < size; ++i) {\n      auto score = topk_heap_[i].second;\n      if (score.est_dist > this->threshold()) {\n        break;\n      }\n\n      node_id_t id = topk_heap_[i].first;\n      if (fetch_vector_) {\n        results_[idx].emplace_back(entity_->get_key(id), score.est_dist, id,\n                                   entity_->get_vector(id));\n      } else {\n        results_[idx].emplace_back(entity_->get_key(id), score.est_dist, id);\n      }\n    }\n\n    return;\n  }\n\n  //! 
Construct result from topk heap, result will be normalized\n  inline void topk_to_group_result(uint32_t idx) {\n    ailego_assert_with(idx < group_results_.size(), \"invalid idx\");\n\n    group_results_[idx].clear();\n\n    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;\n    std::vector<std::pair<std::string, ResultRecord>> best_score_in_groups;\n    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();\n         itr++) {\n      const std::string &group_id = (*itr).first;\n      auto &heap = (*itr).second;\n      heap.sort();\n\n      if (heap.size() > 0) {\n        ResultRecord best_score = heap[0].second;\n        best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n      }\n    }\n\n    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n              [](const std::pair<std::string, ResultRecord> &a,\n                 const std::pair<std::string, ResultRecord> &b) -> int {\n                return a.second < b.second;\n              });\n\n    // truncate to group num\n    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();\n         ++i) {\n      const std::string &group_id = best_score_in_groups[i].first;\n\n      group_topk_list.emplace_back(\n          std::make_pair(group_id, group_topk_heaps_[group_id]));\n    }\n\n    group_results_[idx].resize(group_topk_list.size());\n\n    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {\n      const std::string &group_id = group_topk_list[i].first;\n      group_results_[idx][i].set_group_id(group_id);\n\n      uint32_t size = std::min(\n          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));\n\n      for (uint32_t j = 0; j < size; ++j) {\n        auto score = group_topk_list[i].second[j].second;\n        if (score > this->threshold()) {\n          break;\n        }\n\n        node_id_t id = group_topk_list[i].second[j].first;\n\n        if (fetch_vector_) {\n          
group_results_[idx][i].mutable_docs()->emplace_back(\n              entity_->get_key(id), score.est_dist, id,\n              entity_->get_vector(id));\n        } else {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n              entity_->get_key(id), score.est_dist, id);\n        }\n      }\n    }\n  }\n\n  inline void reset_query(const void *query) {\n    if (auto query_preprocess_func = index_metric_->get_query_preprocess_func();\n        query_preprocess_func != nullptr) {\n      size_t dim = dc().dimension();\n      preprocess_buffer_.resize(dim);\n      memcpy(preprocess_buffer_.data(), query, dim);\n      query_preprocess_func(preprocess_buffer_.data(), dim);\n      query = preprocess_buffer_.data();\n    }\n\n    dc().reset_query(query);\n    dc().clear_compare_cnt();\n    query_ = query;\n  }\n\n  inline HnswRabitqAddDistCalculator &dist_calculator() {\n    return dc();\n  }\n\n  inline TopkHeap &topk_heap() {\n    return topk_heap_;\n  }\n\n  inline TopkHeap &update_heap() {\n    return update_heap_;\n  }\n\n  inline VisitFilter &visit_filter() {\n    return visit_filter_;\n  }\n\n  inline CandidateHeap &candidates() {\n    return candidates_;\n  }\n\n  inline void set_max_scan_num(uint32_t max_scan_num) {\n    max_scan_num_ = max_scan_num;\n  }\n\n  inline void set_max_scan_limit(uint32_t max_scan_limit) {\n    max_scan_limit_ = max_scan_limit;\n  }\n\n  inline void set_min_scan_limit(uint32_t min_scan_limit) {\n    min_scan_limit_ = min_scan_limit;\n  }\n\n  inline void set_ef(uint32_t v) {\n    ef_ = v;\n  }\n\n  inline void set_filter_mode(uint32_t v) {\n    filter_mode_ = v;\n  }\n\n  inline void set_filter_negative_probability(float v) {\n    negative_probability_ = v;\n  }\n\n  inline void set_max_scan_ratio(float v) {\n    max_scan_ratio_ = v;\n  }\n\n  virtual void set_magic(uint32_t v) {\n    magic_ = v;\n  }\n\n  virtual void set_force_padding_topk(bool v) {\n    force_padding_topk_ = v;\n  }\n\n  void 
set_bruteforce_threshold(uint32_t v) override {\n    bruteforce_threshold_ = v;\n  }\n\n  inline uint32_t get_bruteforce_threshold() const {\n    return bruteforce_threshold_;\n  }\n\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n\n  //! Reset context\n  void reset(void) override {\n    set_filter(nullptr);\n    reset_threshold();\n    set_fetch_vector(false);\n    set_group_params(0, 0);\n    reset_group_by();\n  }\n\n  inline std::map<std::string, TopkHeap> &group_topk_heaps() {\n    return group_topk_heaps_;\n  }\n\n  inline TopkHeap &level_topk(int level) {\n    if (ailego_unlikely(level_topks_.size() <= static_cast<size_t>(level))) {\n      int cur_level = level_topks_.size();\n      level_topks_.resize(level + 1);\n      for (; cur_level <= level; ++cur_level) {\n        size_t heap_size = std::max(entity_->neighbor_cnt(cur_level),\n                                    entity_->ef_construction());\n        level_topks_[cur_level].clear();\n        level_topks_[cur_level].limit(heap_size);\n      }\n    }\n\n    return level_topks_[level];\n  }\n\n  inline void check_need_adjuct_ctx(void) {\n    check_need_adjuct_ctx(entity_->doc_cnt());\n  }\n\n  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {\n    if (cur_doc > kMaxReserveDocCnt) {\n      return kMaxReserveDocCnt;\n    } else if (cur_doc < kMinReserveDocCnt) {\n      return kMinReserveDocCnt;\n    }\n    return cur_doc;\n  }\n\n  //! 
candidates heap and visitfilter need to resize as doc cnt growing up\n  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {\n    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {\n      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {\n        reserve_max_doc_cnt_ =\n            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);\n      }\n      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);\n      max_scan_num_ = max_scan_cnt;\n      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);\n      candidates_.clear();\n      candidates_.limit(max_scan_num_);\n    }\n  }\n\n  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {\n    uint32_t max_scan = max_doc_cnt * max_scan_ratio_;\n    if (max_scan < min_scan_limit_) {\n      max_scan = min_scan_limit_;\n    } else if (max_scan > max_scan_limit_) {\n      max_scan = max_scan_limit_;\n    }\n    return max_scan;\n  }\n\n  inline size_t get_scan_num() const {\n    return dc().compare_cnt();\n  }\n\n  inline uint64_t reach_scan_limit() const {\n    return dc().compare_cnt() >= max_scan_num_;\n  }\n\n  inline bool error() const {\n    return dc().error();\n  }\n\n  inline void clear() {\n    add_dc_.clear();\n    if (ailego_unlikely(this->debugging())) {\n      stats_get_neighbors_cnt_ = 0u;\n      stats_get_vector_cnt_ = 0u;\n      stats_visit_dup_cnt_ = 0u;\n    }\n    // do not clear results_ for the next query will need it\n    for (auto &it : results_) {\n      it.clear();\n    }\n  }\n\n  uint32_t *mutable_stats_get_neighbors() {\n    return &stats_get_neighbors_cnt_;\n  }\n\n  uint32_t *mutable_stats_get_vector() {\n    return &stats_get_vector_cnt_;\n  }\n\n  uint32_t *mutable_stats_visit_dup_cnt() {\n    return &stats_visit_dup_cnt_;\n  }\n\n  inline bool debugging(void) const {\n    return debug_mode_;\n  }\n\n  inline void update_dist_caculator_distance(\n      const IndexMetric::MatrixDistance &distance,\n      const 
IndexMetric::MatrixBatchDistance &batch_distance) {\n    dc().update_distance(distance, batch_distance);\n  }\n\n  //! Get topk\n  inline uint32_t topk() const override {\n    return topk_;\n  }\n\n  //! Get group topk\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n\n  //! Get group num\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n\n  //! Get if group by search\n  inline bool group_by_search() {\n    return group_num_ > 0;\n  }\n\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n\n    topk_ = group_topk_ * group_num_;\n\n    topk_heap_.limit(std::max(topk_, ef_));\n\n    group_topk_heaps_.clear();\n  }\n\n  void set_provider(IndexProvider::Pointer provider) {\n    add_dc_.set_provider(std::move(provider));\n  }\n\n  const void *query() const {\n    return query_;\n  }\n\n private:\n  inline HnswRabitqAddDistCalculator &dc() {\n    return add_dc_;\n  }\n\n  inline const HnswRabitqAddDistCalculator &dc() const {\n    return add_dc_;\n  }\n\n private:\n  // Filling random nodes if topk not full\n  void fill_random_to_topk_full(void);\n\n  constexpr static uint32_t kTriggerReserveCnt = 4096UL;\n  constexpr static uint32_t kMinReserveDocCnt = 4096UL;\n  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;\n  constexpr static uint32_t kInvalidMgic = -1U;\n\n private:\n  HnswRabitqEntity::Pointer entity_;\n  HnswRabitqAddDistCalculator add_dc_;\n  IndexMetric::Pointer metric_;\n\n  bool debug_mode_{false};\n  bool force_padding_topk_{false};\n  uint32_t max_scan_num_{0};\n  uint32_t max_scan_limit_{0};\n  uint32_t min_scan_limit_{0};\n  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};\n  uint32_t topk_{0};\n  uint32_t group_topk_{0};\n  uint32_t filter_mode_{VisitFilter::ByteMap};\n  float negative_probability_{HnswRabitqEntity::kDefaultBFNegativeProbability};\n  uint32_t 
ef_{HnswRabitqEntity::kDefaultEf};\n  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};\n  uint32_t magic_{0U};\n  std::vector<IndexDocumentList> results_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n  TopkHeap topk_heap_{};\n  TopkHeap update_heap_{};\n  std::vector<TopkHeap> level_topks_{};\n  CandidateHeap candidates_{};\n  VisitFilter visit_filter_{};\n  uint32_t bruteforce_threshold_{};\n  bool fetch_vector_{false};\n\n  uint32_t group_num_{0};\n  std::map<std::string, TopkHeap> group_topk_heaps_{};\n\n  uint32_t type_{kUnknownContext};\n  //! debug stats info\n  uint32_t stats_get_neighbors_cnt_{0u};\n  uint32_t stats_get_vector_cnt_{0u};\n  uint32_t stats_visit_dup_cnt_{0u};\n  std::string preprocess_buffer_;\n  const void *query_{nullptr};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.cc",
    "content": "// Copyright 2025-present the centaurdb project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.h\"\n#include \"zvec/core/framework/index_error.h\"\n\nnamespace zvec::core {\n\nint HnswRabitqAddDistCalculator::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  for (uint32_t i = 0; i < count; ++i) {\n    const node_id_t id = ids[i];\n    key_t key = entity_->get_key(id);\n    if (key == kInvalidKey) {\n      return IndexError_NoExist;\n    }\n    IndexStorage::MemoryBlock block;\n    int ret = provider_->get_vector(key, block);\n    if (ret != 0) {\n      return ret;\n    }\n    vec_blocks.push_back(std::move(block));\n  }\n  return 0;\n}\n\n}  // namespace zvec::core\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"zvec/core/framework/index_meta.h\"\n#include \"zvec/core/framework/index_metric.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! HnswRabitqAddDistCalculator is only used for index construction\nclass HnswRabitqAddDistCalculator {\n public:\n  typedef std::shared_ptr<HnswRabitqAddDistCalculator> Pointer;\n\n public:\n  enum DistType {\n    DIST_NONE = 0,\n    DIST_DENSE = 1,\n    DIST_HYBRID = 2,\n    DIST_SPARSE = 3\n  };\n\n public:\n  //! Constructor\n  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,\n                              const IndexMetric::Pointer &metric, uint32_t dim)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(nullptr),\n        dim_(dim),\n        compare_cnt_(0) {}\n\n  //! Constructor\n  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,\n                              const IndexMetric::Pointer &metric, uint32_t dim,\n                              const void *query)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(query),\n        dim_(dim),\n        compare_cnt_(0) {}\n\n  //! 
Constructor\n  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,\n                              const IndexMetric::Pointer &metric)\n      : entity_(entity),\n        distance_(metric->distance()),\n        batch_distance_(metric->batch_distance()),\n        query_(nullptr),\n        dim_(0),\n        compare_cnt_(0) {}\n\n  void update(const HnswRabitqEntity *entity,\n              const IndexMetric::Pointer &metric) {\n    entity_ = entity;\n    distance_ = metric->distance();\n    batch_distance_ = metric->batch_distance();\n  }\n\n  void update(const HnswRabitqEntity *entity,\n              const IndexMetric::Pointer &metric, uint32_t dim) {\n    entity_ = entity;\n    distance_ = metric->distance();\n    batch_distance_ = metric->batch_distance();\n    dim_ = dim;\n  }\n\n  inline void update_distance(\n      const IndexMetric::MatrixDistance &distance,\n      const IndexMetric::MatrixBatchDistance &batch_distance) {\n    distance_ = distance;\n    batch_distance_ = batch_distance;\n  }\n\n  //! Reset query vector data\n  inline void reset_query(const void *query) {\n    error_ = false;\n    query_ = query;\n  }\n\n  //! Returns distance\n  inline dist_t dist(const void *vec_lhs, const void *vec_rhs) {\n    if (ailego_unlikely(vec_lhs == nullptr || vec_rhs == nullptr)) {\n      LOG_ERROR(\"Nullptr of dense vector\");\n      error_ = true;\n      return 0.0f;\n    }\n\n    float score{0.0f};\n\n    distance_(vec_lhs, vec_rhs, dim_, &score);\n\n    return score;\n  }\n\n  //! Returns distance between query and vec.\n  inline dist_t dist(const void *vec) {\n    compare_cnt_++;\n\n    return dist(vec, query_);\n  }\n\n  //! 
Return distance between query and node id.\n  inline dist_t dist(node_id_t id) {\n    compare_cnt_++;\n\n    const void *feat = get_vector(id);\n    if (ailego_unlikely(feat == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector, id=%u\", id);\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(feat, query_);\n  }\n\n  //! Return dist node lhs between node rhs\n  inline dist_t dist(node_id_t lhs, node_id_t rhs) {\n    compare_cnt_++;\n\n    const void *feat = get_vector(lhs);\n    const void *query = get_vector(rhs);\n    if (ailego_unlikely(feat == nullptr || query == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector\");\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(feat, query);\n  }\n\n  dist_t operator()(const void *vec) {\n    return dist(vec);\n  }\n\n  dist_t operator()(id_t i) {\n    return dist(i);\n  }\n\n  dist_t operator()(id_t lhs, id_t rhs) {\n    return dist(lhs, rhs);\n  }\n\n  void batch_dist(const void **vecs, size_t num, dist_t *distances) {\n    compare_cnt_++;\n\n    batch_distance_(vecs, query_, num, dim_, distances);\n  }\n\n  inline dist_t batch_dist(node_id_t id) {\n    compare_cnt_++;\n\n    const void *feat = get_vector(id);\n    if (ailego_unlikely(feat == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector, id=%u\", id);\n      error_ = true;\n      return 0.0f;\n    }\n    dist_t score = 0;\n    batch_distance_(&feat, query_, 1, dim_, &score);\n\n    return score;\n  }\n\n  inline void clear() {\n    compare_cnt_ = 0;\n    error_ = false;\n  }\n\n  inline void clear_compare_cnt() {\n    compare_cnt_ = 0;\n  }\n\n  inline bool error() const {\n    return error_;\n  }\n\n  //! 
Get distances compute times\n  inline uint32_t compare_cnt() const {\n    return compare_cnt_;\n  }\n\n  inline uint32_t dimension() const {\n    return dim_;\n  }\n\n  void set_provider(IndexProvider::Pointer provider) {\n    provider_ = std::move(provider);\n  }\n\n  int get_vector(const node_id_t *ids, uint32_t count,\n                 std::vector<IndexStorage::MemoryBlock> &vec_blocks) const;\n\n  const void *get_vector(node_id_t id) const {\n    key_t key = entity_->get_key(id);\n    if (key == kInvalidKey) {\n      return nullptr;\n    }\n    return provider_->get_vector(key);\n  }\n\n private:\n  HnswRabitqAddDistCalculator(const HnswRabitqAddDistCalculator &) = delete;\n  HnswRabitqAddDistCalculator &operator=(const HnswRabitqAddDistCalculator &) =\n      delete;\n\n private:\n  const HnswRabitqEntity *entity_;\n  IndexMetric::MatrixDistance distance_;\n  IndexMetric::MatrixBatchDistance batch_distance_;\n\n  const void *query_;\n  uint32_t dim_;\n\n  uint32_t compare_cnt_;        // record distance compute times\n  uint32_t compare_cnt_batch_;  // record batch distance compute time\n  bool error_{false};\n\n  // get raw vector\n  IndexProvider::Pointer provider_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_entity.h\"\n#include <rabitqlib/index/query.hpp>\n#include \"utility/sparse_utility.h\"\n#include \"zvec/core/framework/index_stats.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst std::string HnswRabitqEntity::kGraphHeaderSegmentId = \"graph.header\";\nconst std::string HnswRabitqEntity::kGraphFeaturesSegmentId = \"graph.features\";\nconst std::string HnswRabitqEntity::kGraphKeysSegmentId = \"graph.keys\";\nconst std::string HnswRabitqEntity::kGraphNeighborsSegmentId =\n    \"graph.neighbors\";\nconst std::string HnswRabitqEntity::kGraphOffsetsSegmentId = \"graph.offsets\";\nconst std::string HnswRabitqEntity::kGraphMappingSegmentId = \"graph.mapping\";\nconst std::string HnswRabitqEntity::kHnswHeaderSegmentId = \"hnsw.header\";\nconst std::string HnswRabitqEntity::kHnswNeighborsSegmentId = \"hnsw.neighbors\";\nconst std::string HnswRabitqEntity::kHnswOffsetsSegmentId = \"hnsw.offsets\";\n\nvoid HnswRabitqEntity::update_rabitq_params_and_vector_size(\n    uint32_t dimension) {\n  uint32_t padded_dim = ((dimension + 63) / 64) * 64;\n  header_.graph.padded_dim = padded_dim;\n  // BinDataMap layout: bin_code (padded_dim/8) + f_add + f_rescale + f_error\n  header_.graph.size_bin_data =\n      rabitqlib::BinDataMap<float>::data_bytes(padded_dim);\n  // ExDataMap layout: ex_code (padded_dim*ex_bits/8) + f_add_ex + 
f_rescale_ex\n  header_.graph.size_ex_data = rabitqlib::ExDataMap<float>::data_bytes(\n      padded_dim, header_.graph.ex_bits);\n  // quantized vector format: cluster_id + bin_data + ex_data\n  header_.graph.vector_size =\n      sizeof(uint32_t) + size_bin_data() + size_ex_data();\n}\n\nint HnswRabitqEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                                        size_t data_size,\n                                        size_t *padding_size) {\n  *padding_size = AlignSize(data_size) - data_size;\n  if (*padding_size == 0) {\n    return 0;\n  }\n\n  std::string padding(*padding_size, '\\0');\n  if (dumper->write(padding.data(), *padding_size) != *padding_size) {\n    LOG_ERROR(\"Append padding failed, size %zu\", *padding_size);\n    return IndexError_WriteData;\n  }\n  return 0;\n}\n\nint64_t HnswRabitqEntity::dump_segment(const IndexDumper::Pointer &dumper,\n                                       const std::string &segment_id,\n                                       const void *data, size_t size) const {\n  size_t len = dumper->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect: %zu, actual: %zu\",\n              segment_id.c_str(), size, len);\n    return IndexError_WriteData;\n  }\n\n  size_t padding_size = AlignSize(size) - size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, '\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      LOG_ERROR(\"Append padding failed, size %zu\", padding_size);\n      return IndexError_WriteData;\n    }\n  }\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  int ret = dumper->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return ret;\n  }\n\n  return len + padding_size;\n}\n\nint64_t HnswRabitqEntity::dump_header(const IndexDumper::Pointer &dumper,\n                                      
const HNSWHeader &hd) const {\n  //! dump basic graph header. header is aligned and does not need padding\n  int64_t graph_hd_size =\n      dump_segment(dumper, kGraphHeaderSegmentId, &hd.graph, hd.graph.size);\n  if (graph_hd_size < 0) {\n    return graph_hd_size;\n  }\n\n  //! dump hnsw header. header is aligned and does not need padding\n  int64_t hnsw_hd_size =\n      dump_segment(dumper, kHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);\n  if (hnsw_hd_size < 0) {\n    return hnsw_hd_size;\n  }\n\n  return graph_hd_size + hnsw_hd_size;\n}\n\nvoid HnswRabitqEntity::reshuffle_vectors(\n    const std::function<level_t(node_id_t)> & /*get_level*/,\n    std::vector<node_id_t> * /*n2o_mapping*/,\n    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {\n  // TODO\n  return;\n}\n\nint64_t HnswRabitqEntity::dump_mapping_segment(\n    const IndexDumper::Pointer &dumper, const key_t *keys) const {\n  std::vector<node_id_t> mapping(doc_cnt());\n\n  std::iota(mapping.begin(), mapping.end(), 0U);\n  std::sort(mapping.begin(), mapping.end(),\n            [&](node_id_t i, node_id_t j) { return keys[i] < keys[j]; });\n\n  size_t size = mapping.size() * sizeof(node_id_t);\n\n  return dump_segment(dumper, kGraphMappingSegmentId, mapping.data(), size);\n}\n\nint64_t HnswRabitqEntity::dump_segments(\n    const IndexDumper::Pointer &dumper, key_t *keys,\n    const std::function<level_t(node_id_t)> &get_level) const {\n  HNSWHeader dump_hd(header());\n\n  dump_hd.graph.node_size = AlignSize(vector_size());\n\n  std::vector<node_id_t> n2o_mapping;  // map new id to origin id\n  std::vector<node_id_t> o2n_mapping;  // map origin id to new id\n  reshuffle_vectors(get_level, &n2o_mapping, &o2n_mapping, keys);\n  if (!o2n_mapping.empty()) {\n    dump_hd.hnsw.entry_point = o2n_mapping[entry_point()];\n  }\n\n  //! Dump header\n  int64_t hd_size = dump_header(dumper, dump_hd);\n  if (hd_size < 0) {\n    return hd_size;\n  }\n\n  //! 
Dump vectors\n  int64_t vecs_size = dump_vectors(dumper, n2o_mapping);\n  if (vecs_size < 0) {\n    return vecs_size;\n  }\n\n  //! Dump neighbors\n  auto neighbors_size =\n      dump_neighbors(dumper, get_level, n2o_mapping, o2n_mapping);\n  if (neighbors_size < 0) {\n    return neighbors_size;\n  }\n  //! free memory\n  n2o_mapping = std::vector<node_id_t>();\n  o2n_mapping = std::vector<node_id_t>();\n\n  //! Dump keys\n  size_t key_segment_size = doc_cnt() * sizeof(key_t);\n  int64_t keys_size =\n      dump_segment(dumper, kGraphKeysSegmentId, keys, key_segment_size);\n  if (keys_size < 0) {\n    return keys_size;\n  }\n\n  //! Dump mapping\n  int64_t mapping_size = dump_mapping_segment(dumper, keys);\n  if (mapping_size < 0) {\n    return mapping_size;\n  }\n\n  return hd_size + keys_size + vecs_size + neighbors_size + mapping_size;\n}\n\nint64_t HnswRabitqEntity::dump_vectors(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping) const {\n  size_t vector_dump_size = vector_size();\n\n  size_t padding_size = AlignSize(vector_dump_size) - vector_dump_size;\n\n  char padding[padding_size];\n  memset(padding, 0, sizeof(padding));\n  const void *data = nullptr;\n  uint32_t crc = 0U;\n  size_t vecs_size = 0UL;\n\n  //! dump vectors\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    data = get_vector(reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    if (ailego_unlikely(!data)) {\n      return IndexError_ReadData;\n    }\n    size_t len = dumper->write(data, vector_size());\n    if (len != vector_size()) {\n      LOG_ERROR(\"Dump vectors failed, write=%zu expect=%zu\", len,\n                vector_size());\n      return IndexError_WriteData;\n    }\n\n    crc = ailego::Crc32c::Hash(data, vector_size(), crc);\n    vecs_size += vector_size();\n\n    if (padding_size == 0) {\n      continue;\n    }\n\n    len = dumper->write(padding, padding_size);\n    if (len != padding_size) {\n      LOG_ERROR(\"Dump vectors failed, write=%zu expect=%zu\", len, padding_size);\n      return IndexError_WriteData;\n    }\n    crc = ailego::Crc32c::Hash(padding, padding_size, crc);\n    vecs_size += padding_size;\n  }\n\n  int ret = dumper->append(kGraphFeaturesSegmentId, vecs_size, 0UL, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump vectors segment meta failed, ret %d\", ret);\n    return ret;\n  }\n\n  return vecs_size;\n}\n\nint64_t HnswRabitqEntity::dump_graph_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<GraphNeighborMeta> graph_meta;\n  graph_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  node_id_t mapping[l0_neighbor_cnt()];\n\n  uint32_t min_neighbor_count = 10000;\n  uint32_t max_neighbor_count = 0;\n  size_t sum_neighbor_count = 0;\n\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    const Neighbors neighbors =\n        get_neighbors(0, reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),\n                       \"invalid neighbors\");\n\n    uint32_t neighbor_count = neighbors.size();\n    if (neighbor_count < min_neighbor_count) {\n      min_neighbor_count = neighbor_count;\n    }\n    if (neighbor_count > max_neighbor_count) {\n      max_neighbor_count = neighbor_count;\n    }\n    sum_neighbor_count += neighbor_count;\n\n    graph_meta.emplace_back(offset, neighbor_count);\n    size_t size = neighbors.size() * sizeof(node_id_t);\n    const node_id_t *data = &neighbors[0];\n    if (!neighbor_mapping.empty()) {\n      for (node_id_t i = 0; i < neighbors.size(); ++i) {\n        mapping[i] = neighbor_mapping[neighbors[i]];\n      }\n      data = mapping;\n    }\n    if (dumper->write(data, size) != size) {\n      LOG_ERROR(\"Dump graph neighbor id=%zu failed, size %zu\",\n                static_cast<size_t>(id), size);\n      return IndexError_WriteData;\n    }\n    crc = ailego::Crc32c::Hash(data, size, crc);\n    offset += size;\n  }\n\n  uint32_t average_neighbor_count = 0;\n  if (doc_cnt() > 0) {\n    average_neighbor_count = sum_neighbor_count / doc_cnt();\n  }\n  LOG_INFO(\n      \"Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] \"\n      \"average_neighbor_count[%u]\",\n      min_neighbor_count, max_neighbor_count, average_neighbor_count);\n\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n  ret = dumper->append(kGraphNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\",\n              kGraphNeighborsSegmentId.c_str(), ret);\n    return ret;\n  }\n\n  //! 
dump level 0 neighbors meta\n  auto len = dump_segment(dumper, kGraphOffsetsSegmentId, graph_meta.data(),\n                          graph_meta.size() * sizeof(GraphNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\nint64_t HnswRabitqEntity::dump_upper_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::function<level_t(node_id_t)> &get_level,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<HnswNeighborMeta> hnsw_meta;\n  hnsw_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  node_id_t buffer[upper_neighbor_cnt() + 1];\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    node_id_t new_id = reorder_mapping.empty() ? id : reorder_mapping[id];\n    auto level = get_level(new_id);\n    if (level == 0) {\n      hnsw_meta.emplace_back(0U, 0U);\n      continue;\n    }\n    hnsw_meta.emplace_back(offset, level);\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n      const Neighbors neighbors = get_neighbors(cur_level, new_id);\n      ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),\n                         \"invalid neighbors\");\n      memset(buffer, 0, sizeof(buffer));\n      buffer[0] = neighbors.size();\n      if (neighbor_mapping.empty()) {\n        memcpy(&buffer[1], &neighbors[0], neighbors.size() * sizeof(node_id_t));\n      } else {\n        for (node_id_t i = 0; i < neighbors.size(); ++i) {\n          buffer[i + 1] = neighbor_mapping[neighbors[i]];\n        }\n      }\n      if (dumper->write(buffer, sizeof(buffer)) != sizeof(buffer)) {\n        LOG_ERROR(\"Dump graph neighbor id=%zu failed, size %zu\",\n                  static_cast<size_t>(id), sizeof(buffer));\n        return IndexError_WriteData;\n      }\n      
crc = ailego::Crc32c::Hash(buffer, sizeof(buffer), crc);\n      offset += sizeof(buffer);\n    }\n  }\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n\n  ret = dumper->append(kHnswNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\", kHnswNeighborsSegmentId.c_str(),\n              ret);\n    return ret;\n  }\n\n  //! dump upper neighbors meta\n  auto len = dump_segment(dumper, kHnswOffsetsSegmentId, hnsw_meta.data(),\n                          hnsw_meta.size() * sizeof(HnswNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <execinfo.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <string>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"zvec/core/framework/index_dumper.h\"\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_storage.h\"\n\nnamespace zvec {\nnamespace core {\n\nusing node_id_t = uint32_t;\nusing key_t = uint64_t;\nusing level_t = int32_t;\nusing dist_t = float;\nstruct EstimateRecord {\n  float ip_x0_qr;\n  float est_dist;\n  float low_dist;\n\n  bool operator<(const EstimateRecord &other) const {\n    return this->est_dist < other.est_dist;\n  }\n};\nstruct ResultRecord {\n  float est_dist;\n  float low_dist;\n  ResultRecord() : est_dist(0.0f), low_dist(0.0f) {}\n  ResultRecord(float dist) : est_dist(dist), low_dist(dist) {}\n  explicit ResultRecord(const EstimateRecord &other)\n      : est_dist(other.est_dist), low_dist(other.low_dist) {}\n  ResultRecord(float est_dist, float low_dist)\n      : est_dist(est_dist), low_dist(low_dist) {}\n  bool operator<(const ResultRecord &other) const {\n    return this->est_dist < other.est_dist;\n  }\n  bool operator<=(const ResultRecord &other) const {\n    return this->est_dist <= other.est_dist;\n  }\n  bool operator>(const ResultRecord 
&other) const {\n    return this->est_dist > other.est_dist;\n  }\n};\nusing TopkHeap = ailego::KeyValueHeap<node_id_t, ResultRecord>;\nusing CandidateHeap =\n    ailego::KeyValueHeap<node_id_t, ResultRecord, std::greater<ResultRecord>>;\nconstexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);\nconstexpr key_t kInvalidKey = static_cast<key_t>(-1);\nclass DistCalculator;\n\nstruct GraphHeader {\n  uint32_t size;\n  uint32_t version;\n  uint32_t graph_type;\n  uint32_t doc_count;\n  uint32_t vector_size;\n  uint32_t node_size;\n  uint32_t l0_neighbor_count;\n  uint32_t prune_type;\n  uint32_t prune_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t options;\n  uint32_t min_neighbor_count;\n  uint32_t padded_dim;\n  uint32_t size_bin_data;\n  uint32_t size_ex_data;\n  uint8_t ex_bits;\n  uint8_t reserved_[4067];\n};\n\nstatic_assert(sizeof(GraphHeader) % 32 == 0,\n              \"GraphHeader must be aligned with 32 bytes\");\n\n//! Hnsw upper neighbor header\nstruct HnswHeader {\n  uint32_t size;      // header size\n  uint32_t revision;  // current total docs of the graph\n  uint32_t upper_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t scaling_factor;\n  uint32_t max_level;\n  uint32_t entry_point;\n  uint32_t options;\n  uint8_t reserved_[30];\n};\n\nstatic_assert(sizeof(HnswHeader) % 32 == 0,\n              \"GraphHeader must be aligned with 32 bytes\");\n\n//! Hnsw common header and upper neighbor header\nstruct HNSWHeader {\n  HNSWHeader() {\n    clear();\n  }\n\n  HNSWHeader(const HNSWHeader &header) {\n    memcpy(this, &header, sizeof(header));\n  }\n\n  HNSWHeader &operator=(const HNSWHeader &header) {\n    memcpy(this, &header, sizeof(header));\n    return *this;\n  }\n\n  //! Reset state to zero, and the params is untouched\n  void inline reset() {\n    graph.doc_count = 0U;\n    hnsw.entry_point = kInvalidNodeId;\n    hnsw.max_level = 0;\n  }\n\n  //! 
Clear all fields to init value\n  void inline clear() {\n    memset(this, 0, sizeof(HNSWHeader));\n    hnsw.entry_point = kInvalidNodeId;\n    graph.size = sizeof(GraphHeader);\n    hnsw.size = sizeof(HnswHeader);\n  }\n\n  size_t l0_neighbor_cnt() const {\n    return graph.l0_neighbor_count;\n  }\n\n  size_t upper_neighbor_cnt() const {\n    return hnsw.upper_neighbor_count;\n  }\n\n  size_t vector_size() const {\n    return graph.vector_size;\n  }\n\n  uint8_t ex_bits() const {\n    return graph.ex_bits;\n  }\n\n  uint32_t padded_dim() const {\n    return graph.padded_dim;\n  }\n\n  size_t ef_construction() const {\n    return graph.ef_construction;\n  }\n\n  size_t scaling_factor() const {\n    return hnsw.scaling_factor;\n  }\n\n  size_t neighbor_prune_cnt() const {\n    return graph.prune_neighbor_count;\n  }\n\n  node_id_t entry_point() const {\n    return hnsw.entry_point;\n  }\n\n  node_id_t doc_cnt() const {\n    return graph.doc_count;\n  }\n\n  GraphHeader graph;\n  HnswHeader hnsw;\n};\n\nstruct NeighborsHeader {\n  uint32_t neighbor_cnt;\n  node_id_t neighbors[0];\n};\n\nstruct Neighbors {\n  Neighbors() : cnt{0}, data{nullptr} {}\n\n  Neighbors(uint32_t cnt_in, const node_id_t *data_in)\n      : cnt{cnt_in}, data{data_in} {}\n\n  Neighbors(IndexStorage::MemoryBlock &&mem_block)\n      : neighbor_block{std::move(mem_block)} {\n    auto hd = reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());\n    cnt = hd->neighbor_cnt;\n    data = hd->neighbors;\n  }\n\n  size_t size(void) const {\n    return cnt;\n  }\n\n  const node_id_t &operator[](size_t idx) const {\n    return data[idx];\n  }\n\n  uint32_t cnt;\n  const node_id_t *data;\n  IndexStorage::MemoryBlock neighbor_block;\n};\n\n//! level 0 neighbors offset\nstruct GraphNeighborMeta {\n  GraphNeighborMeta(size_t o, size_t cnt) : offset(o), neighbor_cnt(cnt) {}\n\n  uint64_t offset : 48;\n  uint64_t neighbor_cnt : 16;\n};\n\n//! 
hnsw upper neighbors meta\nstruct HnswNeighborMeta {\n  HnswNeighborMeta(size_t o, size_t l) : offset(o), level(l) {}\n\n  uint64_t offset : 48;  // offset = idx * upper neighors size\n  uint64_t level : 16;\n};\n\nclass HnswRabitqEntity {\n public:\n  //! Constructor\n  HnswRabitqEntity() {}\n\n  //! Constructor\n  HnswRabitqEntity(const HNSWHeader &hd) {\n    header_ = hd;\n  }\n\n  //! Destructor\n  virtual ~HnswRabitqEntity() {}\n\n  //! HnswRabitqEntity Pointerd;\n  typedef std::shared_ptr<HnswRabitqEntity> Pointer;\n\n  //! Get max neighbor size of graph level\n  inline size_t neighbor_cnt(level_t level) const {\n    return level == 0 ? header_.graph.l0_neighbor_count\n                      : header_.hnsw.upper_neighbor_count;\n  }\n\n  //! get max neighbor size of graph level 0\n  inline size_t l0_neighbor_cnt() const {\n    return header_.graph.l0_neighbor_count;\n  }\n\n  //! get min neighbor size of graph\n  inline size_t min_neighbor_cnt() const {\n    return header_.graph.min_neighbor_count;\n  }\n\n  //! get upper neighbor size of graph level other than 0\n  inline size_t upper_neighbor_cnt() const {\n    return header_.hnsw.upper_neighbor_count;\n  }\n\n  //! Get current total doc of the hnsw graph\n  inline node_id_t *mutable_doc_cnt() {\n    return &header_.graph.doc_count;\n  }\n\n  inline node_id_t doc_cnt() const {\n    return header_.graph.doc_count;\n  }\n\n  //! Get hnsw graph scaling params\n  inline size_t scaling_factor() const {\n    return header_.hnsw.scaling_factor;\n  }\n\n  //! Get prune_size\n  inline size_t prune_cnt() const {\n    return header_.graph.prune_neighbor_count;\n  }\n\n  //! Current entity of top level graph\n  inline node_id_t entry_point() const {\n    return header_.hnsw.entry_point;\n  }\n\n  //! Current max graph level\n  inline level_t cur_max_level() const {\n    return header_.hnsw.max_level;\n  }\n\n  //! 
Retrieve index vector size\n  size_t vector_size() const {\n    return header_.graph.vector_size;\n  }\n\n  //! Retrieve node size\n  size_t node_size() const {\n    return header_.graph.node_size;\n  }\n\n  //! Retrieve ef constuction\n  size_t ef_construction() const {\n    return header_.graph.ef_construction;\n  }\n\n  uint8_t ex_bits() const {\n    return header_.graph.ex_bits;\n  }\n\n  uint32_t padded_dim() const {\n    return header_.graph.padded_dim;\n  }\n\n  uint32_t size_bin_data() const {\n    return header_.graph.size_bin_data;\n  }\n\n  uint32_t size_ex_data() const {\n    return header_.graph.size_ex_data;\n  }\n\n  void update_rabitq_params_and_vector_size(uint32_t dimension);\n\n  void set_ex_bits(uint8_t ex_bits) {\n    header_.graph.ex_bits = ex_bits;\n  }\n\n  void set_prune_cnt(size_t v) {\n    header_.graph.prune_neighbor_count = v;\n  }\n\n  void set_scaling_factor(size_t val) {\n    header_.hnsw.scaling_factor = val;\n  }\n\n  void set_l0_neighbor_cnt(size_t cnt) {\n    header_.graph.l0_neighbor_count = cnt;\n  }\n\n  void set_min_neighbor_cnt(size_t cnt) {\n    header_.graph.min_neighbor_count = cnt;\n  }\n\n  void set_upper_neighbor_cnt(size_t cnt) {\n    header_.hnsw.upper_neighbor_count = cnt;\n  }\n\n  void set_ef_construction(size_t ef) {\n    header_.graph.ef_construction = ef;\n  }\n\n protected:\n  inline const HNSWHeader &header() const {\n    return header_;\n  }\n\n  inline HNSWHeader *mutable_header() {\n    return &header_;\n  }\n\n  inline size_t header_size() const {\n    return sizeof(header_);\n  }\n\n  void set_node_size(size_t size) {\n    header_.graph.node_size = size;\n  }\n\n  //! Dump all segment by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_segments(\n      const IndexDumper::Pointer &dumper, key_t *keys,\n      const std::function<level_t(node_id_t)> &get_level) const;\n\n private:\n  //! 
dump mapping segment, for get_vector_by_key in provider\n  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,\n                               const key_t *keys) const;\n\n  //! dump hnsw head by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_header(const IndexDumper::Pointer &dumper,\n                      const HNSWHeader &hd) const;\n\n  //! dump vectors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_vectors(const IndexDumper::Pointer &dumper,\n                       const std::vector<node_id_t> &reorder_mapping) const;\n\n  //! dump hnsw neighbors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,\n                         const std::function<level_t(node_id_t)> &get_level,\n                         const std::vector<node_id_t> &reorder_mapping,\n                         const std::vector<node_id_t> &neighbor_mapping) const {\n    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);\n    if (len1 < 0) {\n      return len1;\n    }\n    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,\n                                     neighbor_mapping);\n    if (len2 < 0) {\n      return len2;\n    }\n\n    return len1 + len2;\n  }\n\n  //! dump segment by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_segment(const IndexDumper::Pointer &dumper,\n                       const std::string &segment_id, const void *data,\n                       size_t size) const;\n\n  //! Dump level 0 neighbors\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_graph_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n  //! Dump upper level neighbors\n  //! 
Return dump size if success, errno(<0) in failure\n  int64_t dump_upper_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::function<level_t(node_id_t)> &get_level,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n public:\n  //! Cleanup the entity\n  virtual int cleanup(void) {\n    header_.clear();\n    return 0;\n  }\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswRabitqEntity::Pointer clone() const {\n    LOG_ERROR(\"Update neighbors not implemented\");\n    return HnswRabitqEntity::Pointer();\n  }\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const = 0;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const = 0;\n\n  //! Get vectors feature data by keys\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const = 0;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const = 0;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const = 0;\n\n  //! Retrieve a vector using a primary key\n  virtual const void *get_vector_by_key(uint64_t /*key*/) const {\n    LOG_ERROR(\"get vector not implemented\");\n    return nullptr;\n  }\n\n  virtual int get_vector_by_key(const key_t /*key*/,\n                                IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Get the node id's neighbors on graph level\n  //! Note: the neighbors cannot be modified, using the following\n  //! method to get WritableNeighbors if want to\n  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;\n\n  //! 
Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t /*level*/, key_t /*key*/, const void * /*vec*/,\n                         node_id_t * /*id*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add vector and id to hnsw entity\n  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,\n                                 const void * /*vec*/) {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int update_neighbors(\n      level_t /*level*/, node_id_t /*id*/,\n      const std::vector<std::pair<node_id_t, ResultRecord>> & /*neighbors*/) {\n    LOG_ERROR(\"Update neighbors dense not implemented\");\n\n    return 0;\n  }\n\n  //! Append neighbor_id to node id neighbors on level, size is the current\n  //! neighbors size. Notice: the caller must be ensure the neighbors not full\n  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,\n                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {\n    LOG_ERROR(\"Add neighbor not implemented\");\n  }\n\n  //! 
Update entry point and max level\n  virtual void update_ep_and_level(node_id_t ep, level_t level) {\n    header_.hnsw.entry_point = ep;\n    header_.hnsw.max_level = level;\n  }\n\n  virtual int load(const IndexStorage::Pointer & /*container*/,\n                   bool /*check_crc*/) {\n    LOG_ERROR(\"Load not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {\n    LOG_ERROR(\"Dump not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                               size_t data_size, size_t *padding_size);\n\n  uint32_t get_cluster_id(const void *vec) const {\n    return *reinterpret_cast<const uint32_t *>(\n        reinterpret_cast<const char *>(vec) + cluster_id_offset());\n  }\n\n  const char *get_bin_data(const void *vec) const {\n    return reinterpret_cast<const char *>(vec) + bin_data_offset();\n  }\n\n  const char *get_ex_data(const void *vec) const {\n    return reinterpret_cast<const char *>(vec) + ex_data_offset();\n  }\n\n  uint32_t cluster_id_offset() const {\n    return 0;\n  }\n\n  uint32_t bin_data_offset() const {\n    return cluster_id_offset() + sizeof(uint32_t);\n  }\n\n  uint32_t ex_data_offset() const {\n    return bin_data_offset() + size_bin_data();\n  }\n\n protected:\n  static inline size_t AlignSize(size_t size) {\n    return (size + 0x1F) & (~0x1F);\n  }\n\n  static inline size_t AlignPageSize(size_t size) {\n    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;\n    return (size + page_mask) & (~page_mask);\n  }\n\n  static inline size_t AlignHugePageSize(size_t size) {\n    size_t page_mask = ailego::MemoryHelper::HugePageSize() - 1;\n    return (size + page_mask) & (~page_mask);\n  }\n\n  //! 
rearrange vectors to improve cache locality\n  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,\n                         std::vector<node_id_t> *n2o_mapping,\n                         std::vector<node_id_t> *o2n_mapping,\n                         key_t *keys) const;\n\n public:\n  const static std::string kGraphHeaderSegmentId;\n  const static std::string kGraphFeaturesSegmentId;\n  const static std::string kGraphKeysSegmentId;\n  const static std::string kGraphNeighborsSegmentId;\n  const static std::string kGraphOffsetsSegmentId;\n  const static std::string kGraphMappingSegmentId;\n  const static std::string kHnswHeaderSegmentId;\n  const static std::string kHnswNeighborsSegmentId;\n  const static std::string kHnswOffsetsSegmentId;\n\n  constexpr static uint32_t kRevision = 0U;\n  constexpr static size_t kMaxGraphLayers = 15;\n  constexpr static uint32_t kDefaultEfConstruction = 500;\n  constexpr static uint32_t kDefaultEf = 500;\n  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW\n  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n  constexpr static float kDefaultScanRatio = 0.1f;\n  constexpr static uint32_t kDefaultMinScanLimit = 10000;\n  constexpr static uint32_t kDefaultMaxScanLimit =\n      std::numeric_limits<uint32_t>::max();\n  constexpr static float kDefaultBFNegativeProbability = 0.001f;\n  constexpr static uint32_t kDefaultScalingFactor = 50U;\n  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;\n  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion\n  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;\n  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;\n  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;\n  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;\n  constexpr static float kDefaultNeighborPruneMultiplier =\n      1.0f;  // prune_cnt = upper_max_neighbor_cnt * 
multiplier\n  constexpr static float kDefaultL0MaxNeighborCntMultiplier =\n      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier\n\n protected:\n  HNSWHeader header_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_index_hash.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_rabitq_chunk.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! Persistent hashmap implement through open addressing algorithm\ntemplate <class Key, class Val, Val EmptyVal = 0U,\n          typename =\n              typename std::enable_if<std::is_integral<Key>::value>::type>\nclass HnswIndexHashMap {\n  using key_type = Key;\n  using val_type = Val;\n\n  struct Iterator {\n    key_type first;\n    val_type second;\n  };\n  typedef Iterator *iterator;\n  typedef Iterator Item;\n  typedef const Iterator *const_iterator;\n\n  class Slot {\n   public:\n    Slot(Chunk::Pointer &&chunk, const void *data)\n        : chunk_(std::move(chunk)),\n          items_(reinterpret_cast<const Item *>(data)) {}\n    //! 
Return a empty loc or the key item loc\n\n    Slot(Chunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)\n        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {\n      items_ = reinterpret_cast<const Item *>(items_block_.data());\n    }\n    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {\n      auto it = &items_[key & mask];\n      for (auto i = 0U; i < max_items; ++i) {\n        if (it->first == key || it->second == EmptyVal) {\n          // LOG_DEBUG(\"i=%u\", i);\n          return it;\n        }\n        ++it;\n        if (it == &items_[max_items]) {\n          it = &items_[0];\n        }\n      }\n      return nullptr;\n    }\n\n    bool update(const_iterator it) {\n      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -\n                        reinterpret_cast<const uint8_t *>(&items_[0]);\n      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=\n                          sizeof(Item))) {\n        LOG_ERROR(\"Chunk write failed\");\n        return false;\n      }\n      return true;\n    }\n\n   private:\n    Chunk::Pointer chunk_{};\n    const Item *items_{nullptr};  // point to chunk data\n    IndexStorage::MemoryBlock items_block_{};\n  };\n\n public:\n  //! Init the hash\n  //! broker      the index allocator\n  //! chunk_size  the size of per chunk allocated, actual size may greater\n  //! factor      factor = 1/ratio, ratio is the probability of a squence\n  //! number inserted to this container\n  //! max         the max number key can be inserted\n  //! 
expansion_ratio   memory expansion ratio\n  int init(HnswRabitqChunkBroker::Pointer &broker, uint32_t chunk_size,\n           uint32_t factor, size_t max, float expansion_ratio) {\n    ailego_assert_with(expansion_ratio > 1.0f, \"ratio must > 1.0f\");\n    broker_ = broker;\n\n    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));\n    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));\n    size_t range = slot_items_ * factor / expansion_ratio;\n    mask_bits_ = std::floor(std::log2(range));\n    range = 1UL << mask_bits_;\n    size_t max_slots = std::ceil(max * 1.0f / range);\n    slots_.reserve(max_slots);\n    slot_loc_mask_ = slot_items_ - 1U;\n    int ret = load();\n    if (ret != 0) {\n      return ret;\n    }\n\n    LOG_DEBUG(\n        \"HnswRabitqIndexHash init, chunkSize=%u factor=%u max=%zu \"\n        \"ratio=%f slotItems=%u maxSlots=%zu maskBits=%u \"\n        \"range=%zu\",\n        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,\n        mask_bits_, range);\n\n    return 0;\n  }\n\n  int cleanup(void) {\n    broker_.reset();\n    slots_.clear();\n    slots_.shrink_to_fit();\n    mask_bits_ = 0U;\n    slot_items_ = 0U;\n    slot_loc_mask_ = 0U;\n\n    return 0;\n  }\n\n  const_iterator end(void) const {\n    return nullptr;\n  }\n\n  const_iterator find(const key_type key) const {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      return end();\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    return it && it->second != EmptyVal ? 
it : nullptr;\n  }\n\n  bool insert(key_type key, val_type val) {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      if (ailego_unlikely(idx >= slots_.capacity())) {\n        LOG_ERROR(\"no space to insert\");\n        return false;\n      }\n      for (auto i = slots_.size(); i <= idx; ++i) {\n        if (ailego_unlikely(!alloc_slot(i))) {\n          return false;\n        }\n      }\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    if (ailego_unlikely(it == nullptr)) {\n      LOG_ERROR(\"no space to insert\");\n      return false;\n    }\n\n    //! TODO: write memory is ok?\n    const_cast<iterator>(it)->first = key;\n    const_cast<iterator>(it)->second = val;\n\n    return slots_[idx].update(it);\n  }\n\n private:\n  bool alloc_slot(size_t idx) {\n    ailego_assert_with(idx == slots_.size(), \"invalid idx\");\n\n    size_t size = slot_items_ * sizeof(Item);\n    auto p = broker_->alloc_chunk(\n        HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, idx, size);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return false;\n    }\n    Chunk::Pointer chunk = p.second;\n    if (ailego_unlikely(chunk->resize(size) != size)) {\n      LOG_ERROR(\"Chunk resize failed, size=%zu\", size);\n      return false;\n    }\n    //! 
Read the whole data to memory\n    IndexStorage::MemoryBlock data_block;\n    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n      LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n      return false;\n    }\n\n    slots_.emplace_back(std::move(chunk), std::move(data_block));\n    return true;\n  }\n\n  int load(void) {\n    size_t slots_cnt = broker_->get_chunk_cnt(\n        HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);\n    for (size_t i = 0UL; i < slots_cnt; ++i) {\n      auto chunk = broker_->get_chunk(\n          HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);\n      if (!chunk) {\n        LOG_ERROR(\"Get chunk failed, seq=%zu\", i);\n        return IndexError_InvalidFormat;\n      }\n      size_t size = sizeof(Item) * slot_items_;\n      if (chunk->data_size() < size) {\n        LOG_ERROR(\n            \"Hash params may be mismatch, seq=%zu, data_size=%zu \"\n            \"expect=%zu\",\n            i, chunk->data_size(), size);\n        return IndexError_InvalidFormat;\n      }\n      //! Read the whole data to memory\n      IndexStorage::MemoryBlock data_block;\n      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n        LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n        return false;\n      }\n      slots_.emplace_back(std::move(chunk), std::move(data_block));\n    }\n    return 0;\n  }\n\n private:\n  HnswRabitqChunkBroker::Pointer broker_{};  // chunk broker\n  std::vector<Slot> slots_{};\n  uint32_t mask_bits_{0U};\n  uint32_t slot_items_{};  // must be a power of 2\n  uint32_t slot_loc_mask_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_index_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_searcher.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqIndexProvider : public IndexProvider {\n public:\n  HnswRabitqIndexProvider(const IndexMeta &meta,\n                          const HnswRabitqEntity::Pointer &entity,\n                          const std::string &owner)\n      : meta_(meta), entity_(entity), owner_class_(owner) {}\n\n  HnswRabitqIndexProvider(const HnswRabitqIndexProvider &) = delete;\n  HnswRabitqIndexProvider &operator=(const HnswRabitqIndexProvider &) = delete;\n\n public:  // holder interface\n  //! Create a new iterator\n  IndexProvider::Iterator::Pointer create_iterator() override {\n    return HnswRabitqIndexProvider::Iterator::Pointer(new (std::nothrow)\n                                                          Iterator(entity_));\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return entity_->doc_cnt();\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const override {\n    return meta_.dimension();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return meta_.data_type();\n  }\n\n  //! 
Retrieve vector size in bytes\n  size_t element_size(void) const override {\n    return meta_.element_size();\n  }\n\n public:  // provider's unique interface\n  //! Retrieve a vector using a primary key\n  const void *get_vector(uint64_t key) const override {\n    return entity_->get_vector_by_key(key);\n  }\n\n  int get_vector(const uint64_t key,\n                 IndexStorage::MemoryBlock &block) const override {\n    return entity_->get_vector_by_key(key, block);\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n private:\n  class Iterator : public IndexProvider::Iterator {\n   public:\n    Iterator(const HnswRabitqEntity::Pointer &entity)\n        : entity_(entity), cur_id_(0U) {}\n\n    //! Retrieve pointer of data\n    //! NOTICE: the vec feature will be changed after iterating to next, so\n    //! the caller need to keep a copy of it before iterator to next vector\n    virtual const void *data(void) const override {\n      return entity_->get_vector(cur_id_);\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return cur_id_ < entity_->doc_cnt();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return entity_->get_key(cur_id_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      // cur_id_ += 1;\n      cur_id_ = get_next_valid_id(cur_id_ + 1);\n    }\n\n    //! 
Reset the iterator\n    void reset(void) {\n      cur_id_ = get_next_valid_id(0);\n    }\n\n   private:\n    node_id_t get_next_valid_id(node_id_t start_id) {\n      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {\n        if (entity_->get_key(i) != kInvalidNodeId) {\n          cur_id_ = i;\n          return i;\n        }\n      }\n      return kInvalidNodeId;\n    }\n\n   private:\n    const HnswRabitqEntity::Pointer entity_;\n    node_id_t cur_id_;\n  };\n\n private:\n  const IndexMeta &meta_;\n  const HnswRabitqEntity::Pointer entity_;\n  const std::string owner_class_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\ninline const std::string PARAM_HNSW_RABITQ_GENERAL_DIMENSION(\n    \"proxima.hnsw_rabitq.general.dimension\");\n\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT(\n    \"proxima.hnsw_rabitq.builder.thread_count\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_MEMORY_QUOTA(\n    \"proxima.hnsw_rabitq.builder.memory_quota\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_EFCONSTRUCTION(\n    \"proxima.hnsw_rabitq.builder.efconstruction\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR(\n    \"proxima.hnsw_rabitq.builder.scaling_factor\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_CHECK_INTERVAL_SECS(\n    \"proxima.hnsw_rabitq.builder.check_interval_secs\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw_rabitq.builder.neighbor_prune_multiplier\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw_rabitq.builder.min_neighbor_count\");\ninline const std::string PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw_rabitq.builder.max_neighbor_count\");\ninline const std::string\n    PARAM_HNSW_RABITQ_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n        
\"proxima.hnsw_rabitq.builder.l0_max_neighbor_count_multiplier\");\n\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_EF(\n    \"proxima.hnsw_rabitq.searcher.ef\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw_rabitq.searcher.brute_force_threshold\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE(\n    \"proxima.hnsw_rabitq.searcher.neighbors_in_memory_enable\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO(\n    \"proxima.hnsw_rabitq.searcher.max_scan_ratio\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw_rabitq.searcher.check_crc_enable\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw_rabitq.searcher.visit_bloomfilter_enable\");\ninline const std::string\n    PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n        \"proxima.hnsw_rabitq.searcher.visit_bloomfilter_negative_prob\");\ninline const std::string PARAM_HNSW_RABITQ_SEARCHER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw_rabitq.searcher.force_padding_result_enable\");\n\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO(\n    \"proxima.hnsw_rabitq.streamer.max_scan_ratio\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT(\n    \"proxima.hnsw_rabitq.streamer.min_scan_limit\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT(\n    \"proxima.hnsw_rabitq.streamer.max_scan_limit\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_EF(\n    \"proxima.hnsw_rabitq.streamer.ef\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION(\n    \"proxima.hnsw_rabitq.streamer.efconstruction\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw_rabitq.streamer.max_neighbor_count\");\ninline const std::string\n    PARAM_HNSW_RABITQ_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n        
\"proxima.hnsw_rabitq.streamer.l0_max_neighbor_count_multiplier\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR(\n    \"proxima.hnsw_rabitq.streamer.scaling_factor\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw_rabitq.streamer.brute_force_threshold\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT(\n    \"proxima.hnsw_rabitq.streamer.docs_hard_limit\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT(\n    \"proxima.hnsw_rabitq.streamer.docs_soft_limit\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_INDEX_SIZE(\n    \"proxima.hnsw_rabitq.streamer.max_index_size\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw_rabitq.streamer.visit_bloomfilter_enable\");\ninline const std::string\n    PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n        \"proxima.hnsw_rabitq.streamer.visit_bloomfilter_negative_prob\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw_rabitq.streamer.check_crc_enable\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw_rabitq.streamer.neighbor_prune_multiplier\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE(\n    \"proxima.hnsw_rabitq.streamer.chunk_size\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_FILTER_SAME_KEY(\n    \"proxima.hnsw_rabitq.streamer.filter_same_key\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE(\n    \"proxima.hnsw_rabitq.streamer.get_vector_enable\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw_rabitq.streamer.min_neighbor_count\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw_rabitq.streamer.force_padding_result_enable\");\ninline const std::string 
PARAM_HNSW_RABITQ_STREAMER_ESTIMATE_DOC_COUNT(\n    \"proxima.hnsw_rabitq.streamer.estimate_doc_count\");\ninline const std::string PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP(\n    \"proxima.hnsw_rabitq.streamer.use_id_map\");\n\ninline const std::string PARAM_HNSW_RABITQ_REDUCER_WORKING_PATH(\n    \"proxima.hnsw_rabitq.reducer.working_path\");\ninline const std::string PARAM_HNSW_RABITQ_REDUCER_NUM_OF_ADD_THREADS(\n    \"proxima.hnsw_rabitq.reducer.num_of_add_threads\");\ninline const std::string PARAM_HNSW_RABITQ_REDUCER_INDEX_NAME(\n    \"proxima.hnsw_rabitq.reducer.index_name\");\ninline const std::string PARAM_HNSW_RABITQ_REDUCER_EFCONSTRUCTION(\n    \"proxima.hnsw_rabitq.reducer.efconstruction\");\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_algorithm.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_query_algorithm.h\"\n#include <chrono>\n#include <iostream>\n#include <ailego/internal/cpu_features.h>\n#include <rabitqlib/index/estimator.hpp>\n#include \"zvec/ailego/internal/platform.h\"\n#include \"hnsw_rabitq_entity.h\"\n#include \"hnsw_rabitq_query_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqQueryAlgorithm::HnswRabitqQueryAlgorithm(HnswRabitqEntity &entity,\n                                                   size_t num_clusters,\n                                                   RabitqMetricType metric_type)\n    : entity_(entity),\n      mt_(std::chrono::system_clock::now().time_since_epoch().count()),\n      lock_pool_(kLockCnt),\n      num_clusters_(num_clusters),\n      metric_type_(metric_type) {\n  ex_bits_ = entity_.ex_bits();\n  padded_dim_ = entity_.padded_dim();\n  ip_func_ = rabitqlib::select_excode_ipfunc(ex_bits_);\n  LOG_INFO(\n      \"Create query algorithm. 
num_clusters=%zu ex_bits=%zu padded_dim=%zu\",\n      num_clusters_, ex_bits_, padded_dim_);\n}\n\nint HnswRabitqQueryAlgorithm::cleanup() {\n  return 0;\n}\n\nint HnswRabitqQueryAlgorithm::search(HnswRabitqQueryEntity *entity,\n                                     HnswRabitqContext *ctx) const {\n  spin_lock_.lock();\n  auto maxLevel = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    return 0;\n  }\n\n  EstimateRecord curest;\n  get_bin_est(entity_.get_vector(entry_point), curest, *entity);\n\n  for (level_t cur_level = maxLevel; cur_level >= 1; --cur_level) {\n    select_entry_point(cur_level, &entry_point, &curest, ctx, entity);\n  }\n\n  auto &topk_heap = ctx->topk_heap();\n  topk_heap.clear();\n  search_neighbors(0, &entry_point, &curest, topk_heap, ctx, entity);\n\n  if (ctx->group_by_search()) {\n    expand_neighbors_by_group(topk_heap, ctx, entity);\n  }\n\n  return 0;\n}\n\n\n//! 
select_entry_point on hnsw level, ef = 1\nvoid HnswRabitqQueryAlgorithm::select_entry_point(\n    level_t level, node_id_t *entry_point, EstimateRecord *curest,\n    HnswRabitqContext *ctx, HnswRabitqQueryEntity *query_entity) const {\n  auto &entity = ctx->get_entity();\n  while (true) {\n    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n    ailego_prefetch(neighbors.data);\n    uint32_t size = neighbors.size();\n    if (size == 0) {\n      break;\n    }\n\n    bool find_closer = false;\n    for (uint32_t i = 0; i < size; ++i) {\n      EstimateRecord candest;\n      get_bin_est(entity_.get_vector(neighbors[i]), candest, *query_entity);\n\n      if (candest.est_dist < curest->est_dist) {\n        *curest = candest;\n        *entry_point = neighbors[i];\n        find_closer = true;\n      }\n    }\n\n    if (!find_closer) {\n      break;\n    }\n  }\n\n  return;\n}\n\nvoid HnswRabitqQueryAlgorithm::search_neighbors(\n    level_t level, node_id_t *entry_point, EstimateRecord *dist, TopkHeap &topk,\n    HnswRabitqContext *ctx, HnswRabitqQueryEntity *query_entity) const {\n  const auto &entity = ctx->get_entity();\n  VisitFilter &visit = ctx->visit_filter();\n  CandidateHeap &candidates = ctx->candidates();\n  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n  if (ctx->filter().is_valid()) {\n    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n  }\n\n  candidates.clear();\n  visit.clear();\n  visit.set_visited(*entry_point);\n  if (!filter(*entry_point)) {\n    topk.emplace(*entry_point, ResultRecord(*dist));\n  }\n\n  candidates.emplace(*entry_point, ResultRecord(*dist));\n  while (!candidates.empty() && !ctx->reach_scan_limit()) {\n    auto top = candidates.begin();\n    node_id_t main_node = top->first;\n    auto main_dist = top->second;\n\n    if (topk.full() && main_dist.est_dist > 
topk[0].second.est_dist) {\n      break;\n    }\n\n    candidates.pop();\n    const Neighbors neighbors = entity.get_neighbors(level, main_node);\n    ailego_prefetch(neighbors.data);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n\n    std::vector<node_id_t> neighbor_ids(neighbors.size());\n    uint32_t size = 0;\n    for (uint32_t i = 0; i < neighbors.size(); ++i) {\n      node_id_t node = neighbors[i];\n      if (visit.visited(node)) {\n        if (ailego_unlikely(ctx->debugging())) {\n          (*ctx->mutable_stats_visit_dup_cnt())++;\n        }\n        continue;\n      }\n      visit.set_visited(node);\n      neighbor_ids[size++] = node;\n    }\n    if (size == 0) {\n      continue;\n    }\n\n    for (uint32_t i = 0; i < size; ++i) {\n      node_id_t node = neighbor_ids[i];\n      EstimateRecord candest;\n      auto *cand_vector = entity_.get_vector(node);\n      ailego_prefetch(cand_vector);\n      get_bin_est(cand_vector, candest, *query_entity);\n\n      if (ex_bits_ > 0) {\n        // Check preliminary score against current worst full estimate.\n        bool flag_update_KNNs =\n            (!topk.full()) || candest.low_dist < topk[0].second.est_dist;\n\n        if (flag_update_KNNs) {\n          // Compute the full estimate if promising.\n          get_full_est(cand_vector, candest, *query_entity);\n        } else {\n          continue;\n        }\n      } else {\n        // ex_bits_ == 0: est_dist is already the best estimate\n        if (topk.full() && candest.est_dist >= topk[0].second.est_dist) {\n          continue;\n        }\n      }\n      candidates.emplace(node, ResultRecord(candest));\n      // update entry_point for next level scan\n      if (candest < *dist) {\n        *entry_point = node;\n        *dist = candest;\n      }\n      if (!filter(node)) {\n        topk.emplace(node, ResultRecord(candest));\n      }\n    }  // end for\n  }  // while\n\n  return;\n}\n\nvoid 
HnswRabitqQueryAlgorithm::expand_neighbors_by_group(\n    TopkHeap &topk, HnswRabitqContext *ctx,\n    HnswRabitqQueryEntity *query_entity) const {\n  if (!ctx->group_by().is_valid()) {\n    return;\n  }\n\n  const auto &entity = ctx->get_entity();\n  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n    return ctx->group_by()(entity.get_key(id));\n  };\n\n  // divide into groups\n  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();\n  for (uint32_t i = 0; i < topk.size(); ++i) {\n    node_id_t id = topk[i].first;\n    auto score = topk[i].second;\n\n    std::string group_id = group_by(id);\n\n    auto &topk_heap = group_topk_heaps[group_id];\n    if (topk_heap.empty()) {\n      topk_heap.limit(ctx->group_topk());\n    }\n    topk_heap.emplace_back(id, score);\n  }\n\n  // stage 2, expand to reach group num as far as possible\n  if (group_topk_heaps.size() < ctx->group_num()) {\n    VisitFilter &visit = ctx->visit_filter();\n    CandidateHeap &candidates = ctx->candidates();\n\n    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n    if (ctx->filter().is_valid()) {\n      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n    }\n\n    // refill to get enough groups\n    candidates.clear();\n    visit.clear();\n    for (uint32_t i = 0; i < topk.size(); ++i) {\n      node_id_t id = topk[i].first;\n      auto score = topk[i].second;\n\n      visit.set_visited(id);\n      candidates.emplace_back(id, score);\n    }\n\n    // do expand\n    while (!candidates.empty() && !ctx->reach_scan_limit()) {\n      auto top = candidates.begin();\n      node_id_t main_node = top->first;\n\n      candidates.pop();\n      const Neighbors neighbors = entity.get_neighbors(0, main_node);\n      ailego_prefetch(neighbors.data);\n      if (ailego_unlikely(ctx->debugging())) {\n        (*ctx->mutable_stats_get_neighbors())++;\n      }\n\n      std::vector<node_id_t> neighbor_ids(neighbors.size());\n      
uint32_t size = 0;\n      for (uint32_t i = 0; i < neighbors.size(); ++i) {\n        node_id_t node = neighbors[i];\n        if (visit.visited(node)) {\n          if (ailego_unlikely(ctx->debugging())) {\n            (*ctx->mutable_stats_visit_dup_cnt())++;\n          }\n          continue;\n        }\n        visit.set_visited(node);\n        neighbor_ids[size++] = node;\n      }\n      if (size == 0) {\n        continue;\n      }\n\n      for (uint32_t i = 0; i < size; ++i) {\n        node_id_t node = neighbor_ids[i];\n        EstimateRecord candest;\n        auto *cand_vector = entity_.get_vector(node);\n        ailego_prefetch(cand_vector);\n        get_full_est(cand_vector, candest, *query_entity);\n\n        if (!filter(node)) {\n          std::string group_id = group_by(node);\n\n          auto &topk_heap = group_topk_heaps[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(node, ResultRecord(candest));\n\n          if (group_topk_heaps.size() >= ctx->group_num()) {\n            break;\n          }\n        }\n        candidates.emplace(node, ResultRecord(candest));\n      }  // end for\n    }  // end while\n  }  // end if\n}\n\nvoid HnswRabitqQueryAlgorithm::get_bin_est(\n    const void *vector, EstimateRecord &res,\n    HnswRabitqQueryEntity &entity) const {\n  const auto &q_to_centroids = entity.q_to_centroids;\n  auto &query_wrapper = *entity.query_wrapper;\n  uint32_t cluster_id = entity_.get_cluster_id(vector);\n  const char *bin_data = entity_.get_bin_data(vector);\n  if (metric_type_ == RabitqMetricType::kIP) {\n    float norm = q_to_centroids[cluster_id];\n    float error = q_to_centroids[cluster_id + num_clusters_];\n    rabitqlib::split_single_estdist(bin_data, query_wrapper, padded_dim_,\n                                    res.ip_x0_qr, res.est_dist, res.low_dist,\n                                    -norm, error);\n  } else {\n    // L2 distance\n    
float norm = q_to_centroids[cluster_id];\n    rabitqlib::split_single_estdist(bin_data, query_wrapper, padded_dim_,\n                                    res.ip_x0_qr, res.est_dist, res.low_dist,\n                                    norm * norm, norm);\n  }\n}\n\nvoid HnswRabitqQueryAlgorithm::get_full_est(\n    const void *vector, EstimateRecord &res,\n    HnswRabitqQueryEntity &entity) const {\n  const auto &q_to_centroids = entity.q_to_centroids;\n  auto &query_wrapper = *entity.query_wrapper;\n  uint32_t cluster_id = entity_.get_cluster_id(vector);\n  const char *bin_data = entity_.get_bin_data(vector);\n  const char *ex_data = entity_.get_ex_data(vector);\n\n  if (metric_type_ == RabitqMetricType::kIP) {\n    float norm = q_to_centroids[cluster_id];\n    float error = q_to_centroids[cluster_id + num_clusters_];\n    rabitqlib::split_single_fulldist(bin_data, ex_data, ip_func_, query_wrapper,\n                                     padded_dim_, ex_bits_, res.est_dist,\n                                     res.low_dist, res.ip_x0_qr, -norm, error);\n  } else {\n    // L2 distance\n    float norm = q_to_centroids[cluster_id];\n    rabitqlib::split_single_fulldist(\n        bin_data, ex_data, ip_func_, query_wrapper, padded_dim_, ex_bits_,\n        res.est_dist, res.low_dist, res.ip_x0_qr, norm * norm, norm);\n  }\n}\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_algorithm.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <ailego/parallel/lock.h>\n#include \"hnsw_rabitq_context.h\"\n#include \"hnsw_rabitq_dist_calculator.h\"\n#include \"hnsw_rabitq_entity.h\"\n#include \"rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqQueryEntity;\n\n//! hnsw graph algorithm implement\nclass HnswRabitqQueryAlgorithm {\n public:\n  typedef std::unique_ptr<HnswRabitqQueryAlgorithm> UPointer;\n\n public:\n  //! Constructor\n  explicit HnswRabitqQueryAlgorithm(HnswRabitqEntity &entity,\n                                    size_t num_clusters,\n                                    RabitqMetricType metric_type);\n\n  //! Destructor\n  ~HnswRabitqQueryAlgorithm() = default;\n\n  //! Cleanup HnswRabitqQueryAlgorithm\n  int cleanup();\n\n  //! do knn search in graph\n  //! return 0 on success, or errCode in failure. results saved in ctx\n  int search(HnswRabitqQueryEntity *entity, HnswRabitqContext *ctx) const;\n\n  //! 
Initialize HnswRabitqQueryAlgorithm\n  int init() {\n    level_probas_.clear();\n    double level_mult =\n        1 / std::log(static_cast<double>(entity_.scaling_factor()));\n    for (int level = 0;; level++) {\n      // refer to the faiss get_random_level algorithm\n      double proba =\n          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));\n      if (proba < 1e-9) {\n        break;\n      }\n      level_probas_.push_back(proba);\n    }\n\n    return 0;\n  }\n\n  //! Generate a random level\n  //! return graph level\n  uint32_t get_random_level() const {\n    // generate a random float in [0, 1]\n    double f = mt_() / static_cast<float>(mt_.max());\n    for (size_t level = 0; level < level_probas_.size(); level++) {\n      if (f < level_probas_[level]) {\n        return level;\n      }\n      f -= level_probas_[level];\n    }\n    return level_probas_.size() - 1;\n  }\n  void get_full_est(node_id_t id, EstimateRecord &res,\n                    HnswRabitqQueryEntity &entity) const {\n    return get_full_est(entity_.get_vector(id), res, entity);\n  }\n\n private:\n  //! Select in upper layer to get entry point for next layer search\n  void select_entry_point(level_t level, node_id_t *entry_point,\n                          EstimateRecord *dist, HnswRabitqContext *ctx,\n                          HnswRabitqQueryEntity *entity) const;\n\n\n  //! Given a node id and level, search the nearest neighbors in graph\n  //! Note: the nearest neighbors result is kept in topk, and entry_point and\n  //! dist will be updated to current level nearest node id and distance\n  void search_neighbors(level_t level, node_id_t *entry_point,\n                        EstimateRecord *dist, TopkHeap &topk,\n                        HnswRabitqContext *ctx,\n                        HnswRabitqQueryEntity *entity) const;\n\n\n  //! 
expand neighbors until group nums are reached\n  void expand_neighbors_by_group(TopkHeap &topk, HnswRabitqContext *ctx,\n                                 HnswRabitqQueryEntity *query_entity) const;\n\n  void get_full_est(const void *vector, EstimateRecord &res,\n                    HnswRabitqQueryEntity &entity) const;\n  void get_bin_est(const void *vector, EstimateRecord &res,\n                   HnswRabitqQueryEntity &entity) const;\n\n private:\n  HnswRabitqQueryAlgorithm(const HnswRabitqQueryAlgorithm &) = delete;\n  HnswRabitqQueryAlgorithm &operator=(const HnswRabitqQueryAlgorithm &) =\n      delete;\n\n\n private:\n  static constexpr uint32_t kLockCnt{1U << 8};\n  static constexpr uint32_t kLockMask{kLockCnt - 1U};\n\n  HnswRabitqEntity &entity_;\n  mutable std::mt19937 mt_{};\n  std::vector<double> level_probas_{};\n\n  mutable ailego::SpinMutex spin_lock_{};  // global spin lock\n  std::mutex mutex_{};                     // global mutex\n  // TODO: spin lock?\n  std::vector<std::mutex> lock_pool_{};\n  size_t num_clusters_{0};\n  RabitqMetricType metric_type_{RabitqMetricType::kL2};\n  size_t padded_dim_{0};\n  size_t ex_bits_{0};\n  float (*ip_func_)(const float *, const uint8_t *, size_t);\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_entity.h",
    "content": "// Copyright 2025-present the centaurdb project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <rabitqlib/index/query.hpp>\nnamespace zvec::core {\n\nstruct HnswRabitqQueryEntity {\n  std::vector<float> rotated_query;\n  std::vector<float> q_to_centroids;\n  std::unique_ptr<rabitqlib::SplitSingleQuery<float>> query_wrapper;\n};\n\n}  // namespace zvec::core"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_register.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n#include \"hnsw_rabitq_builder.h\"\n#include \"hnsw_rabitq_searcher.h\"\n#include \"hnsw_rabitq_streamer.h\"\n#include \"rabitq_converter.h\"\n#include \"rabitq_reformer.h\"\n\nnamespace zvec::core {\n\nINDEX_FACTORY_REGISTER_STREAMER(HnswRabitqStreamer);\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(RabitqReformer, RabitqReformer);\nINDEX_FACTORY_REGISTER_SEARCHER(HnswRabitqSearcher);\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(RabitqConverter, RabitqConverter);\nINDEX_FACTORY_REGISTER_BUILDER(HnswRabitqBuilder);\n\n}  // namespace zvec::core"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_searcher.h\"\n#include <rabitqlib/utils/rotator.hpp>\n#include \"hnsw_rabitq_algorithm.h\"\n#include \"hnsw_rabitq_entity.h\"\n#include \"hnsw_rabitq_index_provider.h\"\n#include \"hnsw_rabitq_params.h\"\n#include \"hnsw_rabitq_query_entity.h\"\n#include \"hnsw_rabitq_searcher_entity.h\"\n#include \"rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqSearcher::HnswRabitqSearcher() {}\n\nHnswRabitqSearcher::~HnswRabitqSearcher() {}\n\nint HnswRabitqSearcher::init(const ailego::Params &search_params) {\n  params_ = search_params;\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_EF, &ef_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE,\n              &bf_enabled_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,\n              &neighbors_in_memory_enabled_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n              &bf_negative_probability_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD,\n              &bruteforce_threshold_);\n  params_.get(PARAM_HNSW_RABITQ_SEARCHER_FORCE_PADDING_RESULT_ENABLE,\n              &force_padding_topk_enabled_);\n\n  if (ef_ == 0) {\n   
 ef_ = HnswRabitqEntity::kDefaultEf;\n  }\n  if (bf_negative_probability_ <= 0.0f || bf_negative_probability_ >= 1.0f) {\n    LOG_ERROR(\n        \"[%s] must be in range (0,1)\",\n        PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);\n\n  ailego::Params reformer_params;\n  reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());\n  int ret = reformer_.init(reformer_params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize RabitqReformer: %d\", ret);\n    return ret;\n  }\n\n  state_ = STATE_INITED;\n\n  LOG_DEBUG(\n      \"Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u \"\n      \"neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u \"\n      \"forcePadding=%u\",\n      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,\n      neighbors_in_memory_enabled_, bf_negative_probability_,\n      bruteforce_threshold_, force_padding_topk_enabled_);\n\n  return 0;\n}\n\nvoid HnswRabitqSearcher::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; \";\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n}\n\nint HnswRabitqSearcher::cleanup() {\n  LOG_INFO(\"Begin HnswRabitqSearcher:cleanup\");\n\n  metric_.reset();\n  meta_.clear();\n  stats_.clear_attributes();\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  max_scan_ratio_ = HnswRabitqEntity::kDefaultScanRatio;\n  max_scan_num_ = 0U;\n  ef_ = HnswRabitqEntity::kDefaultEf;\n  bf_enabled_ = false;\n  bf_negative_probability_ = HnswRabitqEntity::kDefaultBFNegativeProbability;\n  bruteforce_threshold_ = 
HnswRabitqEntity::kDefaultBruteForceThreshold;\n  check_crc_enabled_ = false;\n  neighbors_in_memory_enabled_ = false;\n  entity_.cleanup();\n  state_ = STATE_INIT;\n\n  LOG_INFO(\"End HnswRabitqSearcher:cleanup\");\n\n  return 0;\n}\n\nint HnswRabitqSearcher::load(IndexStorage::Pointer container,\n                             IndexMetric::Pointer metric) {\n  if (state_ != STATE_INITED) {\n    LOG_ERROR(\"Init the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  LOG_INFO(\"Begin HnswRabitqSearcher:load\");\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  ret = reformer_.load(container);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load reformer from container: %d\", ret);\n    return ret;\n  }\n\n  ret = entity_.load(container, check_crc_enabled_);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswRabitqSearcher load index failed\");\n    return ret;\n  }\n\n  alg_ = HnswRabitqQueryAlgorithm::UPointer(new HnswRabitqQueryAlgorithm(\n      entity_, reformer_.num_clusters(), reformer_.rabitq_metric_type()));\n\n  if (metric) {\n    metric_ = metric;\n  } else {\n    metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n    if (!metric_) {\n      LOG_ERROR(\"CreateMetric failed, name: %s\", meta_.metric_name().c_str());\n      return IndexError_NoExist;\n    }\n    ret = metric_->init(meta_, meta_.metric_params());\n    if (ret != 0) {\n      LOG_ERROR(\"IndexMetric init failed, ret=%d\", ret);\n      return ret;\n    }\n    if (metric_->query_metric()) {\n      metric_ = metric_->query_metric();\n    }\n  }\n\n  if (!metric_->is_matched(meta_)) {\n    LOG_ERROR(\"IndexMetric not match index meta\");\n    return IndexError_Mismatch;\n  }\n\n  max_scan_num_ = static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt());\n  max_scan_num_ = std::max(4096U, 
max_scan_num_);\n\n  stats_.set_loaded_count(entity_.doc_cnt());\n  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = STATE_LOADED;\n  magic_ = IndexContext::GenerateMagic();\n\n  LOG_INFO(\"End HnswRabitqSearcher::load\");\n\n  return 0;\n}\n\nint HnswRabitqSearcher::unload() {\n  LOG_INFO(\"HnswRabitqSearcher unload index\");\n\n  meta_.clear();\n  entity_.cleanup();\n  metric_.reset();\n  max_scan_num_ = 0;\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswRabitqSearcher::update_context(HnswRabitqContext *ctx) const {\n  const HnswRabitqEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_num(max_scan_num_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  return ctx->update_context(HnswRabitqContext::kSearcherContext, meta_,\n                             metric_, entity, magic_);\n}\n\nint HnswRabitqSearcher::search_impl(const void *query,\n                                    const IndexQueryMeta &qmeta, uint32_t count,\n                                    Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(query, qmeta, count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n  for (size_t q = 0; q < count; ++q) {\n    HnswRabitqQueryEntity entity;\n    int ret = reformer_.transform_to_entity(query, &entity);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher transform failed\");\n      return ret;\n    }\n    ctx->reset_query(query);\n    ret = alg_->search(&entity, ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswRabitqSearcher::search_bf_impl(const void *query,\n                                       const IndexQueryMeta &qmeta,\n                                       uint32_t count,\n                                       Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      int ret = reformer_.transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw searcher transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          EstimateRecord dist;\n          alg_->get_full_est(id, dist, entity);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      int ret = reformer_.transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw searcher transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->topk_heap().clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n 
         continue;\n        }\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          EstimateRecord dist;\n          alg_->get_full_est(id, dist, entity);\n          ctx->topk_heap().emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswRabitqSearcher::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      int ret = reformer_.transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw searcher transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            EstimateRecord dist;\n            alg_->get_full_est(id, dist, entity);\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      int ret = reformer_.transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw searcher transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->topk_heap().clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n   
     uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            EstimateRecord dist;\n            alg_->get_full_est(id, dist, entity);\n            ctx->topk_heap().emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nIndexSearcher::Context::Pointer HnswRabitqSearcher::create_context() const {\n  if (ailego_unlikely(state_ != STATE_LOADED)) {\n    LOG_ERROR(\"Load the index first before create context\");\n    return Context::Pointer();\n  }\n  const HnswRabitqEntity::Pointer search_ctx_entity = entity_.clone();\n  if (!search_ctx_entity) {\n    LOG_ERROR(\"Failed to create search context entity\");\n    return Context::Pointer();\n  }\n  HnswRabitqContext *ctx = new (std::nothrow)\n      HnswRabitqContext(meta_.dimension(), metric_, search_ctx_entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswRabitqContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_num(max_scan_num_);\n  uint32_t filter_mode =\n      bf_enabled_ ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap;\n  ctx->set_filter_mode(filter_mode);\n  ctx->set_filter_negative_probability(bf_negative_probability_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  if (ailego_unlikely(ctx->init(HnswRabitqContext::kSearcherContext)) != 0) {\n    LOG_ERROR(\"Init HnswRabitqContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n\n  return Context::Pointer(ctx);\n}\n\nIndexProvider::Pointer HnswRabitqSearcher::create_provider(void) const {\n  LOG_DEBUG(\"HnswRabitqSearcher create provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswRabitqEntity failed\");\n    return Provider::Pointer();\n  }\n  return Provider::Pointer(new (std::nothrow) HnswRabitqIndexProvider(\n      meta_, entity, \"HnswRabitqSearcher\"));\n}\n\nconst void *HnswRabitqSearcher::get_vector(uint64_t key) const {\n  return entity_.get_vector_by_key(key);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"zvec/core/framework/index_framework.h\"\n#include \"hnsw_rabitq_query_algorithm.h\"\n#include \"hnsw_rabitq_searcher_entity.h\"\n#include \"rabitq_reformer.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqSearcher : public IndexSearcher {\n public:\n  using ContextPointer = IndexSearcher::Context::Pointer;\n\n public:\n  HnswRabitqSearcher(void);\n  ~HnswRabitqSearcher(void);\n\n  HnswRabitqSearcher(const HnswRabitqSearcher &) = delete;\n  HnswRabitqSearcher &operator=(const HnswRabitqSearcher &) = delete;\n\n protected:\n  //! Initialize Searcher\n  virtual int init(const ailego::Params &params) override;\n\n  //! Cleanup Searcher\n  virtual int cleanup(void) override;\n\n  //! Load Index from storage\n  virtual int load(IndexStorage::Pointer container,\n                   IndexMetric::Pointer metric) override;\n\n  //! Unload index from storage\n  virtual int unload(void) override;\n\n  //! KNN Search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          ContextPointer &context) const override {\n    return search_impl(query, qmeta, 1, context);\n  }\n\n  //! 
KNN Search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          ContextPointer &context) const override;\n\n  //! Linear Search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             ContextPointer &context) const override {\n    return search_bf_impl(query, qmeta, 1, context);\n  }\n\n  //! Linear Search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             ContextPointer &context) const override;\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, uint32_t count,\n      ContextPointer &context) const override;\n\n  //! Fetch vector by key\n  virtual const void *get_vector(uint64_t key) const override;\n\n  //! Create a searcher context\n  virtual ContextPointer create_context() const override;\n\n  //! Create a new iterator\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  virtual const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  virtual void print_debug_info() override;\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! 
current streamer/searcher\n  int update_context(HnswRabitqContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  HnswRabitqSearcherEntity entity_{};\n  HnswRabitqQueryAlgorithm::UPointer alg_;  // impl graph algorithm\n\n  IndexMetric::Pointer metric_{};\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  Stats stats_;\n  uint32_t ef_{HnswRabitqEntity::kDefaultEf};\n  uint32_t max_scan_num_{0U};\n  uint32_t bruteforce_threshold_{HnswRabitqEntity::kDefaultBruteForceThreshold};\n  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool neighbors_in_memory_enabled_{false};\n  bool force_padding_topk_enabled_{false};\n  float bf_negative_probability_{\n      HnswRabitqEntity::kDefaultBFNegativeProbability};\n  uint32_t magic_{0U};\n  RabitqReformer reformer_;\n\n  State state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_searcher_entity.h\"\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqSearcherEntity::HnswRabitqSearcherEntity() {}\n\nint HnswRabitqSearcherEntity::cleanup(void) {\n  storage_.reset();\n  vectors_.reset();\n  keys_.reset();\n  neighbors_.reset();\n  neighbors_meta_.reset();\n  neighbors_in_memory_enabled_ = false;\n  loaded_ = false;\n\n  this->HnswRabitqEntity::cleanup();\n\n  return 0;\n}\n\nkey_t HnswRabitqSearcherEntity::get_key(node_id_t id) const {\n  const void *key;\n  if (ailego_unlikely(keys_->read(id * sizeof(key_t), &key, sizeof(key_t)) !=\n                      sizeof(key_t))) {\n    LOG_ERROR(\"Read key from segment failed\");\n    return kInvalidKey;\n  }\n  return *(reinterpret_cast<const key_t *>(key));\n}\n\n//! Get vector local id by key\nnode_id_t HnswRabitqSearcherEntity::get_id(key_t key) const {\n  if (ailego_unlikely(!mapping_)) {\n    LOG_ERROR(\"Index missing mapping segment\");\n    return kInvalidNodeId;\n  }\n\n  //! 
Do binary search\n  node_id_t start = 0UL;\n  node_id_t end = doc_cnt();\n  const void *data;\n  node_id_t idx = 0u;\n  while (start < end) {\n    idx = start + (end - start) / 2;\n    if (ailego_unlikely(\n            mapping_->read(idx * sizeof(node_id_t), &data, sizeof(node_id_t)) !=\n            sizeof(node_id_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    const key_t *mkey;\n    node_id_t local_id = *reinterpret_cast<const node_id_t *>(data);\n    if (ailego_unlikely(keys_->read(local_id * sizeof(key_t),\n                                    (const void **)(&mkey),\n                                    sizeof(key_t)) != sizeof(key_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    if (*mkey < key) {\n      start = idx + 1;\n    } else if (*mkey > key) {\n      end = idx;\n    } else {\n      return local_id;\n    }\n  }\n  return kInvalidNodeId;\n}\n\nconst void *HnswRabitqSearcherEntity::get_vector_by_key(key_t key) const {\n  node_id_t local_id = get_id(key);\n  if (ailego_unlikely(local_id == kInvalidNodeId)) {\n    return nullptr;\n  }\n\n  return get_vector(local_id);\n}\n\nconst void *HnswRabitqSearcherEntity::get_vector(node_id_t id) const {\n  size_t read_size = vector_size();\n  size_t offset = node_size() * id;\n\n  const void *vec;\n  if (ailego_unlikely(vectors_->read(offset, &vec, read_size) != read_size)) {\n    LOG_ERROR(\"Read vector from segment failed\");\n    return nullptr;\n  }\n  return vec;\n}\n\nint HnswRabitqSearcherEntity::get_vector(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nconst void *HnswRabitqSearcherEntity::get_vectors() const {\n  const void *vec;\n  size_t len = node_size() * doc_cnt();\n  if (vectors_->read(0, &vec, len) != len) {\n    LOG_ERROR(\"Read vectors from segment failed\");\n    return nullptr;\n  }\n  
return vec;\n}\n\nint HnswRabitqSearcherEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                         const void **vecs) const {\n  ailego_assert_with(count <= segment_datas_.size(), \"invalid count\");\n\n  size_t read_size = vector_size();\n\n  for (uint32_t i = 0; i < count; ++i) {\n    segment_datas_[i].offset = node_size() * ids[i];\n    segment_datas_[i].length = read_size;\n\n    ailego_assert_with(segment_datas_[i].offset < vectors_->data_size(),\n                       \"invalid offset\");\n  }\n  if (ailego_unlikely(!vectors_->read(&segment_datas_[0], count))) {\n    LOG_ERROR(\"Read vectors from segment failed\");\n    return IndexError_ReadData;\n  }\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = segment_datas_[i].data;\n  }\n\n  return 0;\n}\n\nint HnswRabitqSearcherEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  const void *vecs[count];\n  get_vector(ids, count, vecs);\n  for (uint32_t i = 0; i < count; ++i) {\n    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\nconst Neighbors HnswRabitqSearcherEntity::get_neighbors(level_t level,\n                                                        node_id_t id) const {\n  if (level == 0) {\n    if (neighbors_in_memory_enabled_) {\n      auto hd = reinterpret_cast<const NeighborsHeader *>(\n          fixed_neighbors_.get() + neighbors_size() * id);\n      return {hd->neighbor_cnt, hd->neighbors};\n    }\n\n    const GraphNeighborMeta *m;\n    if (ailego_unlikely(neighbors_meta_->read(id * sizeof(GraphNeighborMeta),\n                                              (const void **)(&m),\n                                              sizeof(GraphNeighborMeta)) !=\n                        sizeof(GraphNeighborMeta))) {\n      LOG_ERROR(\"Read neighbors meta from segment failed\");\n      return {0, nullptr};\n    }\n\n    const void *data;\n    if 
(ailego_unlikely(neighbors_->read(m->offset, &data,\n                                         m->neighbor_cnt * sizeof(node_id_t)) !=\n                        m->neighbor_cnt * sizeof(node_id_t))) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return {0, nullptr};\n    }\n    return {static_cast<uint32_t>(m->neighbor_cnt),\n            reinterpret_cast<const node_id_t *>(data)};\n  }\n\n  //! Read level > 0 neighbors\n  const HnswNeighborMeta *m;\n  if (ailego_unlikely(upper_neighbors_meta_->read(id * sizeof(HnswNeighborMeta),\n                                                  (const void **)(&m),\n                                                  sizeof(HnswNeighborMeta)) !=\n                      sizeof(HnswNeighborMeta))) {\n    LOG_ERROR(\"Read neighbors meta from segment failed\");\n    return {0, nullptr};\n  }\n\n  ailego_assert_with(level <= m->level, \"invalid level\");\n  size_t offset = m->offset + (level - 1) * upper_neighbors_size();\n  ailego_assert_with(offset <= upper_neighbors_->data_size(), \"invalid offset\");\n  const void *data;\n  if (ailego_unlikely(\n          upper_neighbors_->read(offset, &data, upper_neighbors_size()) !=\n          upper_neighbors_size())) {\n    LOG_ERROR(\"Read neighbors from segment failed\");\n    return {0, nullptr};\n  }\n\n  auto hd = reinterpret_cast<const NeighborsHeader *>(data);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswRabitqSearcherEntity::load(const IndexStorage::Pointer &container,\n                                   bool check_crc) {\n  storage_ = container;\n\n  int ret = load_segments(check_crc);\n  if (ret != 0) {\n    return ret;\n  }\n\n  loaded_ = true;\n\n  LOG_INFO(\n      \"Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu \"\n      \"l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu \"\n      \"vectorSize=%zu nodeSize=%zu vectorSegmentSize=%zu keySegmentSize=%zu \"\n      \"neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu \",\n      
doc_cnt(), entry_point(), cur_max_level(), ef_construction(),\n      l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(), vector_size(),\n      node_size(), vectors_->data_size(), keys_->data_size(),\n      neighbors_ == nullptr ? 0 : neighbors_->data_size(),\n      neighbors_meta_ == nullptr ? 0 : neighbors_meta_->data_size());\n\n  return 0;\n}\n\nint HnswRabitqSearcherEntity::load_segments(bool check_crc) {\n  //! load header\n  const void *data = nullptr;\n  HNSWHeader hd;\n  auto graph_hd_segment = storage_->get(kGraphHeaderSegmentId);\n  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {\n    LOG_ERROR(\"Miss or invalid segment %s\", kGraphHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                             sizeof(hd.graph)) != sizeof(hd.graph)) {\n    LOG_ERROR(\"Read segment %s failed\", kGraphHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.graph, data, sizeof(hd.graph));\n\n  auto hnsw_hd_segment = storage_->get(kHnswHeaderSegmentId);\n  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Miss or invalid segment %s\", kHnswHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Read segment %s failed\", kHnswHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.hnsw, data, sizeof(hd.hnsw));\n  *mutable_header() = hd;\n  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));\n\n  vectors_ = storage_->get(kGraphFeaturesSegmentId);\n  if (!vectors_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kGraphFeaturesSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  keys_ = storage_->get(kGraphKeysSegmentId);\n  if 
(!keys_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kGraphKeysSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  neighbors_ = storage_->get(kGraphNeighborsSegmentId);\n  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kGraphNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n  neighbors_meta_ = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta_ ||\n      neighbors_meta_->data_size() < sizeof(GraphNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_ = storage_->get(kHnswNeighborsSegmentId);\n  if (!upper_neighbors_ ||\n      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kHnswNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_meta_ = storage_->get(kHnswOffsetsSegmentId);\n  if (!upper_neighbors_meta_ || upper_neighbors_meta_->data_size() <\n                                    sizeof(HnswNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kHnswOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  mapping_ = storage_->get(kGraphMappingSegmentId);\n  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kGraphMappingSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (check_crc) {\n    std::vector<SegmentPointer> segments;\n    segments.emplace_back(graph_hd_segment);\n    segments.emplace_back(hnsw_hd_segment);\n    segments.emplace_back(vectors_);\n    
segments.emplace_back(keys_);\n\n    segments.emplace_back(neighbors_);\n    segments.emplace_back(neighbors_meta_);\n    segments.emplace_back(upper_neighbors_);\n    segments.emplace_back(upper_neighbors_meta_);\n\n    if (!do_crc_check(segments)) {\n      LOG_ERROR(\"Check index crc failed, the index may broken\");\n      return IndexError_Runtime;\n    }\n  }\n\n  if (neighbors_in_memory_enabled_) {\n    int ret = load_and_flat_neighbors();\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  return 0;\n}\n\nint HnswRabitqSearcherEntity::load_and_flat_neighbors() {\n  fixed_neighbors_.reset(\n      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},\n      std::default_delete<char[]>());\n  if (!fixed_neighbors_) {\n    LOG_ERROR(\"Malloc memory failed\");\n    return IndexError_NoMemory;\n  }\n\n  //! Get a new segemnt to release the buffer after loading neighbors\n  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const GraphNeighborMeta *neighbors_index = nullptr;\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           neighbors_meta->data_size()) !=\n      neighbors_meta->data_size()) {\n    LOG_ERROR(\"Read segment %s data failed\", kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  const char *neighbor_data;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);\n    if (ailego_unlikely(\n            neighbors_->read(neighbors_index[id].offset,\n                             reinterpret_cast<const void **>(&neighbor_data),\n                             rd_size) != rd_size)) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n    // copy level 0 neighbors to fixed size neighbors memory\n    
char *dst = fixed_neighbors_.get() + neighbors_size() * id;\n    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;\n    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);\n  }\n\n  return 0;\n}\n\nint HnswRabitqSearcherEntity::get_fixed_neighbors(\n    std::vector<uint32_t> *fixed_neighbors) const {\n  //! Get a new segemnt to release the buffer after loading neighbors\n  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const GraphNeighborMeta *neighbors_index = nullptr;\n  size_t meta_size = neighbors_meta->data_size();\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           meta_size) != meta_size) {\n    LOG_ERROR(\"Read segment %s data failed\", kGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  size_t fixed_neighbor_cnt = l0_neighbor_cnt();\n  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);\n\n  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();\n  size_t total_neighbor_cnt = 0;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;\n    if (cur_neighbor_cnt == 0) {\n      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;\n      continue;\n    }\n    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);\n    const uint32_t *neighbors;\n    if (neighbors_->read(neighbors_index[id].offset,\n                         reinterpret_cast<const void **>(&neighbors),\n                         rd_size) != rd_size) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n\n    // copy level 0 neighbors to fixed size neighbors memory\n    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;\n    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);\n\n  
  (*fixed_neighbors)[neighbors_cnt_offset + id] = cur_neighbor_cnt;\n    total_neighbor_cnt += cur_neighbor_cnt;\n  }\n  LOG_INFO(\"total neighbor cnt: %zu, average neighbor cnt: %zu\",\n           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());\n\n  return 0;\n}\n\nbool HnswRabitqSearcherEntity::do_crc_check(\n    std::vector<SegmentPointer> &segments) const {\n  constexpr size_t blk_size = 4096;\n  const void *data;\n  for (auto &segment : segments) {\n    size_t offset = 0;\n    size_t rd_size;\n    uint32_t crc = 0;\n    while (offset < segment->data_size()) {\n      size_t size = std::min(blk_size, segment->data_size() - offset);\n      if ((rd_size = segment->read(offset, &data, size)) <= 0) {\n        break;\n      }\n      offset += rd_size;\n      crc = ailego::Crc32c::Hash(data, rd_size, crc);\n    }\n    if (crc != segment->data_crc()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nconst HnswRabitqEntity::Pointer HnswRabitqSearcherEntity::clone() const {\n  auto vectors = vectors_->clone();\n  if (ailego_unlikely(!vectors)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphFeaturesSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n  auto keys = keys_->clone();\n  if (ailego_unlikely(!keys)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphKeysSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n\n  auto mapping = mapping_->clone();\n  if (ailego_unlikely(!mapping)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphMappingSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n\n  auto neighbors = neighbors_->clone();\n  if (ailego_unlikely(!neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphNeighborsSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n  auto upper_neighbors = upper_neighbors_->clone();\n  if (ailego_unlikely(!neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\", kHnswNeighborsSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n  
auto neighbors_meta = neighbors_meta_->clone();\n  if (ailego_unlikely(!neighbors_meta)) {\n    LOG_ERROR(\"clone segment %s failed\", kGraphOffsetsSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n  auto upper_neighbors_meta = upper_neighbors_meta_->clone();\n  if (ailego_unlikely(!upper_neighbors_meta)) {\n    LOG_ERROR(\"clone segment %s failed\", kHnswOffsetsSegmentId.c_str());\n    return HnswRabitqEntity::Pointer();\n  }\n\n  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,\n                                   upper_neighbors_meta};\n\n  HnswRabitqSearcherEntity *entity = new (std::nothrow)\n      HnswRabitqSearcherEntity(header(), vectors, keys, mapping, neighbor_group,\n                               fixed_neighbors_, neighbors_in_memory_enabled_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswRabitqSearcherEntity new failed\");\n  }\n\n  return HnswRabitqEntity::Pointer(entity);\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_rabitq_builder_entity.h\"\n#include \"hnsw_rabitq_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqSearcherEntity : public HnswRabitqEntity {\n public:\n  using Pointer = std::shared_ptr<HnswRabitqSearcherEntity>;\n  using SegmentPointer = IndexStorage::Segment::Pointer;\n\n public:\n  struct SegmentGroupParam {\n    SegmentGroupParam(SegmentPointer neighbors_in,\n                      SegmentPointer neighbors_meta_in,\n                      SegmentPointer upper_neighbors_in,\n                      SegmentPointer upper_neighbors_meta_in)\n        : neighbors{neighbors_in},\n          neighbors_meta{neighbors_meta_in},\n          upper_neighbors{upper_neighbors_in},\n          upper_neighbors_meta{upper_neighbors_meta_in} {}\n\n    SegmentPointer neighbors{nullptr};\n    SegmentPointer neighbors_meta{nullptr};\n    SegmentPointer upper_neighbors{nullptr};\n    SegmentPointer upper_neighbors_meta{nullptr};\n  };\n\n  //! Constructor\n  HnswRabitqSearcherEntity();\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswRabitqEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! 
Get vector local id by key\n  node_id_t get_id(key_t key) const;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector_by_key(key_t key) const override;\n\n  //! Get vector feature data by id\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! Get vector feature data by id\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! Get all vectors\n  const void *get_vectors() const;\n\n  //! Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  virtual int load(const IndexStorage::Pointer &container,\n                   bool check_crc) override;\n\n  int load_segments(bool check_crc);\n\n  virtual int cleanup(void) override;\n\n public:\n  bool is_loaded() const {\n    return loaded_;\n  }\n\n  void set_neighbors_in_memory(bool enabled) {\n    neighbors_in_memory_enabled_ = enabled;\n  }\n\n  //! get fixed length neighbors data\n  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;\n\n private:\n  //! 
Constructor\n  HnswRabitqSearcherEntity(const HNSWHeader &hd, const SegmentPointer &vectors,\n                           const SegmentPointer &keys,\n                           const SegmentPointer &mapping,\n                           const SegmentGroupParam &neighbor_group,\n                           const std::shared_ptr<char> &fixed_neighbors,\n                           bool neighbors_in_memory_enabled)\n      : HnswRabitqEntity(hd),\n        vectors_(vectors),\n        keys_(keys),\n        mapping_(mapping),\n        neighbors_(neighbor_group.neighbors),\n        neighbors_meta_(neighbor_group.neighbors_meta),\n        upper_neighbors_(neighbor_group.upper_neighbors),\n        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),\n        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {\n    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),\n                          IndexStorage::SegmentData(0U, 0U));\n    fixed_neighbors_ = fixed_neighbors;\n  }\n\n  bool do_crc_check(std::vector<SegmentPointer> &segments) const;\n\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! 
If neighbors_in_memory_enabled, load the level0 neighbors to memory\n  int load_and_flat_neighbors(void);\n\n public:\n  HnswRabitqSearcherEntity(const HnswRabitqSearcherEntity &) = delete;\n  HnswRabitqSearcherEntity &operator=(const HnswRabitqSearcherEntity &) =\n      delete;\n\n private:\n  IndexStorage::Pointer storage_{};\n\n  SegmentPointer vectors_{};\n  SegmentPointer keys_{};\n  SegmentPointer mapping_{};\n\n  SegmentPointer neighbors_{};\n  SegmentPointer neighbors_meta_{};\n  SegmentPointer upper_neighbors_{};\n  SegmentPointer upper_neighbors_meta_{};\n\n  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};\n  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors\n  bool neighbors_in_memory_enabled_{false};\n  bool loaded_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_rabitq_streamer.h\"\n#include <iostream>\n#include <memory>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/pattern/defer.h>\n#include <ailego/utility/memory_helper.h>\n#include <arrow/compute/ordering.h>\n#include \"algorithm/hnsw_rabitq/rabitq_reformer.h\"\n#include \"zvec/ailego/container/params.h\"\n#include \"zvec/ailego/logger/logger.h\"\n#include \"hnsw_rabitq_algorithm.h\"\n#include \"hnsw_rabitq_context.h\"\n#include \"hnsw_rabitq_dist_calculator.h\"\n#include \"hnsw_rabitq_index_provider.h\"\n#include \"hnsw_rabitq_query_entity.h\"\n#include \"rabitq_params.h\"\n#include \"rabitq_utils.h\"\n\nnamespace zvec {\nnamespace core {\nHnswRabitqStreamer::HnswRabitqStreamer() : entity_(stats_) {}\n\nHnswRabitqStreamer::HnswRabitqStreamer(IndexProvider::Pointer provider,\n                                       RabitqReformer::Pointer reformer)\n    : entity_(stats_),\n      reformer_(std::move(reformer)),\n      provider_(std::move(provider)) {}\n\nHnswRabitqStreamer::~HnswRabitqStreamer() {\n  if (state_ == STATE_INITED) {\n    this->cleanup();\n  }\n}\n\nint HnswRabitqStreamer::init(const IndexMeta &imeta,\n                             const ailego::Params &params) {\n  meta_ = imeta;\n  meta_.set_streamer(\"HnswRabitqStreamer\", HnswRabitqEntity::kRevision, params);\n\n  
params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_INDEX_SIZE, &max_index_size_);\n\n  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT,\n             &upper_max_neighbor_cnt_);\n  float multiplier = HnswRabitqEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_RABITQ_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,\n             &multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n\n  multiplier = HnswRabitqEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_RABITQ_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = upper_max_neighbor_cnt_;\n  params.get(PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR, &scaling_factor_);\n\n  params.get(PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_EF, &ef_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n             &bf_negative_prob_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD,\n             &bruteforce_threshold_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE, &chunk_size_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_FILTER_SAME_KEY, &filter_same_key_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE,\n             &get_vector_enabled_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_FORCE_PADDING_RESULT_ENABLE,\n             
&force_padding_topk_enabled_);\n  params.get(PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP, &use_id_map_);\n  entity_.set_use_key_info_map(use_id_map_);\n\n  params.get(PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);\n  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str(),\n              PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    return IndexError_InvalidArgument;\n  } else if (docs_soft_limit_ == 0UL) {\n    docs_soft_limit_ =\n        docs_hard_limit_ * HnswRabitqEntity::kDefaultDocsSoftLimitRatio;\n  }\n\n  if (ef_ == 0U) {\n    ef_ = HnswRabitqEntity::kDefaultEf;\n  }\n  if (ef_construction_ == 0U) {\n    ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0U) {\n    upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d)\",\n              PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              HnswRabitqEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0U) {\n    l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"MaxL0NeighborCnt must be in range (0,%d)\",\n              HnswRabitqEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%zu] must be <= [%s]-[%zu]\",\n              PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT.c_str(),\n              static_cast<size_t>(min_neighbor_cnt_),\n              PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              static_cast<size_t>(upper_max_neighbor_cnt_));\n    return IndexError_InvalidArgument;\n  }\n\n  if 
(bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {\n    LOG_ERROR(\n        \"[%s] must be in range (0,1)\",\n        PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n    LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n              PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_limit_ < min_scan_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT.c_str(),\n              PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n  if (chunk_size_ == 0UL) {\n    chunk_size_ = HnswRabitqEntity::kDefaultChunkSize;\n  }\n  if (chunk_size_ > HnswRabitqEntity::kMaxChunkSize) {\n    LOG_ERROR(\"[%s] must be < %zu\",\n              PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE.c_str(),\n              HnswRabitqEntity::kMaxChunkSize);\n    return IndexError_InvalidArgument;\n  }\n  uint32_t total_bits = 0;\n  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);\n  if (total_bits == 0) {\n    total_bits = kDefaultRabitqTotalBits;\n  }\n  if (total_bits < 1 || total_bits > 9) {\n    LOG_ERROR(\"Invalid total_bits: %zu, must be in [1, 9]\", (size_t)total_bits);\n    return IndexError_InvalidArgument;\n  }\n  uint8_t ex_bits = total_bits - 1;\n  entity_.set_ex_bits(ex_bits);\n\n  uint32_t dimension = 0;\n  params.get(PARAM_HNSW_RABITQ_GENERAL_DIMENSION, &dimension);\n  if (dimension == 0) {\n    
LOG_ERROR(\"%s not set\", PARAM_HNSW_RABITQ_GENERAL_DIMENSION.c_str());\n    return IndexError_InvalidArgument;\n  }\n  if (dimension < kMinRabitqDimSize || dimension > kMaxRabitqDimSize) {\n    LOG_ERROR(\"Invalid dimension: %u, must be in [%d, %d]\", dimension,\n              kMinRabitqDimSize, kMaxRabitqDimSize);\n    return IndexError_InvalidArgument;\n  }\n  entity_.update_rabitq_params_and_vector_size(dimension);\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_prune_cnt(prune_cnt);\n\n  entity_.set_chunk_size(chunk_size_);\n  entity_.set_filter_same_key(filter_same_key_);\n  entity_.set_get_vector(get_vector_enabled_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n\n  int ret = entity_.init(docs_hard_limit_);\n  if (ret != 0) {\n    LOG_ERROR(\"Hnsw entity init failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n\n  LOG_DEBUG(\n      \"Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu \"\n      \"efConstruction=%u ef=%u upperMaxNeighborCnt=%u l0MaxNeighborCnt=%u \"\n      \"scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu \"\n      \"bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f \"\n      \"checkCrcEnabled=%d pruneSize=%zu vectorSize=%u chunkSize=%zu \"\n      \"filterSameKey=%u getVectorEnabled=%u minNeighborCount=%u \"\n      \"forcePadding=%u \",\n      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,\n      ef_, upper_max_neighbor_cnt_, l0_max_neighbor_cnt_, scaling_factor_,\n      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,\n      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,\n      meta_.element_size(), chunk_size_, filter_same_key_, get_vector_enabled_,\n      min_neighbor_cnt_, force_padding_topk_enabled_);\n\n  alg_ = 
HnswRabitqAlgorithm::UPointer(new HnswRabitqAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswRabitqStreamer::cleanup(void) {\n  if (state_ == STATE_OPENED) {\n    this->close();\n  }\n\n  LOG_INFO(\"HnswRabitqStreamer cleanup\");\n\n  meta_.clear();\n  metric_.reset();\n  stats_.clear();\n  entity_.cleanup();\n\n  if (alg_) {\n    alg_->cleanup();\n  }\n\n  max_index_size_ = 0UL;\n  docs_hard_limit_ = HnswRabitqEntity::kDefaultDocsHardLimit;\n  docs_soft_limit_ = 0UL;\n  upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;\n  l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;\n  ef_ = HnswRabitqEntity::kDefaultEf;\n  ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;\n  bf_enabled_ = false;\n  scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;\n  bruteforce_threshold_ = HnswRabitqEntity::kDefaultBruteForceThreshold;\n  max_scan_limit_ = HnswRabitqEntity::kDefaultMaxScanLimit;\n  min_scan_limit_ = HnswRabitqEntity::kDefaultMinScanLimit;\n  chunk_size_ = HnswRabitqEntity::kDefaultChunkSize;\n  bf_negative_prob_ = HnswRabitqEntity::kDefaultBFNegativeProbability;\n  max_scan_ratio_ = HnswRabitqEntity::kDefaultScanRatio;\n  state_ = STATE_INIT;\n  check_crc_enabled_ = false;\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n\n  return 0;\n}\n\nint HnswRabitqStreamer::open(IndexStorage::Pointer stg) {\n  LOG_INFO(\"HnswRabitqStreamer open\");\n\n  if (ailego_unlikely(state_ != STATE_INITED)) {\n    LOG_ERROR(\"Open storage failed, init streamer first!\");\n    return IndexError_NoReady;\n  }\n\n  // try to load reformer\n  if (reformer_ == nullptr) {\n    reformer_ = std::make_shared<RabitqReformer>();\n    ailego::Params reformer_params;\n    reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());\n    int ret = reformer_->init(reformer_params);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to 
initialize RabitqReformer: %d\", ret);\n      return ret;\n    }\n\n    ret = reformer_->load(stg);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to load reformer, ret=%d\", ret);\n      return ret;\n    }\n  } else {\n    if (!stg->has(RABITQ_CONVERTER_SEG_ID)) {\n      int ret = reformer_->dump(stg);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to dump reformer, ret=%d\", ret);\n        return ret;\n      }\n      LOG_INFO(\"Dump reformer success.\");\n    }\n  }\n\n  int ret = entity_.open(std::move(stg), max_index_size_, check_crc_enabled_);\n  if (ret != 0) {\n    return ret;\n  }\n  IndexMeta index_meta;\n  ret = entity_.get_index_meta(&index_meta);\n  if (ret == IndexError_NoExist) {\n    // Set IndexMeta for the new index\n    ret = entity_.set_index_meta(meta_);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to set index meta for %s\", IndexError::What(ret));\n      return ret;\n    }\n  } else if (ret != 0) {\n    LOG_ERROR(\"Failed to get index meta for %s\", IndexError::What(ret));\n    return ret;\n  } else {\n    if (index_meta.dimension() != meta_.dimension() ||\n        index_meta.element_size() != meta_.element_size() ||\n        index_meta.metric_name() != meta_.metric_name() ||\n        index_meta.data_type() != meta_.data_type()) {\n      LOG_ERROR(\"IndexMeta mismatch from the previous in index\");\n      return IndexError_Mismatch;\n    }\n    // The IndexMetric Params may be updated like MipsSquaredEuclidean\n    auto metric_params = index_meta.metric_params();\n    metric_params.merge(meta_.metric_params());\n    meta_.set_metric(index_meta.metric_name(), 0, metric_params);\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create metric %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init metric, ret=%d\", ret);\n    return ret;\n  }\n\n  if 
(!metric_->distance()) {\n    LOG_ERROR(\"Invalid metric distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!metric_->batch_distance()) {\n    LOG_ERROR(\"Invalid metric batch distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  add_distance_ = metric_->distance();\n  add_batch_distance_ = metric_->batch_distance();\n\n  search_distance_ = add_distance_;\n  search_batch_distance_ = add_batch_distance_;\n\n  if (metric_->query_metric() && metric_->query_metric()->distance() &&\n      metric_->query_metric()->batch_distance()) {\n    search_distance_ = metric_->query_metric()->distance();\n    search_batch_distance_ = metric_->query_metric()->batch_distance();\n  }\n\n  state_ = STATE_OPENED;\n  magic_ = IndexContext::GenerateMagic();\n\n  query_alg_ = HnswRabitqQueryAlgorithm::UPointer(new HnswRabitqQueryAlgorithm(\n      entity_, reformer_->num_clusters(), reformer_->rabitq_metric_type()));\n\n  return 0;\n}\n\nint HnswRabitqStreamer::close(void) {\n  LOG_INFO(\"HnswRabitqStreamer close\");\n\n  stats_.clear();\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  int ret = entity_.close();\n  if (ret != 0) {\n    return ret;\n  }\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswRabitqStreamer::flush(uint64_t checkpoint) {\n  LOG_INFO(\"HnswRabitqStreamer flush checkpoint=%zu\", (size_t)checkpoint);\n\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  return entity_.flush(checkpoint);\n}\n\nint HnswRabitqStreamer::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"HnswRabitqStreamer dump\");\n\n  shared_mutex_.lock();\n  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });\n\n  meta_.set_searcher(\"HnswRabitqSearcher\", HnswRabitqEntity::kRevision,\n                     ailego::Params());\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return 
ret;\n  }\n  ret = reformer_->dump(dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump reformer into dumper.\");\n    return ret;\n  }\n  return entity_.dump(dumper);\n}\n\nIndexStreamer::Context::Pointer HnswRabitqStreamer::create_context(void) const {\n  if (ailego_unlikely(state_ != STATE_OPENED)) {\n    LOG_ERROR(\"Create context failed, open storage first!\");\n    return Context::Pointer();\n  }\n\n  HnswRabitqEntity::Pointer entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"CreateContext clone init failed\");\n    return Context::Pointer();\n  }\n  HnswRabitqContext *ctx =\n      new (std::nothrow) HnswRabitqContext(meta_.dimension(), metric_, entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswRabitqContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_filter_mode(bf_enabled_ ? 
VisitFilter::BloomFilter\n                                   : VisitFilter::ByteMap);\n  ctx->set_filter_negative_probability(bf_negative_prob_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  if (ailego_unlikely(ctx->init(HnswRabitqContext::kStreamerContext)) != 0) {\n    LOG_ERROR(\"Init HnswRabitqContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n  uint32_t estimate_doc_count = 0;\n  if (meta_.streamer_params().get(PARAM_HNSW_RABITQ_STREAMER_ESTIMATE_DOC_COUNT,\n                                  &estimate_doc_count)) {\n    LOG_DEBUG(\"HnswRabitqStreamer doc_count[%zu] estimate[%zu]\",\n              (size_t)entity_.doc_cnt(), (size_t)estimate_doc_count);\n  }\n  ctx->check_need_adjuct_ctx(std::max(entity_.doc_cnt(), estimate_doc_count));\n\n  return Context::Pointer(ctx);\n}\n\nIndexProvider::Pointer HnswRabitqStreamer::create_provider(void) const {\n  LOG_DEBUG(\"HnswRabitqStreamer create provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswRabitqEntity failed\");\n    return nullptr;\n  }\n  return Provider::Pointer(\n      new HnswRabitqIndexProvider(meta_, entity, \"HnswRabitqStreamer\"));\n}\n\nint HnswRabitqStreamer::update_context(HnswRabitqContext *ctx) const {\n  const HnswRabitqEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  return ctx->update_context(HnswRabitqContext::kStreamerContext, meta_,\n                             metric_, entity, magic_);\n}\n\n//! 
Add a vector with id into index\nint HnswRabitqStreamer::add_with_id_impl(\n    uint32_t id, const void *query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) {\n  if (!provider_) {\n    LOG_ERROR(\"Provider is nullptr, cannot add vector\");\n    return IndexError_InvalidArgument;\n  }\n\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %zu exceed [%s]\",\n                static_cast<size_t>(entity_.doc_cnt()),\n                PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %zu exceed [%s]\",\n               static_cast<size_t>(entity_.doc_cnt()),\n               PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);\n  ctx->reset_query(query);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n  ctx->set_provider(provider_);\n\n  if (metric_->support_train()) {\n    const std::lock_guard<std::mutex> lk(mutex_);\n    ret = 
metric_->train(query, meta_.dimension());\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw streamer metric train failed\");\n      (*stats_.mutable_discarded_count())++;\n      return ret;\n    }\n  }\n\n  std::string converted_vector;\n  IndexQueryMeta converted_meta;\n  ret = reformer_->convert(query, qmeta, &converted_vector, &converted_meta);\n  if (ret != 0) {\n    LOG_ERROR(\"Rabitq hnsw convert failed, ret=%d\", ret);\n    return ret;\n  }\n\n  level_t level = alg_->get_random_level();\n  ret = entity_.add_vector_with_id(level, id, converted_vector.data());\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw steamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Runtime;\n  }\n  (*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n//! Add a vector into index\nint HnswRabitqStreamer::add_impl(uint64_t pkey, const void *query,\n                                 const IndexQueryMeta &qmeta,\n                                 IndexStreamer::Context::Pointer &context) {\n  if (!provider_) {\n    LOG_ERROR(\"Provider is nullptr, cannot add vector\");\n    return IndexError_InvalidArgument;\n  }\n\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %zu exceed [%s]\",\n                static_cast<size_t>(entity_.doc_cnt()),\n                PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %zu exceed [%s]\",\n               static_cast<size_t>(entity_.doc_cnt()),\n               PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);\n  ctx->reset_query(query);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n  ctx->set_provider(provider_);\n\n  if (metric_->support_train()) {\n    const std::lock_guard<std::mutex> lk(mutex_);\n    ret = metric_->train(query, meta_.dimension());\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw streamer metric train failed\");\n      (*stats_.mutable_discarded_count())++;\n      return ret;\n    }\n  }\n\n  std::string converted_vector;\n  IndexQueryMeta converted_meta;\n  ret = reformer_->convert(query, qmeta, &converted_vector, &converted_meta);\n  if (ret != 0) {\n    LOG_ERROR(\"Rabitq hnsw convert failed, ret=%d\", ret);\n    return ret;\n  }\n\n  level_t level = alg_->get_random_level();\n  node_id_t id;\n  ret = entity_.add_vector(level, pkey, converted_vector.data(), &id);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw 
streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw steamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Runtime;\n  }\n  (*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n\nint HnswRabitqStreamer::search_impl(\n    const void *query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) const {\n  return search_impl(query, qmeta, 1, context);\n}\n\n//! Similarity search\nint HnswRabitqStreamer::search_impl(\n    const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(query, qmeta, count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n  for (size_t q = 0; q < count; ++q) {\n    HnswRabitqQueryEntity entity;\n    ret = reformer_->transform_to_entity(query, &entity);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher transform failed\");\n      return ret;\n    }\n    ctx->reset_query(query);\n    ret = query_alg_->search(&entity, ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + qmeta.element_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nvoid HnswRabitqStreamer::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    if (entity_.get_key(id) == kInvalidKey) {\n      continue;\n    }\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; \";\n    if (neighbours.size() == 0) std::cout << std::endl;\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n\n  // entity_.print_key_map();\n}\n\nint HnswRabitqStreamer::search_bf_impl(\n    const void *query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) const {\n  return search_bf_impl(query, qmeta, 1, context);\n}\n\nint HnswRabitqStreamer::search_bf_impl(\n    const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if 
(ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      ret = reformer_->transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw rabitq streamer transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          EstimateRecord dist;\n          query_alg_->get_full_est(id, dist, entity);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      
HnswRabitqQueryEntity entity;\n      ret = reformer_->transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw rabitq streamer transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->topk_heap().clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          EstimateRecord dist;\n          query_alg_->get_full_est(id, dist, entity);\n          ctx->topk_heap().emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswRabitqStreamer::search_bf_by_p_keys_impl(\n    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  int ret = check_params(query, qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswRabitqContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);\n  ctx->resize_results(count);\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_InvalidArgument;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      ret = reformer_->transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw rabitq streamer transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            EstimateRecord dist;\n            query_alg_->get_full_est(id, dist, entity);\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      HnswRabitqQueryEntity entity;\n      ret = reformer_->transform_to_entity(query, &entity);\n      if (ailego_unlikely(ret != 0)) {\n        LOG_ERROR(\"Hnsw rabitq streamer transform failed\");\n        return ret;\n      }\n      ctx->reset_query(query);\n     
 ctx->topk_heap().clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        key_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            EstimateRecord dist;\n            query_alg_->get_full_est(id, dist, entity);\n            ctx->topk_heap().emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n      query = static_cast<const char *>(query) + qmeta.element_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <ailego/parallel/lock.h>\n#include \"algorithm/hnsw_rabitq/rabitq_reformer.h\"\n#include \"zvec/core/framework/index_framework.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"hnsw_rabitq_algorithm.h\"\n#include \"hnsw_rabitq_query_algorithm.h\"\n#include \"hnsw_rabitq_streamer_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqStreamer : public IndexStreamer {\n public:\n  using ContextPointer = IndexStreamer::Context::Pointer;\n\n  HnswRabitqStreamer();\n  explicit HnswRabitqStreamer(IndexProvider::Pointer provider,\n                              RabitqReformer::Pointer reformer = nullptr);\n  virtual ~HnswRabitqStreamer(void);\n\n  HnswRabitqStreamer(const HnswRabitqStreamer &streamer) = delete;\n  HnswRabitqStreamer &operator=(const HnswRabitqStreamer &streamer) = delete;\n\n  void set_provider(IndexProvider::Pointer provider) {\n    provider_ = std::move(provider);\n  }\n\n  void set_reformer(IndexReformer::Pointer reformer) {\n    reformer_ = std::dynamic_pointer_cast<RabitqReformer>(reformer);\n  }\n\n protected:\n  //! Initialize Streamer\n  virtual int init(const IndexMeta &imeta,\n                   const ailego::Params &params) override;\n\n  //! 
Cleanup Streamer\n  virtual int cleanup(void) override;\n\n  //! Create a context\n  virtual Context::Pointer create_context(void) const override;\n\n  //! Create a new iterator\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Add a vector into index\n  virtual int add_impl(uint64_t pkey, const void *query,\n                       const IndexQueryMeta &qmeta,\n                       Context::Pointer &context) override;\n\n  //! Add a vector with id into index\n  virtual int add_with_id_impl(uint32_t id, const void *query,\n                               const IndexQueryMeta &qmeta,\n                               Context::Pointer &context) override;\n\n  //! Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          Context::Pointer &context) const override;\n\n  //! Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! 
Linear search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, uint32_t count,\n      ContextPointer &context) const override;\n\n  //! Fetch vector by key\n  virtual const void *get_vector(uint64_t key) const override {\n    return entity_.get_vector_by_key(key);\n  }\n\n  virtual int get_vector(const uint64_t key,\n                         IndexStorage::MemoryBlock &block) const override {\n    return entity_.get_vector_by_key(key, block);\n  }\n\n  //! Fetch vector by id\n  virtual const void *get_vector_by_id(uint32_t id) const override {\n    return entity_.get_vector(id);\n  }\n\n  virtual int get_vector_by_id(\n      const uint32_t id, IndexStorage::MemoryBlock &block) const override {\n    return entity_.get_vector(id, block);\n  }\n\n  //! Open index from file path\n  virtual int open(IndexStorage::Pointer stg) override;\n\n  //! Close file\n  virtual int close(void) override;\n\n  //! flush file\n  virtual int flush(uint64_t checkpoint) override;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! 
Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  virtual void print_debug_info() override;\n\n private:\n  inline int check_params(const void *query,\n                          const IndexQueryMeta &qmeta) const {\n    if (ailego_unlikely(!query)) {\n      LOG_ERROR(\"null query\");\n      return IndexError_InvalidArgument;\n    }\n    if (ailego_unlikely(qmeta.dimension() != meta_.dimension() ||\n                        qmeta.data_type() != meta_.data_type() ||\n                        qmeta.element_size() != meta_.element_size())) {\n      LOG_ERROR(\"Unsupported query meta\");\n      return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,\n                                        uint32_t count) const {\n    for (uint32_t i = 0; i < count; ++i) {\n      if (sparse_count[i] != 0)\n        LOG_ERROR(\"Sparse cout is not empty. Index: %u, Sparse Count: %u\", i,\n                  sparse_count[i]);\n      return IndexError_InvalidArgument;\n    }\n\n    return 0;\n  }\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! 
current streamer/searcher\n  int update_context(HnswRabitqContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };\n  class Stats : public IndexStreamer::Stats {\n   public:\n    void clear(void) {\n      set_revision_id(0u);\n      set_loaded_count(0u);\n      set_added_count(0u);\n      set_discarded_count(0u);\n      set_index_size(0u);\n      set_dumped_size(0u);\n      set_check_point(0u);\n      set_create_time(0u);\n      set_update_time(0u);\n      clear_attributes();\n    }\n  };\n\n  HnswRabitqStreamerEntity entity_;\n  HnswRabitqAlgorithm::UPointer alg_;\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n\n  IndexMetric::MatrixDistance add_distance_{};\n  IndexMetric::MatrixDistance search_distance_{};\n\n  IndexMetric::MatrixBatchDistance add_batch_distance_{};\n  IndexMetric::MatrixBatchDistance search_batch_distance_{};\n\n  RabitqReformer::Pointer reformer_{};            // RaBitQ reformer\n  HnswRabitqQueryAlgorithm::UPointer query_alg_;  // query algorithm\n  // provider_ provides raw vector, which is used to build graph\n  IndexProvider::Pointer provider_{};\n\n  Stats stats_{};\n  std::mutex mutex_{};\n\n  size_t max_index_size_{0UL};\n  size_t chunk_size_{HnswRabitqEntity::kDefaultChunkSize};\n  size_t docs_hard_limit_{HnswRabitqEntity::kDefaultDocsHardLimit};\n  size_t docs_soft_limit_{0UL};\n  uint32_t min_neighbor_cnt_{0u};\n  uint32_t upper_max_neighbor_cnt_{\n      HnswRabitqEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t l0_max_neighbor_cnt_{HnswRabitqEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t ef_{HnswRabitqEntity::kDefaultEf};\n  uint32_t ef_construction_{HnswRabitqEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswRabitqEntity::kDefaultScalingFactor};\n  size_t bruteforce_threshold_{HnswRabitqEntity::kDefaultBruteForceThreshold};\n  size_t max_scan_limit_{HnswRabitqEntity::kDefaultMaxScanLimit};\n  size_t min_scan_limit_{HnswRabitqEntity::kDefaultMinScanLimit};\n  
float bf_negative_prob_{HnswRabitqEntity::kDefaultBFNegativeProbability};\n  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};\n\n  uint32_t magic_{0U};\n  State state_{STATE_INIT};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n  bool force_padding_topk_enabled_{false};\n  bool use_id_map_{true};\n\n  //! avoid add vector while dumping index\n  ailego::SharedMutex shared_mutex_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_rabitq_streamer_entity.h\"\n#include <ailego/utility/memory_helper.h>\n\n// #define DEBUG_PRINT\n\nnamespace zvec {\nnamespace core {\n\nHnswRabitqStreamerEntity::HnswRabitqStreamerEntity(IndexStreamer::Stats &stats)\n    : stats_(stats) {}\n\nHnswRabitqStreamerEntity::~HnswRabitqStreamerEntity() {}\n\nint HnswRabitqStreamerEntity::init(size_t max_doc_cnt) {\n  if (std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt) {\n    LOG_ERROR(\"scalingFactor=%zu is too small\", scaling_factor());\n    return IndexError_InvalidArgument;\n  }\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  broker_ = std::make_shared<HnswRabitqChunkBroker>(stats_);\n  upper_neighbor_index_ = std::make_shared<NIHashMap>();\n  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();\n  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();\n  if (!keys_map_ || !upper_neighbor_index_ || !broker_ || !keys_map_lock_) {\n    LOG_ERROR(\"HnswRabitqStreamerEntity new object failed\");\n    return IndexError_NoMemory;\n  }\n  keys_map_->set_empty_key(kInvalidKey);\n\n  neighbor_size_ = neighbors_size();\n  upper_neighbor_size_ = upper_neighbors_size();\n\n  //! 
vector + key + level 0 neighbors\n  size_t size = vector_size() + sizeof(key_t) + neighbor_size_;\n\n  size = AlignSize(size);\n  set_node_size(size);\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::cleanup() {\n  std::lock_guard<std::mutex> lock(mutex_);\n  mutable_header()->clear();\n  chunk_size_ = kDefaultChunkSize;\n  node_index_mask_bits_ = 0U;\n  node_index_mask_ = 0U;\n  node_cnt_per_chunk_ = 0U;\n  neighbor_size_ = 0U;\n  upper_neighbor_size_ = 0U;\n  if (upper_neighbor_index_) {\n    upper_neighbor_index_->cleanup();\n  }\n  if (keys_map_) {\n    keys_map_->clear();\n  }\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n  broker_.reset();\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors) {\n  char buffer[neighbor_size_];\n  NeighborsHeader *hd = reinterpret_cast<NeighborsHeader *>(buffer);\n  hd->neighbor_cnt = neighbors.size();\n  size_t i = 0;\n  for (; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t size = reinterpret_cast<char *>(&hd->neighbors[i]) - &buffer[0];\n  size_t ret = loc.first->write(loc.second, hd, size);\n  if (ailego_unlikely(ret != size)) {\n    LOG_ERROR(\"Write neighbor header failed, ret=%zu\", ret);\n\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nconst Neighbors HnswRabitqStreamerEntity::get_neighbors(level_t level,\n                                                        node_id_t id) const {\n  Chunk *chunk = nullptr;\n  size_t offset = 0UL;\n  size_t neighbor_size = neighbor_size_;\n  if (level == 0UL) {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    offset =\n        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);\n\n    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,\n                
&node_chunks_);\n    ailego_assert_with(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n    chunk = node_chunks_[chunk_idx].get();\n  } else {\n    auto p = get_upper_neighbor_chunk_loc(level, id);\n    chunk = upper_neighbor_chunks_[p.first].get();\n    offset = p.second;\n    neighbor_size = upper_neighbor_size_;\n  }\n\n  ailego_assert_with(offset < chunk->data_size(), \"invalid chunk offset\");\n  IndexStorage::MemoryBlock neighbor_block;\n  size_t size = chunk->read(offset, neighbor_block, neighbor_size);\n  if (ailego_unlikely(size != neighbor_size)) {\n    LOG_ERROR(\"Read neighbor header failed, ret=%zu\", size);\n    return Neighbors();\n  }\n  return Neighbors(std::move(neighbor_block));\n}\n\n//! Get vector data by key\nconst void *HnswRabitqStreamerEntity::get_vector(node_id_t id) const {\n  auto loc = get_vector_chunk_loc(id);\n  const void *vec = nullptr;\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = vector_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, &vec, read_size);\n  if (ailego_unlikely(ret != read_size)) {\n    LOG_ERROR(\"Read vector failed, offset=%zu, read size=%zu, ret=%zu\",\n              static_cast<size_t>(loc.second), read_size, ret);\n  }\n\n  return vec;\n}\n\nint HnswRabitqStreamerEntity::get_vector(const node_id_t *ids, uint32_t count,\n                                         const void **vecs) const {\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = vector_size();\n\n    size_t ret = node_chunks_[loc.first]->read(loc.second, &vecs[i], read_size);\n    if 
(ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, offset=%zu, read size=%zu, ret=%zu\",\n                static_cast<size_t>(loc.second), read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::get_vector(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  auto loc = get_vector_chunk_loc(id);\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = vector_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);\n  if (ailego_unlikely(ret != read_size)) {\n    LOG_ERROR(\"Read vector failed, offset=%zu, read size=%zu, ret=%zu\",\n              static_cast<size_t>(loc.second), read_size, ret);\n    return IndexError_ReadData;\n  }\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::get_vector(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {\n  vec_blocks.resize(count);\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = vector_size();\n\n    size_t ret =\n        node_chunks_[loc.first]->read(loc.second, vec_blocks[i], read_size);\n    if (ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, offset=%zu, read size=%zu, ret=%zu\",\n                static_cast<size_t>(loc.second), read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n  return 0;\n}\n\nkey_t HnswRabitqStreamerEntity::get_key(node_id_t id) const {\n  if (use_key_info_map_) {\n    auto loc = get_key_chunk_loc(id);\n    IndexStorage::MemoryBlock key_block;\n    
ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n    size_t ret =\n        node_chunks_[loc.first]->read(loc.second, key_block, sizeof(key_t));\n    if (ailego_unlikely(ret != sizeof(key_t))) {\n      LOG_ERROR(\"Read vector failed, ret=%zu\", ret);\n      return kInvalidKey;\n    }\n\n    return *reinterpret_cast<const key_t *>(key_block.data());\n  } else {\n    return id;\n  }\n}\n\nvoid HnswRabitqStreamerEntity::add_neighbor(level_t level, node_id_t id,\n                                            uint32_t size,\n                                            node_id_t neighbor_id) {\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t offset =\n      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);\n  ailego_assert_with(size < neighbor_cnt(level), \"invalid neighbor size\");\n  ailego_assert_with(offset < loc.first->data_size(), \"invalid chunk offset\");\n  size_t ret = loc.first->write(offset, &neighbor_id, sizeof(node_id_t));\n  if (ailego_unlikely(ret != sizeof(node_id_t))) {\n    LOG_ERROR(\"Write neighbor id failed, ret=%zu\", ret);\n    return;\n  }\n\n  uint32_t neighbors = size + 1;\n  ret = loc.first->write(loc.second, &neighbors, sizeof(uint32_t));\n  if (ailego_unlikely(ret != sizeof(uint32_t))) {\n    LOG_ERROR(\"Write neighbor cnt failed, ret=%zu\", ret);\n  }\n\n  return;\n}\n\nint HnswRabitqStreamerEntity::init_chunks(const Chunk::Pointer &header_chunk) {\n  if (header_chunk->data_size() < header_size()) {\n    LOG_ERROR(\"Invalid header chunk size\");\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::MemoryBlock header_block;\n  size_t size = header_chunk->read(0UL, header_block, header_size());\n  if (ailego_unlikely(size != header_size())) {\n    LOG_ERROR(\"Read header chunk failed\");\n    return IndexError_ReadData;\n  }\n  *mutable_header() =\n      
*reinterpret_cast<const HNSWHeader *>(header_block.data());\n\n  int ret = check_hnsw_index(&header());\n  if (ret != 0) {\n    broker_->close();\n    return ret;\n  }\n\n  node_chunks_.resize(\n      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_NODE));\n  for (auto seq = 0UL; seq < node_chunks_.size(); ++seq) {\n    node_chunks_[seq] =\n        broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, seq);\n    if (!node_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer data chunk %zu th of %zu\", seq,\n                node_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  upper_neighbor_chunks_.resize(\n      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR));\n  for (auto seq = 0UL; seq < upper_neighbor_chunks_.size(); ++seq) {\n    upper_neighbor_chunks_[seq] = broker_->get_chunk(\n        HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, seq);\n    if (!upper_neighbor_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer index chunk %zu th of %zu\", seq,\n                upper_neighbor_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::open(IndexStorage::Pointer stg,\n                                   uint64_t max_index_size, bool check_crc) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  bool huge_page = stg->isHugePage();\n  LOG_DEBUG(\"huge_page: %d\", (int)huge_page);\n  int ret = init_chunk_params(max_index_size, huge_page);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"init_chunk_params failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Open index failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,\n                                    scaling_factor(), estimate_doc_capacity(),\n   
                                 kUpperHashMemoryInflateRatio);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Init neighbor hash map failed\");\n    return ret;\n  }\n\n  //! init header\n  auto header_chunk =\n      broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,\n                         HnswRabitqChunkBroker::kDefaultChunkSeqId);\n  if (!header_chunk) {  // open empty index, create one\n    auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,\n                                  HnswRabitqChunkBroker::kDefaultChunkSeqId,\n                                  header_size());\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc header chunk failed\");\n      return p.first;\n    }\n    size_t size = p.second->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n    return 0;\n  }\n\n  //! Open an exist hnsw index\n  ret = init_chunks(header_chunk);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  //! total docs including features wrote in index but neighbors may not ready\n  node_id_t total_vecs = 0;\n  if (node_chunks_.size() > 0) {\n    size_t last_idx = node_chunks_.size() - 1;\n    auto last_chunk = node_chunks_[last_idx];\n    if (last_chunk->data_size() % node_size()) {\n      LOG_WARN(\"The index may broken\");\n      return IndexError_InvalidFormat;\n    }\n    total_vecs = last_idx * node_cnt_per_chunk_ +\n                 node_chunks_[last_idx]->data_size() / node_size();\n  }\n\n  LOG_INFO(\n      \"Open index, l0NeighborCnt=%zu upperNeighborCnt=%zu \"\n      \"efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u\",\n      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),\n      total_vecs, cur_max_level());\n  //! 
try to correct the docCnt if index not fully flushed\n  if (doc_cnt() != total_vecs) {\n    LOG_WARN(\"Index closed abnormally, using totalVecs as curDocCnt\");\n    *mutable_doc_cnt() = total_vecs;\n  }\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      for (node_id_t id = 0U; id < doc_cnt(); ++id) {\n        if (get_key(id) == kInvalidKey) {\n          continue;\n        }\n        (*keys_map_)[get_key(id)] = id;\n      }\n    }\n  }\n\n  stats_.set_loaded_count(doc_cnt());\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::close() {\n  LOG_DEBUG(\"close index\");\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  mutable_header()->reset();\n  upper_neighbor_index_->cleanup();\n  keys_map_->clear();\n  header_.clear();\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n\n  return broker_->close();\n}\n\nint HnswRabitqStreamerEntity::flush(uint64_t checkpoint) {\n  LOG_INFO(\"Flush index, curDocs=%zu\", static_cast<size_t>(doc_cnt()));\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  int ret = broker_->flush(checkpoint);\n  if (ret != 0) {\n    return ret;\n  }\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"Dump index, curDocs=%zu\", static_cast<size_t>(doc_cnt()));\n\n  //! sort by keys, to support get_vector by key in searcher\n  std::vector<key_t> keys(doc_cnt());\n  for (node_id_t i = 0; i < doc_cnt(); ++i) {\n    keys[i] = get_key(i);\n  }\n\n  //! 
dump neighbors\n  auto get_level = [&](node_id_t id) {\n    auto it = upper_neighbor_index_->find(id);\n    if (it == upper_neighbor_index_->end()) {\n      return 0U;\n    };\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    return meta->level;\n  };\n  auto ret = dump_segments(dumper, keys.data(), get_level);\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n  *stats_.mutable_dumped_size() += ret;\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::check_hnsw_index(const HNSWHeader *hd) const {\n  if (l0_neighbor_cnt() != hd->l0_neighbor_cnt() ||\n      upper_neighbor_cnt() != hd->upper_neighbor_cnt()) {\n    LOG_ERROR(\"Param neighbor cnt: %zu:%zu mismatch index previous %zu:%zu\",\n              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->l0_neighbor_cnt(),\n              hd->upper_neighbor_cnt());\n    return IndexError_Mismatch;\n  }\n  if (vector_size() != hd->vector_size()) {\n    LOG_ERROR(\"vector size %zu mismatch index previous %zu\", vector_size(),\n              hd->vector_size());\n    return IndexError_Mismatch;\n  }\n  if (ef_construction() != hd->ef_construction()) {\n    LOG_WARN(\"Param efConstruction %zu mismatch index previous %zu\",\n             ef_construction(), hd->ef_construction());\n  }\n  if (scaling_factor() != hd->scaling_factor()) {\n    LOG_WARN(\"Param scalingFactor %zu mismatch index previous %zu\",\n             scaling_factor(), hd->scaling_factor());\n    return IndexError_Mismatch;\n  }\n  if (prune_cnt() != hd->neighbor_prune_cnt()) {\n    LOG_WARN(\"Param pruneCnt %zu mismatch index previous %zu\", prune_cnt(),\n             hd->neighbor_prune_cnt());\n    return IndexError_Mismatch;\n  }\n  if ((hd->entry_point() != kInvalidNodeId &&\n       hd->entry_point() >= hd->doc_cnt()) ||\n      (hd->entry_point() == kInvalidNodeId && hd->doc_cnt() > 0U)) {\n    LOG_WARN(\"Invalid entryPoint %zu, docCnt %zu\",\n             static_cast<size_t>(hd->entry_point()),\n             
static_cast<size_t>(hd->doc_cnt()));\n    return IndexError_InvalidFormat;\n  }\n  if (hd->entry_point() == kInvalidNodeId &&\n      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_NODE) > 0) {\n    LOG_WARN(\"The index is broken, maybe it haven't flush\");\n    return IndexError_InvalidFormat;\n  }\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::add_vector(level_t level, key_t key,\n                                         const void *vec, node_id_t *id) {\n  Chunk::Pointer node_chunk;\n  size_t chunk_offset = -1UL;\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  node_id_t local_id = static_cast<node_id_t>(doc_cnt());\n  uint32_t chunk_index = node_chunks_.size() - 1U;\n  if (chunk_index == -1U ||\n      (node_chunks_[chunk_index]->data_size() >=\n       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc\n    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n    chunk_index++;\n    auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE,\n                                  chunk_index, chunk_size_);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return p.first;\n    }\n    node_chunk = p.second;\n    chunk_offset = 0UL;\n    node_chunks_.emplace_back(node_chunk);\n  } else {\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = node_chunk->data_size();\n  }\n\n  size_t size = node_chunk->write(chunk_offset, vec, vector_size());\n  if (ailego_unlikely(size != vector_size())) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  size = node_chunk->write(chunk_offset + vector_size(), &key, 
sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  //! level 0 neighbors is inited to zero by default\n\n  int ret = add_upper_neighbor(level, local_id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  chunk_offset += node_size();\n  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n    LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n    return IndexError_Runtime;\n  }\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock();\n      (*keys_map_)[key] = local_id;\n      keys_map_lock_->unlock();\n    }\n  }\n\n  *mutable_doc_cnt() += 1;\n  broker_->mark_dirty();\n  *id = local_id;\n\n  return 0;\n}\n\nint HnswRabitqStreamerEntity::add_vector_with_id(level_t level, node_id_t id,\n                                                 const void *vec) {\n  Chunk::Pointer node_chunk;\n  size_t chunk_offset = -1UL;\n  key_t key = id;\n\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  // set node_chunk & chunk_offset if succeed\n  auto func_get_node_chunk_and_offset = [&](node_id_t node_id) -> int {\n    uint32_t chunk_index = node_id >> node_index_mask_bits_;\n    ailego_assert_with(chunk_index <= node_chunks_.size(), \"invalid chunk idx\");\n    // belongs to next chunk\n    if (chunk_index == node_chunks_.size()) {\n      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n        LOG_ERROR(\"add vector failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE,\n                                    chunk_index, chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n   
     LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      node_chunk = p.second;\n      node_chunks_.emplace_back(node_chunk);\n    }\n\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = (node_id & node_index_mask_) * node_size();\n    return 0;\n  };\n\n  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {\n    if (auto ret = func_get_node_chunk_and_offset(start_id); ret != 0) {\n      LOG_ERROR(\"func_get_node_chunk_and_offset failed\");\n      return ret;\n    }\n    size_t size = node_chunk->write(chunk_offset + vector_size(), &kInvalidKey,\n                                    sizeof(key_t));\n    if (ailego_unlikely(size != sizeof(key_t))) {\n      LOG_ERROR(\"Chunk write key failed, ret=%zu\", size);\n      return IndexError_WriteData;\n    }\n\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (auto ret = func_get_node_chunk_and_offset(id); ret != 0) {\n    LOG_ERROR(\"func_get_node_chunk_and_offset failed\");\n    return ret;\n  }\n\n  size_t size = node_chunk->write(chunk_offset, vec, vector_size());\n  if (ailego_unlikely(size != vector_size())) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"Chunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n  //! 
level 0 neighbors is inited to zero by default\n\n  int ret = add_upper_neighbor(level, id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  if (*mutable_doc_cnt() <= id) {\n    *mutable_doc_cnt() = id + 1;\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (filter_same_key_ || get_vector_enabled_) {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock();\n      (*keys_map_)[key] = id;\n      keys_map_lock_->unlock();\n    }\n  }\n\n  broker_->mark_dirty();\n\n  return 0;\n}\n\nvoid HnswRabitqStreamerEntity::update_ep_and_level(node_id_t ep,\n                                                   level_t level) {\n  HnswRabitqEntity::update_ep_and_level(ep, level);\n  flush_header();\n\n  return;\n}\n\nconst HnswRabitqEntity::Pointer HnswRabitqStreamerEntity::clone() const {\n  std::vector<Chunk::Pointer> node_chunks;\n  node_chunks.reserve(node_chunks_.size());\n  for (size_t i = 0UL; i < node_chunks_.size(); ++i) {\n    node_chunks.emplace_back(node_chunks_[i]->clone());\n    if (ailego_unlikely(!node_chunks[i])) {\n      LOG_ERROR(\"HnswRabitqStreamerEntity get chunk failed in clone\");\n      return HnswRabitqEntity::Pointer();\n    }\n  }\n\n  std::vector<Chunk::Pointer> upper_neighbor_chunks;\n  upper_neighbor_chunks.reserve(upper_neighbor_chunks_.size());\n  for (size_t i = 0UL; i < upper_neighbor_chunks_.size(); ++i) {\n    upper_neighbor_chunks.emplace_back(upper_neighbor_chunks_[i]->clone());\n    if (ailego_unlikely(!upper_neighbor_chunks[i])) {\n      LOG_ERROR(\"HnswRabitqStreamerEntity get chunk failed in clone\");\n      return HnswRabitqEntity::Pointer();\n    }\n  }\n\n  HnswRabitqStreamerEntity *entity =\n      new (std::nothrow) HnswRabitqStreamerEntity(\n          stats_, header(), chunk_size_, node_index_mask_bits_,\n          upper_neighbor_mask_bits_, 
filter_same_key_, get_vector_enabled_,\n          upper_neighbor_index_, keys_map_lock_, keys_map_, use_key_info_map_,\n          std::move(node_chunks), std::move(upper_neighbor_chunks), broker_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswRabitqStreamerEntity new failed\");\n  }\n  return HnswRabitqEntity::Pointer(entity);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <iostream>\n#include <ailego/parallel/lock.h>\n#include <sparsehash/dense_hash_map>\n#include <sparsehash/dense_hash_set>\n#include <zvec/ailego/container/heap.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"hnsw_rabitq_chunk.h\"\n#include \"hnsw_rabitq_entity.h\"\n#include \"hnsw_rabitq_index_hash.h\"\n#include \"hnsw_rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! HnswRabitqStreamerEntity manage vector data, pkey, and node's neighbors\nclass HnswRabitqStreamerEntity : public HnswRabitqEntity {\n public:\n  //! Cleanup\n  //! return 0 on success, or errCode in failure\n  virtual int cleanup() override;\n\n  //! Make a copy of streamer entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswRabitqEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector(node_id_t id) const override;\n\n  //! 
Get vectors feature data by local ids\n  virtual int get_vector(const node_id_t *ids, uint32_t count,\n                         const void **vecs) const override;\n\n  virtual int get_vector(const node_id_t id,\n                         IndexStorage::MemoryBlock &block) const override;\n\n  virtual int get_vector(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;\n\n  //! Get the node id's neighbors on graph level\n  //! Note: the neighbors cannot be modified, using the following\n  //! method to get WritableNeighbors if want to\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  //! Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t level, key_t key, const void *vec,\n                         node_id_t *id) override;\n\n  //! Add vector and id to hnsw entity\n  virtual int add_vector_with_id(level_t level, node_id_t id,\n                                 const void *vec) override;\n\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors)\n      override;\n\n  //! Append neighbor_id to node id neighbors on level\n  //! Notice: the caller must be ensure the neighbors not full\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Dump index by dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  virtual void update_ep_and_level(node_id_t ep, level_t level) override;\n\n  void set_use_key_info_map(bool use_id_map) {\n    use_key_info_map_ = use_id_map;\n    LOG_DEBUG(\"use_key_info_map_: %d\", (int)use_key_info_map_);\n  }\n\n public:\n  //! Constructor\n  HnswRabitqStreamerEntity(IndexStreamer::Stats &stats);\n\n  //! Destructor\n  ~HnswRabitqStreamerEntity();\n\n  //! 
Get vector feature data by key\n  virtual const void *get_vector_by_key(key_t key) const override {\n    auto id = get_id(key);\n    return id == kInvalidNodeId ? nullptr : get_vector(id);\n  }\n\n  virtual int get_vector_by_key(\n      const key_t key, IndexStorage::MemoryBlock &block) const override {\n    auto id = get_id(key);\n    if (id != kInvalidNodeId) {\n      return get_vector(id, block);\n    } else {\n      return IndexError_InvalidArgument;\n    }\n  }\n\n  //! Init entity\n  int init(size_t max_doc_cnt);\n\n  //! Flush graph entity to disk\n  //! return 0 on success, or errCode in failure\n  int flush(uint64_t checkpoint);\n\n  //! Open entity from storage\n  //! return 0 on success, or errCode in failure\n  int open(IndexStorage::Pointer stg, uint64_t max_index_size, bool check_crc);\n\n  //! Close entity\n  //! return 0 on success, or errCode in failure\n  int close();\n\n  //! Set meta information from entity\n  int set_index_meta(const IndexMeta &meta) const {\n    return IndexHelper::SerializeToStorage(meta, broker_->storage().get());\n  }\n\n  //! Get meta information from entity\n  int get_index_meta(IndexMeta *meta) const {\n    return IndexHelper::DeserializeFromStorage(broker_->storage().get(), meta);\n  }\n\n  //! Set params: chunk size\n  inline void set_chunk_size(size_t val) {\n    chunk_size_ = val;\n  }\n\n  //! Set params\n  inline void set_filter_same_key(bool val) {\n    filter_same_key_ = val;\n  }\n\n  //! Set params\n  inline void set_get_vector(bool val) {\n    get_vector_enabled_ = val;\n  }\n\n  //! Get vector local id by key\n  inline node_id_t get_id(key_t key) const {\n    if (use_key_info_map_) {\n      keys_map_lock_->lock_shared();\n      auto it = keys_map_->find(key);\n      keys_map_lock_->unlock_shared();\n      return it == keys_map_->end() ? 
kInvalidNodeId : it->second;\n    } else {\n      return key;\n    }\n  }\n\n  void print_key_map() const {\n    std::cout << \"key map begins\" << std::endl;\n\n    auto iter = keys_map_->begin();\n    while (iter != keys_map_->end()) {\n      std::cout << \"key: \" << iter->first << \", id: \" << iter->second\n                << std::endl;\n      ;\n      iter++;\n    }\n\n    std::cout << \"key map ends\" << std::endl;\n  }\n\n  //! Get l0 neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! Get neighbors size for level > 0\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n\n private:\n  union UpperNeighborIndexMeta {\n    struct {\n      uint32_t level : 4;\n      uint32_t index : 28;  // index is composite type: chunk idx, and the\n                            // N th neighbors in chunk, they two composite\n                            // the 28 bits location\n    };\n    uint32_t data;\n  };\n\n  template <class Key, class T>\n  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;\n  template <class Key, class T>\n  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;\n\n  template <class Key>\n  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;\n  template <class Key>\n  using HashSetPointer = std::shared_ptr<HashSet<Key>>;\n\n  //! upper neighbor index hashmap\n  using NIHashMap = HnswIndexHashMap<node_id_t, uint32_t>;\n  using NIHashMapPointer = std::shared_ptr<NIHashMap>;\n\n  //! 
Private construct, only be called by clone method\n  HnswRabitqStreamerEntity(IndexStreamer::Stats &stats, const HNSWHeader &hd,\n                           size_t chunk_size, uint32_t node_index_mask_bits,\n                           uint32_t upper_neighbor_mask_bits,\n                           bool filter_same_key, bool get_vector_enabled,\n                           const NIHashMapPointer &upper_neighbor_index,\n                           std::shared_ptr<ailego::SharedMutex> &keys_map_lock,\n                           const HashMapPointer<key_t, node_id_t> &keys_map,\n                           bool use_key_info_map,\n                           std::vector<Chunk::Pointer> &&node_chunks,\n                           std::vector<Chunk::Pointer> &&upper_neighbor_chunks,\n                           const HnswRabitqChunkBroker::Pointer &broker)\n      : stats_(stats),\n        chunk_size_(chunk_size),\n        node_index_mask_bits_(node_index_mask_bits),\n        node_cnt_per_chunk_(1UL << node_index_mask_bits_),\n        node_index_mask_(node_cnt_per_chunk_ - 1),\n        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),\n        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),\n        filter_same_key_(filter_same_key),\n        get_vector_enabled_(get_vector_enabled),\n        use_key_info_map_(use_key_info_map),\n        upper_neighbor_index_(upper_neighbor_index),\n        keys_map_lock_(keys_map_lock),\n        keys_map_(keys_map),\n        node_chunks_(std::move(node_chunks)),\n        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),\n        broker_(broker) {\n    *mutable_header() = hd;\n\n    neighbor_size_ = neighbors_size();\n    upper_neighbor_size_ = upper_neighbors_size();\n  }\n\n  //! 
Called only in searching procedure per context, so no need to lock\n  void sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE type, size_t idx,\n                   std::vector<Chunk::Pointer> *chunks) const {\n    if (ailego_likely(idx < chunks->size())) {\n      return;\n    }\n    for (size_t i = chunks->size(); i <= idx; ++i) {\n      auto chunk = broker_->get_chunk(type, i);\n      // the storage can ensure get chunk will success after the first get\n      ailego_assert_with(!!chunk, \"get chunk failed\");\n      chunks->emplace_back(std::move(chunk));\n    }\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(\n      node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size();\n\n    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,\n                &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size() + vector_size();\n\n    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,\n                &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(\n      level_t level, node_id_t id) const {\n    auto it = upper_neighbor_index_->find(id);\n    ailego_assert_abort(it != upper_neighbor_index_->end(),\n                        \"Get upper neighbor header failed\");\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    uint32_t chunk_idx = (meta->index) >> upper_neighbor_mask_bits_;\n    uint32_t offset = (((meta->index) & upper_neighbor_mask_) + level - 1) *\n                      upper_neighbor_size_;\n    
sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_idx,\n                &upper_neighbor_chunks_);\n    ailego_assert_abort(chunk_idx < upper_neighbor_chunks_.size(),\n                        \"invalid chunk idx\");\n    ailego_assert_abort(offset < upper_neighbor_chunks_[chunk_idx]->data_size(),\n                        \"invalid chunk offset\");\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! return pair: chunk + chunk offset\n  inline std::pair<Chunk *, size_t> get_neighbor_chunk_loc(level_t level,\n                                                           node_id_t id) const {\n    if (level == 0UL) {\n      uint32_t chunk_idx = id >> node_index_mask_bits_;\n      uint32_t offset =\n          (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);\n\n      sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,\n                  &node_chunks_);\n      ailego_assert_abort(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n      ailego_assert_abort(offset < node_chunks_[chunk_idx]->data_size(),\n                          \"invalid chunk offset\");\n      return std::make_pair(node_chunks_[chunk_idx].get(), offset);\n    } else {\n      auto p = get_upper_neighbor_chunk_loc(level, id);\n      return std::make_pair(upper_neighbor_chunks_[p.first].get(), p.second);\n    }\n  }\n\n  //! Chunk hnsw index valid\n  int check_hnsw_index(const HNSWHeader *hd) const;\n\n  size_t get_total_upper_neighbors_size(level_t level) const {\n    return level * upper_neighbor_size_;\n  }\n\n  //! 
Add upper neighbor header and reserve space for upper neighbor\n  int add_upper_neighbor(level_t level, node_id_t id) {\n    if (level == 0) {\n      return 0;\n    }\n    Chunk::Pointer chunk;\n    uint64_t chunk_offset = -1UL;\n    size_t neighbors_size = get_total_upper_neighbors_size(level);\n    uint64_t chunk_index = upper_neighbor_chunks_.size() - 1UL;\n    if (chunk_index == -1UL ||\n        (upper_neighbor_chunks_[chunk_index]->padding_size() <\n         neighbors_size)) {  // no space left and need to alloc\n      chunk_index++;\n      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==\n                          upper_neighbor_chunks_.size())) {\n        LOG_ERROR(\"add upper neighbor failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p =\n          broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,\n                               chunk_index, upper_neighbor_chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n        LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      chunk = p.second;\n      chunk_offset = 0UL;\n      upper_neighbor_chunks_.emplace_back(chunk);\n    } else {\n      chunk = upper_neighbor_chunks_[chunk_index];\n      chunk_offset = chunk->data_size();\n    }\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,\n                       \"invalid offset\");\n    ailego_assert_with((chunk_offset / upper_neighbor_size_) <\n                           (1U << upper_neighbor_mask_bits_),\n                       \"invalid offset\");\n    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),\n                       \"invalid chunk index\");\n    UpperNeighborIndexMeta meta;\n    meta.level = level;\n    meta.index = (chunk_index << upper_neighbor_mask_bits_) |\n                 (chunk_offset / upper_neighbor_size_);\n    chunk_offset += 
upper_neighbor_size_ * level;\n    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {\n      LOG_ERROR(\"HashMap insert value failed\");\n      return IndexError_Runtime;\n    }\n\n    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", (size_t)chunk_offset);\n      return IndexError_Runtime;\n    }\n\n    return 0;\n  }\n\n  size_t estimate_doc_capacity() const {\n    return node_chunks_.capacity() * node_cnt_per_chunk_;\n  }\n\n  int init_chunk_params(size_t max_index_size, bool huge_page) {\n    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());\n    //! align node cnt per chunk to pow of 2\n    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));\n    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;\n    if (huge_page) {\n      chunk_size_ = AlignHugePageSize(node_cnt_per_chunk_ * node_size());\n    } else {\n      chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());\n    }\n    node_index_mask_ = node_cnt_per_chunk_ - 1;\n\n    if (max_index_size == 0UL) {\n      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;\n    } else {\n      max_index_size_ = max_index_size;\n    }\n\n    //! To get a balanced upper neighbor chunk size.\n    //! If the upper chunk size is equal to node chunk size, it may waste\n    //! upper neighbor chunk space; if the upper neighbor chunk size is too\n    //! small, the will need large upper neighbor chunks index space. So to\n    //! 
get a balanced ratio be sqrt of the node/neighbor size ratio\n    float ratio =\n        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);\n    if (huge_page) {\n      upper_neighbor_chunk_size_ = AlignHugePageSize(\n          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),\n                   static_cast<size_t>(chunk_size_ / ratio)));\n    } else {\n      upper_neighbor_chunk_size_ = AlignPageSize(\n          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),\n                   static_cast<size_t>(chunk_size_ / ratio)));\n    }\n    upper_neighbor_mask_bits_ =\n        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;\n\n    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);\n    size_t max_upper_chunk_cnt = std::ceil(\n        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /\n        (upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    max_upper_chunk_cnt =\n        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());\n\n    //! reserve space to avoid memmove in chunks vector emplace chunk, so\n    //! as to lock-free in reading chunk\n    node_chunks_.reserve(max_node_chunk_cnt);\n    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);\n\n    LOG_DEBUG(\n        \"Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u \"\n        \"upperNeighborChunkSize=%u \"\n        \"nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu \"\n        \"maxIndexSize=%zu ratio=%.3f\",\n        node_size(), chunk_size_, upper_neighbor_size_,\n        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,\n        max_upper_chunk_cnt, max_index_size_, ratio);\n\n    return 0;\n  }\n\n  //! 
Init node chunk and neighbor chunks\n  int init_chunks(const Chunk::Pointer &header_chunk);\n\n  int flush_header(void) {\n    if (!broker_->dirty()) {\n      // do not need to flush\n      return 0;\n    }\n    auto header_chunk =\n        broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,\n                           HnswRabitqChunkBroker::kDefaultChunkSeqId);\n    if (ailego_unlikely(!header_chunk)) {\n      LOG_ERROR(\"get header chunk failed\");\n      return IndexError_Runtime;\n    }\n    size_t size = header_chunk->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n\n    return 0;\n  }\n\n private:\n  HnswRabitqStreamerEntity(const HnswRabitqStreamerEntity &) = delete;\n  HnswRabitqStreamerEntity &operator=(const HnswRabitqStreamerEntity &) =\n      delete;\n  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;\n\n private:\n  IndexStreamer::Stats &stats_;\n  HNSWHeader header_{};\n  std::mutex mutex_{};\n  size_t max_index_size_{0UL};\n  uint32_t chunk_size_{kDefaultChunkSize};\n  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};\n  uint32_t node_index_mask_bits_{0U};\n  uint32_t node_cnt_per_chunk_{0U};\n  uint32_t node_index_mask_{0U};\n  uint32_t neighbor_size_{0U};\n  uint32_t upper_neighbor_size_{0U};\n  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the\n  //! following mask\n  uint32_t upper_neighbor_mask_bits_{0U};\n  uint32_t upper_neighbor_mask_{0U};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n  bool use_key_info_map_{true};\n\n  NIHashMapPointer upper_neighbor_index_{};\n\n  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};\n  HashMapPointer<key_t, node_id_t> keys_map_{};\n\n  //! the chunks will be changed in searcher, so need mutable\n  //! 
data chunk include: vector, key, level 0 neighbors\n  mutable std::vector<Chunk::Pointer> node_chunks_{};\n\n  //! upper neighbor chunk inlude: UpperNeighborHeader + (1~level) neighbors\n  mutable std::vector<Chunk::Pointer> upper_neighbor_chunks_{};\n\n  HnswRabitqChunkBroker::Pointer broker_{};  // chunk broker\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"rabitq_converter.h\"\n#include <cstring>\n#include <memory>\n#include <rabitqlib/utils/rotator.hpp>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"ailego/pattern/defer.h\"\n#include \"algorithm/hnsw_rabitq/rabitq_reformer.h\"\n#include \"zvec/core/framework/index_cluster.h\"\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_features.h\"\n#include \"zvec/core/framework/index_holder.h\"\n#include \"zvec/core/framework/index_memory.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"rabitq_params.h\"\n#include \"rabitq_utils.h\"\n\n#ifdef _MSC_VER\n#define strncasecmp _strnicmp\n#endif\n\nnamespace zvec {\nnamespace core {\n\nRabitqConverter::~RabitqConverter() {\n  this->cleanup();\n}\n\nint RabitqConverter::init(const IndexMeta &meta, const ailego::Params &params) {\n  // Copy meta and ensure it has metric information\n  meta_ = meta;\n  dimension_ = meta.dimension();\n\n  if (meta_.metric_name().empty()) {\n    LOG_ERROR(\"Meta metric is empty\");\n    return IndexError_InvalidArgument;\n  }\n\n  // Round up dimension to multiple of 64\n  padded_dim_ = ((dimension_ + 63) / 64) * 64;\n\n  // Get RaBitQ parameters with defaults\n  
uint32_t total_bits = 0;\n  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);\n  if (total_bits == 0) {\n    total_bits = kDefaultRabitqTotalBits;\n  }\n  if (total_bits < 1 || total_bits > 9) {\n    LOG_ERROR(\"Invalid total_bits: %zu, must be in [1, 9]\", (size_t)total_bits);\n    return IndexError_InvalidArgument;\n  }\n  ex_bits_ = total_bits - 1;\n\n  params.get(PARAM_RABITQ_NUM_CLUSTERS, &num_clusters_);\n  if (num_clusters_ == 0) {\n    num_clusters_ = kDefaultNumClusters;\n  }\n\n  if (ex_bits_ > 8) {\n    LOG_ERROR(\"Invalid ex_bits: %zu, must be <= 8\", ex_bits_);\n    return IndexError_InvalidArgument;\n  }\n\n  if (meta.data_type() != IndexMeta::DataType::DT_FP32) {\n    LOG_ERROR(\"RaBitQ only supports FP32 data type\");\n    return IndexError_Unsupported;\n  }\n  params.get(PARAM_RABITQ_SAMPLE_COUNT, &sample_count_);\n\n  std::string rotator_type_str;\n  params.get(PARAM_RABITQ_ROTATOR_TYPE, &rotator_type_str);\n  if (rotator_type_str.empty()) {\n    rotator_type_ = rabitqlib::RotatorType::FhtKacRotator;\n  } else if (strncasecmp(rotator_type_str.c_str(), \"fht\", 3) == 0) {\n    rotator_type_ = rabitqlib::RotatorType::FhtKacRotator;\n  } else if (strncasecmp(rotator_type_str.c_str(), \"matrix\", 6) == 0) {\n    rotator_type_ = rabitqlib::RotatorType::MatrixRotator;\n  } else {\n    LOG_ERROR(\"Invalid rotator_type: %s\", rotator_type_str.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  // Create rotator\n  rotator_.reset(\n      rabitqlib::choose_rotator<float>(dimension_, rotator_type_, padded_dim_));\n\n  LOG_INFO(\n      \"RabitqConverter initialized: dim=%zu, padded_dim=%zu, \"\n      \"num_clusters=%zu, ex_bits=%zu, rotator_type=%d[%s] sample_count[%zu]\",\n      dimension_, padded_dim_, num_clusters_, ex_bits_, (int)rotator_type_,\n      rotator_type_str.c_str(), sample_count_);\n\n  return 0;\n}\n\nint RabitqConverter::cleanup() {\n  centroids_.clear();\n  rotated_centroids_.clear();\n  result_holder_.reset();\n  rotator_.reset();\n 
 return 0;\n}\n\nint RabitqConverter::train(IndexHolder::Pointer holder) {\n  if (!holder) {\n    LOG_ERROR(\"Null holder for training\");\n    return IndexError_InvalidArgument;\n  }\n\n  ailego::ElapsedTime timer;\n\n  size_t vector_count = holder->count();\n  if (vector_count == 0) {\n    LOG_ERROR(\"No vectors for training\");\n    return IndexError_InvalidArgument;\n  }\n\n  // do sampling from all data\n  size_t sample_count = vector_count;\n  if (sample_count_ > 0) {\n    sample_count = std::min(sample_count_, vector_count);\n  }\n  LOG_INFO(\"Training with %zu vectors from %zu of holder\", sample_count,\n           vector_count);\n  auto sampler = std::make_shared<SampleIndexFeatures<CompactIndexFeatures>>(\n      meta_, sample_count);\n  auto iter = holder->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Create iterator error\");\n    return IndexError_Runtime;\n  }\n  for (; iter->is_valid(); iter->next()) {\n    sampler->emplace(iter->data());\n  }\n\n  // Holder is not needed, cleanup it.\n  holder.reset();\n\n  if (sampler->count() == 0) {\n    LOG_ERROR(\"Load training data error\");\n    return IndexError_InvalidLength;\n  }\n\n\n  // Create KmeansCluster for training centroids\n  auto cluster = IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  if (!cluster) {\n    LOG_ERROR(\"Failed to create OptKmeansCluster\");\n    return IndexError_NoExist;\n  }\n\n  // Initialize cluster\n  LOG_INFO(\n      \"Initializing KmeansCluster with meta: dim=%u, data_type=%d, metric=%s\",\n      meta_.dimension(), (int)meta_.data_type(), meta_.metric_name().c_str());\n  ailego::Params cluster_params;\n  int ret = cluster->init(meta_, cluster_params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize KmeansCluster: %d\", ret);\n    return ret;\n  }\n\n  ret = cluster->mount(sampler);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to mount training data: %d\", ret);\n    return ret;\n  }\n  cluster->suggest(num_clusters_);\n\n  // Perform clustering\n  
IndexCluster::CentroidList cents;\n  // TODO: support specify threads with argument\n  auto threads = std::make_shared<SingleQueueIndexThreads>(0, false);\n  ret = cluster->cluster(threads, cents);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to perform clustering: %d\", ret);\n    return ret;\n  }\n\n  if (cents.size() != num_clusters_) {\n    LOG_WARN(\"Expected %zu clusters, got %zu\", num_clusters_, cents.size());\n    num_clusters_ = cents.size();\n  }\n  // Extract original centroids (for LinearSeeker query)\n  centroids_.resize(num_clusters_ * dimension_);\n  // Extract rotated centroids (for quantization)\n  rotated_centroids_.resize(num_clusters_ * padded_dim_);\n  for (uint32_t i = 0; i < num_clusters_; ++i) {\n    const float *cent_data = static_cast<const float *>(cents[i].feature());\n    // Save original centroids\n    std::memcpy(&centroids_[i * dimension_], cent_data,\n                dimension_ * sizeof(float));\n    // Save rotated centroids\n    this->rotator_->rotate(cent_data, &rotated_centroids_[i * padded_dim_]);\n  }\n\n  stats_.set_trained_count(sampler->count());\n  stats_.set_trained_costtime(timer.milli_seconds());\n\n  LOG_INFO(\"Training completed: %zu centroids, cost %zu ms\", num_clusters_,\n           static_cast<size_t>(timer.milli_seconds()));\n\n  return 0;\n}\n\n\nint RabitqConverter::transform(IndexHolder::Pointer holder) {\n  if (!holder) {\n    LOG_ERROR(\"Null holder for transformation\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (rotated_centroids_.empty()) {\n    LOG_ERROR(\"Centroids not trained yet\");\n    return IndexError_NoReady;\n  }\n\n  LOG_ERROR(\"Not implemented\");\n  return IndexError_NotImplemented;\n}\n\nint RabitqConverter::dump(const IndexDumper::Pointer &dumper) {\n  if (!dumper) {\n    LOG_ERROR(\"Null dumper\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (rotated_centroids_.empty() || centroids_.empty()) {\n    LOG_ERROR(\"No centroids to dump\");\n    return IndexError_NoReady;\n  
}\n\n  ailego::ElapsedTime timer;\n  size_t dumped_size = 0;\n\n  int ret = dump_rabitq_centroids(\n      dumper, dimension_, padded_dim_, ex_bits_, num_clusters_, rotator_type_,\n      rotated_centroids_, centroids_, rotator_, &dumped_size);\n  if (ret != 0) {\n    return ret;\n  }\n\n  stats_.set_dumped_size(dumped_size);\n  stats_.set_dumped_costtime(timer.milli_seconds());\n\n  LOG_INFO(\"Dump completed: %zu bytes, cost %zu ms\", stats_.dumped_size(),\n           static_cast<size_t>(timer.milli_seconds()));\n  return 0;\n}\n\nint RabitqConverter::to_reformer(IndexReformer::Pointer *reformer) {\n  auto memory_dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  memory_dumper->init(ailego::Params());\n  std::string file_id = ailego::StringHelper::Concat(\n      \"rabitq_converter_\", ailego::Monotime::MilliSeconds(), rand());\n  int ret = memory_dumper->create(file_id);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to create memory dumper: %d\", ret);\n    return ret;\n  }\n  // Release memory\n  AILEGO_DEFER([&file_id]() { IndexMemory::Instance()->remove(file_id); });\n  ret = this->dump(memory_dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to dump RabitqConverter: %d\", ret);\n    return ret;\n  }\n  ret = memory_dumper->close();\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to close memory dumper: %d\", ret);\n    return ret;\n  }\n\n  auto res = std::make_shared<RabitqReformer>();\n  ailego::Params reformer_params;\n  reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());\n  ret = res->init(reformer_params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize RabitqReformer: %d\", ret);\n    return ret;\n  }\n  auto memory_storage = IndexFactory::CreateStorage(\"MemoryReadStorage\");\n  ret = memory_storage->open(file_id, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open memory storage: %d\", ret);\n    return ret;\n  }\n  ret = res->load(memory_storage);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load RabitqReformer: %d\", 
ret);\n    return ret;\n  }\n  *reformer = std::move(res);\n  return 0;\n}\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_converter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <rabitqlib/utils/rotator.hpp>\n#include \"zvec/core/framework/index_cluster.h\"\n#include \"zvec/core/framework/index_converter.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"zvec/core/framework/index_threads.h\"\n#include \"rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass RabitqReformer;\n\n/*! RaBitQ Converter\n * Trains KMeans centroids and quantizes vectors using RaBitQ\n */\nclass RabitqConverter : public IndexConverter {\n public:\n  //! Constructor\n  RabitqConverter() = default;\n\n  //! Destructor\n  ~RabitqConverter() override;\n\n  //! Initialize Converter\n  int init(const IndexMeta &meta, const ailego::Params &params) override;\n\n  //! Cleanup Converter\n  int cleanup(void) override;\n\n  //! Train the data - perform KMeans clustering\n  int train(IndexHolder::Pointer holder) override;\n\n  //! Transform the data - quantize vectors using RaBitQ\n  int transform(IndexHolder::Pointer holder) override;\n\n  //! Dump centroids and config into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return result_holder_;\n  }\n\n  //! 
Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  int to_reformer(IndexReformer::Pointer *reformer) override;\n\n private:\n  static inline size_t AlignSize(size_t size) {\n    return (size + 0x1F) & (~0x1F);\n  }\n\n private:\n  IndexMeta meta_;\n  IndexHolder::Pointer result_holder_;\n  Stats stats_;\n  size_t sample_count_{0};\n\n  // RaBitQ parameters\n  size_t num_clusters_{0};\n  size_t ex_bits_{0};\n  size_t dimension_{0};\n  size_t padded_dim_{0};\n\n  // Original centroids: num_clusters * dimension (for LinearSeeker query)\n  std::vector<float> centroids_;\n  // Rotated centroids: num_clusters * padded_dim (for quantization)\n  std::vector<float> rotated_centroids_;\n\n  // Rotator for vector transformation\n  rabitqlib::RotatorType rotator_type_{rabitqlib::RotatorType::FhtKacRotator};\n  std::unique_ptr<rabitqlib::Rotator<float>> rotator_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n// Local metric type enum that mirrors rabitqlib::MetricType,\n// without exposing rabitqlib headers to consumers of this file.\nenum class RabitqMetricType {\n  kL2 = 0,\n  kIP = 1,\n};\n\n// RaBitQ Converter parameters\nstatic const std::string PARAM_RABITQ_NUM_CLUSTERS(\n    \"proxima.rabitq.num_clusters\");\nstatic const std::string PARAM_RABITQ_TOTAL_BITS(\"proxima.rabitq.total_bits\");\nstatic const std::string PARAM_RABITQ_METRIC_NAME(\"proxima.rabitq.metric_name\");\nstatic const std::string PARAM_RABITQ_ROTATOR_TYPE(\n    \"proxima.rabitq.rotator.type\");\nstatic const std::string PARAM_RABITQ_SAMPLE_COUNT(\n    \"proxima.rabitq.sample_count\");\n\n// Default values\nconstexpr size_t kDefaultNumClusters = 16;\n// 4-bit, 5-bit, and 7-bit quantization typically achieve 90%, 95%, and 99%\n// recall, respectively—without accessing raw vectors for reranking\nconstexpr size_t kDefaultRabitqTotalBits = 7;\n\nconstexpr int kMinRabitqDimSize = 64;\nconstexpr int kMaxRabitqDimSize = 4095;\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"rabitq_reformer.h\"\n#include <string>\n#include <vector>\n#include <rabitqlib/defines.hpp>\n#include <rabitqlib/index/query.hpp>\n#include <rabitqlib/quantization/rabitq.hpp>\n#include <rabitqlib/utils/rotator.hpp>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"core/algorithm/cluster/linear_seeker.h\"\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_features.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"zvec/core/framework/index_storage.h\"\n#include \"hnsw_rabitq_query_entity.h\"\n#include \"rabitq_converter.h\"\n#include \"rabitq_utils.h\"\n\nnamespace zvec {\nnamespace core {\n\n// All rabitqlib types are confined to this translation unit via pimpl.\nstruct RabitqReformer::Impl {\n  // RaBitQ parameters\n  size_t num_clusters{0};\n  size_t ex_bits{0};\n  size_t dimension{0};\n  size_t padded_dim{0};\n  size_t size_bin_data{0};\n  size_t size_ex_data{0};\n  bool loaded{false};\n\n  // Original centroids: num_clusters * dimension (for LinearSeeker query)\n  std::vector<float> centroids;\n  // Rotated centroids: num_clusters * padded_dim (for quantization)\n  std::vector<float> rotated_centroids;\n\n  rabitqlib::RotatorType 
rotator_type{rabitqlib::RotatorType::FhtKacRotator};\n  std::unique_ptr<rabitqlib::Rotator<float>> rotator;\n  rabitqlib::quant::RabitqConfig query_config;\n  rabitqlib::quant::RabitqConfig config;\n  rabitqlib::MetricType metric_type{rabitqlib::METRIC_L2};\n\n  LinearSeeker::Pointer centroid_seeker;\n  CoherentIndexFeatures::Pointer centroid_features;\n\n  // Translate local enum to rabitqlib enum (used only inside this .cc).\n  static rabitqlib::MetricType to_rabitq(RabitqMetricType m) {\n    return m == RabitqMetricType::kIP ? rabitqlib::METRIC_IP\n                                      : rabitqlib::METRIC_L2;\n  }\n\n  // Translate rabitqlib enum to local enum.\n  static RabitqMetricType from_rabitq(rabitqlib::MetricType m) {\n    return m == rabitqlib::METRIC_IP ? RabitqMetricType::kIP\n                                     : RabitqMetricType::kL2;\n  }\n\n  int quantize_vector(const float *raw_vector, uint32_t cluster_id,\n                      std::string *quantized_data) const;\n};\n\nRabitqReformer::RabitqReformer() : impl_(std::make_unique<Impl>()) {}\n\nRabitqReformer::~RabitqReformer() {\n  this->cleanup();\n}\n\nsize_t RabitqReformer::num_clusters() const {\n  return impl_->num_clusters;\n}\n\nRabitqMetricType RabitqReformer::rabitq_metric_type() const {\n  return Impl::from_rabitq(impl_->metric_type);\n}\n\nint RabitqReformer::init(const ailego::Params &params) {\n  std::string metric_name = params.get_as_string(PARAM_RABITQ_METRIC_NAME);\n  if (metric_name == \"SquaredEuclidean\") {\n    impl_->metric_type = rabitqlib::METRIC_L2;\n  } else if (metric_name == \"InnerProduct\") {\n    impl_->metric_type = rabitqlib::METRIC_IP;\n  } else if (metric_name == \"Cosine\") {\n    impl_->metric_type = rabitqlib::METRIC_IP;\n  } else {\n    LOG_ERROR(\"Unsupported metric name: %s\", metric_name.c_str());\n    return IndexError_InvalidArgument;\n  }\n  LOG_DEBUG(\"Rabitq reformer init done. 
metric_name=%s metric_type=%d\",\n            metric_name.c_str(), static_cast<int>(impl_->metric_type));\n  return 0;\n}\n\nint RabitqReformer::cleanup() {\n  impl_->centroids.clear();\n  impl_->rotated_centroids.clear();\n  impl_->centroid_seeker.reset();\n  impl_->centroid_features.reset();\n  impl_->loaded = false;\n  impl_->rotator.reset();\n  return 0;\n}\n\nint RabitqReformer::unload() {\n  return this->cleanup();\n}\n\nint RabitqReformer::load(IndexStorage::Pointer storage) {\n  if (!storage) {\n    LOG_ERROR(\"Invalid storage for load\");\n    return IndexError_InvalidArgument;\n  }\n\n  auto segment = storage->get(RABITQ_CONVERTER_SEG_ID);\n  if (!segment) {\n    LOG_ERROR(\"Failed to get segment %s\", RABITQ_CONVERTER_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  size_t offset = 0;\n  RabitqConverterHeader header;\n  IndexStorage::MemoryBlock block;\n  size_t size = segment->read(offset, block, sizeof(header));\n  if (size != sizeof(header)) {\n    LOG_ERROR(\"Failed to read header\");\n    return IndexError_InvalidFormat;\n  }\n  memcpy(&header, block.data(), sizeof(header));\n  impl_->dimension = header.dim;\n  impl_->padded_dim = header.padded_dim;\n  impl_->ex_bits = header.ex_bits;\n  impl_->num_clusters = header.num_clusters;\n  impl_->rotator_type =\n      static_cast<rabitqlib::RotatorType>(header.rotator_type);\n  offset += sizeof(header);\n\n  // Read rotated centroids\n  size_t rotated_centroids_size =\n      sizeof(float) * header.num_clusters * header.padded_dim;\n  size = segment->read(offset, block, rotated_centroids_size);\n  if (size != rotated_centroids_size) {\n    LOG_ERROR(\"Failed to read rotated centroids\");\n    return IndexError_InvalidFormat;\n  }\n  impl_->rotated_centroids.resize(header.num_clusters * header.padded_dim);\n  memcpy(impl_->rotated_centroids.data(), block.data(), rotated_centroids_size);\n  offset += size;\n\n  // Read original centroids (for LinearSeeker query)\n  size_t centroids_size = 
sizeof(float) * header.num_clusters * header.dim;\n  size = segment->read(offset, block, centroids_size);\n  if (size != centroids_size) {\n    LOG_ERROR(\"Failed to read centroids\");\n    return IndexError_InvalidFormat;\n  }\n  impl_->centroids.resize(header.num_clusters * header.dim);\n  memcpy(impl_->centroids.data(), block.data(), centroids_size);\n  offset += size;\n\n  // Read rotator\n  size_t rotator_size = header.rotator_size;\n  size = segment->read(offset, block, rotator_size);\n  if (size != rotator_size) {\n    LOG_ERROR(\"Failed to read rotator\");\n    return IndexError_InvalidFormat;\n  }\n  impl_->rotator.reset(rabitqlib::choose_rotator<float>(\n      impl_->dimension, impl_->rotator_type, impl_->padded_dim));\n  impl_->rotator->load(reinterpret_cast<const char *>(block.data()));\n  offset += size;\n\n  impl_->query_config = rabitqlib::quant::faster_config(\n      impl_->padded_dim, rabitqlib::SplitSingleQuery<float>::kNumBits);\n  impl_->config =\n      rabitqlib::quant::faster_config(impl_->padded_dim, impl_->ex_bits + 1);\n\n  impl_->size_bin_data =\n      rabitqlib::BinDataMap<float>::data_bytes(impl_->padded_dim);\n  impl_->size_ex_data = rabitqlib::ExDataMap<float>::data_bytes(\n      impl_->padded_dim, impl_->ex_bits);\n\n  // Initialize LinearSeeker for centroid search\n  IndexMeta centroid_meta;\n  centroid_meta.set_data_type(IndexMeta::DataType::DT_FP32);\n  centroid_meta.set_dimension(static_cast<uint32_t>(impl_->dimension));\n  // Note:\n  // 1. spherical kmeans is used for InnerProduct and Cosine, so centroids are\n  // normalized.\n  // 2. for Cosine metric, `transform_to_entity` input is normalized, need to\n  // use InnerProduct metric as Cosine metric requires extra dimension which is\n  // unsuitable for centroids.\n  centroid_meta.set_metric(impl_->metric_type == rabitqlib::METRIC_L2\n                               ? 
\"SquaredEuclidean\"\n                               : \"InnerProduct\",\n                           0, ailego::Params());\n\n  impl_->centroid_features = std::make_shared<CoherentIndexFeatures>();\n  impl_->centroid_features->mount(centroid_meta, impl_->centroids.data(),\n                                  impl_->centroids.size() * sizeof(float));\n\n  impl_->centroid_seeker = std::make_shared<LinearSeeker>();\n  int ret = impl_->centroid_seeker->init(centroid_meta);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init centroid seeker. ret[%d]\", ret);\n    return ret;\n  }\n  ret = impl_->centroid_seeker->mount(impl_->centroid_features);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to mount centroid features. ret[%d]\", ret);\n    return ret;\n  }\n\n  LOG_INFO(\n      \"Rabitq reformer load done. dimension=%zu, padded_dim=%zu, \"\n      \"ex_bits=%zu, num_clusters=%zu, size_bin_data=%zu, size_ex_data=%zu \"\n      \"rotator_type=%d\",\n      impl_->dimension, impl_->padded_dim, impl_->ex_bits, impl_->num_clusters,\n      impl_->size_bin_data, impl_->size_ex_data, (int)impl_->rotator_type);\n  impl_->loaded = true;\n  return 0;\n}\n\nint RabitqReformer::convert(const void *record, const IndexQueryMeta &rmeta,\n                            std::string *out, IndexQueryMeta *ometa) const {\n  if (!impl_->loaded) {\n    LOG_ERROR(\"Centroids not loaded yet\");\n    return IndexError_NoReady;\n  }\n\n  if (!record || !out) {\n    LOG_ERROR(\"Invalid arguments for convert\");\n    return IndexError_InvalidArgument;\n  }\n\n  // input may be transformed, require rmeta.dimension >= dimension\n  if (rmeta.dimension() < impl_->dimension ||\n      rmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n    LOG_ERROR(\"Invalid record meta: dimension=%zu, data_type=%d\",\n              static_cast<size_t>(rmeta.dimension()), (int)rmeta.data_type());\n    return IndexError_InvalidArgument;\n  }\n\n  // Find nearest centroid using LinearSeeker\n  Seeker::Document doc;\n  int ret = 
impl_->centroid_seeker->seek(\n      record, impl_->dimension * sizeof(float), &doc);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to seek centroid. ret[%d]\", ret);\n    return ret;\n  }\n  uint32_t cluster_id = doc.index;\n\n  const float *vector = static_cast<const float *>(record);\n  ret = impl_->quantize_vector(vector, cluster_id, out);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to quantize vector\");\n    return ret;\n  }\n\n  ometa->set_meta(IndexMeta::DataType::DT_INT8, (uint32_t)out->size());\n  return 0;\n}\n\nint RabitqReformer::transform(const void *, const IndexQueryMeta &,\n                              std::string *, IndexQueryMeta *) const {\n  return IndexError_NotImplemented;\n}\n\nint RabitqReformer::transform_to_entity(const void *query,\n                                        HnswRabitqQueryEntity *entity) const {\n  if (!impl_->loaded) {\n    LOG_ERROR(\"Centroids not loaded yet\");\n    return IndexError_NoReady;\n  }\n\n  if (!query) {\n    LOG_ERROR(\"Invalid arguments for transform\");\n    return IndexError_InvalidArgument;\n  }\n\n  const float *query_vector = static_cast<const float *>(query);\n\n  // Apply rotator\n  entity->rotated_query.resize(impl_->padded_dim);\n  impl_->rotator->rotate(query_vector, entity->rotated_query.data());\n\n  // Quantize query to 4-bit representation\n  entity->query_wrapper = std::make_unique<rabitqlib::SplitSingleQuery<float>>(\n      entity->rotated_query.data(), impl_->padded_dim, impl_->ex_bits,\n      impl_->query_config, impl_->metric_type);\n\n  // Preprocess - get the distance from query to all centroids\n  entity->q_to_centroids.resize(impl_->num_clusters);\n\n  if (impl_->metric_type == rabitqlib::METRIC_L2) {\n    for (size_t i = 0; i < impl_->num_clusters; i++) {\n      entity->q_to_centroids[i] = std::sqrt(rabitqlib::euclidean_sqr(\n          entity->rotated_query.data(),\n          impl_->rotated_centroids.data() + (i * impl_->padded_dim),\n          impl_->padded_dim));\n    }\n  } else if 
(impl_->metric_type == rabitqlib::METRIC_IP) {\n    entity->q_to_centroids.resize(impl_->num_clusters * 2);\n    // first half as g_add, second half as g_error\n    for (size_t i = 0; i < impl_->num_clusters; i++) {\n      entity->q_to_centroids[i] = rabitqlib::dot_product(\n          entity->rotated_query.data(),\n          impl_->rotated_centroids.data() + (i * impl_->padded_dim),\n          impl_->padded_dim);\n      entity->q_to_centroids[i + impl_->num_clusters] =\n          std::sqrt(rabitqlib::euclidean_sqr(\n              entity->rotated_query.data(),\n              impl_->rotated_centroids.data() + (i * impl_->padded_dim),\n              impl_->padded_dim));\n    }\n  }\n\n  return 0;\n}\n\nint RabitqReformer::Impl::quantize_vector(const float *raw_vector,\n                                          uint32_t cluster_id,\n                                          std::string *quantized_data) const {\n  std::vector<float> rotated_data(padded_dim);\n  rotator->rotate(raw_vector, rotated_data.data());\n\n  // quantized format: cluster_id + bin_data + ex_data\n  quantized_data->resize(sizeof(cluster_id) + size_bin_data + size_ex_data);\n  memcpy(&(*quantized_data)[0], &cluster_id, sizeof(cluster_id));\n  int bin_data_offset = sizeof(cluster_id);\n  int ex_data_offset = bin_data_offset + size_bin_data;\n  rabitqlib::quant::quantize_split_single(\n      rotated_data.data(), rotated_centroids.data() + (cluster_id * padded_dim),\n      padded_dim, ex_bits, &(*quantized_data)[bin_data_offset],\n      &(*quantized_data)[ex_data_offset], metric_type, config);\n\n  return 0;\n}\n\nint RabitqReformer::dump(const IndexDumper::Pointer &dumper) {\n  if (!dumper) {\n    LOG_ERROR(\"Null dumper\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!impl_->loaded || impl_->rotated_centroids.empty() ||\n      impl_->centroids.empty()) {\n    LOG_ERROR(\"No centroids to dump\");\n    return IndexError_NoReady;\n  }\n\n  size_t dumped_size = 0;\n  int ret = 
dump_rabitq_centroids(\n      dumper, impl_->dimension, impl_->padded_dim, impl_->ex_bits,\n      impl_->num_clusters, impl_->rotator_type, impl_->rotated_centroids,\n      impl_->centroids, impl_->rotator, &dumped_size);\n  if (ret != 0) {\n    return ret;\n  }\n\n  LOG_INFO(\"RabitqReformer dump completed: %zu bytes\", dumped_size);\n  return 0;\n}\n\nint RabitqReformer::dump(const IndexStorage::Pointer &storage) {\n  if (!storage) {\n    LOG_ERROR(\"Null storage\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!impl_->loaded || impl_->rotated_centroids.empty() ||\n      impl_->centroids.empty()) {\n    LOG_ERROR(\"No centroids to dump\");\n    return IndexError_NoReady;\n  }\n\n  auto align_size = [](size_t size) -> size_t {\n    return (size + 0x1F) & (~0x1F);\n  };\n\n  size_t header_size = sizeof(RabitqConverterHeader);\n  size_t rotated_centroids_size =\n      impl_->rotated_centroids.size() * sizeof(float);\n  size_t centroids_size = impl_->centroids.size() * sizeof(float);\n  size_t rotator_size = impl_->rotator->dump_bytes();\n  size_t data_size =\n      header_size + rotated_centroids_size + centroids_size + rotator_size;\n  size_t total_size = align_size(data_size);\n\n  int ret = storage->append(RABITQ_CONVERTER_SEG_ID, total_size);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to append segment %s, ret=%d\",\n              RABITQ_CONVERTER_SEG_ID.c_str(), ret);\n    return ret;\n  }\n\n  auto segment = storage->get(RABITQ_CONVERTER_SEG_ID);\n  if (!segment) {\n    LOG_ERROR(\"Failed to get segment %s\", RABITQ_CONVERTER_SEG_ID.c_str());\n    return IndexError_ReadData;\n  }\n\n  size_t offset = 0;\n\n  RabitqConverterHeader header;\n  header.dim = static_cast<uint32_t>(impl_->dimension);\n  header.padded_dim = static_cast<uint32_t>(impl_->padded_dim);\n  header.num_clusters = static_cast<uint32_t>(impl_->num_clusters);\n  header.ex_bits = static_cast<uint8_t>(impl_->ex_bits);\n  header.rotator_type = static_cast<uint8_t>(impl_->rotator_type);\n  
header.rotator_size = static_cast<uint32_t>(rotator_size);\n  size_t written = segment->write(offset, &header, header_size);\n  if (written != header_size) {\n    LOG_ERROR(\"Failed to write header: written=%zu, expected=%zu\", written,\n              header_size);\n    return IndexError_WriteData;\n  }\n  offset += header_size;\n\n  written = segment->write(offset, impl_->rotated_centroids.data(),\n                           rotated_centroids_size);\n  if (written != rotated_centroids_size) {\n    LOG_ERROR(\"Failed to write rotated centroids: written=%zu, expected=%zu\",\n              written, rotated_centroids_size);\n    return IndexError_WriteData;\n  }\n  offset += rotated_centroids_size;\n\n  written = segment->write(offset, impl_->centroids.data(), centroids_size);\n  if (written != centroids_size) {\n    LOG_ERROR(\"Failed to write centroids: written=%zu, expected=%zu\", written,\n              centroids_size);\n    return IndexError_WriteData;\n  }\n  offset += centroids_size;\n\n  std::vector<char> buffer(rotator_size);\n  impl_->rotator->save(buffer.data());\n  written = segment->write(offset, buffer.data(), rotator_size);\n  if (written != rotator_size) {\n    LOG_ERROR(\"Failed to write rotator data: written=%zu, expected=%zu\",\n              written, rotator_size);\n    return IndexError_WriteData;\n  }\n\n  LOG_INFO(\"RabitqReformer dump to storage completed: %zu bytes\", data_size);\n  return 0;\n}\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_reformer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n#pragma once\n\n#include <memory>\n#include \"zvec/core/framework/index_dumper.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"zvec/core/framework/index_storage.h\"\n#include \"rabitq_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswRabitqQueryEntity;\n\n/*! RaBitQ Reformer\n * Loads centroids and performs query transformation and vector quantization.\n *\n * All rabitqlib types are hidden behind a pimpl to avoid leaking rabitqlib\n * headers to consumers of this class.\n */\nclass RabitqReformer : public IndexReformer {\n public:\n  typedef std::shared_ptr<RabitqReformer> Pointer;\n\n  RabitqReformer();\n  ~RabitqReformer() override;\n\n  // Non-copyable\n  RabitqReformer(const RabitqReformer &) = delete;\n  RabitqReformer &operator=(const RabitqReformer &) = delete;\n\n  int init(const ailego::Params &params) override;\n  int cleanup(void) override;\n  int load(IndexStorage::Pointer storage) override;\n  int unload(void) override;\n\n  // transform() is not implemented for RabitqReformer; use transform_to_entity.\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override;\n\n  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,\n              IndexQueryMeta *ometa) const override;\n\n  int dump(const 
IndexDumper::Pointer &dumper);\n  int dump(const IndexStorage::Pointer &storage);\n\n  int transform_to_entity(const void *query,\n                          HnswRabitqQueryEntity *entity) const;\n\n  size_t num_clusters() const;\n  RabitqMetricType rabitq_metric_type() const;\n\n private:\n  struct Impl;\n  std::unique_ptr<Impl> impl_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_utils.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"rabitq_utils.h\"\n#include <string>\n#include <zvec/ailego/hash/crc32c.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_logger.h\"\n\nnamespace zvec {\nnamespace core {\n\nint dump_rabitq_centroids(\n    const IndexDumper::Pointer &dumper, size_t dimension, size_t padded_dim,\n    size_t ex_bits, size_t num_clusters, rabitqlib::RotatorType rotator_type,\n    const std::vector<float> &rotated_centroids,\n    const std::vector<float> &centroids,\n    const std::unique_ptr<rabitqlib::Rotator<float>> &rotator,\n    size_t *out_dumped_size) {\n  auto align_size = [](size_t size) -> size_t {\n    return (size + 0x1F) & (~0x1F);\n  };\n\n  uint32_t crc = 0;\n  size_t dumped_size = 0;\n\n  // Write header\n  RabitqConverterHeader header;\n  header.dim = static_cast<uint32_t>(dimension);\n  header.padded_dim = static_cast<uint32_t>(padded_dim);\n  header.num_clusters = static_cast<uint32_t>(num_clusters);\n  header.ex_bits = static_cast<uint8_t>(ex_bits);\n  header.rotator_type = static_cast<uint8_t>(rotator_type);\n  header.rotator_size = static_cast<uint32_t>(rotator->dump_bytes());\n  size_t size = dumper->write(&header, sizeof(header));\n  if (size != sizeof(header)) {\n    LOG_ERROR(\"Failed to write header: written=%zu, expected=%zu\", size,\n              sizeof(header));\n    return 
IndexError_WriteData;\n  }\n  crc = ailego::Crc32c::Hash(&header, sizeof(header), crc);\n  dumped_size += size;\n\n  // Write rotated centroids\n  size = dumper->write(rotated_centroids.data(),\n                       rotated_centroids.size() * sizeof(float));\n  if (size != rotated_centroids.size() * sizeof(float)) {\n    LOG_ERROR(\"Failed to write rotated centroids: written=%zu, expected=%zu\",\n              size, rotated_centroids.size() * sizeof(float));\n    return IndexError_WriteData;\n  }\n  crc = ailego::Crc32c::Hash(rotated_centroids.data(),\n                             rotated_centroids.size() * sizeof(float), crc);\n  dumped_size += size;\n\n  // Write original centroids\n  size = dumper->write(centroids.data(), centroids.size() * sizeof(float));\n  if (size != centroids.size() * sizeof(float)) {\n    LOG_ERROR(\"Failed to write centroids: written=%zu, expected=%zu\", size,\n              centroids.size() * sizeof(float));\n    return IndexError_WriteData;\n  }\n  crc = ailego::Crc32c::Hash(centroids.data(), centroids.size() * sizeof(float),\n                             crc);\n  dumped_size += size;\n\n  // Write rotator data\n  std::vector<char> buffer(rotator->dump_bytes());\n  rotator->save(buffer.data());\n  size = dumper->write(buffer.data(), buffer.size());\n  if (size != buffer.size()) {\n    LOG_ERROR(\"Failed to write rotator data: written=%zu, expected=%zu\", size,\n              buffer.size());\n    return IndexError_WriteData;\n  }\n  crc = ailego::Crc32c::Hash(buffer.data(), buffer.size(), crc);\n  dumped_size += size;\n\n  // Write padding\n  size_t padding_size = align_size(dumped_size) - dumped_size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, '\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      LOG_ERROR(\"Append padding failed, size %lu\", padding_size);\n      return IndexError_WriteData;\n    }\n  }\n\n  int ret =\n      dumper->append(RABITQ_CONVERTER_SEG_ID, dumped_size, 
padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\",\n              RABITQ_CONVERTER_SEG_ID.c_str(), ret);\n    return ret;\n  }\n\n  if (out_dumped_size) {\n    *out_dumped_size = dumped_size;\n  }\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_rabitq/rabitq_utils.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <rabitqlib/utils/rotator.hpp>\n#include \"zvec/core/framework/index_dumper.h\"\n\nnamespace zvec {\nnamespace core {\n\ninline const std::string RABITQ_CONVERTER_SEG_ID{\"rabitq.converter\"};\n\nstruct RabitqConverterHeader {\n  uint32_t num_clusters;\n  uint32_t dim;\n  uint32_t padded_dim;\n  uint32_t rotator_size;\n  uint8_t ex_bits;\n  uint8_t rotator_type;\n  uint8_t padding[2];\n  uint32_t reserve[3];\n\n  RabitqConverterHeader() {\n    memset(this, 0, sizeof(RabitqConverterHeader));\n  }\n};\nstatic_assert(sizeof(RabitqConverterHeader) % 32 == 0,\n              \"RabitqConverterHeader must be aligned with 32 bytes\");\n\n// Common dump implementation for RabitqConverter and RabitqReformer\nint dump_rabitq_centroids(\n    const IndexDumper::Pointer &dumper, size_t dimension, size_t padded_dim,\n    size_t ex_bits, size_t num_clusters, rabitqlib::RotatorType rotator_type,\n    const std::vector<float> &rotated_centroids,\n    const std::vector<float> &centroids,\n    const std::unique_ptr<rabitqlib::Rotator<float>> &rotator,\n    size_t *out_dumped_size = nullptr);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_knn_hnsw_sparse \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS core_framework sparsehash\n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_algorithm.cc",
    "content": "\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_algorithm.h\"\n#include <chrono>\n#include <iostream>\n#include <vector>\n#include <ailego/internal/cpu_features.h>\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseAlgorithm::HnswSparseAlgorithm(HnswSparseEntity &entity)\n    : entity_(entity),\n      mt_(std::chrono::system_clock::now().time_since_epoch().count()),\n      lock_pool_(kLockCnt) {}\n\nint HnswSparseAlgorithm::cleanup() {\n  return 0;\n}\n\nint HnswSparseAlgorithm::add_node(node_id_t id, level_t level,\n                                  HnswSparseContext *ctx) {\n  spin_lock_.lock();\n\n  // std::cout << \"id: \" << id << \", level: \" << level << std::endl;\n\n  auto cur_max_level = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    return 0;\n  }\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    mutex_.lock();\n    // re-check max level\n    cur_max_level = entity_.cur_max_level();\n    entry_point = entity_.entry_point();\n    if (level <= cur_max_level) {\n      mutex_.unlock();\n    }\n  }\n\n  level_t cur_level = cur_max_level;\n  dist_t dist = ctx->dist_calculator()(entry_point);\n  for (; cur_level > level; --cur_level) {\n    select_entry_point(cur_level, 
&entry_point, &dist, ctx);\n  }\n\n  for (; cur_level >= 0; --cur_level) {\n    search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),\n                     ctx);\n  }\n\n  // add neighbors from down level to top level, to avoid upper level visible\n  // to knn_search but the under layer level not ready\n  for (cur_level = 0; cur_level <= level; ++cur_level) {\n    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);\n    ctx->level_topk(cur_level).clear();\n  }\n\n  if (ailego_unlikely(level > cur_max_level)) {\n    spin_lock_.lock();\n    entity_.update_ep_and_level(id, level);\n    spin_lock_.unlock();\n    mutex_.unlock();\n  }\n\n  return 0;\n}\n\nint HnswSparseAlgorithm::search(HnswSparseContext *ctx) const {\n  spin_lock_.lock();\n  auto maxLevel = entity_.cur_max_level();\n  auto entry_point = entity_.entry_point();\n  spin_lock_.unlock();\n\n  if (ailego_unlikely(entry_point == kInvalidNodeId)) {\n    return 0;\n  }\n\n  dist_t dist = ctx->dist_calculator().dist(entry_point);\n  for (level_t cur_level = maxLevel; cur_level >= 1; --cur_level) {\n    select_entry_point(cur_level, &entry_point, &dist, ctx);\n  }\n\n  auto &topk_heap = ctx->topk_heap();\n  topk_heap.clear();\n  search_neighbors(0, &entry_point, &dist, topk_heap, ctx);\n\n  if (ctx->group_by_search()) {\n    expand_neighbors_by_group(topk_heap, ctx);\n  }\n\n  return 0;\n}\n\n//! 
select_entry_point on hnsw level, ef = 1\nvoid HnswSparseAlgorithm::select_entry_point(level_t level,\n                                             node_id_t *entry_point,\n                                             dist_t *dist,\n                                             HnswSparseContext *ctx) const {\n  auto &entity = ctx->get_entity();\n  HnswSparseDistCalculator &dc = ctx->dist_calculator();\n  while (true) {\n    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n    uint32_t size = neighbors.size();\n    if (size == 0) {\n      break;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_block_vecs;\n    int ret = entity.get_vector_metas(&neighbors[0], size, neighbor_block_vecs);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n    bool find_closer = false;\n    for (uint32_t i = 0; i < size; ++i) {\n      dist_t cur_dist = dc.dist(neighbor_block_vecs[i].data());\n      if (cur_dist < *dist) {\n        *entry_point = neighbors[i];\n        *dist = cur_dist;\n        find_closer = true;\n      }\n    }\n\n    if (!find_closer) {\n      break;\n    }\n  }\n\n  return;\n}\n\nvoid HnswSparseAlgorithm::add_neighbors(node_id_t id, level_t level,\n                                        TopkHeap &topk_heap,\n                                        HnswSparseContext *ctx) {\n  if (ailego_unlikely(topk_heap.size() == 0)) {\n    return;\n  }\n\n  HnswSparseDistCalculator &dc = ctx->dist_calculator();\n\n  update_neighbors(dc, id, level, topk_heap);\n\n  // reverse update neighbors\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    reverse_update_neighbors(dc, topk_heap[i].first, level, id,\n                             topk_heap[i].second, ctx->update_heap());\n  }\n\n  return;\n}\n\nvoid 
HnswSparseAlgorithm::search_neighbors(level_t level,\n                                           node_id_t *entry_point, dist_t *dist,\n                                           TopkHeap &topk,\n                                           HnswSparseContext *ctx) const {\n  const auto &entity = ctx->get_entity();\n  HnswSparseDistCalculator &dc = ctx->dist_calculator();\n  VisitFilter &visit = ctx->visit_filter();\n  CandidateHeap &candidates = ctx->candidates();\n  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n  if (ctx->filter().is_valid()) {\n    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n  }\n\n  candidates.clear();\n  visit.clear();\n  visit.set_visited(*entry_point);\n  if (!filter(*entry_point)) {\n    topk.emplace(*entry_point, *dist);\n  }\n\n  candidates.emplace(*entry_point, *dist);\n  while (!candidates.empty() && !ctx->reach_scan_limit()) {\n    auto top = candidates.begin();\n    node_id_t main_node = top->first;\n    dist_t main_dist = top->second;\n\n    if (topk.full() && main_dist > topk[0].second) {\n      break;\n    }\n\n    candidates.pop();\n    const Neighbors neighbors = entity.get_neighbors(level, main_node);\n    ailego_prefetch(neighbors.data);\n    if (ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_neighbors())++;\n    }\n\n    std::vector<node_id_t> neighbor_ids(neighbors.size());\n    uint32_t size = 0;\n    for (uint32_t i = 0; i < neighbors.size(); ++i) {\n      node_id_t node = neighbors[i];\n      if (visit.visited(node)) {\n        if (ailego_unlikely(ctx->debugging())) {\n          (*ctx->mutable_stats_visit_dup_cnt())++;\n        }\n        continue;\n      }\n      visit.set_visited(node);\n      neighbor_ids[size++] = node;\n    }\n    if (size == 0) {\n      continue;\n    }\n\n    std::vector<IndexStorage::MemoryBlock> neighbor_block_vecs;\n    int ret =\n        entity.get_vector_metas(neighbor_ids.data(), size, neighbor_block_vecs);\n    if 
(ailego_unlikely(ctx->debugging())) {\n      (*ctx->mutable_stats_get_vector())++;\n    }\n    if (ailego_unlikely(ret != 0)) {\n      break;\n    }\n\n    static constexpr node_id_t PREFETCH_STEP = 2;\n    static constexpr node_id_t SPARSE_PREFETCH_STEP = 1;\n\n    for (uint32_t i = 0; i < std::min(PREFETCH_STEP, size); ++i) {\n      ailego_prefetch(neighbor_block_vecs[i].data());\n    }\n    for (uint32_t i = 0; i < size; ++i) {\n      node_id_t node = neighbor_ids[i];\n      node_id_t prefetch_id = i + PREFETCH_STEP;\n      if (prefetch_id < size) {\n        ailego_prefetch(neighbor_block_vecs[prefetch_id].data());\n      }\n\n      node_id_t sparse_prefetch_id = i + SPARSE_PREFETCH_STEP;\n      if (sparse_prefetch_id < size) {\n        IndexStorage::MemoryBlock sparse_block;\n        int sparse_length = 0;\n        entity.get_sparse_data_from_vector(\n            neighbor_block_vecs[sparse_prefetch_id].data(), sparse_block,\n            sparse_length);\n        auto sparse_data = std::make_pair(sparse_block.data(), sparse_length);\n        if (sparse_data.first != nullptr) {\n          ailego_prefetch(sparse_data.first);\n        }\n      }\n\n      dist_t cur_dist = dc.dist(neighbor_block_vecs[i].data());\n      if ((!topk.full()) || cur_dist < topk[0].second) {\n        candidates.emplace(node, cur_dist);\n        // update entry_point for next level scan\n        if (cur_dist < *dist) {\n          *entry_point = node;\n          *dist = cur_dist;\n        }\n        if (!filter(node)) {\n          topk.emplace(node, cur_dist);\n        }\n      }  // end if\n    }  // end for\n  }  // while\n\n  return;\n}\n\nvoid HnswSparseAlgorithm::expand_neighbors_by_group(\n    TopkHeap &topk, HnswSparseContext *ctx) const {\n  if (!ctx->group_by().is_valid()) {\n    return;\n  }\n\n  const auto &entity = ctx->get_entity();\n  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n    return ctx->group_by()(entity.get_key(id));\n  };\n\n  // devide into 
groups\n  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();\n  for (uint32_t i = 0; i < topk.size(); ++i) {\n    node_id_t id = topk[i].first;\n    auto score = topk[i].second;\n\n    std::string group_id = group_by(id);\n\n    auto &topk_heap = group_topk_heaps[group_id];\n    if (topk_heap.empty()) {\n      topk_heap.limit(ctx->group_topk());\n    }\n    topk_heap.emplace_back(id, score);\n  }\n\n  // stage 2, expand to reach group num as possible\n  if (group_topk_heaps.size() < ctx->group_num()) {\n    VisitFilter &visit = ctx->visit_filter();\n    CandidateHeap &candidates = ctx->candidates();\n    HnswSparseDistCalculator &dc = ctx->dist_calculator();\n\n    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };\n    if (ctx->filter().is_valid()) {\n      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };\n    }\n\n    // refill to get enough groups\n    candidates.clear();\n    visit.clear();\n    for (uint32_t i = 0; i < topk.size(); ++i) {\n      node_id_t id = topk[i].first;\n      float score = topk[i].second;\n\n      visit.set_visited(id);\n      candidates.emplace_back(id, score);\n    }\n\n    // do expand\n    while (!candidates.empty() && !ctx->reach_scan_limit()) {\n      auto top = candidates.begin();\n      node_id_t main_node = top->first;\n\n      candidates.pop();\n      const Neighbors neighbors = entity.get_neighbors(0, main_node);\n      if (ailego_unlikely(ctx->debugging())) {\n        (*ctx->mutable_stats_get_neighbors())++;\n      }\n\n      std::vector<node_id_t> neighbor_ids(neighbors.size());\n      uint32_t size = 0;\n      for (uint32_t i = 0; i < neighbors.size(); ++i) {\n        node_id_t node = neighbors[i];\n        if (visit.visited(node)) {\n          if (ailego_unlikely(ctx->debugging())) {\n            (*ctx->mutable_stats_visit_dup_cnt())++;\n          }\n          continue;\n        }\n        visit.set_visited(node);\n        neighbor_ids[size++] = 
node;\n      }\n      if (size == 0) {\n        continue;\n      }\n\n      std::vector<IndexStorage::MemoryBlock> neighbor_block_vecs;\n      int ret = entity.get_vector_metas(neighbor_ids.data(), size,\n                                        neighbor_block_vecs);\n      if (ailego_unlikely(ctx->debugging())) {\n        (*ctx->mutable_stats_get_vector())++;\n      }\n      if (ailego_unlikely(ret != 0)) {\n        break;\n      }\n\n      static constexpr node_id_t PREFETCH_STEP = 2;\n      for (uint32_t i = 0; i < size; ++i) {\n        node_id_t node = neighbor_ids[i];\n        node_id_t prefetch_id = i + PREFETCH_STEP;\n        if (prefetch_id < size) {\n          ailego_prefetch(neighbor_block_vecs[prefetch_id].data());\n        }\n        dist_t cur_dist = dc.dist(neighbor_block_vecs[i].data());\n\n        if (!filter(node)) {\n          std::string group_id = group_by(node);\n\n          auto &topk_heap = group_topk_heaps[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(node, cur_dist);\n\n          if (group_topk_heaps.size() >= ctx->group_num()) {\n            break;\n          }\n        }\n\n        candidates.emplace(node, cur_dist);\n      }  // end for\n    }  // end while\n  }  // end if\n}\n\nvoid HnswSparseAlgorithm::update_neighbors(HnswSparseDistCalculator &dc,\n                                           node_id_t id, level_t level,\n                                           TopkHeap &topk_heap) {\n  topk_heap.sort();\n\n  uint32_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n  if (topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt())) {\n    if (topk_heap.size() <= static_cast<size_t>(max_neighbor_cnt)) {\n      entity_.update_neighbors(level, id, topk_heap);\n      return;\n    }\n  }\n\n  uint32_t cur_size = 0;\n  for (size_t i = 0; i < topk_heap.size(); ++i) {\n    node_id_t cur_node = topk_heap[i].first;\n    dist_t cur_node_dist = 
topk_heap[i].second;\n    bool good = true;\n    for (uint32_t j = 0; j < cur_size; ++j) {\n      dist_t tmp_dist = dc.dist(cur_node, topk_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      topk_heap[cur_size].first = cur_node;\n      topk_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  // when after-prune neighbor count is too seldom,\n  // we use this strategy to make-up enough edges\n  // not only just make-up out-degrees\n  // we also make-up enough in-degrees\n  uint32_t min_neighbors = entity_.min_neighbor_cnt();\n  for (size_t k = cur_size; cur_size < min_neighbors && k < topk_heap.size();\n       ++k) {\n    bool exist = false;\n    for (size_t j = 0; j < cur_size; ++j) {\n      if (topk_heap[j].first == topk_heap[k].first) {\n        exist = true;\n        break;\n      }\n    }\n    if (!exist) {\n      topk_heap[cur_size].first = topk_heap[k].first;\n      topk_heap[cur_size].second = topk_heap[k].second;\n      cur_size++;\n    }\n  }\n\n  topk_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, topk_heap);\n\n  return;\n}\n\nvoid HnswSparseAlgorithm::reverse_update_neighbors(HnswSparseDistCalculator &dc,\n                                                   node_id_t id, level_t level,\n                                                   node_id_t link_id,\n                                                   dist_t dist,\n                                                   TopkHeap &update_heap) {\n  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);\n\n  uint32_t lock_idx = id & kLockMask;\n  lock_pool_[lock_idx].lock();\n  const Neighbors neighbors = entity_.get_neighbors(level, id);\n  size_t size = neighbors.size();\n  ailego_assert_with(size <= max_neighbor_cnt, \"invalid neighbor size\");\n  if (size < max_neighbor_cnt) {\n    entity_.add_neighbor(level, 
id, size, link_id);\n    lock_pool_[lock_idx].unlock();\n    return;\n  }\n\n  update_heap.emplace(link_id, dist);\n\n  for (size_t i = 0; i < size; ++i) {\n    node_id_t node = neighbors[i];\n    dist_t cur_dist = dc.dist(id, node);\n    update_heap.emplace(node, cur_dist);\n  }\n\n  //! TODO: optimize prune\n  //! prune edges\n  update_heap.sort();\n  size_t cur_size = 0;\n  for (size_t i = 0; i < update_heap.size(); ++i) {\n    node_id_t cur_node = update_heap[i].first;\n    dist_t cur_node_dist = update_heap[i].second;\n    bool good = true;\n    for (size_t j = 0; j < cur_size; ++j) {\n      dist_t tmp_dist = dc.dist(cur_node, update_heap[j].first);\n      if (tmp_dist <= cur_node_dist) {\n        good = false;\n        break;\n      }\n    }\n\n    if (good) {\n      update_heap[cur_size].first = cur_node;\n      update_heap[cur_size].second = cur_node_dist;\n      cur_size++;\n      if (cur_size >= max_neighbor_cnt) {\n        break;\n      }\n    }\n  }\n\n  update_heap.resize(cur_size);\n  entity_.update_neighbors(level, id, update_heap);\n\n  lock_pool_[lock_idx].unlock();\n\n  update_heap.clear();\n\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_algorithm.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <ailego/parallel/lock.h>\n#include \"hnsw_sparse_context.h\"\n#include \"hnsw_sparse_dist_calculator.h\"\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! hnsw graph algorithm implement\nclass HnswSparseAlgorithm {\n public:\n  typedef std::unique_ptr<HnswSparseAlgorithm> UPointer;\n\n public:\n  //! Constructor\n  explicit HnswSparseAlgorithm(HnswSparseEntity &entity);\n\n  //! Cleanup HnswSparseAlgorithm\n  int cleanup();\n\n  //! Add a node to hnsw graph\n  //! @id:     the node unique id\n  //! @level:  a node will be add to graph in each level [0, level]\n  //! return 0 on success, or errCode in failure\n  int add_node(node_id_t id, level_t level, HnswSparseContext *ctx);\n\n  //! do knn search in graph\n  //! return 0 on success, or errCode in failure. results saved in ctx\n  int search(HnswSparseContext *ctx) const;\n\n  //! 
Initiate HnswAlgorithm\n  int init() {\n    level_probas_.clear();\n    double level_mult =\n        1 / std::log(static_cast<double>(entity_.scaling_factor()));\n    for (int level = 0;; level++) {\n      // refers faiss get_random_level alg\n      double proba =\n          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));\n      if (proba < 1e-9) {\n        break;\n      }\n      level_probas_.push_back(proba);\n    }\n\n    return 0;\n  }\n\n  //! Generate a random level\n  //! return graph level\n  uint32_t get_random_level() const {\n    // gen rand float (0, 1)\n    double f = mt_() / static_cast<float>(mt_.max());\n    for (size_t level = 0; level < level_probas_.size(); level++) {\n      if (f < level_probas_[level]) {\n        return level;\n      }\n      f -= level_probas_[level];\n    }\n    return level_probas_.size() - 1;\n  }\n\n private:\n  //! Select in upper layer to get entry point for next layer search\n  void select_entry_point(level_t level, node_id_t *entry_point, dist_t *dist,\n                          HnswSparseContext *ctx) const;\n\n  //! update node id neighbors from topkHeap, and reverse link is also updated\n  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,\n                     HnswSparseContext *ctx);\n\n  //! Given a node id and level, search the nearest neighbors in graph\n  //! Note: the nearest neighbors result keeps in topk, and entry_point and\n  //! dist will be updated to current level nearest node id and distance\n  void search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist,\n                        TopkHeap &topk, HnswSparseContext *ctx) const;\n\n  //! Update the node's neighbors\n  void update_neighbors(HnswSparseDistCalculator &dc, node_id_t id,\n                        level_t level, TopkHeap &topk_heap);\n\n  //! Checking linkId could be id's new neighbor, and add as neighbor if true\n  //! @dc         distance calculator\n  //! 
@updateHeap temporary heap in updating neighbors\n  void reverse_update_neighbors(HnswSparseDistCalculator &dc, node_id_t id,\n                                level_t level, node_id_t link_id, dist_t dist,\n                                TopkHeap &update_heap);\n\n  //! expand neighbors until group nums are reached\n  void expand_neighbors_by_group(TopkHeap &topk, HnswSparseContext *ctx) const;\n\n private:\n  HnswSparseAlgorithm(const HnswSparseAlgorithm &) = delete;\n  HnswSparseAlgorithm &operator=(const HnswSparseAlgorithm &) = delete;\n\n private:\n  static constexpr uint32_t kLockCnt{1U << 8};\n  static constexpr uint32_t kLockMask{kLockCnt - 1U};\n\n  HnswSparseEntity &entity_;\n  mutable std::mt19937 mt_{};\n  std::vector<double> level_probas_{};\n\n  mutable ailego::SpinMutex spin_lock_{};  // global spin lock\n  std::mutex mutex_{};                     // global mutex\n  // TODO: spin lock?\n  std::vector<std::mutex> lock_pool_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_builder.h\"\n#include <iostream>\n#include <thread>\n#include <ailego/pattern/defer.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"hnsw_sparse_algorithm.h\"\n#include \"hnsw_sparse_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseBuilder::HnswSparseBuilder() {}\n\nint HnswSparseBuilder::init(const IndexMeta &meta,\n                            const ailego::Params &params) {\n  LOG_INFO(\"Begin HnswSparseBuilder::init\");\n\n  meta_ = meta;\n  auto params_copy = params;\n  meta_.set_builder(\"HnswSparseBuilder\", HnswSparseEntity::kRevision,\n                    std::move(params_copy));\n\n  size_t memory_quota = 0UL;\n  params.get(PARAM_HNSW_SPARSE_BUILDER_MEMORY_QUOTA, &memory_quota);\n  params.get(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, &thread_cnt_);\n  params.get(PARAM_HNSW_SPARSE_BUILDER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_SPARSE_BUILDER_CHECK_INTERVAL_SECS,\n             &check_interval_secs_);\n\n  params.get(PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT,\n             &upper_max_neighbor_cnt_);\n  float multiplier = HnswSparseEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_SPARSE_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,\n             
&multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = upper_max_neighbor_cnt_;\n  params.get(PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR, &scaling_factor_);\n\n  multiplier = HnswSparseEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_SPARSE_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;\n\n  if (ef_construction_ == 0) {\n    ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0) {\n    upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d]\",\n              PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),\n              kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%d] must be <= [%s]-[%d]\",\n              PARAM_HNSW_SPARSE_BUILDER_MIN_NEIGHBOR_COUNT.c_str(),\n              min_neighbor_cnt_,\n              PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),\n              upper_max_neighbor_cnt_);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0) {\n    l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"L0MaxNeighborCnt must be in range (0,%d)\",\n              HnswSparseEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n  if (thread_cnt_ == 0) {\n    thread_cnt_ = std::thread::hardware_concurrency();\n  }\n  if 
(thread_cnt_ > std::thread::hardware_concurrency()) {\n    LOG_WARN(\"[%s] greater than cpu cores %u\",\n             PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT.c_str(),\n             std::thread::hardware_concurrency());\n  }\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"CreateMeasure failed, name: %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"IndexMeasure init failed, ret=%d\", ret);\n    return ret;\n  }\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_memory_quota(memory_quota);\n  entity_.set_prune_cnt(prune_cnt);\n\n  entity_.set_sparse_meta_size(HnswSparseEntity::kSparseMetaSize);\n  entity_.set_sparse_unit_size(meta.unit_size());\n\n  ret = entity_.init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  state_ = BUILD_STATE_INITED;\n  LOG_INFO(\n      \"End HnswSparseBuilder::init, params: efConstruction=%u \"\n      \"l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u \"\n      \"memoryQuota=%zu neighborPruneCnt=%zu measureName=%s \",\n      ef_construction_, l0_max_neighbor_cnt_, upper_max_neighbor_cnt_,\n      scaling_factor_, memory_quota, prune_cnt, meta_.metric_name().c_str());\n\n  return 0;\n}\n\nint HnswSparseBuilder::cleanup(void) {\n  LOG_INFO(\"Begin HnswSparseBuilder::cleanup\");\n\n  l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultL0MaxNeighborCnt;\n  min_neighbor_cnt_ = 0;\n  upper_max_neighbor_cnt_ = 
HnswSparseEntity::kDefaultUpperMaxNeighborCnt;\n  ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;\n  scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;\n  check_interval_secs_ = kDefaultLogIntervalSecs;\n  errcode_ = 0;\n  error_ = false;\n  entity_.cleanup();\n  alg_->cleanup();\n  meta_.clear();\n  metric_.reset();\n  stats_.clear_attributes();\n  stats_.set_trained_count(0UL);\n  stats_.set_built_count(0UL);\n  stats_.set_dumped_count(0UL);\n  stats_.set_discarded_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  stats_.set_built_costtime(0UL);\n  stats_.set_dumped_costtime(0UL);\n  state_ = BUILD_STATE_INIT;\n\n  LOG_INFO(\"End HnswSparseBuilder::cleanup\");\n\n  return 0;\n}\n\nint HnswSparseBuilder::train(IndexThreads::Pointer,\n                             IndexSparseHolder::Pointer /*holder*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before HnswSparseBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswSparseBuilder::train\");\n\n  return 0;\n}\n\nint HnswSparseBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {\n  if (state_ != BUILD_STATE_INITED) {\n    LOG_ERROR(\"Init the builder before HnswSparseBuilder::train\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswSparseBuilder::train by trainer\");\n\n  stats_.set_trained_count(0UL);\n  stats_.set_trained_costtime(0UL);\n  state_ = BUILD_STATE_TRAINED;\n\n  LOG_INFO(\"End HnswSparseBuilder::train by trainer\");\n\n  return 0;\n}\n\nint HnswSparseBuilder::build(IndexThreads::Pointer threads,\n                             IndexSparseHolder::Pointer holder) {\n  if (!holder) {\n    LOG_ERROR(\"Input holder is nullptr while building index\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while building 
index\");\n    return IndexError_Mismatch;\n  }\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  LOG_INFO(\"Begin HnswSparseBuilder::build sparse\");\n\n  // holder should be hybrid holder\n  auto sparse_holder = std::dynamic_pointer_cast<IndexSparseHolder>(holder);\n\n  if (sparse_holder == nullptr) {\n    LOG_ERROR(\"HnswSparseBuilder failed to cast holder\");\n    return IndexError_Runtime;\n  }\n\n  if (sparse_holder->count() != static_cast<size_t>(-1)) {\n    LOG_DEBUG(\"HnswSparseBuilder holder documents count %lu\",\n              sparse_holder->count());\n\n    int ret = entity_.reserve_space(sparse_holder->count(),\n                                    sparse_holder->total_sparse_count());\n    if (ret != 0) {\n      LOG_ERROR(\"HnswBuilde reserver space failed\");\n      return ret;\n    }\n  }\n  auto iter = sparse_holder->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Create iterator for holder failed\");\n    return IndexError_Runtime;\n  }\n\n  int ret;\n  error_ = false;\n  while (iter->is_valid()) {\n    level_t level = alg_->get_random_level();\n    node_id_t id;\n\n    ret = entity_.add_vector(level, iter->key(), iter->sparse_count(),\n                             iter->sparse_indices(), iter->sparse_data(), &id);\n\n    if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {\n      return ret;\n    }\n\n    iter->next();\n  }\n  // Holder is not needed, cleanup it.\n  sparse_holder.reset();\n\n  LOG_INFO(\"Finished save vector, start build graph...\");\n\n  std::atomic<node_id_t> finished{0};\n\n  ret = build_graph(threads, finished);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to build graph\");\n    return ret;\n  }\n\n  stats_.set_built_count(finished.load());\n  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = 
BUILD_STATE_BUILT;\n\n  LOG_INFO(\"End HnswSparseBuilder::build\");\n  return 0;\n}\n\nint HnswSparseBuilder::build_graph(IndexThreads::Pointer threads,\n                                   std::atomic<node_id_t> &finished) {\n  auto task_group = threads->make_group();\n  if (!task_group) {\n    LOG_ERROR(\"Failed to create task group\");\n    return IndexError_Runtime;\n  }\n\n  for (size_t i = 0; i < threads->count(); ++i) {\n    task_group->submit(ailego::Closure ::New(this, &HnswSparseBuilder::do_build,\n                                             i, threads->count(), &finished));\n  }\n\n  while (!task_group->is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex_);\n    cond_.wait_until(lk, std::chrono::system_clock::now() +\n                             std::chrono::seconds(check_interval_secs_));\n    if (error_.load(std::memory_order_acquire)) {\n      LOG_ERROR(\"Failed to build index while waiting finish\");\n      return errcode_;\n    }\n    LOG_INFO(\"Built cnt %u, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / entity_.doc_cnt());\n  }\n  if (error_.load(std::memory_order_acquire)) {\n    LOG_ERROR(\"Failed to build index while waiting finish\");\n    return errcode_;\n  }\n  task_group->wait_finish();\n\n  return 0;\n}\n\nvoid HnswSparseBuilder::do_build(node_id_t idx, size_t step_size,\n                                 std::atomic<node_id_t> *finished) {\n  AILEGO_DEFER([&]() {\n    std::lock_guard<std::mutex> latch(mutex_);\n    cond_.notify_one();\n  });\n\n  HnswSparseContext *ctx = new (std::nothrow) HnswSparseContext(\n      metric_,\n      std::shared_ptr<HnswSparseEntity>(&entity_, [](HnswSparseEntity *) {}));\n  if (ailego_unlikely(ctx == nullptr)) {\n    if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to create context\");\n      errcode_ = IndexError_NoMemory;\n    }\n    return;\n  }\n  HnswSparseContext::Pointer auto_ptr(ctx);\n  ctx->set_max_scan_num(entity_.doc_cnt());\n  int ret = 
ctx->init(HnswSparseContext::kSparseBuilderContext);\n  if (ret != 0) {\n    if (!error_.exchange(true)) {\n      LOG_ERROR(\"Failed to init context\");\n      errcode_ = IndexError_Runtime;\n    }\n    return;\n  }\n\n  IndexQueryMeta qmeta(meta_.data_type());\n  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {\n    const void *vec = entity_.get_vector_meta(id);\n\n    auto sparse_data = entity_.get_sparse_data_from_vector(vec);\n\n    ctx->reset_query(sparse_data.first);\n\n    ret = alg_->add_node(id, entity_.get_level(id), ctx);\n    if (ailego_unlikely(ret != 0)) {\n      if (!error_.exchange(true)) {\n        LOG_ERROR(\"Hnsw graph add node failed\");\n        errcode_ = ret;\n      }\n      return;\n    }\n    ctx->clear();\n    (*finished)++;\n  }\n}\n\nint HnswSparseBuilder::dump(const IndexDumper::Pointer &dumper) {\n  if (state_ != BUILD_STATE_BUILT) {\n    LOG_INFO(\"Build the index before HnswSparseBuilder::dump\");\n    return IndexError_NoReady;\n  }\n\n  LOG_INFO(\"Begin HnswSparseBuilder::dump\");\n\n  meta_.set_searcher(\"HnswSparseSearcher\", HnswSparseEntity::kRevision,\n                     ailego::Params());\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n\n  ret = entity_.dump(dumper);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswSparseBuilder dump index failed\");\n    return ret;\n  }\n\n  stats_.set_dumped_count(entity_.doc_cnt());\n  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);\n\n  LOG_INFO(\"EndHnswSparseBuilder::dump\");\n  return 0;\n}\n\nint HnswSparseBuilder::build(IndexThreads::Pointer threads, size_t count,\n                             const uint64_t *keys,\n                             const uint64_t *sparse_indptr,\n                             const uint32_t *sparse_indices,\n                             const 
void *sparse_data) {\n  IndexQueryMeta qmeta(meta_.data_type());\n\n  return build(threads, qmeta, count, keys, sparse_indptr, sparse_indices,\n               sparse_data);\n}\n\nint HnswSparseBuilder::build(IndexThreads::Pointer threads,\n                             const IndexQueryMeta &qmeta, size_t count,\n                             const uint64_t *keys,\n                             const uint64_t *sparse_indptr,\n                             const uint32_t *sparse_indices,\n                             const void *sparse_data) {\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  LOG_INFO(\"Begin HnswSparseBuilder::build sparse, documents count %lu\", count);\n\n  size_t total_sparse_count = sparse_indptr[count];\n\n  int ret = entity_.reserve_space(count, total_sparse_count);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswBuilde reserver space failed\");\n    return ret;\n  }\n\n  if (qmeta.data_type() == meta_.data_type()) {\n    for (size_t i = 0; i < count; i++) {\n      level_t level = alg_->get_random_level();\n      node_id_t id;\n\n      uint32_t sparse_count = sparse_indptr[i + 1] - sparse_indptr[i];\n      const uint32_t *sparse_indices_temp = sparse_indices + sparse_indptr[i];\n\n      const void *sparse_data_temp = static_cast<const char *>(sparse_data) +\n                                     sparse_indptr[i] * qmeta.unit_size();\n\n      ret = entity_.add_vector(level, keys[i], sparse_count,\n                               sparse_indices_temp, sparse_data_temp, &id);\n      if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {\n        return ret;\n      }\n    }\n  } else if (meta_.data_type() == IndexMeta::DataType::DT_FP16 &&\n             qmeta.data_type() == IndexMeta::DataType::DT_FP32) {\n    // transform from float 32 to float 16\n    auto reformer = 
IndexFactory::CreateReformer(\"HalfFloatSparseReformer\");\n    if (!reformer) {\n      LOG_ERROR(\"Sparse reformer not existed.\");\n\n      return IndexError_NoExist;\n    }\n\n    meta_.set_converter(\"HalfFloatSparseConverter\", 0, ailego::Params());\n    meta_.set_reformer(\"HalfFloatSparseReformer\", 0, ailego::Params());\n\n    for (size_t i = 0; i < count; i++) {\n      level_t level = alg_->get_random_level();\n      node_id_t id;\n\n      uint32_t sparse_count = sparse_indptr[i + 1] - sparse_indptr[i];\n      const uint32_t *sparse_indices_temp = sparse_indices + sparse_indptr[i];\n\n      const void *sparse_data_temp = static_cast<const char *>(sparse_data) +\n                                     sparse_indptr[i] * qmeta.unit_size();\n\n      std::string query_fp16;\n      IndexQueryMeta ometa;\n\n      reformer->transform(sparse_count, sparse_indices_temp, sparse_data_temp,\n                          qmeta, &query_fp16, &ometa);\n\n      ret = entity_.add_vector(level, keys[i], sparse_count,\n                               sparse_indices_temp, query_fp16.data(), &id);\n      if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {\n        return ret;\n      }\n    }\n  } else {\n    LOG_ERROR(\"Format not supported.\");\n\n    return IndexError_Unsupported;\n  }\n\n  LOG_INFO(\"Finished save vector, start build graph...\");\n\n  std::atomic<node_id_t> finished{0};\n\n  ret = build_graph(threads, finished);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to build graph\");\n    return ret;\n  }\n\n  stats_.set_built_count(finished.load());\n  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = BUILD_STATE_BUILT;\n\n  LOG_INFO(\"End HnswSparseBuilder::build\");\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_BUILDER(HnswSparseBuilder);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_builder.h>\n#include \"hnsw_sparse_algorithm.h\"\n#include \"hnsw_sparse_builder_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseBuilder : public IndexBuilder {\n public:\n  //! Constructor\n  HnswSparseBuilder();\n\n  //! Initialize the builder\n  int init(const IndexMeta &meta, const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  int cleanup(void) override;\n\n  //! Train the data\n  int train(IndexThreads::Pointer, IndexSparseHolder::Pointer holder) override;\n\n  //! Train the data\n  int train(const IndexTrainer::Pointer &trainer) override;\n\n  //! Build the index\n  int build(IndexThreads::Pointer threads,\n            IndexSparseHolder::Pointer holder) override;\n\n  //! Build the index with indptr format\n  int build(IndexThreads::Pointer threads, const IndexQueryMeta &qmeta,\n            size_t count, const uint64_t *keys, const uint64_t *sparse_indptr,\n            const uint32_t *sparse_indices, const void *sparse_data) override;\n\n  //! Build the index with indptr format\n  int build(IndexThreads::Pointer threads, size_t count, const uint64_t *keys,\n            const uint64_t *sparse_indptr, const uint32_t *sparse_indices,\n            const void *sparse_data) override;\n\n  //! 
Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n private:\n  int build_graph(IndexThreads::Pointer threads,\n                  std::atomic<node_id_t> &finished);\n  void do_build(node_id_t idx, size_t step_size,\n                std::atomic<node_id_t> *finished);\n\n  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n\n private:\n  enum BUILD_STATE {\n    BUILD_STATE_INIT = 0,\n    BUILD_STATE_INITED = 1,\n    BUILD_STATE_TRAINED = 2,\n    BUILD_STATE_BUILT = 3\n  };\n\n  HnswSparseBuilderEntity entity_{};\n  HnswSparseAlgorithm::UPointer alg_;  // impl graph algorithm\n  uint32_t thread_cnt_{0};\n  uint32_t l0_max_neighbor_cnt_{HnswSparseEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t min_neighbor_cnt_{0};\n  uint32_t upper_max_neighbor_cnt_{\n      HnswSparseEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t ef_construction_{HnswSparseEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswSparseEntity::kDefaultScalingFactor};\n  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};\n\n  int errcode_{0};\n  std::atomic_bool error_{false};\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n  std::mutex mutex_{};\n  std::condition_variable cond_{};\n  Stats stats_{};\n\n  BUILD_STATE state_{BUILD_STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_builder_entity.h\"\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseBuilderEntity::HnswSparseBuilderEntity() {\n  update_ep_and_level(kInvalidNodeId, 0U);\n}\n\nint HnswSparseBuilderEntity::cleanup() {\n  memory_quota_ = 0UL;\n  neighbors_size_ = 0U;\n  upper_neighbors_size_ = 0U;\n  padding_size_ = 0U;\n  vectors_buffer_.clear();\n  keys_buffer_.clear();\n  neighbors_buffer_.clear();\n  upper_neighbors_buffer_.clear();\n  neighbors_index_.clear();\n\n  vectors_buffer_.shrink_to_fit();\n  keys_buffer_.shrink_to_fit();\n  neighbors_buffer_.shrink_to_fit();\n  upper_neighbors_buffer_.shrink_to_fit();\n  neighbors_index_.shrink_to_fit();\n\n  this->HnswSparseEntity::cleanup();\n\n  return 0;\n}\n\nint HnswSparseBuilderEntity::init() {\n  size_t size = vector_size();\n\n  size += sparse_meta_size();\n\n  //! aligned size to 32\n  set_node_size(AlignSize(size));\n  //! 
if node size is aligned to 1k, the build performance will downgrade\n  if (node_size() % 1024 == 0) {\n    set_node_size(AlignSize(node_size() + 1));\n  }\n\n  padding_size_ = node_size() - size;\n\n  neighbors_size_ = neighbors_size();\n  upper_neighbors_size_ = upper_neighbors_size();\n\n  return 0;\n}\n\nint HnswSparseBuilderEntity::reserve_space(size_t docs,\n                                           size_t total_sparse_count) {\n  if (memory_quota_ > 0 && (node_size() * docs + neighbors_size_ * docs +\n                                sizeof(SparseNeighborIndex) * docs >\n                            memory_quota_)) {\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.reserve(node_size() * docs);\n  keys_buffer_.reserve(sizeof(key_t) * docs);\n  neighbors_buffer_.reserve(neighbors_size_ * docs);\n  neighbors_index_.reserve(docs);\n\n  sparse_data_buffer_.reserve(sizeof(uint32_t) * docs +\n                              (sizeof(uint32_t)) * total_sparse_count +\n                              sparse_unit_size() * total_sparse_count);\n\n  return 0;\n}\n\nint HnswSparseBuilderEntity::add_vector(level_t level, key_t key,\n                                        const uint32_t sparse_count,\n                                        const uint32_t *sparse_indices,\n                                        const void *sparse_vec, node_id_t *id) {\n  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {\n    LOG_WARN(\n        \"Failed to add sparse vector: number of non-zero elements (%u) exceeds \"\n        \"maximum allowed (%u), key=%zu\",\n        sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)key);\n    return IndexError_InvalidValue;\n  }\n\n  std::string sparse_buffer;\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_vec,\n                                   sparse_unit_size(), sparse_buffer);\n\n  uint32_t sparse_len = sparse_buffer.size();\n\n  if (memory_quota_ > 0 &&\n      
(vectors_buffer_.capacity() + keys_buffer_.capacity() +\n           neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +\n           neighbors_index_.capacity() * sizeof(SparseNeighborIndex) +\n           sparse_len >\n       memory_quota_)) {\n    LOG_ERROR(\"Add vector failed, used memory exceed quota, cur_doc=%u\",\n              doc_cnt());\n    return IndexError_NoMemory;\n  }\n\n  vectors_buffer_.append(reinterpret_cast<const char *>(&sparse_data_offset_),\n                         sizeof(uint64_t));\n  vectors_buffer_.append(reinterpret_cast<const char *>(&sparse_len),\n                         sizeof(uint32_t));\n  vectors_buffer_.append(sizeof(uint32_t),\n                         '\\0');  // reserve to make it up to meta size\n  vectors_buffer_.append(padding_size_, '\\0');\n\n  keys_buffer_.append(reinterpret_cast<const char *>(&key), sizeof(key));\n\n  sparse_data_buffer_.append(sparse_buffer.data(), sparse_len);\n  sparse_data_offset_ += sparse_len;\n\n  // init level 0 neighbors\n  neighbors_buffer_.append(neighbors_size_, '\\0');\n\n  neighbors_index_.emplace_back(upper_neighbors_buffer_.size(), level);\n\n  // init upper layer neighbors\n  for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n    upper_neighbors_buffer_.append(upper_neighbors_size_, '\\0');\n  }\n\n  *id = (*mutable_doc_cnt())++;\n\n  return 0;\n}\n\nkey_t HnswSparseBuilderEntity::get_key(node_id_t id) const {\n  return *(reinterpret_cast<const key_t *>(keys_buffer_.data() +\n                                           id * sizeof(key_t)));\n}\n\nconst void *HnswSparseBuilderEntity::get_vector_meta(node_id_t id) const {\n  return vectors_buffer_.data() + id * node_size();\n}\n\nint HnswSparseBuilderEntity::get_vector_meta(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector_meta(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nint HnswSparseBuilderEntity::get_vector_metas(const node_id_t *ids,\n                
                              uint32_t count,\n                                              const void **vecs) const {\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = vectors_buffer_.data() + ids[i] * node_size();\n  }\n\n  return 0;\n}\n\nint HnswSparseBuilderEntity::get_vector_metas(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {\n  const void *vecs[count];\n  get_vector_metas(ids, count, vecs);\n  for (uint32_t i = 0; i < count; ++i) {\n    block_vecs.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\n//! Get vector feature data by key\nconst void *HnswSparseBuilderEntity::get_sparse_data(uint64_t offset,\n                                                     uint32_t /*len*/) const {\n  return reinterpret_cast<const uint8_t *>(sparse_data_buffer_.data()) + offset;\n}\n\nint HnswSparseBuilderEntity::get_sparse_data(\n    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_sparse_data(offset, len);\n  block.reset((void *)vec);\n  return 0;\n}\n\n//! Get sparse data from id\nconst void *HnswSparseBuilderEntity::get_sparse_data(node_id_t id) const {\n  auto sparse_data = get_sparse_data_from_vector(get_vector_meta(id));\n\n  return sparse_data.first;\n}\n\nint HnswSparseBuilderEntity::get_sparse_data(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_sparse_data(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\n//! 
Get sparse data from vector\nstd::pair<const void *, uint32_t>\nHnswSparseBuilderEntity::get_sparse_data_from_vector(const void *vec) const {\n  uint32_t vec_size = vector_size();\n  const char *vec_ptr = reinterpret_cast<const char *>(vec);\n\n  uint64_t offset = *((uint64_t *)(vec_ptr + vec_size));\n  uint32_t sparse_vector_len =\n      *((uint32_t *)(vec_ptr + vec_size + sizeof(uint64_t)));\n\n  const void *sparse_data = get_sparse_data(offset, sparse_vector_len);\n  if (ailego_unlikely(sparse_data == nullptr)) {\n    LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)offset,\n              sparse_vector_len);\n\n    return std::make_pair(nullptr, 0);\n  }\n\n  return std::make_pair(sparse_data, sparse_vector_len);\n}\n\nint HnswSparseBuilderEntity::get_sparse_data_from_vector(\n    const void *vec, IndexStorage::MemoryBlock &block,\n    int &sparse_length) const {\n  std::pair<const void *, uint32_t> sparse_data =\n      get_sparse_data_from_vector(vec);\n  block.reset((void *)sparse_data.first);\n  sparse_length = sparse_data.second;\n  return 0;\n}\n\nconst Neighbors HnswSparseBuilderEntity::get_neighbors(level_t level,\n                                                       node_id_t id) const {\n  const NeighborsHeader *hd = get_neighbor_header(level, id);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswSparseBuilderEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  for (size_t i = 0; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n  hd->neighbor_cnt = neighbors.size();\n\n  // std::cout << \"id: \" << id << \", neighbour, id: \";\n  // for (size_t i = 0; i < neighbors.size(); ++i) {\n  //   if (i == neighbors.size()-1)\n  //     std::cout << neighbors[i].first << \", score:\" << neighbors[i].second <<\n  //     std::endl;\n  //   
else\n  //     std::cout << neighbors[i].first << \", score:\" << neighbors[i].second <<\n  //     \", id: \";\n  // }\n\n  return 0;\n}\n\nvoid HnswSparseBuilderEntity::add_neighbor(level_t level, node_id_t id,\n                                           uint32_t /*size*/,\n                                           node_id_t neighbor_id) {\n  NeighborsHeader *hd =\n      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));\n  hd->neighbors[hd->neighbor_cnt++] = neighbor_id;\n\n  return;\n}\n\nint HnswSparseBuilderEntity::dump(const IndexDumper::Pointer &dumper) {\n  key_t *keys =\n      reinterpret_cast<key_t *>(const_cast<char *>(keys_buffer_.data()));\n  auto ret =\n      dump_segments(dumper, keys, [&](node_id_t id) { return get_level(id); });\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseBuilderEntity : public HnswSparseEntity {\n public:\n  //! Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t level, key_t key, const uint32_t sparse_count,\n                         const uint32_t *sparse_indices, const void *sparse_vec,\n                         node_id_t *id) override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector_meta(node_id_t id) const override;\n\n  virtual int get_vector_meta(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! Batch get vectors feature data by keys\n  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,\n                               const void **vecs) const override;\n  virtual int get_vector_metas(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;\n\n  //! 
Get the node id's neighbors on graph level\n  const NeighborsHeader *get_neighbor_header(level_t level,\n                                             node_id_t id) const {\n    if (level == 0) {\n      return reinterpret_cast<const NeighborsHeader *>(\n          neighbors_buffer_.data() + neighbors_size_ * id);\n    } else {\n      size_t offset = neighbors_index_[id].offset;\n      return reinterpret_cast<const NeighborsHeader *>(\n          upper_neighbors_buffer_.data() + offset +\n          (level - 1) * upper_neighbors_size_);\n    }\n  }\n\n  //! Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  //! Replace node id in level's neighbors\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;\n\n  //! add a neighbor to id in graph level\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Get vector sparse feature data by chunk index and offset\n  virtual const void *get_sparse_data(uint64_t offset,\n                                      uint32_t len) const override;\n  //! Get sparse data from id\n  virtual const void *get_sparse_data(node_id_t id) const override;\n\n  virtual int get_sparse_data(uint64_t offset, uint32_t len,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  virtual int get_sparse_data(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! 
Get sparse data from vector\n  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(\n      const void *vec) const override;\n\n  virtual int get_sparse_data_from_vector(const void *vec,\n                                          IndexStorage::MemoryBlock &block,\n                                          int &sparse_length) const override;\n\n  //! Dump the hnsw graph to dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Cleanup the entity\n  virtual int cleanup(void) override;\n\n public:\n  //! Constructor\n  HnswSparseBuilderEntity();\n\n  //! Get the node graph level by id\n  level_t get_level(node_id_t id) const {\n    return neighbors_index_[id].level;\n  }\n\n  //! Init builerEntity\n  int init();\n\n  //! reserve buffer space for documents\n  //! @param  docs    number of documents\n  //! @param  total_sparse_count    total dim of sparse count\n  int reserve_space(size_t docs, size_t total_sparse_count);\n\n  //! Set memory quota params\n  inline void set_memory_quota(size_t memory_quota) {\n    memory_quota_ = memory_quota;\n  }\n\n  //! Get neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! Get upper neighbors size\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n public:\n  HnswSparseBuilderEntity(const HnswSparseBuilderEntity &) = delete;\n  HnswSparseBuilderEntity &operator=(const HnswSparseBuilderEntity &) = delete;\n\n private:\n  friend class HnswSparseSearcherEntity;\n\n  //! 
class internal used only\n  struct SparseNeighborIndex {\n    SparseNeighborIndex(size_t off, level_t l) : offset(off), level(l) {}\n    uint64_t offset : 48;\n    uint64_t level : 16;\n  };\n\n  std::string vectors_buffer_{};          // aligned vectors\n  std::string keys_buffer_{};             // aligned vectors\n  std::string neighbors_buffer_{};        // level 0 neighbors buffer\n  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer\n\n  std::string sparse_data_buffer_{};  // aligned spase data buffer\n  size_t sparse_data_offset_{0};      //\n\n  // upper layer offset + level in upper_neighbors_buffer_\n  std::vector<SparseNeighborIndex> neighbors_index_{};\n  size_t memory_quota_{0UL};\n  size_t neighbors_size_{0U};        // level 0 neighbors size\n  size_t upper_neighbors_size_{0U};  // level 0 neighbors size\n  size_t padding_size_{};            // padding size for each vector element\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_chunk.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_chunk.h\"\n#include <chrono>\n#include <random>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\nint SparseChunkBroker::init_storage(size_t chunk_size) {\n  chunk_meta_.clear();\n  chunk_meta_.chunk_size = chunk_size;\n  chunk_meta_.create_time = ailego::Realtime::Seconds();\n  stats_.set_create_time(chunk_meta_.create_time);\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  //! alloc meta chunk\n  size_t size = sizeof(HnswSparseChunkMeta);\n  size = (size + page_mask_) & (~page_mask_);\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  if (ailego_unlikely(!chunk_meta_segment_)) {\n    LOG_ERROR(\"Get meta segment failed\");\n    return IndexError_Runtime;\n  }\n\n  //! 
update meta info and write to storage\n  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_,\n                                    sizeof(HnswSparseChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswSparseChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint SparseChunkBroker::load_storage(size_t chunk_size) {\n  IndexStorage::MemoryBlock data_block;\n  size_t size = chunk_meta_segment_->read(0UL, data_block,\n                                          chunk_meta_segment_->data_size());\n  if (size != sizeof(HnswSparseChunkMeta)) {\n    LOG_ERROR(\"Invalid hnsw meta chunk, read size=%zu chunk size=%zu\", size,\n              chunk_meta_segment_->data_size());\n    return IndexError_InvalidFormat;\n  }\n  std::memcpy(&chunk_meta_, data_block.data(), size);\n  if (chunk_meta_.chunk_size != chunk_size) {\n    LOG_ERROR(\n        \"Params hnsw chunk size=%zu mismatch from previous %zu \"\n        \"in index\",\n        chunk_size, (size_t)chunk_meta_.chunk_size);\n    return IndexError_Mismatch;\n  }\n\n  *stats_.mutable_check_point() = stg_->check_point();\n  stats_.set_revision_id(chunk_meta_.revision_id);\n  stats_.set_update_time(chunk_meta_.update_time);\n  stats_.set_create_time(chunk_meta_.create_time);\n\n  char create_time[32];\n  char update_time[32];\n  ailego::Realtime::Gmtime(chunk_meta_.create_time, \"%Y-%m-%d %H:%M:%S\",\n                           create_time, sizeof(create_time));\n  ailego::Realtime::Gmtime(chunk_meta_.update_time, \"%Y-%m-%d %H:%M:%S\",\n                           update_time, sizeof(update_time));\n  LOG_DEBUG(\n      \"Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu \"\n      \"upperNeighborChunks=%zu revisionId=%zu \"\n      \"createTime=%s updateTime=%s\",\n      (size_t)chunk_meta_.total_size, 
(size_t)chunk_meta_.chunk_size,\n      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE],\n      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR],\n      (size_t)chunk_meta_.revision_id, create_time, update_time);\n\n  return 0;\n}\n\nint SparseChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,\n                            size_t chunk_size, bool check_crc) {\n  if (ailego_unlikely(stg_)) {\n    LOG_ERROR(\"An storage instance is already opened\");\n    return IndexError_Duplicate;\n  }\n  stg_ = std::move(stg);\n  check_crc_ = check_crc;\n  max_chunks_size_ = max_index_size;\n  dirty_ = false;\n\n  const std::string segment_id =\n      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);\n  chunk_meta_segment_ = stg_->get(segment_id);\n  if (!chunk_meta_segment_) {\n    LOG_DEBUG(\"Create new index\");\n    return init_storage(chunk_size);\n  }\n\n  return load_storage(chunk_size);\n}\n\nint SparseChunkBroker::close(void) {\n  flush(0UL);\n\n  stg_.reset();\n  check_crc_ = false;\n  dirty_ = false;\n\n  return 0;\n}\n\nint SparseChunkBroker::flush(uint64_t checkpoint) {\n  ailego_assert_with(chunk_meta_segment_, \"invalid meta segment\");\n\n  chunk_meta_.update_time = ailego::Realtime::Seconds();\n  stats_.set_update_time(chunk_meta_.update_time);\n\n  size_t size = chunk_meta_segment_->write(0UL, &chunk_meta_,\n                                           sizeof(HnswSparseChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswSparseChunkMeta))) {\n    LOG_ERROR(\"Storage write data failed, wsize=%zu\", size);\n  }\n\n  stg_->refresh(checkpoint);\n  int ret = stg_->flush();\n  if (ret == 0) {\n    (*stats_.mutable_check_point()) = checkpoint;\n  } else {\n    LOG_ERROR(\"Storage flush failed for %s\", IndexError::What(ret));\n  }\n  return ret;\n}\n\nstd::pair<int, SparseChunk::Pointer> SparseChunkBroker::alloc_chunk(\n    int type, uint64_t seq_id, size_t size) {\n  ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n\n  
SparseChunk::Pointer chunk;\n  if (ailego_unlikely(!stg_)) {\n    LOG_ERROR(\"Init storage first\");\n    return std::make_pair(IndexError_Uninitialized, chunk);\n  }\n\n  //! check exist a empty chunk with the same name\n  chunk = get_chunk(type, seq_id);\n  if (chunk) {\n    if (ailego_unlikely(chunk->capacity() == size &&\n                        chunk->data_size() == 0UL)) {\n      LOG_ERROR(\"Exist invalid chunk size %zu, expect size %zu\",\n                chunk->capacity(), size);\n      chunk.reset();\n      return std::make_pair(IndexError_Runtime, chunk);\n    }\n    return std::make_pair(0, chunk);\n  }\n  //! align to page size\n  size = (size + page_mask_) & (~page_mask_);\n  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {\n    LOG_ERROR(\"No space to new a chunk, curIndexSize=%zu allocSize=%zu\",\n              (size_t)chunk_meta_.total_size, size);\n    return std::make_pair(IndexError_IndexFull, chunk);\n  }\n\n  std::string segment_id = make_segment_id(type, seq_id);\n  int ret = stg_->append(segment_id, size);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Storage append segment failed for %s\", IndexError::What(ret));\n    return std::make_pair(ret, chunk);\n  }\n  chunk_meta_.chunk_cnts[type] += 1;\n  chunk_meta_.total_size += size;\n  (*stats_.mutable_index_size()) += size;\n\n  size = chunk_meta_segment_->write(0UL, &chunk_meta_,\n                                    sizeof(HnswSparseChunkMeta));\n  if (ailego_unlikely(size != sizeof(HnswSparseChunkMeta))) {\n    LOG_ERROR(\"Storage append segment failed, wsize=%zu\", size);\n  }\n\n  chunk = get_chunk(type, seq_id);\n  return std::make_pair(chunk ? 0 : IndexError_NoMemory, chunk);\n}\n\nSparseChunk::Pointer SparseChunkBroker::get_chunk(int type,\n                                                  uint64_t seq_id) const {\n  std::string segment_id = make_segment_id(type, seq_id);\n  return stg_->get(segment_id);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_chunk.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <string.h>\n#include <unistd.h>\n#include <atomic>\n#include <cstddef>\n#include <mutex>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\nusing SparseChunk = IndexStorage::Segment;\n\nclass SparseChunkBroker {\n public:\n  typedef std::shared_ptr<SparseChunkBroker> Pointer;\n\n  enum CHUNK_TYPE {\n    CHUNK_TYPE_HEADER = 1,\n    CHUNK_TYPE_META = 2,\n    CHUNK_TYPE_NODE = 3,\n    CHUNK_TYPE_UPPER_NEIGHBOR = 4,\n    CHUNK_TYPE_NEIGHBOR_INDEX = 5,\n    CHUNK_TYPE_SPARSE_NODE = 6,\n    CHUNK_TYPE_MAX = 8\n  };\n  static constexpr size_t kDefaultChunkSeqId = 0UL;\n\n  SparseChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {\n    page_mask_ = ailego::MemoryHelper::PageSize() - 1;\n  }\n\n  //! Open storage\n  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,\n           bool check_crc);\n\n  int close(void);\n\n  int flush(uint64_t checkpoint);\n\n  //! 
alloc a new chunk with size, not thread-safe\n  std::pair<int, SparseChunk::Pointer> alloc_chunk(int type, uint64_t seq_id,\n                                                   size_t size);\n\n  //! alloc a new chunk with chunk size\n  inline std::pair<int, SparseChunk::Pointer> alloc_chunk(int type,\n                                                          uint64_t seq_id) {\n    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);\n  }\n\n  SparseChunk::Pointer get_chunk(int type, uint64_t seq_id) const;\n\n  inline size_t get_chunk_cnt(int type) const {\n    ailego_assert_with(type < CHUNK_TYPE_MAX, \"chunk type overflow\");\n    return chunk_meta_.chunk_cnts[type];\n  }\n\n  inline bool dirty(void) const {\n    return dirty_;\n  }\n\n  inline void mark_dirty(void) {\n    if (!dirty_) {\n      dirty_ = true;\n      chunk_meta_.revision_id += 1;\n      stats_.set_revision_id(chunk_meta_.revision_id);\n    }\n  }\n\n  const IndexStorage::Pointer storage(void) const {\n    return stg_;\n  }\n\n private:\n  SparseChunkBroker(const SparseChunkBroker &) = delete;\n  SparseChunkBroker &operator=(const SparseChunkBroker &) = delete;\n\n  struct HnswSparseChunkMeta {\n    HnswSparseChunkMeta(void) {\n      memset(this, 0, sizeof(HnswSparseChunkMeta));\n    }\n    void clear() {\n      memset(this, 0, sizeof(HnswSparseChunkMeta));\n    }\n\n    uint64_t chunk_cnts[CHUNK_TYPE_MAX];\n    uint64_t chunk_size;   // size of per chunk\n    uint64_t total_size;   // total size of allocated chunk\n    uint64_t revision_id;  // index revision\n    uint64_t create_time;\n    uint64_t update_time;\n    uint64_t reserved[3];\n  };\n\n  static_assert(sizeof(HnswSparseChunkMeta) % 32 == 0,\n                \"HnswSparseChunkMeta must be aligned with 32 bytes\");\n\n  //! Init the storage after open an empty index\n  int init_storage(size_t chunk_size);\n\n  //! 
Load index from storage\n  int load_storage(size_t chunk_size);\n\n  static inline const std::string make_segment_id(int type, uint64_t seq_id) {\n    return \"HnswT\" + ailego::StringHelper::ToString(type) + \"S\" +\n           ailego::StringHelper::ToString(seq_id);\n  }\n\n private:\n  IndexStreamer::Stats &stats_;\n  HnswSparseChunkMeta chunk_meta_{};\n  size_t page_mask_{0UL};\n  size_t max_chunks_size_{0UL};\n  IndexStorage::Pointer stg_{};\n  IndexStorage::Segment::Pointer chunk_meta_segment_{};\n  bool check_crc_{false};\n  bool dirty_{false};  // set as true if index is modified , the flag\n                       // will not be cleared even if flushed\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_context.h\"\n#include <chrono>\n#include \"hnsw_sparse_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseContext::HnswSparseContext(const IndexMetric::Pointer &metric,\n                                     const HnswSparseEntity::Pointer &entity)\n    : IndexContext(metric), entity_(entity), dc_(entity_.get(), metric) {}\n\nHnswSparseContext::~HnswSparseContext() {\n  visit_filter_.destroy();\n}\n\nint HnswSparseContext::init(ContextType type) {\n  int ret;\n  uint32_t doc_cnt;\n\n  type_ = type;\n\n  switch (type) {\n    case kSparseBuilderContext:\n      ret = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      break;\n\n    case kSparseSearcherContext:\n      ret = visit_filter_.init(filter_mode_, entity_->doc_cnt(), max_scan_num_,\n                               negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n      candidates_.limit(max_scan_num_);\n      break;\n\n    case kSparseStreamerContext:\n     
 // maxScanNum is unknown if inited from streamer, so the docCnt may\n      // change. we need to compute maxScanNum by scan ratio, and preserve\n      // max_doc_cnt space from visit filter\n      doc_cnt = entity_->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      ret = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,\n                               max_scan_num_, negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n\n      update_heap_.limit(entity_->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n\n      check_need_adjuct_ctx();\n      break;\n\n    default:\n      LOG_ERROR(\"Init context failed\");\n      return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nint HnswSparseContext::update(const ailego::Params &params) {\n  LOG_DEBUG(\"Update hnsw context params\");\n\n  auto update_visit_filter_param = [&]() {\n    bool need_update = false;\n    std::string p;\n    switch (type_) {\n      case kSparseSearcherContext:\n        p = PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n      case kSparseStreamerContext:\n        p = PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE;\n        break;\n    }\n\n    if (params.has(p)) {\n      bool bf_enabled;\n      params.get(p, &bf_enabled);\n      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {\n        need_update = true;\n        filter_mode_ =\n            bf_enabled ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap;\n      }\n    }\n\n    float prob = negative_probability_;\n    p.clear();\n    switch (type_) {\n      case kSparseSearcherContext:\n        p = PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n      case kSparseStreamerContext:\n        p = PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;\n        break;\n    }\n    params.get(p, &prob);\n    if (filter_mode_ == VisitFilter::BloomFilter &&\n        std::abs(prob - negative_probability_) > 1e-6) {\n      need_update = true;\n    }\n    if (need_update) {\n      visit_filter_.destroy();\n      int max_doc_cnt = 0;\n      if (type_ == kSparseSearcherContext) {\n        max_doc_cnt = entity_->doc_cnt();\n      } else {\n        max_doc_cnt = reserve_max_doc_cnt_;\n      }\n      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,\n                                   negative_probability_);\n      if (ret != 0) {\n        LOG_ERROR(\"Create filter failed,  mode %d\", filter_mode_);\n        return ret;\n      }\n    }\n    return 0;\n  };\n\n  switch (type_) {\n    case kSparseSearcherContext:\n      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_EF)) {\n        params.get(PARAM_HNSW_SPARSE_SEARCHER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n      }\n\n      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO)) {\n        params.get(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n        max_scan_num_ =\n            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());\n        max_scan_num_ = std::max(10000U, max_scan_num_);\n      }\n\n      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    case kSparseStreamerContext:\n      if (params.has(PARAM_HNSW_SPARSE_STREAMER_EF)) {\n        
params.get(PARAM_HNSW_SPARSE_STREAMER_EF, &ef_);\n        topk_heap_.limit(std::max(topk_, ef_));\n      }\n      params.get(PARAM_HNSW_SPARSE_STREAMER_EF, &ef_);\n      params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n      params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n      params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n      if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n        LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n                  PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO.c_str());\n        return IndexError_InvalidArgument;\n      }\n      if (max_scan_limit_ < min_scan_limit_) {\n        LOG_ERROR(\"[%s] must be >= [%s]\",\n                  PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT.c_str(),\n                  PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT.c_str());\n        return IndexError_InvalidArgument;\n      }\n\n      if (params.has(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD)) {\n        params.get(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD,\n                   &bruteforce_threshold_);\n      }\n\n      return update_visit_filter_param();\n\n    default:\n      LOG_ERROR(\"update context failed, type=%u\", type_);\n      return IndexError_Runtime;\n  }\n}\n\n\nint HnswSparseContext::update_context(ContextType type,\n                                      const IndexMeta & /*meta*/,\n                                      const IndexMetric::Pointer &metric,\n                                      const HnswSparseEntity::Pointer &entity,\n                                      uint32_t magic_num) {\n  uint32_t doc_cnt;\n\n  if (ailego_unlikely(type != type_)) {\n    LOG_ERROR(\n        \"HnswSparseContext doesn't support shared by different type, \"\n        \"src=%u dst=%u\",\n        type_, type);\n    return IndexError_Unsupported;\n  }\n\n  magic_ = kInvalidMgic;\n\n  // TODO: support change filter mode?\n  switch (type) {\n    case 
kSparseBuilderContext:\n      LOG_ERROR(\"BuildContext doesn't support update\");\n      return IndexError_NotImplemented;\n\n    case kSparseSearcherContext:\n      if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {\n        LOG_ERROR(\"Reset filter failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    case kSparseStreamerContext:\n      doc_cnt = entity->doc_cnt();\n      max_scan_num_ = compute_max_scan_num(doc_cnt);\n      reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);\n      if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {\n        LOG_ERROR(\"Reset filter failed, mode %d\", visit_filter_.get_mode());\n        return IndexError_Runtime;\n      }\n\n      update_heap_.limit(entity->l0_neighbor_cnt() + 1);\n      candidates_.limit(max_scan_num_);\n      topk_heap_.limit(std::max(topk_, ef_));\n      break;\n\n    default:\n      LOG_ERROR(\"update context failed\");\n      return IndexError_Runtime;\n  }\n\n  entity_ = entity;\n  dc_.update(entity_.get(), metric);\n  magic_ = magic_num;\n  level_topks_.clear();\n\n  return 0;\n}\n\nvoid HnswSparseContext::fill_random_to_topk_full(void) {\n  static std::mt19937 mt(\n      std::chrono::system_clock::now().time_since_epoch().count());\n  std::uniform_int_distribution<node_id_t> dt(0, entity_->doc_cnt() - 1);\n  std::function<node_id_t()> gen;\n  node_id_t seqid;\n  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };\n  if (this->filter().is_valid()) {\n    myfilter = [&](node_id_t id) {\n      return this->filter()(entity_->get_key(id));\n    };\n  }\n\n  if (topk_heap_.limit() < entity_->doc_cnt() / 2) {\n    gen = [&](void) { return dt(mt); };\n  } else {\n    // If topk limit is big value, gen sequential id from an random initial\n    seqid = dt(mt);\n    gen = [&](void) {\n      seqid = seqid == 
(entity_->doc_cnt() - 1) ? 0 : (seqid + 1);\n      return seqid;\n    };\n  }\n\n  for (size_t i = 0; !topk_heap_.full() && i < entity_->doc_cnt(); ++i) {\n    const auto id = gen();\n    if (!visit_filter_.visited(id) && !myfilter(id)) {\n      visit_filter_.set_visited(id);\n      topk_heap_.emplace(id, dc_.dist(id));\n    }\n  }\n  return;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_context.h>\n#include \"utility/sparse_utility.h\"\n#include \"utility/visit_filter.h\"\n#include \"hnsw_sparse_dist_calculator.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseContext : public IndexContext {\n public:\n  //! Index Context Pointer\n  typedef std::unique_ptr<HnswSparseContext> Pointer;\n\n  enum ContextType {\n    kUnknownContext = 0,\n    kSparseSearcherContext = 1,\n    kSparseBuilderContext = 2,\n    kSparseStreamerContext = 3,\n  };\n\n  //! Construct\n  HnswSparseContext(const IndexMetric::Pointer &metric,\n                    const HnswSparseEntity::Pointer &entity);\n\n  //! Destructor\n  virtual ~HnswSparseContext();\n\n public:\n  //! Set topk of search result\n  virtual void set_topk(uint32_t val) override {\n    topk_ = val;\n    topk_heap_.limit(std::max(val, ef_));\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(void) const override {\n    return results_[0];\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(size_t idx) const override {\n    return results_[idx];\n  }\n\n  //! Retrieve result object for output\n  virtual IndexDocumentList *mutable_result(size_t idx) override {\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    return &results_[idx];\n  }\n\n  //! 
Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const override {\n    return group_results_[0];\n  }\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(\n      size_t idx) const override {\n    return group_results_[idx];\n  }\n\n  virtual uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  //! Set mode of debug\n  virtual void set_debug_mode(bool enable) override {\n    debug_mode_ = enable;\n  }\n\n  //! Retrieve mode of debug\n  virtual bool debug_mode(void) const override {\n    return this->debugging();\n  }\n\n  //! Retrieve string of debug\n  virtual std::string debug_string(void) const override {\n    char buf[4096];\n    size_t size = snprintf(\n        buf, sizeof(buf),\n        \"scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u\",\n        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,\n        stats_visit_dup_cnt_);\n    return std::string(buf, size);\n  }\n\n  //! Update the parameters of context\n  virtual int update(const ailego::Params &params) override;\n\n public:\n  //! Init context\n  int init(ContextType type);\n\n  //! Update context, the context may be shared by different searcher/streamer\n  int update_context(ContextType type, const IndexMeta &meta,\n                     const IndexMetric::Pointer &metric,\n                     const HnswSparseEntity::Pointer &entity,\n                     uint32_t magic_num);\n\n  inline const HnswSparseEntity &get_entity() const {\n    return *entity_;\n  }\n\n  inline void resize_results(size_t size) {\n    if (group_by_search()) {\n      group_results_.resize(size);\n    } else {\n      results_.resize(size);\n    }\n  }\n\n  inline void topk_to_result() {\n    return topk_to_result(0);\n  }\n\n  //! 
Construct result from topk heap, result will be normalized\n  inline void topk_to_result(uint32_t idx) {\n    if (group_by_search()) {\n      topk_to_group_result(idx);\n    } else {\n      topk_to_single_result(idx);\n    }\n  }\n\n  inline void recal_topk_dist() {\n    TopkHeap heap(topk_heap_);\n    topk_heap_.clear();\n\n    for (size_t i = 0; i < heap.size(); ++i) {\n      node_id_t id = heap[i].first;\n      dist_t dist = dc_.dist(id);\n      topk_heap_.emplace_back(id, dist);\n    }\n  }\n\n  inline void topk_to_single_result(uint32_t idx) {\n    if (force_padding_topk_ && !topk_heap_.full() &&\n        topk_heap_.size() < entity_->doc_cnt()) {\n      this->fill_random_to_topk_full();\n    }\n    if (ailego_unlikely(topk_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));\n    topk_heap_.sort();\n    results_[idx].clear();\n\n    for (int i = 0; i < size; ++i) {\n      auto score = topk_heap_[i].second;\n      if (score > this->threshold()) {\n        break;\n      }\n\n      node_id_t id = topk_heap_[i].first;\n      if (fetch_vector_) {\n        IndexSparseDocument sparse_doc;\n        IndexStorage::MemoryBlock vec_block;\n        entity_->get_sparse_data(id, vec_block);\n        const void *sparse_data = vec_block.data();\n        if (sparse_data != nullptr) {\n          SparseUtility::ReverseSparseFormat(sparse_data, sparse_doc,\n                                             entity_->sparse_unit_size());\n        }\n\n        results_[idx].emplace_back(entity_->get_key(id), score, id,\n                                   entity_->get_vector_meta(id), sparse_doc);\n      } else {\n        results_[idx].emplace_back(entity_->get_key(id), score, id);\n      }\n    }\n\n    return;\n  }\n\n  //! 
Construct result from topk heap, result will be normalized\n  inline void topk_to_group_result(uint32_t idx) {\n    ailego_assert_with(idx < group_results_.size(), \"invalid idx\");\n\n    group_results_[idx].clear();\n\n    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;\n    std::vector<std::pair<std::string, float>> best_score_in_groups;\n    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();\n         itr++) {\n      const std::string &group_id = (*itr).first;\n      auto &heap = (*itr).second;\n      heap.sort();\n\n      if (heap.size() > 0) {\n        float best_score = heap[0].second;\n        best_score_in_groups.push_back(std::make_pair(group_id, best_score));\n      }\n    }\n\n    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),\n              [](const std::pair<std::string, float> &a,\n                 const std::pair<std::string, float> &b) -> int {\n                return a.second < b.second;\n              });\n\n    // truncate to group num\n    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();\n         ++i) {\n      const std::string &group_id = best_score_in_groups[i].first;\n\n      group_topk_list.emplace_back(\n          std::make_pair(group_id, group_topk_heaps_[group_id]));\n    }\n\n    group_results_[idx].resize(group_topk_list.size());\n\n    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {\n      const std::string &group_id = group_topk_list[i].first;\n      group_results_[idx][i].set_group_id(group_id);\n\n      uint32_t size = std::min(\n          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));\n\n      for (uint32_t j = 0; j < size; ++j) {\n        auto score = group_topk_list[i].second[j].second;\n        if (score > this->threshold()) {\n          break;\n        }\n\n        node_id_t id = group_topk_list[i].second[j].first;\n\n        if (fetch_vector_) {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n  
            entity_->get_key(id), score, id, entity_->get_vector_meta(id));\n        } else {\n          group_results_[idx][i].mutable_docs()->emplace_back(\n              entity_->get_key(id), score, id);\n        }\n      }\n    }\n  }\n\n  inline void reset_query(const void *query) {\n    dc_.reset_query(query);\n    dc_.clear_compare_cnt();\n  }\n\n  inline HnswSparseDistCalculator &dist_calculator() {\n    return dc_;\n  }\n\n  inline TopkHeap &topk_heap() {\n    return topk_heap_;\n  }\n\n  inline TopkHeap &update_heap() {\n    return update_heap_;\n  }\n\n  inline VisitFilter &visit_filter() {\n    return visit_filter_;\n  }\n\n  inline CandidateHeap &candidates() {\n    return candidates_;\n  }\n\n  inline void set_max_scan_num(uint32_t max_scan_num) {\n    max_scan_num_ = max_scan_num;\n  }\n\n  inline void set_max_scan_limit(uint32_t max_scan_limit) {\n    max_scan_limit_ = max_scan_limit;\n  }\n\n  inline void set_min_scan_limit(uint32_t min_scan_limit) {\n    min_scan_limit_ = min_scan_limit;\n  }\n\n  inline void set_ef(uint32_t v) {\n    ef_ = v;\n  }\n\n  inline void set_filter_mode(uint32_t v) {\n    filter_mode_ = v;\n  }\n\n  inline void set_filter_negative_probability(float v) {\n    negative_probability_ = v;\n  }\n\n  inline void set_max_scan_ratio(float v) {\n    max_scan_ratio_ = v;\n  }\n\n  virtual void set_magic(uint32_t v) {\n    magic_ = v;\n  }\n\n  virtual void set_force_padding_topk(bool v) {\n    force_padding_topk_ = v;\n  }\n\n  virtual void set_bruteforce_threshold(uint32_t v) override {\n    bruteforce_threshold_ = v;\n  }\n\n  inline uint32_t get_bruteforce_threshold() const {\n    return bruteforce_threshold_;\n  }\n\n  virtual void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  virtual bool fetch_vector() const override {\n    return fetch_vector_;\n  }\n\n  //! 
Reset context\n  void reset(void) override {\n    set_filter(nullptr);\n    reset_threshold();\n    set_fetch_vector(false);\n    set_group_params(0, 0);\n    reset_group_by();\n  }\n\n  inline std::map<std::string, TopkHeap> &group_topk_heaps() {\n    return group_topk_heaps_;\n  }\n\n  inline TopkHeap &level_topk(int level) {\n    if (ailego_unlikely(level_topks_.size() <= static_cast<size_t>(level))) {\n      int cur_level = level_topks_.size();\n      level_topks_.resize(level + 1);\n      for (; cur_level <= level; ++cur_level) {\n        size_t heap_size = std::max(entity_->neighbor_cnt(cur_level),\n                                    entity_->ef_construction());\n        level_topks_[cur_level].clear();\n        level_topks_[cur_level].limit(heap_size);\n      }\n    }\n\n    return level_topks_[level];\n  }\n\n  inline void check_need_adjuct_ctx(void) {\n    check_need_adjuct_ctx(entity_->doc_cnt());\n  }\n\n  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {\n    if (cur_doc > kMaxReserveDocCnt) {\n      return kMaxReserveDocCnt;\n    } else if (cur_doc < kMinReserveDocCnt) {\n      return kMinReserveDocCnt;\n    }\n    return cur_doc;\n  }\n\n  //! 
candidates heap and visitfilter need to resize as doc cnt growing up\n  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {\n    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {\n      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {\n        reserve_max_doc_cnt_ =\n            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);\n      }\n      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);\n      max_scan_num_ = max_scan_cnt;\n      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);\n      candidates_.clear();\n      candidates_.limit(max_scan_num_);\n    }\n  }\n\n  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {\n    uint32_t max_scan = max_doc_cnt * max_scan_ratio_;\n    if (max_scan < min_scan_limit_) {\n      max_scan = min_scan_limit_;\n    } else if (max_scan > max_scan_limit_) {\n      max_scan = max_scan_limit_;\n    }\n    return max_scan;\n  }\n\n  inline size_t get_scan_num() const {\n    return dc_.compare_cnt();\n  }\n\n  inline uint64_t reach_scan_limit() const {\n    return dc_.compare_cnt() >= max_scan_num_;\n  }\n\n  inline bool error() const {\n    return dc_.error();\n  }\n\n  inline void clear() {\n    dc_.clear();\n    if (ailego_unlikely(this->debugging())) {\n      stats_get_neighbors_cnt_ = 0u;\n      stats_get_vector_cnt_ = 0u;\n      stats_visit_dup_cnt_ = 0u;\n    }\n    // do not clear results_ for the next query will need it\n    for (auto &it : results_) {\n      it.clear();\n    }\n  }\n\n  uint32_t *mutable_stats_get_neighbors() {\n    return &stats_get_neighbors_cnt_;\n  }\n\n  uint32_t *mutable_stats_get_vector() {\n    return &stats_get_vector_cnt_;\n  }\n\n  uint32_t *mutable_stats_visit_dup_cnt() {\n    return &stats_visit_dup_cnt_;\n  }\n\n  inline bool debugging(void) const {\n    return debug_mode_;\n  }\n\n  inline void update_dist_caculator_distance(\n      const IndexMetric::MatrixSparseDistance &distance) {\n    
dc_.update_distance(distance);\n  }\n\n  //! Get topk\n  inline uint32_t topk() const override {\n    return topk_;\n  }\n\n  //! Get group topk\n  inline uint32_t group_topk() const {\n    return group_topk_;\n  }\n\n  //! Get group num\n  inline uint32_t group_num() const {\n    return group_num_;\n  }\n\n  //! Get if group by search\n  inline bool group_by_search() {\n    return group_num_ > 0;\n  }\n\n  //! Set group params\n  void set_group_params(uint32_t group_num, uint32_t group_topk) override {\n    group_num_ = group_num;\n    group_topk_ = group_topk;\n\n    topk_ = group_topk_ * group_num_;\n\n    topk_heap_.limit(std::max(topk_, ef_));\n\n    group_topk_heaps_.clear();\n  }\n\n private:\n  // Filling random nodes if topk not full\n  void fill_random_to_topk_full(void);\n\n  constexpr static uint32_t kTriggerReserveCnt = 4096UL;\n  constexpr static uint32_t kMinReserveDocCnt = 4096UL;\n  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;\n  constexpr static uint32_t kInvalidMgic = -1U;\n\n private:\n  HnswSparseEntity::Pointer entity_;\n  HnswSparseDistCalculator dc_;\n  bool debug_mode_{false};\n  bool force_padding_topk_{false};\n  uint32_t max_scan_num_{0};\n  uint32_t max_scan_limit_{0};\n  uint32_t min_scan_limit_{0};\n  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};\n  uint32_t topk_{0};\n  uint32_t group_topk_{0};\n  uint32_t filter_mode_{VisitFilter::ByteMap};\n  float negative_probability_{HnswSparseEntity::kDefaultBFNegativeProbability};\n  uint32_t ef_{HnswSparseEntity::kDefaultEf};\n  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};\n  uint32_t magic_{0U};\n  std::vector<IndexDocumentList> results_{};\n  std::vector<IndexGroupDocumentList> group_results_{};\n  TopkHeap topk_heap_{};\n  TopkHeap update_heap_{};\n  std::vector<TopkHeap> level_topks_{};\n  CandidateHeap candidates_{};\n  VisitFilter visit_filter_{};\n  uint32_t bruteforce_threshold_{};\n  bool fetch_vector_{false};\n\n  uint32_t group_num_{0};\n  
std::map<std::string, TopkHeap> group_topk_heaps_{};\n\n  uint32_t type_{kUnknownContext};\n  //! debug stats info\n  uint32_t stats_get_neighbors_cnt_{0u};\n  uint32_t stats_get_vector_cnt_{0u};\n  uint32_t stats_visit_dup_cnt_{0u};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_dist_calculator.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_meta.h>\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseDistCalculator {\n public:\n  typedef std::shared_ptr<HnswSparseDistCalculator> Pointer;\n\n public:\n  //! Constructor\n  HnswSparseDistCalculator(const HnswSparseEntity *entity,\n                           const IndexMetric::Pointer &metric)\n      : entity_(entity),\n        distance_(metric->sparse_distance()),\n        query_{nullptr},\n        compare_cnt_(0) {}\n\n  //! Constructor\n  HnswSparseDistCalculator(const HnswSparseEntity *entity,\n                           const IndexMetric::Pointer &metric,\n                           const void *query)\n      : entity_(entity),\n        distance_(metric->sparse_distance()),\n        query_(query),\n        compare_cnt_(0) {}\n\n  void update(const HnswSparseEntity *entity,\n              const IndexMetric::Pointer &metric) {\n    entity_ = entity;\n    distance_ = metric->sparse_distance();\n  }\n\n  inline void update_distance(\n      const IndexMetric::MatrixSparseDistance &distance) {\n    distance_ = distance;\n  }\n\n  //! Reset query vector data\n  inline void reset_query(const void *query) {\n    error_ = false;\n    query_ = query;\n  }\n\n  //! 
Returns distance\n  inline dist_t dist(const void *sparse_data_lhs, const void *sparse_data_rhs) {\n    float score{0.0f};\n\n    if (ailego_unlikely(sparse_data_lhs == nullptr ||\n                        sparse_data_rhs == nullptr)) {\n      // LOG_WARN(\"Nullptr of sparse vector. Return dense score only\");\n      // error_ = true;\n      return score;\n    }\n\n    distance_(sparse_data_lhs, sparse_data_rhs, &score);\n\n    return score;\n  }\n\n  //! Returns distance between query and vec.\n  inline dist_t dist(const void *vec) {\n    compare_cnt_++;\n\n    auto sparse_data = entity_->get_sparse_data_from_vector(vec);\n    if (sparse_data.first == nullptr) {\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(sparse_data.first, query_);\n  }\n\n  //! Return distance between query and node id.\n  inline dist_t dist(node_id_t id) {\n    compare_cnt_++;\n\n    const void *feat = entity_->get_vector_meta(id);\n    if (ailego_unlikely(feat == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector, id=%u\", id);\n      error_ = true;\n      return 0.0f;\n    }\n\n    auto sparse_data = entity_->get_sparse_data_from_vector(feat);\n    if (sparse_data.first == nullptr) {\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(sparse_data.first, query_);\n  }\n\n  //! 
Return dist node lhs between node rhs\n  inline dist_t dist(node_id_t lhs, node_id_t rhs) {\n    compare_cnt_++;\n\n    const void *feat = entity_->get_vector_meta(lhs);\n    const void *query = entity_->get_vector_meta(rhs);\n    if (ailego_unlikely(feat == nullptr || query == nullptr)) {\n      LOG_ERROR(\"Get nullptr vector\");\n      error_ = true;\n      return 0.0f;\n    }\n\n    auto feat_sparse_data = entity_->get_sparse_data_from_vector(feat);\n    if (feat_sparse_data.first == nullptr) {\n      error_ = true;\n      return 0.0f;\n    }\n\n    auto query_sparse_data = entity_->get_sparse_data_from_vector(query);\n    if (query_sparse_data.first == nullptr) {\n      error_ = true;\n      return 0.0f;\n    }\n\n    return dist(feat_sparse_data.first, query_sparse_data.first);\n  }\n\n  dist_t operator()(const void *vec) {\n    return dist(vec);\n  }\n\n  dist_t operator()(id_t i) {\n    return dist(i);\n  }\n\n  dist_t operator()(id_t lhs, id_t rhs) {\n    return dist(lhs, rhs);\n  }\n\n  inline void clear() {\n    compare_cnt_ = 0;\n    error_ = false;\n  }\n\n  inline void clear_compare_cnt() {\n    compare_cnt_ = 0;\n  }\n\n  inline bool error() const {\n    return error_;\n  }\n\n  //! Get distances compute times\n  inline uint32_t compare_cnt() const {\n    return compare_cnt_;\n  }\n\n private:\n  HnswSparseDistCalculator(const HnswSparseDistCalculator &) = delete;\n  HnswSparseDistCalculator &operator=(const HnswSparseDistCalculator &) =\n      delete;\n\n private:\n  const HnswSparseEntity *entity_;\n\n  IndexMetric::MatrixSparseDistance distance_;\n\n  const void *query_;\n\n  uint32_t compare_cnt_;  // record distance compute times\n  bool error_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nconst std::string HnswSparseEntity::kSparseGraphHeaderSegmentId =\n    \"sparse_graph.header\";\nconst std::string HnswSparseEntity::kSparseGraphFeaturesSegmentId =\n    \"sparse_graph.features\";\nconst std::string HnswSparseEntity::kSparseGraphKeysSegmentId =\n    \"sparse_graph.keys\";\nconst std::string HnswSparseEntity::kSparseGraphNeighborsSegmentId =\n    \"sparse_graph.neighbors\";\nconst std::string HnswSparseEntity::kSparseGraphOffsetsSegmentId =\n    \"sparse_graph.offsets\";\nconst std::string HnswSparseEntity::kSparseGraphMappingSegmentId =\n    \"sparse_graph.mapping\";\nconst std::string HnswSparseEntity::kSparseHnswHeaderSegmentId =\n    \"sparse_hnsw.header\";\nconst std::string HnswSparseEntity::kSparseHnswNeighborsSegmentId =\n    \"sparse_hnsw.neighbors\";\nconst std::string HnswSparseEntity::kSparseHnswOffsetsSegmentId =\n    \"sparse_hnsw.offsets\";\nconst std::string HnswSparseEntity::kSparseGraphVectorsSegmentId =\n    \"sparse_graph.vectors\";\nconst std::string HnswSparseEntity::kSparseGraphVectorMetaSegmentId =\n    \"sparse_graph.vector_meta\";\n\nint HnswSparseEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                                        size_t data_size,\n                                        size_t 
*padding_size) {\n  *padding_size = AlignSize(data_size) - data_size;\n  if (*padding_size == 0) {\n    return 0;\n  }\n\n  std::string padding(*padding_size, '\\0');\n  if (dumper->write(padding.data(), *padding_size) != *padding_size) {\n    LOG_ERROR(\"Append padding failed, size %lu\", *padding_size);\n    return IndexError_WriteData;\n  }\n  return 0;\n}\n\nint64_t HnswSparseEntity::dump_segment(const IndexDumper::Pointer &dumper,\n                                       const std::string &segment_id,\n                                       const void *data, size_t size) const {\n  size_t len = dumper->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect: %lu, actual: %lu\",\n              segment_id.c_str(), size, len);\n    return IndexError_WriteData;\n  }\n\n  size_t padding_size = AlignSize(size) - size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, '\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      LOG_ERROR(\"Append padding failed, size %lu\", padding_size);\n      return IndexError_WriteData;\n    }\n  }\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  int ret = dumper->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return ret;\n  }\n\n  return len + padding_size;\n}\n\nint64_t HnswSparseEntity::dump_header(const IndexDumper::Pointer &dumper,\n                                      const HNSWSparseHeader &hd) const {\n  //! dump basic graph header. header is aligned and does not need padding\n  int64_t graph_hd_size = dump_segment(dumper, kSparseGraphHeaderSegmentId,\n                                       &hd.graph, hd.graph.size);\n  if (graph_hd_size < 0) {\n    return graph_hd_size;\n  }\n\n  //! dump basic graph header. 
header is aligned and does not need padding\n  int64_t hnsw_hd_size =\n      dump_segment(dumper, kSparseHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);\n  if (hnsw_hd_size < 0) {\n    return hnsw_hd_size;\n  }\n\n  return graph_hd_size + hnsw_hd_size;\n}\n\nvoid HnswSparseEntity::reshuffle_vectors(\n    const std::function<level_t(node_id_t)> & /*get_level*/,\n    std::vector<node_id_t> * /*n2o_mapping*/,\n    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {\n  // TODO\n  return;\n}\n\nint64_t HnswSparseEntity::dump_mapping_segment(\n    const IndexDumper::Pointer &dumper, const key_t *keys) const {\n  std::vector<node_id_t> mapping(doc_cnt());\n\n  std::iota(mapping.begin(), mapping.end(), 0U);\n  std::sort(mapping.begin(), mapping.end(),\n            [&](node_id_t i, node_id_t j) { return keys[i] < keys[j]; });\n\n  size_t size = mapping.size() * sizeof(node_id_t);\n  return dump_segment(dumper, kSparseGraphMappingSegmentId, mapping.data(),\n                      size);\n}\n\nint64_t HnswSparseEntity::dump_segments(\n    const IndexDumper::Pointer &dumper, key_t *keys,\n    const std::function<level_t(node_id_t)> &get_level) const {\n  HNSWSparseHeader dump_hd(header());\n\n  dump_hd.graph.node_size = sparse_meta_size();\n\n  std::vector<node_id_t> n2o_mapping;  // map new id to origin id\n  std::vector<node_id_t> o2n_mapping;  // map origin id to new id\n  reshuffle_vectors(get_level, &n2o_mapping, &o2n_mapping, keys);\n  if (!o2n_mapping.empty()) {\n    dump_hd.hnsw.entry_point = o2n_mapping[entry_point()];\n  }\n\n  //! Dump header\n  int64_t hd_size = dump_header(dumper, dump_hd);\n  if (hd_size < 0) {\n    return hd_size;\n  }\n\n  //! 
Dump vectors\n  int64_t sparse_vector_meta_size =\n      dump_sparse_vector_meta(dumper, n2o_mapping);\n  if (sparse_vector_meta_size < 0) {\n    return sparse_vector_meta_size;\n  }\n\n  int64_t sparse_vecs_size = dump_sparse_vector(dumper, n2o_mapping);\n  if (sparse_vecs_size < 0) {\n    return sparse_vecs_size;\n  }\n\n  //! Dump neighbors\n  auto neighbors_size =\n      dump_neighbors(dumper, get_level, n2o_mapping, o2n_mapping);\n  if (neighbors_size < 0) {\n    return neighbors_size;\n  }\n  //! free memory\n  n2o_mapping = std::vector<node_id_t>();\n  o2n_mapping = std::vector<node_id_t>();\n\n  //! Dump keys\n  size_t key_segment_size = doc_cnt() * sizeof(key_t);\n  int64_t keys_size =\n      dump_segment(dumper, kSparseGraphKeysSegmentId, keys, key_segment_size);\n  if (keys_size < 0) {\n    return keys_size;\n  }\n\n  //! Dump mapping\n  int64_t mapping_size = dump_mapping_segment(dumper, keys);\n  if (mapping_size < 0) {\n    return mapping_size;\n  }\n\n  return hd_size + keys_size + sparse_vector_meta_size + sparse_vecs_size +\n         neighbors_size + mapping_size;\n}\n\n\nint64_t HnswSparseEntity::dump_sparse_vector_meta(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping) const {\n  const void *data = nullptr;\n  uint32_t crc = 0U;\n  size_t dump_size = 0UL;\n\n  uint64_t sparse_data_offset = 0UL;\n  uint64_t sparse_data_len = 0UL;\n\n  //! dump vectors\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    data = get_vector_meta(reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    if (ailego_unlikely(!data)) {\n      return IndexError_ReadData;\n    }\n\n    const char *data_ptr = reinterpret_cast<const char *>(data);\n    sparse_data_len = *((uint32_t *)(data_ptr + sizeof(uint64_t)));\n\n    size_t len = dumper->write(&sparse_data_offset, sizeof(uint64_t));\n    if (len != sizeof(uint64_t)) {\n      LOG_ERROR(\"Dump sparse data offset failed, write=%zu expect=%zu\", len,\n                sizeof(uint64_t));\n      return IndexError_WriteData;\n    }\n\n    crc = ailego::Crc32c::Hash(&sparse_data_offset, sizeof(uint64_t), crc);\n    dump_size += sizeof(uint64_t);\n\n    len = dumper->write(&sparse_data_len, sizeof(uint64_t));\n    if (len != sizeof(uint64_t)) {\n      LOG_ERROR(\"Dump sparse data len failed, write=%zu expect=%zu\", len,\n                sizeof(uint64_t));\n      return IndexError_WriteData;\n    }\n\n    crc = ailego::Crc32c::Hash(&sparse_data_len, sizeof(uint64_t), crc);\n    dump_size += sizeof(uint64_t);\n\n    sparse_data_offset += sparse_data_len;\n  }\n\n  int ret =\n      dumper->append(kSparseGraphVectorMetaSegmentId, dump_size, 0UL, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump vectors segment meta failed, ret %d\", ret);\n    return ret;\n  }\n\n  return dump_size;\n}\n\nint64_t HnswSparseEntity::dump_sparse_vector(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping) const {\n  uint32_t crc = 0U;\n  size_t data_size = 0UL;\n  const void *data = nullptr;\n\n  uint64_t sparse_data_len = 0UL;\n  uint32_t sparse_chunk_index = 0U;\n  uint32_t sparse_chunk_offset = 0U;\n\n  //! dump vectors\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    data = get_vector_meta(reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    if (ailego_unlikely(!data)) {\n      return IndexError_ReadData;\n    }\n\n    const char *data_ptr = reinterpret_cast<const char *>(data);\n\n    sparse_data_len = *((uint32_t *)(data_ptr + sizeof(uint64_t)));\n\n    uint64_t sparse_offset = *((uint64_t *)(data_ptr));\n\n    const void *sparse = get_sparse_data(sparse_offset, sparse_data_len);\n    if (ailego_unlikely(sparse == nullptr)) {\n      LOG_ERROR(\"Get nullptr sparse, chunk index=%u, chunk offset=%u, len=%zu\",\n                sparse_chunk_index, sparse_chunk_offset,\n                (size_t)sparse_data_len);\n      return IndexError_ReadData;\n    }\n\n    size_t len = dumper->write(sparse, sparse_data_len);\n    if (len != sparse_data_len) {\n      LOG_ERROR(\"Dump sparse data failed, write=%zu expect=%zu\", len,\n                (size_t)sparse_data_len);\n      return IndexError_WriteData;\n    }\n\n    crc = ailego::Crc32c::Hash(sparse, sparse_data_len, crc);\n    data_size += sparse_data_len;\n  }\n\n  int ret = dumper->append(kSparseGraphVectorsSegmentId, data_size, 0UL, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump vectors segment meta failed, ret %d\", ret);\n    return ret;\n  }\n\n  return data_size;\n}\n\nint64_t HnswSparseEntity::dump_graph_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<SparseGraphNeighborMeta> graph_meta;\n  graph_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  std::vector<node_id_t> mapping(l0_neighbor_cnt());\n\n  uint32_t min_neighbor_count = 10000;\n  uint32_t max_neighbor_count = 0;\n  size_t sum_neighbor_count = 0;\n\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    const Neighbors neighbors =\n        get_neighbors(0, reorder_mapping.empty() ? 
id : reorder_mapping[id]);\n    ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),\n                       \"invalid neighbors\");\n\n    uint32_t neighbor_count = neighbors.size();\n    if (neighbor_count < min_neighbor_count) {\n      min_neighbor_count = neighbor_count;\n    }\n    if (neighbor_count > max_neighbor_count) {\n      max_neighbor_count = neighbor_count;\n    }\n    sum_neighbor_count += neighbor_count;\n\n    graph_meta.emplace_back(offset, neighbor_count);\n    size_t size = neighbors.size() * sizeof(node_id_t);\n    const node_id_t *data = &neighbors[0];\n    if (!neighbor_mapping.empty()) {\n      for (node_id_t i = 0; i < neighbors.size(); ++i) {\n        mapping[i] = neighbor_mapping[neighbors[i]];\n      }\n      data = mapping.data();\n    }\n    if (dumper->write(data, size) != size) {\n      LOG_ERROR(\"Dump graph neighbor id=%u failed, size %lu\", id, size);\n      return IndexError_WriteData;\n    }\n    crc = ailego::Crc32c::Hash(data, size, crc);\n    offset += size;\n  }\n\n  uint32_t average_neighbor_count = 0;\n  if (doc_cnt() > 0) {\n    average_neighbor_count = sum_neighbor_count / doc_cnt();\n  }\n  LOG_INFO(\n      \"Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] \"\n      \"average_neighbor_count[%u]\",\n      min_neighbor_count, max_neighbor_count, average_neighbor_count);\n\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n  ret =\n      dumper->append(kSparseGraphNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\",\n              kSparseGraphNeighborsSegmentId.c_str(), ret);\n    return ret;\n  }\n\n  //! 
dump level 0 neighbors meta\n  auto len =\n      dump_segment(dumper, kSparseGraphOffsetsSegmentId, graph_meta.data(),\n                   graph_meta.size() * sizeof(SparseGraphNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\nint64_t HnswSparseEntity::dump_upper_neighbors(\n    const IndexDumper::Pointer &dumper,\n    const std::function<level_t(node_id_t)> &get_level,\n    const std::vector<node_id_t> &reorder_mapping,\n    const std::vector<node_id_t> &neighbor_mapping) const {\n  std::vector<HnswSparseNeighborMeta> hnsw_meta;\n  hnsw_meta.reserve(doc_cnt());\n  size_t offset = 0;\n  uint32_t crc = 0;\n  std::vector<node_id_t> buffer(upper_neighbor_cnt() + 1);\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    node_id_t new_id = reorder_mapping.empty() ? id : reorder_mapping[id];\n    auto level = get_level(new_id);\n    if (level == 0) {\n      hnsw_meta.emplace_back(0U, 0U);\n      continue;\n    }\n    hnsw_meta.emplace_back(offset, level);\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {\n      const Neighbors neighbors = get_neighbors(cur_level, new_id);\n      ailego_assert_with(!!neighbors.data, \"invalid neighbors\");\n      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),\n                         \"invalid neighbors\");\n      memset(buffer.data(), 0, sizeof(node_id_t) * buffer.size());\n      buffer[0] = neighbors.size();\n      if (neighbor_mapping.empty()) {\n        memcpy(&buffer[1], &neighbors[0], neighbors.size() * sizeof(node_id_t));\n      } else {\n        for (node_id_t i = 0; i < neighbors.size(); ++i) {\n          buffer[i + 1] = neighbor_mapping[neighbors[i]];\n        }\n      }\n      if (dumper->write(buffer.data(), sizeof(node_id_t) * buffer.size()) !=\n          sizeof(node_id_t) * buffer.size()) {\n        LOG_ERROR(\"Dump graph neighbor id=%u failed, size %lu\", 
id,\n                  sizeof(node_id_t) * buffer.size());\n        return IndexError_WriteData;\n      }\n      crc = ailego::Crc32c::Hash(buffer.data(),\n                                 sizeof(node_id_t) * buffer.size(), crc);\n      offset += sizeof(node_id_t) * buffer.size();\n    }\n  }\n  size_t padding_size = 0;\n  int ret = CalcAndAddPadding(dumper, offset, &padding_size);\n  if (ret != 0) {\n    return ret;\n  }\n\n  ret =\n      dumper->append(kSparseHnswNeighborsSegmentId, offset, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s failed, ret %d\",\n              kSparseHnswNeighborsSegmentId.c_str(), ret);\n    return ret;\n  }\n\n  //! dump upper level neighbors meta\n  auto len = dump_segment(dumper, kSparseHnswOffsetsSegmentId, hnsw_meta.data(),\n                          hnsw_meta.size() * sizeof(HnswSparseNeighborMeta));\n  if (len < 0) {\n    return len;\n  }\n\n  return len + offset + padding_size;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string.h>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_storage.h>\n\nnamespace zvec {\nnamespace core {\n\nusing node_id_t = uint32_t;\nusing key_t = uint64_t;\nusing level_t = int32_t;\nusing dist_t = float;\nusing TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;\nusing CandidateHeap =\n    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;\nconstexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);\nconstexpr key_t kInvalidKey = static_cast<key_t>(-1);\nclass HnswSparseDistCalculator;\n\nstruct SparseGraphHeader {\n  uint32_t size;\n  uint32_t version;\n  uint32_t graph_type;\n  uint32_t doc_count;\n  uint32_t vector_size;\n  uint32_t node_size;\n  uint32_t l0_neighbor_count;\n  uint32_t prune_type;\n  uint32_t prune_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t options;\n  uint32_t min_neighbor_count;\n  uint32_t sparse_meta_size;\n  uint32_t sparse_unit_size;\n  uint32_t total_sparse_count;\n  uint8_t reserved[868];\n};\n\nstatic_assert(sizeof(SparseGraphHeader) % 32 == 0,\n              \"SparseGraphHeader must be aligned with 32 bytes\");\n\n//! 
Hnsw upper neighbor header\nstruct HnswSparseHeader {\n  uint32_t size;      // header size\n  uint32_t revision;  // current total docs of the graph\n  uint32_t upper_neighbor_count;\n  uint32_t ef_construction;\n  uint32_t scaling_factor;\n  uint32_t max_level;\n  uint32_t entry_point;\n  uint32_t options;\n  uint8_t reserved[30];\n};\n\nstruct SparseData {\n public:\n  SparseData() {};\n\n  SparseData(uint32_t sparse_count, const uint32_t *sparse_indices,\n             const void *sparse_vec)\n      : count(sparse_count), indices(sparse_indices), vec(sparse_vec) {}\n\n  uint32_t count{0};\n  const uint32_t *indices{nullptr};\n  const void *vec{nullptr};\n};\n\nstatic_assert(sizeof(HnswSparseHeader) % 32 == 0,\n              \"SparseGraphHeader must be aligned with 32 bytes\");\n\n//! Hnsw common header and upper neighbor header\nstruct HNSWSparseHeader {\n  HNSWSparseHeader() {\n    clear();\n  }\n\n  HNSWSparseHeader(const HNSWSparseHeader &header) {\n    memcpy(this, &header, sizeof(header));\n  }\n\n  HNSWSparseHeader &operator=(const HNSWSparseHeader &header) {\n    memcpy(this, &header, sizeof(header));\n    return *this;\n  }\n\n  //! Reset state to zero, and the params is untouched\n  void inline reset() {\n    graph.doc_count = 0U;\n    hnsw.entry_point = kInvalidNodeId;\n    hnsw.max_level = 0;\n    graph.total_sparse_count = 0U;\n  }\n\n  //! 
Clear all fields to init value\n  void inline clear() {\n    memset(this, 0, sizeof(HNSWSparseHeader));\n    hnsw.entry_point = kInvalidNodeId;\n    graph.size = sizeof(SparseGraphHeader);\n    hnsw.size = sizeof(HnswSparseHeader);\n    graph.total_sparse_count = 0U;\n  }\n\n  size_t neighbor_cnt() const {\n    return graph.l0_neighbor_count;\n  }\n\n  size_t upper_neighbor_cnt() const {\n    return hnsw.upper_neighbor_count;\n  }\n\n  size_t vector_size() const {\n    return graph.vector_size;\n  }\n\n  size_t ef_construction() const {\n    return graph.ef_construction;\n  }\n\n  size_t scaling_factor() const {\n    return hnsw.scaling_factor;\n  }\n\n  size_t neighbor_prune_cnt() const {\n    return graph.prune_neighbor_count;\n  }\n\n  node_id_t entry_point() const {\n    return hnsw.entry_point;\n  }\n\n  node_id_t doc_cnt() const {\n    return graph.doc_count;\n  }\n\n  uint32_t total_sparse_count() const {\n    return graph.total_sparse_count;\n  }\n\n  SparseGraphHeader graph;\n  HnswSparseHeader hnsw;\n};\n\nstruct NeighborsHeader {\n  uint32_t neighbor_cnt;\n  node_id_t neighbors[0];\n};\n\nstruct Neighbors {\n  Neighbors() : cnt{0}, data{nullptr} {}\n\n  Neighbors(uint32_t cnt_in, const node_id_t *data_in)\n      : cnt{cnt_in}, data{data_in} {}\n\n  Neighbors(IndexStorage::MemoryBlock &&mem_block)\n      : neighbor_block{std::move(mem_block)} {\n    auto hd = reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());\n    cnt = hd->neighbor_cnt;\n    data = hd->neighbors;\n  }\n\n  size_t size(void) const {\n    return cnt;\n  }\n\n  const node_id_t &operator[](size_t idx) const {\n    return data[idx];\n  }\n\n  uint32_t cnt;\n  const node_id_t *data;\n  IndexStorage::MemoryBlock neighbor_block;\n};\n\n//! level 0 neighbors offset\nstruct SparseGraphNeighborMeta {\n  SparseGraphNeighborMeta(size_t o, size_t cnt)\n      : offset(o), neighbor_cnt(cnt) {}\n\n  uint64_t offset : 48;\n  uint64_t neighbor_cnt : 16;\n};\n\n//! 
hnsw upper neighbors meta\nstruct HnswSparseNeighborMeta {\n  HnswSparseNeighborMeta(size_t o, size_t l) : offset(o), level(l) {}\n\n  uint64_t offset : 48;  // offset = idx * upper neighbors size\n  uint64_t level : 16;\n};\n\nclass HnswSparseEntity {\n public:\n  //! Constructor\n  HnswSparseEntity() {}\n\n  //! Constructor\n  HnswSparseEntity(const HNSWSparseHeader &hd) {\n    header_ = hd;\n  }\n\n  //! Destructor\n  virtual ~HnswSparseEntity() {}\n\n  //! HnswSparseEntity Pointer\n  typedef std::shared_ptr<HnswSparseEntity> Pointer;\n\n  //! Get max neighbor size of graph level\n  inline size_t neighbor_cnt(level_t level) const {\n    return level == 0 ? header_.graph.l0_neighbor_count\n                      : header_.hnsw.upper_neighbor_count;\n  }\n\n  //! get max neighbor size of graph level 0\n  inline size_t l0_neighbor_cnt() const {\n    return header_.graph.l0_neighbor_count;\n  }\n\n  //! get min neighbor size of graph\n  inline size_t min_neighbor_cnt() const {\n    return header_.graph.min_neighbor_count;\n  }\n\n  //! get upper neighbor size of graph level other than 0\n  inline size_t upper_neighbor_cnt() const {\n    return header_.hnsw.upper_neighbor_count;\n  }\n\n  //! Get current total doc of the hnsw graph\n  inline node_id_t *mutable_doc_cnt() {\n    return &header_.graph.doc_count;\n  }\n\n  inline node_id_t doc_cnt() const {\n    return header_.graph.doc_count;\n  }\n\n  inline uint32_t *mutable_total_sparse_count() {\n    return &header_.graph.total_sparse_count;\n  }\n\n  uint32_t total_sparse_count() const {\n    return header_.graph.total_sparse_count;\n  }\n\n  //! Get hnsw graph scaling params\n  inline size_t scaling_factor() const {\n    return header_.hnsw.scaling_factor;\n  }\n\n  //! Get prune_size\n  inline size_t prune_cnt() const {\n    return header_.graph.prune_neighbor_count;\n  }\n\n  //! Current entry point of top level graph\n  inline node_id_t entry_point() const {\n    return header_.hnsw.entry_point;\n  }\n\n  //! 
Current max graph level\n  inline level_t cur_max_level() const {\n    return header_.hnsw.max_level;\n  }\n\n  //! Retrieve index vector size\n  size_t vector_size() const {\n    return header_.graph.vector_size;\n  }\n\n  //! Retrieve node size\n  size_t node_size() const {\n    return header_.graph.node_size;\n  }\n\n  //! Retrieve ef construction\n  size_t ef_construction() const {\n    return header_.graph.ef_construction;\n  }\n\n  //! Retrieve sparse meta size\n  size_t sparse_meta_size() const {\n    return header_.graph.sparse_meta_size;\n  }\n\n  //! Retrieve sparse unit size\n  size_t sparse_unit_size() const {\n    return header_.graph.sparse_unit_size;\n  }\n\n  void set_vector_size(size_t size) {\n    header_.graph.vector_size = size;\n  }\n\n  void set_prune_cnt(size_t v) {\n    header_.graph.prune_neighbor_count = v;\n  }\n\n  void set_scaling_factor(size_t val) {\n    header_.hnsw.scaling_factor = val;\n  }\n\n  void set_l0_neighbor_cnt(size_t cnt) {\n    header_.graph.l0_neighbor_count = cnt;\n  }\n\n  void set_min_neighbor_cnt(size_t cnt) {\n    header_.graph.min_neighbor_count = cnt;\n  }\n\n  void set_upper_neighbor_cnt(size_t cnt) {\n    header_.hnsw.upper_neighbor_count = cnt;\n  }\n\n  void set_ef_construction(size_t ef) {\n    header_.graph.ef_construction = ef;\n  }\n\n  void set_sparse_meta_size(size_t size) {\n    header_.graph.sparse_meta_size = size;\n  }\n\n  void set_sparse_unit_size(size_t size) {\n    header_.graph.sparse_unit_size = size;\n  }\n\n protected:\n  inline const HNSWSparseHeader &header() const {\n    return header_;\n  }\n\n  inline HNSWSparseHeader *mutable_header() {\n    return &header_;\n  }\n\n  inline size_t header_size() const {\n    return sizeof(header_);\n  }\n\n  void set_node_size(size_t size) {\n    header_.graph.node_size = size;\n  }\n\n  //! Dump all segments by dumper\n  //! 
Return dump size if success, errno(<0) in failure\n  int64_t dump_segments(\n      const IndexDumper::Pointer &dumper, key_t *keys,\n      const std::function<level_t(node_id_t)> &get_level) const;\n\n private:\n  //! dump mapping segment, for get_vector_by_key in provider\n  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,\n                               const key_t *keys) const;\n\n  //! dump hnsw head by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_header(const IndexDumper::Pointer &dumper,\n                      const HNSWSparseHeader &hd) const;\n\n  //! dump vectors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_sparse_vector_meta(\n      const IndexDumper::Pointer &dumper,\n      const std::vector<node_id_t> &reorder_mapping) const;\n\n  //! dump sparse vectors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_sparse_vector(\n      const IndexDumper::Pointer &dumper,\n      const std::vector<node_id_t> &reorder_mapping) const;\n\n  //! dump hnsw neighbors by dumper\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,\n                         const std::function<level_t(node_id_t)> &get_level,\n                         const std::vector<node_id_t> &reorder_mapping,\n                         const std::vector<node_id_t> &neighbor_mapping) const {\n    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);\n    if (len1 < 0) {\n      return len1;\n    }\n    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,\n                                     neighbor_mapping);\n    if (len2 < 0) {\n      return len2;\n    }\n\n    return len1 + len2;\n  }\n\n  //! dump segment by dumper\n  //! 
Return dump size if success, errno(<0) in failure\n  int64_t dump_segment(const IndexDumper::Pointer &dumper,\n                       const std::string &segment_id, const void *data,\n                       size_t size) const;\n\n  //! Dump level 0 neighbors\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_graph_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n  //! Dump upper level neighbors\n  //! Return dump size if success, errno(<0) in failure\n  int64_t dump_upper_neighbors(\n      const IndexDumper::Pointer &dumper,\n      const std::function<level_t(node_id_t)> &get_level,\n      const std::vector<node_id_t> &reorder_mapping,\n      const std::vector<node_id_t> &neighbor_mapping) const;\n\n public:\n  //! Cleanup the entity\n  virtual int cleanup(void) {\n    header_.clear();\n    return 0;\n  }\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrently\n  virtual const HnswSparseEntity::Pointer clone() const {\n    LOG_ERROR(\"Update neighbors not implemented\");\n    return HnswSparseEntity::Pointer();\n  }\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const = 0;\n\n  //! Get vector feature data by node id\n  virtual const void *get_vector_meta(node_id_t id) const = 0;\n\n  virtual int get_vector_meta(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const = 0;\n\n  //! Get vectors feature data by node ids\n  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,\n                               const void **vecs) const = 0;\n  virtual int get_vector_metas(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &block_vecs) const = 0;\n\n  //! 
Retrieve a sparse vector using a primary key\n  virtual int get_sparse_vector_by_key(\n      uint64_t /*key*/, uint32_t * /*sparse_count*/,\n      std::string * /*sparse_indices_buffer*/,\n      std::string * /*sparse_values_buffer*/) const {\n    LOG_ERROR(\"get sparse vector not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  //! Retrieve a sparse vector using a node id\n  virtual int get_sparse_vector_by_id(\n      node_id_t /*id*/, uint32_t * /*sparse_count*/,\n      std::string * /*sparse_indices_buffer*/,\n      std::string * /*sparse_values_buffer*/) const {\n    LOG_ERROR(\"get sparse vector not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  //! Get vector sparse feature data by offset and length\n  virtual const void *get_sparse_data(uint64_t offset, uint32_t len) const = 0;\n\n  //! Get sparse data from id\n  virtual const void *get_sparse_data(node_id_t id) const = 0;\n\n  virtual int get_sparse_data(uint64_t offset, uint32_t len,\n                              IndexStorage::MemoryBlock &block) const = 0;\n\n  virtual int get_sparse_data(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const = 0;\n\n  //! Get sparse data from vector\n  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(\n      const void *vec) const = 0;\n  virtual int get_sparse_data_from_vector(const void *vec,\n                                          IndexStorage::MemoryBlock &block,\n                                          int &sparse_length) const = 0;\n\n  //! Get the node id's neighbors on graph level\n  //! Note: the neighbors cannot be modified, using the following\n  //! method to get WritableNeighbors if want to\n  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;\n\n  //! 
Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t /*level*/, key_t /*key*/,\n                         const std::string & /*vec*/, uint32_t /*sparse_count*/,\n                         node_id_t * /*id*/) {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int add_vector(level_t /*level*/, key_t /*key*/,\n                         const uint32_t /*sparse_count*/,\n                         const uint32_t * /*sparse_indices*/,\n                         const void * /*sparse_vec*/, node_id_t * /*id*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add vector and id\n  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,\n                                 const std::string & /*vec*/,\n                                 uint32_t /*sparse_count*/) {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int update_neighbors(\n      level_t /*level*/, node_id_t /*id*/,\n      const std::vector<std::pair<node_id_t, dist_t>> & /*neighbors*/) {\n    LOG_ERROR(\"Update neighbors dense not implemented\");\n\n    return 0;\n  }\n\n  //! Append neighbor_id to node id neighbors on level, size is the current\n  //! neighbors size. Notice: the caller must be ensure the neighbors not full\n  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,\n                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {\n    LOG_ERROR(\"Add neighbor not implemented\");\n  }\n\n  //! 
Update entry point and max level\n  virtual void update_ep_and_level(node_id_t ep, level_t level) {\n    header_.hnsw.entry_point = ep;\n    header_.hnsw.max_level = level;\n  }\n\n  virtual int load(const IndexStorage::Pointer & /*container*/,\n                   bool /*check_crc*/) {\n    LOG_ERROR(\"Load not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {\n    LOG_ERROR(\"Dump not implemented\");\n    return IndexError_NotImplemented;\n  }\n\n  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,\n                               size_t data_size, size_t *padding_size);\n\n protected:\n  static inline size_t AlignSize(size_t size) {\n    return (size + 0x1F) & (~0x1F);\n  }\n\n  static inline size_t AlignPageSize(size_t size) {\n    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;\n    return (size + page_mask) & (~page_mask);\n  }\n\n  //! rearrange vectors to improve cache locality\n  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,\n                         std::vector<node_id_t> *n2o_mapping,\n                         std::vector<node_id_t> *o2n_mapping,\n                         key_t *keys) const;\n\n public:\n  const static std::string kSparseGraphHeaderSegmentId;\n  const static std::string kSparseGraphFeaturesSegmentId;\n  const static std::string kSparseGraphKeysSegmentId;\n  const static std::string kSparseGraphNeighborsSegmentId;\n  const static std::string kSparseGraphOffsetsSegmentId;\n  const static std::string kSparseGraphMappingSegmentId;\n  const static std::string kSparseHnswHeaderSegmentId;\n  const static std::string kSparseHnswNeighborsSegmentId;\n  const static std::string kSparseHnswOffsetsSegmentId;\n  const static std::string kSparseGraphVectorsSegmentId;\n  const static std::string kSparseGraphVectorMetaSegmentId;\n\n  constexpr static uint32_t kRevision = 0U;\n  constexpr static size_t kMaxGraphLayers = 15;\n  
constexpr static uint32_t kDefaultEfConstruction = 500;\n  constexpr static uint32_t kDefaultEf = 500;\n  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW\n  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;\n  constexpr static uint32_t kMaxNeighborCnt = 65535;\n  constexpr static float kDefaultScanRatio = 0.1f;\n  constexpr static uint32_t kDefaultMinScanLimit = 10000;\n  constexpr static uint32_t kDefaultMaxScanLimit =\n      std::numeric_limits<uint32_t>::max();\n  constexpr static float kDefaultBFNegativeProbability = 0.001f;\n  constexpr static uint32_t kDefaultScalingFactor = 50U;\n  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;\n  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion\n  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;\n  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;\n  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;\n  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;\n  constexpr static float kDefaultNeighborPruneMultiplier =\n      1.0f;  // prune_cnt = upper_max_neighbor_cnt * multiplier\n  constexpr static float kDefaultL0MaxNeighborCntMultiplier =\n      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier\n\n  constexpr static uint32_t kSparseMetaSize = 2u * sizeof(uint64_t);\n  constexpr static float kDefaultSparseNeighborRatio = 0.5f;\n  constexpr static uint32_t kSparseMaxDimSize = 16384;\n  constexpr static float kDefaultQueryFilteringRatio = 0.0f;  // turn off\n\n protected:\n  HNSWSparseHeader header_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_index_hash.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_sparse_chunk.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! Persistent hashmap implement through open addressing algorithm\ntemplate <class Key, class Val, Val EmptyVal = 0U,\n          typename =\n              typename std::enable_if<std::is_integral<Key>::value>::type>\nclass HnswSparseIndexHashMap {\n  using key_type = Key;\n  using val_type = Val;\n\n  struct Iterator {\n    key_type first;\n    val_type second;\n  };\n  typedef Iterator *iterator;\n  typedef Iterator Item;\n  typedef const Iterator *const_iterator;\n\n  class Slot {\n   public:\n    Slot(SparseChunk::Pointer &&chunk, const void *data)\n        : chunk_(std::move(chunk)),\n          items_(reinterpret_cast<const Item *>(data)) {}\n    //! 
Return a empty loc or the key item loc\n\n    Slot(SparseChunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)\n        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {\n      items_ = reinterpret_cast<const Item *>(items_block_.data());\n    }\n    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {\n      auto it = &items_[key & mask];\n      for (auto i = 0U; i < max_items; ++i) {\n        if (it->first == key || it->second == EmptyVal) {\n          // LOG_DEBUG(\"i=%u\", i);\n          return it;\n        }\n        ++it;\n        if (it == &items_[max_items]) {\n          it = &items_[0];\n        }\n      }\n      return nullptr;\n    }\n\n    bool update(const_iterator it) {\n      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -\n                        reinterpret_cast<const uint8_t *>(&items_[0]);\n      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=\n                          sizeof(Item))) {\n        LOG_ERROR(\"Chunk write failed\");\n        return false;\n      }\n      return true;\n    }\n\n   private:\n    SparseChunk::Pointer chunk_{};\n    const Item *items_{nullptr};  // point to chunk data\n    IndexStorage::MemoryBlock items_block_{};\n  };\n\n public:\n  //! Init the hash\n  //! broker      the index allocator\n  //! chunk_size  the size of per chunk allocated, actual size may greater\n  //! factor      factor = 1/ratio, ratio is the probability of a squence\n  //! number inserted to this container\n  //! max         the max number key can be inserted\n  //! 
expansion_ratio   memory expansion ratio\n  int init(SparseChunkBroker::Pointer &broker, uint32_t chunk_size,\n           uint32_t factor, size_t max, float expansion_ratio) {\n    ailego_assert_with(expansion_ratio > 1.0f, \"ratio must > 1.0f\");\n    broker_ = broker;\n\n    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));\n    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));\n    size_t range = slot_items_ * factor / expansion_ratio;\n    mask_bits_ = std::floor(std::log2(range));\n    range = 1UL << mask_bits_;\n    size_t max_slots = std::ceil(max * 1.0f / range);\n    slots_.reserve(max_slots);\n    slot_loc_mask_ = slot_items_ - 1U;\n\n    int ret = load();\n    if (ret != 0) {\n      return ret;\n    }\n\n    LOG_DEBUG(\n        \"HnswIndexHash init, chunkSize=%u factor=%u max=%zu \"\n        \"ratio=%f slotItems=%u maxSlots=%zu maskBits=%u \"\n        \"range=%zu\",\n        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,\n        mask_bits_, range);\n\n    return 0;\n  }\n\n  int cleanup(void) {\n    broker_.reset();\n    slots_.clear();\n    slots_.shrink_to_fit();\n    mask_bits_ = 0U;\n    slot_items_ = 0U;\n    slot_loc_mask_ = 0U;\n\n    return 0;\n  }\n\n  const_iterator end(void) const {\n    return nullptr;\n  }\n\n  const_iterator find(const key_type key) const {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      return end();\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    return it && it->second != EmptyVal ? 
it : nullptr;\n  }\n\n  bool insert(key_type key, val_type val) {\n    auto idx = key >> mask_bits_;\n    if (idx >= slots_.size()) {\n      if (ailego_unlikely(idx >= slots_.capacity())) {\n        LOG_ERROR(\"no space to insert\");\n        return false;\n      }\n      for (auto i = slots_.size(); i <= idx; ++i) {\n        if (ailego_unlikely(!alloc_slot(i))) {\n          return false;\n        }\n      }\n    }\n    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);\n    if (ailego_unlikely(it == nullptr)) {\n      LOG_ERROR(\"no space to insert\");\n      return false;\n    }\n\n    //! TODO: write memory is ok?\n    const_cast<iterator>(it)->first = key;\n    const_cast<iterator>(it)->second = val;\n\n    return slots_[idx].update(it);\n  }\n\n private:\n  bool alloc_slot(size_t idx) {\n    ailego_assert_with(idx == slots_.size(), \"invalid idx\");\n\n    size_t size = slot_items_ * sizeof(Item);\n    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX,\n                                  idx, size);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return false;\n    }\n    SparseChunk::Pointer chunk = p.second;\n    if (ailego_unlikely(chunk->resize(size) != size)) {\n      LOG_ERROR(\"Chunk resize failed, size=%zu\", size);\n      return false;\n    }\n    //! 
Read the whole data to memory\n    IndexStorage::MemoryBlock data_block;\n    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n      LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n      return false;\n    }\n\n    slots_.emplace_back(std::move(chunk), std::move(data_block));\n    return true;\n  }\n\n  int load(void) {\n    size_t slots_cnt =\n        broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);\n    for (size_t i = 0UL; i < slots_cnt; ++i) {\n      auto chunk =\n          broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);\n      if (!chunk) {\n        LOG_ERROR(\"Get chunk failed, seq=%zu\", i);\n        return IndexError_InvalidFormat;\n      }\n      size_t size = sizeof(Item) * slot_items_;\n      if (chunk->data_size() < size) {\n        LOG_ERROR(\n            \"Hash params may be mismatch, seq=%zu, data_size=%zu \"\n            \"expect=%zu\",\n            i, chunk->data_size(), size);\n        return IndexError_InvalidFormat;\n      }\n      //! Read the whole data to memory\n      IndexStorage::MemoryBlock data_block;\n      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {\n        LOG_ERROR(\"Chunk read failed, size=%zu\", size);\n        return false;\n      }\n      slots_.emplace_back(std::move(chunk), std::move(data_block));\n    }\n    return 0;\n  }\n\n private:\n  SparseChunkBroker::Pointer broker_{};  // chunk broker\n  std::vector<Slot> slots_{};\n  uint32_t mask_bits_{0U};\n  uint32_t slot_items_{};  // must be a power of 2\n  uint32_t slot_loc_mask_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_index_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseIndexProvider : public IndexSparseProvider {\n public:\n  HnswSparseIndexProvider(const IndexMeta &meta,\n                          const HnswSparseEntity::Pointer &entity,\n                          const std::string &owner)\n      : meta_(meta), entity_(entity), owner_class_(owner) {}\n\n  HnswSparseIndexProvider(const HnswSparseIndexProvider &) = delete;\n  HnswSparseIndexProvider &operator=(const HnswSparseIndexProvider &) = delete;\n\n public:\n  //! Create a new iterator\n  IndexSparseProvider::Iterator::Pointer create_iterator(void) override {\n    return IndexSparseProvider::Iterator::Pointer(new (std::nothrow)\n                                                      Iterator(entity_));\n  }\n\n  //! Retrieve count of vectors\n  size_t count(void) const override {\n    return entity_->doc_cnt();\n  }\n\n  size_t total_sparse_count(void) const override {\n    return entity_->total_sparse_count();\n  }\n\n  //! Retrieve type of vector\n  IndexMeta::DataType data_type(void) const override {\n    return meta_.data_type();\n  }\n\n  //! 
Retrieve a vector using a primary key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override {\n    return entity_->get_sparse_vector_by_key(\n        key, sparse_count, sparse_indices_buffer, sparse_values_buffer);\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n private:\n  class Iterator : public IndexSparseProvider::Iterator {\n   public:\n    Iterator(const HnswSparseEntity::Pointer &entity)\n        : entity_(entity), cur_id_(0U), valid_(false) {\n      const void *sparse_data = entity_->get_sparse_data(cur_id_);\n      if (sparse_data != nullptr) {\n        valid_ = true;\n\n        sparse_indices_buffer_.clear();\n        sparse_data_buffer_.clear();\n\n        SparseUtility::ReverseSparseFormat(\n            sparse_data, &sparse_count_, &sparse_indices_buffer_,\n            &sparse_data_buffer_, entity_->sparse_unit_size());\n      }\n    }\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const override {\n      return sparse_count_;\n    }\n\n    //! Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const override {\n      return reinterpret_cast<const uint32_t *>(sparse_indices_buffer_.data());\n    }\n\n    //! Retrieve sparse data\n    virtual const void *sparse_data() const override {\n      return reinterpret_cast<const void *>(sparse_data_buffer_.data());\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return cur_id_ < entity_->doc_cnt() && valid_;\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return entity_->get_key(cur_id_);\n    }\n\n    //! 
Next iterator\n    virtual void next(void) override {\n      cur_id_ = get_next_valid_id(cur_id_ + 1);\n\n      if (cur_id_ < entity_->doc_cnt()) {\n        const void *sparse_data = entity_->get_sparse_data(cur_id_);\n        if (sparse_data != nullptr) {\n          valid_ = true;\n\n          sparse_indices_buffer_.clear();\n          sparse_data_buffer_.clear();\n\n          SparseUtility::ReverseSparseFormat(\n              sparse_data, &sparse_count_, &sparse_indices_buffer_,\n              &sparse_data_buffer_, entity_->sparse_unit_size());\n        } else {\n          valid_ = false;\n        }\n      }\n    }\n\n    //! Reset the iterator\n    void reset(void) {\n      cur_id_ = get_next_valid_id(0);\n      const void *sparse_data = entity_->get_sparse_data(cur_id_);\n      if (sparse_data != nullptr) {\n        valid_ = true;\n\n        SparseUtility::ReverseSparseFormat(\n            sparse_data, &sparse_count_, &sparse_indices_buffer_,\n            &sparse_data_buffer_, entity_->sparse_unit_size());\n      }\n    }\n\n   private:\n    node_id_t get_next_valid_id(node_id_t start_id) {\n      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {\n        if (entity_->get_key(i) != kInvalidNodeId) {\n          return i;\n        }\n      }\n      return kInvalidNodeId;\n    }\n\n   private:\n    const HnswSparseEntity::Pointer entity_;\n    node_id_t cur_id_;\n    uint32_t sparse_count_{0};\n    std::string sparse_indices_buffer_;\n    std::string sparse_data_buffer_;\n    bool valid_{false};\n  };\n\n private:\n  const IndexMeta &meta_;\n  const HnswSparseEntity::Pointer entity_;\n  const std::string owner_class_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT(\n    \"proxima.hnsw.sparse_builder.thread_count\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_MEMORY_QUOTA(\n    \"proxima.hnsw.sparse_builder.memory_quota\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_EFCONSTRUCTION(\n    \"proxima.hnsw.sparse_builder.efconstruction\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR(\n    \"proxima.hnsw.sparse_builder.scaling_factor\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_CHECK_INTERVAL_SECS(\n    \"proxima.hnsw.sparse_builder.check_interval_secs\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw.sparse_builder.neighbor_prune_multiplier\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw.sparse_builder.min_neighbor_count\");\nstatic const std::string PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw.sparse_builder.max_neighbor_count\");\nstatic const std::string\n    PARAM_HNSW_SPARSE_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n        \"proxima.hnsw.sparse_builder.l0_max_neighbor_count_multiplier\");\n\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_EF(\n    
\"proxima.hnsw.sparse_searcher.ef\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw.sparse_searcher.brute_force_threshold\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE(\n    \"proxima.hnsw.sparse_searcher.neighbors_in_memory_enable\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO(\n    \"proxima.hnsw.sparse_searcher.max_scan_ratio\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw.sparse_searcher.check_crc_enable\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw.sparse_searcher.visit_bloomfilter_enable\");\nstatic const std::string\n    PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n        \"proxima.hnsw.sparse_searcher.visit_bloomfilter_negative_prob\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw.sparse_searcher.force_padding_result_enable\");\nstatic const std::string PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO(\n    \"proxima.hnsw.sparse_searcher.query_filtering_ratio\");\n\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO(\n    \"proxima.hnsw.sparse_streamer.max_scan_ratio\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT(\n    \"proxima.hnsw.sparse_streamer.min_scan_limit\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT(\n    \"proxima.hnsw.sparse_streamer.max_scan_limit\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_EF(\n    \"proxima.hnsw.sparse_streamer.ef\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION(\n    \"proxima.hnsw.sparse_streamer.efconstruction\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT(\n    \"proxima.hnsw.sparse_streamer.max_neighbor_count\");\nstatic const std::string\n    PARAM_HNSW_SPARSE_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(\n  
      \"proxima.hnsw.sparse_streamer.l0_max_neighbor_count_multiplier\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR(\n    \"proxima.hnsw.sparse_streamer.scaling_factor\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.hnsw.sparse_streamer.brute_force_threshold\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT(\n    \"proxima.hnsw.sparse_streamer.docs_hard_limit\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT(\n    \"proxima.hnsw.sparse_streamer.docs_soft_limit\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE(\n    \"proxima.hnsw.sparse_streamer.max_index_size\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE(\n    \"proxima.hnsw.sparse_streamer.visit_bloomfilter_enable\");\nstatic const std::string\n    PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB(\n        \"proxima.hnsw.sparse_streamer.visit_bloomfilter_negative_prob\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_CHECK_CRC_ENABLE(\n    \"proxima.hnsw.sparse_streamer.check_crc_enable\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER(\n    \"proxima.hnsw.sparse_streamer.neighbor_prune_multiplier\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE(\n    \"proxima.hnsw.sparse_streamer.chunk_size\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY(\n    \"proxima.hnsw.sparse_streamer.filter_same_key\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE(\n    \"proxima.hnsw.sparse_streamer.get_vector_enable\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT(\n    \"proxima.hnsw.sparse_streamer.min_neighbor_count\");\nstatic const std::string PARAM_HNSW_SPARSE_STREAMER_FORCE_PADDING_RESULT_ENABLE(\n    \"proxima.hnsw.sparse_streamer.force_padding_result_enable\");\nstatic const std::string 
PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO(\n    \"proxima.hnsw.sparse_streamer.query_filtering_ratio\");\n\nstatic const std::string PARAM_HNSW_SPARSE_REDUCER_WORKING_PATH(\n    \"proxima.hnsw.sparse_reducer.working_path\");\nstatic const std::string PARAM_HNSW_SPARSE_REDUCER_NUM_OF_ADD_THREADS(\n    \"proxima.hnsw.sparse_reducer.num_of_add_threads\");\nstatic const std::string PARAM_HNSW_SPARSE_REDUCER_INDEX_NAME(\n    \"proxima.hnsw.sparse_reducer.index_name\");\nstatic const std::string PARAM_HNSW_SPARSE_REDUCER_EFCONSTRUCTION(\n    \"proxima.hnsw.sparse_reducer.efconstruction\");\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_searcher.h\"\n#include \"hnsw_sparse_algorithm.h\"\n#include \"hnsw_sparse_index_provider.h\"\n#include \"hnsw_sparse_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseSearcher::HnswSparseSearcher() {}\n\nHnswSparseSearcher::~HnswSparseSearcher() {}\n\nint HnswSparseSearcher::init(const ailego::Params &search_params) {\n  params_ = search_params;\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_EF, &ef_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE,\n              &bf_enabled_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,\n              &neighbors_in_memory_enabled_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n              &bf_negative_probability_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD,\n              &bruteforce_threshold_);\n  params_.get(PARAM_HNSW_SPARSE_SEARCHER_FORCE_PADDING_RESULT_ENABLE,\n              &force_padding_topk_enabled_);\n\n  query_filtering_enabled_ =\n      params_.get(PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO,\n                  &query_filtering_ratio_);\n\n  if (ef_ == 0) {\n    ef_ = HnswSparseEntity::kDefaultEf;\n  }\n 
 if (bf_negative_probability_ <= 0.0f || bf_negative_probability_ >= 1.0f) {\n    LOG_ERROR(\n        \"[%s] must be in range (0,1)\",\n        PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (query_filtering_enabled_ &&\n      (query_filtering_ratio_ <= 0.0f || query_filtering_ratio_ >= 1.0f)) {\n    LOG_ERROR(\"[%s] must be in range (0, 1)\",\n              PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);\n\n  state_ = STATE_INITED;\n\n  LOG_DEBUG(\n      \"Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u \"\n      \"neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u \"\n      \"forcePadding=%u filteringRatio=%f\",\n      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,\n      neighbors_in_memory_enabled_, bf_negative_probability_,\n      bruteforce_threshold_, force_padding_topk_enabled_,\n      query_filtering_ratio_);\n\n  return 0;\n}\n\nvoid HnswSparseSearcher::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; \";\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n}\n\nint HnswSparseSearcher::cleanup() {\n  LOG_INFO(\"Begin HnswSparseSearcher:cleanup\");\n\n  metric_.reset();\n  meta_.clear();\n  stats_.clear_attributes();\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  max_scan_ratio_ = HnswSparseEntity::kDefaultScanRatio;\n  max_scan_num_ = 0U;\n  ef_ = HnswSparseEntity::kDefaultEf;\n  bf_enabled_ = false;\n  bf_negative_probability_ = HnswSparseEntity::kDefaultBFNegativeProbability;\n  
bruteforce_threshold_ = HnswSparseEntity::kDefaultBruteForceThreshold;\n  check_crc_enabled_ = false;\n  neighbors_in_memory_enabled_ = false;\n  entity_.cleanup();\n  state_ = STATE_INIT;\n\n  LOG_INFO(\"End HnswSparseSearcher:cleanup\");\n\n  return 0;\n}\n\nint HnswSparseSearcher::load(IndexStorage::Pointer container,\n                             IndexMetric::Pointer metric) {\n  if (state_ != STATE_INITED) {\n    LOG_ERROR(\"Init the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  LOG_INFO(\"Begin HnswSparseSearcher:load\");\n\n  auto start_time = ailego::Monotime::MilliSeconds();\n\n  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  ret = entity_.load(container, check_crc_enabled_);\n  if (ret != 0) {\n    LOG_ERROR(\"HnswSparseSearcher load index failed\");\n    return ret;\n  }\n\n  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));\n\n  if (metric) {\n    metric_ = metric;\n  } else {\n    metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n    if (!metric_) {\n      LOG_ERROR(\"CreateMeasure failed, name: %s\", meta_.metric_name().c_str());\n      return IndexError_NoExist;\n    }\n    ret = metric_->init(meta_, meta_.metric_params());\n    if (ret != 0) {\n      LOG_ERROR(\"IndexMetric init failed, ret=%d\", ret);\n      return ret;\n    }\n    if (metric_->query_metric()) {\n      metric_ = metric_->query_metric();\n    }\n  }\n\n  // if (!metric_->is_matched(meta_)) {\n  //   LOG_ERROR(\"IndexMeasure not match index meta\");\n  //   return IndexError_Mismatch;\n  // }\n\n  max_scan_num_ = static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt());\n  max_scan_num_ = std::max(4096U, max_scan_num_);\n\n  stats_.set_loaded_count(entity_.doc_cnt());\n  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - start_time);\n  state_ = STATE_LOADED;\n  magic_ = 
IndexContext::GenerateMagic();\n\n  LOG_INFO(\"End HnswSparseSearcher::load\");\n\n  return 0;\n}\n\nint HnswSparseSearcher::unload() {\n  LOG_INFO(\"HnswSparseSearcher unload index\");\n\n  meta_.clear();\n  entity_.cleanup();\n  metric_.reset();\n  max_scan_num_ = 0;\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswSparseSearcher::update_context(HnswSparseContext *ctx) const {\n  const HnswSparseEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_num(max_scan_num_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  return ctx->update_context(HnswSparseContext::kSparseSearcherContext, meta_,\n                             metric_, entity, magic_);\n}\n\n//! Similarity search with sparse inputs\nint HnswSparseSearcher::search_impl(const uint32_t *sparse_count,\n                                    const uint32_t *sparse_indices,\n                                    const void *sparse_query,\n                                    const IndexQueryMeta &qmeta, uint32_t count,\n                                    Context::Pointer &context) const {\n  if (ailego_unlikely(!context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,\n                          count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  for (size_t q = 0; q < count; ++q) {\n    std::string sparse_query_buffer;\n    std::string sparse_query_filtered_buffer;\n\n    SparseUtility::TransSparseFormat(\n        sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n        entity_.sparse_unit_size(), sparse_query_buffer);\n\n    if (query_filtering_enabled_) {\n      if (!SparseUtility::FilterSparseQuery(\n              sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n              qmeta.data_type(), entity_.sparse_unit_size(),\n              query_filtering_ratio_, &sparse_query_filtered_buffer)) {\n        LOG_ERROR(\"Hnsw filtering failed\");\n        return IndexError_Runtime;\n      }\n\n      ctx->reset_query(sparse_query_filtered_buffer.data());\n    } else {\n      ctx->reset_query(sparse_query_buffer.data());\n    }\n\n    int ret = alg_->search(ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n\n    if (query_filtering_enabled_) {\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->recal_topk_dist();\n    }\n\n    ctx->topk_to_result(q);\n\n    sparse_indices_tmp += sparse_count[q];\n    sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                       sparse_count[q] * qmeta.unit_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\n//! 
Similarity search with sparse inputs\nint HnswSparseSearcher::search_bf_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  if (ailego_unlikely(!context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_Runtime;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n\n      ctx->group_topk_heaps().clear();\n\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().dist(id);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = 
ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n\n      ctx->topk_heap().clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().dist(id);\n          ctx->topk_heap().emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\n//! 
Similarity search with sparse inputs\nint HnswSparseSearcher::search_bf_by_p_keys_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  if (ailego_unlikely(!context)) {\n    LOG_ERROR(\"The context is not created by this searcher\");\n    return IndexError_Mismatch;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    int ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->resize_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_Runtime;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          
node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().dist(id);\n\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  } else {\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->topk_heap().clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().dist(id);\n            ctx->topk_heap().emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nIndexSearcher::Context::Pointer HnswSparseSearcher::create_context() const {\n  if (ailego_unlikely(state_ != STATE_LOADED)) {\n    LOG_ERROR(\"Load the index first before create context\");\n    return Context::Pointer();\n  }\n  const HnswSparseEntity::Pointer 
search_ctx_entity = entity_.clone();\n  if (!search_ctx_entity) {\n    LOG_ERROR(\"Failed to create search context entity\");\n    return Context::Pointer();\n  }\n  HnswSparseContext *ctx =\n      new (std::nothrow) HnswSparseContext(metric_, search_ctx_entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswSparseContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_num(max_scan_num_);\n  uint32_t filter_mode =\n      bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap;\n  ctx->set_filter_mode(filter_mode);\n  ctx->set_filter_negative_probability(bf_negative_probability_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  if (ailego_unlikely(ctx->init(HnswSparseContext::kSparseSearcherContext)) !=\n      0) {\n    LOG_ERROR(\"Init HnswSparseContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n\n  return Context::Pointer(ctx);\n}\n\nIndexSearcher::SparseProvider::Pointer\nHnswSparseSearcher::create_sparse_provider(void) const {\n  LOG_DEBUG(\"HnswSparseSearcher create sparse provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswSparseEntity failed\");\n    return SparseProvider::Pointer();\n  }\n  return SparseProvider::Pointer(new (std::nothrow) HnswSparseIndexProvider(\n      meta_, entity, \"HnswSparseSearcher\"));\n}\n\nint HnswSparseSearcher::get_sparse_vector(\n    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  return entity_.get_sparse_vector_by_key(\n      key, sparse_count, sparse_indices_buffer, sparse_values_buffer);\n}\n\nINDEX_FACTORY_REGISTER_SEARCHER(HnswSparseSearcher);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_sparse_searcher_entity.h\"\n#include \"hnsw_sparse_streamer.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseSearcher : public IndexSearcher {\n public:\n  using ContextPointer = IndexSearcher::Context::Pointer;\n\n public:\n  HnswSparseSearcher(void);\n  virtual ~HnswSparseSearcher(void);\n\n  HnswSparseSearcher(const HnswSparseSearcher &) = delete;\n  HnswSparseSearcher &operator=(const HnswSparseSearcher &) = delete;\n\n protected:\n  //! Initialize Searcher\n  int init(const ailego::Params &params) override;\n\n  //! Cleanup Searcher\n  int cleanup(void) override;\n\n  //! Load Index from storage\n  int load(IndexStorage::Pointer container,\n           IndexMetric::Pointer measure) override;\n\n  //! Unload index from storage\n  int unload(void) override;\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const override {\n    return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                       context);\n  }\n\n  //! 
Similarity search with sparse inputs\n  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const override;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const override {\n    return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                          context);\n  }\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t *sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta, uint32_t count,\n                     Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               ContextPointer &context) const override {\n    return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,\n                                    p_keys, qmeta, 1, context);\n  }\n\n  //! 
Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               ContextPointer &context) const override;\n\n  //! Fetch sparse vector by key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override;\n\n  //! Create a searcher context\n  ContextPointer create_context() const override;\n\n  //! Create a sparse provider\n  IndexSearcher::SparseProvider::Pointer create_sparse_provider(\n      void) const override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  void print_debug_info() override;\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! 
current streamer/searcher\n  int update_context(HnswSparseContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  HnswSparseSearcherEntity entity_{};\n  HnswSparseAlgorithm::UPointer alg_;  // impl graph algorithm\n\n  IndexMetric::Pointer metric_{};\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  Stats stats_;\n  uint32_t ef_{HnswSparseEntity::kDefaultEf};\n  uint32_t max_scan_num_{0U};\n  uint32_t bruteforce_threshold_{HnswSparseEntity::kDefaultBruteForceThreshold};\n  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool neighbors_in_memory_enabled_{false};\n  bool force_padding_topk_enabled_{false};\n  float bf_negative_probability_{\n      HnswSparseEntity::kDefaultBFNegativeProbability};\n\n  bool query_filtering_enabled_{false};\n  float query_filtering_ratio_{HnswSparseEntity::kDefaultQueryFilteringRatio};\n\n  uint32_t magic_{0U};\n\n  State state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_searcher_entity.h\"\n#include <zvec/ailego/hash/crc32c.h>\n#include \"utility/sparse_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseSearcherEntity::HnswSparseSearcherEntity() {}\n\nint HnswSparseSearcherEntity::cleanup(void) {\n  container_.reset();\n  sparse_vector_meta_.reset();\n  keys_.reset();\n  neighbors_.reset();\n  neighbors_meta_.reset();\n  sparse_vectors_.reset();\n  neighbors_in_memory_enabled_ = false;\n  loaded_ = false;\n\n  this->HnswSparseEntity::cleanup();\n\n  return 0;\n}\n\nkey_t HnswSparseSearcherEntity::get_key(node_id_t id) const {\n  const void *key;\n  if (ailego_unlikely(keys_->read(id * sizeof(key_t), &key, sizeof(key_t)) !=\n                      sizeof(key_t))) {\n    LOG_ERROR(\"Read key from segment failed\");\n    return kInvalidKey;\n  }\n  return *(reinterpret_cast<const key_t *>(key));\n}\n\n//! Get vector local id by key\nnode_id_t HnswSparseSearcherEntity::get_id(key_t key) const {\n  if (ailego_unlikely(!mapping_)) {\n    LOG_ERROR(\"Index missing mapping segment\");\n    return kInvalidNodeId;\n  }\n\n  //! 
Do binary search\n  node_id_t start = 0UL;\n  node_id_t end = doc_cnt();\n  const void *data;\n  node_id_t idx = 0u;\n  while (start < end) {\n    idx = start + (end - start) / 2;\n    if (ailego_unlikely(\n            mapping_->read(idx * sizeof(node_id_t), &data, sizeof(node_id_t)) !=\n            sizeof(node_id_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    const key_t *mkey;\n    node_id_t local_id = *reinterpret_cast<const node_id_t *>(data);\n    if (ailego_unlikely(keys_->read(local_id * sizeof(key_t),\n                                    (const void **)(&mkey),\n                                    sizeof(key_t)) != sizeof(key_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return kInvalidNodeId;\n    }\n    if (*mkey < key) {\n      start = idx + 1;\n    } else if (*mkey > key) {\n      end = idx;\n    } else {\n      return local_id;\n    }\n  }\n  return kInvalidNodeId;\n}\n\nint HnswSparseSearcherEntity::get_sparse_vector_by_key(\n    key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  *sparse_count = 0;\n\n  auto id = get_id(key);\n  if (id == kInvalidNodeId) {\n    return IndexError_NoExist;\n  }\n\n  const void *sparse_data = get_sparse_data(id);\n  if (sparse_data == nullptr) {\n    return IndexError_InvalidValue;\n  }\n\n  SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,\n                                     sparse_indices_buffer,\n                                     sparse_values_buffer, sparse_unit_size());\n\n  return 0;\n}\n\nconst void *HnswSparseSearcherEntity::get_vector_meta(node_id_t id) const {\n  size_t read_size = sparse_meta_size();\n  size_t offset = sparse_meta_size() * id;\n\n  const void *vec;\n  if (ailego_unlikely(sparse_vector_meta_->read(offset, &vec, read_size) !=\n                      read_size)) {\n    LOG_ERROR(\"Read vector from segment failed\");\n    return nullptr;\n 
 }\n  return vec;\n}\n\nint HnswSparseSearcherEntity::get_vector_meta(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_vector_meta(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\nint HnswSparseSearcherEntity::get_vector_metas(const node_id_t *ids,\n                                               uint32_t count,\n                                               const void **vecs) const {\n  ailego_assert_with(count <= segment_datas_.size(), \"invalid count\");\n\n  size_t read_size = sparse_meta_size();\n\n  for (uint32_t i = 0; i < count; ++i) {\n    segment_datas_[i].offset = sparse_meta_size() * ids[i];\n    segment_datas_[i].length = read_size;\n\n    ailego_assert_with(\n        segment_datas_[i].offset < sparse_vector_meta_->data_size(),\n        \"invalid offset\");\n  }\n  if (ailego_unlikely(!sparse_vector_meta_->read(&segment_datas_[0], count))) {\n    LOG_ERROR(\"Read vectors from segment failed\");\n    return IndexError_ReadData;\n  }\n  for (uint32_t i = 0; i < count; ++i) {\n    vecs[i] = segment_datas_[i].data;\n  }\n\n  return 0;\n}\n\nint HnswSparseSearcherEntity::get_vector_metas(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {\n  const void *vecs[count];\n  get_vector_metas(ids, count, vecs);\n  for (uint32_t i = 0; i < count; ++i) {\n    block_vecs.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));\n  }\n  return 0;\n}\n\nconst Neighbors HnswSparseSearcherEntity::get_neighbors(level_t level,\n                                                        node_id_t id) const {\n  if (level == 0) {\n    if (neighbors_in_memory_enabled_) {\n      auto hd = reinterpret_cast<const NeighborsHeader *>(\n          fixed_neighbors_.get() + neighbors_size() * id);\n      return {hd->neighbor_cnt, hd->neighbors};\n    }\n\n    const SparseGraphNeighborMeta *m;\n    if (ailego_unlikely(\n            neighbors_meta_->read(id * 
sizeof(SparseGraphNeighborMeta),\n                                  (const void **)(&m),\n                                  sizeof(SparseGraphNeighborMeta)) !=\n            sizeof(SparseGraphNeighborMeta))) {\n      LOG_ERROR(\"Read neighbors meta from segment failed\");\n      return {0, nullptr};\n    }\n\n    const void *data;\n    if (ailego_unlikely(neighbors_->read(m->offset, &data,\n                                         m->neighbor_cnt * sizeof(node_id_t)) !=\n                        m->neighbor_cnt * sizeof(node_id_t))) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return {0, nullptr};\n    }\n    return {static_cast<uint32_t>(m->neighbor_cnt),\n            reinterpret_cast<const node_id_t *>(data)};\n  }\n\n  //! Read level > 0 neighbors\n  const HnswSparseNeighborMeta *m;\n  if (ailego_unlikely(\n          upper_neighbors_meta_->read(id * sizeof(HnswSparseNeighborMeta),\n                                      (const void **)(&m),\n                                      sizeof(HnswSparseNeighborMeta)) !=\n          sizeof(HnswSparseNeighborMeta))) {\n    LOG_ERROR(\"Read neighbors meta from segment failed\");\n    return {0, nullptr};\n  }\n\n  ailego_assert_with(level <= m->level, \"invalid level\");\n  size_t offset = m->offset + (level - 1) * upper_neighbors_size();\n  ailego_assert_with(offset <= upper_neighbors_->data_size(), \"invalid offset\");\n  const void *data;\n  if (ailego_unlikely(\n          upper_neighbors_->read(offset, &data, upper_neighbors_size()) !=\n          upper_neighbors_size())) {\n    LOG_ERROR(\"Read neighbors from segment failed\");\n    return {0, nullptr};\n  }\n\n  auto hd = reinterpret_cast<const NeighborsHeader *>(data);\n  return {hd->neighbor_cnt, hd->neighbors};\n}\n\nint HnswSparseSearcherEntity::load(const IndexStorage::Pointer &container,\n                                   bool check_crc) {\n  container_ = container;\n\n  int ret = load_segments(check_crc);\n  if (ret != 0) {\n    return 
ret;\n  }\n\n  loaded_ = true;\n\n  LOG_INFO(\n      \"Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu \"\n      \"l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu \"\n      \"nodeSize=%zu sparesMetaSegmentSize=%zu keySegmentSize=%zu \"\n      \"neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu \"\n      \"sparseVectorSegmentSize=%zu\",\n      doc_cnt(), entry_point(), cur_max_level(), ef_construction(),\n      l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(), node_size(),\n      sparse_vector_meta_->data_size(), keys_->data_size(),\n      neighbors_->data_size(), neighbors_meta_->data_size(),\n      sparse_vectors_->data_size());\n\n  return 0;\n}\n\nint HnswSparseSearcherEntity::load_segments(bool check_crc) {\n  //! load header\n  const void *data = nullptr;\n  HNSWSparseHeader hd;\n  auto graph_hd_segment = container_->get(kSparseGraphHeaderSegmentId);\n  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {\n    LOG_ERROR(\"Miss or invalid segment %s\",\n              kSparseGraphHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                             sizeof(hd.graph)) != sizeof(hd.graph)) {\n    LOG_ERROR(\"Read segment %s failed\", kSparseGraphHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.graph, data, sizeof(hd.graph));\n\n  auto hnsw_hd_segment = container_->get(kSparseHnswHeaderSegmentId);\n  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Miss or invalid segment %s\", kSparseHnswHeaderSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),\n                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {\n    LOG_ERROR(\"Read segment %s failed\", kSparseHnswHeaderSegmentId.c_str());\n    return IndexError_ReadData;\n  }\n  memcpy(&hd.hnsw, data, 
sizeof(hd.hnsw));\n  *mutable_header() = hd;\n  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));\n\n  sparse_vector_meta_ = container_->get(kSparseGraphVectorMetaSegmentId);\n  if (!sparse_vector_meta_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kSparseGraphVectorMetaSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  keys_ = container_->get(kSparseGraphKeysSegmentId);\n  if (!keys_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kSparseGraphKeysSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  sparse_vectors_ = container_->get(kSparseGraphVectorsSegmentId);\n  if (!sparse_vectors_) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              kSparseGraphVectorsSegmentId.c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  neighbors_ = container_->get(kSparseGraphNeighborsSegmentId);\n  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kSparseGraphNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n  neighbors_meta_ = container_->get(kSparseGraphOffsetsSegmentId);\n  if (!neighbors_meta_ || neighbors_meta_->data_size() <\n                              sizeof(SparseGraphNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kSparseGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_ = container_->get(kSparseHnswNeighborsSegmentId);\n  if (!upper_neighbors_ ||\n      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or empty\",\n              kSparseHnswNeighborsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  upper_neighbors_meta_ = container_->get(kSparseHnswOffsetsSegmentId);\n  if (!upper_neighbors_meta_ ||\n      
upper_neighbors_meta_->data_size() <\n          sizeof(HnswSparseNeighborMeta) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kSparseHnswOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  mapping_ = container_->get(kSparseGraphMappingSegmentId);\n  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {\n    LOG_ERROR(\"IndexStorage get segment %s failed or invalid size\",\n              kSparseGraphMappingSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (check_crc) {\n    std::vector<SegmentPointer> segments;\n    segments.emplace_back(graph_hd_segment);\n    segments.emplace_back(hnsw_hd_segment);\n    segments.emplace_back(sparse_vector_meta_);\n    segments.emplace_back(keys_);\n\n    segments.emplace_back(sparse_vectors_);\n\n    segments.emplace_back(neighbors_);\n    segments.emplace_back(neighbors_meta_);\n    segments.emplace_back(upper_neighbors_);\n    segments.emplace_back(upper_neighbors_meta_);\n\n    if (!do_crc_check(segments)) {\n      LOG_ERROR(\"Check index crc failed, the index may broken\");\n      return IndexError_Runtime;\n    }\n  }\n\n  if (neighbors_in_memory_enabled_) {\n    int ret = load_and_flat_neighbors();\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  return 0;\n}\n\nint HnswSparseSearcherEntity::load_and_flat_neighbors() {\n  fixed_neighbors_.reset(\n      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},\n      std::default_delete<char[]>());\n  if (!fixed_neighbors_) {\n    LOG_ERROR(\"Malloc memory failed\");\n    return IndexError_NoMemory;\n  }\n\n  //! 
Get a new segment to release the buffer after loading neighbors\n  auto neighbors_meta = container_->get(kSparseGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const SparseGraphNeighborMeta *neighbors_index = nullptr;\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           neighbors_meta->data_size()) !=\n      neighbors_meta->data_size()) {\n    LOG_ERROR(\"Read segment %s data failed\",\n              kSparseGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  const char *neighbor_data;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);\n    if (ailego_unlikely(\n            neighbors_->read(neighbors_index[id].offset,\n                             reinterpret_cast<const void **>(&neighbor_data),\n                             rd_size) != rd_size)) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n    // copy level 0 neighbors to fixed size neighbors memory\n    char *dst = fixed_neighbors_.get() + neighbors_size() * id;\n    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;\n    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);\n  }\n\n  return 0;\n}\n\nint HnswSparseSearcherEntity::get_fixed_neighbors(\n    std::vector<uint32_t> *fixed_neighbors) const {\n  //! 
Get a new segemnt to release the buffer after loading neighbors\n  auto neighbors_meta = container_->get(kSparseGraphOffsetsSegmentId);\n  if (!neighbors_meta) {\n    LOG_ERROR(\"IndexStorage get segment graph.offsets failed\");\n    return IndexError_InvalidArgument;\n  }\n\n  const SparseGraphNeighborMeta *neighbors_index = nullptr;\n  size_t meta_size = neighbors_meta->data_size();\n  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),\n                           meta_size) != meta_size) {\n    LOG_ERROR(\"Read segment %s data failed\",\n              kSparseGraphOffsetsSegmentId.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  size_t fixed_neighbor_cnt = l0_neighbor_cnt();\n  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);\n\n  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();\n  size_t total_neighbor_cnt = 0;\n  for (node_id_t id = 0; id < doc_cnt(); ++id) {\n    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;\n    if (cur_neighbor_cnt == 0) {\n      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;\n      continue;\n    }\n    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);\n    const uint32_t *neighbors;\n    if (neighbors_->read(neighbors_index[id].offset,\n                         reinterpret_cast<const void **>(&neighbors),\n                         rd_size) != rd_size) {\n      LOG_ERROR(\"Read neighbors from segment failed\");\n      return IndexError_ReadData;\n    }\n\n    // copy level 0 neighbors to fixed size neighbors memory\n    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;\n    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);\n\n    (*fixed_neighbors)[neighbors_cnt_offset + id] = cur_neighbor_cnt;\n    total_neighbor_cnt += cur_neighbor_cnt;\n  }\n  LOG_INFO(\"total neighbor cnt: %zu, average neighbor cnt: %zu\",\n           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());\n\n  return 0;\n}\n\nbool 
HnswSparseSearcherEntity::do_crc_check(\n    std::vector<SegmentPointer> &segments) const {\n  constexpr size_t blk_size = 4096;\n  const void *data;\n  for (auto &segment : segments) {\n    size_t offset = 0;\n    size_t rd_size;\n    uint32_t crc = 0;\n    while (offset < segment->data_size()) {\n      size_t size = std::min(blk_size, segment->data_size() - offset);\n      if ((rd_size = segment->read(offset, &data, size)) <= 0) {\n        break;\n      }\n      offset += rd_size;\n      crc = ailego::Crc32c::Hash(data, rd_size, crc);\n    }\n    if (crc != segment->data_crc()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nconst HnswSparseEntity::Pointer HnswSparseSearcherEntity::clone() const {\n  auto keys = keys_->clone();\n  if (ailego_unlikely(!keys)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseGraphKeysSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n\n  auto mapping = mapping_->clone();\n  if (ailego_unlikely(!mapping)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseGraphMappingSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n\n  auto sparse_vector_meta = sparse_vector_meta_->clone();\n  if (ailego_unlikely(!sparse_vector_meta)) {\n    LOG_ERROR(\"clone segment %s failed\",\n              kSparseGraphVectorMetaSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n\n  auto sparse_vectors = sparse_vectors_->clone();\n  if (ailego_unlikely(!sparse_vectors)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseGraphVectorsSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n\n  auto neighbors = neighbors_->clone();\n  if (ailego_unlikely(!neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\",\n              kSparseGraphNeighborsSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n  auto upper_neighbors = upper_neighbors_->clone();\n  if (ailego_unlikely(!upper_neighbors)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseHnswNeighborsSegmentId.c_str());\n    
return HnswSparseEntity::Pointer();\n  }\n  auto neighbors_meta = neighbors_meta_->clone();\n  if (ailego_unlikely(!neighbors_meta)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseGraphOffsetsSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n  auto upper_neighbors_meta = upper_neighbors_meta_->clone();\n  if (ailego_unlikely(!upper_neighbors_meta)) {\n    LOG_ERROR(\"clone segment %s failed\", kSparseHnswOffsetsSegmentId.c_str());\n    return HnswSparseEntity::Pointer();\n  }\n\n  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,\n                                   upper_neighbors_meta};\n  SegmentGroupParam dense_neighbor_group{nullptr, nullptr, nullptr, nullptr};\n  SegmentGroupParam sparse_neighbor_group{nullptr, nullptr, nullptr, nullptr};\n\n  HnswSparseSearcherEntity *entity = new (std::nothrow)\n      HnswSparseSearcherEntity(header(), keys, mapping, neighbor_group,\n                               sparse_vector_meta, sparse_vectors,\n                               fixed_neighbors_, neighbors_in_memory_enabled_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswSparseSearcherEntity new failed\");\n  }\n\n  return HnswSparseEntity::Pointer(entity);\n}\n\n//! Get vector sparse feature data by chunk index and offset\nconst void *HnswSparseSearcherEntity::get_sparse_data(uint64_t offset,\n                                                      uint32_t len) const {\n  const void *sparse_data = nullptr;\n\n  uint32_t real_length = sparse_vectors_->read(offset, &sparse_data, len);\n\n  if (ailego_unlikely(real_length != len)) {\n    LOG_ERROR(\"Read sparse data from segment failed, %u vs %u\", real_length,\n              len);\n    return nullptr;\n  }\n\n  return sparse_data;\n}\n\nint HnswSparseSearcherEntity::get_sparse_data(\n    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_sparse_data(offset, len);\n  block.reset((void *)vec);\n  return 0;\n}\n\n//! 
Get sparse data from id\nconst void *HnswSparseSearcherEntity::get_sparse_data(node_id_t id) const {\n  const void *vec = get_vector_meta(id);\n  if (vec == nullptr) {\n    LOG_ERROR(\"get vector failed, id: %u\", id);\n\n    return nullptr;\n  }\n\n  auto sparse_data = get_sparse_data_from_vector(vec);\n\n  return sparse_data.first;\n}\n\nint HnswSparseSearcherEntity::get_sparse_data(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  const void *vec = get_sparse_data(id);\n  block.reset((void *)vec);\n  return 0;\n}\n\n//! Get sparse data from vector\nstd::pair<const void *, uint32_t>\nHnswSparseSearcherEntity::get_sparse_data_from_vector(const void *vec) const {\n  if (vec == nullptr) {\n    LOG_ERROR(\"vec is nullptr\");\n\n    return std::make_pair(nullptr, 0);\n  }\n\n  const char *vec_ptr = reinterpret_cast<const char *>(vec);\n\n  uint64_t offset = *((uint64_t *)(vec_ptr));\n  uint32_t sparse_vector_len = *((uint32_t *)(vec_ptr + sizeof(uint64_t)));\n\n  const void *sparse_data = get_sparse_data(offset, sparse_vector_len);\n  if (ailego_unlikely(sparse_data == nullptr)) {\n    LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)offset,\n              sparse_vector_len);\n\n    return std::make_pair(nullptr, 0);\n  }\n\n  return std::make_pair(sparse_data, sparse_vector_len);\n}\n\nint HnswSparseSearcherEntity::get_sparse_data_from_vector(\n    const void *vec, IndexStorage::MemoryBlock &block,\n    int &sparse_length) const {\n  std::pair<const void *, uint32_t> sparse_data =\n      get_sparse_data_from_vector(vec);\n  block.reset((void *)sparse_data.first);\n  sparse_length = sparse_data.second;\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"hnsw_sparse_builder_entity.h\"\n#include \"hnsw_sparse_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseSearcherEntity : public HnswSparseEntity {\n public:\n  using Pointer = std::shared_ptr<HnswSparseSearcherEntity>;\n  using SegmentPointer = IndexStorage::Segment::Pointer;\n\n public:\n  struct SegmentGroupParam {\n    SegmentGroupParam(SegmentPointer neighbors_in,\n                      SegmentPointer neighbors_meta_in,\n                      SegmentPointer upper_neighbors_in,\n                      SegmentPointer upper_neighbors_meta_in)\n        : neighbors{neighbors_in},\n          neighbors_meta{neighbors_meta_in},\n          upper_neighbors{upper_neighbors_in},\n          upper_neighbors_meta{upper_neighbors_meta_in} {}\n\n    SegmentPointer neighbors{nullptr};\n    SegmentPointer neighbors_meta{nullptr};\n    SegmentPointer upper_neighbors{nullptr};\n    SegmentPointer upper_neighbors_meta{nullptr};\n  };\n\n  //! Constructor\n  HnswSparseSearcherEntity();\n\n  //! Make a copy of searcher entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrently\n  virtual const HnswSparseEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! 
Get vector local id by key\n  node_id_t get_id(key_t key) const;\n\n  //! Get sparse vector feature data by key\n  virtual int get_sparse_vector_by_key(\n      key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n      std::string *sparse_values_buffer) const override;\n\n  //! Get vector feature data by id\n  virtual const void *get_vector_meta(node_id_t id) const override;\n\n  virtual int get_vector_meta(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! Get vector feature data by id\n  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,\n                               const void **vecs) const override;\n\n  virtual int get_vector_metas(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;\n\n  //! Get vector sparse feature data by chunk index and offset\n  virtual const void *get_sparse_data(uint64_t offset,\n                                      uint32_t len) const override;\n\n  //! Get sparse data from id\n  virtual const void *get_sparse_data(node_id_t id) const override;\n\n  virtual int get_sparse_data(uint64_t offset, uint32_t len,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  virtual int get_sparse_data(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! Get sparse data from vector\n  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(\n      const void *vec) const override;\n\n  virtual int get_sparse_data_from_vector(const void *vec,\n                                          IndexStorage::MemoryBlock &block,\n                                          int &sparse_length) const override;\n\n  //! 
Get the node id's neighbors on graph level\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n  virtual int load(const IndexStorage::Pointer &container,\n                   bool check_crc) override;\n\n  int load_segments(bool check_crc);\n\n  virtual int cleanup(void) override;\n\n public:\n  bool is_loaded() const {\n    return loaded_;\n  }\n\n  void set_neighbors_in_memory(bool enabled) {\n    neighbors_in_memory_enabled_ = enabled;\n  }\n\n  //! get fixed length neighbors data\n  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;\n\n private:\n  //! Constructor\n  HnswSparseSearcherEntity(const HNSWSparseHeader &hd,\n                           const SegmentPointer &keys,\n                           const SegmentPointer &mapping,\n                           const SegmentGroupParam &neighbor_group,\n                           const SegmentPointer &sparse_vector_meta,\n                           const SegmentPointer &sparse_vectors,\n                           const std::shared_ptr<char> &fixed_neighbors,\n                           bool neighbors_in_memory_enabled)\n      : HnswSparseEntity(hd),\n        keys_(keys),\n        mapping_(mapping),\n        neighbors_(neighbor_group.neighbors),\n        neighbors_meta_(neighbor_group.neighbors_meta),\n        upper_neighbors_(neighbor_group.upper_neighbors),\n        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),\n        sparse_vector_meta_(sparse_vector_meta),\n        sparse_vectors_(sparse_vectors),\n        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {\n    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),\n                          IndexStorage::SegmentData(0U, 0U));\n    fixed_neighbors_ = fixed_neighbors;\n  }\n\n  bool do_crc_check(std::vector<SegmentPointer> &segments) const;\n\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + 
l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! If neighbors_in_memory_enabled, load the level0 neighbors to memory\n  int load_and_flat_neighbors(void);\n\n public:\n  HnswSparseSearcherEntity(const HnswSparseSearcherEntity &) = delete;\n  HnswSparseSearcherEntity &operator=(const HnswSparseSearcherEntity &) =\n      delete;\n\n private:\n  IndexStorage::Pointer container_{};\n\n  SegmentPointer keys_{};\n  SegmentPointer mapping_{};\n\n  SegmentPointer neighbors_{};\n  SegmentPointer neighbors_meta_{};\n  SegmentPointer upper_neighbors_{};\n  SegmentPointer upper_neighbors_meta_{};\n\n  SegmentPointer sparse_vector_meta_{};\n  SegmentPointer sparse_vectors_{};\n\n  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};\n  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors\n  bool neighbors_in_memory_enabled_{false};\n  bool loaded_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_streamer.h\"\n#include <iostream>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/pattern/defer.h>\n#include <ailego/utility/memory_helper.h>\n#include \"hnsw_sparse_algorithm.h\"\n#include \"hnsw_sparse_context.h\"\n#include \"hnsw_sparse_dist_calculator.h\"\n#include \"hnsw_sparse_index_provider.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseStreamer::HnswSparseStreamer() : entity_(stats_) {}\n\nHnswSparseStreamer::~HnswSparseStreamer() {\n  if (state_ == STATE_INITED) {\n    this->cleanup();\n  }\n}\n\nint HnswSparseStreamer::init(const IndexMeta &imeta,\n                             const ailego::Params &params) {\n  meta_ = imeta;\n  meta_.set_streamer(\"HnswSparseStreamer\", HnswSparseEntity::kRevision, params);\n\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, &max_index_size_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT,\n             &upper_max_neighbor_cnt_);\n  float multiplier = HnswSparseEntity::kDefaultL0MaxNeighborCntMultiplier;\n  params.get(PARAM_HNSW_SPARSE_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,\n             &multiplier);\n  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;\n\n  multiplier = HnswSparseEntity::kDefaultNeighborPruneMultiplier;\n  params.get(PARAM_HNSW_SPARSE_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);\n  size_t prune_cnt 
= multiplier * upper_max_neighbor_cnt_;\n  scaling_factor_ = upper_max_neighbor_cnt_;\n  params.get(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, &scaling_factor_);\n\n  params.get(PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_EF, &ef_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, &ef_construction_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,\n             &bf_negative_prob_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD,\n             &bruteforce_threshold_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);\n\n  params.get(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, &chunk_size_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, &filter_same_key_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE,\n             &get_vector_enabled_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);\n  params.get(PARAM_HNSW_SPARSE_STREAMER_FORCE_PADDING_RESULT_ENABLE,\n             &force_padding_topk_enabled_);\n\n  query_filtering_enabled_ =\n      params.get(PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO,\n                 &query_filtering_ratio_);\n\n  params.get(PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);\n  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str(),\n              PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    return IndexError_InvalidArgument;\n  } else if (docs_soft_limit_ == 0UL) {\n    docs_soft_limit_ =\n        
docs_hard_limit_ * HnswSparseEntity::kDefaultDocsSoftLimitRatio;\n  }\n\n  if (ef_ == 0U) {\n    ef_ = HnswSparseEntity::kDefaultEf;\n  }\n  if (ef_construction_ == 0U) {\n    ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;\n  }\n  if (upper_max_neighbor_cnt_ == 0U) {\n    upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;\n  }\n  if (upper_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"[%s] must be in range (0,%d)\",\n              PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              HnswSparseEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (l0_max_neighbor_cnt_ == 0U) {\n    l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultL0MaxNeighborCnt;\n  }\n  if (l0_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {\n    LOG_ERROR(\"UpperNeighborCnt must be in range (0,%d)\",\n              HnswSparseEntity::kMaxNeighborCnt);\n    return IndexError_InvalidArgument;\n  }\n  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {\n    LOG_ERROR(\"[%s]-[%u] must be <= [%s]-[%u]\",\n              PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT.c_str(),\n              min_neighbor_cnt_,\n              PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),\n              upper_max_neighbor_cnt_);\n    return IndexError_InvalidArgument;\n  }\n\n  if (bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {\n    LOG_ERROR(\n        \"[%s] must be in range (0,1)\",\n        PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (scaling_factor_ == 0U) {\n    scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;\n  }\n  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {\n    LOG_ERROR(\"[%s] must be in range [5,1000]\",\n              PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {\n    
LOG_ERROR(\"[%s] must be in range (0.0f,1.0f]\",\n              PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (max_scan_limit_ < min_scan_limit_) {\n    LOG_ERROR(\"[%s] must be >= [%s]\",\n              PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT.c_str(),\n              PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  if (prune_cnt == 0UL) {\n    prune_cnt = upper_max_neighbor_cnt_;\n  }\n  if (chunk_size_ == 0UL) {\n    chunk_size_ = HnswSparseEntity::kDefaultChunkSize;\n  }\n  if (chunk_size_ > HnswSparseEntity::kMaxChunkSize) {\n    LOG_ERROR(\"[%s] must be < %zu\",\n              PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE.c_str(),\n              HnswSparseEntity::kMaxChunkSize);\n    return IndexError_InvalidArgument;\n  }\n\n  if (query_filtering_enabled_ &&\n      (query_filtering_ratio_ <= 0.0f || query_filtering_ratio_ >= 1.0f)) {\n    LOG_ERROR(\"[%s] must be in range (0, 1)\",\n              PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  entity_.set_ef_construction(ef_construction_);\n  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);\n  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);\n  entity_.set_scaling_factor(scaling_factor_);\n  entity_.set_prune_cnt(prune_cnt);\n\n  entity_.set_chunk_size(chunk_size_);\n  entity_.set_filter_same_key(filter_same_key_);\n  entity_.set_get_vector(get_vector_enabled_);\n  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);\n\n  entity_.set_sparse_meta_size(HnswSparseEntity::kSparseMetaSize);\n  entity_.set_sparse_unit_size(meta_.unit_size());\n\n  int ret = entity_.init(max_index_size_, docs_hard_limit_);\n  if (ret != 0) {\n    LOG_ERROR(\"Hnsw entity init failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  LOG_DEBUG(\n      \"Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu \"\n      \"efConstruction=%u ef=%u 
l0NeighborCnt=%u upperNeighborCnt=%u \"\n      \"scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu \"\n      \"bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f \"\n      \"checkCrcEnabled=%d pruneSize=%zu chunkSize=%zu \"\n      \"filterSameKey=%u getVectorEnabled=%u \"\n      \"minNeighborCount=%u forcePadding=%u filteringRatio=%f\",\n      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,\n      ef_, l0_max_neighbor_cnt_, upper_max_neighbor_cnt_, scaling_factor_,\n      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,\n      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,\n      chunk_size_, filter_same_key_, get_vector_enabled_, min_neighbor_cnt_,\n      force_padding_topk_enabled_, query_filtering_ratio_);\n\n  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));\n\n  ret = alg_->init();\n  if (ret != 0) {\n    return ret;\n  }\n\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswSparseStreamer::cleanup(void) {\n  if (state_ == STATE_OPENED) {\n    this->close();\n  }\n\n  LOG_INFO(\"HnswSparseStreamer cleanup\");\n\n  meta_.clear();\n  metric_.reset();\n  stats_.clear();\n  entity_.cleanup();\n\n  if (alg_) {\n    alg_->cleanup();\n  }\n\n  max_index_size_ = 0UL;\n  docs_hard_limit_ = HnswSparseEntity::kDefaultDocsHardLimit;\n  docs_soft_limit_ = 0UL;\n  upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;\n  ef_ = HnswSparseEntity::kDefaultEf;\n  ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;\n  bf_enabled_ = false;\n  scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;\n  bruteforce_threshold_ = HnswSparseEntity::kDefaultBruteForceThreshold;\n  max_scan_limit_ = HnswSparseEntity::kDefaultMaxScanLimit;\n  min_scan_limit_ = HnswSparseEntity::kDefaultMinScanLimit;\n  chunk_size_ = HnswSparseEntity::kDefaultChunkSize;\n  bf_negative_prob_ = HnswSparseEntity::kDefaultBFNegativeProbability;\n  max_scan_ratio_ 
= HnswSparseEntity::kDefaultScanRatio;\n  state_ = STATE_INIT;\n  check_crc_enabled_ = false;\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n\n  sparse_neighbor_ratio_ = HnswSparseEntity::kDefaultSparseNeighborRatio;\n  sparse_neighbor_cnt_ = 0UL;\n  sparse_min_neighbor_cnt_ = 0UL;\n  upper_sparse_neighbor_cnt_ = 0UL;\n\n  return 0;\n}\n\nint HnswSparseStreamer::open(IndexStorage::Pointer stg) {\n  LOG_INFO(\"HnswSparseStreamer open\");\n\n  if (ailego_unlikely(state_ != STATE_INITED)) {\n    LOG_ERROR(\"Open storage failed, init streamer first!\");\n    return IndexError_NoReady;\n  }\n  int ret = entity_.open(std::move(stg), check_crc_enabled_);\n  if (ret != 0) {\n    return ret;\n  }\n  IndexMeta index_meta;\n  ret = entity_.get_index_meta(&index_meta);\n  if (ret == IndexError_NoExist) {\n    // Set IndexMeta for the new index\n    ret = entity_.set_index_meta(meta_);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to set index meta for %s\", IndexError::What(ret));\n      return ret;\n    }\n  } else if (ret != 0) {\n    LOG_ERROR(\"Failed to get index meta for %s\", IndexError::What(ret));\n    return ret;\n  } else {\n    if (index_meta.metric_name() != meta_.metric_name() ||\n        index_meta.data_type() != meta_.data_type()) {\n      LOG_ERROR(\"IndexMeta mismatch from the previous in index\");\n      return IndexError_Mismatch;\n    }\n    // The IndexMetric Params may be updated like MipsSquaredEuclidean\n    auto metric_params = index_meta.metric_params();\n    metric_params.merge(meta_.metric_params());\n    meta_.set_metric(index_meta.metric_name(), 0, metric_params);\n  }\n\n  metric_ = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create metric %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  ret = metric_->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init metric, ret=%d\", ret);\n    return ret;\n  }\n\n  if 
(!metric_->sparse_distance()) {\n    LOG_ERROR(\"Invalid metric distance\");\n    return IndexError_InvalidArgument;\n  }\n\n  add_distance_ = metric_->sparse_distance();\n  search_distance_ = add_distance_;\n\n  if (metric_->query_metric() && metric_->query_metric()->distance()) {\n    search_distance_ = metric_->query_metric()->sparse_distance();\n  }\n\n  state_ = STATE_OPENED;\n  magic_ = IndexContext::GenerateMagic();\n\n  return 0;\n}\n\nint HnswSparseStreamer::close(void) {\n  LOG_INFO(\"HnswSparseStreamer close\");\n\n  stats_.clear();\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  int ret = entity_.close();\n  if (ret != 0) {\n    return ret;\n  }\n  state_ = STATE_INITED;\n\n  return 0;\n}\n\nint HnswSparseStreamer::flush(uint64_t checkpoint) {\n  LOG_INFO(\"HnswSparseStreamer flush checkpoint=%zu\", (size_t)checkpoint);\n\n  meta_.set_metric(metric_->name(), 0, metric_->params());\n  entity_.set_index_meta(meta_);\n  return entity_.flush(checkpoint);\n}\n\nint HnswSparseStreamer::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"HnswSparseStreamer dump\");\n\n  shared_mutex_.lock();\n  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });\n\n  meta_.set_searcher(\"HnswSparseSearcher\", HnswSparseEntity::kRevision,\n                     ailego::Params());\n\n  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n  return entity_.dump(dumper);\n}\n\nIndexStreamer::Context::Pointer HnswSparseStreamer::create_context(void) const {\n  if (ailego_unlikely(state_ != STATE_OPENED)) {\n    LOG_ERROR(\"Create context failed, open storage first!\");\n    return Context::Pointer();\n  }\n\n  HnswSparseEntity::Pointer entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"CreateContext clone init failed\");\n    return Context::Pointer();\n  }\n  HnswSparseContext *ctx =\n      new 
(std::nothrow) HnswSparseContext(metric_, entity);\n  if (ailego_unlikely(ctx == nullptr)) {\n    LOG_ERROR(\"Failed to new HnswSparseContext\");\n    return Context::Pointer();\n  }\n  ctx->set_ef(ef_);\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_filter_mode(bf_enabled_ ? VisitFilter::BloomFilter\n                                   : VisitFilter::ByteMap);\n  ctx->set_filter_negative_probability(bf_negative_prob_);\n  ctx->set_magic(magic_);\n  ctx->set_force_padding_topk(force_padding_topk_enabled_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n\n  if (ailego_unlikely(ctx->init(HnswSparseContext::kSparseStreamerContext)) !=\n      0) {\n    LOG_ERROR(\"Init HnswSparseContext failed\");\n    delete ctx;\n    return Context::Pointer();\n  }\n\n  return Context::Pointer(ctx);\n}\n\nIndexStreamer::SparseProvider::Pointer\nHnswSparseStreamer::create_sparse_provider(void) const {\n  LOG_DEBUG(\"HnswSparseStreamer create sparse provider\");\n\n  auto entity = entity_.clone();\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"Clone HnswSparseEntity failed\");\n    return SparseProvider::Pointer();\n  }\n  return SparseProvider::Pointer(\n      new HnswSparseIndexProvider(meta_, entity, \"HnswSparseStreamer\"));\n}\n\nint HnswSparseStreamer::update_context(HnswSparseContext *ctx) const {\n  const HnswSparseEntity::Pointer entity = entity_.clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone search context entity\");\n    return IndexError_Runtime;\n  }\n  ctx->set_max_scan_limit(max_scan_limit_);\n  ctx->set_min_scan_limit(min_scan_limit_);\n  ctx->set_max_scan_ratio(max_scan_ratio_);\n  ctx->set_bruteforce_threshold(bruteforce_threshold_);\n  return ctx->update_context(HnswSparseContext::kSparseStreamerContext, meta_,\n                             metric_, entity, magic_);\n}\n\n//! 
Add a vector with id  into index with sparse inputs\nint HnswSparseStreamer::add_with_id_impl(uint32_t id,\n                                         const uint32_t sparse_count,\n                                         const uint32_t *sparse_indices,\n                                         const void *sparse_query,\n                                         const IndexQueryMeta &qmeta,\n                                         Context::Pointer &context) {\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {\n    LOG_WARN(\n        \"Failed to add sparse vector: number of non-zero elements (%u) exceeds \"\n        \"maximum allowed (%u), id=%u\",\n        sparse_count, HnswSparseEntity::kSparseMaxDimSize, id);\n    return IndexError_InvalidValue;\n  }\n\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n                PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n               PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_);\n\n  std::string sparse_query_buffer;\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,\n                                   entity_.sparse_unit_size(),\n                                   sparse_query_buffer);\n\n  ctx->reset_query(sparse_query_buffer.data());\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n\n  level_t level = alg_->get_random_level();\n  ret =\n      entity_.add_vector_with_id(level, id, sparse_query_buffer, sparse_count);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Runtime;\n  }\n  
(*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n//! Add a vector into index with sparse inputs\nint HnswSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count,\n                                 const uint32_t *sparse_indices,\n                                 const void *sparse_query,\n                                 const IndexQueryMeta &qmeta,\n                                 Context::Pointer &context) {\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {\n    LOG_WARN(\n        \"Failed to add sparse vector: number of non-zero elements (%u) exceeds \"\n        \"maximum allowed (%u), key=%zu\",\n        sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)pkey);\n    return IndexError_InvalidValue;\n  }\n\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {\n    if (entity_.doc_cnt() >= docs_hard_limit_) {\n      LOG_ERROR(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n                PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str());\n      const std::lock_guard<std::mutex> lk(mutex_);\n      (*stats_.mutable_discarded_count())++;\n      return IndexError_IndexFull;\n    } else {\n      LOG_WARN(\"Current docs %u exceed [%s]\", entity_.doc_cnt(),\n               PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());\n    }\n  }\n  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {\n    LOG_ERROR(\"Cannot add vector while dumping index\");\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Unsupported;\n  }\n  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(add_distance_);\n\n  std::string sparse_query_buffer;\n  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,\n                                   entity_.sparse_unit_size(),\n                                   sparse_query_buffer);\n\n  ctx->reset_query(sparse_query_buffer.data());\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n\n  level_t level = alg_->get_random_level();\n  node_id_t id;\n  ret = entity_.add_vector(level, pkey, sparse_query_buffer, sparse_count, &id);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add vector failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  ret = alg_->add_node(id, level, ctx);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Hnsw streamer add node failed\");\n    (*stats_.mutable_discarded_count())++;\n    return ret;\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    (*stats_.mutable_discarded_count())++;\n    return IndexError_Runtime;\n  }\n  
(*stats_.mutable_added_count())++;\n\n  return 0;\n}\n\n//! Similarity search with sparse inputs\nint HnswSparseStreamer::search_impl(\n    const uint32_t sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) const {\n  return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                     context);\n}\n\n//! Similarity search with sparse inputs\nint HnswSparseStreamer::search_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n\n  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {\n    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,\n                          count, context);\n  }\n\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_);\n  ctx->resize_results(count);\n  ctx->check_need_adjuct_ctx(entity_.doc_cnt());\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  for (size_t q = 0; q < count; ++q) {\n    std::string sparse_query_buffer;\n    std::string sparse_query_filtered_buffer;\n\n    SparseUtility::TransSparseFormat(\n        sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n        entity_.sparse_unit_size(), sparse_query_buffer);\n\n    if (query_filtering_enabled_) {\n      if (!SparseUtility::FilterSparseQuery(\n              sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n              qmeta.data_type(), entity_.sparse_unit_size(),\n              query_filtering_ratio_, &sparse_query_filtered_buffer)) {\n        LOG_ERROR(\"Hnsw filtering failed\");\n\n        return IndexError_Runtime;\n      }\n\n      ctx->reset_query(sparse_query_filtered_buffer.data());\n    } else {\n      ctx->reset_query(sparse_query_buffer.data());\n    }\n\n    ret = alg_->search(ctx);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Hnsw searcher fast search failed\");\n      return ret;\n    }\n\n    if (query_filtering_enabled_) {\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->recal_topk_dist();\n    }\n\n    ctx->topk_to_result(q);\n\n    sparse_indices_tmp += sparse_count[q];\n    sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                       sparse_count[q] * qmeta.unit_size();\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\n//! 
Similarity search with sparse inputs\nint HnswSparseStreamer::search_bf_impl(\n    const uint32_t sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const IndexQueryMeta &qmeta,\n    IndexStreamer::Context::Pointer &context) const {\n  return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,\n                        context);\n}\n\n//! Similarity search with sparse inputs\nint HnswSparseStreamer::search_bf_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,\n    IndexStreamer::Context::Pointer &context) const {\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_);\n  ctx->resize_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_Runtime;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->group_topk_heaps().clear();\n\n 
     for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().dist(id);\n\n          std::string group_id = group_by(id);\n\n          auto &topk_heap = ctx->group_topk_heaps()[group_id];\n          if (topk_heap.empty()) {\n            topk_heap.limit(ctx->group_topk());\n          }\n          topk_heap.emplace_back(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  } else {\n    auto &filter = ctx->filter();\n    auto &topk = ctx->topk_heap();\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      topk.clear();\n      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n        if (entity_.get_key(id) == kInvalidKey) {\n          continue;\n        }\n\n        if (!filter.is_valid() || !filter(entity_.get_key(id))) {\n          dist_t dist = ctx->dist_calculator().dist(id);\n          topk.emplace(id, dist);\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n\n    if (ailego_unlikely(ctx->error())) {\n      return IndexError_Runtime;\n    }\n  }\n\n  return 0;\n}\n\n//! 
Linear search by primary keys\nint HnswSparseStreamer::search_bf_by_p_keys_impl(\n    const uint32_t sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, ContextPointer &context) const {\n  return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,\n                                  p_keys, qmeta, 1, context);\n}\n\n//! Linear search by primary keys with sparse inputs\nint HnswSparseStreamer::search_bf_by_p_keys_impl(\n    const uint32_t *sparse_count, const uint32_t *sparse_indices,\n    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,\n    const IndexQueryMeta &qmeta, uint32_t count,\n    Context::Pointer &context) const {\n  int ret = check_params(qmeta);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  if (ailego_unlikely(p_keys.size() != count)) {\n    LOG_ERROR(\"The size of p_keys is not equal to count\");\n    return IndexError_InvalidArgument;\n  }\n\n  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());\n  ailego_do_if_false(ctx) {\n    LOG_ERROR(\"Cast context to HnswSparseContext failed\");\n    return IndexError_Cast;\n  }\n  if (ctx->magic() != magic_) {\n    //! 
context is created by another searcher or streamer\n    ret = update_context(ctx);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  ctx->clear();\n  ctx->update_dist_caculator_distance(search_distance_);\n  ctx->resize_results(count);\n\n  const uint32_t *sparse_indices_tmp = sparse_indices;\n  const void *sparse_query_tmp = sparse_query;\n\n  if (ctx->group_by_search()) {\n    if (!ctx->group_by().is_valid()) {\n      LOG_ERROR(\"Invalid group-by function\");\n      return IndexError_Runtime;\n    }\n\n    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {\n      return ctx->group_by()(entity_.get_key(id));\n    };\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      ctx->group_topk_heaps().clear();\n\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        uint64_t pk = p_keys[q][idx];\n        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().dist(id);\n\n            std::string group_id = group_by(id);\n\n            auto &topk_heap = ctx->group_topk_heaps()[group_id];\n            if (topk_heap.empty()) {\n              topk_heap.limit(ctx->group_topk());\n            }\n            topk_heap.emplace_back(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  } else {\n    auto &filter = ctx->filter();\n    auto &topk = ctx->topk_heap();\n\n    for (size_t q = 0; q < count; ++q) {\n      std::string 
sparse_query_buffer;\n      SparseUtility::TransSparseFormat(\n          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,\n          entity_.sparse_unit_size(), sparse_query_buffer);\n\n      ctx->reset_query(sparse_query_buffer.data());\n      topk.clear();\n      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {\n        key_t pk = p_keys[q][idx];\n        if (!filter.is_valid() || !filter(pk)) {\n          node_id_t id = entity_.get_id(pk);\n          if (id != kInvalidNodeId) {\n            dist_t dist = ctx->dist_calculator().dist(id);\n            topk.emplace(id, dist);\n          }\n        }\n      }\n      ctx->topk_to_result(q);\n\n      sparse_indices_tmp += sparse_count[q];\n      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +\n                         sparse_count[q] * qmeta.unit_size();\n    }\n  }\n\n  if (ailego_unlikely(ctx->error())) {\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nvoid HnswSparseStreamer::print_debug_info() {\n  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {\n    Neighbors neighbours = entity_.get_neighbors(0, id);\n    std::cout << \"node: \" << id << \"; \";\n    for (uint32_t i = 0; i < neighbours.size(); ++i) {\n      std::cout << neighbours[i];\n\n      if (i == neighbours.size() - 1) {\n        std::cout << std::endl;\n      } else {\n        std::cout << \", \";\n      }\n    }\n  }\n\n  // entity_.print_key_map();\n}\n\nINDEX_FACTORY_REGISTER_STREAMER(HnswSparseStreamer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <ailego/parallel/lock.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_sparse_algorithm.h\"\n#include \"hnsw_sparse_streamer_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseStreamer : public IndexStreamer {\n public:\n  using ContextPointer = IndexStreamer::Context::Pointer;\n\n  HnswSparseStreamer(void);\n  virtual ~HnswSparseStreamer(void);\n\n  HnswSparseStreamer(const HnswSparseStreamer &streamer) = delete;\n  HnswSparseStreamer &operator=(const HnswSparseStreamer &streamer) = delete;\n\n protected:\n  //! Initialize Streamer\n  int init(const IndexMeta &imeta, const ailego::Params &params) override;\n\n  //! Cleanup Streamer\n  int cleanup(void) override;\n\n  //! Create a context\n  Context::Pointer create_context(void) const override;\n\n  //! Create a new sparse iterator\n  IndexStreamer::SparseProvider::Pointer create_sparse_provider(\n      void) const override;\n\n  //! Add a vector into index with sparse inputs\n  int add_impl(uint64_t pkey, const uint32_t sparse_count,\n               const uint32_t *sparse_indices, const void *sparse_query,\n               const IndexQueryMeta &qmeta, Context::Pointer &context) override;\n\n  int add_with_id_impl(uint32_t id, const uint32_t sparse_count,\n                       const uint32_t *sparse_indices, const void *sparse_query,\n                       const IndexQueryMeta &qmeta,\n                       Context::Pointer &context) override;\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const override;\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const override;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const override;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t *sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta, uint32_t count,\n                     Context::Pointer &context) const override;\n\n  //! Linear search by primary keys\n  int search_bf_by_p_keys_impl(const uint32_t sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta,\n                               ContextPointer &context) const override;\n\n  //! Linear search by primary keys with sparse inputs\n  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,\n                               const uint32_t *sparse_indices,\n                               const void *sparse_query,\n                               const std::vector<std::vector<uint64_t>> &p_keys,\n                               const IndexQueryMeta &qmeta, uint32_t count,\n                               ContextPointer &context) const override;\n\n  //! Fetch sparse vector by key\n  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                        std::string *sparse_indices_buffer,\n                        std::string *sparse_values_buffer) const override {\n    return entity_.get_sparse_vector_by_key(\n        key, sparse_count, sparse_indices_buffer, sparse_values_buffer);\n  }\n\n  //! Fetch vector by id\n  int get_sparse_vector_by_id(\n      uint32_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n      std::string *sparse_values_buffer) const override {\n    return entity_.get_sparse_vector_by_id(\n        id, sparse_count, sparse_indices_buffer, sparse_values_buffer);\n  }\n\n  //! Open index from file path\n  int open(IndexStorage::Pointer stg) override;\n\n  //! Close file\n  int close(void) override;\n\n  //! flush file\n  int flush(uint64_t checkpoint) override;\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve sparse meta of index\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Print each node's level-0 neighbor list to stdout (debug aid)\n  void print_debug_info() override;\n\n private:\n  //! Check that the query data type matches the index meta; returns\n  //! IndexError_Mismatch on mismatch, 0 otherwise\n  inline int check_params(const IndexQueryMeta &qmeta) const {\n    if (ailego_unlikely(qmeta.data_type() != meta_.data_type())) {\n      LOG_ERROR(\"Unsupported query meta\");\n      return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  //! Check that all count entries of sparse_count are zero; logs the first\n  //! non-zero entry and returns IndexError_InvalidArgument, 0 otherwise\n  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,\n                                        uint32_t count) const {\n    for (uint32_t i = 0; i < count; ++i) {\n      if (sparse_count[i] != 0) {\n        LOG_ERROR(\"Sparse cout is not empty. Index: %u, Sparse Count: %u\", i,\n                  sparse_count[i]);\n        return IndexError_InvalidArgument;\n      }\n    }\n\n    return 0;\n  }\n\n private:\n  //! To share ctx across streamer/searcher, we need to update the context for\n  //! current streamer/searcher\n  int update_context(HnswSparseContext *ctx) const;\n\n private:\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };\n  class Stats : public IndexStreamer::Stats {\n   public:\n    void clear(void) {\n      set_revision_id(0u);\n      set_loaded_count(0u);\n      set_added_count(0u);\n      set_discarded_count(0u);\n      set_index_size(0u);\n      set_dumped_size(0u);\n      set_check_point(0u);\n      set_create_time(0u);\n      set_update_time(0u);\n      clear_attributes();\n    }\n  };\n\n  HnswSparseStreamerEntity entity_;\n  HnswSparseAlgorithm::UPointer alg_;\n  IndexMeta meta_{};\n  IndexMetric::Pointer metric_{};\n\n  IndexMetric::MatrixSparseDistance add_distance_{};\n  IndexMetric::MatrixSparseDistance search_distance_{};\n  Stats stats_{};\n  std::mutex mutex_{};\n\n  size_t max_index_size_{0UL};\n  size_t chunk_size_{HnswSparseEntity::kDefaultChunkSize};\n  size_t docs_hard_limit_{HnswSparseEntity::kDefaultDocsHardLimit};\n  size_t docs_soft_limit_{0UL};\n  uint32_t min_neighbor_cnt_{0u};\n  uint32_t upper_max_neighbor_cnt_{\n      HnswSparseEntity::kDefaultUpperMaxNeighborCnt};\n  uint32_t l0_max_neighbor_cnt_{HnswSparseEntity::kDefaultL0MaxNeighborCnt};\n  uint32_t ef_{HnswSparseEntity::kDefaultEf};\n  uint32_t ef_construction_{HnswSparseEntity::kDefaultEfConstruction};\n  uint32_t scaling_factor_{HnswSparseEntity::kDefaultScalingFactor};\n  size_t bruteforce_threshold_{HnswSparseEntity::kDefaultBruteForceThreshold};\n  size_t max_scan_limit_{HnswSparseEntity::kDefaultMaxScanLimit};\n  size_t min_scan_limit_{HnswSparseEntity::kDefaultMinScanLimit};\n  float bf_negative_prob_{HnswSparseEntity::kDefaultBFNegativeProbability};\n  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};\n  float sparse_neighbor_ratio_{HnswSparseEntity::kDefaultSparseNeighborRatio};\n  uint32_t sparse_neighbor_cnt_{0UL};\n  uint32_t sparse_min_neighbor_cnt_{0UL};\n  uint32_t upper_sparse_neighbor_cnt_{0UL};\n\n  bool query_filtering_enabled_{false};\n  float query_filtering_ratio_{HnswSparseEntity::kDefaultQueryFilteringRatio};\n\n  uint32_t magic_{0U};\n  State state_{STATE_INIT};\n  bool bf_enabled_{false};\n  bool check_crc_enabled_{false};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n  bool force_padding_topk_enabled_{false};\n\n  //! avoid add vector while dumping index\n  ailego::SharedMutex shared_mutex_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_streamer_entity.h\"\n#include <algorithm>\n#include <cstddef>\n#include <iostream>\n#include <ailego/utility/memory_helper.h>\n#include \"utility/sparse_utility.h\"\n#include \"hnsw_sparse_dist_calculator.h\"\n\nnamespace zvec {\nnamespace core {\n\nHnswSparseStreamerEntity::HnswSparseStreamerEntity(IndexStreamer::Stats &stats)\n    : stats_(stats) {}\n\nHnswSparseStreamerEntity::~HnswSparseStreamerEntity() {}\n\nint HnswSparseStreamerEntity::init(uint64_t max_index_size,\n                                   size_t max_doc_cnt) {\n  if (std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt) {\n    LOG_ERROR(\"scalingFactor=%zu is too small\", scaling_factor());\n    return IndexError_InvalidArgument;\n  }\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  broker_ = std::make_shared<SparseChunkBroker>(stats_);\n  upper_neighbor_index_ = std::make_shared<NIHashMap>();\n  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();\n  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();\n  if (!keys_map_ || !upper_neighbor_index_ || !broker_ || !keys_map_lock_) {\n    LOG_ERROR(\"HnswSparseStreamerEntity new object failed\");\n    return IndexError_NoMemory;\n  }\n  keys_map_->set_empty_key(kInvalidKey);\n\n  neighbor_size_ = neighbors_size();\n  upper_neighbor_size_ = upper_neighbors_size();\n\n  //! 
vector + key + level 0 neighbors\n  size_t size = sizeof(key_t) + neighbor_size_ + sparse_meta_size();\n\n  size = AlignSize(size);\n  set_node_size(size);\n\n  return init_chunk_params(max_index_size);\n}\n\nint HnswSparseStreamerEntity::cleanup() {\n  std::lock_guard<std::mutex> lock(mutex_);\n  mutable_header()->clear();\n  chunk_size_ = kDefaultChunkSize;\n  node_index_mask_bits_ = 0U;\n  node_index_mask_ = 0U;\n  node_cnt_per_chunk_ = 0U;\n  neighbor_size_ = 0U;\n  upper_neighbor_size_ = 0U;\n  if (upper_neighbor_index_) {\n    upper_neighbor_index_->cleanup();\n  }\n  if (keys_map_) {\n    keys_map_->clear();\n  }\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n  filter_same_key_ = false;\n  get_vector_enabled_ = false;\n  broker_.reset();\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::update_neighbors(\n    level_t level, node_id_t id,\n    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {\n  std::vector<char> buffer(neighbor_size_);\n  NeighborsHeader *hd = reinterpret_cast<NeighborsHeader *>(buffer.data());\n  hd->neighbor_cnt = neighbors.size();\n  size_t i = 0;\n  for (; i < neighbors.size(); ++i) {\n    hd->neighbors[i] = neighbors[i].first;\n  }\n\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t size = reinterpret_cast<char *>(&hd->neighbors[i]) - &buffer[0];\n  size_t ret = loc.first->write(loc.second, hd, size);\n  if (ailego_unlikely(ret != size)) {\n    LOG_ERROR(\"Write neighbor header failed, ret=%zu\", ret);\n\n    return IndexError_Runtime;\n  }\n\n  return 0;\n}\n\nconst Neighbors HnswSparseStreamerEntity::get_neighbors(level_t level,\n                                                        node_id_t id) const {\n  SparseChunk *chunk = nullptr;\n  size_t offset = 0UL;\n  size_t neighbor_size = neighbor_size_;\n  if (level == 0UL) {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    offset = (id & node_index_mask_) * node_size() + sizeof(key_t) +\n             sparse_meta_size();\n\n    
sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    ailego_assert_with(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n    chunk = node_chunks_[chunk_idx].get();\n  } else {\n    auto p = get_upper_neighbor_chunk_loc(level, id);\n    chunk = upper_neighbor_chunks_[p.first].get();\n    offset = p.second;\n    neighbor_size = upper_neighbor_size_;\n  }\n\n  ailego_assert_with(offset < chunk->data_size(), \"invalid chunk offset\");\n  IndexStorage::MemoryBlock neighbor_block;\n  size_t size = chunk->read(offset, neighbor_block, neighbor_size);\n  if (ailego_unlikely(size != neighbor_size)) {\n    LOG_ERROR(\"Read neighbor header failed, ret=%zu\", size);\n    return Neighbors();\n  }\n  return Neighbors(std::move(neighbor_block));\n}\n\n//! Get vector feature data by key\nconst void *HnswSparseStreamerEntity::get_vector_meta(node_id_t id) const {\n  auto loc = get_vector_chunk_loc(id);\n  const void *vec = nullptr;\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = sparse_meta_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, &vec, read_size);\n  if (ailego_unlikely(ret != read_size)) {\n    LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n              loc.second, read_size, ret);\n  }\n\n  return vec;\n}\n\nint HnswSparseStreamerEntity::get_vector_meta(\n    const node_id_t id, IndexStorage::MemoryBlock &block) const {\n  auto loc = get_vector_chunk_loc(id);\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t read_size = sparse_meta_size();\n\n  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);\n  if (ailego_unlikely(ret != read_size)) 
{\n    LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n              loc.second, read_size, ret);\n    return IndexError_ReadData;\n  }\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::get_vector_metas(const node_id_t *ids,\n                                               uint32_t count,\n                                               const void **vecs) const {\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = sparse_meta_size();\n\n    size_t ret = node_chunks_[loc.first]->read(loc.second, &vecs[i], read_size);\n    if (ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n                loc.second, read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::get_vector_metas(\n    const node_id_t *ids, uint32_t count,\n    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {\n  block_vecs.resize(count);\n  for (auto i = 0U; i < count; ++i) {\n    auto loc = get_vector_chunk_loc(ids[i]);\n    ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                       \"invalid chunk offset\");\n\n    size_t read_size = sparse_meta_size();\n\n    size_t ret =\n        node_chunks_[loc.first]->read(loc.second, block_vecs[i], read_size);\n    if (ailego_unlikely(ret != read_size)) {\n      LOG_ERROR(\"Read vector failed, offset=%u, read size=%zu, ret=%zu\",\n                loc.second, read_size, ret);\n      return IndexError_ReadData;\n    }\n  }\n\n  return 0;\n}\n\n//! 
Get vector feature data by key\nconst void *HnswSparseStreamerEntity::get_sparse_data(uint64_t offset,\n                                                      uint32_t len) const {\n  uint32_t chunk_index = offset >> 32;\n  uint32_t chunk_offset = offset & 0xFFFFFFFF;\n\n  auto loc = get_sparse_chunk_loc(chunk_index, chunk_offset);\n  const void *data = nullptr;\n\n  ailego_assert_with(loc.first < sparse_node_chunks_.size(),\n                     \"invalid chunk idx\");\n  ailego_assert_with(loc.second < sparse_node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t ret = sparse_node_chunks_[loc.first]->read(loc.second, &data, len);\n  if (ailego_unlikely(ret != len)) {\n    LOG_ERROR(\"Read sparse vector failed, offset=%zu, read size=%u, ret=%zu\",\n              (size_t)offset, len, ret);\n  }\n  return data;\n}\n\nint HnswSparseStreamerEntity::get_sparse_data(\n    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {\n  uint32_t chunk_index = offset >> 32;\n  uint32_t chunk_offset = offset & 0xFFFFFFFF;\n\n  auto loc = get_sparse_chunk_loc(chunk_index, chunk_offset);\n  ailego_assert_with(loc.first < sparse_node_chunks_.size(),\n                     \"invalid chunk idx\");\n  ailego_assert_with(loc.second < sparse_node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n\n  size_t ret = sparse_node_chunks_[loc.first]->read(loc.second, block, len);\n  if (ailego_unlikely(ret != len)) {\n    LOG_ERROR(\"Read sparse vector failed, offset=%zu, read size=%u, ret=%zu\",\n              (size_t)offset, len, ret);\n    return IndexError_ReadData;\n  }\n  return 0;\n}\n\n//! 
Get sparse data from id\nconst void *HnswSparseStreamerEntity::get_sparse_data(node_id_t id) const {\n  auto sparse_data = get_sparse_data_from_vector(get_vector_meta(id));\n\n  return sparse_data.first;\n}\n\nint HnswSparseStreamerEntity::get_sparse_data(\n    node_id_t id, IndexStorage::MemoryBlock &block) const {\n  IndexStorage::MemoryBlock meta_block;\n  get_vector_meta(id, meta_block);\n  int sparse_length = 0;\n  return get_sparse_data_from_vector(meta_block.data(), block, sparse_length);\n}\n\n//! Get sparse data from vector\nstd::pair<const void *, uint32_t>\nHnswSparseStreamerEntity::get_sparse_data_from_vector(const void *vec) const {\n  const char *vec_ptr = reinterpret_cast<const char *>(vec);\n\n  uint64_t offset = *((uint64_t *)(vec_ptr));\n  uint32_t sparse_vector_len = *((uint32_t *)(vec_ptr + sizeof(uint64_t)));\n\n  if (sparse_vector_len > 0) {\n    const void *sparse_data = get_sparse_data(offset, sparse_vector_len);\n    if (ailego_unlikely(sparse_data == nullptr)) {\n      LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)offset,\n                sparse_vector_len);\n\n      return std::make_pair(nullptr, 0);\n    }\n\n    return std::make_pair(sparse_data, sparse_vector_len);\n  }\n\n  return std::make_pair(nullptr, 0);\n}\n\nint HnswSparseStreamerEntity::get_sparse_data_from_vector(\n    const void *vec, IndexStorage::MemoryBlock &block,\n    int &sparse_length) const {\n  const char *vec_ptr = reinterpret_cast<const char *>(vec);\n\n  uint64_t offset = *((uint64_t *)(vec_ptr));\n  uint32_t sparse_vector_len = *((uint32_t *)(vec_ptr + sizeof(uint64_t)));\n\n  if (sparse_vector_len > 0) {\n    int ret = get_sparse_data(offset, sparse_vector_len, block);\n    if (ailego_unlikely(ret != 0)) {\n      LOG_ERROR(\"Get nullptr sparse, offset=%zu, len=%u\", (size_t)offset,\n                sparse_vector_len);\n      return IndexError_ReadData;\n    }\n    sparse_length = sparse_vector_len;\n  }\n  return 0;\n}\n\nkey_t 
HnswSparseStreamerEntity::get_key(node_id_t id) const {\n  auto loc = get_key_chunk_loc(id);\n  IndexStorage::MemoryBlock key_block;\n  ailego_assert_with(loc.first < node_chunks_.size(), \"invalid chunk idx\");\n  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),\n                     \"invalid chunk offset\");\n  size_t ret =\n      node_chunks_[loc.first]->read(loc.second, key_block, sizeof(key_t));\n  if (ailego_unlikely(ret != sizeof(key_t))) {\n    LOG_ERROR(\"Read vector failed, ret=%zu\", ret);\n    return kInvalidKey;\n  }\n\n  return *reinterpret_cast<const key_t *>(key_block.data());\n}\n\nvoid HnswSparseStreamerEntity::add_neighbor(level_t level, node_id_t id,\n                                            uint32_t size,\n                                            node_id_t neighbor_id) {\n  auto loc = get_neighbor_chunk_loc(level, id);\n  size_t offset =\n      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);\n  ailego_assert_with(size < neighbor_cnt(level), \"invalid neighbor size\");\n  ailego_assert_with(offset < loc.first->data_size(), \"invalid chunk offset\");\n  size_t ret = loc.first->write(offset, &neighbor_id, sizeof(node_id_t));\n  if (ailego_unlikely(ret != sizeof(node_id_t))) {\n    LOG_ERROR(\"Write neighbor id failed, ret=%zu\", ret);\n    return;\n  }\n\n  uint32_t neighbors = size + 1;\n  ret = loc.first->write(loc.second, &neighbors, sizeof(uint32_t));\n  if (ailego_unlikely(ret != sizeof(uint32_t))) {\n    LOG_ERROR(\"Write neighbor cnt failed, ret=%zu\", ret);\n  }\n\n  return;\n}\n\nint HnswSparseStreamerEntity::init_chunks(\n    const SparseChunk::Pointer &header_chunk) {\n  if (header_chunk->data_size() < header_size()) {\n    LOG_ERROR(\"Invalid header chunk size\");\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::MemoryBlock data_block;\n  size_t size = header_chunk->read(0UL, data_block, header_size());\n  if (ailego_unlikely(size != header_size())) {\n    LOG_ERROR(\"Read header 
chunk failed\");\n    return IndexError_ReadData;\n  }\n  *mutable_header() =\n      *reinterpret_cast<const HNSWSparseHeader *>(data_block.data());\n\n  int ret = check_hnsw_index(&header());\n  if (ret != 0) {\n    broker_->close();\n    return ret;\n  }\n\n  node_chunks_.resize(\n      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NODE));\n  for (auto seq = 0UL; seq < node_chunks_.size(); ++seq) {\n    node_chunks_[seq] =\n        broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_NODE, seq);\n    if (!node_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer data chunk %zu th of %zu\", seq,\n                node_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  upper_neighbor_chunks_.resize(\n      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR));\n  for (auto seq = 0UL; seq < upper_neighbor_chunks_.size(); ++seq) {\n    upper_neighbor_chunks_[seq] =\n        broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, seq);\n    if (!upper_neighbor_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer index chunk %zu th of %zu\", seq,\n                upper_neighbor_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  sparse_node_chunks_.resize(\n      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE));\n  for (auto seq = 0UL; seq < sparse_node_chunks_.size(); ++seq) {\n    sparse_node_chunks_[seq] =\n        broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE, seq);\n    if (!sparse_node_chunks_[seq]) {\n      LOG_ERROR(\"Missing hnsw streamer sparse data chunk %zu th of %zu\", seq,\n                sparse_node_chunks_.size());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::open(IndexStorage::Pointer stg, bool check_crc) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  int ret =\n      broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);\n  if (ailego_unlikely(ret != 0)) {\n    
LOG_ERROR(\"Open index failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,\n                                    scaling_factor(), estimate_doc_capacity(),\n                                    kUpperHashMemoryInflateRatio);\n  if (ailego_unlikely(ret != 0)) {\n    LOG_ERROR(\"Init neighbor hash map failed\");\n    return ret;\n  }\n\n  //! init header\n  auto header_chunk = broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,\n                                         SparseChunkBroker::kDefaultChunkSeqId);\n  if (!header_chunk) {  // open empty index, create one\n    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,\n                                  SparseChunkBroker::kDefaultChunkSeqId,\n                                  header_size());\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc header chunk failed\");\n      return p.first;\n    }\n    size_t size = p.second->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n    return 0;\n  }\n\n  //! Open an exist hnsw index\n  ret = init_chunks(header_chunk);\n  if (ailego_unlikely(ret != 0)) {\n    return ret;\n  }\n\n  //! 
total docs including features wrote in index but neighbors may not ready\n  node_id_t total_vecs = 0;\n  if (node_chunks_.size() > 0) {\n    size_t last_idx = node_chunks_.size() - 1;\n    auto last_chunk = node_chunks_[last_idx];\n    if (last_chunk->data_size() % node_size()) {\n      LOG_WARN(\"The index may broken\");\n      return IndexError_InvalidFormat;\n    }\n    total_vecs = last_idx * node_cnt_per_chunk_ +\n                 node_chunks_[last_idx]->data_size() / node_size();\n  }\n\n  LOG_INFO(\n      \"Open index, l0NeighborCnt=%zu upperneighborCnt=%zu \"\n      \"efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u\",\n      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),\n      total_vecs, cur_max_level());\n  //! try to correct the docCnt if index not fully flushed\n  if (doc_cnt() != total_vecs) {\n    LOG_WARN(\"Index closed abnormally, using totalVecs as curDocCnt\");\n    *mutable_doc_cnt() = total_vecs;\n  }\n  if (filter_same_key_ || get_vector_enabled_) {\n    for (node_id_t id = 0U; id < doc_cnt(); ++id) {\n      (*keys_map_)[get_key(id)] = id;\n    }\n  }\n\n  stats_.set_loaded_count(doc_cnt());\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::close() {\n  LOG_DEBUG(\"close index\");\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  mutable_header()->reset();\n  upper_neighbor_index_->cleanup();\n  keys_map_->clear();\n  header_.clear();\n  node_chunks_.clear();\n  upper_neighbor_chunks_.clear();\n\n  sparse_node_chunks_.clear();\n\n  return broker_->close();\n}\n\nint HnswSparseStreamerEntity::flush(uint64_t checkpoint) {\n  LOG_INFO(\"Flush index, curDocs=%u\", doc_cnt());\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  flush_header();\n  int ret = broker_->flush(checkpoint);\n  if (ret != 0) {\n    return ret;\n  }\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"Dump index, curDocs=%u\", doc_cnt());\n\n  //! 
sort by keys, to support get_vector by key in searcher\n  std::vector<key_t> keys(doc_cnt());\n  for (node_id_t i = 0; i < doc_cnt(); ++i) {\n    keys[i] = get_key(i);\n  }\n\n  //! dump neighbors\n  auto get_level = [&](node_id_t id) {\n    auto it = upper_neighbor_index_->find(id);\n    if (it == upper_neighbor_index_->end()) {\n      return 0U;\n    };\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    return meta->level;\n  };\n  auto ret = dump_segments(dumper, keys.data(), get_level);\n  if (ailego_unlikely(ret < 0)) {\n    return ret;\n  }\n  *stats_.mutable_dumped_size() += ret;\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::check_hnsw_index(\n    const HNSWSparseHeader *hd) const {\n  if (l0_neighbor_cnt() != hd->neighbor_cnt() ||\n      upper_neighbor_cnt() != hd->upper_neighbor_cnt()) {\n    LOG_ERROR(\"Param neighbors:%zu:%zu mismatch index previous %zu:%zu\",\n              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->neighbor_cnt(),\n              hd->upper_neighbor_cnt());\n    return IndexError_Mismatch;\n  }\n  if (ef_construction() != hd->ef_construction()) {\n    LOG_WARN(\"Param efConstruction %zu mismatch index previous %zu\",\n             ef_construction(), hd->ef_construction());\n  }\n  if (scaling_factor() != hd->scaling_factor()) {\n    LOG_WARN(\"Param scalingFactor %zu mismatch index previous %zu\",\n             scaling_factor(), hd->scaling_factor());\n    return IndexError_Mismatch;\n  }\n  if (prune_cnt() != hd->neighbor_prune_cnt()) {\n    LOG_WARN(\"Param pruneCnt %zu mismatch index previous %zu\", prune_cnt(),\n             hd->neighbor_prune_cnt());\n    return IndexError_Mismatch;\n  }\n  if ((hd->entry_point() != kInvalidNodeId &&\n       hd->entry_point() >= hd->doc_cnt()) ||\n      (hd->entry_point() == kInvalidNodeId && hd->doc_cnt() > 0U)) {\n    LOG_WARN(\"Invalid entryPoint %u, docCnt %u\", hd->entry_point(),\n             hd->doc_cnt());\n    return IndexError_InvalidFormat;\n  }\n  
if (hd->entry_point() == kInvalidNodeId &&\n      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NODE) > 0) {\n    LOG_WARN(\"The index is broken, maybe it haven't flush\");\n    return IndexError_InvalidFormat;\n  }\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::add_vector(level_t level, key_t key,\n                                         const std::string &sparse_vec,\n                                         uint32_t sparse_count, node_id_t *id) {\n  // allocat sparse chunk\n  uint32_t sparse_vector_len = sparse_vec.size();\n\n  sparse_vector_len = AlignSize(sparse_vector_len);\n\n  if (sparse_vector_len > sparse_chunk_size_) {\n    LOG_ERROR(\n        \"Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk \"\n        \"size: %u\",\n        sparse_vector_len, sparse_chunk_size_);\n    return IndexError_InvalidArgument;\n  }\n\n  SparseChunk::Pointer node_chunk;\n  SparseChunk::Pointer sparse_node_chunk;\n\n  size_t chunk_offset = -1UL;\n  size_t sparse_chunk_offset = -1UL;\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  node_id_t local_id = static_cast<node_id_t>(doc_cnt());\n\n  uint32_t chunk_index = node_chunks_.size() - 1U;\n  if (chunk_index == -1U ||\n      (node_chunks_[chunk_index]->data_size() >=\n       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc\n    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n    chunk_index++;\n    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NODE,\n                                  chunk_index, chunk_size_);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return p.first;\n    }\n    
node_chunk = p.second;\n    chunk_offset = 0UL;\n    node_chunks_.emplace_back(node_chunk);\n  } else {\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = node_chunk->data_size();\n  }\n\n  uint32_t sparse_chunk_index = sparse_node_chunks_.size() - 1U;\n  if (sparse_chunk_index == -1U ||\n      sparse_node_chunks_[sparse_chunk_index]->data_size() + sparse_vector_len >\n          sparse_chunk_size_) {\n    if (ailego_unlikely(sparse_node_chunks_.capacity() ==\n                        sparse_node_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n    sparse_chunk_index++;\n    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE,\n                                  sparse_chunk_index, sparse_chunk_size_);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return p.first;\n    }\n    sparse_node_chunk = p.second;\n\n    sparse_node_chunks_.emplace_back(sparse_node_chunk);\n\n    sparse_chunk_offset = 0UL;\n  } else {\n    sparse_node_chunk = sparse_node_chunks_[sparse_chunk_index];\n    sparse_chunk_offset = sparse_node_chunk->data_size();\n  }\n\n  // write sparse vector\n  if (sparse_vec.size() > 0) {\n    size_t size = sparse_node_chunk->write(\n        sparse_chunk_offset, sparse_vec.data(), sparse_vec.size());\n    if (ailego_unlikely(size != sparse_vec.size())) {\n      LOG_ERROR(\"SparseChunk write sparse vec failed, ret=%zu\", size);\n      return IndexError_WriteData;\n    }\n  }\n\n  uint64_t sparse_offset = sparse_chunk_index;\n  sparse_offset = (sparse_offset << 32) + sparse_chunk_offset;\n\n  size_t size =\n      node_chunk->write(chunk_offset, &sparse_offset, sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"SparseChunk write sparse vec index failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size = node_chunk->write(chunk_offset + sizeof(uint64_t), 
&sparse_vector_len,\n                           sizeof(uint32_t));\n  if (ailego_unlikely(size != sizeof(uint32_t))) {\n    LOG_ERROR(\"SparseChunk write sparse vec len failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size =\n      node_chunk->write(chunk_offset + sparse_meta_size(), &key, sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"SparseChunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  //! level 0 neighbors is inited to zero by default\n  int ret = add_upper_neighbor(level, local_id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  if (sparse_vector_len > 0) {\n    sparse_chunk_offset += sparse_vector_len;\n    if (ailego_unlikely(sparse_node_chunk->resize(sparse_chunk_offset) !=\n                        sparse_chunk_offset)) {\n      LOG_ERROR(\"SparseChunk resize to %zu failed\", sparse_chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  chunk_offset += node_size();\n  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n    LOG_ERROR(\"SparseChunk resize to %zu failed\", chunk_offset);\n    return IndexError_Runtime;\n  }\n\n  if (filter_same_key_ || get_vector_enabled_) {\n    keys_map_lock_->lock();\n    (*keys_map_)[key] = local_id;\n    keys_map_lock_->unlock();\n  }\n\n  *mutable_doc_cnt() += 1;\n  *mutable_total_sparse_count() += sparse_count;\n\n  broker_->mark_dirty();\n  *id = local_id;\n\n  return 0;\n}\n\nint HnswSparseStreamerEntity::add_vector_with_id(level_t level, node_id_t id,\n                                                 const std::string &sparse_vec,\n                                                 uint32_t sparse_count) {\n  key_t key = id;\n  SparseChunk::Pointer node_chunk;\n  SparseChunk::Pointer sparse_node_chunk;\n  size_t chunk_offset = -1UL;\n  size_t sparse_chunk_offset = -1UL;\n\n  // allocat sparse chunk\n  uint32_t sparse_vector_len = sparse_vec.size();\n\n  sparse_vector_len = 
AlignSize(sparse_vector_len);\n\n  if (sparse_vector_len > sparse_chunk_size_) {\n    LOG_ERROR(\n        \"Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk \"\n        \"size: %u\",\n        sparse_vector_len, sparse_chunk_size_);\n    return IndexError_InvalidArgument;\n  }\n\n\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  // duplicate check\n  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {\n    LOG_WARN(\"Try to add duplicate key, ignore it\");\n    return IndexError_Duplicate;\n  }\n\n  auto func_get_sparse_node_chunk_and_offset = [&](node_id_t node_id) -> int {\n    uint32_t chunk_index = node_id >> node_index_mask_bits_;\n    ailego_assert_with(chunk_index <= node_chunks_.size(), \"invalid chunk idx\");\n    // belongs to next chunk\n    if (chunk_index == node_chunks_.size()) {\n      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {\n        LOG_ERROR(\"add vector failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NODE,\n                                    chunk_index, chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n        LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      node_chunk = p.second;\n      node_chunks_.emplace_back(node_chunk);\n    }\n\n    node_chunk = node_chunks_[chunk_index];\n    chunk_offset = (node_id & node_index_mask_) * node_size();\n    return 0;\n  };\n\n  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {\n    if (auto ret = func_get_sparse_node_chunk_and_offset(start_id); ret != 0) {\n      LOG_ERROR(\"func_get_sparse_node_chunk_and_offset failed\");\n      return ret;\n    }\n    size_t size = node_chunk->write(chunk_offset + sparse_meta_size(),\n                                    &kInvalidKey, sizeof(key_t));\n    if (ailego_unlikely(size != sizeof(key_t))) {\n      LOG_ERROR(\"SparseChunk write key failed, 
ret=%zu\", size);\n      return IndexError_WriteData;\n    }\n\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"SparseChunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (auto ret = func_get_sparse_node_chunk_and_offset(id); ret != 0) {\n    LOG_ERROR(\"func_get_sparse_node_chunk_and_offset failed\");\n    return ret;\n  }\n\n  uint32_t sparse_chunk_index = sparse_node_chunks_.size() - 1U;\n  if (sparse_chunk_index == -1U ||\n      sparse_node_chunks_[sparse_chunk_index]->data_size() + sparse_vector_len >\n          sparse_chunk_size_) {\n    if (ailego_unlikely(sparse_node_chunks_.capacity() ==\n                        sparse_node_chunks_.size())) {\n      LOG_ERROR(\"add vector failed for no memory quota\");\n      return IndexError_IndexFull;\n    }\n    sparse_chunk_index++;\n    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE,\n                                  sparse_chunk_index, sparse_chunk_size_);\n    if (ailego_unlikely(p.first != 0)) {\n      LOG_ERROR(\"Alloc data chunk failed\");\n      return p.first;\n    }\n    sparse_node_chunk = p.second;\n\n    sparse_node_chunks_.emplace_back(sparse_node_chunk);\n\n    sparse_chunk_offset = 0UL;\n  } else {\n    sparse_node_chunk = sparse_node_chunks_[sparse_chunk_index];\n    sparse_chunk_offset = sparse_node_chunk->data_size();\n  }\n\n  // write sparse vector\n  if (sparse_vec.size() > 0) {\n    size_t size = sparse_node_chunk->write(\n        sparse_chunk_offset, sparse_vec.data(), sparse_vec.size());\n    if (ailego_unlikely(size != sparse_vec.size())) {\n      LOG_ERROR(\"SparseChunk write sparse vec failed, ret=%zu\", size);\n      return IndexError_WriteData;\n    }\n  }\n\n  uint64_t sparse_offset = sparse_chunk_index;\n  sparse_offset = (sparse_offset << 32) + sparse_chunk_offset;\n\n  size_t size =\n      node_chunk->write(chunk_offset, 
&sparse_offset, sizeof(uint64_t));\n  if (ailego_unlikely(size != sizeof(uint64_t))) {\n    LOG_ERROR(\"SparseChunk write sparse vec index failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size = node_chunk->write(chunk_offset + sizeof(uint64_t), &sparse_vector_len,\n                           sizeof(uint32_t));\n  if (ailego_unlikely(size != sizeof(uint32_t))) {\n    LOG_ERROR(\"SparseChunk write sparse vec len failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  size =\n      node_chunk->write(chunk_offset + sparse_meta_size(), &key, sizeof(key_t));\n  if (ailego_unlikely(size != sizeof(key_t))) {\n    LOG_ERROR(\"SparseChunk write vec failed, ret=%zu\", size);\n    return IndexError_WriteData;\n  }\n\n  //! level 0 neighbors is inited to zero by default\n  int ret = add_upper_neighbor(level, id);\n  if (ret != 0) {\n    return ret;\n  }\n\n  if (sparse_vector_len > 0) {\n    sparse_chunk_offset += sparse_vector_len;\n    if (ailego_unlikely(sparse_node_chunk->resize(sparse_chunk_offset) !=\n                        sparse_chunk_offset)) {\n      LOG_ERROR(\"SparseChunk resize to %zu failed\", sparse_chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n\n  if (*mutable_doc_cnt() <= id) {\n    *mutable_doc_cnt() = id + 1;\n    chunk_offset += node_size();\n    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"Chunk resize to %zu failed\", chunk_offset);\n      return IndexError_Runtime;\n    }\n  }\n  *mutable_total_sparse_count() += sparse_count;\n\n  if (filter_same_key_ || get_vector_enabled_) {\n    keys_map_lock_->lock();\n    (*keys_map_)[key] = id;\n    keys_map_lock_->unlock();\n  }\n\n  broker_->mark_dirty();\n\n  return 0;\n}\n\nvoid HnswSparseStreamerEntity::update_ep_and_level(node_id_t ep,\n                                                   level_t level) {\n  HnswSparseEntity::update_ep_and_level(ep, level);\n  flush_header();\n\n  return;\n}\n\nconst 
HnswSparseEntity::Pointer HnswSparseStreamerEntity::clone() const {\n  std::vector<SparseChunk::Pointer> node_chunks;\n  node_chunks.reserve(node_chunks_.size());\n  for (size_t i = 0UL; i < node_chunks_.size(); ++i) {\n    node_chunks.emplace_back(node_chunks_[i]->clone());\n    if (ailego_unlikely(!node_chunks[i])) {\n      LOG_ERROR(\"HnswSparseStreamerEntity get chunk failed in clone\");\n      return HnswSparseEntity::Pointer();\n    }\n  }\n\n  std::vector<SparseChunk::Pointer> sparse_node_chunks;\n  sparse_node_chunks.reserve(sparse_node_chunks_.size());\n  for (size_t i = 0UL; i < sparse_node_chunks_.size(); ++i) {\n    sparse_node_chunks.emplace_back(sparse_node_chunks_[i]->clone());\n    if (ailego_unlikely(!sparse_node_chunks[i])) {\n      LOG_ERROR(\"HnswSparseStreamerEntity get sparse chunk failed in clone\");\n      return HnswSparseEntity::Pointer();\n    }\n  }\n\n  std::vector<SparseChunk::Pointer> upper_neighbor_chunks;\n  upper_neighbor_chunks.reserve(upper_neighbor_chunks_.size());\n  for (size_t i = 0UL; i < upper_neighbor_chunks_.size(); ++i) {\n    upper_neighbor_chunks.emplace_back(upper_neighbor_chunks_[i]->clone());\n    if (ailego_unlikely(!upper_neighbor_chunks[i])) {\n      LOG_ERROR(\"HnswSparseStreamerEntity get chunk failed in clone\");\n      return HnswSparseEntity::Pointer();\n    }\n  }\n\n  HnswSparseStreamerEntity *entity =\n      new (std::nothrow) HnswSparseStreamerEntity(\n          stats_, header(), chunk_size_, node_index_mask_bits_,\n          upper_neighbor_mask_bits_, filter_same_key_, get_vector_enabled_,\n          sparse_chunk_size_, upper_neighbor_index_, keys_map_lock_, keys_map_,\n          std::move(node_chunks), std::move(upper_neighbor_chunks),\n          std::move(sparse_node_chunks), broker_);\n  if (ailego_unlikely(!entity)) {\n    LOG_ERROR(\"HnswSparseStreamerEntity new failed\");\n  }\n  return HnswSparseEntity::Pointer(entity);\n}\n\n//! 
Get sparse vector feature data by key\nint HnswSparseStreamerEntity::get_sparse_vector_by_key(\n    key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  *sparse_count = 0;\n\n  auto id = get_id(key);\n  if (id == kInvalidNodeId) {\n    return IndexError_NoExist;\n  }\n\n  return get_sparse_vector_by_id(id, sparse_count, sparse_indices_buffer,\n                                 sparse_values_buffer);\n}\n\nint HnswSparseStreamerEntity::get_sparse_vector_by_id(\n    node_id_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n    std::string *sparse_values_buffer) const {\n  IndexStorage::MemoryBlock block;\n  get_sparse_data(id, block);\n  const void *sparse_data = block.data();\n  if (sparse_data == nullptr) {\n    return IndexError_InvalidValue;\n  }\n\n  SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,\n                                     sparse_indices_buffer,\n                                     sparse_values_buffer, sparse_unit_size());\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <iostream>\n#include <ailego/parallel/lock.h>\n#include <sparsehash/dense_hash_map>\n#include <sparsehash/dense_hash_set>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"hnsw_sparse_chunk.h\"\n#include \"hnsw_sparse_entity.h\"\n#include \"hnsw_sparse_index_hash.h\"\n#include \"hnsw_sparse_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! HnswSparseStreamerEntity manage vector data, pkey, and node's neighbors\nclass HnswSparseStreamerEntity : public HnswSparseEntity {\n public:\n  //! Cleanup\n  //! return 0 on success, or errCode in failure\n  virtual int cleanup() override;\n\n  //! Make a copy of streamer entity, to support thread-safe operation.\n  //! The segment in container cannot be read concurrenly\n  virtual const HnswSparseEntity::Pointer clone() const override;\n\n  //! Get primary key of the node id\n  virtual key_t get_key(node_id_t id) const override;\n\n  //! Get vector feature data by key\n  virtual const void *get_vector_meta(node_id_t id) const override;\n\n  virtual int get_vector_meta(const node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! 
Get vectors feature data by local ids\n  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,\n                               const void **vecs) const override;\n  virtual int get_vector_metas(\n      const node_id_t *ids, uint32_t count,\n      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;\n\n  //! Get vector sparse feature data by chunk index and offset\n  virtual const void *get_sparse_data(uint64_t offset,\n                                      uint32_t len) const override;\n\n  //! Get sparse data from id\n  virtual const void *get_sparse_data(node_id_t id) const override;\n\n  virtual int get_sparse_data(uint64_t offset, uint32_t len,\n                              IndexStorage::MemoryBlock &block) const override;\n  virtual int get_sparse_data(node_id_t id,\n                              IndexStorage::MemoryBlock &block) const override;\n\n  //! Get sparse data from vector\n  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(\n      const void *vec) const override;\n  virtual int get_sparse_data_from_vector(const void *vec,\n                                          IndexStorage::MemoryBlock &block,\n                                          int &sparse_length) const override;\n\n  //! Get sparse vector feature data by key\n  virtual int get_sparse_vector_by_key(\n      key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n      std::string *sparse_values_buffer) const override;\n\n  //! Get sparse vector feature data by id\n  virtual int get_sparse_vector_by_id(\n      node_id_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,\n      std::string *sparse_values_buffer) const override;\n\n  //! Get the node id's neighbors on graph level\n  //! Note: the neighbors cannot be modified, using the following\n  //! 
method to get WritableNeighbors if want to\n  virtual const Neighbors get_neighbors(level_t level,\n                                        node_id_t id) const override;\n\n\n  //! Add vector and key to hnsw entity, and local id will be saved in id\n  virtual int add_vector(level_t level, key_t key,\n                         const std::string &sparse_vec_buffer,\n                         uint32_t sparse_count, node_id_t *id) override;\n\n  //! Add vector and id to hnsw entity\n  virtual int add_vector_with_id(level_t level, node_id_t id,\n                                 const std::string &sparse_vec,\n                                 uint32_t sparse_count) override;\n\n  virtual int update_neighbors(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;\n\n  //! Replace node id in level's neighbors\n  int update_neighbors_dense(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors);\n\n  //! Replace node id in level's neighbors\n  int update_neighbors_sparse(\n      level_t level, node_id_t id,\n      const std::vector<std::pair<node_id_t, dist_t>> &neighbors);\n\n  //! Append neighbor_id to node id neighbors on level\n  //! Notice: the caller must be ensure the neighbors not full\n  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,\n                            node_id_t neighbor_id) override;\n\n  //! Dump index by dumper\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  virtual void update_ep_and_level(node_id_t ep, level_t level) override;\n\n public:\n  //! Constructor\n  HnswSparseStreamerEntity(IndexStreamer::Stats &stats);\n\n  //! Destructor\n  ~HnswSparseStreamerEntity();\n\n  //! Init entity\n  int init(uint64_t max_index_size, size_t max_doc_cnt);\n\n  //! Flush graph entity to disk\n  //! return 0 on success, or errCode in failure\n  int flush(uint64_t checkpoint);\n\n  //! Open entity from storage\n  //! 
return 0 on success, or errCode in failure\n  int open(IndexStorage::Pointer stg, bool check_crc);\n\n  //! Close entity\n  //! return 0 on success, or errCode in failure\n  int close();\n\n  //! Set meta information from entity\n  int set_index_meta(const IndexMeta &meta) const {\n    return IndexHelper::SerializeToStorage(meta, broker_->storage().get());\n  }\n\n  //! Get meta information from entity\n  int get_index_meta(IndexMeta *meta) const {\n    return IndexHelper::DeserializeFromStorage(broker_->storage().get(), meta);\n  }\n\n  //! Set params: chunk size\n  inline void set_chunk_size(size_t val) {\n    chunk_size_ = val;\n  }\n\n  //! Set params\n  inline void set_filter_same_key(bool val) {\n    filter_same_key_ = val;\n  }\n\n  //! Set params\n  inline void set_get_vector(bool val) {\n    get_vector_enabled_ = val;\n  }\n\n  //! Get vector local id by key\n  inline node_id_t get_id(key_t key) const {\n    keys_map_lock_->lock_shared();\n    auto it = keys_map_->find(key);\n    keys_map_lock_->unlock_shared();\n    return it == keys_map_->end() ? kInvalidNodeId : it->second;\n  }\n\n  void print_key_map() {\n    std::cout << \"key map begins\" << std::endl;\n\n    auto iter = keys_map_->begin();\n    while (iter != keys_map_->end()) {\n      std::cout << \"key: \" << iter->first << \", id: \" << iter->second\n                << std::endl;\n      ;\n      iter++;\n    }\n\n    std::cout << \"key map ends\" << std::endl;\n  }\n\n  //! Get neighbors size\n  inline size_t neighbors_size() const {\n    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n  //! 
Get upper neighbors size\n  inline size_t upper_neighbors_size() const {\n    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);\n  }\n\n private:\n  union UpperNeighborIndexMeta {\n    struct {\n      uint32_t level : 4;\n      uint32_t index : 28;  // index is composite type: chunk idx, and the\n                            // N th neighbors in chunk, they two composite\n                            // the 28 bits location\n    };\n    uint32_t data;\n  };\n\n  template <class Key, class T>\n  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;\n  template <class Key, class T>\n  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;\n\n  template <class Key>\n  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;\n  template <class Key>\n  using HashSetPointer = std::shared_ptr<HashSet<Key>>;\n\n  //! upper neighbor index hashmap\n  using NIHashMap = HnswSparseIndexHashMap<node_id_t, uint32_t>;\n  using NIHashMapPointer = std::shared_ptr<NIHashMap>;\n\n  //! 
Private construct, only be called by clone method\n  HnswSparseStreamerEntity(\n      IndexStreamer::Stats &stats, const HNSWSparseHeader &hd,\n      size_t chunk_size, uint32_t node_index_mask_bits,\n      uint32_t upper_neighbor_mask_bits, bool filter_same_key,\n      bool get_vector_enabled, uint32_t sparse_chunk_size,\n      const NIHashMapPointer &upper_neighbor_index,\n      std::shared_ptr<ailego::SharedMutex> &keys_map_lock,\n      const HashMapPointer<key_t, node_id_t> &keys_map,\n      std::vector<SparseChunk::Pointer> &&node_chunks,\n      std::vector<SparseChunk::Pointer> &&upper_neighbor_chunks,\n      std::vector<SparseChunk::Pointer> &&sparse_node_chunks,\n      const SparseChunkBroker::Pointer &broker)\n      : stats_(stats),\n        chunk_size_(chunk_size),\n        node_index_mask_bits_(node_index_mask_bits),\n        node_cnt_per_chunk_(1UL << node_index_mask_bits_),\n        node_index_mask_(node_cnt_per_chunk_ - 1),\n        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),\n        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),\n        filter_same_key_(filter_same_key),\n        get_vector_enabled_(get_vector_enabled),\n        sparse_chunk_size_(sparse_chunk_size),\n        upper_neighbor_index_(upper_neighbor_index),\n        keys_map_lock_(keys_map_lock),\n        keys_map_(keys_map),\n        node_chunks_(std::move(node_chunks)),\n        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),\n        sparse_node_chunks_(std::move(sparse_node_chunks)),\n        broker_(broker) {\n    *mutable_header() = hd;\n\n    neighbor_size_ = neighbors_size();\n    upper_neighbor_size_ = upper_neighbors_size();\n  }\n\n  //! 
Called only in searching procedure per context, so no need to lock\n  void sync_chunks(SparseChunkBroker::CHUNK_TYPE type, size_t idx,\n                   std::vector<SparseChunk::Pointer> *chunks) const {\n    if (ailego_likely(idx < chunks->size())) {\n      return;\n    }\n    for (size_t i = chunks->size(); i <= idx; ++i) {\n      auto chunk = broker_->get_chunk(type, i);\n      // the storage can ensure get chunk will success after the first get\n      ailego_assert_with(!!chunk, \"get chunk failed\");\n      chunks->emplace_back(std::move(chunk));\n    }\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(\n      node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size();\n\n    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {\n    uint32_t chunk_idx = id >> node_index_mask_bits_;\n    uint32_t offset = (id & node_index_mask_) * node_size() + vector_size();\n\n    offset += sparse_meta_size();\n\n    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! 
return pair: chunk index + chunk offset\n  inline std::pair<uint32_t, uint32_t> get_sparse_chunk_loc(\n      uint32_t chunk_index, uint32_t chunk_offset) const {\n    sync_chunks(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE, chunk_index,\n                &sparse_node_chunks_);\n\n    return std::make_pair(chunk_index, chunk_offset);\n  }\n\n  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(\n      level_t level, node_id_t id) const {\n    auto it = upper_neighbor_index_->find(id);\n    ailego_assert_abort(it != upper_neighbor_index_->end(),\n                        \"Get upper neighbor header failed\");\n    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);\n    uint32_t chunk_idx = (meta->index) >> upper_neighbor_mask_bits_;\n    uint32_t offset = (((meta->index) & upper_neighbor_mask_) + level - 1) *\n                      upper_neighbor_size_;\n    sync_chunks(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_idx,\n                &upper_neighbor_chunks_);\n    ailego_assert_abort(chunk_idx < upper_neighbor_chunks_.size(),\n                        \"invalid chunk idx\");\n    ailego_assert_abort(offset < upper_neighbor_chunks_[chunk_idx]->data_size(),\n                        \"invalid chunk offset\");\n    return std::make_pair(chunk_idx, offset);\n  }\n\n  //! 
return pair: chunk + chunk offset\n  inline std::pair<SparseChunk *, size_t> get_neighbor_chunk_loc(\n      level_t level, node_id_t id) const {\n    if (level == 0UL) {\n      uint32_t chunk_idx = id >> node_index_mask_bits_;\n      uint32_t offset =\n          (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);\n\n      offset += sparse_meta_size();\n\n      sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);\n      ailego_assert_abort(chunk_idx < node_chunks_.size(), \"invalid chunk idx\");\n      ailego_assert_abort(offset < node_chunks_[chunk_idx]->data_size(),\n                          \"invalid chunk offset\");\n      return std::make_pair(node_chunks_[chunk_idx].get(), offset);\n    } else {\n      auto p = get_upper_neighbor_chunk_loc(level, id);\n      return std::make_pair(upper_neighbor_chunks_[p.first].get(), p.second);\n    }\n  }\n\n  //! Chunk hnsw index valid\n  int check_hnsw_index(const HNSWSparseHeader *hd) const;\n\n  size_t get_total_upper_neighbors_size(level_t level) const {\n    return level * upper_neighbor_size_;\n  }\n\n  //! 
Add upper neighbor header and reserve space for upper neighbor\n  int add_upper_neighbor(level_t level, node_id_t id) {\n    if (level == 0) {\n      return 0;\n    }\n    SparseChunk::Pointer chunk;\n    uint64_t chunk_offset = -1UL;\n    size_t neighbors_size = get_total_upper_neighbors_size(level);\n    uint64_t chunk_index = upper_neighbor_chunks_.size() - 1UL;\n    if (chunk_index == -1UL ||\n        (upper_neighbor_chunks_[chunk_index]->padding_size() <\n         neighbors_size)) {  // no space left and need to alloc\n      chunk_index++;\n      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==\n                          upper_neighbor_chunks_.size())) {\n        LOG_ERROR(\"add upper neighbor failed for no memory quota\");\n        return IndexError_IndexFull;\n      }\n      auto p =\n          broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,\n                               chunk_index, upper_neighbor_chunk_size_);\n      if (ailego_unlikely(p.first != 0)) {\n        LOG_ERROR(\"Alloc data chunk failed\");\n        return p.first;\n      }\n      chunk = p.second;\n      chunk_offset = 0UL;\n      upper_neighbor_chunks_.emplace_back(chunk);\n    } else {\n      chunk = upper_neighbor_chunks_[chunk_index];\n      chunk_offset = chunk->data_size();\n    }\n    ailego_assert_with((size_t)level < kMaxGraphLayers, \"invalid level\");\n    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,\n                       \"invalid offset\");\n    ailego_assert_with((chunk_offset / upper_neighbor_size_) <\n                           (1U << upper_neighbor_mask_bits_),\n                       \"invalid offset\");\n    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),\n                       \"invalid chunk index\");\n    UpperNeighborIndexMeta meta;\n    meta.level = level;\n    meta.index = (chunk_index << upper_neighbor_mask_bits_) |\n                 (chunk_offset / upper_neighbor_size_);\n    chunk_offset += 
upper_neighbor_size_ * level;\n    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {\n      LOG_ERROR(\"HashMap insert value failed\");\n      return IndexError_Runtime;\n    }\n\n    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {\n      LOG_ERROR(\"SparseChunk resize to %zu failed\", (size_t)chunk_offset);\n      return IndexError_Runtime;\n    }\n\n    return 0;\n  }\n\n  size_t estimate_doc_capacity() const {\n    return node_chunks_.capacity() * node_cnt_per_chunk_;\n  }\n\n  int init_chunk_params(size_t max_index_size) {\n    sparse_chunk_size_ = AlignPageSize(chunk_size_);\n\n    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());\n    //! align node cnt per chunk to pow of 2\n    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));\n    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;\n    chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());\n    node_index_mask_ = node_cnt_per_chunk_ - 1;\n\n    if (max_index_size == 0UL) {\n      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;\n    } else {\n      max_index_size_ = max_index_size;\n    }\n\n    //! To get a balanced upper neighbor chunk size.\n    //! If the upper chunk size is equal to node chunk size, it may waste\n    //! upper neighbor chunk space; if the upper neighbor chunk size is too\n    //! small, the will need large upper neighbor chunks index space. So to\n    //! 
get a balanced ratio be sqrt of the node/neighbor size ratio\n    float ratio =\n        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);\n    upper_neighbor_chunk_size_ =\n        AlignPageSize(std::max(get_total_upper_neighbors_size(kMaxGraphLayers),\n                               static_cast<size_t>(chunk_size_ / ratio)));\n    upper_neighbor_mask_bits_ =\n        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;\n\n    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);\n    size_t max_upper_chunk_cnt = std::ceil(\n        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /\n        (upper_neighbor_chunk_size_ / upper_neighbor_size_));\n    max_upper_chunk_cnt =\n        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());\n\n    //! reserve space to avoid memmove in chunks vector emplace chunk, so\n    //! as to lock-free in reading chunk\n    node_chunks_.reserve(max_node_chunk_cnt);\n    sparse_node_chunks_.reserve(max_node_chunk_cnt);\n    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);\n\n    LOG_DEBUG(\n        \"Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u \"\n        \"upperNeighborChunkSize=%u \"\n        \"nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu \"\n        \"maxIndexSize=%zu ratio=%.3f\",\n        node_size(), chunk_size_, upper_neighbor_size_,\n        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,\n        max_upper_chunk_cnt, max_index_size_, ratio);\n\n    return 0;\n  }\n\n  //! 
Init node chunk and neighbor chunks\n  int init_chunks(const SparseChunk::Pointer &header_chunk);\n\n  int flush_header(void) {\n    if (!broker_->dirty()) {\n      // do not need to flush\n      return 0;\n    }\n    auto header_chunk =\n        broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,\n                           SparseChunkBroker::kDefaultChunkSeqId);\n    if (ailego_unlikely(!header_chunk)) {\n      LOG_ERROR(\"get header chunk failed\");\n      return IndexError_Runtime;\n    }\n    size_t size = header_chunk->write(0UL, &header(), header_size());\n    if (ailego_unlikely(size != header_size())) {\n      LOG_ERROR(\"Write header chunk failed\");\n      return IndexError_WriteData;\n    }\n\n    return 0;\n  }\n\n private:\n  HnswSparseStreamerEntity(const HnswSparseStreamerEntity &) = delete;\n  HnswSparseStreamerEntity &operator=(const HnswSparseStreamerEntity &) =\n      delete;\n  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;\n\n private:\n  IndexStreamer::Stats &stats_;\n  HNSWSparseHeader header_{};\n  std::mutex mutex_{};\n  size_t max_index_size_{0UL};\n  uint32_t chunk_size_{kDefaultChunkSize};\n  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};\n  uint32_t node_index_mask_bits_{0U};\n  uint32_t node_cnt_per_chunk_{0U};\n  uint32_t node_index_mask_{0U};\n  uint32_t neighbor_size_{0U};\n  uint32_t upper_neighbor_size_{0U};\n  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the\n  //! following mask\n  uint32_t upper_neighbor_mask_bits_{0U};\n  uint32_t upper_neighbor_mask_{0U};\n  bool filter_same_key_{false};\n  bool get_vector_enabled_{false};\n\n  uint32_t sparse_chunk_size_{kDefaultChunkSize};\n\n  NIHashMapPointer upper_neighbor_index_{};\n\n  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};\n  HashMapPointer<key_t, node_id_t> keys_map_{};\n\n  //! the chunks will be changed in searcher, so need mutable\n  //! 
data chunk include: vector, key, level 0 neighbors\n  mutable std::vector<SparseChunk::Pointer> node_chunks_{};\n  //! upper neighbor chunk inlude: UpperNeighborHeader + (1~level) neighbors\n  mutable std::vector<SparseChunk::Pointer> upper_neighbor_chunks_{};\n  //! chunk that holds up sparse part\n  mutable std::vector<SparseChunk::Pointer> sparse_node_chunks_{};\n\n  SparseChunkBroker::Pointer broker_{};  // chunk broker\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/ivf/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_knn_ivf STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS zvec_ailego core_framework core_knn_cluster\n    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_builder.h\"\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"algorithm/cluster/cluster_params.h\"\n#include \"ivf_dumper.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IndexHolder support filtered by vector labels\n */\nclass LabelFilteredIndexHolder : public IndexHolder {\n public:\n  /*! Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(const IVFBuilder::RandomAccessIndexHolder::Pointer &holder,\n             const std::vector<uint32_t> *elems)\n        : holder_(holder), elems_(elems) {}\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    virtual const void *data(void) const override {\n      return holder_->element((*elems_)[index_]);\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return index_ < elems_->size();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return (*elems_)[index_];\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      ++index_;\n    }\n\n   private:\n    //! 
Members\n    const IVFBuilder::RandomAccessIndexHolder::Pointer holder_{nullptr};\n    const std::vector<uint32_t> *elems_{nullptr};\n    size_t index_{0};\n  };\n\n  //! Constructor\n  LabelFilteredIndexHolder(\n      const IVFBuilder::RandomAccessIndexHolder::Pointer &holder,\n      const std::vector<uint32_t> &items)\n      : holder_(holder), elems_(&items) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  virtual size_t count(void) const override {\n    return elems_->size();\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const override {\n    return holder_->dimension();\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const override {\n    return holder_->data_type();\n  }\n\n  //! Retrieve element size in bytes\n  virtual size_t element_size(void) const override {\n    return holder_->element_size();\n  }\n\n  //! Retrieve if it can multi-pass\n  virtual bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new LabelFilteredIndexHolder::Iterator(holder_, elems_));\n  }\n\n private:\n  //! Members\n  const IVFBuilder::RandomAccessIndexHolder::Pointer holder_{};\n  const std::vector<uint32_t> *elems_{};\n};\n\nIVFBuilder::IVFBuilder() {}\n\nIVFBuilder::~IVFBuilder() {\n  this->cleanup();\n}\n\nint IVFBuilder::init(const IndexMeta &meta, const ailego::Params &params) {\n  LOG_INFO(\"Begin IVFBuilder::init!\");\n\n  if (state_ != INIT) {\n    LOG_ERROR(\"IVFBuilder state wrong. 
state=%d\", state_);\n    return IndexError_Logic;\n  }\n\n  meta_ = meta;\n  converted_meta_ = meta;\n  quantized_meta_ = meta;\n  // Clear the converter/reformer params for external transforms\n  converted_meta_.set_reformer(std::string(), 0, ailego::Params());\n  converted_meta_.set_converter(std::string(), 0, ailego::Params());\n  quantized_meta_.set_reformer(std::string(), 0, ailego::Params());\n  quantized_meta_.set_converter(std::string(), 0, ailego::Params());\n  params_ = params;\n\n  if (!IndexFactory::HasMetric(meta_.metric_name())) {\n    LOG_ERROR(\"Metric %s not exist\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n\n  int ret = parse_centroids_num(params);\n  ivf_check_with_msg(ret, \"Failed to parse centroids, ret=%d\", ret);\n\n  ret = parse_clustering_params(params);\n  ivf_check_with_msg(ret, \"Failed to parse clustering params, ret=%d\", ret);\n\n  ret = parse_general_params(params);\n  ivf_check_with_msg(ret, \"Failed to parse general params, ret=%d\", ret);\n\n  LOG_INFO(\"End IVFBuilder::init!\");\n\n  LOG_DEBUG(\n      \"Converter=%s Quantizer=%s Optimizer=%s \"\n      \"OptimizerQuantizer=%s QuantizeByCentroid=%u StoreFeatures=%u \"\n      \"ClusterClass=%s TrainSamplesCount=%u TrainSampleRatio=%f \"\n      \"BlockVectorCount=%u\",\n      params.get_as_string(PARAM_IVF_BUILDER_CONVERTER_CLASS).c_str(),\n      params.get_as_string(PARAM_IVF_BUILDER_QUANTIZER_CLASS).c_str(),\n      params.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_CLASS).c_str(),\n      params.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS).c_str(),\n      params.get_as_bool(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID),\n      params.get_as_bool(PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES),\n      params.get_as_string(PARAM_IVF_BUILDER_CLUSTER_CLASS).c_str(),\n      params.get_as_uint32(PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT),\n      params.get_as_float(PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO),\n      block_vector_count_);\n\n  state_ = INITED;\n  return 
0;\n}\n\nint IVFBuilder::cleanup(void) {\n  LOG_INFO(\"Begin IVFBuilder::cleanup\");\n\n  state_ = INIT;\n  stats_.clear_attributes();\n  stats_.set_built_costtime(0u);\n  stats_.set_built_count(0u);\n  stats_.set_discarded_count(0u);\n  stats_.set_dumped_costtime(0u);\n  stats_.set_dumped_count(0u);\n  stats_.set_trained_costtime(0u);\n  stats_.set_trained_count(0u);\n\n  centroid_num_vec_.clear();\n  cluster_class_.clear();\n  converter_class_.clear();\n  cluster_params_.clear();\n\n  labels_.clear();\n  centroid_index_.reset();\n  holder_.reset();\n  converted_meta_ = meta_;\n  converter_.reset();\n  quantized_meta_ = meta_;\n  quantizers_.clear();\n\n  error_ = false;\n  err_code_ = 0;\n\n  thread_count_ = 0;\n  sample_count_ = 0;\n  cluster_auto_tuning_ = false;\n  store_original_features_ = false;\n  quantize_by_centroid_ = false;\n\n  LOG_INFO(\"End IVFBuilder::cleanup\");\n\n  return 0;\n}\n\nint IVFBuilder::train(IndexThreads::Pointer threads,\n                      IndexHolder::Pointer holder) {\n  LOG_INFO(\"Begin IVFBuilder::train with holder\");\n  if (state_ != INITED) {\n    LOG_ERROR(\"IVFBuilder train failed, wrong state=%d\", state_);\n    return IndexError_Runtime;\n  }\n\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n  ailego::ElapsedTime timer;\n  if (!holder || holder->count() == 0) {\n    LOG_ERROR(\"Input holder is nullptr or empty while train index\");\n    return IndexError_InvalidArgument;\n  }\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while train index\");\n    return IndexError_Mismatch;\n  }\n\n  if (converter_) {\n    int ret = IndexConverter::TrainAndTransform(converter_, std::move(holder));\n    ivf_check_with_msg(ret, \"Failed to train or transform by converter %s\",\n                       converter_->name().c_str());\n    converted_meta_ = converter_->meta();\n  
  holder = converter_->result();\n  }\n\n  ailego::Params train_params;\n  int ret = prepare_trainer_params(train_params);\n  ivf_check_with_msg(ret, \"Failed to prepare trainer params, ret=%d\", ret);\n\n  IndexTrainer::Pointer trainer =\n      IndexFactory::CreateTrainer(\"StratifiedClusterTrainer\");\n  ivf_assert_with_msg(trainer, IndexError_NoExist, \"Failed to create trainer\");\n\n  ret = trainer->init(converted_meta_, train_params);\n  ivf_check_with_msg(ret, \"Trainer init failed with ret %d\", ret);\n\n  ret = trainer->train(std::move(threads), std::move(holder));\n  ivf_check_with_msg(ret, \"Trainer train failed with ret %d\", ret);\n\n  ret = this->train(trainer);\n  ivf_check_error_code(ret);\n\n  stats_.set_trained_costtime(timer.milli_seconds());\n\n  LOG_INFO(\"End IVFBuilder::train with holder\");\n\n  state_ = TRAINED;\n  return 0;\n}\n\nint IVFBuilder::train(const IndexTrainer::Pointer &trainer) {\n  LOG_DEBUG(\"Begin IVFBuilder::train by trainer\");\n  ailego::ElapsedTime timer;\n\n  if (state_ != INITED) {\n    LOG_ERROR(\"IVFBuilder train failed, wrong state=%d\", state_);\n    return IndexError_Runtime;\n  }\n\n  if (!trainer) {\n    LOG_ERROR(\"Input trainer is nullptr while train index\");\n    return IndexError_InvalidArgument;\n  }\n\n  IndexCluster::CentroidList centroid_list;\n  IndexBundle::Pointer boundle = trainer->indexes();\n  int ret = IndexCluster::Deserialize(trainer->meta(), boundle, &centroid_list);\n  ivf_check_with_msg(ret, \"Failed to deserialize index\");\n\n  const IndexMeta &meta = trainer->meta();\n  if (meta.data_type() != converted_meta_.data_type() ||\n      meta.metric_name().compare(converted_meta_.metric_name()) != 0 ||\n      meta.element_size() != converted_meta_.element_size()) {\n    if (meta.converter_name() != converter_class_) {\n      LOG_ERROR(\"Input trainer doesn't match index meta while train index\");\n      return IndexError_Mismatch;\n    }\n    //! 
Create converter from trainer params\n    LOG_INFO(\"Train IVFBuilder by trainer with converter\");\n    converter_ = CreateAndInitConverter(meta_, meta.converter_name(),\n                                        meta.converter_params());\n    ivf_assert(converter_, IndexError_Runtime);\n    converted_meta_ = meta;\n  }\n\n  centroid_index_ = std::make_shared<IVFCentroidIndex>();\n  if (!centroid_index_) {\n    return IndexError_NoMemory;\n  }\n  ret = centroid_index_->init(converted_meta_, params_);\n  ivf_check_error_code(ret);\n\n  ret = centroid_index_->build(centroid_list);\n  ivf_check_with_msg(ret, \"Failed to build centroid index\");\n\n  if (params_.has(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS)) {\n    //! Quantize the centroids for searcher\n    searcher_centroid_index_ = std::make_shared<IVFCentroidIndex>();\n    if (!searcher_centroid_index_) {\n      return IndexError_NoMemory;\n    }\n    ailego::Params params;\n    params_.get(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_PARAMS, &params);\n    searcher_centroid_index_->set_quantizer(\n        params_.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS),\n        params);\n    ret = searcher_centroid_index_->init(converted_meta_, params_);\n    ivf_check_error_code(ret);\n\n    ret = searcher_centroid_index_->build(centroid_list);\n    ivf_check_with_msg(ret, \"Failed to build centroid index\");\n  }\n\n  stats_.set_trained_costtime(timer.milli_seconds());\n\n  LOG_DEBUG(\"End IVFBuilder::train by trainer\");\n\n  state_ = TRAINED;\n  return 0;\n}\n\nint IVFBuilder::build(IndexThreads::Pointer threads,\n                      IndexHolder::Pointer holder) {\n  LOG_INFO(\"Begin IVFBuilder::build!\");\n\n  if (state_ != TRAINED) {\n    LOG_ERROR(\"Train the index first before build\");\n    return IndexError_Runtime;\n  }\n\n  ailego::ElapsedTime timer;\n  if (!holder || holder->count() == 0) {\n    LOG_ERROR(\"Input holder is nullptr or empty while building index\");\n    return 
IndexError_InvalidArgument;\n  }\n\n  if (!holder->is_matched(meta_)) {\n    LOG_ERROR(\"Input holder doesn't match index meta while building index\");\n    return IndexError_Mismatch;\n  }\n  if (!threads) {\n    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);\n    if (!threads) {\n      return IndexError_NoMemory;\n    }\n  }\n\n  holder_ = std::make_shared<RandomAccessIndexHolder>(meta_);\n  if (!holder_) {\n    return IndexError_NoMemory;\n  }\n  if (holder->count() > 0) {\n    holder_->reserve(holder->count());\n  }\n  for (auto iter = holder->create_iterator(); iter && iter->is_valid();\n       iter->next()) {\n    holder_->emplace(iter->key(), iter->data());\n  }\n\n  // Holder is not needed, cleanup it.\n  holder.reset();\n\n  IndexHolder::Pointer converted_holder = holder_;\n  if (converter_) {\n    int ret = converter_->transform(holder_);\n    ivf_check_with_msg(ret, \"Failed to transform by converter %s\",\n                       converter_->name().c_str());\n    converted_holder = converter_->result();\n  }\n\n  labels_.resize(centroid_index_->centroids_count());\n  int ret = this->build_label_index(threads.get(), converted_holder);\n  ivf_check_with_msg(ret, \"Failed to build index for %s\",\n                     IndexError::What(ret));\n\n  ret = this->prepare_quantizer(threads.get());\n  ivf_check_error_code(ret);\n\n  stats_.set_built_costtime(timer.milli_seconds());\n\n  LOG_INFO(\"End IVFBuilder::build\");\n\n  state_ = BUILT;\n  return 0;\n}\n\nint IVFBuilder::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"Begin IVFBuilder::dump\");\n\n  if (state_ != BUILT) {\n    LOG_ERROR(\"Build the index before dump QC Index\");\n    return IndexError_Runtime;\n  }\n\n  ailego::ElapsedTime timer;\n  int ret = this->dump_index(dumper);\n  ivf_check_with_msg(ret, \"Failed to dump index with ret=%d\", ret);\n\n  // the fitting function for the follow points: 1000000(0.02) 10000000(0.01)\n  // 50000000(0.005) 
100000000(0.001)\n  float scan_ratio = -0.004 * std::log(holder_->count()) + 0.0751;\n  scan_ratio = std::max(scan_ratio, 0.0001f);\n\n  // Set Searcher Params\n  ailego::Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, scan_ratio);\n  meta_.set_searcher(\"IVFSearcher\", 0, std::move(params));\n  meta_.set_builder(\"IVFBuilder\", 0, std::move(params_));\n\n  ret = IndexHelper::SerializeToDumper(meta_, dumper.get());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to serialize meta into dumper.\");\n    return ret;\n  }\n\n  stats_.set_discarded_count(stats_.built_count() - stats_.dumped_count());\n  stats_.set_dumped_costtime(timer.milli_seconds());\n\n  LOG_INFO(\"End IVFBuilder::dump\");\n\n  return 0;\n}\n\nint IVFBuilder::CheckAndUpdateMajorOrder(IndexMeta &meta) {\n  const std::string &metric_name = meta.metric_name();\n  auto metric = IndexFactory::CreateMetric(metric_name);\n  if (!metric) {\n    LOG_ERROR(\"CreateMetric %s failed\", metric_name.c_str());\n    return IndexError_InvalidArgument;\n  }\n  int ret = metric->init(meta, meta.metric_params());\n  ivf_check_with_msg(ret, \"IndexMetric %s init failed\", metric_name.c_str());\n\n  bool support_column_major = true;\n  for (size_t m = 32; m != 0; m /= 2) {\n    for (size_t n = m; n != 0; n /= 2) {\n      if (metric->distance_matrix(m, n) == nullptr) {\n        support_column_major = false;\n        break;\n      }\n    }\n    if (!support_column_major) {\n      break;\n    }\n  }\n  support_column_major &=\n      meta.element_size() % IndexMeta::AlignSizeof(meta.data_type()) == 0;\n\n  if (meta.major_order() == IndexMeta::MO_UNDEFINED) {\n    if (support_column_major && meta.dimension() <= 512) {\n      meta.set_major_order(IndexMeta::MO_COLUMN);\n    } else {\n      meta.set_major_order(IndexMeta::MO_ROW);\n    }\n  } else {\n    if (!support_column_major && meta.major_order() == IndexMeta::MO_COLUMN) {\n      LOG_WARN(\n          \"Index Metric %s Unsupported \"\n          \"Column Major 
Order\",\n          metric_name.c_str());\n      return IndexError_Unsupported;\n    }\n  }\n\n  if (block_vector_count_ * quantized_meta_.element_size() % 32 != 0) {\n    LOG_ERROR(\n        \"block_vector_count * quantized_element_size not align with 32 bytes.\");\n    return IndexError_InvalidArgument;\n  }\n\n  return 0;\n}\n\nint IVFBuilder::parse_centroids_num(const ailego::Params &params) {\n  std::string centroids_num =\n      params.get_as_string(PARAM_IVF_BUILDER_CENTROID_COUNT);\n  if (centroids_num.empty()) {\n    LOG_ERROR(\"Param %s is required\", PARAM_IVF_BUILDER_CENTROID_COUNT.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  std::vector<std::string> centroid_str_vec;\n  ailego::StringHelper::Split(centroids_num, CENTROID_SEPERATOR,\n                              &centroid_str_vec);\n  size_t level_cnt = centroid_str_vec.size();\n  if ((level_cnt <= 0) || (level_cnt > 2)) {\n    LOG_ERROR(\"Centroids level count must be [1,2]\");\n    return IndexError_InvalidArgument;\n  }\n\n  for (size_t idx = 0; idx < level_cnt; ++idx) {\n    uint32_t centroid_cnt = 0;\n    if (!ailego::StringHelper::ToUint32(centroid_str_vec[idx], &centroid_cnt)) {\n      LOG_ERROR(\"Invalid centroids count %s\", centroid_str_vec[idx].c_str());\n      return IndexError_InvalidArgument;\n    }\n    centroid_num_vec_.push_back(centroid_cnt);\n  }\n\n  return 0;\n}\n\nint IVFBuilder::parse_clustering_params(const ailego::Params &params) {\n  params.get(PARAM_IVF_BUILDER_CLUSTER_AUTO_TUNING, &cluster_auto_tuning_);\n\n  cluster_class_ = params.get_as_string(PARAM_IVF_BUILDER_CLUSTER_CLASS);\n  if (cluster_class_.empty()) {\n    // OptKmeansCluster does not support custom metric\n    cluster_class_ = meta_.metric_name() == kMipsMetricName\n                         ? 
\"KmeansCluster\"\n                         : \"OptKmeansCluster\";\n    LOG_INFO(\"Using [%s] as default cluster class\", cluster_class_.c_str());\n  }\n  for (size_t i = 1; i <= centroid_num_vec_.size(); ++i) {\n    std::string level_params_key =\n        PARAM_IVF_BUILDER_CLUSTER_PARAMS_IN_LEVEL_PREFIX + std::to_string(i);\n    ailego::Params level_params;\n    params.get<ailego::Params>(level_params_key, &level_params);\n    cluster_params_.push_back(level_params);\n  }\n\n  return 0;\n}\n\nint IVFBuilder::parse_general_params(const ailego::Params &params) {\n  thread_count_ = params.get_as_uint32(PARAM_IVF_BUILDER_THREAD_COUNT);\n  sample_count_ = params.get_as_uint32(PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT);\n  sample_ratio_ = params.get_as_float(PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO);\n\n  params.get(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, &quantize_by_centroid_);\n  params.get(PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES,\n             &store_original_features_);\n\n  //! Prepare Converter for training\n  if (meta_.metric_name() == kIPMetricName) {\n    converter_class_ = kMipsConverterName;\n  }\n  params.get(PARAM_IVF_BUILDER_CONVERTER_CLASS, &converter_class_);\n  if (!converter_class_.empty()) {\n    ailego::Params converter_params;\n    params_.get(PARAM_IVF_BUILDER_CONVERTER_PARAMS, &converter_params);\n    converter_ =\n        CreateAndInitConverter(meta_, converter_class_, converter_params);\n    ivf_assert(converter_, IndexError_NoExist);\n  }\n\n  params_.get(PARAM_IVF_BUILDER_BLOCK_VECTOR_COUNT, &block_vector_count_);\n  if (block_vector_count_ == 0) {\n    block_vector_count_ = kDefaultBlockCount;\n  }\n  if (block_vector_count_ > kDefaultBlockCount ||\n      block_vector_count_ & (block_vector_count_ - 1)) {\n    LOG_ERROR(\"block_vector_count only can be [1|2|4|8|16|32].\");\n    return IndexError_InvalidArgument;\n  }\n  if (block_vector_count_ * meta_.element_size() % 32 != 0) {\n    LOG_ERROR(\"block_vector_count * element_size not align with 32 
bytes.\");\n    return IndexError_InvalidArgument;\n  }\n  return 0;\n}\n\nint IVFBuilder::prepare_trainer_params(ailego::Params &params) {\n  params.set(STRATIFIED_TRAINER_SAMPLE_COUNT, sample_count_);\n  params.set(STRATIFIED_TRAINER_SAMPLE_RATIO, sample_ratio_);\n  params.set(STRATIFIED_TRAINER_THREAD_COUNT, thread_count_);\n  params.set(STRATIFIED_TRAINER_AUTOAUNE, cluster_auto_tuning_);\n  if (centroid_num_vec_.empty()) {\n    LOG_ERROR(\"Centroids no specified.\");\n    return IndexError_InvalidArgument;\n  }\n  std::string cluster_count = std::to_string(centroid_num_vec_[0]);\n  if (centroid_num_vec_.size() > 1) {\n    cluster_count +=\n        (CENTROID_SEPERATOR + std::to_string(centroid_num_vec_[1]));\n  }\n  params.set(STRATIFIED_TRAINER_CLUSTER_COUNT, cluster_count);\n\n  for (size_t i = 1; i <= cluster_params_.size(); ++i) {\n    std::string level_params_key =\n        STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX + std::to_string(i);\n    params.set(level_params_key, cluster_params_[i - 1]);\n  }\n  params.set(STRATIFIED_TRAINER_CLASS_NAME, cluster_class_);\n\n  return 0;\n}\n\nint IVFBuilder::build_label_index(IndexThreads *threads,\n                                  const IndexHolder::Pointer &holder) {\n  auto iter = holder->create_iterator();\n  if (!iter) {\n    LOG_ERROR(\"Create iterator for holder failed\");\n    return IndexError_Runtime;\n  }\n\n  auto task_group = threads->make_group();\n  if (!task_group) {\n    LOG_ERROR(\"Failed to create task group\");\n    return IndexError_Runtime;\n  }\n\n  size_t id = 0UL;\n  AILEGO_DEFER([&]() {\n    task_group->wait_finish();\n    stats_.set_built_count(id);\n    LOG_INFO(\"Finished building, total=%zu\", id);\n  });\n\n  size_t elem_size = holder->element_size();\n  std::shared_ptr<VectorList> vectors = std::make_shared<VectorList>();\n  ivf_assert(vectors, IndexError_NoMemory);\n  for (; iter && iter->is_valid(); iter->next()) {\n    ivf_assert(!error_, err_code_);\n    
vectors->emplace_back(iter->data(), elem_size, id);\n    id++;\n    if (vectors->size() == kBatchSize || id == holder_->count()) {\n      auto task = ailego::Closure ::New(const_cast<IVFBuilder *>(this),\n                                        &IVFBuilder::label, vectors);\n      task_group->submit(std::move(task));\n      vectors = std::make_shared<VectorList>();\n      ivf_assert(vectors, IndexError_NoMemory);\n      vectors->reserve(kBatchSize);\n    }\n    if (!(id & 0xFFFFF)) {\n      LOG_INFO(\"Current built count:%zu\", id);\n    }\n  }\n  ailego_assert_with(vectors->size() == 0, \"invalid size\");\n\n  return err_code_;\n}\n\nint IVFBuilder::dump_index(const IndexDumper::Pointer &dumper) {\n  int ret = CheckAndUpdateMajorOrder(quantized_meta_);\n  ivf_check_error_code(ret);\n\n  IVFDumper::Pointer ivf_dumper = std::make_shared<IVFDumper>(\n      quantized_meta_, dumper, centroid_index_->centroids_count(),\n      block_vector_count_);\n  if (!ivf_dumper) {\n    LOG_ERROR(\"Alloc IVFDumper failed\");\n    return IndexError_NoMemory;\n  }\n\n  //! Dump inverted vectors\n  std::vector<uint32_t> dumped_ids;\n  std::function<void(uint32_t)> record_dumped_id = [&](uint32_t) {};\n  if (store_original_features_) {\n    dumped_ids.reserve(holder_->count());\n    record_dumped_id = [&](uint32_t id) { dumped_ids.emplace_back(id); };\n  }\n  if (quantizers_.size() == 0) {\n    //! 
No quantizer for inverted vectors\n    for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {\n      ailego_assert_with(i < labels_.size(), \"Index Overflow\");\n      for (size_t j = 0; j < labels_[i].size(); ++j) {\n        auto id = labels_[i][j];\n        record_dumped_id(id);\n        ret = ivf_dumper->dump_inverted_vector(i, holder_->key(id),\n                                               holder_->element(id));\n        ivf_check_error_code(ret);\n      }\n    }\n  } else {\n    for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {\n      ailego_assert_with(i < labels_.size(), \"Index Overflow\");\n      auto holder =\n          std::make_shared<LabelFilteredIndexHolder>(holder_, labels_[i]);\n      if (!holder) {\n        return IndexError_NoMemory;\n      }\n      auto quantizer = quantize_by_centroid_ ? quantizers_[i] : quantizers_[0];\n      ret = quantizer->transform(holder);\n      ivf_check_error_code(ret);\n\n      auto iter = quantizer->result()->create_iterator();\n      for (; iter->is_valid(); iter->next()) {\n        uint32_t id = iter->key();\n        record_dumped_id(id);\n        ret =\n            ivf_dumper->dump_inverted_vector(i, holder_->key(id), iter->data());\n        ivf_check_error_code(ret);\n      }\n    }\n  }\n\n  ret = ivf_dumper->dump_inverted_vector_finished();\n  ivf_check_error_code(ret);\n\n  ret = ivf_dumper->dump_quantizer_params(quantizers_);\n  ivf_check_error_code(ret);\n\n  auto centroid_index =\n      searcher_centroid_index_ ? 
searcher_centroid_index_ : centroid_index_;\n  ret = ivf_dumper->dump_centroid_index(centroid_index->data(),\n                                        centroid_index->size());\n  ivf_check_with_msg(ret, \"Failed to dump CentroidIndex\");\n\n  if (store_original_features_) {\n    for (size_t i = 0; i < dumped_ids.size(); ++i) {\n      ret = ivf_dumper->dump_original_vector(holder_->element(dumped_ids[i]),\n                                             holder_->element_size());\n      ivf_check_error_code(ret);\n    }\n  }\n\n  stats_.set_dumped_count(stats_.dumped_count() + ivf_dumper->dumped_count());\n\n  return 0;\n}\n\nint IVFBuilder::prepare_quantizer(IndexThreads *threads) {\n  std::string quantizer_name;\n  params_.get(PARAM_IVF_BUILDER_QUANTIZER_CLASS, &quantizer_name);\n  if (quantizer_name.empty()) {\n    return 0;\n  }\n\n  //! Prepare Quantizers for inverted index\n  ailego::Params quantizer_params;\n  params_.get(PARAM_IVF_BUILDER_QUANTIZER_PARAMS, &quantizer_params);\n  if (((quantizer_name != kInt8QuantizerName &&\n        quantizer_name != kInt4QuantizerName) ||\n       meta_.metric_name() != kIPMetricName) &&\n      quantize_by_centroid_) {\n    LOG_WARN(\"%s is supported in InnerProduct only\",\n             PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID.c_str());\n    quantize_by_centroid_ = false;\n  }\n  if (quantizer_name == kInt4QuantizerName && meta_.dimension() & 0x1) {\n    LOG_ERROR(\"Unsupport quantizer=%s for dim=%u\", kInt4QuantizerName,\n              meta_.dimension());\n    return IndexError_Unsupported;\n  }\n\n  int ret = 0;\n  auto create_and_init_quantizer = [&]() {\n    auto quantizer = IndexFactory::CreateConverter(quantizer_name);\n    if (!quantizer) {\n      LOG_ERROR(\"Failed to create converter %s\", quantizer_name.c_str());\n      ret = IndexError_NoExist;\n      return IndexConverter::Pointer();\n    }\n    ret = quantizer->init(meta_, quantizer_params);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to initialize converter %s for 
%s\",\n                quantizer_name.c_str(), IndexError::What(ret));\n      return IndexConverter::Pointer();\n    }\n    return quantizer;\n  };\n  for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {\n    quantizers_.emplace_back(create_and_init_quantizer());\n    ivf_check_error_code(ret);\n    if (!quantize_by_centroid_) {\n      break;\n    }\n  }\n\n  //! Train the quantizers\n  auto train_data = [&](size_t i) {\n    IndexHolder::Pointer holder = holder_;\n    size_t idx = 0;\n    if (quantize_by_centroid_) {\n      holder = std::make_shared<LabelFilteredIndexHolder>(holder_, labels_[i]);\n      if (!holder && !error_.exchange(true)) {\n        err_code_ = IndexError_NoMemory;\n        return;\n      }\n      idx = i;\n    }\n    if (holder->count() == 0) {\n      return;\n    }\n    ret = quantizers_[idx]->train(holder);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to train converter %s for %s\", quantizer_name.c_str(),\n                IndexError::What(ret));\n      if (!error_.exchange(true)) {\n        err_code_ = IndexError_Runtime;\n      }\n    }\n  };\n\n  auto task_group = threads->make_group();\n  if (!task_group) {\n    LOG_ERROR(\"Failed to create task group\");\n    return IndexError_Runtime;\n  }\n\n  for (size_t i = 0; i < quantizers_.size(); ++i) {\n    if (error_) {\n      task_group->wait_finish();\n      return err_code_;\n    }\n    task_group->submit(ailego::Closure ::New(train_data, i));\n  }\n\n  task_group->wait_finish();\n  if (quantizers_.size() > 0) {\n    quantized_meta_ = quantizers_[0]->meta();\n  }\n\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_BUILDER(IVFBuilder);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_meta.h>\n#include \"ivf_centroid_index.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Builder\n */\nclass IVFBuilder : public IndexBuilder {\n public:\n  //! Constructor\n  IVFBuilder();\n\n  //! Destructor\n  ~IVFBuilder();\n\n  //! Disable them\n  IVFBuilder(const IVFBuilder &) = delete;\n  IVFBuilder &operator=(const IVFBuilder &) = delete;\n\n public:\n  //! Initialize the builder\n  virtual int init(const IndexMeta &meta,\n                   const ailego::Params &params) override;\n\n  //! Cleanup the builder\n  virtual int cleanup(void) override;\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer threads,\n                    IndexHolder::Pointer holder) override;\n\n  //! Train the data\n  virtual int train(const IndexTrainer::Pointer &trainer) override;\n\n  //! Build the index\n  virtual int build(IndexThreads::Pointer threads,\n                    IndexHolder::Pointer holder) override;\n\n  //! Dump index into file system\n  virtual int dump(const IndexDumper::Pointer &dumper) override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  IVFCentroidIndex::Pointer centroid_index() const {\n    return centroid_index_;\n  }\n\n public:\n  /*! 
Random Access Index Holder\n   */\n  class RandomAccessIndexHolder : public IndexHolder {\n   public:\n    //! Index Holder Pointer\n    typedef std::shared_ptr<RandomAccessIndexHolder> Pointer;\n\n    /*! Random Access Index Holder Iterator\n     */\n    class Iterator : public IndexHolder::Iterator {\n     public:\n      //! Index Holder Iterator Pointer\n      typedef std::unique_ptr<Iterator> Pointer;\n\n      //! Constructor\n      Iterator(RandomAccessIndexHolder *owner) : holder_(owner) {}\n\n      //! Destructor\n      virtual ~Iterator(void) {}\n\n      //! Retrieve pointer of data\n      virtual const void *data(void) const override {\n        return holder_->element(id_);\n      }\n\n      //! Test if the iterator is valid\n      virtual bool is_valid(void) const override {\n        return id_ < holder_->count();\n      }\n\n      //! Retrieve primary key\n      virtual uint64_t key(void) const override {\n        return holder_->key(id_);\n      }\n\n      //! Next iterator\n      virtual void next(void) override {\n        ++id_;\n      }\n\n     private:\n      //! Members\n      RandomAccessIndexHolder *holder_{nullptr};\n      uint32_t id_{0};\n    };\n\n    //! Constructor\n    RandomAccessIndexHolder(const IndexMeta &meta)\n        : features_(std::make_shared<CompactIndexFeatures>(meta)) {}\n\n    //! Retrieve count of elements in holder (-1 indicates unknown)\n    virtual size_t count(void) const override {\n      return features_->count();\n    }\n\n    //! Retrieve dimension\n    virtual size_t dimension(void) const override {\n      return features_->dimension();\n    }\n\n    //! Retrieve type information\n    virtual IndexMeta::DataType data_type(void) const override {\n      return features_->data_type();\n    }\n\n    //! Retrieve element size in bytes\n    virtual size_t element_size(void) const override {\n      return features_->element_size();\n    }\n\n    //! 
Retrieve if it can multi-pass\n    virtual bool multipass(void) const override {\n      return true;\n    }\n\n    //! Create a new iterator\n    virtual IndexHolder::Iterator::Pointer create_iterator(void) override {\n      return IndexHolder::Iterator::Pointer(\n          new RandomAccessIndexHolder::Iterator(this));\n    }\n\n    void reserve(size_t elems) {\n      features_->reserve(elems);\n      keys_.reserve(elems);\n    }\n\n    //! Append an element into holder\n    void emplace(uint64_t pkey, const void *vec) {\n      features_->emplace(vec);\n      keys_.emplace_back(pkey);\n    }\n\n    //! Retrieve feature via local id\n    const void *element(size_t id) const {\n      return features_->element(id);\n    }\n\n    //! Retrieve key via local id\n    uint64_t key(size_t id) const {\n      ailego_assert_with(id < keys_.size(), \"Index Overflow\");\n      return keys_[id];\n    }\n\n   private:\n    //! Disable them\n    RandomAccessIndexHolder(void) = delete;\n\n    //! Members\n    CompactIndexFeatures::Pointer features_{};\n    std::vector<uint64_t> keys_{};\n  };\n\n private:\n  /*! Wrapper of feature\n   */\n  class Vector {\n   public:\n    typedef std::shared_ptr<Vector> Pointer;\n\n    Vector(const void *vec, size_t len, uint32_t idx)\n        : vec_(reinterpret_cast<const char *>(vec), len), id_{idx} {}\n\n    const void *data() const {\n      return vec_.data();\n    }\n\n    size_t size() const {\n      return vec_.size();\n    }\n\n    uint32_t id(void) const {\n      return id_;\n    }\n\n   private:\n    std::string vec_{};\n    uint32_t id_{0u};\n  };\n\n  using VectorList = std::vector<Vector>;\n\n  //! Check MajorOrder in meta, and update the major order if needed\n  int CheckAndUpdateMajorOrder(IndexMeta &meta);\n\n  //! Parse params\n  int parse_centroids_num(const ailego::Params &params);\n  int parse_clustering_params(const ailego::Params &params);\n  int parse_general_params(const ailego::Params &params);\n\n  //! 
Prepare params for trainer\n  int prepare_trainer_params(ailego::Params &params);\n\n  //! Build the index\n  int build_label_index(IndexThreads *threads,\n                        const IndexHolder::Pointer &holder);\n\n  //! Dump the index to dumper\n  int dump_index(const IndexDumper::Pointer &dumper);\n\n  //! Prepare the quantizer for inverted index\n  int prepare_quantizer(IndexThreads *threads);\n\n  //! Quantize the centroids list\n  int quantize_centroids();\n\n  //! Create converter and init with params\n  static IndexConverter::Pointer CreateAndInitConverter(\n      const IndexMeta &meta, const std::string &name,\n      const ailego::Params &params) {\n    auto converter = IndexFactory::CreateConverter(name);\n    if (!converter) {\n      LOG_ERROR(\"Failed to create converter %s\", name.c_str());\n      return IndexConverter::Pointer();\n    }\n    int ret = converter->init(meta, params);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to initialize converter %s for %s\", name.c_str(),\n                IndexError::What(ret));\n      return IndexConverter::Pointer();\n    }\n    return converter;\n  }\n\n  //! Select the nearest centroid id for the vector\n  void label(const std::shared_ptr<VectorList> &vecs) {\n    for (size_t i = 0; i < vecs->size(); ++i) {\n      auto &vec = (*vecs)[i];\n\n      uint32_t centroid_idx =\n          centroid_index_->search_nearest_centroid(vec.data(), vec.size());\n      if (centroid_idx == IVFCentroidIndex::kInvalidID) {\n        LOG_ERROR(\"Failed to search nearest centroid in CentroidIndex\");\n        if (!error_.exchange(true)) {\n          err_code_ = IndexError_Runtime;\n        }\n        return;\n      }\n      ailego_assert_with(centroid_idx < labels_.size(), \"Index Overflow\");\n      mutex_.lock();\n      labels_[centroid_idx].emplace_back(vec.id());\n      mutex_.unlock();\n    }\n  }\n\n\n private:\n  //! 
Constants\n  static constexpr size_t kThreadPoolQueueSize = 300u;\n  static constexpr size_t kBatchSize = 10u;\n  static constexpr size_t kDefaultBlockCount = 32u;\n\n  enum BuilderState { INIT = 0, INITED = 1, TRAINED = 2, BUILT = 3 };\n\n  //! Members\n  BuilderState state_{INIT};\n  Stats stats_{};\n  ailego::Params params_{};\n  IndexMeta meta_{};\n\n  std::vector<uint32_t> centroid_num_vec_{};\n  std::string cluster_class_{};\n  std::string converter_class_{};\n  std::vector<ailego::Params> cluster_params_{};\n\n  std::vector<std::vector<uint32_t>> labels_{};\n  std::mutex mutex_{};\n  IVFCentroidIndex::Pointer centroid_index_{};\n  IVFCentroidIndex::Pointer searcher_centroid_index_{};\n  RandomAccessIndexHolder::Pointer holder_{};\n  IndexMeta converted_meta_{};\n  IndexConverter::Pointer converter_{};\n  IndexMeta quantized_meta_{};\n  std::vector<IndexConverter::Pointer> quantizers_{};\n\n  std::atomic_bool error_{false};\n  int err_code_{0};\n\n  uint32_t thread_count_{0};\n  uint32_t sample_count_{0};\n  float sample_ratio_{0.0};\n  uint32_t block_vector_count_{kDefaultBlockCount};\n  bool cluster_auto_tuning_{false};\n  bool store_original_features_{false};\n  bool quantize_by_centroid_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_centroid_index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_centroid_index.h\"\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"metric/metric_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Fake Trainer to supply centroids in bundle\n */\nclass FakeClusterTrainer : public IndexTrainer {\n public:\n  //! Constructor\n  FakeClusterTrainer(const IndexMeta &imeta, const IndexBundle::Pointer &bundle)\n      : meta_(imeta), bundle_(bundle) {}\n\n  //! Destructor\n  ~FakeClusterTrainer(void) {}\n\n protected:\n  //! Initialize Trainer\n  virtual int init(const IndexMeta &, const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Trainer\n  virtual int cleanup(void) override {\n    return 0;\n  }\n\n  //! Train the data\n  virtual int train(IndexHolder::Pointer) override {\n    return 0;\n  }\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer, IndexHolder::Pointer) override {\n    return 0;\n  }\n\n  //! Load index from file path or dir\n  virtual int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Dump index into file path or dir\n  virtual int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! Retrieve Index Meta\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! 
Retrieve statistics\n  virtual const IndexTrainer::Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve the output indexes\n  virtual IndexBundle::Pointer indexes(void) const override {\n    return bundle_;\n  }\n\n private:\n  //! Members\n  IndexMeta meta_{};\n  Stats stats_{};\n  IndexBundle::Pointer bundle_{};\n};\n\n/*! Int8QuantizerReformer for InnerProduct Measure\n */\nclass Int8QuantizerReformer4IP : public IndexReformer {\n public:\n  //! Initialize Reformer\n  virtual int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  virtual int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  virtual int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  virtual int unload(void) override {\n    return 0;\n  }\n\n  //! Transform query\n  virtual int transform(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const override {\n#if 0\n        size_t dim = qmeta.dimension();\n        out->resize(IndexMeta::ElementSizeof(\n            IndexMeta::DataType::DT_INT8, dim));\n        ometa->set_meta(IndexMeta::DataType::DT_INT8, dim);\n        const float *ivec = reinterpret_cast<const float *>(query);\n        int8_t *ovec = reinterpret_cast<int8_t *>(&(*out)[0]);\n        float abs_max = 0.0f;\n        for (size_t i = 0; i < dim; ++i) {\n            auto abs = std::abs(ivec[i]);\n            if (abs > abs_max) {\n                abs_max = abs;\n            }\n        }\n        if (abs_max > 0.0f) {\n            float scale = 127 / abs_max;\n            for (size_t i = 0; i < dim; ++i) {\n                ovec[i] = static_cast<int8_t>(std::round(ivec[i] * scale));\n            }\n        } else {\n            std::fill(ovec, ovec + dim, static_cast<int8_t>(1));\n        }\n        return 0;\n#else\n    return 
IndexError_NotImplemented;\n#endif\n  }\n\n  //! Transform queries\n  virtual int transform(const void *query, const IndexQueryMeta &qmeta,\n                        uint32_t count, std::string *oquery,\n                        IndexQueryMeta *ometa) const override {\n    size_t dim = qmeta.dimension();\n    oquery->resize(count *\n                   IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8, dim));\n    ometa->set_meta(IndexMeta::DataType::DT_INT8, dim);\n    const float *ivec = reinterpret_cast<const float *>(query);\n    int8_t *ovec = reinterpret_cast<int8_t *>(&(*oquery)[0]);\n    for (size_t q = 0; q < count; ++q) {\n      float abs_max = 0.0f;\n      const float *in = &ivec[q * dim];\n      int8_t *out = &ovec[q * dim];\n      for (size_t i = 0; i < dim; ++i) {\n        auto abs = std::abs(in[i]);\n        if (abs > abs_max) {\n          abs_max = abs;\n        }\n      }\n      if (abs_max > 0.0f) {\n        float scale = 127 / abs_max;\n        for (size_t i = 0; i < dim; ++i) {\n          out[i] = static_cast<int8_t>(std::round(in[i] * scale));\n        }\n      } else {\n        std::fill(out, out + dim, static_cast<int8_t>(1));\n      }\n    }\n    return 0;\n  }\n\n  //! Normalize results\n  virtual int normalize(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/,\n                        IndexDocumentList & /*result*/) const override {\n    return 0;\n  }\n};\n\n/*! Int4QuantizerReformer for InnerProduct Metric\n */\nclass Int4QuantizerReformer4IP : public IndexReformer {\n public:\n  //! Initialize Reformer\n  virtual int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  virtual int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  virtual int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  virtual int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  virtual int transform(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const override {\n    return IndexError_NotImplemented;\n  }\n\n  //! Transform queries\n  virtual int transform(const void *query, const IndexQueryMeta &qmeta,\n                        uint32_t count, std::string *oquery,\n                        IndexQueryMeta *ometa) const override {\n    if (qmeta.dimension() & 0x1) {\n      LOG_ERROR(\"Unsuuport dim=%u for transform\", qmeta.dimension());\n      return IndexError_Unsupported;\n    }\n\n    size_t dim = qmeta.dimension();\n    oquery->resize(count *\n                   IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4, dim));\n    ometa->set_meta(IndexMeta::DataType::DT_INT4, dim);\n    const float *ivec = reinterpret_cast<const float *>(query);\n    uint8_t *ovec = reinterpret_cast<uint8_t *>(&(*oquery)[0]);\n    for (size_t q = 0; q < count; ++q) {\n      float abs_max = 0.0f;\n      float max = -std::numeric_limits<float>::max();\n      const float *in = &ivec[q * dim];\n      uint8_t *out = &ovec[q * dim / 2];\n      for (size_t i = 0; i < dim; ++i) {\n        float abs = std::abs(in[i]);\n        abs_max = std::max(abs_max, abs);\n        max = std::max(max, in[i]);\n      }\n      if (abs_max > 0.0f) {\n        float scale = ((7 * abs_max > 8 * max) ? 8 : 7) / abs_max;\n        for (size_t i = 0; i < dim; i += 2) {\n          auto v1 = static_cast<int8_t>(std::round(in[i] * scale));\n          auto v2 = static_cast<int8_t>(std::round(in[i + 1] * scale));\n          out[i / 2] = (static_cast<uint8_t>(v1) << 4) |\n                       (static_cast<uint8_t>(v2) & 0xF);\n        }\n      } else {\n        std::fill(out, out + dim / 2, static_cast<uint8_t>(9));\n      }\n    }\n    return 0;\n  }\n\n  //! 
Normalize results\n  virtual int normalize(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/,\n                        IndexDocumentList & /*result*/) const override {\n    return 0;\n  }\n};\n\nint IVFCentroidIndex::init(const IndexMeta &meta,\n                           const ailego::Params &params) {\n  meta_ = meta;\n\n  params.get(PARAM_IVF_BUILDER_OPTIMIZER_CLASS, &builder_class_);\n  params.get(PARAM_IVF_BUILDER_OPTIMIZER_PARAMS, &builder_params_);\n  params.get(PARAM_IVF_SEARCHER_OPTIMIZER, &searcher_class_);\n  params.get(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, &searcher_params_);\n\n  return 0;\n}\n\nint IVFCentroidIndex::search(const void *query, const IndexQueryMeta &qmeta,\n                             size_t count,\n                             IndexSearcher::Context::Pointer &ctx) {\n  int ret = 0;\n\n  if (reformer_) {\n    std::string buffer;\n    IndexQueryMeta ometa;\n    ret = reformer_->transform(query, qmeta, count, &buffer, &ometa);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to transform querys by reformer\");\n      return ret;\n    }\n    ret = searcher_->search_impl(buffer.data(), ometa, count, ctx);\n  } else {\n    ret = searcher_->search_impl(query, qmeta, count, ctx);\n  }\n\n  ivf_check_with_msg(ret, \"Failed to search in centroid index for %s\",\n                     IndexError::What(ret));\n\n  return 0;\n}\n\nuint32_t IVFCentroidIndex::search_nearest_centroid(const void *query,\n                                                   size_t len) {\n  //! 
Called in index-building procedure, so transforming the query is needless\n  if (len != meta_.element_size()) {\n    LOG_ERROR(\"Invalid query size actual: %zu, expected: %u\", len,\n              meta_.element_size());\n    return kInvalidID;\n  }\n\n  thread_local IndexSearcher::Context::Pointer context(\n      searcher_->create_context());\n  context->set_topk(1);\n\n  IndexQueryMeta qmeta(meta_.data_type(), meta_.dimension());\n  int ret = searcher_->search_impl(query, qmeta, context);\n  if (ret != 0 || context->result().empty()) {\n    LOG_ERROR(\"Failed to search nearest centroid, with ret %d\", ret);\n    return kInvalidID;\n  }\n\n  return static_cast<uint32_t>(context->result()[0].key());\n}\n\nuint32_t IVFCentroidIndex::transform_and_search_nearest_centroid(\n    const void *record, const IndexQueryMeta &rmeta,\n    IndexSearcher::Context::Pointer &ctx) const {\n  int ret = 0;\n  if (reformer_) {\n    std::string buffer;\n    IndexQueryMeta ometa;\n    ret = reformer_->convert(record, rmeta, &buffer, &ometa);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to transform querys by reformer\");\n      return kInvalidID;\n    }\n    ret = searcher_->search_impl(buffer.data(), ometa, ctx);\n  } else {\n    ret = searcher_->search_impl(record, rmeta, ctx);\n  }\n  if (ret != 0 || ctx->result().empty()) {\n    LOG_ERROR(\"Failed to search in centroid index for %s\",\n              IndexError::What(ret));\n    return kInvalidID;\n  }\n\n  return static_cast<uint32_t>(ctx->result()[0].key());\n}\n\nIndexHolder::Pointer IVFCentroidIndex::quantize_holder(\n    const IndexHolder::Pointer &holder) {\n  auto input = holder;\n  if (meta_.reformer_name() == kMipsReformerName &&\n      meta_.metric_name() == kL2MetricName &&\n      (quantizer_class_ == kInt8QuantizerName ||\n       quantizer_class_ == kInt4QuantizerName)) {\n    //! 
Reverse for Mips if do convert by integer quantizer\n    auto reverse = IndexFactory::CreateConverter(kMipsRevConverterName);\n    if (!reverse) {\n      LOG_ERROR(\"Failed to create converter %s\", kMipsRevConverterName);\n      return nullptr;\n    }\n    ailego::Params params;\n    auto p = meta_.reformer_params();\n    params.set(MIPS_REVERSE_CONVERTER_M_VALUE,\n               p.get_as_uint32(MIPS_REFORMER_M_VALUE));\n    params.set(MIPS_REVERSE_CONVERTER_U_VALUE,\n               p.get_as_float(MIPS_REFORMER_U_VALUE));\n    params.set(MIPS_REVERSE_CONVERTER_L2_NORM,\n               p.get_as_uint32(MIPS_REFORMER_L2_NORM));\n    params.set(MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT,\n               p.get_as_float(MIPS_REFORMER_FORCED_HALF_FLOAT));\n    int ret = reverse->init(meta_, params);\n    if (ret != 0) {\n      LOG_ERROR(\"Fail to init converter %s\", kMipsRevConverterName);\n      return nullptr;\n    }\n    ret = IndexConverter::TrainAndTransform(reverse, holder);\n    if (ret != 0) {\n      LOG_ERROR(\"Fail to transform converter %s\", kMipsRevConverterName);\n      return nullptr;\n    }\n    input = reverse->result();\n    meta_ = reverse->meta();\n    meta_.set_metric(kIPMetricName, 0, ailego::Params());\n    meta_.set_reformer(\"\", 0, ailego::Params());\n  }\n\n  auto converter = IndexFactory::CreateConverter(quantizer_class_);\n  if (!converter) {\n    LOG_ERROR(\"Failed to create converter %s\", quantizer_class_.c_str());\n    return nullptr;\n  }\n  int ret = converter->init(meta_, quantizer_params_);\n  if (ret != 0) {\n    LOG_ERROR(\"Fail to init converter %s\", quantizer_class_.c_str());\n    return nullptr;\n  }\n\n  ret = IndexConverter::TrainAndTransform(converter, input);\n  if (ret != 0) {\n    LOG_ERROR(\"Fail to tranform converter %s\", quantizer_class_.c_str());\n    return nullptr;\n  }\n\n  meta_ = converter->meta();\n  return converter->result();\n}\n\nint IVFCentroidIndex::build_index(\n    const IndexCluster::CentroidList 
&centroid_list,\n    const IndexDumper::Pointer &dumper) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(builder_class_);\n  if (!builder) {\n    LOG_ERROR(\"Failed to create builder %s\", builder_class_.c_str());\n    return IndexError_NoExist;\n  }\n\n  IndexHolder::Pointer holder =\n      std::make_shared<CentroidsIndexHolder>(meta_, centroid_list);\n  if (!holder) {\n    return IndexError_NoMemory;\n  }\n  if (holder->count() == 0) {\n    LOG_ERROR(\"No centroids to build\");\n    return IndexError_InvalidArgument;\n  }\n  centroids_count_ = holder->count();\n\n  //! Set default params if not given\n  auto count = std::to_string(\n      static_cast<size_t>(std::ceil(std::sqrt(centroids_count_ / 10.0))));\n  // if (IsHcBuilder(builder_class_) &&\n  //     !builder_params_.has(hc::PARAM_HC_BUILDER_CENTROID_COUNT)) {\n  //   builder_params_.set(hc::PARAM_HC_BUILDER_CENTROID_COUNT, count);\n  // } else if (builder_class_ == \"GcBuilder\" &&\n  //            !builder_params_.has(hc::PARAM_GC_BUILDER_CENTROID_COUNT)) {\n  //   builder_params_.set(hc::PARAM_GC_BUILDER_CENTROID_COUNT, count);\n  // }\n  if (!quantizer_class_.empty()) {\n    holder = this->quantize_holder(holder);\n    if (!holder) {\n      return IndexError_Runtime;\n    }\n  }\n\n  const auto name = builder_class_.c_str();\n  int ret = builder->init(meta_, builder_params_);\n  ivf_check_with_msg(ret, \"%s init failed, ret=%d\", name, ret);\n\n  // if (IsHcBuilder(builder_class_) && quantizer_class_.empty()) {\n  //   auto trainer = this->prepare_trainer(centroid_list);\n  //   ret = trainer ? 
builder->train(trainer) : builder->train(holder);\n  // } else {\n  //   ret = builder->train(holder);\n  // }\n\n  ret = builder->train(holder);\n  ivf_check_with_msg(ret, \"%s train failed, ret=%d\", name, ret);\n\n  ret = builder->build(holder);\n  ivf_check_with_msg(ret, \"%s build failed, ret=%d\", name, ret);\n\n  ret = builder->dump(dumper);\n  ivf_check_with_msg(ret, \"%s dump failed, ret=%d\", name, ret);\n\n  ret = dumper->close();\n  ivf_check_error_code(ret);\n\n  return 0;\n}\n\nint IVFCentroidIndex::build(const IndexCluster::CentroidList &centroid_list) {\n  index_building_ = true;\n  //! Build and dump the index\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  if (!dumper) {\n    LOG_ERROR(\"Failed to create MemoryDumper\");\n    return IndexError_NoExist;\n  }\n  path_ = IVFUtility::GenerateRandomPath(kTempralPathPrefix);\n  int ret = dumper->create(path_);\n  if (ret != 0) {\n    LOG_ERROR(\"IndexDumper create path %s failed\", path_.c_str());\n    return ret;\n  }\n  ret = this->build_index(centroid_list, dumper);\n  ivf_check_error_code(ret);\n\n  auto rope = IndexMemory::Instance()->open(path_);\n  if (!rope) {\n    LOG_ERROR(\"Open memory path %s failed.\", path_.c_str());\n    return ret;\n  }\n  if (rope->count() != 1) {\n    LOG_ERROR(\"Graph Rope block count not equal with 1.\");\n    return ret;\n  }\n  (*rope)[0].read(0, &data_, 0);\n  size_ = (*rope)[0].size();\n\n  //! 
Load the index\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MemoryReadStorage\");\n  if (!container) {\n    LOG_ERROR(\"Failed to create MemoryReadStorage\");\n    return IndexError_NoExist;\n  }\n  ret = container->init(ailego::Params());\n  ivf_check_with_msg(ret, \"Failed to initialize MemoryReadStorage for %s\",\n                     IndexError::What(ret));\n  ret = container->open(path_, false);\n  ivf_check_with_msg(ret, \"Failed to load path in MemoryReadStorage for %s\",\n                     IndexError::What(ret));\n\n  ailego::Params searcher_params;\n  if (!searcher_class_.empty()) {\n    searcher_params.set(PARAM_IVF_SEARCHER_OPTIMIZER, searcher_class_);\n  }\n  if (!searcher_params_.empty()) {\n    searcher_params.set(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, searcher_params_);\n  }\n  ret = this->load(container, searcher_params);\n  ivf_check_with_msg(ret, \"IVFCentroidIndex load failed with %s\",\n                     IndexError::What(ret));\n\n  return 0;\n}\n\nint IVFCentroidIndex::load(const IndexStorage::Pointer &container,\n                           const ailego::Params params) {\n  if (!container) {\n    LOG_ERROR(\"Invalid container\");\n    return IndexError_InvalidArgument;\n  }\n\n  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  auto reformer_name = meta_.reformer_name();\n  if (!reformer_name.empty()) {\n    LOG_DEBUG(\"Load CentroidIndex with reformer %s, metric %s\",\n              reformer_name.c_str(), meta_.metric_name().c_str());\n    if ((reformer_name == kInt8ReformerName ||\n         reformer_name == kInt4ReformerName) &&\n        meta_.metric_name() == kIPMetricName) {\n      if (reformer_name == kInt8ReformerName) {\n        reformer_ = std::make_shared<Int8QuantizerReformer4IP>();\n      } else {\n        reformer_ = std::make_shared<Int4QuantizerReformer4IP>();\n      
}\n      if (!reformer_) {\n        return IndexError_NoMemory;\n      }\n    } else {\n      reformer_ = IndexFactory::CreateReformer(reformer_name);\n      if (!reformer_) {\n        LOG_ERROR(\"Failed to create reformer %s\", reformer_name.c_str());\n        return IndexError_NoExist;\n      }\n    }\n    ret = reformer_->init(meta_.reformer_params());\n    ivf_check_with_msg(ret, \"Failed to initialize reformer %s\",\n                       reformer_name.c_str());\n  }\n\n  searcher_class_ = meta_.searcher_name();\n  params.get(PARAM_IVF_SEARCHER_OPTIMIZER, &searcher_class_);\n  params.get(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, &searcher_params_);\n  searcher_ = IndexFactory::CreateSearcher(searcher_class_);\n  if (!searcher_) {\n    LOG_ERROR(\"Failed to create searcher %s\", searcher_class_.c_str());\n    return IndexError_Runtime;\n  }\n\n  auto searcher_params = meta_.searcher_params();\n  searcher_params.merge(searcher_params_);\n  ret = searcher_->init(searcher_params);\n  ivf_check_with_msg(ret, \"Failed to initialize searcher %s\",\n                     searcher_class_.c_str());\n\n  IndexMetric::Pointer metric;\n  if (index_building_) {\n    // The searcher index metric should specified in building process,\n    // otherwise the query_metric will be used in searching\n    metric = IndexFactory::CreateMetric(meta_.metric_name());\n    ivf_assert_with_msg(metric, IndexError_NoExist,\n                        \"Failed to create metric %s\",\n                        meta_.metric_name().c_str());\n    ret = metric->init(meta_, meta_.metric_params());\n    ivf_check_with_msg(ret, \"Failed to initialize metric\");\n  }\n  ret = searcher_->load(container, metric);\n  ivf_check_with_msg(ret, \"Failed to load searcher %s\",\n                     searcher_class_.c_str());\n\n  return 0;\n}\n\nIndexTrainer::Pointer IVFCentroidIndex::prepare_trainer(\n    const IndexCluster::CentroidList &centroid_list) {\n  IndexCluster::CentroidList level1_centroids;\n  bool 
two_level = false;\n  for (auto &it : centroid_list) {\n    auto centroid = it;\n    if (!centroid.subitems().empty()) {\n      two_level = true;\n    }\n    centroid.mutable_subitems()->clear();\n    centroid.mutable_similars()->clear();\n    level1_centroids.emplace_back(centroid);\n  }\n  if (!two_level) {\n    return IndexTrainer::Pointer();\n  }\n\n  IndexBundle::Pointer bundle;\n  IndexCluster::Serialize(meta_, level1_centroids, &bundle);\n  return std::make_shared<FakeClusterTrainer>(meta_, bundle);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_centroid_index.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"ivf_params.h\"\n#include \"ivf_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Centroid Index\n */\nclass IVFCentroidIndex {\n public:\n  typedef std::shared_ptr<IVFCentroidIndex> Pointer;\n\n  //! Constructor\n  IVFCentroidIndex(void) {}\n\n  //! Destructor\n  ~IVFCentroidIndex(void) {\n    IndexMemory *instance = IndexMemory::Instance();\n    if (instance) {\n      if (instance->has(path_)) {\n        instance->remove(path_);\n      }\n    }\n  }\n\n  //! Initialize\n  int init(const IndexMeta &meta, const ailego::Params &params);\n\n  //! Set Quantizer for the index\n  void set_quantizer(const std::string &quantizer_name,\n                     ailego::Params &quantizer_params) {\n    quantizer_class_ = quantizer_name;\n    quantizer_params_ = quantizer_params;\n  }\n\n  //! Retrieve data address of the index\n  const void *data(void) const {\n    return data_;\n  }\n\n  //! Retrieve size of the index\n  size_t size(void) const {\n    return size_;\n  }\n\n  //! Create searcher context for centroid index\n  IndexSearcher::Context::Pointer create_context() const {\n    return searcher_ ? searcher_->create_context() : nullptr;\n  }\n\n  //! 
Similarity search\n  int search(const void *query, const IndexQueryMeta &qmeta, size_t count,\n             IndexSearcher::Context::Pointer &ctx);\n\n  //! Search the nearest point, must be called in local thread pool\n  uint32_t search_nearest_centroid(const void *query, size_t len);\n\n  //! Transform Data and Search the nearest point, called while adding record\n  uint32_t transform_and_search_nearest_centroid(\n      const void *record, const IndexQueryMeta &rmeta,\n      IndexSearcher::Context::Pointer &ctx) const;\n\n  //! Build Centroid Index From Centroid List\n  int build(const IndexCluster::CentroidList &centroid_list);\n\n  //! Load Centroid Index From container\n  int load(const IndexStorage::Pointer &container, const ailego::Params params);\n\n  //! Retrieve centroid count of the index\n  size_t centroids_count(void) const {\n    return centroids_count_;\n  }\n\n  //! Retrieve meta\n  const IndexMeta &meta() const {\n    return meta_;\n  }\n\n  //! Retrieve reformer of the index\n  const IndexReformer::Pointer reformer(void) const {\n    return reformer_;\n  }\n\n  static constexpr uint32_t kInvalidID = std::numeric_limits<uint32_t>::max();\n\n private:\n  /*! Centroids IndexHolder\n   */\n  class CentroidsIndexHolder : public IndexHolder {\n   public:\n    class Iterator : public IndexHolder::Iterator {\n     public:\n      //! Index Holder Iterator Pointer\n      typedef std::unique_ptr<Iterator> Pointer;\n\n      //! Constructor\n      Iterator(std::vector<const void *> *features) : features_(features) {}\n\n      //! Destructor\n      virtual ~Iterator(void) {}\n\n      //! Retrieve pointer of data\n      virtual const void *data(void) const override {\n        return (*features_)[id_];\n      }\n\n      //! Test if the iterator is valid\n      virtual bool is_valid(void) const override {\n        return id_ < features_->size();\n      }\n\n      //! 
Retrieve primary key\n      virtual uint64_t key(void) const override {\n        return id_;\n      }\n\n      //! Next iterator\n      virtual void next(void) override {\n        ++id_;\n      }\n\n     private:\n      //! Members\n      std::vector<const void *> *features_{nullptr};\n      uint32_t id_{0};\n    };\n\n    //! Constructor\n    CentroidsIndexHolder(const IndexMeta &meta,\n                         const IndexCluster::CentroidList &centroid_list)\n        : dimension_(meta.dimension()),\n          element_size_(meta.element_size()),\n          data_type_(meta.data_type()) {\n      using CentroidList = IndexCluster::CentroidList;\n\n      std::function<void(const CentroidList &)> get_leaf_features =\n          [&](const CentroidList &cents) {\n            if (cents.empty()) {\n              return;\n            }\n            for (const auto &it : cents) {\n              if (it.subitems().empty()) {\n                features_.emplace_back(it.feature());\n              } else {\n                get_leaf_features(it.subitems());\n              }\n            }\n          };\n\n      get_leaf_features(centroid_list);\n    }\n\n    //! Retrieve count of elements in holder (-1 indicates unknown)\n    virtual size_t count(void) const override {\n      return features_.size();\n    }\n\n    //! Retrieve dimension\n    virtual size_t dimension(void) const override {\n      return dimension_;\n    }\n\n    //! Retrieve type information\n    virtual IndexMeta::DataType data_type(void) const override {\n      return data_type_;\n    }\n\n    //! Retrieve element size in bytes\n    virtual size_t element_size(void) const override {\n      return element_size_;\n    }\n\n    //! Retrieve if it can multi-pass\n    virtual bool multipass(void) const override {\n      return true;\n    }\n\n    //! 
Create a new iterator\n    virtual IndexHolder::Iterator::Pointer create_iterator(void) override {\n      return IndexHolder::Iterator::Pointer(\n          new CentroidsIndexHolder::Iterator(&features_));\n    }\n\n   private:\n    //! Members\n    std::vector<const void *> features_{};\n    size_t dimension_{0};\n    size_t element_size_{0};\n    IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};\n  };\n\n  int build_index(const IndexCluster::CentroidList &centroid_list,\n                  const IndexDumper::Pointer &dumper);\n\n  //! Prepare trainer for clustering index\n  IndexTrainer::Pointer prepare_trainer(\n      const IndexCluster::CentroidList &centroid_list);\n\n  //! Quantize the centroid vectors in holder\n  IndexHolder::Pointer quantize_holder(const IndexHolder::Pointer &holder);\n\n\n private:\n  //! Constants\n  constexpr static const char *kDefaultBuilder = \"FlatBuilder\";\n  constexpr static const char *kTempralPathPrefix = \"IVF\";\n\n  //! Members\n  IndexMeta meta_{};\n\n  IndexSearcher::Pointer searcher_{};\n  IndexReformer::Pointer reformer_{};\n  std::string builder_class_{kDefaultBuilder};\n  std::string searcher_class_{};\n  std::string quantizer_class_{};\n\n  std::string path_{};\n\n  ailego::Params builder_params_{};\n  ailego::Params searcher_params_{};\n  ailego::Params quantizer_params_{};\n\n  const void *data_{};\n  size_t size_{};\n  size_t centroids_count_{0};\n  bool index_building_{false};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_distance_calculator.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_distance_calculator.h\"\n#include <iostream>\n\nnamespace zvec {\nnamespace core {\n\nIVFDistanceCalculator::IVFDistanceCalculator(const IndexMeta &meta,\n                                             const IndexMetric::Pointer &metric,\n                                             uint32_t block_vec_cnt)\n    : metric_ptr_(metric), block_vec_cnt_(block_vec_cnt) {\n  row_distance_ = metric->distance();\n  distanceXx1_ = metric->distance_matrix(block_vec_cnt, 1);\n  distances_.resize(33);\n  for (size_t b = 32; b != 0; b /= 2) {\n    distances_[b] = metric->distance_matrix(block_vec_cnt, b);\n  }\n  element_size_ = meta.element_size();\n  dimension_ = meta.dimension();\n  if (meta.major_order() == IndexMeta::MajorOrder::MO_COLUMN) {\n    column_major_order_ = true;\n  } else {\n    column_major_order_ = false;\n  }\n}\n\nIVFDistanceCalculator::~IVFDistanceCalculator() {\n  row_distance_ = nullptr;\n  distanceXx1_ = nullptr;\n  distances_.clear();\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_distance_calculator.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_framework.h>\n\nnamespace zvec {\nnamespace core {\n\nclass IVFDistanceCalculator {\n public:\n  typedef std::shared_ptr<IVFDistanceCalculator> Pointer;\n\n  //! Constructor\n  IVFDistanceCalculator(const IndexMeta &meta,\n                        const IndexMetric::Pointer &metric,\n                        uint32_t block_vec_cnt);\n\n  virtual ~IVFDistanceCalculator();\n\n public:\n  inline void query_centroids_distance(const void *query, size_t qnum,\n                                       const void *feature, size_t fnum,\n                                       float *distances);\n\n  inline void query_centroids_distance(const void *query, const void *feature,\n                                       size_t fnum, float *distances);\n\n  inline void query_features_distance(const void *query, const void *feature,\n                                      size_t fnum, float *distances);\n\n  inline void query_features_distance(const void *query, const void *feature,\n                                      bool column_major, size_t fnum,\n                                      float *distances);\n\n protected:\n  //! 
Row Major Distances -> Online\n  inline void row_major_distance(const void *query, size_t qnum,\n                                 const void *feature, size_t fnum, float *out);\n\n  inline void row_major_distance(const void *query, const void *feature,\n                                 size_t fnum, float *out);\n\n  template <size_t Q>\n  inline void batch_query_centroids_distance(const void *query,\n                                             const void *feature, size_t fnum,\n                                             float *distances);\n\n protected:\n  IndexMetric::Pointer metric_ptr_{};\n  IndexMetric::MatrixDistance row_distance_{nullptr};\n  IndexMetric::MatrixDistance distanceXx1_{nullptr};\n  std::vector<IndexMetric::MatrixDistance> distances_{};\n\n  size_t element_size_{0};\n  size_t dimension_{0};\n  uint32_t block_vec_cnt_{0};\n  bool column_major_order_{false};\n};\n\nvoid IVFDistanceCalculator::query_centroids_distance(const void *query,\n                                                     size_t qnum,\n                                                     const void *feature,\n                                                     size_t fnum,\n                                                     float *distances) {\n  if (column_major_order_) {\n    switch (qnum) {\n      case 1:\n        batch_query_centroids_distance<1>(query, feature, fnum, distances);\n        break;\n      case 16:\n        batch_query_centroids_distance<16>(query, feature, fnum, distances);\n        break;\n      case 8:\n        batch_query_centroids_distance<8>(query, feature, fnum, distances);\n        break;\n      case 4:\n        batch_query_centroids_distance<4>(query, feature, fnum, distances);\n        break;\n      case 2:\n        batch_query_centroids_distance<2>(query, feature, fnum, distances);\n        break;\n      case 32:\n        batch_query_centroids_distance<32>(query, feature, fnum, distances);\n        break;\n      default:\n        
LOG_ERROR(\"Unsupported query num %zu.\", qnum);\n        break;\n    }\n  } else {\n    const uint8_t *cur_query = reinterpret_cast<const uint8_t *>(query);\n    for (size_t q = 0; q < qnum; ++q) {\n      this->row_major_distance(cur_query, feature, fnum, distances);\n      cur_query += element_size_;\n      distances += block_vec_cnt_;\n    }\n  }\n}\n\nvoid IVFDistanceCalculator::query_centroids_distance(const void *query,\n                                                     const void *feature,\n                                                     size_t fnum,\n                                                     float *distances) {\n  this->query_features_distance(query, feature, fnum, distances);\n}\n\nvoid IVFDistanceCalculator::query_features_distance(const void *query,\n                                                    const void *feature,\n                                                    size_t fnum,\n                                                    float *distances) {\n  if (column_major_order_) {\n    if (fnum == block_vec_cnt_) {\n      distanceXx1_(feature, query, dimension_, distances);\n    } else {\n      this->row_major_distance(query, feature, fnum, distances);\n    }\n  } else {\n    this->row_major_distance(query, feature, fnum, distances);\n  }\n}\n\nvoid IVFDistanceCalculator::query_features_distance(const void *query,\n                                                    const void *feature,\n                                                    bool column_major,\n                                                    size_t fnum,\n                                                    float *distances) {\n  if (column_major) {\n    ailego_assert_with(fnum == block_vec_cnt_, \"Invalid Block\");\n    distanceXx1_(feature, query, dimension_, distances);\n  } else {\n    this->row_major_distance(query, feature, fnum, distances);\n  }\n}\n\ntemplate <size_t Q>\nvoid IVFDistanceCalculator::batch_query_centroids_distance(const void *query,\n      
                                                     const void *feature,\n                                                           size_t fnum,\n                                                           float *distances) {\n  if (fnum == block_vec_cnt_) {\n    distances_[Q](feature, query, dimension_, distances);\n  } else {\n    row_major_distance(query, Q, feature, fnum, distances);\n  }\n}\n\nvoid IVFDistanceCalculator::row_major_distance(const void *query, size_t qnum,\n                                               const void *feature, size_t fnum,\n                                               float *out) {\n  const uint8_t *cur_query = reinterpret_cast<const uint8_t *>(query);\n  for (size_t q = 0; q < qnum; ++q) {\n    const uint8_t *tmp_feature = reinterpret_cast<const uint8_t *>(feature);\n    float *cur_out = out + q * fnum;\n    for (size_t f = 0; f < fnum; ++f) {\n      row_distance_(cur_query, tmp_feature, dimension_, cur_out + f);\n      tmp_feature += element_size_;\n    }\n    cur_query += element_size_;\n  }\n}\n\nvoid IVFDistanceCalculator::row_major_distance(const void *query,\n                                               const void *feature, size_t fnum,\n                                               float *out) {\n  const uint8_t *cur_feature = reinterpret_cast<const uint8_t *>(feature);\n  for (size_t f = 0; f < fnum; ++f) {\n    row_distance_(query, cur_feature, dimension_, out + f);\n    cur_feature += element_size_;\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_dumper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_dumper.h\"\n\nnamespace zvec {\nnamespace core {\n\nint IVFDumper::dump_inverted_vector(uint32_t inverted_list_id, uint64_t key,\n                                    const void *vec) {\n  int ret = this->check_dump_inverted_list(inverted_list_id);\n  ivf_check_error_code(ret);\n\n  ++inverted_lists_meta_[cur_list_id_].vector_count;\n  ++header_.total_vector_count;\n  block_.emplace(key, vec, IndexMeta::MajorOrder::MO_ROW);\n  if (block_.full()) {\n    ret = this->dump_block();\n    ivf_check_error_code(ret);\n  }\n  return 0;\n}\n\nint IVFDumper::dump_inverted_block(uint32_t inverted_list_id,\n                                   const uint64_t *keys, const void *vecs,\n                                   uint32_t vector_count, bool column_major) {\n  int ret = this->check_dump_inverted_list(inverted_list_id);\n  ivf_check_error_code(ret);\n\n  if (block_.match_order(column_major ? 
IndexMeta::MajorOrder::MO_COLUMN\n                                      : IndexMeta::MajorOrder::MO_ROW) &&\n      vector_count == block_.capacity()) {\n    // Dump the block directly\n    size_t size = vector_count * meta_.element_size();\n    size_t pd_size = ailego_align(size, 32) - size;\n    if (dumper_->write(vecs, size) != size) {\n      LOG_ERROR(\"Failed to write data into dumper %s\", dumper_->name().c_str());\n      return IndexError_WriteData;\n    }\n    if (pd_size > 0) {\n      std::string padding(pd_size, '\\0');\n      if (dumper_->write(padding.data(), pd_size) != pd_size) {\n        return IndexError_WriteData;\n      }\n    }\n    std::copy(keys, keys + vector_count, std::back_inserter(keys_));\n    ++inverted_lists_meta_[cur_list_id_].block_count;\n    ++header_.block_count;\n    header_.inverted_body_size += size;\n  } else {\n    size_t step_size = meta_.element_size();\n    if (column_major) {\n      step_size = IndexMeta::AlignSizeof(meta_.data_type());\n    }\n    for (size_t i = 0; i < vector_count; ++i) {\n      auto v = reinterpret_cast<const char *>(vecs) + i * step_size;\n      block_.emplace(keys[i], v,\n                     column_major ? 
IndexMeta::MajorOrder::MO_COLUMN\n                                  : IndexMeta::MajorOrder::MO_ROW);\n      if (block_.full()) {\n        ret = this->dump_block();\n        ivf_check_error_code(ret);\n      }\n    }\n  }\n\n  inverted_lists_meta_[cur_list_id_].vector_count += vector_count;\n  header_.total_vector_count += vector_count;\n\n  return 0;\n}\n\nint IVFDumper::dump_container_segment(const IndexStorage::Pointer &container,\n                                      const std::string &segmemt_id) {\n  auto seg = container->get(segmemt_id, 2);\n  if (!seg) {\n    LOG_ERROR(\"Failed to fetch segment %s from %s\", segmemt_id.c_str(),\n              container->name().c_str());\n    return IndexError_InvalidFormat;\n  }\n\n  const size_t batch_size = 32 * 1024;\n  const size_t total_size = seg->data_size() + seg->padding_size();\n  size_t off = 0;\n  while (off < total_size) {\n    const void *data = nullptr;\n    size_t rd_size = std::min(batch_size, total_size - off);\n    if (seg->read(off, &data, rd_size) != rd_size) {\n      LOG_ERROR(\"Failed to read data, off=%zu size=%zu\", off, rd_size);\n      return IndexError_ReadData;\n    }\n    if (dumper_->write(data, rd_size) != rd_size) {\n      LOG_ERROR(\"Failed to write data, size=%zu\", rd_size);\n      return IndexError_WriteData;\n    }\n    off += rd_size;\n  }\n\n  int ret = dumper_->append(segmemt_id, seg->data_size(), seg->padding_size(),\n                            seg->data_crc());\n  ivf_check_with_msg(ret, \"Failed to append %s\", segmemt_id.c_str());\n\n  dumped_size_ += total_size;\n\n  return 0;\n}\n\nint IVFDumper::dump_inverted_vector_finished(void) {\n  //! 
Dump Inverted Index Segment\n  if (!block_.empty()) {\n    int ret = this->dump_block();\n    ivf_check_error_code(ret);\n  }\n  header_.block_size = block_.block_size();\n  size_t segment_size = header_.inverted_body_size;\n  int ret = dumper_->append(IVF_INVERTED_BODY_SEG_ID, segment_size, 0, 0);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to append to segment %s, ret=%d\",\n              IVF_INVERTED_BODY_SEG_ID.c_str(), ret);\n    return ret;\n  }\n  dumped_size_ += segment_size;\n\n  //! Dump Inverted Index Header Segment\n  std::string str;\n  meta_.serialize(&str);\n  header_.header_size = sizeof(header_) + str.size();\n  header_.index_meta_size = str.size();\n  header_.inverted_list_count = inverted_lists_meta_.size();\n  if (dumper_->write(&header_, sizeof(header_)) != sizeof(header_)) {\n    LOG_ERROR(\"Failed to write data, size %zu\", sizeof(header_));\n    return IndexError_WriteData;\n  }\n  if (dumper_->write(str.data(), str.size()) != str.size()) {\n    LOG_ERROR(\"Failed to write data, size %zu\", str.size());\n    return IndexError_WriteData;\n  }\n  size_t padding_size = 0;\n  ret = this->dump_padding(header_.header_size, &padding_size);\n  ivf_check_error_code(ret);\n  ret = dumper_->append(IVF_INVERTED_HEADER_SEG_ID, header_.header_size,\n                        padding_size, 0);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to append to segment %s, ret:%d\",\n              IVF_INVERTED_HEADER_SEG_ID.c_str(), ret);\n    return ret;\n  }\n  dumped_size_ += header_.header_size + padding_size;\n\n  LOG_DEBUG(\n      \"Dump header info: blocks=%u block_size=%u block_vec_count=%u \"\n      \"inverted_list_count=%u total_vecs=%u inverted_size=%zu\",\n      header_.block_count, header_.block_size, header_.block_vector_count,\n      header_.inverted_list_count, header_.total_vector_count,\n      static_cast<size_t>(header_.inverted_body_size));\n\n  //! 
Dump Inverted Lists Meta Segment\n  segment_size = inverted_lists_meta_.size() * sizeof(InvertedListMeta);\n  ret = this->dump_segment(IVF_INVERTED_META_SEG_ID,\n                           inverted_lists_meta_.data(), segment_size);\n  ivf_check_error_code(ret);\n\n  //! Dump Keys Segment\n  ret = this->dump_segment(IVF_KEYS_SEG_ID, keys_.data(),\n                           keys_.size() * sizeof(keys_[0]));\n  ivf_check_error_code(ret);\n\n  //! Dump Mapping Segment\n  auto mapping = std::make_shared<std::vector<uint32_t>>();\n  IVFUtility::Sort(keys_.data(), mapping.get(), keys_.size());\n  ret = this->dump_segment(IVF_MAPPING_SEG_ID, mapping->data(),\n                           mapping->size() * sizeof(uint32_t));\n  ivf_check_error_code(ret);\n  mapping.reset();\n\n  //! Dump the Offsets Segment\n  return this->dump_offsets_segment();\n}\n\nint IVFDumper::dump_centroid_index(const void *data, size_t size) {\n  int ret = this->dump_segment(IVF_CENTROID_SEG_ID, data, size);\n  ivf_check_error_code(ret);\n\n  return 0;\n}\n\nint IVFDumper::dump_quantizer_params(\n    const std::vector<IndexConverter::Pointer> &quantizers) {\n  if (meta_.reformer_name() != kInt8ReformerName &&\n      meta_.reformer_name() != kInt4ReformerName) {\n    // IntegerQuantizer params is support only\n    return 0;\n  }\n  if (quantizers.size() == 1) {\n    //! Donot dump, using reformer params in IndexMeta\n    return 0;\n  }\n\n  if (quantizers.size() != header_.inverted_list_count) {\n    LOG_ERROR(\"Mismatch size, quantizers=%zu, inverted_list_count=%u\",\n              quantizers.size(), header_.inverted_list_count);\n    return IndexError_Logic;\n  }\n  bool int8_quantizer = meta_.reformer_name() == kInt8ReformerName;\n  std::vector<InvertedIntegerQuantizerParams> params;\n  params.resize(header_.inverted_list_count);\n  for (size_t i = 0; i < quantizers.size(); ++i) {\n    auto &p = quantizers[i]->meta().reformer_params();\n    auto &scale_key = int8_quantizer ? 
INT8_QUANTIZER_REFORMER_SCALE\n                                     : INT4_QUANTIZER_REFORMER_SCALE;\n    auto &bias_key = int8_quantizer ? INT8_QUANTIZER_REFORMER_BIAS\n                                    : INT4_QUANTIZER_REFORMER_BIAS;\n    if (inverted_lists_meta_[i].vector_count > 0 &&\n        (!p.has(scale_key) || !p.has(bias_key))) {\n      LOG_ERROR(\"Miss reformer params %s or %s\", bias_key.c_str(),\n                scale_key.c_str());\n      return IndexError_Logic;\n    }\n\n    params[i].bias = p.get_as_float(bias_key);\n    params[i].scale = p.get_as_float(scale_key);\n  }\n\n  return this->dump_segment(\n      int8_quantizer ? IVF_INT8_QUANTIZED_PARAMS_SEG_ID\n                     : IVF_INT4_QUANTIZED_PARAMS_SEG_ID,\n      params.data(), params.size() * sizeof(InvertedIntegerQuantizerParams));\n}\n\nint IVFDumper::dump_original_vector(const void *data, size_t size) {\n  if (dumped_feature_count_ >= header_.total_vector_count) {\n    LOG_ERROR(\"Dump too much orignal features, expect=%u\",\n              header_.total_vector_count);\n    return IndexError_Logic;\n  }\n\n  if (dumper_->write(data, size) != size) {\n    LOG_ERROR(\"Dumper write features failed\");\n    return IndexError_WriteData;\n  }\n  dumped_features_size_ += size;\n  ++dumped_feature_count_;\n  if (dumped_feature_count_ == header_.total_vector_count) {\n    //! 
Dump features finished, dump the meta\n    size_t padding_size = 0;\n    int ret = this->dump_padding(size, &padding_size);\n    ivf_check_error_code(ret);\n\n    ret = dumper_->append(IVF_FEATURES_SEG_ID, dumped_features_size_,\n                          padding_size, 0);\n    if (ret != 0) {\n      LOG_ERROR(\"Dumper append segment %s failed, ret:%d\",\n                IVF_FEATURES_SEG_ID.c_str(), ret);\n      return ret;\n    }\n    dumped_size_ += dumped_features_size_;\n  }\n\n  return 0;\n}\n\nint IVFDumper::check_dump_inverted_list(uint32_t inverted_list_id) {\n  if (inverted_list_id < cur_list_id_) {\n    LOG_ERROR(\"Invalid backward vector dumping, want=%u cur=%u\",\n              inverted_list_id, cur_list_id_);\n    return IndexError_Logic;\n  }\n  if (inverted_list_id >= inverted_lists_meta_.size()) {\n    LOG_ERROR(\"Invalid inverted_list_id=%u, lists_size=%zu\", inverted_list_id,\n              inverted_lists_meta_.size());\n    return IndexError_Logic;\n  }\n  if (inverted_list_id != cur_list_id_) {\n    //! 
flush previous inverted_list block\n    int ret = this->dump_block();\n    ivf_check_error_code(ret);\n    for (auto idx = cur_list_id_ + 1; idx <= inverted_list_id; ++idx) {\n      inverted_lists_meta_[idx].offset = header_.inverted_body_size;\n      inverted_lists_meta_[idx].id_offset = header_.total_vector_count;\n    }\n    cur_list_id_ = inverted_list_id;\n  }\n\n  return 0;\n}\n\nint IVFDumper::dump_offsets_segment(void) const {\n  bool col_pri = meta_.major_order() == IndexMeta::MajorOrder::MO_COLUMN;\n  size_t total_size = 0;\n  for (size_t i = 0; i < inverted_lists_meta_.size(); ++i) {\n    std::vector<InvertedVecLocation> offsets;\n    const auto &m = inverted_lists_meta_[i];\n    size_t vec_cnt = m.vector_count;\n    size_t idx = 0;\n    uint64_t off = m.offset;\n    size_t align_idx = vec_cnt - vec_cnt % block_vector_count_;\n    for (size_t j = 0; j < vec_cnt; ++j) {\n      if (col_pri && j < align_idx) {\n        offsets.emplace_back(off + idx * block_.align_size(), true);\n      } else {\n        offsets.emplace_back(off + idx * block_.element_size(), false);\n      }\n      ++idx;\n      if (idx == block_vector_count_) {\n        off += header_.block_size;\n        idx = 0;\n      }\n    }\n    if (idx != 0) {\n      off += (vec_cnt - align_idx) * meta_.element_size();\n    }\n\n    size_t len = offsets.size() * sizeof(offsets[0]);\n    size_t actual_len = dumper_->write(offsets.data(), len);\n    if (actual_len != len) {\n      LOG_ERROR(\"Write offsets failed expect %zu, actual: %zu.\", len,\n                actual_len);\n      return IndexError_WriteData;\n    }\n    total_size += len;\n  }\n\n  size_t padding_size = 0;\n  int ret = this->dump_padding(total_size, &padding_size);\n  ivf_check_error_code(ret);\n\n  ret = dumper_->append(IVF_OFFSETS_SEG_ID, total_size, padding_size, 0);\n  if (ret != 0) {\n    LOG_ERROR(\"Dumper append segment %s failed, ret:%d\",\n              IVF_OFFSETS_SEG_ID.c_str(), ret);\n    return ret;\n  }\n\n  
dumped_size_ += total_size + padding_size;\n\n  return 0;\n}\n\nint IVFDumper::dump_segment(const std::string &segment_id, const void *data,\n                            size_t size) const {\n  size_t len = dumper_->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect=%zu, actual=%zu\",\n              segment_id.c_str(), size, len);\n    return IndexError_WriteData;\n  }\n\n  size_t padding_size = 0;\n  int ret = this->dump_padding(size, &padding_size);\n  ivf_check_error_code(ret);\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  ret = dumper_->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return ret;\n  }\n  dumped_size_ += size + padding_size;\n\n  return 0;\n}\n\nint IVFDumper::dump_padding(size_t data_size, size_t *padding_size) const {\n  *padding_size = IVFUtility::AlignedSize(data_size) - data_size;\n  if (*padding_size == 0) {\n    return 0;\n  }\n\n  std::string padding(*padding_size, '\\0');\n  if (dumper_->write(padding.data(), *padding_size) != *padding_size) {\n    LOG_ERROR(\"Append padding failed, size %lu\", *padding_size);\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint IVFDumper::dump_block(void) {\n  if (block_.empty()) {\n    return 0;\n  }\n\n  size_t size = ailego_align(block_.bytes(), 32);\n  if (dumper_->write(block_.data(), size) != size) {\n    LOG_ERROR(\"Failed to write data into dumper %s\", dumper_->name().c_str());\n    return IndexError_WriteData;\n  }\n  auto &keys = block_.keys();\n  std::copy(keys.begin(), keys.end(), std::back_inserter(keys_));\n  ++inverted_lists_meta_[cur_list_id_].block_count;\n  ++header_.block_count;\n  header_.inverted_body_size += size;\n  block_.clear();\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_dumper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"metric/metric_params.h\"\n#include \"ivf_index_format.h\"\n#include \"ivf_params.h\"\n#include \"ivf_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Quantized Clustering Dumper\n */\nclass IVFDumper {\n public:\n  typedef std::shared_ptr<IVFDumper> Pointer;\n\n  //! Vectors block\n  class Block {\n   public:\n    //! Initialize block\n    void init(const IndexMeta &meta, uint32_t max_vec_count) {\n      element_size_ = meta.element_size();\n      auto bsize = IVFUtility::AlignedSize(max_vec_count, element_size_);\n      data_.resize(bsize);\n      count_ = 0u;\n      major_order_ = meta.major_order();\n      align_size_ = IndexMeta::AlignSizeof(meta.data_type());\n      units_ = element_size_ / align_size_;\n      max_vec_count_ = max_vec_count;\n      keys_.reserve(max_vec_count_);\n    }\n\n    //! Add a vector to the block in row major order\n    //! If the block is full and the block order is column, make a\n    //! 
transpose\n    void emplace(uint64_t key, const void *vec, IndexMeta::MajorOrder order) {\n      switch (align_size_) {\n        case 2:\n          do_emplace<uint16_t>(vec, order);\n          break;\n        case 4:\n          do_emplace<uint32_t>(vec, order);\n          break;\n        case 8:\n          do_emplace<uint64_t>(vec, order);\n          break;\n        default:\n          ailego_check_with(false, \"Unsupport Aligned Size\");\n      }\n      keys_.emplace_back(key);\n    }\n\n    bool full(void) const {\n      return count_ == max_vec_count_;\n    }\n\n    const void *data(void) const {\n      return data_.data();\n    }\n\n    void clear(void) {\n      count_ = 0u;\n      keys_.clear();\n    }\n\n    bool empty(void) const {\n      return count_ == 0u;\n    }\n\n    size_t size(void) const {\n      return count_;\n    }\n\n    size_t capacity(void) const {\n      return max_vec_count_;\n    }\n\n    size_t align_size(void) const {\n      return align_size_;\n    }\n\n    size_t element_size(void) const {\n      return element_size_;\n    }\n\n    //! Retrieve block data size\n    size_t bytes(void) const {\n      return element_size_ * count_;\n    }\n\n    //! Retrieve max block size in bytes\n    size_t block_size(void) const {\n      return data_.size();\n    }\n\n    IndexMeta::MajorOrder major_order(void) const {\n      return major_order_;\n    }\n\n    const std::vector<uint64_t> &keys(void) const {\n      return keys_;\n    }\n\n    bool match_order(IndexMeta::MajorOrder column_major) const {\n      return major_order_ == column_major;\n    }\n\n   private:\n    //! 
Transpose the block vectors\n    void transpose() {\n      std::vector<uint8_t> buf(data_.size());\n      IVFUtility::Transpose(align_size_, data_.data(), count_, units_,\n                            buf.data());\n      data_.swap(buf);\n    }\n\n    template <typename T>\n    void do_emplace(const void *vec, IndexMeta::MajorOrder order) {\n      ailego_assert_with(count_ < max_vec_count_, \"emplace a full block\");\n\n      T *dst = reinterpret_cast<T *>(data_.data() + element_size_ * count_);\n      const T *src = reinterpret_cast<const T *>(vec);\n      size_t step = order == IndexMeta::MO_ROW ? 1 : max_vec_count_;\n      for (auto i = 0u; i < units_; ++i) {\n        *dst = *src;\n        dst++;\n        src += step;\n      }\n\n      count_++;\n      if (full() && major_order_ == IndexMeta::MO_COLUMN) {\n        transpose();\n      }\n    }\n\n   private:\n    //! Members\n    std::vector<uint8_t> data_{};\n    std::vector<uint64_t> keys_{};\n    uint32_t count_{0u};\n    uint32_t units_{0u};\n    uint32_t align_size_{0u};\n    uint32_t element_size_{0u};\n    uint32_t max_vec_count_{0u};\n    IndexMeta::MajorOrder major_order_{};\n  };\n\n  //! Constructor\n  IVFDumper(const IndexMeta &meta, const IndexDumper::Pointer &dumper,\n            size_t inverted_list_count, size_t block_vector_count)\n      : meta_(meta),\n        dumper_(dumper),\n        block_vector_count_(block_vector_count),\n        inverted_lists_meta_(inverted_list_count) {\n    block_.init(meta, block_vector_count_);\n    header_.block_vector_count = block_vector_count_;\n  }\n\n  //! Constructor\n  IVFDumper(const IndexMeta &meta, const IndexDumper::Pointer &dumper,\n            size_t inverted_list_count)\n      : IVFDumper(meta, dumper, inverted_list_count, kDefaultBlockCount) {}\n\n  //! 
Destructor\n  ~IVFDumper() {\n    // Check the dumper status\n    if (dumped_feature_count_ > 0 &&\n        dumped_feature_count_ != header_.total_vector_count) {\n      LOG_ERROR(\"Dumped features=%u mismatch from invertedVecs=%u\",\n                dumped_feature_count_, header_.total_vector_count);\n      ailego_assert_with(false, \"invalid status\");\n    }\n  }\n\n  //! Dump a vector in row major order\n  int dump_inverted_vector(uint32_t inverted_list_id, uint64_t key,\n                           const void *vec);\n\n  int dump_inverted_block(uint32_t inverted_list_id, const uint64_t *keys,\n                          const void *vecs, uint32_t vector_count,\n                          bool column_major);\n\n  //! Finish dump the inverted vectors\n  int dump_inverted_vector_finished(void);\n\n  //! Dump the centroids index\n  int dump_centroid_index(const void *data, size_t size);\n\n  //! Dump params for each inverted list quantizer\n  int dump_quantizer_params(\n      const std::vector<IndexConverter::Pointer> &quantizers);\n\n  //! Dump the original vector, which doesnot been quantized\n  int dump_original_vector(const void *data, size_t size);\n\n  //! Retrieve total dumped size\n  size_t dumped_size(void) const {\n    return dumped_size_;\n  }\n\n  //! Retrieve total dumped vector count\n  size_t dumped_count(void) const {\n    return header_.total_vector_count;\n  }\n\n  //! Dump the segment from container\n  int dump_container_segment(const IndexStorage::Pointer &container,\n                             const std::string &segmemt_id);\n\n private:\n  int check_dump_inverted_list(uint32_t inverted_list_id);\n\n  //! Dump offsets segment\n  int dump_offsets_segment(void) const;\n\n  //! Dump a segment\n  int dump_segment(const std::string &segment_id, const void *data,\n                   size_t size) const;\n\n  //! Dump segment padding\n  int dump_padding(size_t data_size, size_t *padding_size) const;\n\n  //! 
Dump a vector block\n  int dump_block(void);\n\n private:\n  //! Constants\n  static constexpr size_t kDefaultBlockCount = 32u;\n\n  //! Members\n  Block block_{};           // vectors grouped in block\n  const IndexMeta meta_{};  // IndexMeta of the inverted index\n  const IndexDumper::Pointer dumper_{};\n  size_t block_vector_count_{kDefaultBlockCount};\n  std::vector<InvertedListMeta> inverted_lists_meta_{};\n  std::vector<uint64_t> keys_{};\n  InvertedIndexHeader header_{};\n  uint32_t cur_list_id_{0};\n  uint32_t dumped_feature_count_{0};\n  size_t dumped_features_size_{0};\n  mutable size_t dumped_size_{0};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_entity.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_entity.h\"\n#include <iostream>\n#include \"ivf_utility.h\"\nnamespace zvec {\nnamespace core {\n\n//! Initialize\nint IVFEntity::IVFReformerWrapper::init(const IndexMeta &imeta) {\n  auto &name = imeta.reformer_name();\n\n  if (name.empty()) {\n    type_ = kReformerTpNone;\n    return 0;\n  }\n\n  auto reformer = IndexFactory::CreateReformer(name);\n  if (!reformer) {\n    LOG_ERROR(\"Failed to create reformer %s\", name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = reformer->init(imeta.reformer_params());\n  ivf_check_with_msg(ret, \"Failed to init reformer %s\", name.c_str());\n\n  reformer_ = std::move(reformer);\n\n  if (name == kInt8ReformerName) {\n    if (imeta.metric_name() == kIPMetricName) {\n      type_ = kReformerTpInnerProductInt8;\n      return 0;\n    }\n    auto &key = INT8_QUANTIZER_REFORMER_SCALE;\n    if (!imeta.reformer_params().has(key)) {\n      LOG_ERROR(\"Missing param %s in reformer %s\", key.c_str(), name.c_str());\n      return IndexError_InvalidArgument;\n    };\n    float scale = imeta.reformer_params().get_as_float(key);\n    reciprocal_ = scale == 0.0 ? 
1.0 : (1.0 / scale);\n    type_ = kReformerTpInt8;\n  } else if (name == kInt4ReformerName) {\n    if (imeta.metric_name() == kIPMetricName) {\n      type_ = kReformerTpInnerProductInt4;\n      return 0;\n    }\n    auto &key = INT4_QUANTIZER_REFORMER_SCALE;\n    if (!imeta.reformer_params().has(key)) {\n      LOG_ERROR(\"Missing param %s in reformer %s\", key.c_str(), name.c_str());\n      return IndexError_InvalidArgument;\n    };\n    float scale = imeta.reformer_params().get_as_float(key);\n    reciprocal_ = scale == 0.0 ? 1.0 : (1.0 / scale);\n    type_ = kReformerTpInt4;\n  } else {\n    type_ = kReformerTpDefault;\n  }\n\n  LOG_DEBUG(\"Init QcReformer with %s, type=%u\", name.c_str(), type_);\n\n  return 0;\n}\n\n//! Update the params, Called by gpu searcher only\nint IVFEntity::IVFReformerWrapper::update(const IndexMeta &meta) {\n  auto &name = meta.reformer_name();\n  if (name == kInt4ReformerName && meta.metric_name() == kL2MetricName) {\n    auto &key = INT4_QUANTIZER_REFORMER_SCALE;\n    if (!meta.reformer_params().has(key)) {\n      LOG_ERROR(\"Missing param %s in reformer %s\", key.c_str(), name.c_str());\n      return IndexError_InvalidArgument;\n    };\n    float scale = meta.reformer_params().get_as_float(key);\n    reciprocal_ = scale == 0.0 ? 
1.0 : (1.0 / scale / kNormalizeScaleFactor);\n    type_ = kReformerTpInt8;\n\n    ailego::Params params;\n    float int8_scale = scale * kNormalizeScaleFactor;\n    params.set(INT8_QUANTIZER_REFORMER_SCALE, int8_scale);\n    float bias =\n        meta.reformer_params().get_as_float(INT4_QUANTIZER_REFORMER_BIAS);\n    params.set(INT8_QUANTIZER_REFORMER_BIAS, bias);\n    params.set(\n        INT4_QUANTIZER_REFORMER_METRIC,\n        meta.reformer_params().get_as_string(INT4_QUANTIZER_REFORMER_METRIC));\n\n    auto reformer = IndexFactory::CreateReformer(kInt8ReformerName);\n    if (!reformer) {\n      LOG_ERROR(\"Failed to create reformer %s\", name.c_str());\n      return IndexError_NoExist;\n    }\n    int ret = reformer->init(params);\n    ivf_check_with_msg(ret, \"Failed to init reformer %s\", name.c_str());\n\n    reformer_ = reformer;\n\n    LOG_DEBUG(\"Init QcReformer with %s, type=%u\", name.c_str(), type_);\n  }\n\n  return 0;\n}\n\n//! Transform a query\nint IVFEntity::IVFReformerWrapper::transform(const void *query,\n                                             const IndexQueryMeta &qmeta,\n                                             const void **out,\n                                             IndexQueryMeta *ometa) {\n  int ret = 0;\n\n  switch (type_) {\n    case kReformerTpNone:\n      *out = query;\n      *ometa = qmeta;\n      break;\n\n    case kReformerTpInnerProductInt8:\n      if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n      scales_.resize(1);\n      buffer_.resize(IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,\n                                              qmeta.dimension()));\n      this->transform(0, static_cast<const float *>(query), qmeta.dimension(),\n                      reinterpret_cast<int8_t *>(&buffer_[0]));\n      *ometa = qmeta;\n      ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());\n      *out = buffer_.data();\n      break;\n\n    case 
kReformerTpInnerProductInt4:\n      if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n      scales_.resize(1);\n      buffer_.resize(IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4,\n                                              qmeta.dimension()));\n      this->transform(0, static_cast<const float *>(query), qmeta.dimension(),\n                      reinterpret_cast<uint8_t *>(&buffer_[0]));\n      *ometa = qmeta;\n      ometa->set_meta(IndexMeta::DataType::DT_INT4, qmeta.dimension());\n      *out = buffer_.data();\n      break;\n\n    case kReformerTpInt8:\n    case kReformerTpInt4:\n      /* FALLTHRU */\n    case kReformerTpDefault:\n      ret = reformer_->transform(query, qmeta, &buffer_, ometa);\n      *out = buffer_.data();\n      break;\n\n    default:\n      ret = IndexError_Unsupported;\n      break;\n  }\n\n  return ret;\n}\n\n//! Transform querys\nint IVFEntity::IVFReformerWrapper::transform(const void *query,\n                                             const IndexQueryMeta &qmeta,\n                                             uint32_t count, const void **out,\n                                             IndexQueryMeta *ometa) {\n  int ret = 0;\n\n  switch (type_) {\n    case kReformerTpNone:\n      *out = query;\n      *ometa = qmeta;\n      break;\n\n    case kReformerTpInnerProductInt8:\n      if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n      scales_.resize(count);\n      buffer_.resize(count *\n                     IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,\n                                              qmeta.dimension()));\n      {\n        const float *ivec = reinterpret_cast<const float *>(query);\n        int8_t *ovec = reinterpret_cast<int8_t *>(&buffer_[0]);\n        for (size_t i = 0; i < count; ++i) {\n          this->transform(i, &ivec[i * qmeta.dimension()], qmeta.dimension(),\n                         
 &ovec[i * qmeta.dimension()]);\n        }\n      }\n      *ometa = qmeta;\n      ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());\n      *out = buffer_.data();\n      break;\n\n    case kReformerTpInnerProductInt4:\n      if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n      scales_.resize(count);\n      buffer_.resize(count *\n                     IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4,\n                                              qmeta.dimension()));\n      {\n        const float *ivec = reinterpret_cast<const float *>(query);\n        uint8_t *ovec = reinterpret_cast<uint8_t *>(&buffer_[0]);\n        for (size_t i = 0; i < count; ++i) {\n          this->transform(i, &ivec[i * qmeta.dimension()], qmeta.dimension(),\n                          &ovec[i * qmeta.dimension() / 2]);\n        }\n      }\n      *ometa = qmeta;\n      ometa->set_meta(IndexMeta::DataType::DT_INT4, qmeta.dimension());\n      *out = buffer_.data();\n      break;\n\n    case kReformerTpInt8:\n    case kReformerTpInt4:\n      /* FALLTHRU */\n    case kReformerTpDefault:\n      ret = reformer_->transform(query, qmeta, count, &buffer_, ometa);\n      *out = buffer_.data();\n      break;\n\n    default:\n      ret = IndexError_Unsupported;\n      break;\n  }\n\n  return ret;\n}\n\n//! 
Transform querys\nint IVFEntity::IVFReformerWrapper::transform_gpu(const void *query,\n                                                 const IndexQueryMeta &qmeta,\n                                                 uint32_t count,\n                                                 const void **out,\n                                                 IndexQueryMeta *ometa) {\n  int ret = 0;\n\n  switch (type_) {\n    case kReformerTpNone:\n    case kReformerTpDefault:\n      *out = query;\n      *ometa = qmeta;\n      break;\n\n    case kReformerTpInnerProductInt4:\n    case kReformerTpInnerProductInt8:\n      if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n      scales_.resize(count);\n      buffer_.resize(count *\n                     IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,\n                                              qmeta.dimension()));\n      {\n        const float *ivec = reinterpret_cast<const float *>(query);\n        int8_t *ovec = reinterpret_cast<int8_t *>(&buffer_[0]);\n        for (size_t i = 0; i < count; ++i) {\n          this->transform(i, &ivec[i * qmeta.dimension()], qmeta.dimension(),\n                          &ovec[i * qmeta.dimension()]);\n        }\n      }\n      *ometa = qmeta;\n      ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());\n      *out = buffer_.data();\n      break;\n\n    case kReformerTpInt8:\n    case kReformerTpInt4:\n      ret = reformer_->transform(query, qmeta, count, &buffer_, ometa);\n      *out = buffer_.data();\n      break;\n\n    default:\n      ret = IndexError_Unsupported;\n      break;\n  }\n\n  return ret;\n}\n\n\n//! 
Convert a record\nint IVFEntity::IVFReformerWrapper::convert(const void *record,\n                                           const IndexQueryMeta &rmeta,\n                                           const void **out,\n                                           IndexQueryMeta *ometa) {\n  if (type_ == kReformerTpNone) {\n    *out = record;\n    *ometa = rmeta;\n    return 0;\n  }\n\n  int ret = reformer_->convert(record, rmeta, &buffer_, ometa);\n  *out = buffer_.data();\n  return ret;\n}\n\n//! Convert records\nint IVFEntity::IVFReformerWrapper::convert(const void *records,\n                                           const IndexQueryMeta &rmeta,\n                                           uint32_t count, const void **out,\n                                           IndexQueryMeta *ometa) {\n  if (type_ == kReformerTpNone) {\n    *out = records;\n    *ometa = rmeta;\n    return 0;\n  }\n  int ret = reformer_->convert(records, rmeta, count, &buffer_, ometa);\n  *out = buffer_.data();\n  return ret;\n}\n\n//! Normalize score\nvoid IVFEntity::IVFReformerWrapper::normalize(size_t qidx,\n                                              IndexDocumentHeap *heap) const {\n  switch (type_) {\n    case kReformerTpNone:\n      return;\n\n    case kReformerTpInnerProductInt8:\n    case kReformerTpInnerProductInt4:\n      ailego_assert_with(qidx < scales_.size(), \"invalid index\");\n      {\n        auto reciprocal = 1.0f / scales_[qidx];\n        for (auto &it : *heap) {\n          *it.mutable_score() *= reciprocal;\n        }\n      }\n      break;\n\n    case kReformerTpInt8:\n    case kReformerTpInt4:\n      for (auto &it : *heap) {\n        *it.mutable_score() *= reciprocal_;\n      }\n      break;\n\n    default:\n      // Not support\n      break;\n  }\n}\n\n//! 
Normalize score\nvoid IVFEntity::IVFReformerWrapper::normalize(size_t qidx, const void *query,\n                                              const IndexQueryMeta &qmeta,\n                                              IndexDocumentHeap *heap) const {\n  switch (type_) {\n    case kReformerTpNone:\n      return;\n\n    case kReformerTpInnerProductInt8:\n    case kReformerTpInnerProductInt4:\n      ailego_assert_with(qidx < scales_.size(), \"invalid index\");\n      {\n        auto reciprocal = 1.0f / scales_[qidx];\n        for (auto &it : *heap) {\n          *it.mutable_score() *= reciprocal;\n        }\n      }\n      break;\n\n    case kReformerTpInt8:\n    case kReformerTpInt4:\n      for (auto &it : *heap) {\n        *it.mutable_score() *= reciprocal_;\n      }\n      break;\n\n    case kReformerTpDefault:\n      reformer_->normalize(query, qmeta, *heap);\n      break;\n\n    default:\n      // Not support\n      LOG_ERROR(\"Not a supported type in QC reformer, type: %u\", type_);\n      break;\n  }\n}\n\nvoid IVFEntity::IVFReformerWrapper::transform(size_t qidx, const float *in,\n                                              size_t dim, int8_t *out) {\n  ailego_assert_with(qidx < scales_.size(), \"invalid index\");\n\n  float abs_max = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    auto abs = std::abs(in[i]);\n    if (abs > abs_max) {\n      abs_max = abs;\n    }\n  }\n\n  if (abs_max > 0.0f) {\n    float scale = 127 / abs_max;\n    for (size_t i = 0; i < dim; ++i) {\n      out[i] = static_cast<int8_t>(std::round(in[i] * scale));\n    }\n    scales_[qidx] = scale;\n  } else {\n    std::fill(out, out + dim, static_cast<int8_t>(1));\n    scales_[qidx] = std::numeric_limits<float>::max();\n  }\n}\n\nvoid IVFEntity::IVFReformerWrapper::transform(size_t qidx, const float *in,\n                                              size_t dim, uint8_t *out) {\n  ailego_assert_with(qidx < scales_.size(), \"invalid index\");\n  ailego_assert_with(dim % 2 == 0, \"invalid 
dim\");\n\n  float abs_max = 0.0f;\n  float max = -std::numeric_limits<float>::max();\n  for (size_t i = 0; i < dim; ++i) {\n    float abs = std::abs(in[i]);\n    abs_max = std::max(abs_max, abs);\n    max = std::max(max, in[i]);\n  }\n  if (abs_max > 0.0f) {\n    float scale = ((7 * abs_max > 8 * max) ? 8 : 7) / abs_max;\n    for (size_t i = 0; i < dim; i += 2) {\n      auto v1 = static_cast<int8_t>(std::round(in[i] * scale));\n      auto v2 = static_cast<int8_t>(std::round(in[i + 1] * scale));\n      out[i / 2] =\n          (static_cast<uint8_t>(v1) & 0xF) | (static_cast<uint8_t>(v2) << 4);\n    }\n    scales_[qidx] = scale;\n  } else {\n    std::fill(out, out + dim / 2, static_cast<uint8_t>(9));\n    scales_[qidx] = std::numeric_limits<float>::max();\n  }\n}\n\nint IVFEntity::load_header(const IndexStorage::Pointer &container) {\n  //! Load the Header Segment\n  auto header = container->get(IVF_INVERTED_HEADER_SEG_ID);\n  if (!header) {\n    LOG_ERROR(\"Failed to get segment %s\", IVF_INVERTED_HEADER_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  if (header->data_size() < sizeof(header_)) {\n    LOG_ERROR(\"Invalid format for segment %s\",\n              IVF_INVERTED_HEADER_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  const void *data = nullptr;\n  if (header->read(0, &data, header->data_size()) != header->data_size()) {\n    LOG_ERROR(\"Failed to read data, segment %s\",\n              IVF_INVERTED_HEADER_SEG_ID.c_str());\n    return IndexError_ReadData;\n  }\n  std::memcpy(&header_, data, sizeof(header_));\n  if (header_.header_size < sizeof(header_) + header_.index_meta_size ||\n      header_.header_size > header->data_size()) {\n    LOG_ERROR(\"Invalid header size %u\", header_.header_size);\n    return IndexError_InvalidFormat;\n  }\n\n  //! 
Load the index meta\n  if (!meta_.deserialize(\n          reinterpret_cast<const uint8_t *>(data) + sizeof(header_),\n          header_.index_meta_size)) {\n    LOG_ERROR(\"Failed to deserialize index meta\");\n    return IndexError_InvalidFormat;\n  }\n\n  int ret = reformer_.init(meta_);\n  ivf_check_error_code(ret);\n\n  //! Create the distance calculator\n  auto metric = IndexFactory::CreateMetric(meta_.metric_name());\n  if (!metric) {\n    LOG_ERROR(\"Failed to create metric %s\", meta_.metric_name().c_str());\n    return IndexError_NoExist;\n  }\n  ret = metric->init(meta_, meta_.metric_params());\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to initialize metric %s\", meta_.metric_name().c_str());\n    return ret;\n  }\n  calculator_ = std::make_shared<IVFDistanceCalculator>(\n      meta_, metric->query_metric() ? metric->query_metric() : metric,\n      header_.block_vector_count);\n  if (!calculator_) {\n    return IndexError_NoMemory;\n  }\n\n  return 0;\n}\n\nint IVFEntity::load(const IndexStorage::Pointer &container) {\n  int ret = this->load_header(container);\n  ivf_check_error_code(ret);\n\n  //! 
Load the remaining segments\n  container_ = container;\n  size_t expect_size = header_.inverted_body_size;\n  inverted_ = load_segment(IVF_INVERTED_BODY_SEG_ID, expect_size);\n  if (!inverted_) {\n    LOG_ERROR(\"Failed to load segment, inverted_size=%zu block_count=%u\",\n              static_cast<size_t>(header_.inverted_body_size),\n              header_.block_count);\n    return IndexError_InvalidFormat;\n  }\n\n  expect_size = header_.inverted_list_count * sizeof(InvertedListMeta);\n  inverted_meta_ = load_segment(IVF_INVERTED_META_SEG_ID, expect_size);\n  if (!inverted_meta_) {\n    LOG_ERROR(\"Failed to load segment, inverted_lists=%u\",\n              header_.inverted_list_count);\n    return IndexError_InvalidFormat;\n  }\n\n  expect_size = header_.total_vector_count * sizeof(uint64_t);\n  keys_ = load_segment(IVF_KEYS_SEG_ID, expect_size);\n  if (!keys_) {\n    return IndexError_InvalidFormat;\n  }\n\n  expect_size = header_.total_vector_count * sizeof(InvertedVecLocation);\n  offsets_ = load_segment(IVF_OFFSETS_SEG_ID, expect_size);\n  if (!offsets_) {\n    return IndexError_InvalidFormat;\n  }\n\n  expect_size = header_.total_vector_count * sizeof(uint32_t);\n  mapping_ = load_segment(IVF_MAPPING_SEG_ID, expect_size);\n  if (!mapping_) {\n    return IndexError_InvalidFormat;\n  }\n\n  norm_value_sqrt_ =\n      meta_.metric_name() == \"Euclidean\" || meta_.metric_name() == \"Manhattan\";\n  if (container_->get(IVF_INT8_QUANTIZED_PARAMS_SEG_ID) ||\n      container->get(IVF_INT4_QUANTIZED_PARAMS_SEG_ID)) {\n    expect_size =\n        header_.inverted_list_count * sizeof(InvertedIntegerQuantizerParams);\n    auto &seg_id = meta_.reformer_name() == kInt8ReformerName\n                       ? 
IVF_INT8_QUANTIZED_PARAMS_SEG_ID\n                       : IVF_INT4_QUANTIZED_PARAMS_SEG_ID;\n    integer_quantizer_params_ = load_segment(seg_id, expect_size);\n    if (!integer_quantizer_params_) {\n      return IndexError_InvalidFormat;\n    }\n    norm_value_ = 0.0f;\n  } else if (meta_.reformer_name() == kInt8ReformerName ||\n             meta_.reformer_name() == kInt4ReformerName) {\n    auto &scale_key = meta_.reformer_name() == kInt8ReformerName\n                          ? INT8_QUANTIZER_REFORMER_SCALE\n                          : INT4_QUANTIZER_REFORMER_SCALE;\n    auto scale = meta_.reformer_params().get_as_float(scale_key);\n    norm_value_ = this->convert_to_normalize_value(scale);\n  } else {\n    norm_value_ = 1.0f;\n  }\n\n  if (container_->get(IVF_FEATURES_SEG_ID)) {\n    features_ = load_segment(IVF_FEATURES_SEG_ID, 0);\n    if (!features_) {\n      return IndexError_InvalidFormat;\n    }\n    if (features_->data_size() % vector_count() != 0) {\n      LOG_ERROR(\"Invalid featureSegment size=%zu, totalVecs=%zu\",\n                features_->data_size(), vector_count());\n      return IndexError_InvalidFormat;\n    }\n  }\n\n  LOG_DEBUG(\n      \"Load inverted index done, docs=%u invertedListCnt=%u \"\n      \"elementSize=%u metric=%s reformer=%s\",\n      header_.total_vector_count, header_.inverted_list_count,\n      meta_.element_size(), meta_.metric_name().c_str(),\n      meta_.reformer_name().c_str());\n  return 0;\n}\n\nint IVFEntity::search(size_t inverted_list_id, const void *query,\n                      const IndexFilter &filter, uint32_t *scan_count,\n                      IndexDocumentHeap *heap,\n                      IndexContext::Stats *context_stats) const {\n  ailego_assert_with(inverted_list_id < header_.inverted_list_count,\n                     \"invalid id\");\n  auto list_meta = this->inverted_list_meta(inverted_list_id);\n  ivf_assert(list_meta, IndexError_ReadData);\n\n  const void *data = nullptr;\n  const size_t block_vecs 
= header_.block_vector_count;\n  std::vector<float> distances(block_vecs);\n  const size_t batch_size = kBatchBlocks;\n  const size_t block_size = header_.block_size;\n  const auto norm_val = this->inverted_list_normalize_value(inverted_list_id);\n  for (size_t i = 0; i < list_meta->block_count; i += batch_size) {\n    //! Read vecs\n    const size_t off = list_meta->offset + i * block_size;\n    const size_t blocks = std::min(batch_size, list_meta->block_count - i);\n    const size_t size =\n        std::min(blocks * block_size,\n                 static_cast<size_t>(header_.inverted_body_size - off));\n    if (inverted_->read(off, &data, size) != size) {\n      LOG_ERROR(\"Failed to read block, off=%zu, size=%zu\", off, size);\n      return IndexError_ReadData;\n    }\n\n    //! Read keys\n    size_t items = std::min(blocks * block_vecs,\n                            list_meta->vector_count - (i * block_vecs));\n    auto keys = get_keys(list_meta->id_offset + i * block_vecs, items);\n    if (!keys) {\n      return IndexError_ReadData;\n    }\n\n    //! 
Compute distances for each block\n    for (size_t b = 0; b < blocks; ++b) {\n      const size_t vecs_count =\n          std::min(block_vecs, list_meta->vector_count - (i + b) * block_vecs);\n      auto block_keys = keys + b * block_vecs;\n      size_t keeps = 0;\n      ailego_assert_with(block_vecs < sizeof(keeps) * 8, \"bits overflow\");\n      for (size_t k = 0; k < vecs_count; ++k) {\n        if (!filter(block_keys[k])) {\n          keeps |= (1 << k);\n        } else {\n          ++(*context_stats->mutable_filtered_count());\n        }\n      }\n      if (keeps == 0) {\n        continue;\n      }\n\n      const void *block_data = static_cast<const char *>(data) + b * block_size;\n      calculator_->query_features_distance(query, block_data, vecs_count,\n                                           distances.data());\n\n      *(context_stats->mutable_dist_calced_count()) += vecs_count;\n\n      uint32_t id_off = list_meta->id_offset + (i + b) * block_vecs;\n      for (size_t k = 0; k < vecs_count; ++k) {\n        if (keeps & (1 << k)) {\n          if (block_keys[k] != kInvalidKey) {\n            heap->emplace(block_keys[k], distances[k] * norm_val, id_off + k);\n          }\n        }\n      }\n    }\n  }\n\n  *scan_count = list_meta->vector_count;\n  return 0;\n}\n\n//! 
search in inverted list without filter\nint IVFEntity::search(size_t inverted_list_id, const void *query,\n                      uint32_t *scan_count, IndexDocumentHeap *heap,\n                      IndexContext::Stats *context_stats) const {\n  ailego_assert_with(inverted_list_id < header_.inverted_list_count,\n                     \"invalid id\");\n  auto list_meta = inverted_list_meta(inverted_list_id);\n  ivf_assert(list_meta, IndexError_ReadData);\n\n  const void *data = nullptr;\n  const size_t block_vecs = header_.block_vector_count;\n  std::vector<float> distances(block_vecs);\n  const size_t batch_size = kBatchBlocks;\n  const size_t block_size = header_.block_size;\n  const auto norm_val = this->inverted_list_normalize_value(inverted_list_id);\n  for (size_t i = 0; i < list_meta->block_count; i += batch_size) {\n    //! Read vecs\n    const size_t off = list_meta->offset + i * block_size;\n    const size_t blocks = std::min(batch_size, list_meta->block_count - i);\n    const size_t size =\n        std::min(blocks * block_size,\n                 static_cast<size_t>(header_.inverted_body_size - off));\n    if (inverted_->read(off, &data, size) != size) {\n      LOG_ERROR(\"Failed to read block, off=%zu, size=%zu\", off, size);\n      return IndexError_ReadData;\n    }\n\n    //! Read keys\n    size_t items = std::min(blocks * block_vecs,\n                            list_meta->vector_count - (i * block_vecs));\n    auto keys = get_keys(list_meta->id_offset + i * block_vecs, items);\n    if (!keys) {\n      return IndexError_ReadData;\n    }\n\n    //! 
Compute distances for each block\n    for (size_t b = 0; b < blocks; ++b) {\n      const size_t vecs_count =\n          std::min(block_vecs, list_meta->vector_count - (i + b) * block_vecs);\n      auto block_keys = keys + b * block_vecs;\n      const void *block_data = static_cast<const char *>(data) + b * block_size;\n      calculator_->query_features_distance(query, block_data, vecs_count,\n                                           distances.data());\n      for (size_t k = 0; k < vecs_count; ++k) {\n        if (block_keys[k] != kInvalidKey) {\n          uint32_t id = list_meta->id_offset + (i + b) * block_vecs + k;\n          heap->emplace(block_keys[k], distances[k] * norm_val, id);\n        }\n      }\n      *(context_stats->mutable_dist_calced_count()) += vecs_count;\n    }\n  }\n\n  *scan_count = list_meta->vector_count;\n  return 0;\n}\n\n//! search all inverted list with filter\nint IVFEntity::search(const void *query, const IndexFilter &filter,\n                      IndexDocumentHeap *heap,\n                      IndexContext::Stats *context_stats) const {\n  for (size_t i = 0; i < header_.inverted_list_count; ++i) {\n    uint32_t scan_count;\n    int ret = this->search(i, query, filter, &scan_count, heap, context_stats);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  return 0;\n}\n\n//! 
search all inverted list without filter\nint IVFEntity::search(const void *query, IndexDocumentHeap *heap,\n                      IndexContext::Stats *context_stats) const {\n  for (size_t i = 0; i < header_.inverted_list_count; ++i) {\n    uint32_t scan_count;\n    int ret = this->search(i, query, &scan_count, heap, context_stats);\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  return 0;\n}\n\nconst void *IVFEntity::get_vector(size_t id) const {\n  if (features_) {\n    const void *data = nullptr;\n    size_t element_size = features_->data_size() / vector_count();\n    size_t off = id * element_size;\n    if (features_->read(off, &data, element_size) != element_size) {\n      LOG_ERROR(\"Failed to read segment, off=%zu size=%zu\", off, element_size);\n      return nullptr;\n    }\n    return data;\n  }\n\n  const void *data = nullptr;\n  size_t size = sizeof(InvertedVecLocation);\n  if (offsets_->read(id * size, &data, size) != size) {\n    LOG_ERROR(\"Failed to read offsets segment, id=%zu\", id);\n    return nullptr;\n  }\n  auto &loc = *reinterpret_cast<const InvertedVecLocation *>(data);\n  if (loc.column_major) {\n    vector_.resize(meta_.element_size());\n    auto unit_size = IndexMeta::AlignSizeof(meta_.data_type());\n    size_t cols = meta_.element_size() / unit_size;\n    size_t step = block_vector_count() * unit_size;\n    size_t rd_size = step * (cols - 1) + unit_size;\n    if (inverted_->read(loc.offset, &data, rd_size) != rd_size) {\n      LOG_ERROR(\"Failed to read data, off=%zu size=%zu\",\n                static_cast<size_t>(loc.offset), rd_size);\n      return nullptr;\n    }\n    for (size_t c = 0; c < cols; ++c) {\n      vector_.replace(c * unit_size, unit_size,\n                      reinterpret_cast<const char *>(data) + c * step,\n                      unit_size);\n    }\n    return vector_.data();\n  } else {\n    if (inverted_->read(loc.offset, &data, meta_.element_size()) !=\n        meta_.element_size()) {\n      
LOG_ERROR(\"Failed to read data, off=%zu size=%u\",\n                static_cast<size_t>(loc.offset), meta_.element_size());\n      return nullptr;\n    }\n    return data;\n  }\n}\n\nint IVFEntity::get_vector(size_t id, IndexStorage::MemoryBlock &block) const {\n  if (features_) {\n    size_t element_size = features_->data_size() / vector_count();\n    size_t off = id * element_size;\n    if (features_->read(off, block, element_size) != element_size) {\n      LOG_ERROR(\"Failed to read segment, off=%zu size=%zu\", off, element_size);\n      return IndexError_Runtime;\n    }\n    return 0;\n  }\n\n\n  IndexStorage::MemoryBlock data_block;\n  size_t size = sizeof(InvertedVecLocation);\n  if (offsets_->read(id * size, data_block, size) != size) {\n    LOG_ERROR(\"Failed to read offsets segment, id=%zu\", id);\n    return IndexError_Runtime;\n  }\n  const void *data = data_block.data();\n  auto &loc = *reinterpret_cast<const InvertedVecLocation *>(data);\n  if (loc.column_major) {\n    vector_.resize(meta_.element_size());\n    auto unit_size = IndexMeta::AlignSizeof(meta_.data_type());\n    size_t cols = meta_.element_size() / unit_size;\n    size_t step = block_vector_count() * unit_size;\n    size_t rd_size = step * (cols - 1) + unit_size;\n    if (inverted_->read(loc.offset, &data, rd_size) != rd_size) {\n      LOG_ERROR(\"Failed to read data, off=%zu size=%zu\",\n                static_cast<size_t>(loc.offset), rd_size);\n      return IndexError_Runtime;\n    }\n    for (size_t c = 0; c < cols; ++c) {\n      vector_.replace(c * unit_size, unit_size,\n                      reinterpret_cast<const char *>(data) + c * step,\n                      unit_size);\n    }\n    block.reset(vector_.data());\n    return 0;\n  } else {\n    if (inverted_->read(loc.offset, block, meta_.element_size()) !=\n        meta_.element_size()) {\n      LOG_ERROR(\"Failed to read data, off=%zu size=%u\",\n                static_cast<size_t>(loc.offset), meta_.element_size());\n      
return IndexError_Runtime;\n    }\n    return 0;\n  }\n}\n\nuint32_t IVFEntity::key_to_id(uint64_t key) const {\n  //! Do binary search\n  uint32_t start = 0UL;\n  uint32_t end = vector_count();\n  const void *data = nullptr;\n  uint32_t idx = 0u;\n  while (start < end) {\n    idx = start + (end - start) / 2;\n    if (ailego_unlikely(mapping_->read(idx * sizeof(uint32_t), &data,\n                                       sizeof(uint32_t)) != sizeof(uint32_t))) {\n      LOG_ERROR(\"Failed to read mapping segment, idx=%u\", idx);\n      return std::numeric_limits<uint32_t>::max();\n    }\n    const uint64_t *mkey;\n    uint32_t local_id = *reinterpret_cast<const uint32_t *>(data);\n    if (ailego_unlikely(keys_->read(local_id * sizeof(uint64_t),\n                                    (const void **)(&mkey),\n                                    sizeof(uint64_t)) != sizeof(uint64_t))) {\n      LOG_ERROR(\"Read key from segment failed\");\n      return std::numeric_limits<uint32_t>::max();\n    }\n    if (*mkey < key) {\n      start = idx + 1;\n    } else if (*mkey > key) {\n      end = idx;\n    } else {\n      return local_id;\n    }\n  }\n  return std::numeric_limits<uint32_t>::max();\n}\n\nconst void *IVFEntity::get_vector_by_key(uint64_t key) const {\n  uint32_t id = this->key_to_id(key);\n  if (id != std::numeric_limits<uint32_t>::max()) {\n    return get_vector(id);\n  } else {\n    return nullptr;\n  }\n}\n\nint IVFEntity::get_vector_by_key(uint64_t key,\n                                 IndexStorage::MemoryBlock &block) const {\n  uint32_t id = this->key_to_id(key);\n  if (id != std::numeric_limits<uint32_t>::max()) {\n    return get_vector(id, block);\n  } else {\n    return IndexError_Runtime;\n  }\n}\n\nIVFEntity::Pointer IVFEntity::clone(void) const {\n  auto entity = std::make_shared<IVFEntity>();\n  return clone(entity);\n}\n\nIVFEntity::Pointer IVFEntity::clone(const IVFEntity::Pointer &entity) const {\n  if (!entity) {\n    LOG_ERROR(\"Failed to alloc 
IVFEntity\");\n    return nullptr;\n  }\n\n  auto inverted = inverted_->clone();\n  ivf_assert_with_msg(inverted, nullptr, \"Failed to clone inverted segment\");\n\n  auto inverted_meta = inverted_meta_->clone();\n  ivf_assert_with_msg(inverted_meta, nullptr,\n                      \"Failed to clone inverted meta segment\");\n\n  auto keys = keys_->clone();\n  ivf_assert_with_msg(keys, nullptr, \"Failed to clone keys segment\");\n\n  auto offsets = offsets_->clone();\n  ivf_assert_with_msg(offsets, nullptr, \"Failed to clone offsets segment\");\n\n  auto mapping = mapping_->clone();\n  ivf_assert_with_msg(mapping, nullptr, \"Failed to clone mapping segment\");\n\n  IndexStorage::Segment::Pointer integer_quantizer_params;\n  if (integer_quantizer_params_) {\n    integer_quantizer_params = integer_quantizer_params_->clone();\n    if (!integer_quantizer_params) {\n      LOG_ERROR(\"Failed to clone integer quantizer params segment\");\n      return nullptr;\n    }\n  }\n  IndexStorage::Segment::Pointer features;\n  if (features_) {\n    features = features_->clone();\n    if (!features) {\n      LOG_ERROR(\"Failed to clone features segment\");\n      return nullptr;\n    }\n  }\n\n  entity->meta_ = this->meta_;\n  entity->reformer_ = this->reformer_;\n  entity->calculator_ = this->calculator_;\n  entity->header_ = this->header_;\n  entity->container_ = this->container_;\n\n  entity->inverted_ = inverted;\n  entity->inverted_meta_ = inverted_meta;\n  entity->keys_ = keys;\n  entity->offsets_ = offsets;\n  entity->mapping_ = mapping;\n  entity->integer_quantizer_params_ = integer_quantizer_params;\n  entity->features_ = features;\n  entity->norm_value_ = this->norm_value_;\n  entity->norm_value_sqrt_ = this->norm_value_sqrt_;\n\n  return entity;\n}\n\nIndexStorage::Segment::Pointer IVFEntity::load_segment(\n    const std::string &seg_id, size_t expect_size) const {\n  auto segment = container_->get(seg_id);\n  if (!segment) {\n    LOG_ERROR(\"Failed to get segment %s\", 
seg_id.c_str());\n    return nullptr;\n  }\n  if (expect_size && segment->data_size() != expect_size) {\n    LOG_ERROR(\"Invalid segment %s size=%zu, total_vecs=%u\", seg_id.c_str(),\n              segment->data_size(), header_.total_vector_count);\n    return nullptr;\n  }\n  return segment;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_entity.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_framework.h>\n#include \"metric/metric_params.h\"\n#include \"ivf_distance_calculator.h\"\n#include \"ivf_index_format.h\"\n#include \"ivf_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Entity\n */\nclass IVFEntity {\n public:\n  typedef std::shared_ptr<IVFEntity> Pointer;\n\n  class IVFReformerWrapper;\n\n  //! Constructor\n  IVFEntity() {}\n\n  //! Destructor\n  virtual ~IVFEntity() {}\n\n  //! Disable them\n  IVFEntity(const IVFEntity &) = delete;\n  IVFEntity &operator=(const IVFEntity &) = delete;\n\n  //! load the index from container\n  virtual int load(const IndexStorage::Pointer &container);\n\n  //! search in inverted list with filter\n  int search(size_t inverted_list_id, const void *query,\n             const IndexFilter &filter, uint32_t *scan_count,\n             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;\n\n  //! search in inverted list without filter\n  int search(size_t inverted_list_id, const void *query, uint32_t *scan_count,\n             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;\n\n  //! 
search all inverted list with filter\n  int search(const void *query, const IndexFilter &filter,\n             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;\n\n  //! search all inverted list without filter\n  int search(const void *query, IndexDocumentHeap *heap,\n             IndexContext::Stats *context_stats) const;\n\n  //! Clone the entity\n  virtual IVFEntity::Pointer clone(void) const;\n\n  //! Clone the entity\n  IVFEntity::Pointer clone(const IVFEntity::Pointer &entity) const;\n\n  //! Retrieve the primary keys by local id in heap\n  int retrieve_keys(IndexDocumentHeap *heap) const {\n    for (auto &it : (*heap)) {\n      uint64_t key = this->get_key(it.index());\n      if (key == kInvalidKey) {\n        return IndexError_ReadData;\n      }\n      it.set_key(key);\n    }\n\n    return 0;\n  }\n\n  //! Retrieve the total vectors in the index\n  size_t vector_count(void) const {\n    return header_.total_vector_count;\n  }\n\n  //! Retrieve the inverted list count\n  size_t inverted_list_count(void) const {\n    return header_.inverted_list_count;\n  }\n\n  //! Retrieve block size of the inverted vector\n  size_t inverted_block_size(void) const {\n    return header_.block_size;\n  }\n\n  //! Retrieve the vectors count in one block\n  size_t block_vector_count(void) const {\n    return header_.block_vector_count;\n  }\n\n  //! Retrieve IndexMeta of the inverted index\n  const IndexMeta &meta(void) const {\n    return meta_;\n  }\n\n  //! 
Retrieve a block of vectors\n  const void *read_block(size_t inverted_list_id, size_t local_block_id,\n                         size_t *vecs_count) const {\n    auto iv_meta = this->inverted_list_meta(inverted_list_id);\n    if (!iv_meta || local_block_id >= iv_meta->block_count) {\n      LOG_ERROR(\"Failed to read inverted list, listId=%zu blockIdx=%zu\",\n                inverted_list_id, local_block_id);\n      return nullptr;\n    }\n\n    size_t block_vecs = header_.block_vector_count;\n    *vecs_count = std::min(block_vecs,\n                           iv_meta->vector_count - local_block_id * block_vecs);\n    ailego_assert_with(*vecs_count <= header_.block_vector_count,\n                       \"invalid vecs\");\n    const size_t off = iv_meta->offset + local_block_id * header_.block_size;\n    const size_t size = *vecs_count * meta_.element_size();\n    const void *data = nullptr;\n    if (inverted_->read(off, &data, size) != size) {\n      LOG_ERROR(\"Failed to read block off=%zu size=%zu\", off, size);\n      return nullptr;\n    }\n\n    return data;\n  }\n\n  //! Retrieve the inverted list meta\n  const InvertedListMeta *inverted_list_meta(size_t inverted_list_id) const {\n    const void *data = nullptr;\n    const size_t size = sizeof(InvertedListMeta);\n    const size_t offset = inverted_list_id * size;\n    if (inverted_meta_->read(offset, &data, size) != size) {\n      LOG_ERROR(\"Failed to read inverted meta, id=%zu, size=%zu\",\n                inverted_list_id, size);\n      return nullptr;\n    }\n\n    return static_cast<const InvertedListMeta *>(data);\n  }\n\n  //! 
Retrieve the keys by consecutive local ids\n  const uint64_t *get_keys(size_t id, size_t count) const {\n    const void *data = nullptr;\n    const size_t offset = id * sizeof(uint64_t);\n    const size_t size = count * sizeof(uint64_t);\n    if (keys_->read(offset, &data, size) != size) {\n      LOG_ERROR(\"Failed to read keys, id=%zu, size=%zu\", id, size);\n      return nullptr;\n    }\n\n    return static_cast<const uint64_t *>(data);\n  }\n\n  //! Retrieve the key by local id\n  uint64_t get_key(size_t id) const {\n    const void *data = nullptr;\n    const size_t offset = id * sizeof(uint64_t);\n    const size_t size = sizeof(uint64_t);\n    if (keys_->read(offset, &data, size) != size) {\n      LOG_ERROR(\"Failed to read key, id=%zu\", id);\n      return kInvalidKey;\n    }\n\n    return *static_cast<const uint64_t *>(data);\n  }\n\n  //! Retrieve vector by local id\n  const void *get_vector(size_t id) const;\n\n  //! Retrieve vector by local id\n  const void *get_vector_by_key(uint64_t key) const;\n\n  int get_vector(size_t id, IndexStorage::MemoryBlock &block) const;\n\n  int get_vector_by_key(uint64_t key, IndexStorage::MemoryBlock &block) const;\n\n  uint32_t key_to_id(uint64_t key) const;\n\n  //! Transform a query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                const void **out, IndexQueryMeta *ometa) const {\n    return reformer_.transform(query, qmeta, out, ometa);\n  }\n\n  //! Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                const void **out, IndexQueryMeta *ometa) const {\n    return reformer_.transform(query, qmeta, count, out, ometa);\n  }\n\n  //! Normalize the score in query part\n  void normalize(size_t qidx, IndexDocumentHeap *heap) const {\n    return reformer_.normalize(qidx, heap);\n  }\n\n  //! 
Retrieve the value for each inverted list to multiply for normalizing\n  float inverted_list_normalize_value(size_t inverted_list_id) const {\n    if (norm_value_ != 0.0f) {\n      return norm_value_;\n    }\n\n    // ailego_assert_with(integer_quantizer_params_, \"nullptr\");\n    if (integer_quantizer_params_ != nullptr) {\n      const void *data = nullptr;\n      size_t size = sizeof(InvertedIntegerQuantizerParams);\n      size_t off = inverted_list_id * size;\n      if (integer_quantizer_params_->read(off, &data, size) != size) {\n        LOG_ERROR(\"Failed to read data from segment, off=%zu\", off);\n        return 1.0f;\n      }\n      auto scale =\n          static_cast<const InvertedIntegerQuantizerParams *>(data)->scale;\n      return this->convert_to_normalize_value(scale);\n    }\n\n    return norm_value_;\n  }\n\n  //! Check whether the feature segment exist\n  bool has_orignal_feature() const {\n    return !!features_;\n  }\n\n  //! Retrieve reformer\n  const IVFReformerWrapper &reformer(void) const {\n    return reformer_;\n  }\n\n  /*! Index Reformer Wrapper\n   *  To transform query in inverted index searching, and normalize the score\n   */\n  class IVFReformerWrapper {\n   public:\n    //! Constructor\n    IVFReformerWrapper() {}\n\n    //! Assignment\n    IVFReformerWrapper &operator=(const IVFReformerWrapper &wrapper) {\n      reformer_ = wrapper.reformer_;\n      type_ = wrapper.type_;\n      buffer_.clear();\n      buffer_.shrink_to_fit();\n      reciprocal_ = wrapper.reciprocal_;\n      return *this;\n    }\n\n    //! Initialize\n    int init(const IndexMeta &imeta);\n\n    //! Update\n    int update(const IndexMeta &meta);\n\n    //! Transform a query\n    int transform(const void *query, const IndexQueryMeta &qmeta,\n                  const void **out, IndexQueryMeta *ometa);\n\n    //! 
Transform queries\n    int transform(const void *query, const IndexQueryMeta &qmeta,\n                  uint32_t count, const void **out, IndexQueryMeta *ometa);\n\n    //! Convert a record\n    virtual int convert(const void *record, const IndexQueryMeta &rmeta,\n                        const void **out, IndexQueryMeta *ometa);\n\n    //! Convert records\n    virtual int convert(const void *records, const IndexQueryMeta &rmeta,\n                        uint32_t count, const void **out,\n                        IndexQueryMeta *ometa);\n\n    //! Transform queries\n    int transform_gpu(const void *query, const IndexQueryMeta &qmeta,\n                      uint32_t count, const void **out, IndexQueryMeta *ometa);\n\n    //! Normalize the score in query part\n    void normalize(size_t qidx, IndexDocumentHeap *heap) const;\n\n    //! Normalize the score in query part\n    void normalize(size_t qidx, const void *query, const IndexQueryMeta &qmeta,\n                   IndexDocumentHeap *heap) const;\n\n   private:\n    //! Transform query from fp32 to int8\n    void transform(size_t qidx, const float *in, size_t dim, int8_t *out);\n\n    //! Transform query from fp32 to int4\n    void transform(size_t qidx, const float *in, size_t dim, uint8_t *out);\n\n   private:\n    //! Constants\n    enum Type {\n      kReformerTpNone = 0,\n      kReformerTpInnerProductInt8 = 1,\n      kReformerTpInnerProductInt4 = 2,\n      kReformerTpInt8 = 3,\n      kReformerTpInt4 = 4,\n      kReformerTpDefault = 7,\n    };\n\n    //! Members\n    Type type_{kReformerTpNone};\n    IndexReformer::Pointer reformer_{};\n    std::string buffer_{};\n    float reciprocal_{0.0};        // for int8\n    std::vector<float> scales_{};  // for int8 IP\n  };\n\n private:\n  //! Load the segment by seg_id in expect_size segment size\n  IndexStorage::Segment::Pointer load_segment(const std::string &seg_id,\n                                              size_t expect_size) const;\n\n  //! 
Load the header segment\n  int load_header(const IndexStorage::Pointer &container);\n\n  //! Convert the int8 quantizer scale to normalize value\n  float convert_to_normalize_value(float scale) const {\n    auto v = scale == 0.0 ? 1.0 : (1.0 / scale);\n    return !norm_value_sqrt_ ? v : std::sqrt(v);\n  }\n\n protected:\n  //! Constants\n  static constexpr size_t kBatchBlocks = 10u;\n\n  //! Members\n  IndexMeta meta_{};\n  mutable IVFReformerWrapper reformer_{};\n  IVFDistanceCalculator::Pointer calculator_{};\n  InvertedIndexHeader header_{};\n  IndexStorage::Pointer container_{};\n  IndexStorage::Segment::Pointer inverted_{};\n  IndexStorage::Segment::Pointer inverted_meta_{};\n  IndexStorage::Segment::Pointer keys_{};\n  IndexStorage::Segment::Pointer offsets_{};\n  IndexStorage::Segment::Pointer mapping_{};\n  IndexStorage::Segment::Pointer features_{};\n  IndexStorage::Segment::Pointer integer_quantizer_params_{};\n  mutable std::string vector_{};  // temporary buffer for column major order\n  float norm_value_{0.0f};  // normalize the inverted vector to original score\n  bool norm_value_sqrt_{false};  // whether the norm value needs sqrt\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_index_format.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <ailego/container/bitmap.h>\n#include <zvec/core/framework/index_framework.h>\n\nnamespace zvec {\nnamespace core {\n\nstatic constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();\n\n/*! Index Format of Inverted Index Header\n */\nstruct InvertedIndexHeader {\n  uint32_t header_size{0};\n  uint32_t total_vector_count{0};\n  uint64_t inverted_body_size{0};\n  uint32_t inverted_list_count{0};\n  uint32_t block_vector_count{0};\n  uint32_t block_size{0};\n  uint32_t block_count{0};\n  uint32_t index_meta_size{0};\n  char reserved_[28];\n  char index_meta[0];\n};\n\n/*! Index Format of Inverted Index Meta for each Inverted list\n */\nstruct InvertedListMeta {\n  uint64_t offset{0};\n  uint32_t block_count{0};\n  uint32_t vector_count{0};\n  uint32_t id_offset{0};\n  char reserved_[16];\n};\n\n/*! Index Format of Location in Inverted Index for each vector\n */\nstruct InvertedVecLocation {\n  InvertedVecLocation(size_t off, bool col)\n      : offset(off), column_major(col), reserved(0u) {}\n\n  uint64_t offset : 48;       // feature offset in posting block segment\n  uint64_t column_major : 1;  // coloum major if true\n  uint64_t reserved : 15;\n};\n\n/*! 
Index Format of Integer Quantizer params for each inverted list\n */\nstruct InvertedIntegerQuantizerParams {\n  float scale{1.0};\n  float bias{0.0};\n};\n\n/*! Location of Vectors Block in Storage Segment\n */\nstruct BlockLocation {\n  uint16_t segment_id;\n  uint16_t block_index;\n};\n\n/*! The Header of a Block in Storage Segment\n */\nstruct BlockHeader {\n  BlockLocation next;\n  uint16_t vector_count;\n  uint16_t column_major : 1;\n  uint16_t reserved_ : 15;\n};\n\nstruct DeletionMap {\n  void set(uint32_t index) {\n    bitset.set(index);\n  }\n\n  void reset(uint32_t index) {\n    bitset.reset(index);\n  }\n\n  bool test(uint32_t index) const {\n    return bitset.test(index);\n  }\n\n  bool is_dirty() const {\n    return bitset.test_any();\n  }\n\n  ailego::FixedBitset<32> bitset{};\n};\n\nstatic_assert(sizeof(DeletionMap) == 4, \"DeletionMap must be 4 bytes\");\n\n/*! Meta Information of Streamer Entity\n */\nstruct StreamerInvertedMeta {\n  uint64_t create_time{0};\n  uint64_t update_time{0};\n  uint64_t revision_id{0};\n  uint32_t segment_count{0};\n  uint32_t segment_size{0};\n  uint8_t reserved_[32];\n  InvertedIndexHeader header;\n};\n\n/*! Location of Vector in Storage Segment\n */\nstruct VectorLocation {\n  //! Constructor\n  VectorLocation(void) {}\n\n  //! 
Constructor\n  VectorLocation(uint16_t id, bool col, uint32_t off)\n      : segment_id(id), column_major(col), offset(off) {}\n\n  uint16_t segment_id;\n  uint16_t column_major : 1;\n  uint16_t reserved_ : 15;\n  uint32_t offset;\n\n public:\n  bool operator==(const VectorLocation &other) const {\n    return segment_id == other.segment_id &&\n           column_major == other.column_major && offset == other.offset;\n  }\n};\n\nstatic_assert(sizeof(VectorLocation) == sizeof(uint64_t),\n              \"VectorLocation must be size of 8 bytes\");\n\nstruct KeyInfo {\n  KeyInfo(void) {}\n  KeyInfo(uint32_t idx, const VectorLocation &loc)\n      : centroid_idx(idx), location(loc) {}\n  KeyInfo(VectorLocation loc) : location(loc) {}\n  uint32_t centroid_idx;\n  VectorLocation location;\n};\n\n// Segments ID\nconst std::string IVF_CENTROID_SEG_ID(\"ivf.centroid\");\nconst std::string IVF_INVERTED_BODY_SEG_ID(\"ivf.inverted_body\");\nconst std::string IVF_INVERTED_HEADER_SEG_ID(\"ivf.inverted_header\");\nconst std::string IVF_INVERTED_META_SEG_ID(\"ivf.inverted_meta\");\nconst std::string IVF_KEYS_SEG_ID(\"hc.keys\");\nconst std::string IVF_OFFSETS_SEG_ID(\"ivf.offsets\");\nconst std::string IVF_MAPPING_SEG_ID(\"ivf.mapping\");\nconst std::string IVF_FEATURES_SEG_ID(\"ivf.features\");\nconst std::string IVF_INT8_QUANTIZED_PARAMS_SEG_ID(\"ivf.int8_quantized_params\");\nconst std::string IVF_INT4_QUANTIZED_PARAMS_SEG_ID(\"ivf.int4_quantized_params\");\n\nconst std::string IVF_INVERTED_LIST_HEAD_SEG_ID(\"ivf.inverted_list_head\");\nconst std::string IVF_STORAGE_SEGMENT_ID(\"ivf.S\");\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_index_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_searcher.h>\n#include \"ivf_entity.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF IndexProvider\n */\nclass IVFIndexProvider : public IndexProvider {\n public:\n  IVFIndexProvider(const IndexMeta &meta, const IVFEntity::Pointer &entity,\n                   const std::string &owner)\n      : meta_(meta), entity_(entity), owner_class_(owner) {}\n\n  IVFIndexProvider(const IVFIndexProvider &) = delete;\n  IVFIndexProvider &operator=(const IVFIndexProvider &) = delete;\n\n public:\n  //! Create a new iterator\n  virtual Iterator::Pointer create_iterator(void) override {\n    return Iterator::Pointer(new (std::nothrow) Iterator(entity_));\n  }\n\n  //! Retrieve count of vectors\n  virtual size_t count(void) const override {\n    return entity_->vector_count();\n  }\n\n  //! Retrieve dimension of vector\n  virtual size_t dimension(void) const override {\n    return meta_.dimension();\n  }\n\n  //! Retrieve type of vector\n  virtual IndexMeta::DataType data_type(void) const override {\n    return meta_.data_type();\n  }\n\n  //! Retrieve vector size in bytes\n  virtual size_t element_size(void) const override {\n    return meta_.element_size();\n  }\n\n  //! 
Retrieve a vector using a primary key\n  virtual const void *get_vector(uint64_t key) const override {\n    return entity_->get_vector_by_key(key);\n  }\n\n  //! Retrieve the owner class\n  virtual const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n private:\n  class Iterator : public IndexProvider::Iterator {\n   public:\n    Iterator(const IVFEntity::Pointer &entity) : entity_(entity) {}\n\n    //! Retrieve pointer of data\n    //! NOTICE: the vec feature will be changed after iterating to next, so\n    //! the caller needs to keep a copy of it before iterating to next vector\n    virtual const void *data(void) const override {\n      return entity_->get_vector(index_);\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return index_ < entity_->vector_count();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return entity_->get_key(index_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      ++index_;\n    }\n\n   private:\n    //! Members\n    IVFEntity::Pointer entity_;\n    size_t index_{0};\n  };\n\n private:\n  //! Members\n  const IndexMeta &meta_;\n  IVFEntity::Pointer entity_;\n  std::string owner_class_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\nstatic const std::string SEPARATOR(\"/\");\nstatic const std::string CENTROID_SEPERATOR = \"*\";\n\n// builder params\nstatic const std::string PARAM_IVF_BUILDER_CENTROID_COUNT(\n    \"proxima.ivf.builder.centroid_count\");\nstatic const std::string PARAM_IVF_BUILDER_CLUSTER_CLASS(\n    \"proxima.ivf.builder.cluster_class\");\nstatic const std::string PARAM_IVF_BUILDER_THREAD_COUNT(\n    \"proxima.ivf.builder.thread_count\");\nstatic const std::string PARAM_IVF_BUILDER_CLUSTER_AUTO_TUNING(\n    \"proxima.ivf.builder.cluster_auto_tuning\");\nstatic const std::string PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT(\n    \"proxima.ivf.builder.train_sample_count\");\nstatic const std::string PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO(\n    \"proxima.ivf.builder.train_sample_ratio\");\nstatic const std::string PARAM_IVF_BUILDER_CONVERTER_PARAMS(\n    \"proxima.ivf.builder.converter_params\");\nstatic const std::string PARAM_IVF_BUILDER_CONVERTER_CLASS(\n    \"proxima.ivf.builder.converter_class\");\nstatic const std::string PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES(\n    \"proxima.ivf.builder.store_original_features\");\nstatic const std::string PARAM_IVF_BUILDER_QUANTIZER_CLASS(\n    \"proxima.ivf.builder.quantizer_class\");\nstatic const std::string 
PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID(\n    \"proxima.ivf.builder.quantize_by_centroid\");\nstatic const std::string PARAM_IVF_BUILDER_QUANTIZER_PARAMS(\n    \"proxima.ivf.builder.quantizer_params\");\nstatic const std::string PARAM_IVF_BUILDER_CLUSTER_PARAMS_IN_LEVEL_PREFIX(\n    \"proxima.ivf.builder.cluster_params_in_level_\");\nstatic const std::string PARAM_IVF_BUILDER_OPTIMIZER_CLASS(\n    \"proxima.ivf.builder.optimizer_class\");\nstatic const std::string PARAM_IVF_BUILDER_OPTIMIZER_PARAMS(\n    \"proxima.ivf.builder.optimizer_params\");\nstatic const std::string PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS(\n    \"proxima.ivf.builder.optimizer_quantizer_class\");\nstatic const std::string PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_PARAMS(\n    \"proxima.ivf.builder.optimizer_quantizer_params\");\nstatic const std::string PARAM_IVF_BUILDER_BLOCK_VECTOR_COUNT(\n    \"proxima.ivf.builder.block_vector_count\");\n\n// searcher params\nstatic const std::string PARAM_IVF_SEARCHER_SCAN_RATIO(\n    \"proxima.ivf.searcher.scan_ratio\");\nstatic const std::string PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD(\n    \"proxima.ivf.searcher.brute_force_threshold\");\nstatic const std::string PARAM_IVF_SEARCHER_OPTIMIZER(\n    \"proxima.ivf.searcher.optimizer\");\nstatic const std::string PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS(\n    \"proxima.ivf.searcher.optimizer_params\");\nstatic const std::string PARAM_IVF_SEARCHER_CONVERTER_REFORMER(\n    \"proxima.ivf.searcher.converter_reformer\");\n\n// Constants\nstatic constexpr char const *kIPMetricName = \"InnerProduct\";\nstatic constexpr char const *kMipsMetricName = \"MipsSquaredEuclidean\";\nstatic constexpr char const *kL2MetricName = \"SquaredEuclidean\";\nstatic constexpr char const *kMipsConverterName = \"MipsConverter\";\nstatic constexpr char const *kMipsRevConverterName = \"MipsReverseConverter\";\nstatic constexpr char const *kMipsReformerName = \"MipsReformer\";\nstatic constexpr char const *kInt8QuantizerName = 
\"Int8QuantizerConverter\";\nstatic constexpr char const *kInt4QuantizerName = \"Int4QuantizerConverter\";\nstatic constexpr char const *kInt8ReformerName = \"Int8QuantizerReformer\";\nstatic constexpr char const *kInt4ReformerName = \"Int4QuantizerReformer\";\nstatic constexpr float kNormalizeScaleFactor = 16.0f;\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_searcher.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_searcher.h\"\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_segment_storage.h>\n#include \"ivf_centroid_index.h\"\n#include \"ivf_index_provider.h\"\n#include \"ivf_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nint IVFSearcher::init(const ailego::Params &parameters) {\n  params_ = parameters;\n\n  params_.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, &bruteforce_threshold_);\n\n  searcher_state_ = STATE_INITED;\n\n  return 0;\n}\n\nint IVFSearcher::cleanup(void) {\n  this->unload();\n\n  params_.clear();\n  bruteforce_threshold_ = kDefaultBfThreshold;\n\n  searcher_state_ = STATE_INIT;\n  return 0;\n}\n\nint IVFSearcher::load(IndexStorage::Pointer container,\n                      IndexMetric::Pointer /*metric*/) {\n  if (!container) {\n    LOG_ERROR(\"Invalid container\");\n    return IndexError_InvalidArgument;\n  }\n  if (searcher_state_ != STATE_INITED) {\n    LOG_ERROR(\"Initalize the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  ailego::ElapsedTime timer;\n  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from container\");\n    return ret;\n  }\n\n  //! 
Load centroid index\n  centroid_index_ = std::make_shared<IVFCentroidIndex>();\n  if (!centroid_index_) {\n    return IndexError_NoMemory;\n  }\n  auto seg = container->get(IVF_CENTROID_SEG_ID, 0);\n  if (!seg) {\n    LOG_ERROR(\"Failed to get segment %s\", IVF_CENTROID_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::Pointer seg_container =\n      std::make_shared<IndexSegmentStorage>(seg);\n  if (!seg_container) {\n    return IndexError_NoMemory;\n  }\n  ret = seg_container->open(std::string(), false);\n  if (ret != 0) {\n    LOG_ERROR(\"IndexSegmentStorage load failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = centroid_index_->load(seg_container, params_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index for %s\", IndexError::What(ret));\n    return ret;\n  }\n\n  auto reformer = centroid_index_->reformer();\n  params_.set(PARAM_IVF_SEARCHER_CONVERTER_REFORMER, reformer);\n\n  //! load iverted index\n  entity_ = std::make_shared<IVFEntity>();\n  if (!entity_) {\n    return IndexError_NoMemory;\n  }\n  ret = entity_->load(container);\n  ivf_check_error_code(ret);\n\n  magic_ = IndexContext::GenerateMagic();\n\n  stats_.set_loaded_count(entity_->vector_count());\n  stats_.set_loaded_costtime(timer.milli_seconds());\n\n  searcher_state_ = STATE_LOADED;\n  return 0;\n}\n\nint IVFSearcher::unload(void) {\n  magic_ = 0;\n  centroid_index_.reset();\n  entity_.reset();\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  stats_.clear_attributes();\n  searcher_state_ = STATE_INITED;\n\n  return 0;\n}\n\nint IVFSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                                Context::Pointer &context) const {\n  return search_bf_impl(query, qmeta, 1, context);\n}\n\nint IVFSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                                uint32_t count,\n                                Context::Pointer &context) 
const {\n  if (!query || qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\"Null query or invalid qmeta\");\n    return IndexError_InvalidArgument;\n  }\n  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());\n  if (!ctx || ctx->topk() == 0) {\n    LOG_ERROR(\"Invalid context or topk not set yet\");\n    return IndexError_InvalidArgument;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher\n    int ret = this->update_context(ctx);\n    ivf_check_error_code(ret);\n  }\n\n  ctx->reset_results(count);\n  auto &entity = ctx->entity();\n  auto &filter = ctx->filter();\n\n  //! Transform the querys for querying in inverted vector index later\n  IndexQueryMeta iv_qmeta;\n  int ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);\n  ivf_check_with_msg(ret, \"Failed to transform querys\");\n\n  // TODO: do batch search in matrix\n  for (size_t q = 0; q < count; ++q) {\n    auto &context_stats = ctx->mutable_stats(q);\n    auto &heap = ctx->mutable_result_heap();\n    heap.clear();\n    if (!filter.is_valid()) {\n      ret = entity->search(query, &heap, &context_stats);\n    } else {\n      ret = entity->search(query, filter, &heap, &context_stats);\n    }\n    ivf_check_with_msg(ret, \"Failed to search in entity for %s\",\n                       IndexError::What(ret));\n    heap.sort();  // sort the results\n    if (!filter.is_valid()) {\n      // mapping the local id to key if query without filter\n      ret = entity->retrieve_keys(&heap);\n      ivf_check_error_code(ret);\n    }\n    entity->normalize(q, &heap);\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + iv_qmeta.element_size();\n  }\n\n  return 0;\n}\n\nint IVFSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const {\n  return this->search_impl(query, qmeta, 1, context);\n}\n\nint IVFSearcher::search_impl(const void *query, const 
IndexQueryMeta &qmeta,\n                             uint32_t count, Context::Pointer &context) const {\n  if (entity_->vector_count() <= bruteforce_threshold_) {\n    return this->search_bf_impl(query, qmeta, count, context);\n  }\n  if (!query || qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\"Null query or invalid qmeta\");\n    return IndexError_InvalidArgument;\n  }\n\n  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());\n  if (!ctx || ctx->topk() == 0) {\n    LOG_ERROR(\"Invalid context or topk not set yet\");\n    return IndexError_InvalidArgument;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher\n    int ret = update_context(ctx);\n    ivf_check_error_code(ret);\n  }\n\n  ctx->reset_results(count);\n  auto &entity = ctx->entity();\n  auto &filter = ctx->filter();\n\n  auto &centroid_index_ctx = ctx->centroid_searcher_ctx();\n  int ret = centroid_index_->search(query, qmeta, count, centroid_index_ctx);\n  ivf_check_error_code(ret);\n\n  //! 
Transform the querys for querying in inverted vector index later\n  IndexQueryMeta iv_qmeta;\n  ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);\n  ivf_check_with_msg(ret, \"Failed to transform querys\");\n\n  for (size_t q = 0; q < count; ++q) {\n    auto &centroids = centroid_index_ctx->result(q);\n    auto &context_stats = ctx->mutable_stats(q);\n    auto &heap = ctx->mutable_result_heap();\n    heap.clear();\n    uint32_t total_scan_count = 0;\n    for (size_t i = 0;\n         i < centroids.size() && total_scan_count < ctx->max_scan_count();\n         ++i) {\n      auto cid = centroids[i].key();\n      uint32_t scan_count = 0;\n      if (!filter.is_valid()) {\n        ret = entity->search(cid, query, &scan_count, &heap, &context_stats);\n      } else {\n        ret = entity->search(cid, query, filter, &scan_count, &heap,\n                             &context_stats);\n      }\n      ivf_check_with_msg(ret, \"Failed to search in entity for %s\",\n                         IndexError::What(ret));\n      total_scan_count += scan_count;\n    }\n    heap.sort();  // sort the results\n    if (!filter.is_valid()) {\n      // mapping the local id to key if query without filter\n      ret = entity->retrieve_keys(&heap);\n      ivf_check_error_code(ret);\n    }\n    entity->normalize(q, &heap);\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + iv_qmeta.element_size();\n  }\n\n  return 0;\n}\n\nconst IndexSearcher::Stats &IVFSearcher::stats(void) const {\n  return stats_;\n}\n\nIndexSearcher::Context::Pointer IVFSearcher::create_context() const {\n  if (searcher_state_ != STATE_LOADED) {\n    LOG_ERROR(\"Load the index first before create context\");\n    return nullptr;\n  }\n\n  auto entity = entity_->clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone IVFEntity\");\n    return nullptr;\n  }\n\n  auto centroid_index_ctx = centroid_index_->create_context();\n  if (!centroid_index_ctx) {\n    LOG_ERROR(\"Failed to create 
centroid index context\");\n    return nullptr;\n  }\n\n  auto context =\n      new (std::nothrow) IVFSearcherContext(entity, centroid_index_ctx);\n  if (!context) {\n    LOG_ERROR(\"Failed to alloc IVFSearcherContext\");\n    return nullptr;\n  }\n  int ret = context->init(params_);\n  if (ret != 0) {\n    delete context;\n    return nullptr;\n  }\n\n  context->set_magic(magic_);\n\n  return Context::Pointer(context);\n}\n\nIndexProvider::Pointer IVFSearcher::create_provider(void) const {\n  if (searcher_state_ != STATE_LOADED) {\n    LOG_ERROR(\"Load the index first before create provider\");\n    return nullptr;\n  }\n\n  auto entity = entity_->clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone IVFEntity\");\n    return Provider::Pointer();\n  }\n\n  auto *provider = new (std::nothrow)\n      IVFIndexProvider(entity->has_orignal_feature() ? meta_ : entity->meta(),\n                       entity, \"IVFSearcher\");\n  if (!provider) {\n    LOG_ERROR(\"Failed to alloc IVFIndexProvider\");\n    return Provider::Pointer();\n  }\n\n  return Provider::Pointer(provider);\n}\n\nINDEX_FACTORY_REGISTER_SEARCHER(IVFSearcher);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_searcher.h>\n#include \"ivf_centroid_index.h\"\n#include \"ivf_entity.h\"\n#include \"ivf_searcher_context.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Searcher\n */\nclass IVFSearcher : public IndexSearcher {\n public:\n  //! Initialize Searcher\n  virtual int init(const ailego::Params &parameters) override;\n\n  //! Cleanup Searcher\n  virtual int cleanup(void) override;\n\n  //! Load index from container\n  virtual int load(IndexStorage::Pointer container,\n                   IndexMetric::Pointer metric) override;\n\n  //! Unload index\n  virtual int unload(void) override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             Context::Pointer &context) const override;\n\n  //! Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          Context::Pointer &context) const override;\n\n  //! 
Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          Context::Pointer &context) const override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override;\n\n  //! Create a searcher context\n  virtual Context::Pointer create_context(void) const override;\n\n  //! Create a new index provider\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  //! Retrieve params of index\n  virtual const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n protected:\n  int update_context(IVFSearcherContext *ctx) const {\n    auto entity = entity_->clone();\n    if (!entity) {\n      LOG_ERROR(\"Failed to clone QcEntity\");\n      return IndexError_Runtime;\n    }\n\n    //! The centroid index searcher may be different, so a new context is needed\n    auto centroid_ctx = centroid_index_->create_context();\n    if (!centroid_ctx) {\n      LOG_ERROR(\"Failed to create centroid index searcher context\");\n      return IndexError_Runtime;\n    }\n\n    return ctx->update_context(entity, centroid_ctx, params_, magic_);\n  }\n\n private:\n  //! Constants\n  static constexpr uint32_t kDefaultBfThreshold = 1000u;\n\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  //! Members\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  IVFCentroidIndex::Pointer centroid_index_{};\n  IVFEntity::Pointer entity_{};\n  uint32_t bruteforce_threshold_{kDefaultBfThreshold};\n  uint32_t magic_{0};\n  Stats stats_{};\n  State searcher_state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_searcher_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/heap.h>\n#include \"ivf_entity.h\"\n#include \"ivf_utility.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Searcher Context\n */\nclass IVFSearcherContext : public IndexSearcher::Context {\n public:\n  IVFSearcherContext(const IVFEntity::Pointer &ivf_entity,\n                     IndexSearcher::Context::Pointer &centroid_ctx)\n      : entity_(ivf_entity), centroid_searcher_ctx_(std::move(centroid_ctx)) {}\n\n public:\n  //! Set topk of search result\n  virtual void set_topk(uint32_t k) override {\n    topk_ = k;\n    result_heap_.limit(topk_);\n    result_heap_.set_threshold(this->threshold());\n  }\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(void) const override {\n    return results_[0];\n  }\n\n  //! Retrieve search result with index\n  virtual const IndexDocumentList &result(size_t idx) const override {\n    ailego_assert_with(results_.size() > idx, \"invalid index\");\n    return results_[idx];\n  }\n\n  //! Retrieve mutable result with index\n  virtual IndexDocumentList *mutable_result(size_t idx) override {\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    return &results_[idx];\n  }\n\n  inline IndexDocumentHeap *result_heap() {\n    return &result_heap_;\n  }\n\n  //! 
Update the parameters of context\n  virtual int update(const ailego::Params &params) override {\n    params.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD,\n               &bruteforce_threshold_);\n    params.get(PARAM_IVF_SEARCHER_SCAN_RATIO, &scan_ratio_);\n    if (scan_ratio_ <= 0.0) {\n      LOG_ERROR(\"Invalid params %s=%f\", PARAM_IVF_SEARCHER_SCAN_RATIO.c_str(),\n                scan_ratio_);\n      return IndexError_InvalidArgument;\n    }\n    size_t topk_val =\n        std::max(static_cast<uint32_t>(\n                     std::round(entity_->inverted_list_count() * scan_ratio_)),\n                 1u);\n    centroid_searcher_ctx_->set_topk(topk_val);\n    max_scan_count_ =\n        static_cast<uint32_t>(std::ceil(entity_->vector_count() * scan_ratio_));\n    max_scan_count_ = std::max(bruteforce_threshold_, max_scan_count_);\n    return 0;\n  }\n\n  //! Retrieve magic number\n  virtual uint32_t magic(void) const override {\n    return magic_;\n  }\n\n public:\n  //! Initialize the context\n  int init(const ailego::Params &params) {\n    return this->update(params);\n  }\n\n  //! Update the magic number\n  void set_magic(uint32_t mag) {\n    magic_ = mag;\n  }\n\n  //! Get Topk Value\n  uint32_t topk() const override {\n    return topk_;\n  }\n\n  //! Retrieve scan ratio\n  float scan_ratio(void) const {\n    return scan_ratio_;\n  }\n\n  //! Retrieve max scan count\n  uint32_t max_scan_count(void) const {\n    return max_scan_count_;\n  }\n\n  uint32_t bruteforce_threshold() const {\n    return bruteforce_threshold_;\n  }\n\n  //! Retrieve magic number\n  const IVFEntity::Pointer &entity() const {\n    return entity_;\n  }\n\n  //! Retrieve Mutable Query Result By Query Index\n  IndexDocumentHeap &mutable_result_heap() {\n    return result_heap_;\n  }\n\n  void set_fetch_vector(bool v) override {\n    fetch_vector_ = v;\n  }\n\n  bool fetch_vector(void) const override {\n    return fetch_vector_;\n  }\n\n  //! 
Reset all the query results\n  void reset_results(size_t qnum) {\n    results_.resize(qnum);\n    stats_vec_.resize(qnum);\n    for (size_t i = 0; i < qnum; ++i) {\n      results_[i].clear();\n      stats_vec_[i].clear();\n    }\n    result_heap_.clear();\n    result_heap_.limit(topk_);\n    result_heap_.set_threshold(this->threshold());\n  }\n\n  //! Update context, the context may be shared by different searcher\n  int update_context(IVFEntity::Pointer &new_entity,\n                     IndexSearcher::Context::Pointer &centroid_ctx,\n                     const ailego::Params &params, uint32_t magic_num) {\n    entity_ = new_entity;\n    centroid_searcher_ctx_ = std::move(centroid_ctx);\n    int ret = this->update(params);\n    ivf_check_error_code(ret);\n\n    magic_ = magic_num;\n\n    return 0;\n  }\n\n  //! Retrieve the centroid index context\n  IndexSearcher::Context::Pointer &centroid_searcher_ctx(void) {\n    return centroid_searcher_ctx_;\n  }\n\n  const Stats &stats(size_t idx = 0) const {\n    ailego_assert_with(stats_vec_.size() > idx, \"invalid index\");\n    return stats_vec_[idx];\n  }\n\n  Stats &mutable_stats(size_t idx = 0) {\n    ailego_assert_with(stats_vec_.size() > idx, \"invalid index\");\n    return stats_vec_[idx];\n  }\n\n  void topk_to_result(uint32_t idx) {\n    if (ailego_unlikely(result_heap_.size() == 0)) {\n      return;\n    }\n\n    ailego_assert_with(idx < results_.size(), \"invalid idx\");\n    int size = std::min(topk_, static_cast<uint32_t>(result_heap_.size()));\n    result_heap_.sort();\n    results_[idx].clear();\n    for (int i = 0; i < size; ++i) {\n      auto score = result_heap_[i].score();\n      if (score > this->threshold()) {\n        break;\n      }\n\n      key_t key = result_heap_[i].key();\n      if (fetch_vector_) {\n        IndexStorage::MemoryBlock block;\n        entity_->get_vector_by_key(key, block);\n        results_[idx].emplace_back(key, score, key, block);\n      } else {\n        
results_[idx].emplace_back(key, score);\n      }\n    }\n  }\n\n private:\n  //! Constants\n  static constexpr float kDefaultScanRatio = 0.1f;\n  static constexpr uint32_t kDefaultBfThreshold = 1000u;\n\n  //! Members\n  IVFEntity::Pointer entity_{};\n  IndexSearcher::Context::Pointer centroid_searcher_ctx_{};\n  IndexDocumentHeap result_heap_;\n  std::vector<IndexDocumentList> results_{};\n  std::vector<Stats> stats_vec_{};\n\n  bool fetch_vector_{false};\n  uint32_t topk_{0};\n  uint32_t magic_{0};\n  float scan_ratio_{kDefaultScanRatio};\n  uint32_t max_scan_count_{0};\n  uint32_t bruteforce_threshold_{kDefaultBfThreshold};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_streamer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_streamer.h\"\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_segment_storage.h>\n#include \"ivf_centroid_index.h\"\n#include \"ivf_index_provider.h\"\n#include \"ivf_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nint IVFStreamer::init(const IndexMeta &meta, const ailego::Params &parameters) {\n  meta_ = meta;\n  params_ = parameters;\n\n  params_.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, &bruteforce_threshold_);\n\n  searcher_state_ = STATE_INITED;\n\n  return 0;\n}\n\nint IVFStreamer::cleanup(void) {\n  this->unload();\n\n  params_.clear();\n  bruteforce_threshold_ = kDefaultBfThreshold;\n\n  searcher_state_ = STATE_INIT;\n  return 0;\n}\n\nint IVFStreamer::open(IndexStorage::Pointer storage) {\n  if (!storage) {\n    LOG_ERROR(\"Invalid storage\");\n    return IndexError_InvalidArgument;\n  }\n  if (searcher_state_ != STATE_INITED) {\n    LOG_ERROR(\"Initalize the searcher first before load index\");\n    return IndexError_Runtime;\n  }\n\n  ailego::ElapsedTime timer;\n  int ret = IndexHelper::DeserializeFromStorage(storage.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize meta from storage\");\n    return ret;\n  }\n\n  //! 
Load centroid index\n  centroid_index_ = std::make_shared<IVFCentroidIndex>();\n  if (!centroid_index_) {\n    return IndexError_NoMemory;\n  }\n  auto seg = storage->get(IVF_CENTROID_SEG_ID, 0);\n  if (!seg) {\n    LOG_ERROR(\"Failed to get segment %s\", IVF_CENTROID_SEG_ID.c_str());\n    return IndexError_InvalidFormat;\n  }\n  IndexStorage::Pointer seg_container =\n      std::make_shared<IndexSegmentStorage>(seg);\n  if (!seg_container) {\n    return IndexError_NoMemory;\n  }\n  ret = seg_container->open(std::string(), false);\n  if (ret != 0) {\n    LOG_ERROR(\"IndexSegmentStorage load failed for %s\", IndexError::What(ret));\n    return ret;\n  }\n  ret = centroid_index_->load(seg_container, params_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index for %s\", IndexError::What(ret));\n    return ret;\n  }\n\n  auto reformer = centroid_index_->reformer();\n  params_.set(PARAM_IVF_SEARCHER_CONVERTER_REFORMER, reformer);\n\n  //! load iverted index\n  entity_ = std::make_shared<IVFEntity>();\n  if (!entity_) {\n    return IndexError_NoMemory;\n  }\n  ret = entity_->load(storage);\n  ivf_check_error_code(ret);\n\n  magic_ = IndexContext::GenerateMagic();\n\n  stats_.set_loaded_count(entity_->vector_count());\n  stats_.set_loaded_costtime(timer.milli_seconds());\n\n  searcher_state_ = STATE_LOADED;\n  return 0;\n}\n\nint IVFStreamer::unload(void) {\n  magic_ = 0;\n  centroid_index_.reset();\n  entity_.reset();\n  stats_.set_loaded_count(0UL);\n  stats_.set_loaded_costtime(0UL);\n  stats_.clear_attributes();\n  searcher_state_ = STATE_INITED;\n\n  return 0;\n}\n\nint IVFStreamer::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                                Context::Pointer &context) const {\n  return search_bf_impl(query, qmeta, 1, context);\n}\n\nint IVFStreamer::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                                uint32_t count,\n                                Context::Pointer &context) const 
{\n  if (!query || qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\"Null query or invalid qmeta\");\n    return IndexError_InvalidArgument;\n  }\n  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());\n  if (!ctx || ctx->topk() == 0) {\n    LOG_ERROR(\"Invalid context or topk not set yet\");\n    return IndexError_InvalidArgument;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher\n    int ret = this->update_context(ctx);\n    ivf_check_error_code(ret);\n  }\n\n  ctx->reset_results(count);\n  auto &entity = ctx->entity();\n  auto &filter = ctx->filter();\n\n  //! Transform the querys for querying in inverted vector index later\n  IndexQueryMeta iv_qmeta;\n  int ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);\n  ivf_check_with_msg(ret, \"Failed to transform querys\");\n\n  // TODO: do batch search in matrix\n  for (size_t q = 0; q < count; ++q) {\n    auto &context_stats = ctx->mutable_stats(q);\n    auto &heap = ctx->mutable_result_heap();\n    heap.clear();\n    if (!filter.is_valid()) {\n      ret = entity->search(query, &heap, &context_stats);\n    } else {\n      ret = entity->search(query, filter, &heap, &context_stats);\n    }\n    ivf_check_with_msg(ret, \"Failed to search in entity for %s\",\n                       IndexError::What(ret));\n    heap.sort();  // sort the results\n    if (!filter.is_valid()) {\n      // mapping the local id to key if query without filter\n      ret = entity->retrieve_keys(&heap);\n      ivf_check_error_code(ret);\n    }\n    entity->normalize(q, &heap);\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + iv_qmeta.element_size();\n  }\n\n  return 0;\n}\n\nint IVFStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const {\n  return this->search_impl(query, qmeta, 1, context);\n}\n\nint IVFStreamer::search_impl(const void *query, const 
IndexQueryMeta &qmeta,\n                             uint32_t count, Context::Pointer &context) const {\n  if (entity_->vector_count() <= bruteforce_threshold_) {\n    return this->search_bf_impl(query, qmeta, count, context);\n  }\n  if (!query || qmeta.element_size() != meta_.element_size()) {\n    LOG_ERROR(\"Null query or invalid qmeta\");\n    return IndexError_InvalidArgument;\n  }\n\n  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());\n  if (!ctx || ctx->topk() == 0) {\n    LOG_ERROR(\"Invalid context or topk not set yet\");\n    return IndexError_InvalidArgument;\n  }\n  if (ctx->magic() != magic_) {\n    //! context is created by another searcher\n    int ret = update_context(ctx);\n    ivf_check_error_code(ret);\n  }\n\n  ctx->reset_results(count);\n  auto &entity = ctx->entity();\n  auto &filter = ctx->filter();\n\n  auto &centroid_index_ctx = ctx->centroid_searcher_ctx();\n  int ret = centroid_index_->search(query, qmeta, count, centroid_index_ctx);\n  ivf_check_error_code(ret);\n\n  //! 
Transform the querys for querying in inverted vector index later\n  IndexQueryMeta iv_qmeta;\n  ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);\n  ivf_check_with_msg(ret, \"Failed to transform querys\");\n\n  for (size_t q = 0; q < count; ++q) {\n    auto &centroids = centroid_index_ctx->result(q);\n    auto &context_stats = ctx->mutable_stats(q);\n    auto &heap = ctx->mutable_result_heap();\n    heap.clear();\n    uint32_t total_scan_count = 0;\n    for (size_t i = 0;\n         i < centroids.size() && total_scan_count < ctx->max_scan_count();\n         ++i) {\n      auto cid = centroids[i].key();\n      uint32_t scan_count = 0;\n      if (!filter.is_valid()) {\n        ret = entity->search(cid, query, &scan_count, &heap, &context_stats);\n      } else {\n        ret = entity->search(cid, query, filter, &scan_count, &heap,\n                             &context_stats);\n      }\n      ivf_check_with_msg(ret, \"Failed to search in entity for %s\",\n                         IndexError::What(ret));\n      total_scan_count += scan_count;\n    }\n    heap.sort();  // sort the results\n    if (!filter.is_valid()) {\n      // mapping the local id to key if query without filter\n      ret = entity->retrieve_keys(&heap);\n      ivf_check_error_code(ret);\n    }\n    entity->normalize(q, &heap);\n    ctx->topk_to_result(q);\n    query = static_cast<const char *>(query) + iv_qmeta.element_size();\n  }\n\n  return 0;\n}\n\nconst IndexSearcher::Stats &IVFStreamer::stats(void) const {\n  return stats_;\n}\n\nIndexSearcher::Context::Pointer IVFStreamer::create_context() const {\n  if (searcher_state_ != STATE_LOADED) {\n    LOG_ERROR(\"Load the index first before create context\");\n    return nullptr;\n  }\n\n  auto entity = entity_->clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone IVFEntity\");\n    return nullptr;\n  }\n\n  auto centroid_index_ctx = centroid_index_->create_context();\n  if (!centroid_index_ctx) {\n    LOG_ERROR(\"Failed to create 
centroid index context\");\n    return nullptr;\n  }\n\n  auto context =\n      new (std::nothrow) IVFSearcherContext(entity, centroid_index_ctx);\n  if (!context) {\n    LOG_ERROR(\"Failed to alloc IVFSearcherContext\");\n    return nullptr;\n  }\n  int ret = context->init(params_);\n  if (ret != 0) {\n    delete context;\n    return nullptr;\n  }\n\n  context->set_magic(magic_);\n\n  return Context::Pointer(context);\n}\n\nIndexProvider::Pointer IVFStreamer::create_provider(void) const {\n  if (searcher_state_ != STATE_LOADED) {\n    LOG_ERROR(\"Load the index first before create provider\");\n    return nullptr;\n  }\n\n  auto entity = entity_->clone();\n  if (!entity) {\n    LOG_ERROR(\"Failed to clone IVFEntity\");\n    return Provider::Pointer();\n  }\n\n  auto *provider = new (std::nothrow)\n      IVFIndexProvider(entity->has_orignal_feature() ? meta_ : entity->meta(),\n                       entity, \"IVFStreamer\");\n  if (!provider) {\n    LOG_ERROR(\"Failed to alloc IVFIndexProvider\");\n    return Provider::Pointer();\n  }\n\n  return Provider::Pointer(provider);\n}\n\nINDEX_FACTORY_REGISTER_STREAMER(IVFStreamer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#ifndef __IVF_STREAMER_H__\n#define __IVF_STREAMER_H__\n\n#include <zvec/core/framework/index_streamer.h>\n#include \"ivf_centroid_index.h\"\n#include \"ivf_entity.h\"\n#include \"ivf_searcher_context.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! IVF Searcher\n */\nclass IVFStreamer : public IndexStreamer {\n public:\n  //! Initialize Searcher\n  virtual int init(const IndexMeta & /*meta*/,\n                   const ailego::Params & /*params*/) override;\n\n  //! Cleanup Searcher\n  virtual int cleanup(void) override;\n\n  //! Load index from container\n  virtual int open(IndexStorage::Pointer storage) override;\n\n  virtual int flush(uint64_t /*check_point*/) override {\n    return 0;\n  }\n  virtual int close(void) override {\n    return this->unload();\n  }\n\n  //! Unload index\n  virtual int unload(void) override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             Context::Pointer &context) const override;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                             uint32_t count,\n                             Context::Pointer &context) const override;\n\n  //! 
Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          Context::Pointer &context) const override;\n\n  //! Similarity search\n  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                          uint32_t count,\n                          Context::Pointer &context) const override;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const override;\n\n  //! Create a searcher context\n  virtual Context::Pointer create_context(void) const override;\n\n  //! Create a new iterator\n  virtual IndexProvider::Pointer create_provider(void) const override;\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  virtual int get_vector_by_id(\n      const uint32_t id, IndexStorage::MemoryBlock &block) const override {\n    return entity_->get_vector_by_key(id, block);\n  }\n\n protected:\n  int update_context(IVFSearcherContext *ctx) const {\n    auto entity = entity_->clone();\n    if (!entity) {\n      LOG_ERROR(\"Failed to clone IVFEntity\");\n      return IndexError_Runtime;\n    }\n\n    //! The centroid index searcher may be different, so need to create one\n    auto centroid_ctx = centroid_index_->create_context();\n    if (!centroid_ctx) {\n      LOG_ERROR(\"Failed to create centroid index searcher context\");\n      return IndexError_Runtime;\n    }\n\n    return ctx->update_context(entity, centroid_ctx, params_, magic_);\n  }\n\n private:\n  //! Constants\n  static constexpr uint32_t kDefaultBfThreshold = 1000u;\n\n  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };\n\n  //! 
Members\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  IndexBuilder::Pointer builder_;\n  IVFCentroidIndex::Pointer centroid_index_{};\n  IVFEntity::Pointer entity_{};\n  uint32_t bruteforce_threshold_{kDefaultBfThreshold};\n  uint32_t magic_{0};\n  Stats stats_{};\n  State searcher_state_{STATE_INIT};\n};\n\n}  // namespace core\n}  // namespace zvec\n\n#endif  //__IVF_STREAMER_H__\n"
  },
  {
    "path": "src/core/algorithm/ivf/ivf_utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <algorithm>\n#include <memory>\n#include <mutex>\n#include <numeric>\n#include <vector>\n#include <ailego/utility/matrix_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nnamespace zvec {\nnamespace core {\n\n#ifndef ivf_check_error_code\n#define ivf_check_error_code(code) \\\n  if (ailego_unlikely((code) != 0)) return code\n#endif\n\n#ifndef ivf_assert\n#define ivf_assert(cond, code) \\\n  if (ailego_unlikely(!(cond))) return code\n#endif\n\n#ifndef ivf_check_with_msg\n#define ivf_check_with_msg(code, fmt, args...) \\\n  do {                                         \\\n    if (ailego_unlikely((code) != 0)) {        \\\n      LOG_ERROR(fmt, ##args);                  \\\n      return code;                             \\\n    }                                          \\\n  } while (0)\n#endif\n\n#ifndef ivf_assert_with_msg\n#define ivf_assert_with_msg(cond, err, fmt, args...) \\\n  do {                                               \\\n    if (ailego_unlikely(!(cond))) {                  \\\n      LOG_ERROR(fmt, ##args);                        \\\n      return err;                                    \\\n    }                                                \\\n  } while (0)\n#endif\n\n/*! Quantized Clustering Utility\n */\nclass IVFUtility {\n public:\n  //! 
Generate a random path with the specified prefix\n  static inline std::string GenerateRandomPath(const std::string &prefix) {\n    uint64_t timestamp = ailego::Monotime::MicroSeconds();\n    return prefix + std::to_string(timestamp);\n  }\n\n  //! Compute the default scan ratio for total vectors\n  static inline float ComputeScanRatio(size_t vector_count) {\n    // the fitting function for the following points: 1000000(0.02)\n    // 10000000(0.01) 50000000(0.005) 100000000(0.001)\n    float scan_ratio = -0.004 * std::log(vector_count) + 0.0751;\n    scan_ratio = std::max(scan_ratio, 0.0001f);\n    return scan_ratio;\n  }\n\n  //! Transpose the vectors in row major order to column major order\n  static inline void Transpose(size_t align_size, const void *src, size_t m,\n                               size_t dim, void *dst);\n\n  //! Transpose the vectors in column major order to row major order\n  static inline void ReverseTranspose(size_t align_size, const void *src,\n                                      size_t m, size_t dim, void *dst);\n\n  //! Aligned size of a block vectors buffer\n  static inline size_t AlignedSize(size_t fnum, size_t element_size);\n\n  //! Aligned size of one vector buffer\n  static inline size_t AlignedSize(size_t element_size);\n\n  //! Sort arr with size in ascending order, and keep the index position\n  //! n2o keep the mapping: new position => origin position\n  //! For example, the input arr = [5, 3, 9, 6, 7], size = 5, after sort\n  //      arr = [3, 5, 6, 7, 9]\n  //      n2o = [1, 0, 3, 4, 2]\n  //! To save memory, no extra memory is allocated\n  template <typename T, typename I>\n  static void Sort(T *arr, std::vector<I> *n2o, size_t size) {\n    std::vector<I> o2n;\n    o2n.resize(size);\n    n2o->resize(size);\n\n    std::iota(n2o->begin(), n2o->end(), 0U);\n    std::sort(n2o->begin(), n2o->end(),\n              [&](I i, I j) { return arr[i] < arr[j]; });\n    for (I i = 0U; i < size; ++i) {\n      o2n[(*n2o)[i]] = i;\n    }\n    //! 
reorder arr in place, according to given n2o index\n    for (I i = 0; i < size; ++i) {\n      if (i != (*n2o)[i]) {\n        T tmp = arr[i];\n        I j = i, k;\n        while (i != (k = (*n2o)[j])) {\n          arr[j] = arr[k];\n          (*n2o)[j] = j;\n          j = k;\n        }\n        arr[j] = tmp;\n        (*n2o)[j] = j;\n      }\n    }\n\n    for (I i = 0U; i < size; ++i) {\n      (*n2o)[o2n[i]] = i;\n    }\n  }\n\n  //! Transpose one vector in block\n  template <typename T>\n  static inline void TransposeOne(const void *src, size_t M, size_t N,\n                                  void *dst) {\n    for (size_t i = 0; i < N; ++i) {\n      reinterpret_cast<T *>(dst)[i] = reinterpret_cast<const T *>(src)[i * M];\n    }\n  }\n};\n\nvoid IVFUtility::Transpose(size_t align_size, const void *src, size_t m,\n                           size_t dim, void *dst) {\n  switch (align_size) {\n    case 2:\n      ailego::MatrixHelper::Transpose<uint16_t>(src, m, dim, dst);\n      break;\n    case 4:\n      ailego::MatrixHelper::Transpose<uint32_t>(src, m, dim, dst);\n      break;\n    case 8:\n      ailego::MatrixHelper::Transpose<uint64_t>(src, m, dim, dst);\n      break;\n  }\n}\n\nvoid IVFUtility::ReverseTranspose(size_t align_size, const void *src, size_t m,\n                                  size_t dim, void *dst) {\n  switch (align_size) {\n    case 2:\n      ailego::MatrixHelper::ReverseTranspose<uint16_t>(src, m, dim, dst);\n      break;\n    case 4:\n      ailego::MatrixHelper::ReverseTranspose<uint32_t>(src, m, dim, dst);\n      break;\n    case 8:\n      ailego::MatrixHelper::ReverseTranspose<uint64_t>(src, m, dim, dst);\n      break;\n  }\n}\n\nsize_t IVFUtility::AlignedSize(size_t fnum, size_t element_size) {\n  return ailego_align(fnum * element_size, 32);\n}\n\nsize_t IVFUtility::AlignedSize(size_t element_size) {\n  return ailego_align(element_size, 32);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_framework \n    STATIC STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS zvec_ailego\n    INCS . ${PROJECT_ROOT_DIR}/src/core\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/framework/index_cluster.cc",
    "content": "// namespace aitheta2\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/core/framework/index_bundle.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_error.h>\n\nnamespace zvec {\nnamespace core {\n\nstatic const std::string CLUSTER_CENTROIDS_FEATURES_NAME =\n    \"IndexCluster.Centroids.Features\";\nstatic const std::string CLUSTER_CENTROIDS_INDEXES_NAME =\n    \"IndexCluster.Centroids.Indexes\";\n\n/*! 
Item Centroid Format\n */\nstruct ItemCentroidFormat {\n  uint32_t parent;\n  uint32_t reserved0_;\n  uint64_t follows;\n  double score;\n  uint64_t reserved1_;\n};\n\nstatic inline bool GatherSubitemsCount(const ItemCentroidFormat *format,\n                                       size_t count,\n                                       std::vector<uint32_t> *out) {\n  out->resize(count + 1);\n\n  for (const ItemCentroidFormat *it = format, *end = format + count; it != end;\n       ++it) {\n    uint32_t parent = it->parent + 1;\n    if (parent > count) {\n      return false;\n    }\n    (*out)[parent] += 1;\n  }\n  return (out->front() != 0);\n}\n\nint IndexCluster::Deserialize(const IndexMeta &meta,\n                              IndexBundle::Pointer bundle,\n                              CentroidList *cents) {\n  if (!bundle || !cents) {\n    return IndexError_InvalidArgument;\n  }\n\n  ailego::BlobWrap features = bundle->get(CLUSTER_CENTROIDS_FEATURES_NAME);\n  ailego::BlobWrap indexes = bundle->get(CLUSTER_CENTROIDS_INDEXES_NAME);\n\n  if (!features.is_valid() || !indexes.is_valid()) {\n    return IndexError_InvalidArgument;\n  }\n\n  if (features.size() % meta.element_size() != 0 ||\n      indexes.size() % sizeof(ItemCentroidFormat) != 0) {\n    return IndexError_InvalidLength;\n  }\n\n  size_t count = features.size() / meta.element_size();\n  if (indexes.size() / sizeof(ItemCentroidFormat) != count) {\n    return IndexError_InvalidLength;\n  }\n\n  const ItemCentroidFormat *format =\n      reinterpret_cast<const ItemCentroidFormat *>(indexes.buffer());\n  std::vector<uint32_t> subitems;\n\n  if (!GatherSubitemsCount(format, count, &subitems)) {\n    return IndexError_InvalidFormat;\n  }\n\n  std::vector<Centroid *> items;\n  items.reserve(count);\n  cents->clear();\n  cents->reserve(subitems.front());\n\n  const uint8_t *feat = reinterpret_cast<const uint8_t *>(features.buffer());\n  size_t feat_size = meta.element_size();\n\n  for (size_t i = 0; i < count; ++i, 
++format, feat += feat_size) {\n    CentroidList *current = cents;\n\n    if (format->parent != static_cast<uint32_t>(-1)) {\n      if (format->parent >= items.size()) {\n        return IndexError_InvalidFormat;\n      }\n      current = items[format->parent]->mutable_subitems();\n    }\n    current->emplace_back(feat, feat_size);\n\n    // Update information\n    Centroid *last_one = &(current->back());\n    last_one->set_follows(static_cast<size_t>(format->follows));\n    last_one->set_score(format->score);\n    last_one->mutable_subitems()->reserve(subitems[i + 1]);\n    items.push_back(last_one);\n  }\n  return 0;\n}\n\nstatic void SerializeToBuffers(const IndexCluster::CentroidList &cents,\n                               std::string *features, std::string *indexes) {\n  uint32_t parent =\n      static_cast<uint32_t>(indexes->size() / sizeof(ItemCentroidFormat)) - 1;\n\n  for (const auto &it : cents) {\n    ItemCentroidFormat format{parent, 0, it.follows(), it.score(), 0};\n    indexes->append(reinterpret_cast<const char *>(&format), sizeof(format));\n    features->append(reinterpret_cast<const char *>(it.feature()), it.size());\n\n    if (!it.subitems().empty()) {\n      SerializeToBuffers(it.subitems(), features, indexes);\n    }\n  }\n}\n\nint IndexCluster::Serialize(const IndexMeta &meta, const CentroidList &cents,\n                            IndexBundle::Pointer *out) {\n  size_t cents_total = cents.size();\n\n  // Check the centroids\n  for (const auto &it : cents) {\n    if (!it.is_matched(meta)) {\n      return IndexError_Mismatch;\n    }\n    cents_total += it.subcount();\n  }\n\n  std::string features, indexes;\n  features.reserve(cents_total * meta.element_size());\n  indexes.reserve(cents_total * sizeof(ItemCentroidFormat));\n  SerializeToBuffers(cents, &features, &indexes);\n\n  std::shared_ptr<MemoryIndexBundle> bundle =\n      std::make_shared<MemoryIndexBundle>();\n\n  bundle->set(CLUSTER_CENTROIDS_FEATURES_NAME, std::move(features));\n  
bundle->set(CLUSTER_CENTROIDS_INDEXES_NAME, std::move(indexes));\n  *out = std::move(bundle);\n\n  return 0;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <atomic>\n#include <random>\n#include <zvec/core/framework/index_context.h>\n\nnamespace zvec {\nnamespace core {\n\nuint32_t IndexContext::GenerateMagic(void) {\n  static std::atomic_uint32_t magic_number{std::random_device()()};\n  return magic_number.fetch_add(1);\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/core/framework/index_converter.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_helper.h>\n\nnamespace zvec {\nnamespace core {\n\nint IndexConverter::TrainAndTransform(const IndexConverter::Pointer &converter,\n                                      IndexHolder::Pointer holder) {\n  auto two_pass_holder = IndexHelper::MakeTwoPassHolder(std::move(holder));\n  int ret = converter->train(two_pass_holder);\n  if (ret == 0) {\n    ret = converter->transform(std::move(two_pass_holder));\n  }\n  return ret;\n}\n\nint IndexConverter::TrainTransformAndDump(\n    const IndexConverter::Pointer &converter, IndexHolder::Pointer holder,\n    const IndexDumper::Pointer &dumper) {\n  int ret = IndexConverter::TrainAndTransform(converter, std::move(holder));\n  if (ret == 0) {\n    ret = converter->dump(dumper);\n  }\n  return ret;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_error.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/core/framework/index_error.h>\n\nnamespace zvec {\nnamespace core {\n\nINDEX_ERROR_CODE_DEFINE(Success, 0, \"Success\");\nINDEX_ERROR_CODE_DEFINE(Runtime, 1, \"Runtime error\");\nINDEX_ERROR_CODE_DEFINE(Logic, 2, \"Logic error\");\nINDEX_ERROR_CODE_DEFINE(Type, 3, \"Type error\");\nINDEX_ERROR_CODE_DEFINE(System, 4, \"System call error\");\nINDEX_ERROR_CODE_DEFINE(Cast, 5, \"Cast error\");\nINDEX_ERROR_CODE_DEFINE(IO, 6, \"IO error\");\nINDEX_ERROR_CODE_DEFINE(AuthExpired, 7, \"Auth expired error\");\n\nINDEX_ERROR_CODE_DEFINE(NotImplemented, 11, \"Not implemented\");\nINDEX_ERROR_CODE_DEFINE(Unsupported, 12, \"Unsupported\");\nINDEX_ERROR_CODE_DEFINE(Denied, 13, \"Permission denied\");\nINDEX_ERROR_CODE_DEFINE(Canceled, 14, \"Operation canceled\");\nINDEX_ERROR_CODE_DEFINE(Overflow, 15, \"Overflow\");\nINDEX_ERROR_CODE_DEFINE(Underflow, 16, \"Underflow\");\nINDEX_ERROR_CODE_DEFINE(OutOfRange, 17, \"Out of range\");\nINDEX_ERROR_CODE_DEFINE(NoBuffer, 18, \"No buffer space available\");\nINDEX_ERROR_CODE_DEFINE(NoMemory, 19, \"Not enough space\");\nINDEX_ERROR_CODE_DEFINE(NoParamFound, 20, \"No parameter found\");\nINDEX_ERROR_CODE_DEFINE(NoReady, 21, \"No ready\");\nINDEX_ERROR_CODE_DEFINE(NoExist, 22, \"No exist\");\nINDEX_ERROR_CODE_DEFINE(Exist, 23, \"Already exist\");\nINDEX_ERROR_CODE_DEFINE(Mismatch, 24, 
\"Mismatch\");\nINDEX_ERROR_CODE_DEFINE(Duplicate, 25, \"Duplicate\");\nINDEX_ERROR_CODE_DEFINE(Uninitialized, 26, \"Uninitialized\");\n\nINDEX_ERROR_CODE_DEFINE(InvalidArgument, 31, \"Invalid argument\");\nINDEX_ERROR_CODE_DEFINE(InvalidFormat, 32, \"Invalid format\");\nINDEX_ERROR_CODE_DEFINE(InvalidLength, 33, \"Invalid length\");\nINDEX_ERROR_CODE_DEFINE(InvalidChecksum, 34, \"Invalid checksum\");\nINDEX_ERROR_CODE_DEFINE(InvalidValue, 35, \"Invalid value\");\n\nINDEX_ERROR_CODE_DEFINE(CreateDirectory, 101, \"Create directory error\");\nINDEX_ERROR_CODE_DEFINE(OpenDirectory, 102, \"Open directory error\");\nINDEX_ERROR_CODE_DEFINE(Serialize, 105, \"Serialize error\");\nINDEX_ERROR_CODE_DEFINE(Deserialize, 106, \"Deserialize error\");\nINDEX_ERROR_CODE_DEFINE(CreateFile, 111, \"Create file error\");\nINDEX_ERROR_CODE_DEFINE(OpenFile, 112, \"Open file error\");\nINDEX_ERROR_CODE_DEFINE(SeekFile, 113, \"Seek file error\");\nINDEX_ERROR_CODE_DEFINE(CloseFile, 114, \"Close file error\");\nINDEX_ERROR_CODE_DEFINE(TruncateFile, 115, \"TruncateFile file error\");\nINDEX_ERROR_CODE_DEFINE(MMapFile, 116, \"MMap file error\");\nINDEX_ERROR_CODE_DEFINE(FlushFile, 117, \"Flush file error\");\nINDEX_ERROR_CODE_DEFINE(WriteData, 121, \"Write data error\");\nINDEX_ERROR_CODE_DEFINE(ReadData, 122, \"Read data error\");\n\nINDEX_ERROR_CODE_DEFINE(PackIndex, 201, \"Read data error\");\nINDEX_ERROR_CODE_DEFINE(UnpackIndex, 202, \"Read data error\");\nINDEX_ERROR_CODE_DEFINE(IndexLoaded, 203, \"Index loaded\");\nINDEX_ERROR_CODE_DEFINE(NoIndexLoaded, 204, \"No index loaded\");\nINDEX_ERROR_CODE_DEFINE(NoTrained, 205, \"No trained\");\nINDEX_ERROR_CODE_DEFINE(IndexFull, 206, \"Index full\");\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_factory.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\nIndexMetric::Pointer IndexFactory::CreateMetric(const std::string &name) {\n  IndexMetric::Pointer obj =\n      ailego::Factory<IndexMetric>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasMetric(const std::string &name) {\n  return ailego::Factory<IndexMetric>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllMetrics(void) {\n  return ailego::Factory<IndexMetric>::Classes();\n}\n\nIndexLogger::Pointer IndexFactory::CreateLogger(const std::string &name) {\n  IndexLogger::Pointer obj =\n      ailego::Factory<IndexLogger>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasLogger(const std::string &name) {\n  return ailego::Factory<IndexLogger>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllLoggers(void) {\n  return ailego::Factory<IndexLogger>::Classes();\n}\n\nIndexDumper::Pointer IndexFactory::CreateDumper(const std::string &name) {\n  IndexDumper::Pointer obj =\n      ailego::Factory<IndexDumper>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasDumper(const std::string &name) {\n  return 
ailego::Factory<IndexDumper>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllDumpers(void) {\n  return ailego::Factory<IndexDumper>::Classes();\n}\n\nIndexStorage::Pointer IndexFactory::CreateStorage(const std::string &name) {\n  IndexStorage::Pointer obj =\n      ailego::Factory<IndexStorage>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasStorage(const std::string &name) {\n  return ailego::Factory<IndexStorage>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllStorages(void) {\n  return ailego::Factory<IndexStorage>::Classes();\n}\n\nIndexConverter::Pointer IndexFactory::CreateConverter(const std::string &name) {\n  IndexConverter::Pointer obj =\n      ailego::Factory<IndexConverter>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasConverter(const std::string &name) {\n  return ailego::Factory<IndexConverter>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllConverters(void) {\n  return ailego::Factory<IndexConverter>::Classes();\n}\n\nIndexReformer::Pointer IndexFactory::CreateReformer(const std::string &name) {\n  IndexReformer::Pointer obj =\n      ailego::Factory<IndexReformer>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasReformer(const std::string &name) {\n  return ailego::Factory<IndexReformer>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllReformers(void) {\n  return ailego::Factory<IndexReformer>::Classes();\n}\n\nIndexTrainer::Pointer IndexFactory::CreateTrainer(const std::string &name) {\n  IndexTrainer::Pointer obj =\n      ailego::Factory<IndexTrainer>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasTrainer(const std::string &name) {\n  return 
ailego::Factory<IndexTrainer>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllTrainers(void) {\n  return ailego::Factory<IndexTrainer>::Classes();\n}\n\nIndexBuilder::Pointer IndexFactory::CreateBuilder(const std::string &name) {\n  IndexBuilder::Pointer obj =\n      ailego::Factory<IndexBuilder>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasBuilder(const std::string &name) {\n  return ailego::Factory<IndexBuilder>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllBuilders(void) {\n  return ailego::Factory<IndexBuilder>::Classes();\n}\n\nIndexSearcher::Pointer IndexFactory::CreateSearcher(const std::string &name) {\n  IndexSearcher::Pointer obj =\n      ailego::Factory<IndexSearcher>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasSearcher(const std::string &name) {\n  return ailego::Factory<IndexSearcher>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllSearchers(void) {\n  return ailego::Factory<IndexSearcher>::Classes();\n}\n\nIndexStreamer::Pointer IndexFactory::CreateStreamer(const std::string &name) {\n  IndexStreamer::Pointer obj =\n      ailego::Factory<IndexStreamer>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasStreamer(const std::string &name) {\n  return ailego::Factory<IndexStreamer>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllStreamers(void) {\n  return ailego::Factory<IndexStreamer>::Classes();\n}\n\nIndexReducer::Pointer IndexFactory::CreateReducer(const std::string &name) {\n  IndexReducer::Pointer obj =\n      ailego::Factory<IndexReducer>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasReducer(const std::string &name) {\n  return 
ailego::Factory<IndexReducer>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllReducers(void) {\n  return ailego::Factory<IndexReducer>::Classes();\n}\n\n\nIndexCluster::Pointer IndexFactory::CreateCluster(const std::string &name) {\n  IndexCluster::Pointer obj =\n      ailego::Factory<IndexCluster>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasCluster(const std::string &name) {\n  return ailego::Factory<IndexCluster>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllClusters(void) {\n  return ailego::Factory<IndexCluster>::Classes();\n}\n\nIndexStreamerReducer::Pointer IndexFactory::CreateStreamerReducer(\n    const std::string &name) {\n  IndexStreamerReducer::Pointer obj =\n      ailego::Factory<IndexStreamerReducer>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasStreamerReducer(const std::string &name) {\n  return ailego::Factory<IndexStreamerReducer>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllStreamerReducers(void) {\n  return ailego::Factory<IndexStreamerReducer>::Classes();\n}\n\nIndexRefiner::Pointer IndexFactory::CreateRefiner(const std::string &name) {\n  IndexRefiner::Pointer obj =\n      ailego::Factory<IndexRefiner>::MakeShared(name.c_str());\n  if (obj) {\n    obj->set_name(name);\n  }\n  return obj;\n}\n\nbool IndexFactory::HasRefiner(const std::string &name) {\n  return ailego::Factory<IndexRefiner>::Has(name.c_str());\n}\n\nstd::vector<std::string> IndexFactory::AllRefiners(void) {\n  return ailego::Factory<IndexRefiner>::Classes();\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_flow.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_flow.h>\n#include <zvec/core/framework/index_helper.h>\n\n//! Default storage\n#define INDEX_FLOW_STORAGE_DEFAULT \"MMapFileReadStorage\"\n\nnamespace zvec {\nnamespace core {\n\n// Index Flow\nint IndexFlow::set_storage(const std::string &name,\n                           const ailego::Params &params) {\n  storage_ = IndexFactory::CreateStorage(name);\n  if (!storage_) {\n    LOG_ERROR(\"Failed to create a index storage with name: %s\", name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = storage_->init(params);\n  if (ret < 0) {\n    storage_ = nullptr;\n    LOG_ERROR(\"Failed to initialize index storage %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexFlow::set_searcher(IndexSearcher::Pointer searcher) {\n  user_searcher_ = searcher;\n\n  return 0;\n}\n\nint IndexFlow::set_searcher(const std::string &name,\n                            const ailego::Params &params) {\n  user_searcher_ = IndexFactory::CreateSearcher(name);\n  if (!user_searcher_) {\n    LOG_ERROR(\"Failed to create a index searcher with name: %s\", name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = user_searcher_->init(params);\n  if (ret < 0) {\n    user_searcher_ = 
nullptr;\n    LOG_ERROR(\"Failed to initialize index searcher %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexFlow::set_reformer(const std::string &name,\n                            const ailego::Params &params) {\n  user_reformer_ = IndexFactory::CreateReformer(name);\n  if (!user_reformer_) {\n    LOG_ERROR(\"Failed to create a index reformer with name: %s\", name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = user_reformer_->init(params);\n  if (ret < 0) {\n    user_reformer_ = nullptr;\n    LOG_ERROR(\"Failed to initialize index reformer %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexFlow::set_metric(const std::string &name,\n                          const ailego::Params &params) {\n  if (!IndexFactory::HasMetric(name)) {\n    LOG_ERROR(\"The index metric with name %s does not exist.\", name.c_str());\n    return IndexError_NoExist;\n  }\n  user_metric_name_ = name;\n  user_metric_params_ = params;\n  return 0;\n}\n\nint IndexFlow::load(const std::string &path) {\n  // Prepare storage\n  if (!storage_) {\n    this->set_storage(INDEX_FLOW_STORAGE_DEFAULT, ailego::Params());\n  }\n\n  if (!storage_) {\n    LOG_ERROR(\"The index storage is uninitialized.\");\n    return IndexError_Uninitialized;\n  }\n\n  int ret = storage_->open(path, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index with storage %s\", storage_->name().c_str());\n    return ret;\n  }\n\n  ret = IndexHelper::DeserializeFromStorage(storage_.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize index meta with storage %s\",\n              storage_->name().c_str());\n    return ret;\n  }\n\n  ret = load_internal();\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index with storage %s\", storage_->name().c_str());\n    return ret;\n  }\n\n  return 0;\n}\n\nint IndexFlow::load_internal() {\n  // Prepare metric\n  const std::string &metric_name =\n      user_metric_name_.empty() ? 
meta_.metric_name() : user_metric_name_;\n  const ailego::Params &metric_params =\n      user_metric_name_.empty() ? meta_.metric_params() : user_metric_params_;\n  if (metric_name.empty()) {\n    LOG_ERROR(\"The metric name from index file is empty.\");\n    return IndexError_NoExist;\n  }\n  metric_ = IndexFactory::CreateMetric(metric_name);\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create a index metric with name: %s\",\n              metric_name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta_, metric_params);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to initialize index metric %s\", metric_name.c_str());\n    metric_ = nullptr;\n    return ret;\n  }\n  if (!metric_->is_matched(meta_)) {\n    LOG_ERROR(\"The index meta is unmatched for index metric %s\",\n              metric_->name().c_str());\n    return IndexError_Mismatch;\n  }\n  auto query_metric = metric_->query_metric();\n  if (query_metric) {\n    metric_ = query_metric;\n  }\n\n  // Prepare reformer\n  if (!user_reformer_) {\n    const std::string &reformer_name = meta_.reformer_name();\n    if (!reformer_name.empty()) {\n      reformer_ = IndexFactory::CreateReformer(reformer_name);\n      if (!reformer_) {\n        LOG_ERROR(\"Failed to create a index reformer with name: %s\",\n                  reformer_name.c_str());\n        return IndexError_NoExist;\n      }\n      ret = reformer_->init(meta_.reformer_params());\n      if (ret < 0) {\n        LOG_ERROR(\"Failed to initialize index reformer %s\",\n                  reformer_name.c_str());\n        reformer_ = nullptr;\n        return ret;\n      }\n    }\n  } else {\n    // Using user reformer\n    reformer_ = user_reformer_;\n  }\n\n  if (reformer_) {\n    ret = reformer_->load(storage_);\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to load index with reformer %s, storage %s\",\n                reformer_->name().c_str(), storage_->name().c_str());\n      return ret;\n    }\n  }\n\n  // Prepare searcher\n  if 
(!user_searcher_) {\n    const std::string &name = meta_.searcher_name();\n    if (name.empty()) {\n      LOG_ERROR(\"The searcher name from index file is empty.\");\n      return IndexError_NoExist;\n    }\n    searcher_ = IndexFactory::CreateSearcher(name);\n    if (!searcher_) {\n      LOG_ERROR(\"Failed to create a index searcher with name: %s\",\n                name.c_str());\n      return IndexError_NoExist;\n    }\n    ret = searcher_->init(meta_.searcher_params());\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to initialize index searcher %s\", name.c_str());\n      searcher_ = nullptr;\n      return ret;\n    }\n  } else {\n    // Using user searcher\n    searcher_ = user_searcher_;\n  }\n\n  ret = searcher_->load(storage_, metric_);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to load index with searcher %s, storage %s, metric %s\",\n              searcher_->name().c_str(), storage_->name().c_str(),\n              metric_->name().c_str());\n    return ret;\n  }\n\n  // searcher_->print_all_neighbour();\n\n  return 0;\n}\n\nint IndexFlow::unload(void) {\n  if (searcher_) {\n    int ret = searcher_->unload();\n    if (ret < 0) {\n      LOG_WARN(\"Unload index searcher %s error, %d\", searcher_->name().c_str(),\n               ret);\n    }\n    searcher_ = nullptr;\n  }\n  if (reformer_) {\n    int ret = reformer_->unload();\n    if (ret < 0) {\n      LOG_WARN(\"Unload index reformer %s error, %d\", reformer_->name().c_str(),\n               ret);\n    }\n    reformer_ = nullptr;\n  }\n  if (metric_) {\n    int ret = metric_->cleanup();\n    if (ret < 0) {\n      LOG_WARN(\"Cleanup index metric %s error, %d\", metric_->name().c_str(),\n               ret);\n    }\n    metric_ = nullptr;\n  }\n  if (storage_) {\n    int ret = storage_->cleanup();\n    if (ret < 0) {\n      LOG_WARN(\"Cleanup index storage %s error, %d\", storage_->name().c_str(),\n               ret);\n    }\n    storage_ = nullptr;\n  }\n  return 0;\n}\n\nint IndexFlow::search_bf_impl(const 
void *query, const IndexQueryMeta &qmeta,\n                              Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(query, qmeta, context->mutable_features(),\n                                      &new_qmeta);\n    if (error_code == 0) {\n      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n        return IndexError_Mismatch;\n      }\n      error_code = searcher_->search_bf_impl(\n          reinterpret_cast<const void *>(context->features().data()), new_qmeta,\n          context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        searcher_->search_bf_impl(query, qmeta, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (auto &it : const_cast<IndexDocumentList &>(\n               context->searcher_context()->result())) {\n        metric_->normalize(it.mutable_score());\n      }\n    }\n    if (reformer_) {\n      error_code =\n          reformer_->normalize(query, qmeta,\n                               const_cast<IndexDocumentList &>(\n                                   context->searcher_context()->result()));\n    }\n  }\n  return error_code;\n}\n\nint IndexFlow::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                           Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(query, qmeta, context->mutable_features(),\n                                      &new_qmeta);\n    if (error_code == 0) {\n      if (ailego_unlikely(!metric_->is_matched(meta_, 
new_qmeta))) {\n        return IndexError_Mismatch;\n      }\n      error_code = searcher_->search_impl(\n          reinterpret_cast<const void *>(context->features().data()), new_qmeta,\n          context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        searcher_->search_impl(query, qmeta, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (auto &it : const_cast<IndexDocumentList &>(\n               context->searcher_context()->result())) {\n        metric_->normalize(it.mutable_score());\n      }\n    }\n    if (reformer_) {\n      error_code =\n          reformer_->normalize(query, qmeta,\n                               const_cast<IndexDocumentList &>(\n                                   context->searcher_context()->result()));\n    }\n  }\n  return error_code;\n}\n\nint IndexFlow::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                              uint32_t count, Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !count || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(query, qmeta, count,\n                                      context->mutable_features(), &new_qmeta);\n    if (error_code == 0) {\n      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n        return IndexError_Mismatch;\n      }\n      error_code = searcher_->search_bf_impl(\n          reinterpret_cast<const void *>(context->features().data()), new_qmeta,\n          count, context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code = searcher_->search_bf_impl(query, qmeta, count,\n                                        
   context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (uint32_t i = 0; i < count; ++i) {\n        IndexDocumentList &result = const_cast<IndexDocumentList &>(\n            context->searcher_context()->result(i));\n\n        for (auto &it : result) {\n          metric_->normalize(it.mutable_score());\n        }\n      }\n    }\n    if (reformer_) {\n      size_t offset = 0;\n      for (uint32_t i = 0; i < count; ++i) {\n        error_code = reformer_->normalize(\n            reinterpret_cast<const uint8_t *>(query) + offset, qmeta,\n            const_cast<IndexDocumentList &>(\n                context->searcher_context()->result(i)));\n        if (error_code != 0) {\n          break;\n        }\n        offset += qmeta.element_size();\n      }\n    }\n  }\n  return error_code;\n}\n\nint IndexFlow::search_impl(const void *query, const IndexQueryMeta &qmeta,\n                           uint32_t count, Context::Pointer &context) const {\n  if (ailego_unlikely(!query || !count || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(query, qmeta, count,\n                                      context->mutable_features(), &new_qmeta);\n    if (error_code == 0) {\n      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n        return IndexError_Mismatch;\n      }\n      error_code = searcher_->search_impl(\n          reinterpret_cast<const void *>(context->features().data()), new_qmeta,\n          count, context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code = searcher_->search_impl(query, qmeta, count,\n                                        context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (uint32_t 
i = 0; i < count; ++i) {\n        IndexDocumentList &result = const_cast<IndexDocumentList &>(\n            context->searcher_context()->result(i));\n\n        for (auto &it : result) {\n          metric_->normalize(it.mutable_score());\n        }\n      }\n    }\n    if (reformer_) {\n      size_t offset = 0;\n      for (uint32_t i = 0; i < count; ++i) {\n        error_code = reformer_->normalize(\n            reinterpret_cast<const uint8_t *>(query) + offset, qmeta,\n            const_cast<IndexDocumentList &>(\n                context->searcher_context()->result(i)));\n        if (error_code != 0) {\n          break;\n        }\n        offset += qmeta.element_size();\n      }\n    }\n  }\n  return error_code;\n}\n\n// Index Sparse Flow\nint IndexSparseFlow::set_storage(const std::string &name,\n                                 const ailego::Params &params) {\n  storage_ = IndexFactory::CreateStorage(name);\n  if (!storage_) {\n    LOG_ERROR(\"Failed to create a index storage with name: %s\", name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = storage_->init(params);\n  if (ret < 0) {\n    storage_ = nullptr;\n    LOG_ERROR(\"Failed to initialize index storage %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexSparseFlow::set_searcher(IndexSearcher::Pointer searcher) {\n  user_searcher_ = searcher;\n\n  return 0;\n}\n\nint IndexSparseFlow::set_searcher(const std::string &name,\n                                  const ailego::Params &params) {\n  user_searcher_ = IndexFactory::CreateSearcher(name);\n  if (!user_searcher_) {\n    LOG_ERROR(\"Failed to create a index sparse searcher with name: %s\",\n              name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = user_searcher_->init(params);\n  if (ret < 0) {\n    user_searcher_ = nullptr;\n    LOG_ERROR(\"Failed to initialize index sparse searcher %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexSparseFlow::set_reformer(const std::string 
&name,\n                                  const ailego::Params &params) {\n  user_reformer_ = IndexFactory::CreateReformer(name);\n  if (!user_reformer_) {\n    LOG_ERROR(\"Failed to create a index sparse reformer with name: %s\",\n              name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = user_reformer_->init(params);\n  if (ret < 0) {\n    user_reformer_ = nullptr;\n    LOG_ERROR(\"Failed to initialize index sparse reformer %s\", name.c_str());\n    return ret;\n  }\n  return 0;\n}\n\nint IndexSparseFlow::set_metric(const std::string &name,\n                                const ailego::Params &params) {\n  if (!IndexFactory::HasMetric(name)) {\n    LOG_ERROR(\"The index metric with name %s does not exist.\", name.c_str());\n    return IndexError_NoExist;\n  }\n  user_metric_name_ = name;\n  user_metric_params_ = params;\n  return 0;\n}\n\nint IndexSparseFlow::load(const std::string &path) {\n  // Prepare storage\n  if (!storage_) {\n    this->set_storage(INDEX_FLOW_STORAGE_DEFAULT, ailego::Params());\n  }\n\n  if (!storage_) {\n    LOG_ERROR(\"The index storage is uninitialized.\");\n    return IndexError_Uninitialized;\n  }\n\n  int ret = storage_->open(path, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index with storage %s\", storage_->name().c_str());\n    return ret;\n  }\n\n  ret = IndexHelper::DeserializeFromStorage(storage_.get(), &meta_);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to deserialize index meta with storage %s\",\n              storage_->name().c_str());\n    return ret;\n  }\n\n  ret = load_internal();\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index with storage %s\", storage_->name().c_str());\n    return ret;\n  }\n\n  return 0;\n}\n\nint IndexSparseFlow::load_internal() {\n  // Prepare metric\n  const std::string &metric_name =\n      user_metric_name_.empty() ? meta_.metric_name() : user_metric_name_;\n  const ailego::Params &metric_params =\n      user_metric_name_.empty() ? 
meta_.metric_params() : user_metric_params_;\n  if (metric_name.empty()) {\n    LOG_ERROR(\"The metric name from index file is empty.\");\n    return IndexError_NoExist;\n  }\n  metric_ = IndexFactory::CreateMetric(metric_name);\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create a index metric with name: %s\",\n              metric_name.c_str());\n    return IndexError_NoExist;\n  }\n  int ret = metric_->init(meta_, metric_params);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to initialize index metric %s\", metric_name.c_str());\n    metric_ = nullptr;\n    return ret;\n  }\n\n  auto query_metric = metric_->query_metric();\n  if (query_metric) {\n    metric_ = query_metric;\n  }\n\n  // Prepare reformer\n  if (!user_reformer_) {\n    const std::string &reformer_name = meta_.reformer_name();\n    if (!reformer_name.empty()) {\n      reformer_ = IndexFactory::CreateReformer(reformer_name);\n      if (!reformer_) {\n        LOG_ERROR(\"Failed to create a index sparse reformer with name: %s\",\n                  reformer_name.c_str());\n        return IndexError_NoExist;\n      }\n      ret = reformer_->init(meta_.reformer_params());\n      if (ret < 0) {\n        LOG_ERROR(\"Failed to initialize index reformer %s\",\n                  reformer_name.c_str());\n        reformer_ = nullptr;\n        return ret;\n      }\n    }\n  } else {\n    // Using user reformer\n    reformer_ = user_reformer_;\n  }\n\n  if (reformer_) {\n    ret = reformer_->load(storage_);\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to load index with reformer %s, storage %s\",\n                reformer_->name().c_str(), storage_->name().c_str());\n      return ret;\n    }\n  }\n\n  // Prepare searcher\n  if (!user_searcher_) {\n    const std::string &name = meta_.searcher_name();\n    if (name.empty()) {\n      LOG_ERROR(\"The searcher name from index file is empty.\");\n      return IndexError_NoExist;\n    }\n    searcher_ = IndexFactory::CreateSearcher(name);\n    if (!searcher_) {\n      
LOG_ERROR(\"Failed to create a index searcher with name: %s\",\n                name.c_str());\n      return IndexError_NoExist;\n    }\n    ret = searcher_->init(meta_.searcher_params());\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to initialize index searcher %s\", name.c_str());\n      searcher_ = nullptr;\n      return ret;\n    }\n  } else {\n    // Using user searcher\n    searcher_ = user_searcher_;\n  }\n\n  ret = searcher_->load(storage_, metric_);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to load index with searcher %s, storage %s, metric %s\",\n              searcher_->name().c_str(), storage_->name().c_str(),\n              metric_->name().c_str());\n    return ret;\n  }\n\n  // searcher_->print_all_neighbour();\n\n  return 0;\n}\n\nint IndexSparseFlow::unload(void) {\n  if (searcher_) {\n    int ret = searcher_->unload();\n    if (ret < 0) {\n      LOG_WARN(\"Unload index searcher %s error, %d\", searcher_->name().c_str(),\n               ret);\n    }\n    searcher_ = nullptr;\n  }\n  if (reformer_) {\n    int ret = reformer_->unload();\n    if (ret < 0) {\n      LOG_WARN(\"Unload index reformer %s error, %d\", reformer_->name().c_str(),\n               ret);\n    }\n    reformer_ = nullptr;\n  }\n  if (metric_) {\n    int ret = metric_->cleanup();\n    if (ret < 0) {\n      LOG_WARN(\"Cleanup index metric %s error, %d\", metric_->name().c_str(),\n               ret);\n    }\n    metric_ = nullptr;\n  }\n  if (storage_) {\n    int ret = storage_->cleanup();\n    if (ret < 0) {\n      LOG_WARN(\"Cleanup index storage %s error, %d\", storage_->name().c_str(),\n               ret);\n    }\n    storage_ = nullptr;\n  }\n  return 0;\n}\n\nint IndexSparseFlow::search_bf_impl(const uint32_t sparse_count,\n                                    const uint32_t *sparse_indices,\n                                    const void *sparse_query,\n                                    const IndexQueryMeta &qmeta,\n                                    Context::Pointer 
&context) const {\n  if (ailego_unlikely(!context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    std::string ovec;\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(sparse_count, sparse_indices,\n                                      sparse_query, qmeta, &ovec, &new_qmeta);\n    if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n      return IndexError_Mismatch;\n    }\n    if (error_code == 0) {\n      error_code =\n          searcher_->search_bf_impl(sparse_count, sparse_indices, ovec.data(),\n                                    new_qmeta, context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        searcher_->search_bf_impl(sparse_count, sparse_indices, sparse_query,\n                                  qmeta, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (auto &it : const_cast<IndexDocumentList &>(\n               context->searcher_context()->result())) {\n        metric_->normalize(it.mutable_score());\n      }\n    }\n  }\n  return error_code;\n}\n\nint IndexSparseFlow::search_impl(const uint32_t sparse_count,\n                                 const uint32_t *sparse_indices,\n                                 const void *sparse_query,\n                                 const IndexQueryMeta &qmeta,\n                                 Context::Pointer &context) const {\n  if (ailego_unlikely(!context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    std::string ovec;\n    IndexQueryMeta new_qmeta;\n    error_code = reformer_->transform(sparse_count, sparse_indices,\n                                      sparse_query, qmeta, &ovec, &new_qmeta);\n    if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n      return IndexError_Mismatch;\n    
}\n    if (error_code == 0) {\n      error_code =\n          searcher_->search_impl(sparse_count, sparse_indices, ovec.data(),\n                                 new_qmeta, context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        searcher_->search_impl(sparse_count, sparse_indices, sparse_query,\n                               qmeta, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (auto &it : const_cast<IndexDocumentList &>(\n               context->searcher_context()->result())) {\n        metric_->normalize(it.mutable_score());\n      }\n    }\n  }\n  return error_code;\n}\n\nint IndexSparseFlow::search_bf_impl(const uint32_t *sparse_count,\n                                    const uint32_t *sparse_indices,\n                                    const void *sparse_query,\n                                    const IndexQueryMeta &qmeta, uint32_t count,\n                                    Context::Pointer &context) const {\n  if (ailego_unlikely(!count || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    std::string ovec;\n    IndexQueryMeta new_qmeta;\n    error_code =\n        reformer_->transform(sparse_count, sparse_indices, sparse_query, qmeta,\n                             count, &ovec, &new_qmeta);\n\n    if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n      return IndexError_Mismatch;\n    }\n\n    if (error_code == 0) {\n      error_code = searcher_->search_bf_impl(sparse_count, sparse_indices,\n                                             ovec.data(), new_qmeta, count,\n                                             context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        
searcher_->search_bf_impl(sparse_count, sparse_indices, sparse_query,\n                                  qmeta, count, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (uint32_t i = 0; i < count; ++i) {\n        IndexDocumentList &result = const_cast<IndexDocumentList &>(\n            context->searcher_context()->result(i));\n\n        for (auto &it : result) {\n          metric_->normalize(it.mutable_score());\n        }\n      }\n    }\n  }\n  return error_code;\n}\n\nint IndexSparseFlow::search_impl(const uint32_t *sparse_count,\n                                 const uint32_t *sparse_indices,\n                                 const void *sparse_query,\n                                 const IndexQueryMeta &qmeta, uint32_t count,\n                                 Context::Pointer &context) const {\n  if (ailego_unlikely(!count || !context)) {\n    return IndexError_InvalidArgument;\n  }\n\n  int error_code = 0;\n  if (reformer_) {\n    std::string ovec;\n    IndexQueryMeta new_qmeta;\n    error_code =\n        reformer_->transform(sparse_count, sparse_indices, sparse_query, qmeta,\n                             count, &ovec, &new_qmeta);\n\n    if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {\n      return IndexError_Mismatch;\n    }\n\n    if (error_code == 0) {\n      error_code =\n          searcher_->search_impl(sparse_count, sparse_indices, ovec.data(),\n                                 new_qmeta, count, context->searcher_context());\n    }\n  } else {\n    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {\n      return IndexError_Mismatch;\n    }\n    error_code =\n        searcher_->search_impl(sparse_count, sparse_indices, sparse_query,\n                               qmeta, count, context->searcher_context());\n  }\n\n  if (error_code == 0) {\n    if (metric_->support_normalize()) {\n      for (uint32_t i = 0; i < count; ++i) {\n        IndexDocumentList &result = 
const_cast<IndexDocumentList &>(\n            context->searcher_context()->result(i));\n\n        for (auto &it : result) {\n          metric_->normalize(it.mutable_score());\n        }\n      }\n    }\n  }\n  return error_code;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/utility/memory_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_helper.h>\n\nnamespace zvec {\nnamespace core {\n\nint IndexHelper::SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper,\n                                   const std::string &key) {\n  std::string buffer;\n  mt.serialize(&buffer);\n\n  size_t data_size = buffer.size();\n  uint32_t data_crc = ailego::Crc32c::Hash(buffer.data(), buffer.size(), 0);\n  buffer.resize((data_size + 31u) & ~31u);\n\n  if (dumper->write(buffer.data(), buffer.size()) != buffer.size()) {\n    return IndexError_WriteData;\n  }\n  if (dumper->append(key, data_size, buffer.size() - data_size, data_crc) !=\n      0) {\n    return IndexError_WriteData;\n  }\n  return IndexError_Success;\n}\n\nint IndexHelper::SerializeToStorage(const IndexMeta &mt, IndexStorage *storage,\n                                    const std::string &key) {\n  std::string buffer;\n  mt.serialize(&buffer);\n\n  auto segment = storage->get(key);\n  if (!segment) {\n    const size_t align_size = 4096 * 4;\n    size_t meta_size =\n        (buffer.size() + align_size - 1) / align_size * align_size;\n\n    if (storage->append(key, meta_size) != 0) {\n      return IndexError_WriteData;\n    }\n\n    segment = storage->get(key);\n    if (!segment) {\n      return 
IndexError_NoExist;\n    }\n  }\n\n  if (segment->write(0, buffer.data(), buffer.size()) != buffer.size()) {\n    return IndexError_WriteData;\n  }\n  segment->resize(buffer.size());\n  segment->update_data_crc(\n      ailego::Crc32c::Hash(buffer.data(), buffer.size(), 0));\n  return IndexError_Success;\n}\n\nint IndexHelper::DeserializeFromStorage(IndexStorage *storage,\n                                        const std::string &key,\n                                        IndexMeta *out) {\n  auto segment = storage->get(key);\n  if (!segment) {\n    return IndexError_NoExist;\n  }\n\n  uint32_t crc = segment->data_crc();\n  size_t len = segment->data_size();\n  const void *data = nullptr;\n\n  if (segment->read(0, &data, len) != len) {\n    return IndexError_ReadData;\n  }\n  if (crc != 0u && ailego::Crc32c::Hash(data, len, 0u) != crc) {\n    return IndexError_InvalidChecksum;\n  }\n  if (!out->deserialize(data, len)) {\n    return IndexError_Deserialize;\n  }\n  return IndexError_Success;\n}\n\n/*! Two Pass Index Holder\n */\nclass TwoPassIndexHolder : public IndexHolder {\n private:\n  /*! First Pass Iterator\n   * store elements during iterating for second iterating.\n   */\n  class FirstPassIterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<FirstPassIterator> Pointer;\n\n    //! Constructor\n    FirstPassIterator(TwoPassIndexHolder *owner,\n                      IndexHolder::Iterator::Pointer &&iter)\n        : holder_(owner), front_iter_(std::move(iter)) {}\n\n    //! Destructor\n    virtual ~FirstPassIterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return front_iter_->data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! 
Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.emplace_back(\n          front_iter_->key(), std::string((const char *)front_iter_->data(),\n                                          holder_->front_->element_size()));\n      front_iter_->next();\n    }\n\n   private:\n    TwoPassIndexHolder *holder_{nullptr};\n    IndexHolder::Iterator::Pointer front_iter_{};\n  };\n\n  class SecondPassIterator : public IndexHolder::Iterator {\n   public:\n    //! Second Pass Iterator Pointer\n    typedef std::unique_ptr<SecondPassIterator> Pointer;\n\n    //! Constructor\n    SecondPassIterator(TwoPassIndexHolder *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~SecondPassIterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.erase(features_iter_++);\n    }\n\n   private:\n    TwoPassIndexHolder *holder_{nullptr};\n    typename std::list<std::pair<uint64_t, std::string>>::iterator\n        features_iter_{};\n  };\n\n public:\n  //! Constructor\n  TwoPassIndexHolder(IndexHolder::Pointer &&front)\n      : front_(std::move(front)),\n        data_type_(front_->data_type()),\n        dimension_(front_->dimension()),\n        element_size_(front_->element_size()),\n        count_(front_->count()) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return count_;\n  }\n\n  //! 
Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return data_type_;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return element_size_;\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return false;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    ++pass_;\n    if (pass_ == 1) {\n      IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n      return iter ? IndexHolder::Iterator::Pointer(\n                        new TwoPassIndexHolder::FirstPassIterator(\n                            this, std::move(iter)))\n                  : IndexHolder::Iterator::Pointer();\n    } else if (pass_ == 2) {\n      return IndexHolder::Iterator::Pointer(\n          new TwoPassIndexHolder::SecondPassIterator(this));\n    }\n    return nullptr;\n  }\n\n private:\n  //! Disable them\n  TwoPassIndexHolder(void) = delete;\n\n  //! Members\n  IndexHolder::Pointer front_{};\n  std::list<std::pair<uint64_t, std::string>> features_{};\n  size_t pass_{0};\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};\n  size_t dimension_;\n  size_t element_size_;\n  size_t count_;\n};\n\nIndexHolder::Pointer IndexHelper::MakeTwoPassHolder(\n    IndexHolder::Pointer holder) {\n  if (holder->multipass()) {\n    return holder;\n  }\n  return IndexHolder::Pointer(new TwoPassIndexHolder(std::move(holder)));\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_logger.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstdarg>\n#include <cstdio>\n#include <iostream>\n#include <sstream>\n#include <thread>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_logger.h>\n\nnamespace zvec {\nnamespace core {\n\n//! Numeric values of the logger levels\nconst int IndexLogger::LEVEL_DEBUG = 0;\nconst int IndexLogger::LEVEL_INFO = 1;\nconst int IndexLogger::LEVEL_WARN = 2;\nconst int IndexLogger::LEVEL_ERROR = 3;\nconst int IndexLogger::LEVEL_FATAL = 4;\n\n/*! Console Logger\n *  Messages up to LEVEL_INFO go to stdout, everything above goes to stderr.\n */\nstruct ConsoleLogger : public IndexLogger {\n  //! Initialize Logger (nothing to set up, always returns 0)\n  int init(const zvec::ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Logger (nothing to release, always returns 0)\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Log Message\n  void log(int level, const char *file, int line, const char *format,\n           va_list args) override {\n    // Scratch space used twice: first for the timestamp, then for the message\n    char buffer[8192];\n    std::ostringstream stream;\n\n    ailego::Realtime::Localtime(buffer, sizeof(buffer));\n    stream << '[' << LevelString(level) << ' ' << buffer << ' '\n           << std::this_thread::get_id() << ' ' << ailego::File::BaseName(file)\n           << ':' << line << \"] \";\n\n    // vsnprintf truncates a message that does not fit into the buffer\n    vsnprintf(buffer, sizeof(buffer), format, args);\n    stream << buffer << '\\n';\n\n    // The complete record is written with a single insertion\n    if (level <= LEVEL_INFO) {\n      std::cout << stream.str() << std::flush;\n    } else {\n      std::cerr << stream.str() << std::flush;\n    }\n  }\n};\n\n//! Logger Level\nint IndexLoggerBroker::logger_level_ = 0;\n\n//! Logger\nIndexLogger::Pointer IndexLoggerBroker::logger_(new ConsoleLogger);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_mapping.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cerrno>\n#include <cstdint>\n#include <cstdio>\n#include <cstring>\n#include <string>\n#include <zvec/ailego/io/mmap_file.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_mapping.h>\n#include \"ailego/utility/memory_helper.h\"\n\n#ifdef __linux__\n#include <sys/statfs.h>\n#include <sys/vfs.h>\n#ifndef HUGETLBFS_MAGIC\n#define HUGETLBFS_MAGIC 0x958458f6\n#endif\n#endif\n\nnamespace zvec {\nnamespace core {\n\n//! Round size up to a multiple of the page size (or of the huge page size)\nstatic inline size_t CalcPageAlignedSize(size_t size, bool huge_size) {\n  size_t page_size = ailego::MemoryHelper::PageSize();\n  if (huge_size) {\n    page_size = ailego::MemoryHelper::HugePageSize();\n  }\n  return (size + page_size - 1) / page_size * page_size;\n}\n\n//! Write size zero bytes to the file, at most one page per write call\nstatic inline bool WritePadding(ailego::File &file, size_t size) {\n  std::string padding(ailego::MemoryHelper::PageSize(), 0);\n  for (size_t i = 0, count = size / padding.size(); i < count; ++i) {\n    if (file.write(padding.data(), padding.size()) != padding.size()) {\n      return false;\n    }\n  }\n  padding.resize(size % padding.size());\n  if (padding.size()) {\n    if (file.write(padding.data(), padding.size()) != padding.size()) {\n      return false;\n    }\n  }\n  return true;\n}\n\n//! Read and validate the meta header at the current file position and compute\n//! the number of bytes spanned by the header up to the end of the footer\nstatic inline int UnpackMappingSize(ailego::File &file, size_t *len) {\n  IndexFormat::MetaHeader header;\n  if (file.read(&header, sizeof(header)) != sizeof(header)) {\n    LOG_ERROR(\"Failed to read file, errno %d, %s\", errno, std::strerror(errno));\n    return IndexError_ReadData;\n  }\n\n  if (header.meta_header_size != sizeof(IndexFormat::MetaHeader) ||\n      header.meta_footer_size != sizeof(IndexFormat::MetaFooter)) {\n    return IndexError_InvalidValue;\n  }\n\n  if (ailego::Crc32c::Hash(&header, sizeof(header), header.header_crc) !=\n      header.header_crc) {\n    return IndexError_InvalidChecksum;\n  }\n\n  if ((int32_t)header.meta_footer_offset < 0) {\n    return IndexError_Unsupported;\n  }\n\n  *len = header.meta_footer_offset + header.meta_footer_size;\n  if (*len > file.size()) {\n    return IndexError_InvalidLength;\n  }\n  return 0;\n}\n\n//! Open an existing index file and map its meta sections\nint IndexMapping::open(const std::string &path, bool cow, bool full_mode) {\n  path_ = path;\n  full_mode_ = full_mode;\n  copy_on_write_ = cow;\n  huge_page_ = Ishugetlbfs(path);\n  // Always start at the first meta section, even if this object has been used\n  // for another index before\n  current_header_start_offset_ = 0;\n\n  bool read_only = copy_on_write_ && !full_mode_;\n  if (!file_.open(path.c_str(), read_only, false)) {\n    LOG_ERROR(\"Failed to open file %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_OpenFile;\n  }\n\n  size_t mapping_size = 0u;\n  int error_code = UnpackMappingSize(file_, &mapping_size);\n  if (error_code != 0) {\n    file_.close();\n    return error_code;\n  }\n\n  if (!file_.seek(0, ailego::File::Origin::End)) {\n    LOG_ERROR(\"Failed to seek file %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    // Do not keep the handle open on failure\n    file_.close();\n    return IndexError_SeekFile;\n  }\n  return this->init_index_mapping(mapping_size);\n}\n\n//! Create a new index file holding a single, empty meta section\nint IndexMapping::create(const std::string &path, size_t seg_meta_capacity) {\n  path_ = path;\n  seg_meta_capacity_ = seg_meta_capacity;\n  current_header_start_offset_ = 0;\n\n  // write() & copying to mmap() will auto extend the file size\n  if (!file_.create(path.c_str(), 0)) {\n    LOG_ERROR(\"Failed to create file %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_CreateFile;\n  }\n  huge_page_ = Ishugetlbfs(path);\n  if (huge_page_) {\n    return init_hugepage_meta_section();\n  }\n  return init_meta_section();\n}\n\n//! Write an empty meta section (header, zeroed segment table, footer) at the\n//! current header offset with write(), then map it\nint IndexMapping::init_meta_section() {\n  if (current_header_start_offset_ % ailego::MemoryHelper::PageSize() != 0) {\n    LOG_ERROR(\"File offset %zu is not a multiple of the page size: %zu\",\n              current_header_start_offset_, ailego::MemoryHelper::PageSize());\n    return IndexError_InvalidValue;\n  }\n\n  auto &path = path_;\n  size_t len =\n      CalcPageAlignedSize(seg_meta_capacity_ + sizeof(IndexFormat::MetaHeader) +\n                              sizeof(IndexFormat::MetaFooter),\n                          false);\n\n  IndexFormat::MetaHeader meta_header;\n  IndexFormat::MetaFooter meta_footer;\n\n  // Write index header\n  IndexFormat::SetupMetaHeader(&meta_header, len - sizeof(meta_footer), len);\n  if (!file_.seek(current_header_start_offset_, ailego::File::Origin::Begin)) {\n    LOG_ERROR(\"Failed to seek file %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_SeekFile;\n  }\n  if (file_.write(&meta_header, sizeof(meta_header)) != sizeof(meta_header)) {\n    LOG_ERROR(\"Failed to write file: %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_WriteData;\n  }\n\n  // Write padding data\n  uint32_t segments_meta_size =\n      static_cast<uint32_t>(len - (sizeof(meta_header) + sizeof(meta_footer)));\n  if (!WritePadding(file_, segments_meta_size)) {\n    LOG_ERROR(\"Failed to write file: %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_WriteData;\n  }\n\n  // Write index footer\n  IndexFormat::SetupMetaFooter(&meta_footer);\n  meta_footer.segments_meta_size = segments_meta_size;\n  meta_footer.total_size = len;\n  IndexFormat::UpdateMetaFooter(&meta_footer, 0);\n  if (file_.write(&meta_footer, sizeof(meta_footer)) != sizeof(meta_footer)) {\n    LOG_ERROR(\"Failed to write file: %s, errno %d, %s\", path.c_str(), errno,\n              std::strerror(errno));\n    return IndexError_WriteData;\n  }\n  return this->init_index_mapping(len);\n}\n\n//! Same layout as init_meta_section(), but the section is filled through a\n//! shared huge page mapping instead of write()\nint IndexMapping::init_hugepage_meta_section() {\n  const size_t section_offset = current_header_start_offset_;\n  if (section_offset % ailego::MemoryHelper::HugePageSize() != 0) {\n    LOG_ERROR(\"File offset %zu is not a multiple of the page size: %zu\",\n              section_offset, ailego::MemoryHelper::HugePageSize());\n    return IndexError_InvalidValue;\n  }\n\n  size_t len =\n      CalcPageAlignedSize(seg_meta_capacity_ + sizeof(IndexFormat::MetaHeader) +\n                              sizeof(IndexFormat::MetaFooter),\n                          true);\n  int opts = ailego::File::MMAP_SHARED | ailego::File::MMAP_HUGE_PAGE;\n  // The mapping itself starts at the section offset, so every write below is\n  // addressed relative to the start of the mapping, not to the file\n  uint8_t *base = reinterpret_cast<uint8_t *>(ailego::File::MemoryMap(\n      file_.native_handle(), section_offset, len, opts));\n  if (!base) {\n    LOG_ERROR(\"Failed to map file, errno %d, %s\", errno, std::strerror(errno));\n    return IndexError_MMapFile;\n  }\n\n  IndexFormat::MetaHeader meta_header;\n  IndexFormat::MetaFooter meta_footer;\n\n  // Write index header\n  IndexFormat::SetupMetaHeader(&meta_header, len - sizeof(meta_footer), len);\n  memcpy(base, &meta_header, sizeof(meta_header));\n\n  // Write padding data\n  uint32_t segments_meta_size =\n      static_cast<uint32_t>(len - (sizeof(meta_header) + sizeof(meta_footer)));\n  memset(base + sizeof(meta_header), 0, segments_meta_size);\n\n  // Write index footer\n  IndexFormat::SetupMetaFooter(&meta_footer);\n  meta_footer.segments_meta_size = segments_meta_size;\n  meta_footer.total_size = len;\n  IndexFormat::UpdateMetaFooter(&meta_footer, 0);\n  memcpy(base + sizeof(meta_header) + segments_meta_size, &meta_footer,\n         sizeof(meta_footer));\n\n  // init_index_mapping() maps the section again, so release this mapping\n  ailego::File::MemoryUnmap(base, len);\n  return this->init_index_mapping(len);\n}\n\n//! Unmap everything, close the file and reset the state\nvoid IndexMapping::close(void) {\n  // Unmap all memory\n  this->unmap_all();\n  // Meta sections stay registered even while header_ is reset (see append()),\n  // so release them regardless of header_\n  for (auto item : header_addr_map_) {\n    auto header = item.second;\n    ailego::File::MemoryUnmap(header, header->content_offset);\n  }\n  // Reset members\n  segment_ids_offset_ = 0;\n  segment_start_ = nullptr;\n  header_ = nullptr;\n  header_addr_map_.clear();\n  footer_ = nullptr;\n  index_size_ = 0u;\n  segments_.clear();\n  file_.close();\n  copy_on_write_ = false;\n  full_mode_ = false;\n  header_dirty_ = false;\n  huge_page_ = false;\n}\n\n//! Recompute the segment table checksum and update the footer of every mapped\n//! meta section, then mark the headers dirty\nvoid IndexMapping::refresh(uint64_t check_point) {\n  // support add_with_id\n  for (auto item : header_addr_map_) {\n    auto header = item.second;\n    auto footer = reinterpret_cast<IndexFormat::MetaFooter *>(\n        reinterpret_cast<uint8_t *>(header) + header->meta_footer_offset);\n    auto segment_start = reinterpret_cast<IndexFormat::SegmentMeta *>(\n        reinterpret_cast<uint8_t *>(header) +\n        (header->meta_footer_offset - footer->segments_meta_size));\n    footer->segments_meta_crc =\n        ailego::Crc32c::Hash(segment_start, footer->segments_meta_size, 0);\n    IndexFormat::UpdateMetaFooter(footer, check_point);\n  }\n  header_dirty_ = true;\n}\n\n//! Register a new segment of (page aligned) size bytes at the end of the\n//! index; a further meta section is created when the current one is full\nint IndexMapping::append(const std::string &id, size_t size) {\n  size = CalcPageAlignedSize(size, huge_page_);\n  if (size == 0) {\n    return IndexError_InvalidArgument;\n  }\n\n  if (segments_.find(id) != segments_.end()) {\n    return IndexError_Duplicate;\n  }\n\n  size_t id_size = std::strlen(id.c_str()) + 1;\n  size_t need_size = sizeof(IndexFormat::SegmentMeta) + id_size;\n  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count + need_size >\n      segment_ids_offset_) {\n    LOG_DEBUG(\"segment meta section expanded: %s\", path_.c_str());\n    footer_->next_meta_header_offset = index_size_;\n    refresh(0);\n    flush();\n    // mmap file storage write() will update segment's meta\n    // ailego::File::MemoryUnmap(header_, header_->content_offset);\n    header_ = nullptr;\n    footer_ = nullptr;\n\n    current_header_start_offset_ = index_size_;\n    const int ret =\n        huge_page_ ? init_hugepage_meta_section() : init_meta_section();\n    if (ret != 0) {\n      return ret;\n    }\n  }\n\n  if (!copy_on_write_ && !file_.truncate(index_size_ + size)) {\n    LOG_ERROR(\"Failed to truncate file, errno %d, %s\", errno,\n              std::strerror(errno));\n    return IndexError_TruncateFile;\n  }\n\n  // Update segment table\n  segment_ids_offset_ -= static_cast<uint32_t>(id_size);\n  IndexFormat::SegmentMeta *segment = segment_start_ + footer_->segment_count;\n  segment->segment_id_offset = segment_ids_offset_;\n  segment->data_index =\n      index_size_ - header_->content_offset - current_header_start_offset_;\n  segment->data_size = 0;\n  segment->data_crc = 0;\n  segment->padding_size = size;\n  memcpy((uint8_t *)segment_start_ + segment_ids_offset_, id.c_str(), id_size);\n  index_size_ += size;\n\n  // Update index footer\n  footer_->segments_meta_crc =\n      ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0);\n  footer_->segment_count += 1;\n  footer_->content_size += size;\n  footer_->total_size += size;\n  IndexFormat::UpdateMetaFooter(footer_, 0);\n  segments_.emplace(\n      id, SegmentInfo{Segment{segment}, current_header_start_offset_, header_});\n  header_dirty_ = true;\n  return 0;\n}\n\n//! Map the data of a segment on first use and return the segment, or nullptr\n//! if the id is unknown or the mapping fails\nIndexMapping::Segment *IndexMapping::map(const std::string &id, bool warmup,\n                                         bool locked) {\n  auto iter = segments_.find(id);\n  if (iter == segments_.end()) {\n    return nullptr;\n  }\n  SegmentInfo &segment_info = iter->second;\n  Segment *item = &segment_info.segment;\n  if (!item->data()) {\n    auto meta = item->meta();\n    size_t mapping_size = meta->data_size + meta->padding_size;\n    size_t offset = segment_info.segment_header_start_offset +\n                    segment_info.segment_header->content_offset +\n                    meta->data_index;\n\n    void *addr = nullptr;\n    if (!copy_on_write_) {\n      int opts = ailego::File::MMAP_SHARED;\n      if (huge_page_) {\n        opts |= ailego::File::MMAP_HUGE_PAGE;\n      }\n      addr = ailego::File::MemoryMap(file_.native_handle(), offset,\n                                     mapping_size, opts);\n    } else {\n      size_t file_size = file_.size();\n      int opts = ailego::File::MMAP_POPULATE;\n      if (huge_page_) {\n        opts |= ailego::File::MMAP_HUGE_PAGE;\n      }\n      if (offset < file_size) {\n        ailego_assert(offset + mapping_size <= file_size);\n        addr = ailego::File::MemoryMap(file_.native_handle(), offset,\n                                       mapping_size, opts);\n      } else {\n        addr = ailego::File::MemoryMap(mapping_size, opts);\n      }\n    }\n\n    if (!addr) {\n      LOG_ERROR(\"Map segment failed, segment id %s\", id.c_str());\n      return nullptr;\n    }\n    item->set_data(addr);\n\n    // Lock memory\n    if (locked) {\n      ailego::File::MemoryLock(item->data(), mapping_size);\n    }\n    // Warmup memory\n    if (warmup && meta->data_size) {\n      ailego::File::MemoryWarmup(item->data(), meta->data_size);\n    }\n  }\n  return item;\n}\n\n//! Unmap the data of one segment, if it is mapped\nvoid IndexMapping::unmap(const std::string &id) {\n  auto iter = segments_.find(id);\n  if (iter != segments_.end()) {\n    SegmentInfo &segment_info = iter->second;\n    Segment *item = &segment_info.segment;\n\n    if (item->data()) {\n      ailego::File::MemoryUnmap(\n          item->data(), item->meta()->data_size + item->meta()->padding_size);\n      item->set_data(nullptr);\n    }\n  }\n}\n\n//! Unmap the data of every mapped segment\nvoid IndexMapping::unmap_all(void) {\n  for (auto iter = segments_.begin(); iter != segments_.end(); ++iter) {\n    SegmentInfo &segment_info = iter->second;\n    Segment *item = &segment_info.segment;\n\n    if (item->data()) {\n      ailego::File::MemoryUnmap(\n          item->data(), item->meta()->data_size + item->meta()->padding_size);\n      item->set_data(nullptr);\n    }\n  }\n}\n\n//! Persist dirty segments and, if they are dirty, the meta sections\nint IndexMapping::flush(void) {\n  if ((file_.size() < index_size_) && !file_.truncate(index_size_)) {\n    LOG_ERROR(\"Failed to truncate file size %zu, errno %d, %s\", index_size_,\n              errno, std::strerror(errno));\n    return IndexError_TruncateFile;\n  }\n\n  for (auto iter = segments_.begin(); iter != segments_.end(); ++iter) {\n    SegmentInfo &segment_info = iter->second;\n    Segment *item = &segment_info.segment;\n    if (!item->data() || !item->dirty()) {\n      continue;\n    }\n\n    size_t segment_size = item->meta()->data_size + item->meta()->padding_size;\n    if (full_mode_ && copy_on_write_) {\n      size_t off = segment_info.segment_header_start_offset +\n                   segment_info.segment_header->content_offset +\n                   item->meta()->data_index;\n      if (file_.write(off, item->data(), segment_size) != segment_size) {\n        LOG_ERROR(\"Failed to write segment, size %zu, errno %d, %s\",\n                  segment_size, errno, std::strerror(errno));\n        return IndexError_WriteData;\n      }\n    } else {\n      ailego::File::MemoryFlush(item->data(), segment_size);\n    }\n    item->reset_dirty();\n  }\n\n  if (!header_dirty_) {\n    return 0;\n  }\n\n  if (full_mode_ && copy_on_write_) {\n    for (auto item : header_addr_map_) {\n      auto header_start_offset = item.first;\n      auto header = item.second;\n      if (file_.write(header_start_offset, header, header->content_offset) !=\n          header->content_offset) {\n        LOG_ERROR(\"Failed to write meta header, size %zu, errno %d, %s\",\n                  static_cast<size_t>(header->content_offset), errno,\n                  std::strerror(errno));\n        return IndexError_WriteData;\n      }\n    }\n  } else {\n    for (auto item : header_addr_map_) {\n      auto header = item.second;\n      ailego::File::MemoryFlush(header, header->content_offset);\n    }\n  }\n  // The headers are clean only after they have actually been persisted, so a\n  // failed write above is retried by the next flush\n  header_dirty_ = false;\n  return 0;\n}\n\n//! Map the meta section of len bytes at the current header offset, validate\n//! it, register its segments and continue with the next chained section\nint IndexMapping::init_index_mapping(size_t len) {\n  int opts =\n      copy_on_write_ ? ailego::File::MMAP_POPULATE : ailego::File::MMAP_SHARED;\n  if (huge_page_) {\n    opts |= ailego::File::MMAP_HUGE_PAGE;\n  }\n  uint8_t *start = reinterpret_cast<uint8_t *>(ailego::File::MemoryMap(\n      file_.native_handle(), current_header_start_offset_, len, opts));\n  if (!start) {\n    LOG_ERROR(\"Failed to map file, errno %d, %s\", errno, std::strerror(errno));\n    return IndexError_MMapFile;\n  }\n\n  // Unpack header\n  header_ = reinterpret_cast<IndexFormat::MetaHeader *>(start);\n  header_addr_map_.insert({current_header_start_offset_, header_});\n  if (header_->meta_header_size != sizeof(IndexFormat::MetaHeader)) {\n    return IndexError_InvalidLength;\n  }\n  if (ailego::Crc32c::Hash(header_, sizeof(*header_), header_->header_crc) !=\n      header_->header_crc) {\n    return IndexError_InvalidChecksum;\n  }\n\n  switch (header_->version) {\n    case IndexFormat::FORMAT_VERSION:\n      break;\n    default:\n      LOG_ERROR(\"Unsupported index version: %u\", header_->version);\n      return IndexError_Unsupported;\n  }\n\n  // Unpack footer\n  if (header_->meta_footer_size != sizeof(IndexFormat::MetaFooter)) {\n    return IndexError_InvalidLength;\n  }\n  if ((int32_t)header_->meta_footer_offset < 0) {\n    return IndexError_Unsupported;\n  }\n  size_t footer_offset = header_->meta_footer_offset;\n  if (footer_offset + header_->meta_footer_size > len) {\n    return IndexError_InvalidLength;\n  }\n\n  footer_ = reinterpret_cast<IndexFormat::MetaFooter *>(start + footer_offset);\n  if (footer_offset < footer_->segments_meta_size) {\n    return IndexError_InvalidLength;\n  }\n\n  index_size_ = file_.size();\n  if ((footer_->total_size > index_size_) ||\n      (footer_->content_size + footer_->content_padding_size +\n           header_->content_offset >\n       index_size_)) {\n    return IndexError_InvalidLength;\n  }\n  if (ailego::Crc32c::Hash(footer_, sizeof(*footer_), footer_->footer_crc) !=\n      footer_->footer_crc) {\n    return IndexError_InvalidChecksum;\n  }\n\n  // Unpack segment table\n  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count >\n      footer_->segments_meta_size) {\n    return IndexError_InvalidLength;\n  }\n\n  segment_start_ = reinterpret_cast<IndexFormat::SegmentMeta *>(\n      start + (footer_offset - footer_->segments_meta_size));\n  if (ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0u) !=\n      footer_->segments_meta_crc) {\n    LOG_ERROR(\"Index segments meta checksum is invalid.\");\n    return IndexError_InvalidChecksum;\n  }\n\n  segment_ids_offset_ = footer_->segments_meta_size;\n  for (IndexFormat::SegmentMeta *iter = segment_start_,\n                                *end = segment_start_ + footer_->segment_count;\n       iter != end; ++iter) {\n    if (iter->segment_id_offset > footer_->segments_meta_size) {\n      return IndexError_InvalidValue;\n    }\n    if (iter->data_index > footer_->content_size) {\n      return IndexError_InvalidValue;\n    }\n    if (iter->data_index + iter->data_size > footer_->content_size) {\n      return IndexError_InvalidLength;\n    }\n\n    if (iter->segment_id_offset < segment_ids_offset_) {\n      segment_ids_offset_ = iter->segment_id_offset;\n    }\n    segments_.emplace(\n        std::string(reinterpret_cast<const char *>(segment_start_) +\n                    iter->segment_id_offset),\n        SegmentInfo{Segment{iter}, current_header_start_offset_, header_});\n  }\n  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count >\n      segment_ids_offset_) {\n    return IndexError_InvalidLength;\n  }\n\n  // if (header_->version == IndexFormat::COMPATIBLE_FORMAT_VERSION_0X0002) {\n  //   header_->version = IndexFormat::CURRENT_FORMAT_VERSION;\n  //   LOG_INFO(\"Index file format upgraded\");\n  //   IndexFormat::UpdateMetaHeader(header_);\n  //   footer_->segments_meta_crc =\n  //       ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0);\n  //   IndexFormat::UpdateMetaFooter(footer_, 0);\n  //   header_dirty_ = true;\n  // }\n\n  if (footer_->next_meta_header_offset > 0) {\n    // append() always places a new meta section behind the current one; a\n    // link that does not move forward would make this recursion endless\n    if (footer_->next_meta_header_offset <= current_header_start_offset_) {\n      return IndexError_InvalidValue;\n    }\n    current_header_start_offset_ = footer_->next_meta_header_offset;\n    // Meta sections have all the same size, so we can use the same size to map\n    // the next meta section\n    return this->init_index_mapping(len);\n  }\n\n  return 0;\n}\n\n//! Check whether the path lives on a hugetlbfs mount (always false off Linux)\nbool IndexMapping::Ishugetlbfs(const std::string &path) const {\n#ifdef __linux__\n  struct statfs buf;\n  if (statfs(path.c_str(), &buf) != 0) {\n    perror(\"statfs\");\n    return false;\n  }\n  return static_cast<unsigned long>(buf.f_type) == HUGETLBFS_MAGIC;\n#else\n  static_cast<void>(path);\n  return false;\n#endif\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_meta.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstdint>\n#include <cstring>\n#include <string>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Meta Buffer Format\n *  Fixed size header, optionally followed by the serialized attachment.\n */\nstruct IndexMetaFormatHeader {\n  uint32_t header_size;\n  uint32_t meta_type;\n  uint32_t major_order;\n  uint32_t data_type;\n  uint32_t dimension;\n  uint32_t unit_size;\n  uint32_t space_id;\n  uint32_t attachment_offset;\n  uint32_t attachment_size;\n  uint8_t reserved_[4092];\n};\n\nstatic_assert(sizeof(IndexMetaFormatHeader) % 32 == 0,\n              \"IndexMetaFormatHeader must be aligned with 32 bytes\");\n\n//! Serialize the meta into out: the fixed size header followed by the\n//! attachment, which holds the named components and the attributes\nvoid IndexMeta::serialize(std::string *out) const {\n  ailego::Params attachment;\n  IndexMetaFormatHeader format;\n  std::memset(&format, 0, sizeof(format));\n  format.header_size = sizeof(format);\n  format.meta_type = static_cast<uint32_t>(meta_type_);\n  format.major_order = static_cast<uint32_t>(major_order_);\n  format.data_type = static_cast<uint32_t>(data_type_);\n  format.dimension = dimension_;\n  format.unit_size = unit_size_;\n  format.space_id = space_id_;\n\n  if (!metric_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", metric_name_);\n    item.set(\"revision\", metric_revision_);\n    item.set(\"params\", metric_params_);\n    attachment.set(\"metric\", std::move(item));\n  }\n\n  if (!converter_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", converter_name_);\n    item.set(\"revision\", converter_revision_);\n    item.set(\"params\", converter_params_);\n    attachment.set(\"converter\", std::move(item));\n  }\n  if (!reformer_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", reformer_name_);\n    item.set(\"revision\", reformer_revision_);\n    item.set(\"params\", reformer_params_);\n    attachment.set(\"reformer\", std::move(item));\n  }\n  if (!trainer_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", trainer_name_);\n    item.set(\"revision\", trainer_revision_);\n    item.set(\"params\", trainer_params_);\n    attachment.set(\"trainer\", std::move(item));\n  }\n  if (!builder_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", builder_name_);\n    item.set(\"revision\", builder_revision_);\n    item.set(\"params\", builder_params_);\n    attachment.set(\"builder\", std::move(item));\n  }\n  if (!reducer_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", reducer_name_);\n    item.set(\"revision\", reducer_revision_);\n    item.set(\"params\", reducer_params_);\n    attachment.set(\"reducer\", std::move(item));\n  }\n  if (!searcher_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", searcher_name_);\n    item.set(\"revision\", searcher_revision_);\n    item.set(\"params\", searcher_params_);\n    attachment.set(\"searcher\", std::move(item));\n  }\n  if (!streamer_name_.empty()) {\n    ailego::Params item;\n    item.set(\"name\", streamer_name_);\n    item.set(\"revision\", streamer_revision_);\n    item.set(\"params\", streamer_params_);\n    attachment.set(\"streamer\", std::move(item));\n  }\n\n  if (!attributes_.empty()) {\n    attachment.set(\"attributes\", attributes_);\n  }\n\n  // Encode the attachment first, so that its location can be recorded in the\n  // header before the header is copied into the output buffer\n  std::string buf;\n  if (!attachment.empty()) {\n    ailego::Params::SerializeToBuffer(attachment, &buf);\n    format.attachment_offset = static_cast<uint32_t>(sizeof(format));\n    format.attachment_size = static_cast<uint32_t>(buf.size());\n  }\n\n  out->assign(reinterpret_cast<const char *>(&format), sizeof(format));\n  out->append(buf.data(), buf.size());\n}\n\n//! Restore the meta from a buffer produced by serialize(); returns false if\n//! the buffer is missing, too short, or its attachment cannot be parsed\nbool IndexMeta::deserialize(const void *data, size_t len) {\n  this->clear();\n  if (!data || sizeof(IndexMetaFormatHeader) > len) {\n    return false;\n  }\n\n  const IndexMetaFormatHeader *format =\n      reinterpret_cast<const IndexMetaFormatHeader *>(data);\n  if (sizeof(IndexMetaFormatHeader) > format->header_size) {\n    return false;\n  }\n\n  meta_type_ = static_cast<IndexMeta::MetaType>(format->meta_type);\n  major_order_ = static_cast<IndexMeta::MajorOrder>(format->major_order);\n  data_type_ = static_cast<IndexMeta::DataType>(format->data_type);\n  dimension_ = format->dimension;\n  unit_size_ = format->unit_size;\n  element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dimension_);\n  space_id_ = format->space_id;\n\n  // Read attachment\n  ailego::Params attachment;\n  if (format->attachment_size) {\n    // Compare without adding the two 32-bit fields: their sum can wrap around\n    // and slip past the bounds check\n    const size_t attachment_offset = format->attachment_offset;\n    const size_t attachment_size = format->attachment_size;\n    if (attachment_offset > len || attachment_size > len - attachment_offset) {\n      return false;\n    }\n    std::string str(\n        reinterpret_cast<const char *>(data) + attachment_offset,\n        attachment_size);\n    if (!ailego::Params::ParseFromBuffer(str, &attachment)) {\n      return false;\n    }\n  }\n\n  ailego::Params item;\n  if (attachment.get(\"metric\", &item)) {\n    item.get(\"name\", &metric_name_);\n    item.get(\"revision\", &metric_revision_);\n    item.get(\"params\", &metric_params_);\n  }\n  if (attachment.get(\"converter\", &item)) {\n    item.get(\"name\", &converter_name_);\n    item.get(\"revision\", &converter_revision_);\n    item.get(\"params\", &converter_params_);\n  }\n  if (attachment.get(\"reformer\", &item)) {\n    item.get(\"name\", &reformer_name_);\n    item.get(\"revision\", &reformer_revision_);\n    item.get(\"params\", &reformer_params_);\n  }\n  if (attachment.get(\"trainer\", &item)) {\n    item.get(\"name\", &trainer_name_);\n    item.get(\"revision\", &trainer_revision_);\n    item.get(\"params\", &trainer_params_);\n  }\n  if (attachment.get(\"builder\", &item)) {\n    item.get(\"name\", &builder_name_);\n    item.get(\"revision\", &builder_revision_);\n    item.get(\"params\", &builder_params_);\n  }\n  if (attachment.get(\"reducer\", &item)) {\n    item.get(\"name\", &reducer_name_);\n    item.get(\"revision\", &reducer_revision_);\n    item.get(\"params\", &reducer_params_);\n  }\n  if (attachment.get(\"searcher\", &item)) {\n    item.get(\"name\", &searcher_name_);\n    item.get(\"revision\", &searcher_revision_);\n    item.get(\"params\", &searcher_params_);\n  }\n  if (attachment.get(\"streamer\", &item)) {\n    item.get(\"name\", &streamer_name_);\n    item.get(\"revision\", &streamer_revision_);\n    item.get(\"params\", &streamer_params_);\n  }\n  attachment.get(\"attributes\", &attributes_);\n\n  return true;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_plugin.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/utility/dl_helper.h>\n#include <zvec/core/framework/index_plugin.h>\n\nnamespace zvec {\nnamespace core {\n\nbool IndexPlugin::load(const std::string &path) {\n  if (handle_) {\n    return false;\n  }\n  handle_ = ailego::DLHelper::Load(path, nullptr);\n  return (!!handle_);\n}\n\nbool IndexPlugin::load(const std::string &path, std::string *err) {\n  if (handle_) {\n    *err = \"plugin loaded\";\n    return false;\n  }\n  handle_ = ailego::DLHelper::Load(path, err);\n  return !!handle_;\n}\n\nvoid IndexPlugin::unload(void) {\n  if (handle_) {\n    ailego::DLHelper::Unload(handle_);\n    handle_ = nullptr;\n  }\n}\n\nbool IndexPluginBroker::emplace(IndexPlugin &&plugin) {\n  if (!plugin.is_valid()) {\n    return false;\n  }\n  for (auto iter = plugins_.begin(); iter != plugins_.end(); ++iter) {\n    if (iter->handle() == plugin.handle()) {\n      plugin.unload();\n      return true;\n    }\n  }\n  plugins_.push_back(std::move(plugin));\n  return true;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/framework/index_version.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/version.i>\n#include <zvec/core/framework/index_version.h>\n\n\nnamespace zvec {\nnamespace core {\n\nstatic const char AITHETA_VERSION_DETAILS[] =\n    AILEGO_VERSION_COMPILE_DETAILS(\"All rights reserved.\\n\");\n\nconst char *IndexVersion::String(void) {\n  return AITHETA_VERSION_DETAILS;\n}\n\nconst char *IndexVersion::Details(void) {\n  return AITHETA_VERSION_DETAILS;\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/interface/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n        NAME core_interface STATIC STRICT ALWAYS_LINK\n        SRCS *.cc indexes/*.cc\n        INCS . ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core\n        LIBS zvec_ailego core_framework sparsehash magic_enum rabitqlib\n        VERSION \"${PROXIMA_ZVEC_VERSION}\"\n)\n"
  },
  {
    "path": "src/core/interface/index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <magic_enum/magic_enum.hpp>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/interface/index.h>\n#include \"mixed_reducer/mixed_reducer_params.h\"\n\nnamespace zvec::core_interface {\n\n// eliminate the pre-alloc of the context pool\nthread_local static std::array<core::IndexContext::Pointer,\n                               (magic_enum::enum_count<IndexType>() - 1) * 2>\n    _context_list;\n\n\nbool Index::init_context() {\n  context_index_ = (magic_enum::enum_integer(param_.index_type) - 1) * 2 +\n                   static_cast<size_t>(is_sparse_);\n  if (_context_list[context_index_] == nullptr) {\n    if ((_context_list[context_index_] = streamer_->create_context()) ==\n        nullptr) {\n      LOG_ERROR(\"Failed to create context\");\n      return false;\n    }\n  }\n  return true;\n}\n\ncore::IndexContext::Pointer &Index::acquire_context() {\n  init_context();\n  return _context_list[context_index_];\n}\n\nint Index::ParseMetricName(const BaseIndexParam &param) {\n  std::string metric_name;\n  if (is_sparse_) {\n    // only inner product is supported for sparse index\n    switch (param.metric_type) {\n      case MetricType::kInnerProduct:\n        metric_name = \"InnerProductSparse\";\n        break;\n      case MetricType::kMIPSL2sq:\n        metric_name = 
\"MipsSquaredEuclideanSparse\";\n        break;\n      default:\n        LOG_ERROR(\"Unsupported metric type\");\n        return core::IndexError_Runtime;\n    }\n  } else {\n    switch (param.metric_type) {\n      case MetricType::kL2sq:\n        metric_name = \"SquaredEuclidean\";\n        break;\n      case MetricType::kInnerProduct:\n        metric_name = \"InnerProduct\";\n        break;\n      case MetricType::kCosine:\n        metric_name = \"Cosine\";  // This is already the normalizedCosine\n        break;\n      case MetricType::kMIPSL2sq:\n        metric_name = \"MipsSquaredEuclidean\";\n        break;\n      default:\n        LOG_ERROR(\"Unsupported metric type\");\n        return core::IndexError_Runtime;\n    }\n  }\n  // TODO: MIPS need to set some param\n  // for streamer open()\n  proxima_index_meta_.set_metric(metric_name, 0, ailego::Params());\n  return 0;\n}\n\nint Index::CreateAndInitMetric(const BaseIndexParam & /*param*/) {\n  auto &metric_name = proxima_index_meta_.metric_name();\n\n  metric_ = core::IndexFactory::CreateMetric(metric_name);\n  if (!metric_) {\n    LOG_ERROR(\"Failed to create metric, name %s\", metric_name.c_str());\n    return core::IndexError_Runtime;\n  }\n  if (const auto ret = metric_->init(proxima_index_meta_,\n                                     proxima_index_meta_.metric_params());\n      ret != 0) {\n    LOG_ERROR(\"Failed to create and init metric, name %s, code %d, desc: %s\",\n              metric_name.c_str(), ret, core::IndexError::What(ret));\n    return core::IndexError_Runtime;\n  }\n  if (metric_->query_metric()) {\n    metric_ = metric_->query_metric();\n  }\n\n  return core::IndexError_Success;\n}\n\nint Index::CreateAndInitConverterReformer(const QuantizerParam &param,\n                                          const BaseIndexParam &index_param) {\n  ailego::Params converter_params;\n  std::string converter_name;\n  if (is_sparse_) {\n    switch (param.type) {\n      case QuantizerType::kNone:\n        
return core::IndexError_Success;\n      case QuantizerType::kFP16:\n        converter_name = \"HalfFloatSparseConverter\";\n        break;\n      default:\n        LOG_ERROR(\"Unsupported quantizer type: \");\n        return core::IndexError_Unsupported;\n    }\n  } else {\n    if (index_param.metric_type == MetricType::kCosine) {\n      switch (param.type) {\n        case QuantizerType::kNone:\n          if (index_param.data_type == DataType::DT_FP16) {\n            converter_name = \"CosineHalfFloatConverter\";\n          } else if (index_param.data_type == DataType::DT_FP32) {\n            converter_name = \"CosineNormalizeConverter\";\n          } else {\n            LOG_ERROR(\"Unsupported data type: \");\n            return core::IndexError_Unsupported;\n          }\n          break;\n        case QuantizerType::kRabitq:\n          if (index_param.data_type == DataType::DT_FP32) {\n            converter_name = \"CosineNormalizeConverter\";\n          } else {\n            LOG_ERROR(\"Unsupported data type: \");\n            return core::IndexError_Unsupported;\n          }\n          break;\n        case QuantizerType::kFP16:\n          converter_name = \"CosineFp16Converter\";\n          break;\n        case QuantizerType::kInt8:\n          converter_name = \"CosineInt8Converter\";\n          break;\n        case QuantizerType::kInt4:\n          converter_name = \"CosineInt4Converter\";\n          break;\n        default:\n          LOG_ERROR(\"Unsupported quantizer type: \");\n          return core::IndexError_Unsupported;\n      }\n    } else {\n      switch (param.type) {\n        case QuantizerType::kNone:\n          return core::IndexError_Success;\n        case QuantizerType::kFP16:\n          converter_name = \"HalfFloatConverter\";\n          break;\n        case QuantizerType::kInt8:\n          converter_name = \"Int8StreamingConverter\";\n          break;\n        case QuantizerType::kInt4:\n          converter_name = \"Int4StreamingConverter\";\n  
        break;\n        case QuantizerType::kRabitq:\n          // no converter here\n          return 0;\n        default:\n          LOG_ERROR(\"Unsupported quantizer type: \");\n          return core::IndexError_Unsupported;\n      }\n    }\n  }\n\n  proxima_index_meta_.set_converter(converter_name, 0, converter_params);\n  converter_ = core::IndexFactory::CreateConverter(converter_name);\n  if (converter_ == nullptr ||\n      converter_->init(proxima_index_meta_, converter_params) != 0) {\n    LOG_ERROR(\"Failed to create and init converter\");\n    return core::IndexError_Runtime;\n  }\n\n  proxima_index_meta_ = converter_->meta();\n  reformer_ =\n      core::IndexFactory::CreateReformer(proxima_index_meta_.reformer_name());\n  if (reformer_ == nullptr ||\n      reformer_->init(proxima_index_meta_.reformer_params()) != 0) {\n    LOG_ERROR(\"Failed to create and init reformer\");\n    return core::IndexError_Runtime;\n  }\n  streamer_vector_meta_.set_meta(proxima_index_meta_.data_type(),\n                                 proxima_index_meta_.dimension());\n  streamer_vector_meta_.set_meta_type(proxima_index_meta_.meta_type());\n\n  return core::IndexError_Success;\n}\n\nint Index::Init(const BaseIndexParam &param) {\n  param_ = param;  // will lose the original type info\n\n  is_sparse_ = param.is_sparse;\n  is_huge_page_ = param.is_huge_page;\n\n  proxima_index_meta_.set_meta(param.data_type, param.dimension);\n  proxima_index_meta_.set_meta_type(is_sparse_ ? 
IndexMeta::MetaType::MT_SPARSE\n                                               : IndexMeta::MetaType::MT_DENSE);\n\n  input_vector_meta_.set_meta(proxima_index_meta_.data_type(),\n                              proxima_index_meta_.dimension());\n  input_vector_meta_.set_meta_type(proxima_index_meta_.meta_type());\n  streamer_vector_meta_ = input_vector_meta_;\n\n\n  // when quantizer=int8/int4, the converter.init() will change the metric to\n  // QuantizedInteger with params\n  if (ParseMetricName(param) != 0) {\n    LOG_ERROR(\"Failed to parse metric name\");\n    return core::IndexError_Runtime;\n  }\n\n  if (CreateAndInitConverterReformer(param.quantizer_param, param) != 0) {\n    LOG_ERROR(\"Failed to create and init converter\");\n    return core::IndexError_Runtime;\n  }\n\n  // must after quantizer handled. e.g., cosine doesn't support int8 quantizer\n  if (CreateAndInitMetric(param) != 0) {\n    LOG_ERROR(\"Failed to create and init metric\");\n    return core::IndexError_Runtime;\n  }\n\n  if (CreateAndInitStreamer(param) != 0) {\n    LOG_ERROR(\"Failed to create and init streamer\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n}\n\n\nint Index::Open(const std::string &file_path, StorageOptions storage_options) {\n  ailego::Params storage_params;\n  // storage_params.set(\"proxima.mmap_file.storage.memory_warmup\", true);\n  // storage_params.set(\"proxima.mmap_file.storage.segment_meta_capacity\",\n  // 1024);\n  switch (storage_options.type) {\n    case StorageOptions::StorageType::kMMAP: {\n      storage_ = core::IndexFactory::CreateStorage(\"MMapFileStorage\");\n      if (storage_ == nullptr) {\n        LOG_ERROR(\"Failed to create MMapFileStorage\");\n        return core::IndexError_Runtime;\n      }\n      int ret = storage_->init(storage_params);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to init MMapFileStorage, path: %s, err: %s\",\n                  file_path.c_str(), core::IndexError::What(ret));\n        return ret;\n      
}\n      break;\n    }\n    case StorageOptions::StorageType::kBufferPool: {\n      storage_ = core::IndexFactory::CreateStorage(\"BufferStorage\");\n      if (storage_ == nullptr) {\n        LOG_ERROR(\"Failed to create BufferStorage\");\n        return core::IndexError_Runtime;\n      }\n      int ret = storage_->init(storage_params);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to init BufferStorage, path: %s, err: %s\",\n                  file_path.c_str(), core::IndexError::What(ret));\n        return ret;\n      }\n      break;\n    }\n    default:\n      LOG_ERROR(\"Unsupported storage type\");\n      return core::IndexError_Unsupported;\n  }\n\n  // read_options.create_new\n  int ret = storage_->open(file_path, storage_options.create_new);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open storage, path: %s, err: %s\", file_path.c_str(),\n              core::IndexError::What(ret));\n    return core::IndexError_Runtime;\n  }\n  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {\n    LOG_ERROR(\"Failed to open streamer, path: %s\", file_path.c_str());\n    return core::IndexError_Runtime;\n  }\n\n  // converter/reformer/metric are created in IndexFactory::CreateIndex\n  // TODO: init\n\n  // TODO: context pool\n  if (!init_context()) {  // to validate if any error, will be overwritten\n    LOG_ERROR(\"Failed to init context\");\n    return core::IndexError_Runtime;\n  }\n\n  is_open_ = true;\n  is_read_only_ = storage_options.read_only;\n  return 0;\n}\n\nint Index::Close() {\n  if (!is_open_) {\n    LOG_ERROR(\"Index is not open\");\n    return core::IndexError_Runtime;\n  }\n\n  if (!is_read_only_) {\n    if (ailego_unlikely(Flush() != 0)) {\n      LOG_ERROR(\"Failed to cleanup streamer\");\n      return core::IndexError_Runtime;\n    }\n  }\n  if (ailego_unlikely(streamer_->cleanup() != 0)) {\n    LOG_ERROR(\"Failed to cleanup streamer\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(storage_->close() != 0)) {\n    
LOG_ERROR(\"Failed to close storage\");\n    return core::IndexError_Runtime;\n  }\n  is_open_ = false;\n  return 0;\n}\n\nint Index::Flush() {\n  if (!is_open_) {\n    LOG_ERROR(\"Index is not open\");\n    return core::IndexError_Runtime;\n  }\n\n  if (is_read_only_) {\n    LOG_ERROR(\"Cannot flush read-only index\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(streamer_->flush(0) != 0)) {\n    LOG_ERROR(\"Failed to flush streamer\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(storage_->flush() != 0)) {\n    LOG_ERROR(\"Failed to flush storage\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n}\n\nint Index::Fetch(const uint32_t doc_id, VectorDataBuffer *vector_data_buffer) {\n  if (!is_open_) {\n    LOG_ERROR(\"Index is not open\");\n    return core::IndexError_Runtime;\n  }\n  if (is_sparse_) {\n    return _sparse_fetch(doc_id, vector_data_buffer);\n  }\n  return _dense_fetch(doc_id, vector_data_buffer);\n}\n\nint Index::Add(const VectorData &vector_data, const uint32_t doc_id) {\n  if (!is_open_) {\n    LOG_ERROR(\"Index is not open\");\n    return core::IndexError_Runtime;\n  }\n\n  if (is_read_only_) {\n    LOG_ERROR(\"Cannot add to read-only index\");\n    return core::IndexError_Runtime;\n  }\n\n  auto &context = acquire_context();\n  if (!context) {\n    LOG_ERROR(\"Failed to acquire context\");\n    return core::IndexError_Runtime;\n  }\n\n  int ret = 0;\n  if (is_sparse_) {\n    ret = _sparse_add(vector_data, doc_id, context);\n  } else {\n    ret = _dense_add(vector_data, doc_id, context);\n  }\n  context->reset();\n  return ret;\n}\n\n\nint Index::Search(const VectorData &vector_data,\n                  const BaseIndexQueryParam::Pointer &search_param,\n                  SearchResult *result) {\n  if (!is_open_) {\n    LOG_ERROR(\"Index is not open\");\n    return core::IndexError_Runtime;\n  }\n\n  if (!is_trained_ && this->Train() != 0) {\n    LOG_ERROR(\"Failed to train index\");\n    return 
core::IndexError_Runtime;\n  }\n\n  auto &context = acquire_context();\n  if (!context) {\n    LOG_ERROR(\"Failed to acquire context\");\n    return core::IndexError_Runtime;\n  }\n\n  if (_prepare_for_search(vector_data, search_param, context) != 0) {\n    LOG_ERROR(\"Failed to prepare for search\");\n    context->reset();\n    return core::IndexError_Runtime;\n  }\n\n  if (is_sparse_) {\n    int ret = _sparse_search(vector_data, search_param, result, context);\n    context->reset();\n    return ret;\n  }\n\n  // dense support refiner, but sparse doesn't\n  int ret = 0;\n  if (search_param->refiner_param == nullptr) {\n    ret = _dense_search(vector_data, search_param, result, context);\n    context->reset();\n  } else {\n    auto &reference_index = search_param->refiner_param->reference_index;\n    if (reference_index == nullptr) {\n      LOG_ERROR(\"Reference index is not set\");\n      context->reset();\n      return core::IndexError_Runtime;\n    }\n    // TODO: tackle query_param's type info loss to loosen the constraint\n    if (reference_index->param_.index_type != IndexType::kFlat) {\n      LOG_ERROR(\"Reference index is not flat\");\n      context->reset();\n      return core::IndexError_Runtime;\n    }\n\n    context->set_topk(_get_coarse_search_topk(search_param));\n    context->set_fetch_vector(false);  // no need to fetch vector\n    if (_dense_search(vector_data, search_param, result, context) != 0) {\n      LOG_ERROR(\"Failed to search\");\n      context->reset();\n      return core::IndexError_Runtime;\n    }\n\n    auto &base_result = context->result();\n    std::vector<uint64_t> keys(base_result.size());\n    for (size_t i = 0; i < base_result.size(); ++i) {\n      keys[i] = base_result[i].key();\n    }\n\n    FlatQueryParam::Pointer flat_search_param =\n        std::make_shared<FlatQueryParam>();\n    flat_search_param->topk = search_param->topk;\n    flat_search_param->fetch_vector = search_param->fetch_vector;\n    flat_search_param->filter = 
search_param->filter;\n    // TODO: should copy other params?\n    flat_search_param->bf_pks = std::make_shared<std::vector<uint64_t>>(keys);\n\n    ret = reference_index->Search(vector_data, flat_search_param, result);\n  }\n  context->reset();\n  return ret;\n}\n\n\nint Index::_dense_fetch(const uint32_t doc_id,\n                        VectorDataBuffer *vector_data_buffer) {\n  core::IndexStorage::MemoryBlock vector_block;\n  int ret = streamer_->get_vector_by_id(doc_id, vector_block);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to fetch vector, doc_id: %u\", doc_id);\n    return core::IndexError_Runtime;\n  }\n  const void *vector = vector_block.data();\n\n  DenseVectorBuffer dense_vector_buffer;\n  std::string &out_vector_buffer = dense_vector_buffer.data;\n  // for int4, unit_size * dim != element_size\n  out_vector_buffer.resize(input_vector_meta_.element_size());\n\n  if (reformer_ != nullptr) {\n    if (reformer_->revert(vector, streamer_vector_meta_, &out_vector_buffer) !=\n        0) {\n      LOG_ERROR(\"Failed to convert vector\");\n      return core::IndexError_Runtime;\n    }\n  } else {\n    out_vector_buffer = std::string(\n        static_cast<const char *>(vector),\n        input_vector_meta_.dimension() * input_vector_meta_.unit_size());\n  }\n  vector_data_buffer->vector_buffer = std::move(dense_vector_buffer);\n  return 0;\n}\n\n\nint Index::_sparse_fetch(const uint32_t doc_id,\n                         VectorDataBuffer *vector_data_buffer) {\n  SparseVectorBuffer sparse_vector_buffer;\n\n  if (0 != streamer_->get_sparse_vector_by_id(\n               doc_id, &sparse_vector_buffer.count,\n               &sparse_vector_buffer.indices, &sparse_vector_buffer.values)) {\n    LOG_ERROR(\"Failed to fetch vector\");\n    return core::IndexError_Runtime;\n  }\n\n  if (reformer_ != nullptr) {\n    std::string reverted_sparse_values_buffer;\n    if (reformer_->revert(\n            sparse_vector_buffer.count, sparse_vector_buffer.get_indices(),\n            
sparse_vector_buffer.get_values(), streamer_vector_meta_,\n            &reverted_sparse_values_buffer) != 0) {\n      LOG_ERROR(\"Failed to convert vector\");\n      return core::IndexError_Runtime;\n    }\n    sparse_vector_buffer.values = std::move(reverted_sparse_values_buffer);\n  }\n  vector_data_buffer->vector_buffer = std::move(sparse_vector_buffer);\n  return 0;\n}\n\nint Index::_dense_add(const VectorData &vector_data, const uint32_t doc_id,\n                      core::IndexContext::Pointer &context) {\n  if (!std::holds_alternative<DenseVector>(vector_data.vector)) {\n    LOG_ERROR(\"Invalid vector data\");\n    return core::IndexError_Runtime;\n  }\n  const DenseVector &dense_vector = std::get<DenseVector>(vector_data.vector);\n  if (reformer_ != nullptr) {\n    core::IndexQueryMeta new_meta;\n    std::string new_vector;\n    int ret;\n    ret = reformer_->convert(dense_vector.data, input_vector_meta_, &new_vector,\n                             &new_meta);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to convert vector\");\n      return core::IndexError_Runtime;\n    }\n    ret = streamer_->add_with_id_impl(doc_id, new_vector.data(), new_meta,\n                                      context);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to add vector\");\n      return core::IndexError_Runtime;\n    }\n  } else {\n    int ret = streamer_->add_with_id_impl(doc_id, dense_vector.data,\n                                          input_vector_meta_, context);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to add vector\");\n      return core::IndexError_Runtime;\n    }\n  }\n  return 0;\n}\n\n\nint Index::_sparse_add(const VectorData &vector_data, const uint32_t doc_id,\n                       core::IndexContext::Pointer &context) {\n  if (!std::holds_alternative<SparseVector>(vector_data.vector)) {\n    LOG_ERROR(\"Invalid vector data\");\n    return core::IndexError_Runtime;\n  }\n  const SparseVector &sparse_vector =\n      
std::get<SparseVector>(vector_data.vector);\n\n  if (reformer_ != nullptr) {\n    std::string converted_sparse_values_buffer;\n    core::IndexQueryMeta new_meta;\n    int ret;\n    ret = reformer_->convert(sparse_vector.count, sparse_vector.get_indices(),\n                             sparse_vector.get_values(), input_vector_meta_,\n                             &converted_sparse_values_buffer, &new_meta);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to convert vector\");\n      return core::IndexError_Runtime;\n    }\n    ret = streamer_->add_with_id_impl(\n        doc_id, sparse_vector.count, sparse_vector.get_indices(),\n        converted_sparse_values_buffer.data(), new_meta, context);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to add vector\");\n      return core::IndexError_Runtime;\n    }\n  } else {\n    int ret = streamer_->add_with_id_impl(\n        doc_id, sparse_vector.count, sparse_vector.get_indices(),\n        sparse_vector.get_values(), input_vector_meta_, context);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to add vector\");\n      return core::IndexError_Runtime;\n    }\n  }\n  return 0;\n}\n\n\nint Index::_dense_search(const VectorData &vector_data,\n                         const BaseIndexQueryParam::Pointer &search_param,\n                         SearchResult *result,\n                         core::IndexContext::Pointer &context) {\n  if (!std::holds_alternative<DenseVector>(vector_data.vector)) {\n    LOG_ERROR(\"Invalid vector data\");\n    return core::IndexError_Runtime;\n  }\n  const DenseVector &dense_vector = std::get<DenseVector>(vector_data.vector);\n  auto vector = dense_vector.data;\n  // Check if need to transform feature\n  std::string new_vector;\n  core::IndexQueryMeta new_meta = input_vector_meta_;\n  if (reformer_ != nullptr) {\n    if (reformer_->transform(dense_vector.data, input_vector_meta_, &new_vector,\n                             &new_meta) != 0) {\n      LOG_ERROR(\"Failed to transform vector\");\n      
return core::IndexError_Runtime;\n    }\n    vector = new_vector.data();\n  }\n  // TODO: group by\n  if (search_param->bf_pks != nullptr) {\n    // should we eliminate the copy of bf_pks?\n    if (streamer_->search_bf_by_p_keys_impl(\n            vector, std::vector<std::vector<uint64_t>>{*search_param->bf_pks},\n            new_meta, 1, context) != 0) {\n      LOG_ERROR(\"Failed to search_bf_by_p_keys_impl vector\");\n      return core::IndexError_Runtime;\n    }\n  } else if (search_param->is_linear) {\n    if (streamer_->search_bf_impl(vector, new_meta, 1, context) != 0) {\n      LOG_ERROR(\"Failed to search vector\");\n      return core::IndexError_Runtime;\n    }\n  } else {\n    if (streamer_->search_impl(vector, new_meta, 1, context) != 0) {\n      LOG_ERROR(\"Failed to search vector\");\n      return core::IndexError_Runtime;\n    }\n  }\n  result->doc_list_ = std::move(context->result());\n\n  if (metric_->support_normalize()) {\n    for (uint32_t i = 0; i < result->doc_list_.size(); ++i) {\n      metric_->normalize(result->doc_list_[i].mutable_score());\n    }\n  }\n  if (reformer_) {\n    if (reformer_->normalize(dense_vector.data, input_vector_meta_,\n                             result->doc_list_) != 0) {\n      LOG_ERROR(\"Failed to normalize vector\");\n      return core::IndexError_Runtime;\n    }\n    if (context->fetch_vector() && reformer_->need_revert()) {\n      // TODO: use std::pmr to optimize memory allocation\n      result->reverted_vector_list_.resize(context->result().size());\n      for (uint32_t i = 0; i < context->result().size(); ++i) {\n        std::string &reverted_vector = result->reverted_vector_list_[i];\n        reverted_vector.resize(input_vector_meta_.dimension() *\n                               input_vector_meta_.unit_size());\n        if (reformer_->revert(context->result()[i].vector(), new_meta,\n                              &reverted_vector) != 0) {\n          LOG_ERROR(\"Failed to revert vector\");\n          return 
core::IndexError_Runtime;\n        }\n      }\n    }\n  }\n\n  return 0;\n}\n\n\nint Index::_sparse_search(const VectorData &vector_data,\n                          const BaseIndexQueryParam::Pointer &search_param,\n                          SearchResult *result,\n                          core::IndexContext::Pointer &context) {\n  if (!std::holds_alternative<SparseVector>(vector_data.vector)) {\n    LOG_ERROR(\"Invalid vector data\");\n    return core::IndexError_Runtime;\n  }\n  const SparseVector &sparse_vector =\n      std::get<SparseVector>(vector_data.vector);\n  auto indices = sparse_vector.get_indices();\n  auto values = sparse_vector.get_values();\n\n  std::string converted_sparse_values_buffer;\n  core::IndexQueryMeta new_meta = input_vector_meta_;\n  if (reformer_ != nullptr) {\n    if (reformer_->transform(sparse_vector.count, indices, values,\n                             input_vector_meta_,\n                             &converted_sparse_values_buffer, &new_meta) != 0) {\n      LOG_ERROR(\"Failed to transform vector\");\n      return core::IndexError_Runtime;\n    }\n    values = converted_sparse_values_buffer.data();\n  }\n\n  if (search_param->bf_pks != nullptr) {\n    if (streamer_->search_bf_by_p_keys_impl(\n            sparse_vector.count, indices, values,\n            std::vector<std::vector<uint64_t>>{*search_param->bf_pks}, new_meta,\n            context) != 0) {\n      LOG_ERROR(\"Failed to search_bf_by_p_keys_impl vector\");\n      return core::IndexError_Runtime;\n    }\n  } else if (search_param->is_linear) {\n    if (streamer_->search_bf_impl(sparse_vector.count, indices, values,\n                                  new_meta, context) != 0) {\n      LOG_ERROR(\"Failed to search vector\");\n      return core::IndexError_Runtime;\n    }\n  } else {\n    if (streamer_->search_impl(sparse_vector.count, indices, values, new_meta,\n                               context) != 0) {\n      LOG_ERROR(\"Failed to search vector\");\n      return 
core::IndexError_Runtime;\n    }\n  }\n  result->doc_list_ = std::move(context->result());\n\n  if (metric_->support_normalize()) {\n    for (uint32_t i = 0; i < result->doc_list_.size(); ++i) {\n      metric_->normalize(result->doc_list_[i].mutable_score());\n    }\n  }\n  if (reformer_) {\n    // TODO: no need to call reformer_->normalize() when sparse?\n    if (context->fetch_vector() && reformer_->need_revert()) {\n      // TODO: use std::pmr to optimize memory allocation\n      auto &result_doc_list = context->result();\n      result->reverted_sparse_values_list_.resize(result_doc_list.size());\n      for (uint32_t i = 0; i < result_doc_list.size(); ++i) {\n        auto &result_doc = result_doc_list[i].sparse_doc();\n        std::string &reverted_sparse_values =\n            result->reverted_sparse_values_list_[i];\n        reverted_sparse_values.resize(result_doc.sparse_count() *\n                                      input_vector_meta_.unit_size());\n        if (reformer_->revert(result_doc.sparse_count(),\n                              reinterpret_cast<const uint32_t *>(\n                                  result_doc.sparse_indices().data()),\n                              reinterpret_cast<const void *>(\n                                  result_doc.sparse_values().data()),\n                              new_meta, &reverted_sparse_values) != 0) {\n          LOG_ERROR(\"Failed to revert sparse vector\");\n          return core::IndexError_Runtime;\n        }\n      }\n    }\n  }\n  return 0;\n}\n\n\nint Index::Merge(const std::vector<Index::Pointer> &indexes,\n                 const IndexFilter &filter, const MergeOptions &options) {\n  if (indexes.empty()) {\n    return core::IndexError_Success;\n  }\n  // ivf need builder\n  auto reducer =\n      core::IndexFactory::CreateStreamerReducer(\"MixedStreamerReducer\");\n  if (reducer == nullptr) {\n    LOG_ERROR(\"Failed to create reducer\");\n    return core::IndexError_Runtime;\n  }\n\n  if 
(options.write_concurrency == 0) {\n    LOG_ERROR(\"Write concurrency must be greater than 0\");\n    return core::IndexError_InvalidArgument;\n  }\n  // must declare here to ensure its lifespan can cover reducer->reduce()\n  std::unique_ptr<ailego::ThreadPool> local_thread_pool = nullptr;\n  if (options.pool != nullptr) {\n    reducer->set_thread_pool(options.pool);\n  } else {\n    local_thread_pool =\n        std::make_unique<ailego::ThreadPool>(options.write_concurrency);\n    reducer->set_thread_pool(local_thread_pool.get());\n  }\n\n  ailego::Params reducer_params;\n  reducer_params.set(core::PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE,\n                     true);\n  reducer_params.set(core::PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS,\n                     options.write_concurrency);\n  if (reducer->init(reducer_params) != 0) {\n    LOG_ERROR(\"Failed to init reducer\");\n    return core::IndexError_Runtime;\n  }\n  if (reducer->set_target_streamer_wiht_info(builder_, streamer_, converter_,\n                                             reformer_,\n                                             input_vector_meta_) != 0) {\n    LOG_ERROR(\"Failed to set target streamer\");\n    return core::IndexError_Runtime;\n  }\n\n  for (const auto &index : indexes) {\n    if (reducer->feed_streamer_with_reformer(index->streamer_,\n                                             index->reformer_) != 0) {\n      LOG_ERROR(\"Failed to feed streamer\");\n      return core::IndexError_Runtime;\n    }\n  }\n  if (reducer->reduce(filter) != 0) {\n    LOG_ERROR(\"Failed to reduce\");\n    return core::IndexError_Runtime;\n  }\n  is_trained_ = true;\n  return 0;\n}\n\nint Index::_get_coarse_search_topk(\n    const BaseIndexQueryParam::Pointer &search_param) {\n  float scale_factor = search_param->refiner_param->scale_factor_;\n  if (scale_factor == 0) {\n    scale_factor = 1;\n  }\n  return floor(search_param->topk * scale_factor);\n}\n\nstd::string 
Index::get_metric_name(MetricType metric_type, bool is_sparse) {\n  if (is_sparse) {\n    switch (metric_type) {\n      case MetricType::kInnerProduct:\n        return \"InnerProductSparse\";\n      case MetricType::kMIPSL2sq:\n        return \"MipsSquaredEuclideanSparse\";\n      default:\n        return \"\";\n    }\n  } else {\n    switch (metric_type) {\n      case MetricType::kL2sq:\n        return \"SquaredEuclidean\";\n      case MetricType::kInnerProduct:\n        return \"InnerProduct\";\n      case MetricType::kCosine:\n        return \"Cosine\";\n      case MetricType::kMIPSL2sq:\n        return \"MipsSquaredEuclidean\";\n      default:\n        return \"\";\n    }\n  }\n}\n\n}  // namespace zvec::core_interface\n"
  },
  {
    "path": "src/core/interface/index_factory.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/interface/index_factory.h>\n#include <zvec/core/interface/index_param.h>\n#include \"core/interface/utils/utils.h\"\n\nnamespace zvec::core_interface {\n\n\nIndex::Pointer IndexFactory::CreateAndInitIndex(const BaseIndexParam &param) {\n  Index::Pointer ptr = nullptr;\n  // if (param.index_type == IndexType::kIVF) {\n  //   const IVFIndexParam *_param = dynamic_cast<const IVFIndexParam\n  //   *>(&param); ptr = std::make_shared<IVFIndex>(param);\n\n  //   if (_param->l1Index) {\n  //     // TODO: create l1 index\n  //   }\n  //   if (_param->l2Index) {\n  //     // TODO: create l2 index\n  //   }\n  // }\n  // if (param.index_type == IndexType::kHNSW) {\n  //   ptr = std::make_shared<HNSWIndex>(param);\n  // }\n  if (param.index_type == IndexType::kFlat) {\n    // ptr = std::make_shared<FlatIndex>(param);\n    ptr = std::make_shared<FlatIndex>();\n  } else if (param.index_type == IndexType::kHNSW) {\n    ptr = std::make_shared<HNSWIndex>();\n  } else if (param.index_type == IndexType::kIVF) {\n    ptr = std::make_shared<IVFIndex>();\n  } else if (param.index_type == IndexType::kHNSWRabitq) {\n    ptr = std::make_shared<HNSWRabitqIndex>();\n  } else {\n    LOG_ERROR(\"Unsupported index 
type: \");\n    return nullptr;\n  }\n\n  if (!ptr) {\n    LOG_ERROR(\"Failed to create index\");\n    return nullptr;\n  }\n  if (0 != ptr->Init(param)) {\n    LOG_ERROR(\"Failed to init index\");\n    return nullptr;\n  }\n  return ptr;\n}\n\nBaseIndexParam::Pointer IndexFactory::DeserializeIndexParamFromJson(\n    const std::string &json_str) {\n  ailego::JsonValue json_value;\n  if (!json_value.parse(json_str)) {\n    LOG_ERROR(\"Failed to parse json string: %s\", json_str.c_str());\n    return nullptr;\n  }\n  ailego::JsonObject json_obj = json_value.as_object();\n  ailego::JsonValue tmp_json_value;\n\n  IndexType index_type;\n\n  if (!extract_enum_from_json<IndexType>(json_obj, \"index_type\", index_type,\n                                         tmp_json_value)) {\n    LOG_ERROR(\"Failed to deserialize index type\");\n    return nullptr;\n  }\n\n  switch (index_type) {\n    case IndexType::kFlat: {\n      FlatIndexParam::Pointer param = std::make_shared<FlatIndexParam>();\n      if (!param->DeserializeFromJson(json_str)) {\n        LOG_ERROR(\"Failed to deserialize flat index param\");\n        return nullptr;\n      }\n      return param;\n    }\n    case IndexType::kHNSW: {\n      HNSWIndexParam::Pointer param = std::make_shared<HNSWIndexParam>();\n      if (!param->DeserializeFromJson(json_str)) {\n        LOG_ERROR(\"Failed to deserialize hnsw index param\");\n        return nullptr;\n      }\n      return param;\n    }\n    case IndexType::kIVF: {\n      IVFIndexParam::Pointer param = std::make_shared<IVFIndexParam>();\n      if (!param->DeserializeFromJson(json_str)) {\n        LOG_ERROR(\"Failed to deserialize hnsw index param\");\n        return nullptr;\n      }\n      return param;\n    }\n    case IndexType::kHNSWRabitq: {\n      HNSWRabitqIndexParam::Pointer param =\n          std::make_shared<HNSWRabitqIndexParam>();\n      if (!param->DeserializeFromJson(json_str)) {\n        LOG_ERROR(\"Failed to deserialize hnsqrabitq index param\");\n        
return nullptr;\n      }\n      return param;\n    }\n    default:\n      LOG_ERROR(\"Unsupported index type: %s\",\n                magic_enum::enum_name(index_type).data());\n      return nullptr;\n  }\n}\n\ntemplate <typename QueryParamType,\n          std::enable_if_t<\n              std::is_base_of_v<BaseIndexQueryParam, QueryParamType>, bool> >\nstd::string IndexFactory::QueryParamSerializeToJson(const QueryParamType &param,\n                                                    bool omit_empty_value) {\n  ailego::JsonObject json_obj;\n\n  // BaseIndexQueryParam\n  // omit filter & bf_pks\n  if (!omit_empty_value || param.topk != 0) {\n    json_obj.set(\"topk\", ailego::JsonValue(param.topk));\n  }\n  if (!omit_empty_value || param.fetch_vector) {\n    json_obj.set(\"fetch_vector\", ailego::JsonValue(param.fetch_vector));\n  }\n  if (!omit_empty_value || param.radius != 0.0f) {\n    json_obj.set(\"radius\", ailego::JsonValue(param.radius));\n  }\n  if (!omit_empty_value || param.is_linear) {\n    json_obj.set(\"is_linear\", ailego::JsonValue(param.is_linear));\n  }\n\n  IndexType index_type{IndexType::kNone};\n  if constexpr (std::is_same_v<QueryParamType, FlatQueryParam>) {\n    // index_type\n    index_type = IndexType::kFlat;\n  } else if constexpr (std::is_same_v<QueryParamType, HNSWQueryParam>) {\n    if (!omit_empty_value || param.ef_search != 0) {\n      json_obj.set(\"ef_search\", ailego::JsonValue(param.ef_search));\n    }\n    index_type = IndexType::kHNSW;\n  } else if constexpr (std::is_same_v<QueryParamType, IVFQueryParam>) {\n    if (!omit_empty_value || param.nprobe != 0) {\n      json_obj.set(\"nprobe\", ailego::JsonValue(param.nprobe));\n    }\n    index_type = IndexType::kIVF;\n    // json_obj.set(\"l1QueryParam\",\n    // ailego::JsonValue(QueryParamSerializeToJson(param.l1QueryParam)));\n    // json_obj.set(\"l2QueryParam\",\n    // ailego::JsonValue(QueryParamSerializeToJson(param.l2QueryParam)));\n  } else if constexpr 
(std::is_same_v<QueryParamType, HNSWRabitqQueryParam>) {\n    if (!omit_empty_value || param.ef_search != 0) {\n      json_obj.set(\"ef_search\", ailego::JsonValue(param.ef_search));\n    }\n    index_type = IndexType::kHNSWRabitq;\n  }\n\n  json_obj.set(\"index_type\",\n               ailego::JsonValue(magic_enum::enum_name(index_type).data()));\n\n  return ailego::JsonValue(json_obj).as_json_string().as_stl_string();\n}\n\ntemplate std::string\nIndexFactory::QueryParamSerializeToJson<BaseIndexQueryParam>(\n    const BaseIndexQueryParam &param, bool omit_empty_value);\ntemplate std::string IndexFactory::QueryParamSerializeToJson<FlatQueryParam>(\n    const FlatQueryParam &param, bool omit_empty_value);\ntemplate std::string IndexFactory::QueryParamSerializeToJson<HNSWQueryParam>(\n    const HNSWQueryParam &param, bool omit_empty_value);\ntemplate std::string IndexFactory::QueryParamSerializeToJson<IVFQueryParam>(\n    const IVFQueryParam &param, bool omit_empty_value);\n\ntemplate <typename QueryParamType,\n          std::enable_if_t<\n              std::is_base_of_v<BaseIndexQueryParam, QueryParamType>, bool> >\ntypename QueryParamType::Pointer IndexFactory::QueryParamDeserializeFromJson(\n    const std::string &json_str) {\n  ailego::JsonValue tmp_json_value;\n  if (!tmp_json_value.parse(json_str)) {\n    LOG_ERROR(\"Failed to parse json string: %s\", json_str.c_str());\n    return nullptr;\n  }\n  ailego::JsonObject json_obj = tmp_json_value.as_object();\n\n  auto parse_common_fields = [&](auto &param) -> bool {\n    if (!extract_value_from_json(json_obj, \"topk\", param->topk,\n                                 tmp_json_value)) {\n      LOG_ERROR(\"Failed to deserialize topk\");\n      return false;\n    }\n\n    if (!extract_value_from_json(json_obj, \"fetch_vector\", param->fetch_vector,\n                                 tmp_json_value)) {\n      LOG_ERROR(\"Failed to deserialize fetch_vector\");\n      return false;\n    }\n\n    if 
(!extract_value_from_json(json_obj, \"radius\", param->radius,\n                                 tmp_json_value)) {\n      LOG_ERROR(\"Failed to deserialize radius\");\n      return false;\n    }\n\n    if (!extract_value_from_json(json_obj, \"is_linear\", param->is_linear,\n                                 tmp_json_value)) {\n      LOG_ERROR(\"Failed to deserialize is_linear\");\n      return false;\n    }\n    return true;\n  };\n\n  IndexType index_type;\n\n  if (!extract_enum_from_json<IndexType>(json_obj, \"index_type\", index_type,\n                                         tmp_json_value)) {\n    LOG_ERROR(\"Failed to deserialize index type\");\n    return nullptr;\n  }\n\n  if constexpr (std::is_same_v<QueryParamType, BaseIndexQueryParam>) {\n    if (index_type == IndexType::kFlat) {\n      auto param = std::make_shared<FlatQueryParam>();\n      if (!parse_common_fields(param)) {\n        return nullptr;\n      }\n      return param;\n    } else if (index_type == IndexType::kHNSW) {\n      auto param = std::make_shared<HNSWQueryParam>();\n      if (!parse_common_fields(param)) {\n        return nullptr;\n      }\n      if (!extract_value_from_json(json_obj, \"ef_search\", param->ef_search,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize ef_search\");\n        return nullptr;\n      }\n      return param;\n    } else if (index_type == IndexType::kIVF) {\n      auto param = std::make_shared<IVFQueryParam>();\n      if (!parse_common_fields(param)) {\n        return nullptr;\n      }\n      if (!extract_value_from_json(json_obj, \"nprobe\", param->nprobe,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize nprobe\");\n        return nullptr;\n      }\n      return param;\n    } else if (index_type == IndexType::kHNSWRabitq) {\n      auto param = std::make_shared<HNSWRabitqQueryParam>();\n      if (!parse_common_fields(param)) {\n        return nullptr;\n      }\n     
 if (!extract_value_from_json(json_obj, \"ef_search\", param->ef_search,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize ef_search\");\n        return nullptr;\n      }\n      return param;\n    } else {\n      LOG_ERROR(\"Unsupported index type: %s\",\n                magic_enum::enum_name(index_type).data());\n      return nullptr;\n    }\n  } else {\n    auto param = std::make_shared<QueryParamType>();\n    if (!parse_common_fields(param)) {\n      return nullptr;\n    }\n    if constexpr (std::is_same_v<QueryParamType, FlatQueryParam>) {\n    } else if constexpr (std::is_same_v<QueryParamType, HNSWQueryParam>) {\n      if (!extract_value_from_json(json_obj, \"ef_search\", param->ef_search,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize ef_search\");\n        return nullptr;\n      }\n    } else if constexpr (std::is_same_v<QueryParamType, IVFQueryParam>) {\n      if (!extract_value_from_json(json_obj, \"nprobe\", param->nprobe,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize nprobe\");\n        return nullptr;\n      }\n    } else if constexpr (std::is_same_v<QueryParamType, HNSWRabitqQueryParam>) {\n      if (!extract_value_from_json(json_obj, \"ef_search\", param->ef_search,\n                                   tmp_json_value)) {\n        LOG_ERROR(\"Failed to deserialize ef_search\");\n        return nullptr;\n      }\n    } else {\n      LOG_ERROR(\"Unsupported index type: %s\",\n                magic_enum::enum_name(index_type).data());\n      return nullptr;\n    }\n    return param;\n  }\n}\n\ntemplate BaseIndexQueryParam::Pointer\nIndexFactory::QueryParamDeserializeFromJson<BaseIndexQueryParam>(\n    const std::string &json_str);\ntemplate FlatQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson<\n    FlatQueryParam>(const std::string &json_str);\ntemplate HNSWQueryParam::Pointer 
IndexFactory::QueryParamDeserializeFromJson<\n    HNSWQueryParam>(const std::string &json_str);\ntemplate IVFQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson<\n    IVFQueryParam>(const std::string &json_str);\n\n}  // namespace zvec::core_interface\n"
  },
  {
    "path": "src/core/interface/index_param.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/core/interface/index_param.h>\n#include \"core/interface/utils/utils.h\"\n\nnamespace zvec {\nnamespace core_interface {\nailego::JsonObject BaseIndexParam::SerializeToJsonObject(\n    bool omit_empty_value) const {\n  ailego::JsonObject json_obj;\n\n  if (!omit_empty_value || index_type != IndexType::kNone) {\n    json_obj.set(\"index_type\",\n                 ailego::JsonValue(magic_enum::enum_name(index_type).data()));\n  }\n  if (!omit_empty_value || metric_type != MetricType::kNone) {\n    json_obj.set(\"metric_type\",\n                 ailego::JsonValue(magic_enum::enum_name(metric_type).data()));\n  }\n  if (!omit_empty_value || dimension != 0) {\n    json_obj.set(\"dimension\", ailego::JsonValue(dimension));\n  }\n  if (!omit_empty_value || version != 0) {\n    json_obj.set(\"version\", ailego::JsonValue(version));\n  }\n  if (!omit_empty_value || is_sparse) {\n    json_obj.set(\"is_sparse\", ailego::JsonValue(is_sparse));\n  }\n  if (!omit_empty_value || data_type != DataType::DT_UNDEFINED) {\n    json_obj.set(\"data_type\",\n                 ailego::JsonValue(magic_enum::enum_name(data_type).data()));\n  }\n  if (!omit_empty_value || use_id_map) {\n    json_obj.set(\"use_id_map\", ailego::JsonValue(use_id_map));\n  }\n  if (!omit_empty_value || is_huge_page) {\n    
json_obj.set(\"is_huge_page\", ailego::JsonValue(is_huge_page));\n  }\n\n  // if (preprocess_param) {\n  //   json.set(\"preprocess_param\", preprocess_param->SerializeToJson());\n  // }\n  if (!omit_empty_value || quantizer_param.type != QuantizerType::kNone) {\n    json_obj.set(\"quantizer_param\",\n                 quantizer_param.SerializeToJsonObject(omit_empty_value));\n  }\n  // if (refiner_param) {\n  //   json.set(\"refiner_param\", refiner_param->SerializeToJson());\n  // }\n  // if (default_query_param) {\n  //   json.set(\"default_query_param\",\n  //   default_query_param->SerializeToJson());\n  // }\n  return json_obj;\n}\n\n\nailego::JsonObject FlatIndexParam::SerializeToJsonObject(\n    bool omit_empty_value) const {\n  auto json_obj = BaseIndexParam::SerializeToJsonObject(omit_empty_value);\n  if (!omit_empty_value || major_order != IndexMeta::MajorOrder::MO_UNDEFINED) {\n    json_obj.set(\"major_order\",\n                 ailego::JsonValue(magic_enum::enum_name(major_order).data()));\n  }\n  return json_obj;\n}\n\nailego::JsonObject HNSWIndexParam::SerializeToJsonObject(\n    bool omit_empty_value) const {\n  auto json_obj = BaseIndexParam::SerializeToJsonObject(omit_empty_value);\n  json_obj.set(\"m\", ailego::JsonValue(m));\n  json_obj.set(\"ef_construction\", ailego::JsonValue(ef_construction));\n  return json_obj;\n}\n\nbool BaseIndexParam::DeserializeFromJsonObject(\n    const ailego::JsonObject &json_obj) {\n  DESERIALIZE_ENUM_FIELD(json_obj, index_type, IndexType);\n  DESERIALIZE_ENUM_FIELD(json_obj, metric_type, MetricType);\n  DESERIALIZE_ENUM_FIELD(json_obj, data_type, DataType);\n\n  DESERIALIZE_VALUE_FIELD(json_obj, dimension);\n  DESERIALIZE_VALUE_FIELD(json_obj, version);\n  DESERIALIZE_VALUE_FIELD(json_obj, is_sparse);\n  DESERIALIZE_VALUE_FIELD(json_obj, use_id_map);\n  DESERIALIZE_VALUE_FIELD(json_obj, is_huge_page);\n\n  ailego::JsonValue tmp_json_value;\n  if (json_obj.has(\"quantizer_param\")) {\n    if 
(json_obj.get(\"quantizer_param\", &tmp_json_value);\n        tmp_json_value.is_object()) {\n      quantizer_param.DeserializeFromJsonObject(tmp_json_value.as_object());\n    }\n  }\n\n  return true;\n}\n\nbool FlatIndexParam::DeserializeFromJsonObject(\n    const ailego::JsonObject &json_obj) {\n  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {\n    return false;\n  }\n\n  if (index_type != IndexType::kFlat) {\n    LOG_ERROR(\"index_type is not kFlat\");\n    return false;\n  }\n\n  DESERIALIZE_ENUM_FIELD(json_obj, major_order, IndexMeta::MajorOrder);\n  return true;\n}\n\nbool HNSWIndexParam::DeserializeFromJsonObject(\n    const ailego::JsonObject &json_obj) {\n  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {\n    return false;\n  }\n\n  if (index_type != IndexType::kHNSW) {\n    LOG_ERROR(\"index_type is not kHNSW\");\n    return false;\n  }\n\n  DESERIALIZE_VALUE_FIELD(json_obj, m);\n  DESERIALIZE_VALUE_FIELD(json_obj, ef_construction);\n\n  return true;\n}\n\nbool HNSWRabitqIndexParam::DeserializeFromJsonObject(\n    const ailego::JsonObject &json_obj) {\n  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {\n    return false;\n  }\n\n  if (index_type != IndexType::kHNSWRabitq) {\n    LOG_ERROR(\"index_type is not kHNSWRabitq\");\n    return false;\n  }\n\n  DESERIALIZE_VALUE_FIELD(json_obj, m);\n  DESERIALIZE_VALUE_FIELD(json_obj, ef_construction);\n  DESERIALIZE_VALUE_FIELD(json_obj, total_bits);\n  DESERIALIZE_VALUE_FIELD(json_obj, num_clusters);\n  DESERIALIZE_VALUE_FIELD(json_obj, sample_count);\n\n  return true;\n}\n\nailego::JsonObject HNSWRabitqIndexParam::SerializeToJsonObject(\n    bool omit_empty_value) const {\n  auto json_obj = BaseIndexParam::SerializeToJsonObject(omit_empty_value);\n  json_obj.set(\"m\", ailego::JsonValue(m));\n  json_obj.set(\"ef_construction\", ailego::JsonValue(ef_construction));\n  json_obj.set(\"total_bits\", ailego::JsonValue(total_bits));\n  json_obj.set(\"num_clusters\", 
ailego::JsonValue(num_clusters));\n  if (!omit_empty_value || sample_count != 0) {\n    json_obj.set(\"sample_count\", ailego::JsonValue(sample_count));\n  }\n  return json_obj;\n}\n\nailego::JsonObject QuantizerParam::SerializeToJsonObject(\n    bool omit_empty_value) const {\n  ailego::JsonObject json_obj;\n  if (!omit_empty_value || type != QuantizerType::kNone) {\n    json_obj.set(\"type\",\n                 zvec::ailego::JsonValue(magic_enum::enum_name(type).data()));\n  }\n  return json_obj;\n}\n\nbool QuantizerParam::DeserializeFromJsonObject(\n    const ailego::JsonObject &json_obj) {\n  DESERIALIZE_ENUM_FIELD(json_obj, type, QuantizerType);\n  return true;\n}\n\n// bool BaseIndexQueryParam::DeserializeFromJsonObject(\n//     const ailego::JsonObject &json_obj) {\n//   DESERIALIZE_ENUM_FIELD(json_obj, index_type, IndexType);\n//   DESERIALIZE_VALUE_FIELD(json_obj, topk);\n//   DESERIALIZE_VALUE_FIELD(json_obj, fetch_vector);\n//   DESERIALIZE_VALUE_FIELD(json_obj, radius);\n//   DESERIALIZE_VALUE_FIELD(json_obj, is_linear);\n//   return true;\n// }\n\n// ailego::JsonObject BaseIndexQueryParam::SerializeToJsonObject(\n//     bool omit_empty_value) const {\n//   ailego::JsonObject json_obj;\n//   if (!omit_empty_value || index_type != IndexType::kNone) {\n//     json_obj.set(\"index_type\",\n//                  ailego::JsonValue(magic_enum::enum_name(index_type).data()));\n//   }\n//   if (!omit_empty_value || topk != 0) {\n//     json_obj.set(\"topk\", ailego::JsonValue(topk));\n//   }\n//   if (!omit_empty_value || fetch_vector) {\n//     json_obj.set(\"fetch_vector\", ailego::JsonValue(fetch_vector));\n//   }\n//   if (!omit_empty_value || radius != 0.0f) {\n//     json_obj.set(\"radius\", ailego::JsonValue(radius));\n//   }\n//   if (!omit_empty_value || is_linear) {\n//     json_obj.set(\"is_linear\", ailego::JsonValue(is_linear));\n//   }\n//   return json_obj;\n// }\n\n// bool FlatQueryParam::DeserializeFromJsonObject(\n//     const ailego::JsonObject 
&json_obj) {\n//   if (!BaseIndexQueryParam::DeserializeFromJsonObject(json_obj)) {\n//     return false;\n//   }\n//   if (index_type != IndexType::kFlat) {\n//     LOG_ERROR(\"index_type is not kFlat\");\n//     return false;\n//   }\n//   return true;\n// }\n\n// ailego::JsonObject FlatQueryParam::SerializeToJsonObject(\n//     bool omit_empty_value) const {\n//   auto json_obj =\n//       BaseIndexQueryParam::SerializeToJsonObject(omit_empty_value);\n//   return json_obj;\n// }\n\n// bool HNSWQueryParam::DeserializeFromJsonObject(\n//     const ailego::JsonObject &json_obj) {\n//   if (!BaseIndexQueryParam::DeserializeFromJsonObject(json_obj)) {\n//     return false;\n//   }\n//   if (index_type != IndexType::kHNSW) {\n//     LOG_ERROR(\"index_type is not kHNSW\");\n//     return false;\n//   }\n//   DESERIALIZE_VALUE_FIELD(json_obj, ef_search);\n//   return true;\n// }\n\n// ailego::JsonObject HNSWQueryParam::SerializeToJsonObject(\n//     bool omit_empty_value) const {\n//   auto json_obj =\n//       BaseIndexQueryParam::SerializeToJsonObject(omit_empty_value);\n//   if (!omit_empty_value || ef_search != 0) {\n//     json_obj.set(\"ef_search\", ailego::JsonValue(ef_search));\n//   }\n//   return json_obj;\n// }\n\n\n}  // namespace core_interface\n}  // namespace zvec"
  },
  {
    "path": "src/core/interface/indexes/flat_index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <string>\n#include <zvec/core/interface/index.h>\n#include \"algorithm/flat/flat_utility.h\"\n\nnamespace zvec::core_interface {\n\nint FlatIndex::CreateAndInitStreamer(const BaseIndexParam &param) {\n  param_ = dynamic_cast<const FlatIndexParam &>(param);\n\n  proxima_index_params_.set(core::PARAM_FLAT_COLUMN_MAJOR_ORDER,\n                            param_.major_order == IndexMeta::MO_COLUMN);\n  proxima_index_params_.set(core::PARAM_FLAT_USE_ID_MAP, param_.use_id_map);\n  if (is_sparse_) {\n    streamer_ = core::IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  } else {\n    streamer_ = core::IndexFactory::CreateStreamer(\"FlatStreamer\");\n  }\n\n  if (ailego_unlikely(!streamer_)) {\n    LOG_ERROR(\"Failed to create streamer\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(\n          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {\n    LOG_ERROR(\"Failed to init streamer\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n}\n\nint FlatIndex::_prepare_for_search(\n    const VectorData & /*vector_data*/,\n    const BaseIndexQueryParam::Pointer &search_param,\n    core::IndexContext::Pointer &context) {\n  auto flat_search_param =\n      std::dynamic_pointer_cast<FlatQueryParam>(search_param);\n\n  if (ailego_unlikely(!flat_search_param)) {\n    
LOG_ERROR(\"Invalid search param type, expected FlatQueryParam\");\n    return core::IndexError_Runtime;\n  }\n\n  context->set_topk(flat_search_param->topk);\n  context->set_fetch_vector(flat_search_param->fetch_vector);\n  if (flat_search_param->filter) {\n    context->set_filter(std::move(*flat_search_param->filter));\n  }\n  if (flat_search_param->radius > 0.0f) {\n    context->set_threshold(flat_search_param->radius);\n  }\n\n  return 0;\n}\n\n\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/core/interface/indexes/hnsw_index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <string>\n#include <zvec/core/interface/index.h>\n#include \"algorithm/hnsw/hnsw_params.h\"\n#include \"algorithm/hnsw_sparse/hnsw_sparse_params.h\"\n\nnamespace zvec::core_interface {\n\nint HNSWIndex::CreateAndInitStreamer(const BaseIndexParam &param) {\n  param_ = dynamic_cast<const HNSWIndexParam &>(param);\n\n  // valid\n  param_.ef_construction = std::max(1, std::min(2048, param_.ef_construction));\n  param_.m = std::max(5, std::min(1024, param_.m));\n\n  if (is_sparse_) {\n    proxima_index_params_.set(core::PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION,\n                              param_.ef_construction);\n    proxima_index_params_.set(\n        core::PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, param_.m);\n\n    // TODO: add_vector_with_id & fetch_by_id don't rely on this param\n    proxima_index_params_.set(\n        core::PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n\n    // TODO: use index params'  default query param here\n    proxima_index_params_.set(core::PARAM_HNSW_SPARSE_STREAMER_EF,\n                              kDefaultHnswEfSearch);\n    streamer_ = core::IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n\n  } else {\n    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_EFCONSTRUCTION,\n                              param_.ef_construction);\n    
proxima_index_params_.set(core::PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT,\n                              param_.m);\n\n    // TODO: add_vector_with_id & fetch_by_id don't rely on this param\n    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE,\n                              true);\n\n    // TODO: use index params' default query param here\n    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_EF,\n                              kDefaultHnswEfSearch);\n    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_USE_ID_MAP,\n                              param_.use_id_map);\n    streamer_ = core::IndexFactory::CreateStreamer(\"HnswStreamer\");\n  }\n\n  if (ailego_unlikely(!streamer_)) {\n    LOG_ERROR(\"Failed to create streamer\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(\n          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {\n    LOG_ERROR(\"Failed to init streamer\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n}\n\n\nint HNSWIndex::_prepare_for_search(\n    const VectorData & /*vector_data*/,\n    const BaseIndexQueryParam::Pointer &search_param,\n    core::IndexContext::Pointer &context) {\n  const auto &hnsw_search_param =\n      std::dynamic_pointer_cast<HNSWQueryParam>(search_param);\n\n  if (ailego_unlikely(!hnsw_search_param)) {\n    LOG_ERROR(\"Invalid search param type, expected HNSWQueryParam\");\n    return core::IndexError_Runtime;\n  }\n\n  if (0 >= hnsw_search_param->ef_search ||\n      hnsw_search_param->ef_search > 2048) {\n    LOG_ERROR(\n        \"ef_search must be greater than 0 and less than or equal to 2048.\");\n    return core::IndexError_Runtime;\n  }\n\n  context->set_topk(hnsw_search_param->topk);\n  context->set_fetch_vector(hnsw_search_param->fetch_vector);\n  if (hnsw_search_param->filter) {\n    context->set_filter(std::move(*hnsw_search_param->filter));\n  }\n  if (hnsw_search_param->radius > 0.0f) {\n    
context->set_threshold(hnsw_search_param->radius);\n  }\n  ailego::Params params;\n  const int real_search_ef =\n      std::max(1u, std::min(2048u, hnsw_search_param->ef_search));\n  params.set(core::PARAM_HNSW_STREAMER_EF, real_search_ef);\n  context->update(params);\n  return 0;\n}\n\nint HNSWIndex::_get_coarse_search_topk(\n    const BaseIndexQueryParam::Pointer &search_param) {\n  const auto &hnsw_search_param =\n      std::dynamic_pointer_cast<HNSWQueryParam>(search_param);\n\n  // scale_factor doesn't take effect for hnsw.\n  auto ret = std::max(search_param->topk, hnsw_search_param->ef_search);\n  return ret;\n}\n\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/core/interface/indexes/hnsw_rabitq_index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <string>\n#include <zvec/core/interface/index.h>\n#include \"zvec/core/framework/index_error.h\"\n\n#if RABITQ_SUPPORTED\n#include \"algorithm/hnsw_rabitq/hnsw_rabitq_params.h\"\n#include \"algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h\"\n#include \"algorithm/hnsw_rabitq/rabitq_params.h\"\n#endif\n\nnamespace zvec::core_interface {\n\nint HNSWRabitqIndex::CreateAndInitStreamer(const BaseIndexParam &param) {\n#if !RABITQ_SUPPORTED\n  LOG_ERROR(\"RaBitQ is not supported on this platform (Linux x86_64 only)\");\n  return core::IndexError_Unsupported;\n#else\n  param_ = dynamic_cast<const HNSWRabitqIndexParam &>(param);\n\n  if (is_sparse_) {\n    LOG_ERROR(\"Sparse index is not supported\");\n    return core::IndexError_Runtime;\n  }\n\n  if (param.dimension < core::kMinRabitqDimSize ||\n      param.dimension > core::kMaxRabitqDimSize) {\n    LOG_ERROR(\"Unsupported dimension: %d\", param.dimension);\n    return core::IndexError_Unsupported;\n  }\n\n  // validate parameters\n  param_.ef_construction = std::max(1, std::min(2048, param_.ef_construction));\n  param_.m = std::max(5, std::min(1024, param_.m));\n\n  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION,\n                            param_.ef_construction);\n  
proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT,\n                            param_.m);\n  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE,\n                            true);\n  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_EF,\n                            kDefaultHnswEfSearch);\n  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP,\n                            param_.use_id_map);\n  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_GENERAL_DIMENSION,\n                            input_vector_meta_.dimension());\n  proxima_index_params_.set(core::PARAM_RABITQ_TOTAL_BITS, param_.total_bits);\n  // num_clusters, sample_count are parameters for rabitq converter\n  // proxima_index_params_.set(core::PARAM_RABITQ_NUM_CLUSTERS,\n  //                           param_.num_clusters);\n\n  auto streamer = std::make_shared<core::HnswRabitqStreamer>();\n  streamer->set_provider(param_.provider);\n  streamer->set_reformer(param_.reformer);\n  streamer_ = streamer;\n\n  if (ailego_unlikely(!streamer_)) {\n    LOG_ERROR(\"Failed to create HnswRabitqStreamer\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(\n          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {\n    LOG_ERROR(\"Failed to init HnswRabitqStreamer\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n#endif  // RABITQ_SUPPORTED\n}\n\nint HNSWRabitqIndex::_prepare_for_search(\n    const VectorData & /*vector_data*/,\n    const BaseIndexQueryParam::Pointer &search_param,\n    core::IndexContext::Pointer &context) {\n#if !RABITQ_SUPPORTED\n  LOG_ERROR(\"RaBitQ is not supported on this platform (Linux x86_64 only)\");\n  return core::IndexError_Unsupported;\n#else\n  const auto &hnsw_search_param =\n      std::dynamic_pointer_cast<HNSWRabitqQueryParam>(search_param);\n\n  if (ailego_unlikely(!hnsw_search_param)) {\n    LOG_ERROR(\"Invalid search param type, expected 
HNSWRabitqQueryParam\");\n    return core::IndexError_Runtime;\n  }\n\n  if (0 >= hnsw_search_param->ef_search ||\n      hnsw_search_param->ef_search > 2048) {\n    LOG_ERROR(\n        \"ef_search must be greater than 0 and less than or equal to 2048.\");\n    return core::IndexError_Runtime;\n  }\n\n  context->set_topk(hnsw_search_param->topk);\n  context->set_fetch_vector(hnsw_search_param->fetch_vector);\n  if (hnsw_search_param->filter) {\n    context->set_filter(std::move(*hnsw_search_param->filter));\n  }\n  if (hnsw_search_param->radius > 0.0f) {\n    context->set_threshold(hnsw_search_param->radius);\n  }\n  ailego::Params params;\n  const int real_search_ef =\n      std::max(1u, std::min(2048u, hnsw_search_param->ef_search));\n  params.set(core::PARAM_HNSW_RABITQ_STREAMER_EF, real_search_ef);\n  context->update(params);\n  return 0;\n#endif  // RABITQ_SUPPORTED\n}\n\nint HNSWRabitqIndex::_get_coarse_search_topk(\n    const BaseIndexQueryParam::Pointer &search_param) {\n#if !RABITQ_SUPPORTED\n  LOG_ERROR(\"RaBitQ is not supported on this platform (Linux x86_64 only)\");\n  return -1;\n#else\n  const auto &hnsw_search_param =\n      std::dynamic_pointer_cast<HNSWRabitqQueryParam>(search_param);\n\n  auto ret = std::max(search_param->topk, hnsw_search_param->ef_search);\n  return ret;\n#endif  // RABITQ_SUPPORTED\n}\n\n\n}  // namespace zvec::core_interface\n"
  },
  {
    "path": "src/core/interface/indexes/ivf_index.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <string>\n#include <zvec/core/interface/index.h>\n#include \"algorithm/ivf/ivf_params.h\"\n\nnamespace zvec::core_interface {\n\nstatic constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();\n\nint IVFIndex::CreateAndInitStreamer(const BaseIndexParam &param) {\n  if (is_sparse_) {\n    LOG_ERROR(\"IVF Index not support sparse vector\");\n    return core::IndexError_InvalidArgument;\n  }\n\n  param_ = dynamic_cast<const IVFIndexParam &>(param);\n  param_.nlist = std::max(1, std::min(1024, param_.nlist));\n  param_.niters = std::max(1, std::min(1024, param_.niters));\n\n  proxima_index_params_.set(core::PARAM_IVF_BUILDER_CENTROID_COUNT,\n                            param_.nlist);\n\n  // TODO: add_vector_with_id & fetch_by_id don't rely on this param\n  builder_ = core::IndexFactory::CreateBuilder(\"IVFBuilder\");\n  streamer_ = core::IndexFactory::CreateStreamer(\"IVFStreamer\");\n\n  if (ailego_unlikely(!builder_)) {\n    LOG_ERROR(\"Failed to create builder\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(!streamer_)) {\n    LOG_ERROR(\"Failed to create streamer\");\n    return core::IndexError_Runtime;\n  }\n  IndexMeta real_meta;\n  if (converter_) {\n    real_meta = converter_->meta();\n  } else {\n    real_meta = proxima_index_meta_;\n  }\n  if 
(ailego_unlikely(builder_->init(real_meta, proxima_index_params_) != 0)) {\n    LOG_ERROR(\"Failed to init builder\");\n    return core::IndexError_Runtime;\n  }\n  if (ailego_unlikely(streamer_->init(real_meta, proxima_index_params_) != 0)) {\n    LOG_ERROR(\"Failed to init streamer\");\n    return core::IndexError_Runtime;\n  }\n  return 0;\n}\n\nint IVFIndex::Open(const std::string &file_path,\n                   StorageOptions storage_options) {\n  ailego::Params storage_params;\n  file_path_ = file_path;\n  is_read_only_ = storage_options.read_only;\n  switch (storage_options.type) {\n    case StorageOptions::StorageType::kMMAP: {\n      storage_ = core::IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n      if (storage_ == nullptr) {\n        LOG_ERROR(\"Failed to create MMapFileStorage\");\n        return core::IndexError_Runtime;\n      }\n      int ret = storage_->init(storage_params);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to init MMapFileStorage, path: %s, err: %s\",\n                  file_path_.c_str(), core::IndexError::What(ret));\n        return ret;\n      }\n      break;\n    }\n    case StorageOptions::StorageType::kBufferPool: {\n      storage_ = core::IndexFactory::CreateStorage(\"BufferStorage\");\n      if (storage_ == nullptr) {\n        LOG_ERROR(\"Failed to create BufferStorage\");\n        return core::IndexError_Runtime;\n      }\n      int ret = storage_->init(storage_params);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to init BufferStorage, path: %s, err: %s\",\n                  file_path_.c_str(), core::IndexError::What(ret));\n        return ret;\n      }\n      break;\n    }\n    default: {\n      LOG_ERROR(\"Unsupported storage type\");\n      return core::IndexError_Unsupported;\n    }\n  }\n\n  if (is_read_only_ || !storage_options.create_new) {\n    // read_options.create_new\n    int ret = storage_->open(file_path_, false);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to open storage, path: %s, err: 
%s\", file_path_.c_str(),\n                core::IndexError::What(ret));\n      return core::IndexError_Runtime;\n    }\n    if (streamer_ == nullptr || streamer_->open(storage_) != 0) {\n      LOG_ERROR(\"Failed to open streamer, path: %s\", file_path_.c_str());\n      return core::IndexError_Runtime;\n    }\n    is_trained_ = true;\n  }\n  is_open_ = true;\n  return 0;\n}\n\nint IVFIndex::GenerateHolder() {\n  if (param_.data_type == DataType::DT_FP16) {\n    auto holder =\n        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_FP16>>(\n            param_.dimension);\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<uint16_t> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    holder_ = holder;\n  } else if (param_.data_type == DataType::DT_FP32) {\n    auto holder =\n        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_FP32>>(\n            param_.dimension);\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<float> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    holder_ = holder;\n  } else if (param_.data_type == DataType::DT_INT8) {\n    auto holder =\n        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_INT8>>(\n            param_.dimension);\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<uint8_t> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    holder_ = holder;\n  } else {\n    
LOG_ERROR(\"data_type is not support\");\n    return core::IndexError_Runtime;\n  }\n  if (converter_) {\n    core::IndexConverter::TrainAndTransform(converter_, holder_);\n    holder_ = converter_->result();\n  }\n  return 0;\n}\n\nint IVFIndex::Add(const VectorData &vector, uint32_t doc_id) {\n  if (is_trained_) {\n    LOG_ERROR(\"this IVF index is trained\");\n    return core::IndexError_Runtime;\n  }\n  if (!std::holds_alternative<DenseVector>(vector.vector)) {\n    LOG_ERROR(\"Invalid vector data\");\n    return core::IndexError_Runtime;\n  }\n  const DenseVector &dense_vector = std::get<DenseVector>(vector.vector);\n  std::string out_vector_buffer = std::string(\n      static_cast<const char *>(dense_vector.data),\n      input_vector_meta_.dimension() * input_vector_meta_.unit_size());\n\n  std::lock_guard<std::mutex> lock(mutex_);\n  while (doc_cache_.size() <= doc_id) {\n    std::string fake_data(\n        input_vector_meta_.dimension() * input_vector_meta_.unit_size(), 0);\n    doc_cache_.push_back(std::make_pair(kInvalidKey, fake_data));\n  }\n  doc_cache_[doc_id] = std::make_pair(doc_id, out_vector_buffer);\n  return 0;\n}\n\nint IVFIndex::Train() {\n  GenerateHolder();\n  builder_->train(holder_);\n  builder_->build(holder_);\n  auto dumper = core::IndexFactory::CreateDumper(\"FileDumper\");\n\n  dumper->create(file_path_);\n  builder_->dump(dumper);\n  dumper->close();\n  int ret = storage_->open(file_path_, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open storage, path: %s, err: %s\", file_path_.c_str(),\n              core::IndexError::What(ret));\n    return core::IndexError_Runtime;\n  }\n  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {\n    LOG_ERROR(\"Failed to open streamer, path: %s\", file_path_.c_str());\n    return core::IndexError_Runtime;\n  }\n  is_trained_ = true;\n  return 0;\n}\n\nint IVFIndex::_dense_fetch(const uint32_t doc_id,\n                           VectorDataBuffer *vector_data_buffer) {\n  if 
(is_trained_) {\n    return Index::_dense_fetch(doc_id, vector_data_buffer);\n  } else {\n    DenseVectorBuffer dense_vector_buffer;\n    std::string &out_vector_buffer = dense_vector_buffer.data;\n    out_vector_buffer = doc_cache_[doc_id].second;\n    vector_data_buffer->vector_buffer = std::move(dense_vector_buffer);\n    return 0;\n  }\n}\n\nint IVFIndex::_prepare_for_search(\n    const VectorData & /*query*/,\n    const BaseIndexQueryParam::Pointer &search_param,\n    core::IndexContext::Pointer &context) {\n  const auto &ivf_search_param =\n      std::dynamic_pointer_cast<IVFQueryParam>(search_param);\n\n  context->set_topk(ivf_search_param->topk);\n  context->set_fetch_vector(ivf_search_param->fetch_vector);\n  if (ivf_search_param->filter) {\n    context->set_filter(std::move(*ivf_search_param->filter));\n  }\n  if (ivf_search_param->radius > 0.0f) {\n    context->set_threshold(ivf_search_param->radius);\n  }\n\n  if (ivf_search_param->nprobe > 0) {\n    // TODO: 1. sparse; 2. default ef\n    ailego::Params params;\n    // need fix\n    params.set(core::PARAM_IVF_BUILDER_CENTROID_COUNT,\n               ivf_search_param->nprobe);\n    context->update(params);\n  }\n  return 0;\n}\n\nint IVFIndex::Merge(const std::vector<Index::Pointer> &indexes,\n                    const IndexFilter &filter, const MergeOptions &options) {\n  int pre_ret = Index::Merge(indexes, filter, options);\n  if (pre_ret != 0) {\n    return pre_ret;\n  }\n  auto dumper = core::IndexFactory::CreateDumper(\"FileDumper\");\n\n  dumper->create(file_path_);\n  builder_->dump(dumper);\n  dumper->close();\n  int ret = storage_->open(file_path_, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open storage, path: %s, err: %s\", file_path_.c_str(),\n              core::IndexError::What(ret));\n    return core::IndexError_Runtime;\n  }\n  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {\n    LOG_ERROR(\"Failed to open streamer, path: %s\", file_path_.c_str());\n    return 
core::IndexError_Runtime;\n  }\n  is_trained_ = true;\n  return 0;\n}\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/core/interface/utils/utils.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <magic_enum/magic_enum.hpp>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/logger/logger.h>\n\nnamespace zvec {\nnamespace core_interface {\n\ntemplate <typename EnumType>\nconstexpr bool extract_enum_from_json(const ailego::JsonObject &json_obj,\n                                      const char *key, EnumType &enum_value,\n                                      ailego::JsonValue &tmp_json_value) {\n  if (json_obj.has(key)) {\n    if (json_obj.get(key, &tmp_json_value); tmp_json_value.is_string()) {\n      auto optional_enum_value =\n          magic_enum::enum_cast<EnumType>(tmp_json_value.as_stl_string());\n      if (optional_enum_value.has_value()) {\n        enum_value = optional_enum_value.value();\n      } else {\n        LOG_ERROR(\"Invalid enum value for key: %s, value: %s\", key,\n                  tmp_json_value.as_c_string());\n        return false;\n      }\n    } else {\n      LOG_ERROR(\"Invalid json field type for key: %s\", key);\n      return false;\n    }\n  }\n  return true;\n}\n\ntemplate <typename T>\nconstexpr bool extract_value_from_json(const ailego::JsonObject &json_obj,\n                                       const char *key, T &value,\n                                       ailego::JsonValue &tmp_json_value) {\n  if (json_obj.has(key)) {\n    json_obj.get(key, 
&tmp_json_value);\n    if constexpr (std::is_same_v<T, bool>) {\n      if (tmp_json_value.is_boolean()) {\n        value = tmp_json_value.as_bool();\n      } else {\n        LOG_ERROR(\"Invalid json field type for key: %s; expected: boolean\",\n                  key);\n        return false;\n      }\n    } else if constexpr (std::is_floating_point_v<T>) {\n      if (tmp_json_value.is_float() || tmp_json_value.is_integer()) {\n        value = static_cast<T>(tmp_json_value.as_float());\n      } else {\n        LOG_ERROR(\"Invalid json field type for key: %s; expected: float\", key);\n        return false;\n      }\n    } else if constexpr (std::is_integral_v<T>) {\n      if (tmp_json_value.is_integer()) {\n        value = static_cast<T>(tmp_json_value.as_integer());\n      } else {\n        LOG_ERROR(\"Invalid json field type for key: %s; expected: integer\",\n                  key);\n        return false;\n      }\n    } else {\n      abort();\n    }\n  }\n  return true;\n}\n\n#define DESERIALIZE_ENUM_FIELD(json_obj, field_name, EnumType)               \\\n  {                                                                          \\\n    ailego::JsonValue tmp_json_value;                                        \\\n    if (!extract_enum_from_json<EnumType>(json_obj, #field_name, field_name, \\\n                                          tmp_json_value)) {                 \\\n      LOG_ERROR(\"Error when deserialize json - field:%s\", #field_name);      \\\n      return false;                                                          \\\n    }                                                                        \\\n  }\n\n\n#define DESERIALIZE_VALUE_FIELD(json_obj, field_name)                   \\\n  {                                                                     \\\n    ailego::JsonValue tmp_json_value;                                   \\\n    if (!extract_value_from_json(json_obj, #field_name, field_name,     \\\n                                 
tmp_json_value)) {                     \\\n      LOG_ERROR(\"Error when deserialize json - field:%s\", #field_name); \\\n      return false;                                                     \\\n    }                                                                   \\\n  }\n}  // namespace core_interface\n}  // namespace zvec"
  },
  {
    "path": "src/core/metric/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_metric \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS zvec_ailego zvec_turbo core_framework \n    INCS . ${PROJECT_ROOT_DIR}/src/core\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/metric/cosine_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/cosine_distance_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\n//! Retrieve distance function for index features\ninline IndexMetric::MatrixDistanceHandle CosineDistanceMatrixFp32(size_t m,\n                                                                  size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::CosineDistanceMatrix<float, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<float, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::CosineDistanceMatrix<float, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features\ninline IndexMetric::MatrixDistanceHandle CosineDistanceMatrixFp16(size_t m,\n                                                                  size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 8, 
8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::CosineDistanceMatrix<ailego::Float16, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n/*! Cosine Metric\n */\nclass CosineMetric : public IndexMetric {\n public:\n  //! 
Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType ft = meta.data_type();\n    if (ft != IndexMeta::DataType::DT_FP16 &&\n        ft != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Unsupported;\n    }\n    if (IndexMeta::UnitSizeof(ft) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n    data_type_ = ft;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::CosineDistanceMatrix<ailego::Float16, 1, 1>::Compute);\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::CosineDistanceMatrix<float, 1, 1>::Compute);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    if (m != 1 || n != 1) {\n      return nullptr;\n    }\n    return distance();\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixBatchDistance batch_distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::CosineDistanceMatrix, float, 12,\n                                 2>::ComputeBatch);\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::CosineDistanceMatrix, ailego::Float16,\n                                 12, 2>::ComputeBatch);\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  ailego::Params params_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(Cosine, CosineMetric);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/euclidean_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/hamming_distance_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_metric.h>\n\nnamespace zvec {\nnamespace core {\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle\nSquaredEuclideanDistanceMatrixFp32(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 4, 2>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 8>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<float, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle\nSquaredEuclideanDistanceMatrixFp16(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 1,\n                                                  1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 2,\n                                                  1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 2,\n                                                  2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 4,\n                                                  1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 4,\n                                                  2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 4,\n                                                  4>::Compute),\n       nullptr, nullptr, nullptr},\n      
{reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 8,\n                                                  1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 8,\n                                                  2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 8,\n                                                  4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 8,\n                                                  8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 16,\n                                                  1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 16,\n                                                  2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 16,\n                                                  4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 16,\n                                                  8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 16,\n                                                  16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                 
 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                  2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                  4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                  8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                  16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 32,\n                                                  32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\nstatic inline IndexMetric::MatrixDistanceHandle\nSquaredEuclideanDistanceMatrixInt8(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 4, 
1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<int8_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features in Int4\nstatic inline IndexMetric::MatrixDistanceHandle\nSquaredEuclideanDistanceMatrixInt4(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 8, 1>::Compute),\n 
      reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::SquaredEuclideanDistanceMatrix<uint8_t, 32, 32>::Compute)},\n  };\n  
if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixFp32(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 16, 1>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<float, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! 
Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixFp16(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::EuclideanDistanceMatrix<ailego::Float16, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<ailego::Float16, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\nstatic inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixInt8(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 
2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 1>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<int8_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features in Int4\nstatic inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixInt4(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      
{reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::EuclideanDistanceMatrix<uint8_t, 
32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix32(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 
1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n#if defined(AILEGO_M64)\nstatic inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix64(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::HammingDistanceMatrix<uint64_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 
1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n#endif  // AILEGO_M64\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle\nHammingSquareRootDistanceMatrix32(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 8>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n#if defined(AILEGO_M64)\nstatic inline IndexMetric::MatrixDistanceHandle\nHammingSquareRootDistanceMatrix64(size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n#endif  // 
AILEGO_M64\n\n/*! Squared Euclidean Distance Metric\n */\nclass SquaredEuclideanMetric : public IndexMetric {\n public:\n  //! Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType dt = meta.data_type();\n    if (dt != IndexMeta::DataType::DT_FP16 &&\n        dt != IndexMeta::DataType::DT_FP32 &&\n        dt != IndexMeta::DataType::DT_INT8 &&\n        dt != IndexMeta::DataType::DT_INT4 &&\n        dt != IndexMeta::DataType::DT_BINARY32 &&\n        dt != IndexMeta::DataType::DT_BINARY64) {\n      return IndexError_Unsupported;\n    }\n    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n    data_type_ = dt;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_BINARY32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute);\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute);\n#endif  // AILEGO_M64\n\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 1,\n                                                   1>::Compute);\n\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT8:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT4:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve sparse distance function for query\n  MatrixSparseDistance sparse_distance(void) const override {\n    return reinterpret_cast<MatrixSparseDistanceHandle>(\n        ailego::SquaredEuclideanSparseDistanceMatrix<float>::Compute);\n  }\n\n  //! 
Retrieve distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_BINARY32:\n        return HammingDistanceMatrix32(m, n);\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64:\n        return HammingDistanceMatrix64(m, n);\n#endif  // AILEGO_M64\n\n      case IndexMeta::DataType::DT_FP16:\n        return SquaredEuclideanDistanceMatrixFp16(m, n);\n\n      case IndexMeta::DataType::DT_FP32:\n        return SquaredEuclideanDistanceMatrixFp32(m, n);\n\n      case IndexMeta::DataType::DT_INT8:\n        return SquaredEuclideanDistanceMatrixInt8(m, n);\n\n      case IndexMeta::DataType::DT_INT4:\n        return SquaredEuclideanDistanceMatrixInt4(m, n);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve batch distance function for query\n  MatrixBatchDistance batch_distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_BINARY32:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::HammingDistanceMatrix, uint32_t, 1,\n                                 1>::ComputeBatch);\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::HammingDistanceMatrix, uint64_t, 1,\n                                 1>::ComputeBatch);\n#endif  // AILEGO_M64\n\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix,\n                                 ailego::Float16, 1, 1>::ComputeBatch);\n\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix, float,\n      
                           1, 1>::ComputeBatch);\n\n      case IndexMeta::DataType::DT_INT8:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix, int8_t,\n                                 1, 1>::ComputeBatch);\n\n      case IndexMeta::DataType::DT_INT4:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix,\n                                 uint8_t, 1, 1>::ComputeBatch);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  ailego::Params params_{};\n};\n\n/*! Euclidean Distance Metric\n */\nclass EuclideanMetric : public IndexMetric {\n public:\n  //! Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType dt = meta.data_type();\n    if (dt != IndexMeta::DataType::DT_FP16 &&\n        dt != IndexMeta::DataType::DT_FP32 &&\n        dt != IndexMeta::DataType::DT_INT8 &&\n        dt != IndexMeta::DataType::DT_INT4 &&\n        dt != IndexMeta::DataType::DT_BINARY32 &&\n        dt != IndexMeta::DataType::DT_BINARY64) {\n      return IndexError_Unsupported;\n    }\n    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n    data_type_ = dt;\n    params_ = index_params;\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! 
Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_BINARY32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute);\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute);\n#endif  // AILEGO_M64\n\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::EuclideanDistanceMatrix<ailego::Float16, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::EuclideanDistanceMatrix<float, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT8:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::EuclideanDistanceMatrix<int8_t, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT4:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! 
Retrieve distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_BINARY32:\n        return HammingSquareRootDistanceMatrix32(m, n);\n\n#if defined(AILEGO_M64)\n      case IndexMeta::DataType::DT_BINARY64:\n        return HammingSquareRootDistanceMatrix64(m, n);\n#endif  // AILEGO_M64\n\n      case IndexMeta::DataType::DT_FP16:\n        return EuclideanDistanceMatrixFp16(m, n);\n\n      case IndexMeta::DataType::DT_FP32:\n        return EuclideanDistanceMatrixFp32(m, n);\n\n      case IndexMeta::DataType::DT_INT8:\n        return EuclideanDistanceMatrixInt8(m, n);\n\n      case IndexMeta::DataType::DT_INT4:\n        return EuclideanDistanceMatrixInt4(m, n);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  ailego::Params params_{};\n};\n\n/*! Squared Euclidean Sparse Metric\n */\nclass SquaredEuclideanSparseMetric : public IndexMetric {\n public:\n  //! Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType data_type = meta.data_type();\n    if (data_type != IndexMeta::DataType::DT_FP16 &&\n        data_type != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Unsupported;\n    }\n\n    if (IndexMeta::UnitSizeof(data_type) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n\n    data_type_ = data_type;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! 
Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.data_type() == meta.data_type() &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.unit_size() == meta.unit_size());\n  }\n\n  //! Retrieve sparse distance function for query\n  MatrixSparseDistance sparse_distance(void) const override {\n    return reinterpret_cast<MatrixSparseDistanceHandle>(\n        ailego::SquaredEuclideanSparseDistanceMatrix<float>::Compute);\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n\n  ailego::Params params_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(SquaredEuclidean, SquaredEuclideanMetric);\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(Euclidean, EuclideanMetric);\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(SquaredEuclideanSparse,\n                                    SquaredEuclideanSparseMetric);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/hamming_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/math/hamming_distance_matrix.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"ailego/math_batch/distance_batch.h\"\n\nnamespace zvec {\nnamespace core {\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix32(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 4, 4>::Compute),\n       nullptr, nullptr, 
nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint32_t, 
32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n#if defined(AILEGO_M64)\nstatic inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix64(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 1>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::HammingDistanceMatrix<uint64_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n#endif  // AILEGO_M64\n\n/*! Hamming Metric\n */\nclass HammingMetric : public IndexMetric {\n public:\n  //! 
Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    if (meta.data_type() != IndexMeta::DataType::DT_BINARY32 &&\n        meta.data_type() != IndexMeta::DataType::DT_BINARY64) {\n      return IndexError_Unsupported;\n    }\n    if (IndexMeta::UnitSizeof(meta.data_type()) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n    feature_type_ = meta.data_type();\n    params_ = index_params;\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == feature_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(feature_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == feature_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(feature_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n#if defined(AILEGO_M64)\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {\n      return reinterpret_cast<MatrixDistanceHandle>(\n          ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute);\n    }\n#endif\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {\n      return reinterpret_cast<MatrixDistanceHandle>(\n          ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute);\n    }\n    return nullptr;\n  }\n\n  MatrixBatchDistance batch_distance(void) const override {\n#if defined(AILEGO_M64)\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {\n      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n          ailego::BaseDistance<ailego::HammingDistanceMatrix, uint64_t, 1,\n                               1>::ComputeBatch);\n    }\n#endif\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {\n      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n          ailego::BaseDistance<ailego::HammingDistanceMatrix, uint32_t, 1,\n                               1>::ComputeBatch);\n    }\n    return nullptr;\n  }\n\n  //! Retrieve distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n#if defined(AILEGO_M64)\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {\n      return HammingDistanceMatrix64(m, n);\n    }\n#endif\n    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {\n      return HammingDistanceMatrix32(m, n);\n    }\n    return nullptr;\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! 
Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType feature_type_{IndexMeta::DataType::DT_BINARY32};\n  ailego::Params params_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(Hamming, HammingMetric);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/inner_product_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_metric.h>\n\nnamespace zvec {\nnamespace core {\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixFp32(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 4, 
4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<float, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::MinusInnerProductMatrix<float, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! Retrieve distance function for index features\nstatic inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixFp16(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 8, 8>::Compute),\n       
nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<ailego::Float16, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\nstatic inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixInt8(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n   
        ailego::MinusInnerProductMatrix<int8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 16, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           
ailego::MinusInnerProductMatrix<int8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<int8_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n//! 
Retrieve distance function for index features in Int4\nstatic inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixInt4(\n    size_t m, size_t n) {\n  static const IndexMetric::MatrixDistanceHandle distance_table[6][6] = {\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 1, 1>::Compute),\n       nullptr, nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 2, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 2, 2>::Compute),\n       nullptr, nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 4, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 4, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 4, 4>::Compute),\n       nullptr, nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 8, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 8, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 8, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 8, 8>::Compute),\n       nullptr, nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 16, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 16, 2>::Compute),\n       
reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 16, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 16, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 16, 16>::Compute),\n       nullptr},\n      {reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 1>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 2>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 4>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 8>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 16>::Compute),\n       reinterpret_cast<IndexMetric::MatrixDistanceHandle>(\n           ailego::MinusInnerProductMatrix<uint8_t, 32, 32>::Compute)},\n  };\n  if (m > 32 || n > 32 || ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {\n    return nullptr;\n  }\n  return distance_table[ailego_ctz(m)][ailego_ctz(n)];\n}\n\n/*! Inner Product Metric\n */\nclass InnerProductMetric : public IndexMetric {\n public:\n  //! 
Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::MetaType mt = meta.meta_type();\n    if (mt != IndexMeta::MetaType::MT_DENSE) {\n      return IndexError_Unsupported;\n    }\n\n    IndexMeta::DataType dt = meta.data_type();\n    if (dt != IndexMeta::DataType::DT_FP16 &&\n        dt != IndexMeta::DataType::DT_FP32 &&\n        dt != IndexMeta::DataType::DT_INT8 &&\n        dt != IndexMeta::DataType::DT_INT4) {\n      return IndexError_Unsupported;\n    }\n    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n\n    meta_type_ = mt;\n    data_type_ = dt;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::MinusInnerProductMatrix<ailego::Float16, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::MinusInnerProductMatrix<float, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT8:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::MinusInnerProductMatrix<int8_t, 1, 1>::Compute);\n\n      case IndexMeta::DataType::DT_INT4:\n        return reinterpret_cast<MatrixDistanceHandle>(\n            ailego::MinusInnerProductMatrix<uint8_t, 1, 1>::Compute);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Retrieve sparse distance function for query\n  MatrixSparseDistance sparse_distance(void) const override {\n    return reinterpret_cast<MatrixSparseDistanceHandle>(\n        ailego::MinusInnerProductSparseMatrix<float>::Compute);\n  }\n\n  //! Retrieve distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP16:\n        return MinusInnerProductMatrixFp16(m, n);\n\n      case IndexMeta::DataType::DT_FP32:\n        return MinusInnerProductMatrixFp32(m, n);\n\n      case IndexMeta::DataType::DT_INT8:\n        return MinusInnerProductMatrixInt8(m, n);\n\n      case IndexMeta::DataType::DT_INT4:\n        return MinusInnerProductMatrixInt4(m, n);\n\n      default:\n        return nullptr;\n    }\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixBatchDistance batch_distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::MinusInnerProductMatrix, float, 1,\n                                 1>::ComputeBatch);\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::MinusInnerProductMatrix,\n                                 ailego::Float16, 1, 1>::ComputeBatch);\n      case IndexMeta::DataType::DT_INT8:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::MinusInnerProductMatrix, int8_t, 1,\n                                 1>::ComputeBatch);\n      case IndexMeta::DataType::DT_INT4:\n        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n            ailego::BaseDistance<ailego::MinusInnerProductMatrix, uint8_t, 1,\n                                 1>::ComputeBatch);\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Normalize result\n  void normalize(float *score) const override {\n    *score = -(*score);\n  }\n\n  //! Denormalize threshold\n  void denormalize(float *score) const override {\n    *score = -(*score);\n  }\n\n  //! Retrieve if it supports normalization\n  bool support_normalize(void) const override {\n    return true;\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query measure object of this index measure\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::MetaType meta_type_{IndexMeta::MetaType::MT_DENSE};\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  ailego::Params params_{};\n};\n\n/*! 
Normalized Cosine Metric\n */\nclass NormalizedCosineMetric : public InnerProductMetric {\n public:\n  //! Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType dt = meta.data_type();\n    if (dt != IndexMeta::DataType::DT_FP16 &&\n        dt != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Unsupported;\n    }\n\n    InnerProductMetric::init(meta, index_params);\n\n    return 0;\n  }\n\n  //! Normalize result\n  void normalize(float *score) const override {\n    *score = 1 + (*score);\n  }\n\n  //! Denormalize threshold\n\n  void denormalize(float *score) const override {\n    *score -= 1;\n  }\n};\n\n/*! Inner Product Sparse Metric\n */\nclass InnerProductSparseMetric : public IndexMetric {\n public:\n  //! Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    IndexMeta::DataType dt = meta.data_type();\n    if (dt != IndexMeta::DataType::DT_FP16 &&\n        dt != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Unsupported;\n    }\n\n    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {\n      return IndexError_Unsupported;\n    }\n\n    data_type_ = dt;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.data_type() == meta.data_type() &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.unit_size() == meta.unit_size());\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    return nullptr;\n  }\n\n  //! Retrieve sparse distance function for query\n  MatrixSparseDistance sparse_distance(void) const override {\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP16:\n        return reinterpret_cast<MatrixSparseDistanceHandle>(\n            ailego::MinusInnerProductSparseMatrix<ailego::Float16>::Compute);\n      case IndexMeta::DataType::DT_FP32:\n        return reinterpret_cast<MatrixSparseDistanceHandle>(\n            ailego::MinusInnerProductSparseMatrix<float>::Compute);\n      default:\n        return nullptr;\n    }\n  }\n\n  //! Normalize result\n  void normalize(float *score) const override {\n    *score = -(*score);\n  }\n\n  //! Denormalize threshold\n  void denormalize(float *score) const override {\n    *score = -(*score);\n  }\n\n  //! Retrieve if it supports normalization\n  bool support_normalize(void) const override {\n    return true;\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Retrieve query measure object of this index measure\n  Pointer query_metric(void) const override {\n    return nullptr;\n  }\n\n private:\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  ailego::Params params_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(InnerProduct, InnerProductMetric);\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(NormalizedCosine, NormalizedCosineMetric);\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(InnerProductSparse,\n                                    InnerProductSparseMetric);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/metric_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n//! MipsEuclideanMetric\nstatic const std::string MIPS_EUCLIDEAN_METRIC_M_VALUE =\n    \"mips_euclidean.metric.m_value\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_U_VALUE =\n    \"mips_euclidean.metric.u_value\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM =\n    \"mips_euclidean.metric.max_l2_norm\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE =\n    \"mips_euclidean.metric.injection_type\";\n\n//! QuantizedInteger Metric\nstatic const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME =\n    \"proxima.quantized_integer.metric.origin_metric_name\";\nstatic const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS =\n    \"proxima.quantized_integer.metric.origin_metric_params\";\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/metric/mips_euclidean_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math/mips_euclidean_distance_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"metric_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Mips Squared Euclidean Metric\n */\ntemplate <bool is_spares = false>\nclass MipsSquaredEuclideanMetric : public IndexMetric {\n public:\n  //! 
Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    data_type_ = meta.data_type();\n    dimension_ = meta.dimension();\n\n    int injection_type = static_cast<int>(kDefaultInjectionType);\n    index_params.get(MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE, &injection_type);\n    if (injection_type >= static_cast<int>(Injection::kNumInjections)) {\n      LOG_WARN(\"Unsupported injection_type %u, using '%s' instead\",\n               injection_type, InjectionName(0));\n      injection_type = static_cast<int>(Injection::kLocalizedSpherical);\n    }\n    injection_ = static_cast<Injection>(injection_type);\n    LOG_DEBUG(\n        \"Initializing MipsSquaredEuclideanMetric with injection %s\"\n        \" type %d dimension %d\",\n        InjectionName(injection_), data_type_, dimension_);\n\n    float max_l2_norm = 0.0f;\n    float u_value = 0.0f;\n    index_params.get(MIPS_EUCLIDEAN_METRIC_M_VALUE, &m_value_);\n    index_params.get(MIPS_EUCLIDEAN_METRIC_U_VALUE, &u_value);\n    index_params.get(MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM, &max_l2_norm);\n    CheckAndFixM(injection_, &m_value_);\n    CheckAndFixU(injection_, m_value_, &u_value);\n\n    squared_u_value_ = u_value * u_value;\n    max_squared_l2_norm_ = max_l2_norm * max_l2_norm;\n    if (injection_ == Injection::kIdentity ||\n        injection_ == Injection::kLocalizedSpherical) {\n      eta_ = 0.0f;\n    } else if (max_squared_l2_norm_ < std::numeric_limits<float>::epsilon()) {\n      eta_ = kDefaultEta;\n    } else {\n      eta_ = squared_u_value_ / max_squared_l2_norm_;\n    }\n\n    switch (data_type_) {\n      case IndexMeta::DataType::DT_FP32:\n        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(\n            ailego::SquaredNorm2Matrix<float, 1>::Compute);\n        break;\n\n      case IndexMeta::DataType::DT_FP16:\n        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(\n            ailego::SquaredNorm2Matrix<ailego::Float16, 
1>::Compute);\n        break;\n\n      case IndexMeta::DataType::DT_INT8:\n        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(\n            ailego::SquaredNorm2Matrix<int8_t, 1>::Compute);\n        break;\n\n      case IndexMeta::DataType::DT_INT4:\n        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(\n            ailego::SquaredNorm2Matrix<uint8_t, 1>::Compute);\n        break;\n\n      default:\n        return IndexError_Unsupported;\n    }\n\n    query_metric_ = IndexFactory::CreateMetric(kQueryMetric);\n    if (!query_metric_) {\n      LOG_ERROR(\"Failed to create metric %s\", kQueryMetric);\n      return IndexError_NoExist;\n    }\n    int ret = query_metric_->init(meta, ailego::Params());\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to initialize metric %s\", kQueryMetric);\n      return ret;\n    }\n    params_ = index_params;\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    eta_ = 0.0f;\n    m_value_ = 0;\n    squared_u_value_ = 0.0f;\n    max_squared_l2_norm_ = 0.0f;\n    query_metric_.reset();\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return (meta.data_type() == data_type_ &&\n            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return (qmeta.data_type() == data_type_ &&\n            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&\n            qmeta.dimension() == meta.dimension());\n  }\n\n  //! 
Retrieve distance function for query\n  MatrixBatchDistance batch_distance() const override {\n    MatrixDistance dist_func = distance();\n\n    return\n        [=](const void **m, const void *q, size_t num, size_t dim, float *out) {\n          for (size_t i = 0; i < num; ++i) {\n            dist_func(m[i], q, dim, out + i);\n          }\n        };\n  }\n\n\n  //! Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    if (injection_ == Injection::kLocalizedSpherical) {\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n                reinterpret_cast<const float *>(m),\n                reinterpret_cast<const float *>(q), dim, 0.0f, out);\n          };\n\n        case IndexMeta::DataType::DT_FP16:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::\n                Compute(reinterpret_cast<const ailego::Float16 *>(m),\n                        reinterpret_cast<const ailego::Float16 *>(q), dim, 0.0f,\n                        out);\n          };\n\n        case IndexMeta::DataType::DT_INT8:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n                reinterpret_cast<const int8_t *>(m),\n                reinterpret_cast<const int8_t *>(q), dim, 0.0f, out);\n          };\n\n        case IndexMeta::DataType::DT_INT4:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n                reinterpret_cast<const uint8_t *>(m),\n                reinterpret_cast<const uint8_t *>(q), dim, 0.0f, out);\n          };\n\n        default:\n        
  return nullptr;\n      }\n    }\n\n    if (injection_ == Injection::kRepeatedQuadratic) {\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n                reinterpret_cast<const float *>(m),\n                reinterpret_cast<const float *>(q), dim, m_value_, eta_, out);\n          };\n\n        case IndexMeta::DataType::DT_FP16:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::\n                Compute(reinterpret_cast<const ailego::Float16 *>(m),\n                        reinterpret_cast<const ailego::Float16 *>(q), dim,\n                        m_value_, eta_, out);\n          };\n\n        case IndexMeta::DataType::DT_INT8:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n                reinterpret_cast<const int8_t *>(m),\n                reinterpret_cast<const int8_t *>(q), dim, m_value_, eta_, out);\n          };\n\n        case IndexMeta::DataType::DT_INT4:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n                reinterpret_cast<const uint8_t *>(m),\n                reinterpret_cast<const uint8_t *>(q), dim, m_value_, eta_, out);\n          };\n\n        default:\n          return nullptr;\n      }\n    }\n\n    if (injection_ == Injection::kSpherical) {\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n                reinterpret_cast<const float *>(m),\n            
    reinterpret_cast<const float *>(q), dim, eta_, out);\n          };\n\n        case IndexMeta::DataType::DT_FP16:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::\n                Compute(reinterpret_cast<const ailego::Float16 *>(m),\n                        reinterpret_cast<const ailego::Float16 *>(q), dim, eta_,\n                        out);\n          };\n\n        case IndexMeta::DataType::DT_INT8:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n                reinterpret_cast<const int8_t *>(m),\n                reinterpret_cast<const int8_t *>(q), dim, eta_, out);\n          };\n\n        case IndexMeta::DataType::DT_INT4:\n          return [&](const void *m, const void *q, size_t dim, float *out) {\n            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n                reinterpret_cast<const uint8_t *>(m),\n                reinterpret_cast<const uint8_t *>(q), dim, eta_, out);\n          };\n\n        default:\n          return nullptr;\n      }\n    }\n\n    if (injection_ == Injection::kIdentity) {\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          return reinterpret_cast<MatrixDistanceHandle>(\n              ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute);\n\n        case IndexMeta::DataType::DT_FP16:\n          return reinterpret_cast<MatrixDistanceHandle>(\n              ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 1,\n                                                     1>::Compute);\n\n        case IndexMeta::DataType::DT_INT8:\n          return reinterpret_cast<MatrixDistanceHandle>(\n              ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute);\n\n        case IndexMeta::DataType::DT_INT4:\n          return 
reinterpret_cast<MatrixDistanceHandle>(\n              ailego::SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);\n\n        default:\n          return nullptr;\n      }\n    }\n    return nullptr;\n  }\n\n  //! Retrieve distance function for query\n  MatrixSparseDistance sparse_distance(void) const override {\n    if (injection_ == Injection::kLocalizedSpherical) {\n      return [&](const void *m_sparse, const void *q_sparse, float *out) {\n        ailego::MipsSquaredEuclideanSparseDistanceMatrix<float>::Compute(\n            m_sparse, q_sparse, out);\n      };\n    }\n\n    if (injection_ == Injection::kRepeatedQuadratic) {\n      LOG_ERROR(\n          \"Repeated Quadratic is not supported in MipsEuclideanMetric for \"\n          \"Hybrid Vector!\");\n\n      return nullptr;\n    }\n\n    if (injection_ == Injection::kSpherical) {\n      LOG_ERROR(\n          \"Spherical is not supported in MipsEuclideanMetric for Hybrid \"\n          \"Vector!\");\n\n      return nullptr;\n    }\n\n    if (injection_ == Injection::kIdentity) {\n      LOG_ERROR(\n          \"Identity is not supported in MipsEuclideanMetric for Hybrid \"\n          \"Vector!\");\n\n      return nullptr;\n    }\n\n    return nullptr;\n  }\n\n  //! 
Retrieve matrix distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    if (injection_ == Injection::kLocalizedSpherical) {\n      SphericalHandle<void> compute;\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    float, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_FP16:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    ailego::Float16, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_INT8:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    int8_t, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_INT4:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    uint8_t, SphericalHandle>(m, n);\n          break;\n        default:\n          return nullptr;\n      }\n      return [=](const void *d, const void *q, size_t dim, float *out) {\n        compute(d, q, dim, 0.0f, out);\n      };\n    }\n\n    if (injection_ == Injection::kRepeatedQuadratic) {\n      RepeatedQuadraticHandle<void> compute;\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    float, RepeatedQuadraticHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_FP16:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    ailego::Float16, RepeatedQuadraticHandle>(\n                  m, 
n);\n          break;\n        case IndexMeta::DataType::DT_INT8:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    int8_t, RepeatedQuadraticHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_INT4:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    uint8_t, RepeatedQuadraticHandle>(m, n);\n          break;\n        default:\n          return nullptr;\n      }\n      return [=](const void *d, const void *q, size_t dim, float *out) {\n        compute(d, q, dim, m_value_, eta_, out);\n      };\n    }\n\n    if (injection_ == Injection::kSpherical) {\n      SphericalHandle<void> compute;\n      switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    float, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_FP16:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    ailego::Float16, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_INT8:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    int8_t, SphericalHandle>(m, n);\n          break;\n        case IndexMeta::DataType::DT_INT4:\n          compute =\n              DistanceMatrixCompute<ailego::MipsSquaredEuclideanDistanceMatrix,\n                                    uint8_t, SphericalHandle>(m, n);\n          break;\n        default:\n          return nullptr;\n      }\n      return [=](const void *d, const void *q, size_t dim, float *out) {\n        compute(d, q, dim, eta_, out);\n      };\n    }\n\n    if (injection_ == Injection::kIdentity) {\n      
switch (data_type_) {\n        case IndexMeta::DataType::DT_FP32:\n          return DistanceMatrixCompute<ailego::SquaredEuclideanDistanceMatrix,\n                                       float, TypedDistanceHandle>(m, n);\n        case IndexMeta::DataType::DT_FP16:\n          return DistanceMatrixCompute<ailego::SquaredEuclideanDistanceMatrix,\n                                       ailego::Float16, TypedDistanceHandle>(m,\n                                                                             n);\n        case IndexMeta::DataType::DT_INT8:\n          return DistanceMatrixCompute<ailego::SquaredEuclideanDistanceMatrix,\n                                       int8_t, TypedDistanceHandle>(m, n);\n        case IndexMeta::DataType::DT_INT4:\n          return DistanceMatrixCompute<ailego::SquaredEuclideanDistanceMatrix,\n                                       uint8_t, TypedDistanceHandle>(m, n);\n        default:\n          return nullptr;\n      }\n    }\n    return nullptr;\n  }\n\n  //! Normalize result\n  void normalize(float *score) const override {\n    query_metric_->normalize(score);\n  }\n\n  //! Denormalize threshold\n  void denormalize(float *score) const override {\n    query_metric_->denormalize(score);\n  }\n\n  //! Retrieve if it supports normalization\n  bool support_normalize(void) const override {\n    return query_metric_->support_normalize();\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! 
Train the metric\n  int train(const void *vec, size_t dim) override {\n    if (eta_ == 0.0f) {  // No global norm scaling => no training.\n      return 0;\n    }\n    if (!squared_norm2_handle_) {\n      return IndexError_Unsupported;\n    }\n\n    float score;\n    squared_norm2_handle_(vec, dim, &score);\n    if (score > max_squared_l2_norm_) {\n      max_squared_l2_norm_ = score;\n      const float max_l2_norm = std::sqrt(score);\n      params_.set(MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM, max_l2_norm);\n      if (max_squared_l2_norm_ < 1.0 &&\n          max_squared_l2_norm_ > squared_u_value_) {\n        squared_u_value_ = max_squared_l2_norm_;\n        params_.set(MIPS_EUCLIDEAN_METRIC_U_VALUE, max_l2_norm);\n      }\n      eta_ = squared_u_value_ / max_squared_l2_norm_;\n    }\n    return 0;\n  }\n\n  //! Retrieve if it supports training\n  bool support_train(void) const override {\n    // No global norm scaling => eta_ == 0 => no training.\n    return eta_ != 0.0f;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    return query_metric_;\n  }\n\n private:\n  //! Type of MipsSquaredEuclideanDistanceMatrix::Compute overloaded for\n  //  Spherical injection and LocalizedSpherical nonmetric.\n  template <typename T>\n  using SphericalHandle = void (*)(const T *m, const T *q, size_t dim,\n                                   float eta, float *out);\n\n  //! Type of MipsSquaredEuclideanDistanceMatrix::Compute overloaded for\n  //  RepeatedQuadratic injection.\n  template <typename T>\n  using RepeatedQuadraticHandle = void (*)(const T *m, const T *q, size_t dim,\n                                           size_t m_value, float eta,\n                                           float *out);\n\n  //! 
Type of squared L2 norm function.\n  using SquaredNorm2Handle = void (*)(const void *m, size_t dim, float *out);\n\n  enum struct Injection {     // Type of injective mapping into Euclidean space.\n    kLocalizedSpherical = 0,  // spherical with pair-only max-norm\n    kSpherical = 1,           // require global scaling/training\n    kRepeatedQuadratic = 2,   // require global scaling/training\n    kIdentity = 3,            // plain Euclidean distance\n    kNumInjections = 4\n  };\n\n  static const char *InjectionName(int injection) {\n    static const char *injection_names[] = {\"LocalizedSpherical\", \"Spherical\",\n                                            \"RepeatedQuadratic\", \"Identity\"};\n    if (injection >= 0 &&\n        injection < static_cast<int>(Injection::kNumInjections)) {\n      return injection_names[injection];\n    }\n    return \"Invalid\";\n  }\n\n  static const char *InjectionName(Injection injection) {\n    return InjectionName(static_cast<int>(injection));\n  }\n\n  // Checks (and fixes) `*m_value`, no. additional dimensions for injection.\n  // `dim` is the original dimension, used ONLY by RepeatedQuadratic\n  // injection, where dim = 1 induces the default *m_value = 3. 
It's\n  // positioned last to allow other injections to skip it.\n  // Returns true if `*m_value` is modified.\n  static bool CheckAndFixM(Injection injection, uint32_t *m_value) {\n    if (injection == Injection::kRepeatedQuadratic) {\n      if (*m_value == 0) {\n        *m_value = 3u;  // Recommend value in paper (3.5 Practical\n                        // Recommendation of Parameters)\n        return true;\n      }\n    } else if (injection == Injection::kSpherical) {\n      if (*m_value != 1) {\n        if (*m_value != 0) {\n          LOG_WARN(\"M value (%u) set to 1 for Spherical injection\", *m_value);\n        }\n        *m_value = 1;\n        return true;\n      }\n    } else {  // kLocalizedSpherical, kIdentity, or kInvalid\n      if (*m_value != 0) {\n        LOG_WARN(\"M value (%u) set to 0 for %s injections\", *m_value,\n                 InjectionName(injection));\n        *m_value = 0;\n        return true;\n      }\n    }\n    return false;\n  }\n\n  // Checks and fixes `*u_value`, global L2 norm scalar.\n  // `m_value` is no. additional dimensions, used ONLY by RepeatedQuadratic\n  // injection. 
It's positioned last to allow other injections to skip it.\n  // Returns true if `*u_value` is set to a new value.\n  static bool CheckAndFixU(Injection injection, uint32_t m_value,\n                           float *u_value) {\n    if (injection == Injection::kRepeatedQuadratic) {\n      if (*u_value <= std::numeric_limits<float>::epsilon() ||\n          *u_value >= 1.0) {\n        // Try computing a default U value\n        constexpr float kLogError = -5.0;  // log_10(distance_error)\n        float new_u_value = std::pow(10, kLogError / (1 << (m_value + 1)));\n        if (*u_value != 0) {\n          LOG_WARN(\"U value (%f) set to %f for RepeatedQuadratic injection\",\n                   *u_value, new_u_value);\n        }\n        *u_value = new_u_value;\n        return true;\n      } else if (std::pow(*u_value, (1 << m_value)) <\n                 std::numeric_limits<float>::epsilon()) {\n        LOG_WARN(\n            \"U value %f is too small, may cause loss of distance precision\",\n            *u_value);\n      }\n    } else if (injection == Injection::kSpherical) {\n      // Spherical injection requires ||x'|| <= 1.0 for computing\n      // std::sqrt(1 - ||x'||^2), x' = u_value * x / max_norm.  
Set u_value\n      // to slightly < 1.0 in case of precision loss in float computation.\n      if (*u_value <= std::numeric_limits<float>::epsilon() ||\n          *u_value >= 1.0) {\n        static constexpr float kSphericalUValue = 1.0f - 1e-3;\n        if (*u_value != 0.0f) {\n          LOG_WARN(\"U value (%f) set to %f for Spherical injection\", *u_value,\n                   kSphericalUValue);\n        }\n        *u_value = kSphericalUValue;\n        return true;\n      }\n    } else {  // kLocalizedSpherical, kIdentity, or kInvalid\n      if (*u_value != 1.0) {\n        if (*u_value != 0) {\n          LOG_WARN(\"U value (%f) set to 1.0 for %s injection\", *u_value,\n                   InjectionName(injection));\n        }\n        *u_value = 1.0;\n        return true;\n      }\n    }\n    return false;\n  }\n\n private:\n  //! Type of basic DistanceMatrix::Compute function with typed parameter.\n  template <typename T>\n  using TypedDistanceHandle = void (*)(const T *m, const T *q, size_t dim,\n                                       float *out);\n\n  //! 
Returns m x n distance matrix compute function.\n  //  Handle is used to resolve potential DistanceMatrix<T>::Compute overload.\n  template <template <typename, size_t, size_t, typename = void>\n            class DistanceMatrix,\n            typename T, template <typename> class Handle = TypedDistanceHandle>\n  static Handle<void> DistanceMatrixCompute(size_t m, size_t n) {\n    static Handle<T> distance_table[6][6] = {\n        {DistanceMatrix<T, 1, 1, void>::Compute, nullptr, nullptr, nullptr,\n         nullptr, nullptr},\n        {DistanceMatrix<T, 2, 1, void>::Compute,\n         DistanceMatrix<T, 2, 2, void>::Compute, nullptr, nullptr, nullptr,\n         nullptr},\n        {DistanceMatrix<T, 4, 1, void>::Compute,\n         DistanceMatrix<T, 4, 2, void>::Compute,\n         DistanceMatrix<T, 4, 4, void>::Compute, nullptr, nullptr, nullptr},\n        {DistanceMatrix<T, 8, 1, void>::Compute,\n         DistanceMatrix<T, 8, 2, void>::Compute,\n         DistanceMatrix<T, 8, 4, void>::Compute,\n         DistanceMatrix<T, 8, 8, void>::Compute, nullptr, nullptr},\n        {DistanceMatrix<T, 16, 1, void>::Compute,\n         DistanceMatrix<T, 16, 2, void>::Compute,\n         DistanceMatrix<T, 16, 4, void>::Compute,\n         DistanceMatrix<T, 16, 8, void>::Compute,\n         DistanceMatrix<T, 16, 16, void>::Compute, nullptr},\n        {DistanceMatrix<T, 32, 1, void>::Compute,\n         DistanceMatrix<T, 32, 2, void>::Compute,\n         DistanceMatrix<T, 32, 4, void>::Compute,\n         DistanceMatrix<T, 32, 8, void>::Compute,\n         DistanceMatrix<T, 32, 16, void>::Compute,\n         DistanceMatrix<T, 32, 32, void>::Compute}};\n    if (m > 32 || n > 32 || ailego_popcount(m) != 1 ||\n        ailego_popcount(n) != 1) {\n      return nullptr;\n    }\n    return reinterpret_cast<Handle<void> >(\n        distance_table[ailego_ctz(m)][ailego_ctz(n)]);\n  }\n\n  //! 
Constants\n  // If the training data is not provided, we use a max squared l2 norm which\n  // is as big as possible but also keep the precision, so estimate eta =  U /\n  // max(l2 squared norm) = float epsilon\n  static constexpr float kDefaultEta = std::numeric_limits<float>::epsilon();\n  static constexpr char const *kQueryMetric =\n      is_spares ? \"InnerProductSparse\" : \"InnerProduct\";\n  static constexpr Injection kDefaultInjectionType =\n      Injection::kLocalizedSpherical;\n\n  //! Members\n  SquaredNorm2Handle squared_norm2_handle_{nullptr};\n  float eta_{0.0f};\n  uint32_t m_value_{0};\n  float squared_u_value_{0.0f};\n  float max_squared_l2_norm_{0.0f};\n  uint32_t dimension_{0};\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};\n  Injection injection_{kDefaultInjectionType};\n  IndexMetric::Pointer query_metric_{};\n  ailego::Params params_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(MipsSquaredEuclidean,\n                                    MipsSquaredEuclideanMetric<false>);\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(MipsSquaredEuclideanSparse,\n                                    MipsSquaredEuclideanMetric<true>);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/quantized_integer_metric.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math/mips_euclidean_distance_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/turbo/turbo.h>\n#include \"metric_params.h\"\n#include \"quantized_integer_metric_batch.h\"\n#include \"quantized_integer_metric_matrix.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Metric for quantized integer by IntegerStreamingConverter\n */\nclass QuantizedIntegerMetric : public IndexMetric {\n public:\n  //! 
Initialize Metric\n  int init(const IndexMeta &meta, const ailego::Params &index_params) override {\n    if (meta.data_type() != IndexMeta::DataType::DT_INT8 &&\n        meta.data_type() != IndexMeta::DataType::DT_INT4) {\n      LOG_ERROR(\"Unsupported type %d\", meta.data_type());\n      return IndexError_Unsupported;\n    }\n    std::string metric_name;\n    ailego::Params metric_params;\n    index_params.get(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME, &metric_name);\n    index_params.get(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,\n                     &metric_params);\n    if (metric_name.empty()) {\n      LOG_ERROR(\"Param %s is required\",\n                QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME.c_str());\n      return IndexError_InvalidArgument;\n    }\n    if (metric_name == \"SquaredEuclidean\") {\n      origin_metric_type_ = MetricType::kSquaredEuclidean;\n    } else if (metric_name == \"InnerProduct\") {\n      origin_metric_type_ = MetricType::kInnerProduct;\n    } else if (metric_name == \"MipsSquaredEuclidean\") {\n      origin_metric_type_ = MetricType::kMipsSquaredEuclidean;\n    } else if (metric_name == \"NormalizedCosine\") {\n      origin_metric_type_ = MetricType::kNormalizedCosine;\n    } else if (metric_name == \"Cosine\") {\n      origin_metric_type_ = MetricType::kCosine;\n    } else {\n      LOG_ERROR(\"Unsupported metric %s\", metric_name.c_str());\n      return IndexError_Unsupported;\n    }\n    meta_ = meta;\n    params_ = index_params;\n\n    return 0;\n  }\n\n  //! Cleanup Metric\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Retrieve if it matched\n  bool is_matched(const IndexMeta &meta) const override {\n    return meta.data_type() == meta_.data_type() &&\n           meta.unit_size() == meta_.unit_size();\n  }\n\n  //! 
Retrieve if it matched\n  bool is_matched(const IndexMeta &meta,\n                  const IndexQueryMeta &qmeta) const override {\n    return qmeta.data_type() == meta_.data_type() &&\n           qmeta.unit_size() == meta_.unit_size() &&\n           qmeta.dimension() == meta.dimension();\n  }\n\n  //! Retrieve distance function for query\n  MatrixDistance distance(void) const override {\n    return distance_matrix(1, 1);\n  }\n\n  //! Retrieve matrix distance function for index features\n  MatrixDistance distance_matrix(size_t m, size_t n) const override {\n    switch (origin_metric_type_) {\n      case MetricType::kSquaredEuclidean:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          auto turbo_ret = turbo::get_distance_func(\n              turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,\n              turbo::QuantizeType::kDefault);\n          if (turbo_ret && m == 1 && n == 1) {\n            return turbo_ret;\n          }\n          return DistanceMatrixCompute<SquaredEuclidean, int8_t>(m, n);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return DistanceMatrixCompute<SquaredEuclidean, uint8_t>(m, n);\n        }\n        break;\n\n      case MetricType::kInnerProduct:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return DistanceMatrixCompute<MinusInnerProduct, int8_t>(m, n);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return DistanceMatrixCompute<MinusInnerProduct, uint8_t>(m, n);\n        }\n        break;\n\n      case MetricType::kMipsSquaredEuclidean:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return DistanceMatrixCompute<MipsSquaredEuclidean, int8_t>(m, n);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return DistanceMatrixCompute<MipsSquaredEuclidean, uint8_t>(m, n);\n        }\n        break;\n\n      case 
MetricType::kNormalizedCosine:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return DistanceMatrixCompute<MinusInnerProduct, int8_t>(m, n);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return DistanceMatrixCompute<MinusInnerProduct, uint8_t>(m, n);\n        }\n        break;\n      case MetricType::kCosine:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          auto turbo_ret = turbo::get_distance_func(\n              turbo::MetricType::kCosine, turbo::DataType::kInt8,\n              turbo::QuantizeType::kDefault);\n          if (turbo_ret) {\n            return turbo_ret;\n          }\n          return DistanceMatrixCompute<CosineMinusInnerProduct, int8_t>(m, n);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return DistanceMatrixCompute<CosineMinusInnerProduct, uint8_t>(m, n);\n        }\n        break;\n    }\n    return nullptr;\n  }\n\n  //! Retrieve distance function for query\n  MatrixBatchDistance batch_distance(void) const override {\n    switch (origin_metric_type_) {\n      case MetricType::kSquaredEuclidean:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          auto turbo_ret = turbo::get_batch_distance_func(\n              turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,\n              turbo::QuantizeType::kDefault);\n          if (turbo_ret) {\n            return turbo_ret;\n          }\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<SquaredEuclidean, int8_t,\n                                                    12, 2>::ComputeBatch);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<SquaredEuclidean, uint8_t,\n                                        
            12, 2>::ComputeBatch);\n        }\n        break;\n\n      case MetricType::kInnerProduct:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, int8_t,\n                                                    12, 2>::ComputeBatch);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, uint8_t,\n                                                    12, 2>::ComputeBatch);\n        }\n        break;\n      case MetricType::kMipsSquaredEuclidean:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<\n                  MipsSquaredEuclidean, int8_t, 12, 2>::ComputeBatch);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<\n                  MipsSquaredEuclidean, uint8_t, 12, 2>::ComputeBatch);\n        }\n        break;\n      case MetricType::kNormalizedCosine:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, int8_t,\n                                                    12, 2>::ComputeBatch);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, uint8_t,\n                                                    12, 
2>::ComputeBatch);\n        }\n        break;\n      case MetricType::kCosine:\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n          auto turbo_ret = turbo::get_batch_distance_func(\n              turbo::MetricType::kCosine, turbo::DataType::kInt8,\n              turbo::QuantizeType::kDefault);\n          if (turbo_ret) {\n            return turbo_ret;\n          }\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<\n                  CosineMinusInnerProduct, int8_t, 12, 2>::ComputeBatch);\n        }\n        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {\n          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(\n              BaseDistanceBatchWithScoreUnquantized<\n                  CosineMinusInnerProduct, uint8_t, 12, 2>::ComputeBatch);\n        }\n        break;\n    }\n    return nullptr;\n  }\n\n  //! Retrieve params of Metric\n  const ailego::Params &params(void) const override {\n    return params_;\n  }\n\n  //! Train the metric\n  int train(const void * /*vec*/, size_t /*dim*/) override {\n    return 0;\n  }\n\n  //! Retrieve if it supports training\n  bool support_train(void) const override {\n    // No global norm scaling => eta_ == 0 => no training.\n    return false;\n  }\n\n  //! Normalize result\n  void normalize(float *score) const override {\n    if (origin_metric_type_ == MetricType::kInnerProduct) {\n      *score = -(*score);\n    } else if (origin_metric_type_ == MetricType::kNormalizedCosine) {\n      *score = 1.0f + *score;\n    } else if (origin_metric_type_ == MetricType::kCosine) {\n      *score = 1.0f + *score;\n    }\n  }\n\n  //! 
Retrieve if it supports normalization\n  bool support_normalize(void) const override {\n    return origin_metric_type_ == MetricType::kInnerProduct ||\n           origin_metric_type_ == MetricType::kNormalizedCosine ||\n           origin_metric_type_ == MetricType::kCosine;\n  }\n\n  //! Retrieve query metric object of this index metric\n  Pointer query_metric(void) const override {\n    if (origin_metric_type_ == MetricType::kMipsSquaredEuclidean) {\n      auto metric = IndexFactory::CreateMetric(\"QuantizedInteger\");\n      if (metric) {\n        ailego::Params metric_params;\n        metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,\n                          \"InnerProduct\");\n        metric->init(meta_, metric_params);\n      }\n      return metric;\n    }\n    return nullptr;\n  }\n\n  virtual DistanceBatchQueryPreprocessFunc get_query_preprocess_func()\n      const override {\n    if (origin_metric_type_ == MetricType::kCosine &&\n        meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n      auto turbo_ret = turbo::get_query_preprocess_func(\n          turbo::MetricType::kCosine, turbo::DataType::kInt8,\n          turbo::QuantizeType::kDefault);\n      if (turbo_ret) {\n        return turbo_ret;\n      }\n      return CosineMinusInnerProductDistanceBatchWithScoreUnquantized<\n          int8_t, 1, 1>::GetQueryPreprocessFunc();\n    } else if (origin_metric_type_ == MetricType::kSquaredEuclidean &&\n               meta_.data_type() == IndexMeta::DataType::DT_INT8) {\n      auto turbo_ret = turbo::get_query_preprocess_func(\n          turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,\n          turbo::QuantizeType::kDefault);\n      if (turbo_ret) {\n        return turbo_ret;\n      }\n      return SquaredEuclideanDistanceBatchWithScoreUnquantized<\n          int8_t, 1, 1>::GetQueryPreprocessFunc();\n    }\n    return nullptr;\n  }\n\n\n private:\n  //! 
Returns m x n distance matrix compute function.\n  template <template <typename, size_t, size_t> class DistanceMatrix,\n            typename T>\n  static MatrixDistanceHandle DistanceMatrixCompute(size_t m, size_t n) {\n    static void (*distance_table[6][6])(const T *, const T *, size_t,\n                                        float *) = {\n        {DistanceMatrix<T, 1, 1>::Compute, nullptr, nullptr, nullptr, nullptr,\n         nullptr},\n        {DistanceMatrix<T, 2, 1>::Compute, DistanceMatrix<T, 2, 2>::Compute,\n         nullptr, nullptr, nullptr, nullptr},\n        {DistanceMatrix<T, 4, 1>::Compute, DistanceMatrix<T, 4, 2>::Compute,\n         DistanceMatrix<T, 4, 4>::Compute, nullptr, nullptr, nullptr},\n        {DistanceMatrix<T, 8, 1>::Compute, DistanceMatrix<T, 8, 2>::Compute,\n         DistanceMatrix<T, 8, 4>::Compute, DistanceMatrix<T, 8, 8>::Compute,\n         nullptr, nullptr},\n        {DistanceMatrix<T, 16, 1>::Compute, DistanceMatrix<T, 16, 2>::Compute,\n         DistanceMatrix<T, 16, 4>::Compute, DistanceMatrix<T, 16, 8>::Compute,\n         DistanceMatrix<T, 16, 16>::Compute, nullptr},\n        {DistanceMatrix<T, 32, 1>::Compute, DistanceMatrix<T, 32, 2>::Compute,\n         DistanceMatrix<T, 32, 4>::Compute, DistanceMatrix<T, 32, 8>::Compute,\n         DistanceMatrix<T, 32, 16>::Compute,\n         DistanceMatrix<T, 32, 32>::Compute}};\n    if (m > 32 || n > 32 || ailego_popcount(m) != 1 ||\n        ailego_popcount(n) != 1) {\n      return nullptr;\n    }\n    return reinterpret_cast<MatrixDistanceHandle>(\n        distance_table[ailego_ctz(m)][ailego_ctz(n)]);\n  }\n\n  enum struct MetricType {\n    kSquaredEuclidean = 0,\n    kInnerProduct = 1,\n    kMipsSquaredEuclidean = 2,\n    kNormalizedCosine = 3,\n    kCosine = 4\n  };\n\n  //! 
Members\n  IndexMeta meta_{};\n  ailego::Params params_{};\n  MetricType origin_metric_type_{};\n};\n\nINDEX_FACTORY_REGISTER_METRIC_ALIAS(QuantizedInteger, QuantizedIntegerMetric);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/metric/quantized_integer_metric_batch.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math/mips_euclidean_distance_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include \"quantized_integer_metric_matrix.h\"\n\nnamespace zvec::core {\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep>\nstruct MinusInnerProductDistanceBatchWithScoreUnquantized;\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep>\nstruct CosineMinusInnerProductDistanceBatchWithScoreUnquantized;\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep>\nstruct SquaredEuclideanDistanceBatchWithScoreUnquantized;\n\ntemplate <typename T, size_t BatchSize, size_t PrefetchStep>\nstruct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized;\n\n\ntemplate <template <typename, size_t, size_t> class DistanceType,\n          typename ValueType, size_t BatchSize, size_t PrefetchStep,\n          typename = void>\nstruct BaseDistanceBatchWithScoreUnquantized {\n  static inline void _ComputeBatch(const ValueType **m, const ValueType *q,\n                                   size_t num, size_t dim, float *out) {\n    for (size_t i = 0; i < num; ++i) {\n      DistanceType<ValueType, 1, 1>::Compute(m[i], q, dim, out + i);\n    }\n  }\n\n  // If Distance has ComputeBatch, use it; otherwise 
fall back to _ComputeBatch.\n  static inline void ComputeBatch(const ValueType **m, const ValueType *q,\n                                  size_t num, size_t dim, float *out) {\n    // if constexpr (detail::HasComputeBatch<Distance, ValueType>::value) {\n    //   return Distance::ComputeBatch(m, q, num, dim, out);\n    // }\n    if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,\n                                 CosineMinusInnerProduct<ValueType, 1, 1>>) {\n      return CosineMinusInnerProductDistanceBatchWithScoreUnquantized<\n          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,\n                                                            out);\n    } else if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,\n                                        SquaredEuclidean<ValueType, 1, 1>>) {\n      return SquaredEuclideanDistanceBatchWithScoreUnquantized<\n          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,\n                                                            out);\n    }\n\n    _ComputeBatch(m, q, num, dim, out);\n  }\n};\n\n//===========================================================\n// CosineMinusInnerProductDistanceBatchWithScoreUnquantized\n//===========================================================\n\n// Compute CosineMinusInnerProduct for quantized INT8\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct CosineMinusInnerProductDistanceBatchWithScoreUnquantized<\n    int8_t, BatchSize, PrefetchStep> {\n  using ImplType =\n      MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t, BatchSize,\n                                                         PrefetchStep>;\n\n  static inline void ComputeBatch(const int8_t **vecs, const int8_t *query,\n                                  size_t num_vecs, size_t dim, float *results) {\n    size_t original_dim = dim - 24;\n\n    ImplType::ComputeBatch(vecs, query, num_vecs, original_dim, results);\n  }\n\n  static 
ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc\n  GetQueryPreprocessFunc() {\n    return QueryPreprocess;\n  }\n\n  static void QueryPreprocess(void *query, size_t dim) {\n    if (auto func = ImplType::GetQueryPreprocessFunc(); func != nullptr) {\n      return func(query, dim - 24);\n    }\n  }\n};\n\n// Compute CosineMinusInnerProduct for quantized INT4\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct CosineMinusInnerProductDistanceBatchWithScoreUnquantized<\n    uint8_t, BatchSize, PrefetchStep> {\n  static inline void ComputeBatch(const uint8_t **vecs, const uint8_t *query,\n                                  size_t num_vecs, size_t dim, float *results) {\n    size_t original_dim = dim - 40;\n    MinusInnerProductDistanceBatchWithScoreUnquantized<\n        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                        original_dim, results);\n  }\n};\n\n//===========================================================\n// MinusInnerProductDistanceBatchWithScoreUnquantized\n//===========================================================\n\n// Compute MinusInnerProduct for quantized INT8\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t, BatchSize,\n                                                          PrefetchStep> {\n  using ImplType =\n      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,\n                                                       PrefetchStep>;\n  static inline void ComputeBatch(const int8_t **vecs, const int8_t *query,\n                                  size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim;\n    ImplType::ComputeBatch(vecs, query, num_vecs, original_dim, results);\n\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const uint8_t *>(query) + original_dim);\n    float qa = q_tail[0];\n    float qb = 
q_tail[1];\n    float qs = q_tail[2];\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const uint8_t *>(vecs[i]) + original_dim);\n      float ma = m_tail[0];\n      float mb = m_tail[1];\n      float ms = m_tail[2];\n      float &result = results[i];\n      if (ImplType::GetQueryPreprocessFunc() != nullptr) {\n        int int_sum = reinterpret_cast<const int *>(m_tail)[4];\n        result -= 128 * int_sum;\n      }\n      result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +\n                 original_dim * qb * mb);\n    }\n  }\n\n  static ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc\n  GetQueryPreprocessFunc() {\n    return ImplType::GetQueryPreprocessFunc();\n  }\n};\n\n// Compute MinusInnerProduct for quantized INT4\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct MinusInnerProductDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,\n                                                          PrefetchStep> {\n  static inline void ComputeBatch(const uint8_t **vecs, const uint8_t *query,\n                                  size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim;\n    const size_t original_dim_in_uint8_array = original_dim >> 1;\n\n    ailego::DistanceBatch::InnerProductDistanceBatch<\n        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                        original_dim, results);\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);\n    float qa = q_tail[0];\n    float qb = q_tail[1];\n    float qs = q_tail[2];\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const uint8_t *>(vecs[i]) +\n          original_dim_in_uint8_array);\n      float ma = m_tail[0];\n      float mb = 
m_tail[1];\n      float ms = m_tail[2];\n      float &result = results[i];\n      result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +\n                 original_dim * qb * mb);\n    }\n  }\n};\n\n//===========================================================\n// SquaredEuclideanDistanceBatchWithScoreUnquantized\n//===========================================================\n\n// Compute SquaredEuclidean for quantized INT8\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct SquaredEuclideanDistanceBatchWithScoreUnquantized<int8_t, BatchSize,\n                                                         PrefetchStep> {\n  using ImplType =\n      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,\n                                                       PrefetchStep>;\n  static void ComputeBatch(const int8_t **vecs, const int8_t *query,\n                           size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim - 20;\n    ailego::DistanceBatch::InnerProductDistanceBatch<\n        int8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                       original_dim, results);\n\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const uint8_t *>(query) + original_dim);\n    float qa = q_tail[0];\n    float qb = q_tail[1];\n    float qs = q_tail[2];\n    float qs2 = q_tail[3];\n\n    const float sum = qa * qs;\n    const float sum2 = qa * qa * qs2;\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const uint8_t *>(vecs[i]) + original_dim);\n      float ma = m_tail[0];\n      float mb = m_tail[1];\n      float ms = m_tail[2];\n      float ms2 = m_tail[3];\n      float &result = results[i];\n      if (ImplType::GetQueryPreprocessFunc() != nullptr) {\n        int int8_sum = reinterpret_cast<const int *>(m_tail)[4];\n        result -= 128 * 
int8_sum;\n      }\n      result = ma * ma * ms2 + sum2 - 2 * ma * qa * result +\n               (mb - qb) * (mb - qb) * original_dim +\n               2 * (mb - qb) * (ms * ma - sum);\n    }\n  }\n\n  static ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc\n  GetQueryPreprocessFunc() {\n    return QueryPreprocess;\n  }\n\n  static void QueryPreprocess(void *query, size_t dim) {\n    if (auto func = ImplType::GetQueryPreprocessFunc(); func != nullptr) {\n      return func(query, dim - 20);\n    }\n  }\n};\n\n// Compute SquaredEuclidean for quantized INT4\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct SquaredEuclideanDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,\n                                                         PrefetchStep> {\n  static void ComputeBatch(const uint8_t **vecs, const uint8_t *query,\n                           size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim - 32;\n    const size_t original_dim_in_uint8_array = original_dim >> 1;\n    ailego::DistanceBatch::InnerProductDistanceBatch<\n        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                        original_dim, results);\n\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);\n    float qa = q_tail[0];\n    float qb = q_tail[1];\n    float qs = q_tail[2];\n    float qs2 = q_tail[3];\n\n    const float sum = qa * qs;\n    const float sum2 = qa * qa * qs2;\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const uint8_t *>(vecs[i]) +\n          original_dim_in_uint8_array);\n      float ma = m_tail[0];\n      float mb = m_tail[1];\n      float ms = m_tail[2];\n      float ms2 = m_tail[3];\n      *results = ma * ma * ms2 + sum2 - 2 * ma * qa * *results +\n                 (mb - qb) * (mb - qb) * 
original_dim +\n                 2 * (mb - qb) * (ms * ma - sum);\n      ++results;\n    }\n  }\n};\n\n\n//===========================================================\n// MipsSquaredEuclideanDistanceBatchWithScoreUnquantized\n//===========================================================\n\n// Compute MipsSquaredEuclidean for quantized INT8\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized<int8_t, BatchSize,\n                                                             PrefetchStep> {\n  using ImplType =\n      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,\n                                                       PrefetchStep>;\n  static void ComputeBatch(const int8_t **vecs, const int8_t *query,\n                           size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim - 20;\n    ailego::DistanceBatch::InnerProductDistanceBatch<\n        int8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                       original_dim, results);\n\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const int8_t *>(query) + original_dim);\n    float qa = q_tail[0];\n    float qb = q_tail[1];\n    float qs = q_tail[2];\n    float qs2 = q_tail[3];\n\n    const float sum = qa * qs;\n    const float sum2 = qa * qa * qs2;\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const int8_t *>(vecs[i]) + original_dim);\n      float ma = m_tail[0];\n      float mb = m_tail[1];\n      float ms = m_tail[2];\n      float ms2 = m_tail[3];\n      *results = ma * ma * ms2 + sum2 - 2 * ma * qa * *results +\n                 (mb - qb) * (mb - qb) * original_dim +\n                 2 * (mb - qb) * (ms * ma - sum);\n      ++results;\n    }\n  }\n\n  static void QueryPreprocess(void *query, size_t dim) {\n    if 
(auto func = ImplType::GetQueryPreprocessFunc(); func != nullptr) {\n      return func(query, dim - 20);\n    }\n  }\n};\n\n// Compute MipsSquaredEuclidean for quantized INT4\ntemplate <size_t BatchSize, size_t PrefetchStep>\nstruct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,\n                                                             PrefetchStep> {\n  static void ComputeBatch(const uint8_t **vecs, const uint8_t *query,\n                           size_t num_vecs, size_t dim, float *results) {\n    const size_t original_dim = dim - 32;\n    const size_t original_dim_in_uint8_array = original_dim >> 1;\n    ailego::DistanceBatch::InnerProductDistanceBatch<\n        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,\n                                                        original_dim, results);\n\n    const float *q_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);\n    float qa = q_tail[0];\n    float qb = q_tail[1];\n    float qs = q_tail[2];\n    float qs2 = q_tail[3];\n\n    const float sum = qa * qs;\n    const float sum2 = qa * qa * qs2;\n    for (size_t i = 0; i < num_vecs; ++i) {\n      const float *m_tail = reinterpret_cast<const float *>(\n          reinterpret_cast<const uint8_t *>(vecs[i]) +\n          original_dim_in_uint8_array);\n      float ma = m_tail[0];\n      float mb = m_tail[1];\n      float ms = m_tail[2];\n      float ms2 = m_tail[3];\n      *results = ma * ma * ms2 + sum2 - 2 * ma * qa * *results +\n                 (mb - qb) * (mb - qb) * original_dim +\n                 2 * (mb - qb) * (ms * ma - sum);\n      ++results;\n    }\n  }\n};\n\n}  // namespace zvec::core\n"
  },
  {
    "path": "src/core/metric/quantized_integer_metric_matrix.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <ailego/math/euclidean_distance_matrix.h>\n#include <ailego/math/inner_product_matrix.h>\n#include <ailego/math/mips_euclidean_distance_matrix.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math_batch/distance_batch.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"metric_params.h\"\n\n\nnamespace zvec::core {\n//===========================================================\n// SquaredEuclidean\n//===========================================================\n\ntemplate <typename T, size_t M, size_t N>\nstruct SquaredEuclidean;\n\n// Compute SquaredEuclidean for quantized INT8\ntemplate <size_t M, size_t N>\nstruct SquaredEuclidean<int8_t, M, N> {\n  static void Compute(const int8_t *m, const int8_t *q, size_t dim,\n                      float *out) {\n    const size_t d = dim - 20;\n    ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);\n\n    for (size_t i = 0; i < N; ++i) {\n      float qa = *reinterpret_cast<const float *>(&q[d * N + i * 4]);\n      float qb = *reinterpret_cast<const float *>(&q[(d + 4) * N + i * 4]);\n      float qs = *reinterpret_cast<const float *>(&q[(d + 8) * N + i * 4]);\n      float qs2 = *reinterpret_cast<const float *>(&q[(d + 12) * N + i * 4]);\n      const float sum = qa * qs;\n      const float sum2 = qa * 
qa * qs2;\n      for (size_t j = 0; j < M; ++j) {\n        float ma = *reinterpret_cast<const float *>(&m[d * M + j * 4]);\n        float mb = *reinterpret_cast<const float *>(&m[(d + 4) * M + j * 4]);\n        float ms = *reinterpret_cast<const float *>(&m[(d + 8) * M + j * 4]);\n        float ms2 = *reinterpret_cast<const float *>(&m[(d + 12) * M + j * 4]);\n        *out = ma * ma * ms2 + sum2 - 2 * ma * qa * *out +\n               (mb - qb) * (mb - qb) * d + 2 * (mb - qb) * (ms * ma - sum);\n        out++;\n      }\n    }\n  }\n};\n\n// Compute SquaredEuclidean for quantized INT4\ntemplate <size_t M, size_t N>\nstruct SquaredEuclidean<uint8_t, M, N> {\n  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,\n                      float *out) {\n    const size_t d = dim - 32;\n    const size_t p = d >> 1;  // params\n    ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);\n\n    for (size_t i = 0; i < N; ++i) {\n      float qa = *reinterpret_cast<const float *>(&q[p * N + i * 4]);\n      float qb = *reinterpret_cast<const float *>(&q[(p + 4) * N + i * 4]);\n      float qs = *reinterpret_cast<const float *>(&q[(p + 8) * N + i * 4]);\n      float qs2 = *reinterpret_cast<const float *>(&q[(p + 12) * N + i * 4]);\n      const float sum = qa * qs;\n      const float sum2 = qa * qa * qs2;\n      for (size_t j = 0; j < M; ++j) {\n        float ma = *reinterpret_cast<const float *>(&m[p * M + j * 4]);\n        float mb = *reinterpret_cast<const float *>(&m[(p + 4) * M + j * 4]);\n        float ms = *reinterpret_cast<const float *>(&m[(p + 8) * M + j * 4]);\n        float ms2 = *reinterpret_cast<const float *>(&m[(p + 12) * M + j * 4]);\n        *out = ma * ma * ms2 + sum2 - 2 * ma * qa * *out +\n               (mb - qb) * (mb - qb) * d + 2 * (mb - qb) * (ms * ma - sum);\n        out++;\n      }\n    }\n  }\n};\n//===========================================================\n// 
MinusInnerProduct\n//===========================================================\n\ntemplate <size_t M, size_t N>\nstatic void MinusInnerProductImplInt8(const int8_t *m, const int8_t *q,\n                                      size_t origin_dim, float *out) {\n  const size_t d = origin_dim;\n  ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);\n\n  for (size_t i = 0; i < N; ++i) {\n    float qa = *reinterpret_cast<const float *>(&q[d * N + i * 4]);\n    float qb = *reinterpret_cast<const float *>(&q[(d + 4) * N + i * 4]);\n    float qs = *reinterpret_cast<const float *>(&q[(d + 8) * N + i * 4]);\n    for (size_t j = 0; j < M; ++j) {\n      float ma = *reinterpret_cast<const float *>(&m[d * M + j * 4]);\n      float mb = *reinterpret_cast<const float *>(&m[(d + 4) * M + j * 4]);\n      float ms = *reinterpret_cast<const float *>(&m[(d + 8) * M + j * 4]);\n      *out = -(ma * qa * *out + mb * qa * qs + qb * ma * ms + d * qb * mb);\n      out++;\n    }\n  }\n}\n\ntemplate <size_t M, size_t N>\nstatic void MinusInnerProductImplUint8(const uint8_t *m, const uint8_t *q,\n                                       size_t origin_dim, float *out) {\n  const size_t d = origin_dim;\n  const size_t p = d >> 1;  // params\n  ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);\n\n  for (size_t i = 0; i < N; ++i) {\n    float qa = *reinterpret_cast<const float *>(&q[p * N + i * 4]);\n    float qb = *reinterpret_cast<const float *>(&q[(p + 4) * N + i * 4]);\n    float qs = *reinterpret_cast<const float *>(&q[(p + 8) * N + i * 4]);\n    for (size_t j = 0; j < M; ++j) {\n      float ma = *reinterpret_cast<const float *>(&m[p * M + j * 4]);\n      float mb = *reinterpret_cast<const float *>(&m[(p + 4) * M + j * 4]);\n      float ms = *reinterpret_cast<const float *>(&m[(p + 8) * M + j * 4]);\n      *out = -(ma * qa * *out + mb * qa * qs + qb * ma * ms + d * qb * mb);\n      out++;\n    }\n  }\n}\n\n\ntemplate <typename T, size_t M, size_t N>\nstruct 
MinusInnerProduct;\n\n// Compute MinusInnerProduct for quantized INT8\ntemplate <size_t M, size_t N>\nstruct MinusInnerProduct<int8_t, M, N> {\n  static void Compute(const int8_t *m, const int8_t *q, size_t dim,\n                      float *out) {\n    const size_t origin_dim = dim - 20;\n    MinusInnerProductImplInt8<M, N>(m, q, origin_dim, out);\n  }\n};\n\n// Compute MinusInnerProduct for quantized INT4\ntemplate <size_t M, size_t N>\nstruct MinusInnerProduct<uint8_t, M, N> {\n  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,\n                      float *out) {\n    const size_t origin_dim = dim - 32;\n    MinusInnerProductImplUint8<M, N>(m, q, origin_dim, out);\n  }\n};\n\n\n//===========================================================\n// CosineMinusInnerProduct\n//===========================================================\ntemplate <typename T, size_t M, size_t N>\nstruct CosineMinusInnerProduct;\n\n// Compute CosineMinusInnerProduct for quantized INT8\ntemplate <size_t M, size_t N>\nstruct CosineMinusInnerProduct<int8_t, M, N> {\n  static void Compute(const int8_t *m, const int8_t *q, size_t dim,\n                      float *out) {\n    const size_t origin_dim = dim - 24;\n    MinusInnerProductImplInt8<M, N>(m, q, origin_dim, out);\n  }\n};\n\n// Compute CosineMinusInnerProduct for quantized INT4\ntemplate <size_t M, size_t N>\nstruct CosineMinusInnerProduct<uint8_t, M, N> {\n  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,\n                      float *out) {\n    const size_t origin_dim = dim - 40;\n    MinusInnerProductImplUint8<M, N>(m, q, origin_dim, out);\n  }\n};\n\n//===========================================================\n// MipsSquaredEuclidean\n//===========================================================\n\ntemplate <typename T, size_t M, size_t N>\nstruct MipsSquaredEuclidean;\n\n// Compute MipsSquaredEuclidean for quantized INT8\ntemplate <size_t M, size_t N>\nstruct MipsSquaredEuclidean<int8_t, 
M, N> {\n  static void Compute(const int8_t *m, const int8_t *q, size_t dim,\n                      float *out) {\n    const size_t d = dim - 20;\n    ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);\n\n    for (size_t i = 0; i < N; ++i) {\n      float qa = *reinterpret_cast<const float *>(&q[d * N + i * 4]);\n      float qb = *reinterpret_cast<const float *>(&q[(d + 4) * N + i * 4]);\n      float qs = *reinterpret_cast<const float *>(&q[(d + 8) * N + i * 4]);\n      float qs2 = *reinterpret_cast<const float *>(&q[(d + 12) * N + i * 4]);\n      float q2 = qa * qa * qs2 + 2 * qa * qb * qs + d * qb * qb;\n      for (size_t j = 0; j < M; ++j) {\n        float ma = *reinterpret_cast<const float *>(&m[d * M + j * 4]);\n        float mb = *reinterpret_cast<const float *>(&m[(d + 4) * M + j * 4]);\n        float ms = *reinterpret_cast<const float *>(&m[(d + 8) * M + j * 4]);\n        float ms2 = *reinterpret_cast<const float *>(&m[(d + 12) * M + j * 4]);\n        float m2 = ma * ma * ms2 + 2 * ma * mb * ms + d * mb * mb;\n        *out = 2.0f - 2.0f *\n                          (ma * qa * *out + mb * qa * qs + qb * ma * ms +\n                           d * qb * mb) /\n                          std::max(q2, m2);\n        out++;\n      }\n    }\n  }\n};\n\n// Compute MipsSquaredEuclidean for quantized INT4\ntemplate <size_t M, size_t N>\nstruct MipsSquaredEuclidean<uint8_t, M, N> {\n  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,\n                      float *out) {\n    const size_t d = dim - 32;\n    const size_t p = d >> 1;  // params\n    ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);\n\n    for (size_t i = 0; i < N; ++i) {\n      float qa = *reinterpret_cast<const float *>(&q[p * N + i * 4]);\n      float qb = *reinterpret_cast<const float *>(&q[(p + 4) * N + i * 4]);\n      float qs = *reinterpret_cast<const float *>(&q[(p + 8) * N + i * 4]);\n      float qs2 = *reinterpret_cast<const float *>(&q[(p + 12) * N + i 
* 4]);\n      float q2 = qa * qa * qs2 + 2 * qa * qb * qs + d * qb * qb;\n      for (size_t j = 0; j < M; ++j) {\n        float ma = *reinterpret_cast<const float *>(&m[p * M + j * 4]);\n        float mb = *reinterpret_cast<const float *>(&m[(p + 4) * M + j * 4]);\n        float ms = *reinterpret_cast<const float *>(&m[(p + 8) * M + j * 4]);\n        float ms2 = *reinterpret_cast<const float *>(&m[(p + 12) * M + j * 4]);\n        float m2 = ma * ma * ms2 + 2 * ma * mb * ms + d * mb * mb;\n        *out = 2.0f - 2.0f *\n                          (ma * qa * *out + mb * qa * qs + qb * ma * ms +\n                           d * qb * mb) /\n                          std::max(q2, m2);\n        out++;\n      }\n    }\n  }\n};\n\n}  // namespace zvec::core\n"
  },
  {
    "path": "src/core/mixed_reducer/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n  NAME core_mix_reducer STATIC SHARED STRICT ALWAYS_LINK\n  SRCS *.cc\n  LIBS zvec_ailego core_framework\n  INCS . ${PROJECT_ROOT_DIR}/src/core\n  VERSION \"${PROXIMA_ZVEC_VERSION}\"\n)\n"
  },
  {
    "path": "src/core/mixed_reducer/mixed_reducer_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n\nstatic const std::string PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE(\n    \"proxima.mixed.reducer.enable_pk_rewrite\");\nstatic const std::string PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS(\n    \"proxima.mixed.reducer.num_of_add_threads\");\n\nstatic const std::string PARAM_MIXED_REDUCER_WORKING_PATH(\n    \"proxima.mixed.reducer.working_path\");\nstatic const std::string PARAM_MIXED_REDUCER_NUM_OF_ADD_THREADS(\n    \"proxima.mixed.reducer.num_of_add_threads\");\nstatic const std::string PARAM_MIXED_REDUCER_STREAMER_CLASS(\n    \"proxima.mixed.reducer.streamer_class\");\nstatic const std::string PARAM_MIXED_REDUCER_HYBRID_VECTOR_ENABLE(\n    \"proxima.mixed.reducer.hybrid_vector_enable\");\nstatic const std::string PARAM_MIXED_REDUCER_INDEX_NAME(\n    \"proxima.mixed.reducer.index_name\");\nstatic const std::string PARAM_MIXED_REDUCER_QUANTIZER_CLASS(\n    \"proxima.mixed.reducer.quantizer_class\");\n\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/mixed_reducer/mixed_streamer_reducer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"mixed_streamer_reducer.h\"\n#include <ailego/pattern/defer.h>\n#include <utility/sparse_utility.h>\n#include <zvec/ailego/utility/file_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_logger.h>\n#include \"mixed_reducer/mixed_reducer_params.h\"\n\nnamespace zvec {\nnamespace core {\n\nint MixedStreamerReducer::init(const ailego::Params &params) {\n  enable_pk_rewrite_ =\n      params.get_as_bool(PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE);\n  params.get(PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS,\n             &num_of_add_threads_);\n  if (num_of_add_threads_ <= 0) {\n    LOG_ERROR(\"Wrong parameter. 
%s must be set greater than 0.\",\n              PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS.c_str());\n    return IndexError_InvalidArgument;\n  }\n\n  params_ = params;\n\n  state_ = STATE_INITED;\n  return 0;\n}\n\nint MixedStreamerReducer::cleanup(void) {\n  streamers_.clear();\n  target_streamer_->cleanup();\n\n  target_builder_->cleanup();\n  doc_cache_.clear();\n\n  stats_.clear_attributes();\n  state_ = STATE_UNINITED;\n  return 0;\n}\n\nint MixedStreamerReducer::set_target_streamer_wiht_info(\n    const IndexBuilder::Pointer builder, const IndexStreamer::Pointer streamer,\n    const IndexConverter::Pointer converter,\n    const IndexReformer::Pointer reformer,\n    const IndexQueryMeta &original_query_meta) {\n  if (state_ != STATE_INITED) {\n    LOG_ERROR(\"Set target streamer after init\");\n    return IndexError_Uninitialized;\n  }\n\n  target_builder_ = builder;\n  target_streamer_ = streamer;\n  target_builder_converter_ = converter;\n  target_streamer_reformer_ = reformer;\n  original_query_meta_ = original_query_meta;\n\n  is_sparse_ =\n      target_streamer_->meta().meta_type() == IndexMeta::MetaType::MT_SPARSE;\n\n  state_ = STATE_STREAMER_SET;\n  return 0;\n}\n\nint MixedStreamerReducer::feed_streamer_with_reformer(\n    IndexStreamer::Pointer streamer, const IndexReformer::Pointer reformer) {\n  if (!(state_ == STATE_STREAMER_SET || state_ == STATE_FEED)) {\n    LOG_ERROR(\"Set target streamer or feed before feed\");\n    return IndexError_Uninitialized;\n  }\n\n  if (!streamer) {\n    LOG_ERROR(\"Streamer nullptr\");\n    return IndexError_InvalidArgument;\n  }\n\n  auto check_datatype = [&](const IndexMeta & /*target_meta*/,\n                            const IndexMeta &source_meta) -> bool {\n    if (!streamers_.empty()) {\n      auto &last_meta = streamers_.back()->meta();\n      return last_meta.data_type() == source_meta.data_type() &&\n             last_meta.dimension() == source_meta.dimension() &&\n             last_meta.unit_size() 
== source_meta.unit_size();\n    }\n    // TODO: check target meta\n    return true;\n  };\n\n  auto check_other = [&](const IndexMeta &target_meta,\n                         const IndexMeta &source_meta) -> bool {\n    return target_meta.meta_type() == source_meta.meta_type();\n    // when create a new index, there is a case that ip_flat merged into l2_hnsw\n    // target_meta.metric_name() == source_meta.metric_name();\n  };\n\n  if (!(check_datatype(target_streamer_->meta(), streamer->meta()) &&\n        check_other(target_streamer_->meta(), streamer->meta()))) {\n    LOG_ERROR(\"Streamer meta mismatch\");\n    return IndexError_InvalidArgument;\n  }\n\n  if (streamers_.empty()) {\n    is_target_and_source_same_reformer_ =\n        target_streamer_->meta().reformer_name() ==\n        streamer->meta().reformer_name();\n  }\n\n  streamers_.push_back(streamer);\n  source_streamers_reformers_.push_back(reformer);\n\n  state_ = STATE_FEED;\n  return 0;\n}\n\nint MixedStreamerReducer::reduce(const IndexFilter &filter) {\n  if (state_ != STATE_FEED) {\n    LOG_ERROR(\"Feed streamers first\");\n    return IndexError_Uninitialized;\n  }\n  if (thread_pool_ == nullptr) {\n    LOG_ERROR(\"Thread pool is not set\");\n    return IndexError_Uninitialized;\n  }\n\n  ailego::ElapsedTime timer;\n\n\n  std::vector<int> add_results(num_of_add_threads_, -1);\n  auto add_group = thread_pool_->make_group();\n\n  std::vector<int> read_results(streamers_.size(), -1);\n  // TODO: use id instead of key\n  uint32_t id_offset = 0, next_id = 0;\n\n  if (is_sparse_) {\n    for (size_t i = 0; i < num_of_add_threads_; i++) {\n      add_group->submit(ailego::Closure::New(\n          this, &MixedStreamerReducer::add_sparse_vec, &add_results[i]));\n    }\n\n    for (size_t i = 0; i < streamers_.size(); i++) {\n      // due to filter, producing can't be parallel\n      read_results[i] = read_sparse_vec(i, filter, id_offset, &next_id);\n      id_offset += 
streamers_[i]->create_sparse_provider()->count();\n    }\n\n    sparse_mt_list_.done();\n  } else {\n    for (size_t i = 0; i < num_of_add_threads_; i++) {\n      add_group->submit(ailego::Closure::New(\n          this, &MixedStreamerReducer::add_vec, &add_results[i]));\n      // add_vec(&add_results[i]);\n    }\n\n    for (size_t i = 0; i < streamers_.size(); i++) {\n      read_results[i] = read_vec(i, filter, id_offset, &next_id);\n      id_offset += streamers_[i]->create_provider()->count();\n    }\n\n    mt_list_.done();\n  }\n  add_group->wait_finish();\n\n  auto check_results = [](const std::vector<int> &results) -> bool {\n    return std::all_of(std::begin(results), std::end(results),\n                       [](int item) { return item == 0; });\n  };\n\n  if (!check_results(read_results)) {\n    LOG_ERROR(\"Get vector from entities failed\");\n    return IndexError_Runtime;\n  }\n\n  if (!check_results(add_results)) {\n    LOG_ERROR(\"add vector failed\");\n    return IndexError_Runtime;\n  }\n\n  stats_.set_reduced_costtime(timer.seconds());\n  state_ = STATE_REDUCE;\n  if (target_builder_ != nullptr) {\n    IndexBuild();\n  }\n\n  LOG_INFO(\"End brute force reduce. 
cost time: [%zu]s\",\n           (size_t)timer.seconds());\n  return 0;\n}\n\nint MixedStreamerReducer::dump(const IndexDumper::Pointer &dumper) {\n  LOG_INFO(\"Begin brute force reducer dump\");\n\n  if (state_ != STATE_REDUCE) {\n    LOG_WARN(\"Reduce first before dump\");\n    return IndexError_NoReady;\n  }\n\n  ailego::ElapsedTime timer;\n  int ret = 0;\n  if (target_builder_ != nullptr) {\n    target_builder_->dump(dumper);\n  } else {\n    target_streamer_->dump(dumper);\n  }\n  if (ret == IndexError_NotImplemented) {\n    LOG_WARN(\"Dump index not implemented\");\n  } else if (ret < 0) {\n    LOG_ERROR(\"Failed to dump in streamer\");\n  }\n\n  return ret;\n}\n\nint MixedStreamerReducer::read_vec(size_t source_streamer_index,\n                                   const IndexFilter &filter,\n                                   const uint32_t id_offset,\n                                   uint32_t *next_id) {\n  const auto &streamer = streamers_[source_streamer_index];\n  const auto &reformer = source_streamers_reformers_[source_streamer_index];\n  const IndexQueryMeta source_streamer_query_meta{streamer->meta().data_type(),\n                                                  streamer->meta().dimension()};\n\n  bool need_revert = (target_streamer_->meta().reformer_name() !=\n                          streamer->meta().reformer_name() &&\n                      reformer != nullptr);\n  if (target_builder_ && reformer) {\n    need_revert = true;\n  }\n\n  IndexProvider::Pointer provider = streamer->create_provider();\n  IndexProvider::Iterator::Pointer iterator = provider->create_iterator();\n\n  while (iterator->is_valid()) {\n    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {\n      LOG_DEBUG(\"read_vec cancelled.\");\n      return 0;\n    }\n    if (filter(iterator->key() + (uint64_t)id_offset)) {\n      (*stats_.mutable_filtered_count())++;\n      iterator->next();\n      continue;\n    }\n\n    std::vector<uint8_t> bytes;\n    if 
(need_revert) {\n      std::string new_vector;\n      if (reformer->revert(iterator->data(), source_streamer_query_meta,\n                           &new_vector) != 0) {\n        LOG_ERROR(\"Failed to revert the vector\");\n        return IndexError_Runtime;\n      }\n      bytes.resize(new_vector.size());\n      memcpy(bytes.data(), new_vector.data(), bytes.size());\n    } else {\n      // TODO: eliminate the copy\n      bytes.resize(provider->element_size());\n      memcpy(bytes.data(), iterator->data(), bytes.size());\n    }\n\n    // TODO: use id instead of key\n    if (!mt_list_.produce(VectorItem((*next_id)++, std::move(bytes)))) {\n      LOG_ERROR(\"Produce vector to queue failed. key[%lu]\",\n                (size_t)iterator->key());\n      return IndexError_Runtime;\n    }\n    iterator->next();\n  }\n  return 0;\n}\n\nvoid MixedStreamerReducer::add_vec(int *result) {\n  if (target_builder_ != nullptr) {\n    add_vec_with_builder(result);\n    return;\n  }\n  ailego::ElapsedTime timer;\n  auto target_streamer_context = target_streamer_->create_context();\n  auto target_streamer_query_meta = IndexQueryMeta{\n      IndexMeta::MetaType::MT_DENSE, target_streamer_->meta().data_type(),\n      target_streamer_->meta().dimension()};\n  const bool need_convert = (!is_target_and_source_same_reformer_) &&\n                            target_streamer_reformer_ != nullptr;\n\n  AILEGO_DEFER([&]() {\n    // make producer quit\n    mt_list_.done();\n  });\n\n  VectorItem vector_item;\n  while (mt_list_.consume(&vector_item)) {\n    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {\n      LOG_DEBUG(\"add_vec cancelled.\");\n      return;\n    }\n\n    const void *vector = vector_item.vec_.data();\n    std::string new_vector;\n\n\n    if (need_convert) {\n      IndexQueryMeta new_meta;\n      if (target_streamer_reformer_->convert(vector, original_query_meta_,\n                                             &new_vector, &new_meta) != 0) {\n        
LOG_ERROR(\"Failed to transform vector\");\n        *result = IndexError_Runtime;\n        return;\n      }\n      vector = new_vector.data();\n    }\n    // 1. no reformer: target_streamer_query_meta_ = original_query_meta_\n    // 2. has reformer, matched(need_convert = false): use\n    // target_streamer_query_meta_\n    // 3. has reformer, not matched(need_convert = true): use\n    // target_streamer_query_meta_\n\n\n    // TODO: use id instead of key\n    int ret = target_streamer_->add_with_id_impl(\n        (uint32_t)vector_item.pkey_, vector, target_streamer_query_meta,\n        target_streamer_context);\n    if (ret != 0) {\n      LOG_ERROR(\"Insert target streamer failed. ret[%d] reason[%s] pkey[%zu]\",\n                ret, IndexError::What(ret), (size_t)vector_item.pkey_);\n      *result = ret;\n      return;\n    }\n  }\n\n  *result = 0;\n  LOG_DEBUG(\"add_vec. cost time: [%zu]s\", (size_t)timer.seconds());\n  return;\n}\n\nvoid MixedStreamerReducer::add_vec_with_builder(int *result) {\n  ailego::ElapsedTime timer;\n  auto target_streamer_query_meta = IndexQueryMeta{\n      IndexMeta::MetaType::MT_DENSE, target_streamer_->meta().data_type(),\n      target_streamer_->meta().dimension()};\n\n  AILEGO_DEFER([&]() {\n    // make producer quit\n    mt_list_.done();\n  });\n\n  VectorItem vector_item;\n  while (mt_list_.consume(&vector_item)) {\n    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {\n      LOG_DEBUG(\"add_vec cancelled.\");\n      return;\n    }\n\n    const void *vector = vector_item.vec_.data();\n    std::string out_vector_buffer = std::string(\n        static_cast<const char *>(vector),\n        original_query_meta_.dimension() * original_query_meta_.unit_size());\n    PushToDocCache(original_query_meta_, (uint32_t)vector_item.pkey_,\n                   out_vector_buffer);\n  }\n\n  *result = 0;\n  LOG_DEBUG(\"add_vec. 
cost time: [%zu]s\", (size_t)timer.seconds());\n  return;\n}\n\nvoid MixedStreamerReducer::add_sparse_vec(int *result) {\n  ailego::ElapsedTime timer;\n  auto target_streamer_context = target_streamer_->create_context();\n  auto target_streamer_query_meta = IndexQueryMeta{\n      IndexMeta::MetaType::MT_SPARSE,\n      target_streamer_->meta().data_type(),\n  };\n\n  auto need_convert = !is_target_and_source_same_reformer_ &&\n                      target_streamer_reformer_ != nullptr;\n\n  AILEGO_DEFER([&]() {\n    // make producer quit\n    sparse_mt_list_.done();\n  });\n\n  SparseVectorItem sparse_vector_item;\n  while (sparse_mt_list_.consume(&sparse_vector_item)) {\n    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {\n      LOG_DEBUG(\"add_sparse_vec cancelled.\");\n      return;\n    }\n    auto sparse_count = sparse_vector_item.sparse_indices_.size();\n    auto indices = sparse_vector_item.sparse_indices_.data();\n    auto values = sparse_vector_item.sparse_values_.data();\n\n    std::string converted_sparse_values_buffer;\n    if (need_convert) {\n      IndexQueryMeta new_meta;\n      if (target_streamer_reformer_->convert(\n              sparse_count, indices, values, original_query_meta_,\n              &converted_sparse_values_buffer, &new_meta) != 0) {\n        LOG_ERROR(\"Failed to transform vector\");\n        *result = IndexError_Runtime;\n        return;\n      }\n      values = converted_sparse_values_buffer.data();\n      target_streamer_query_meta = new_meta;\n    }\n\n    // TODO: use id instead of key\n    int ret = target_streamer_->add_with_id_impl(\n        (uint32_t)sparse_vector_item.pkey_, sparse_count, indices, values,\n        target_streamer_query_meta, target_streamer_context);\n    if (ret != 0) {\n      LOG_ERROR(\"Insert target streamer failed. 
ret[%d] reason[%s] pkey[%zu]\",\n                ret, IndexError::What(ret), (size_t)sparse_vector_item.pkey_);\n      *result = ret;\n      return;\n    }\n  }\n\n  *result = 0;\n  LOG_DEBUG(\"add_sparse_vec. cost time: [%zu]s\", (size_t)timer.seconds());\n  return;\n}\n\n\nint MixedStreamerReducer::read_sparse_vec(size_t source_streamer_index,\n                                          const IndexFilter &filter,\n                                          const uint32_t id_offset,\n                                          uint32_t *next_id) {\n  const auto &streamer = streamers_[source_streamer_index];\n  const auto &reformer = source_streamers_reformers_[source_streamer_index];\n  const bool need_revert =\n      !is_target_and_source_same_reformer_ && reformer != nullptr;\n\n  IndexStreamer::SparseProvider::Pointer provider =\n      streamer->create_sparse_provider();\n  IndexStreamer::SparseProvider::Iterator::Pointer iterator =\n      provider->create_iterator();\n\n  while (iterator->is_valid()) {\n    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {\n      LOG_DEBUG(\"read_sparse_vec cancelled.\");\n      return 0;\n    }\n    if (filter(iterator->key() + (uint64_t)id_offset)) {\n      (*stats_.mutable_filtered_count())++;\n      iterator->next();\n      continue;\n    }\n\n    auto sparse_count = iterator->sparse_count();\n    std::vector<uint32_t> sparse_indices(sparse_count);\n    std::string sparse_values;\n\n    if (need_revert) {\n      std::string new_sparse_values;\n      if (reformer->revert(iterator->sparse_count(), iterator->sparse_indices(),\n                           iterator->sparse_data(),\n                           {\n                               IndexMeta::MetaType::MT_SPARSE,\n                               streamer->meta().data_type(),\n                           },\n                           &new_sparse_values) != 0) {\n        LOG_ERROR(\"Failed to revert the sparse vector\");\n        return 
IndexError_Runtime;\n      }\n      sparse_values = std::move(new_sparse_values);\n    } else {\n      sparse_values.resize(sparse_count * streamer->meta().unit_size());\n      memcpy(sparse_values.data(), iterator->sparse_data(),\n             sparse_values.size());\n    }\n\n    // TODO: eliminate the copy\n    memcpy(sparse_indices.data(), iterator->sparse_indices(),\n           sparse_indices.size() * sizeof(uint32_t));\n\n    // TODO: use id instead of key\n    if (!sparse_mt_list_.produce(SparseVectorItem((*next_id)++,\n                                                  std::move(sparse_indices),\n                                                  std::move(sparse_values)))) {\n      LOG_ERROR(\"Produce vector to queue failed. key[%lu]\",\n                (size_t)iterator->key());\n      return IndexError_Runtime;\n    }\n    iterator->next();\n  }\n  return 0;\n}\n\nvoid MixedStreamerReducer::PushToDocCache(const IndexQueryMeta &meta,\n                                          uint32_t doc_id, std::string &doc) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  while (doc_cache_.size() <= doc_id) {\n    std::string fake_data(meta.dimension() * meta.unit_size(), 0);\n    doc_cache_.push_back(std::make_pair(kInvalidKey, fake_data));\n  }\n  doc_cache_[doc_id] = std::make_pair(doc_id, doc);\n}\n\nint MixedStreamerReducer::IndexBuild() {\n  IndexHolder::Pointer target_holder;\n  if (original_query_meta_.data_type() == core::IndexMeta::DataType::DT_FP16) {\n    auto holder = std::make_shared<\n        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_FP16>>(\n        original_query_meta_.dimension());\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<uint16_t> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    target_holder = holder;\n  } else if 
(original_query_meta_.data_type() ==\n             core::IndexMeta::DataType::DT_FP32) {\n    auto holder = std::make_shared<\n        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_FP32>>(\n        original_query_meta_.dimension());\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<float> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    target_holder = holder;\n  } else if (original_query_meta_.data_type() ==\n             core::IndexMeta::DataType::DT_INT8) {\n    auto holder = std::make_shared<\n        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_INT8>>(\n        original_query_meta_.dimension());\n    for (auto doc : doc_cache_) {\n      ailego::NumericalVector<uint8_t> vec(doc.second);\n      if (doc.first == kInvalidKey) {\n        continue;\n      }\n      if (!holder->emplace(doc.first, vec)) {\n        LOG_ERROR(\"Failed to add vector\");\n        return core::IndexError_Runtime;\n      }\n    }\n    target_holder = holder;\n  } else {\n    LOG_ERROR(\"data_type is not support\");\n    return core::IndexError_Runtime;\n  }\n  if (target_builder_converter_) {\n    core::IndexConverter::TrainAndTransform(target_builder_converter_,\n                                            target_holder);\n    target_holder = target_builder_converter_->result();\n  }\n  target_builder_->train(target_holder);\n  target_builder_->build(target_holder);\n  return 0;\n}\n\nINDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(MixedStreamerReducer,\n                                              MixedStreamerReducer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/mixed_reducer/mixed_streamer_reducer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <vector>\n#include <ailego/parallel/lock.h>\n#include <ailego/parallel/multi_thread_list.h>\n#include <utility/sparse_utility.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_reducer.h>\n#include <zvec/core/framework/index_reformer.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\n\nclass MixedStreamerReducer : public IndexStreamerReducer {\n public:\n  //! Constructor\n  MixedStreamerReducer(void) {}\n\n  //! Initialize Reducer\n  int init(const ailego::Params &params) override;\n\n  //! Cleanup Reducer\n  int cleanup(void) override;\n\n  //! Reduce operator (with filter)\n  int reduce(const IndexFilter &filter) override;\n\n  //! 
Dump index by dumper\n  int dump(const IndexDumper::Pointer &dumper) override;\n\n public:  // StreamerReducer's unique methods\n  int set_target_streamer_wiht_info(\n      const IndexBuilder::Pointer builder,\n      const IndexStreamer::Pointer streamer,\n      const IndexConverter::Pointer converter,\n      const IndexReformer::Pointer reformer,\n      const IndexQueryMeta &original_query_meta) override;\n  // feed_streamer\n  int feed_streamer_with_reformer(\n      IndexStreamer::Pointer streamer,\n      const IndexReformer::Pointer reformer) override;\n\n private:\n  int read_vec(size_t source_streamer_index, const IndexFilter &filter,\n               const uint32_t id_offset, uint32_t *next_id);\n  void add_vec(int *result);\n  void add_vec_with_builder(int *result);\n  int read_sparse_vec(size_t source_streamer_index, const IndexFilter &filter,\n                      const uint32_t id_offset, uint32_t *next_id);\n  void add_sparse_vec(int *result);\n\n  void PushToDocCache(const IndexQueryMeta &meta, uint32_t doc_id,\n                      std::string &doc);\n  int IndexBuild();\n\n  //! 
Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n private:\n  enum State {\n    STATE_UNINITED,\n    STATE_INITED,\n    STATE_STREAMER_SET,\n    STATE_FEED,\n    STATE_REDUCE\n  };\n\n  bool enable_pk_rewrite_{false};\n  bool is_sparse_{false};\n\n  Stats stats_{};\n  State state_{STATE_UNINITED};\n\n  size_t num_of_add_threads_{0};\n  ailego::MultiThreadList<VectorItem> mt_list_;\n  ailego::MultiThreadList<SparseVectorItem> sparse_mt_list_;\n\n\n  ailego::Params params_;\n  IndexStreamer::Pointer target_streamer_{nullptr};\n  IndexReformer::Pointer target_streamer_reformer_{nullptr};\n  bool is_target_and_source_same_reformer_{false};\n  IndexQueryMeta original_query_meta_{};\n\n  std::vector<IndexStreamer::Pointer> streamers_;\n  std::vector<IndexReformer::Pointer> source_streamers_reformers_;\n\n  IndexBuilder::Pointer target_builder_{nullptr};\n  IndexConverter::Pointer target_builder_converter_{nullptr};\n  std::mutex mutex_{};\n  std::vector<std::pair<uint64_t, std::string>> doc_cache_;\n  const uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n        NAME core_quantizer \n        STATIC SHARED STRICT ALWAYS_LINK\n        SRCS *.cc\n        LIBS zvec_ailego core_framework\n        INCS . ${PROJECT_ROOT_DIR}/src/core\n        VERSION \"${PROXIMA_ZVEC_VERSION}\"\n)\n"
  },
  {
    "path": "src/core/quantizer/binary_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iterator>\n#include <ailego/algorithm/binary_quantizer.h>\n#include <ailego/pattern/defer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Binary Quantizer Converter Holder\n */\nclass BinaryConverterHolder : public IndexHolder {\n public:\n  /*! Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Constructor\n    Iterator(const BinaryConverterHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(ailego::BinaryQuantizer::EncodedSizeInBinary32(\n                      owner->dimension()),\n                  0),\n          front_iter_(std::move(iter)),\n          quantizer_(owner->quantizer_),\n          dim_{owner->dimension()} {\n      this->encode_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! 
Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->encode_record();\n    }\n\n   private:\n    //! Encode the data by quantizer\n    inline void encode_record(void) {\n      if (front_iter_->is_valid()) {\n        const float *vec = reinterpret_cast<const float *>(front_iter_->data());\n        quantizer_->encode(vec, dim_ / 2, buffer_.data());\n      }\n    }\n\n    //! Members\n    std::vector<uint32_t> buffer_{};\n    IndexHolder::Iterator::Pointer front_iter_{};\n    std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};\n    size_t dim_{0u};\n  };\n\n  //! Constructor\n  BinaryConverterHolder(IndexHolder::Pointer front,\n                        std::shared_ptr<ailego::BinaryQuantizer> quantizer)\n      : front_(std::move(front)), quantizer_(std::move(quantizer)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return ailego::BinaryQuantizer::EncodedSizeInBinary32(front_->dimension()) *\n           32u;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY32;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_BINARY32,\n                                    this->dimension());\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter\n               ? 
IndexHolder::Iterator::Pointer(\n                     new BinaryConverterHolder::Iterator(this, std::move(iter)))\n               : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Members\n  IndexHolder::Pointer front_{};\n  std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};\n};\n\n/*! Binary Converter\n */\nclass BinaryConverter : public IndexConverter {\n public:\n  //! Destructor\n  virtual ~BinaryConverter(void) {}\n\n  //! Initialize Converter\n  int init(const IndexMeta &mt, const ailego::Params &params) override {\n    if (ailego_unlikely(mt.data_type() != IndexMeta::DataType::DT_FP32 ||\n                        mt.unit_size() != IndexMeta::UnitSizeof(\n                                              IndexMeta::DataType::DT_FP32))) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u.\", mt.data_type(),\n                mt.unit_size());\n      return IndexError_Unsupported;\n    }\n\n    meta_ = mt;\n\n    ailego::Params reformer_params;\n    meta_.set_reformer(\"BinaryReformer\", 0, reformer_params);\n\n    if (meta_.metric_name() != \"InnerProduct\") {\n      LOG_ERROR(\"Only InnerProduct Supported\");\n      return IndexError_Unsupported;\n    }\n\n    dimension_ = meta_.dimension();\n\n    size_t dim =\n        ailego::BinaryQuantizer::EncodedSizeInBinary32(dimension_) * 32u;\n\n    meta_.set_metric(\"Hamming\", 0, ailego::Params());\n    meta_.set_converter(\"BinaryConverter\", 0, params);\n    meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n\n    return 0;\n  }\n\n  //! Cleanup Converter\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexHolder::Pointer holder) override {\n    if (holder->dimension() != dimension_ ||\n        holder->data_type() != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Mismatch;\n    }\n\n    return 0;\n  }\n\n  //! 
Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||\n        holder->dimension() != dimension_) {\n      return IndexError_Mismatch;\n    }\n\n    if (holder->count() > 0) {\n      *stats_.mutable_transformed_count() += holder->count();\n    }\n    holder_ =\n        std::make_shared<BinaryConverterHolder>(std::move(holder), quantizer_);\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  //! Members\n  IndexMeta meta_{};\n  IndexHolder::Pointer holder_{};\n  std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};\n  Stats stats_{};\n  size_t dimension_{0u};\n};\n\nINDEX_FACTORY_REGISTER_CONVERTER(BinaryConverter);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/quantizer/binary_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/algorithm/binary_quantizer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Binary Reformer\n */\nclass BinaryReformer : public IndexReformer {\n public:\n  //! Initialize Reformer\n  int init(const ailego::Params & /*params*/) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    size_t dim =\n        ailego::BinaryQuantizer::EncodedSizeInBinary32(qmeta.dimension()) * 32u;\n    out->resize(\n        IndexMeta::ElementSizeof(IndexMeta::DataType::DT_BINARY32, dim));\n    const float *vec = reinterpret_cast<const float *>(query);\n\n    quantizer_.encode(vec, qmeta.dimension(),\n                      reinterpret_cast<uint32_t *>(&(*out)[0]));\n    *ometa = qmeta;\n    ometa->set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n\n    return 0;\n  }\n\n  //! Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    size_t dim =\n        ailego::BinaryQuantizer::EncodedSizeInBinary32(qmeta.dimension()) * 32u;\n    out->resize(count * IndexMeta::ElementSizeof(\n                            IndexMeta::DataType::DT_BINARY32, dim));\n    const float *vec = reinterpret_cast<const float *>(query);\n\n    quantizer_.encode(vec, qmeta.dimension() * count,\n                      reinterpret_cast<uint32_t *>(&(*out)[0]));\n    *ometa = qmeta;\n    ometa->set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n\n    return 0;\n  }\n\n  //! Normalize results\n  int normalize(const void *, const IndexQueryMeta &,\n                IndexDocumentList &) const override {\n    return 0;\n  }\n\n private:\n  //! 
Members\n  ailego::BinaryQuantizer quantizer_{};\n};\n\nINDEX_FACTORY_REGISTER_REFORMER(BinaryReformer);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/quantizer/cosine_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iterator>\n#include <ailego/algorithm/integer_quantizer.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <ailego/pattern/defer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"record_quantizer.h\"\n#include \"../metric/metric_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Cosine Converter Holder\n */\nclass CosineConverterHolder : public IndexHolder {\n public:\n  static constexpr size_t NORM_SIZE = sizeof(float);\n\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! 
Constructor\n    Iterator(const CosineConverterHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter,\n             IndexMeta::DataType original_type, IndexMeta::DataType type)\n        : owner_(owner),\n          front_iter_(std::move(iter)),\n          original_type_(original_type),\n          type_(type) {\n      dimension_ = owner_->dimension(),\n      original_dimension_ = dimension_ - ExtraDimension(type_);\n      size_t element_size = owner->element_size();\n\n      if (original_type_ == IndexMeta::DataType::DT_FP16) {\n        normalize_buffer_.resize(dimension_ * sizeof(ailego::Float16));\n      } else {  // original_type_ == IndexMeta::DataType::DT_FP32\n        normalize_buffer_.resize(dimension_ * sizeof(float));\n\n        if (type_ == IndexMeta::DataType::DT_FP16 ||\n            type_ == IndexMeta::DataType::DT_INT4 ||\n            type_ == IndexMeta::DataType::DT_INT8) {\n          buffer_.resize(element_size, 0);\n        }\n      }\n\n      this->convert_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return type_ == original_type_ ? normalize_buffer_.data()\n                                     : buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->convert_record();\n    }\n\n   private:\n    //! 
Encode the data by quantizer\n    void convert_record(void) {\n      if (!front_iter_->is_valid()) {\n        return;\n      }\n\n      size_t element_size = owner_->element_size();\n      size_t original_element_size =\n          IndexMeta::ElementSizeof(original_type_, original_dimension_);\n\n      if (original_type_ == IndexMeta::DataType::DT_FP16) {\n        ::memcpy(reinterpret_cast<char *>(&normalize_buffer_[0]),\n                 reinterpret_cast<const char *>(front_iter_->data()),\n                 original_element_size);\n\n        ailego::Float16 *buf =\n            reinterpret_cast<ailego::Float16 *>(&normalize_buffer_[0]);\n\n        float norm = 0.0f;\n        ailego::Normalizer<ailego::Float16>::L2(buf, original_dimension_,\n                                                &norm);\n\n        ::memcpy(reinterpret_cast<uint16_t *>(&normalize_buffer_[0]) +\n                     original_dimension_,\n                 &norm, NORM_SIZE);\n      } else {  // original_type_ == IndexMeta::DataType::DT_FP32\n        ::memcpy(reinterpret_cast<char *>(&normalize_buffer_[0]),\n                 reinterpret_cast<const char *>(front_iter_->data()),\n                 original_element_size);\n\n        float *buf = reinterpret_cast<float *>(&normalize_buffer_[0]);\n\n        float norm = 0.0f;\n        ailego::Normalizer<float>::L2(buf, original_dimension_, &norm);\n\n        if (type_ == IndexMeta::DataType::DT_FP32) {\n          ::memcpy(reinterpret_cast<float *>(&normalize_buffer_[0]) +\n                       original_dimension_,\n                   &norm, NORM_SIZE);\n        } else if (type_ == IndexMeta::DataType::DT_FP16) {\n          ailego::FloatHelper::ToFP16(\n              buf, original_dimension_,\n              reinterpret_cast<uint16_t *>(&buffer_[0]));\n\n          ::memcpy(\n              reinterpret_cast<uint16_t *>(&buffer_[0]) + original_dimension_,\n              &norm, NORM_SIZE);\n        } else if (type_ == IndexMeta::DataType::DT_INT4 ||\n     
              type_ == IndexMeta::DataType::DT_INT8) {\n          RecordQuantizer::quantize_record(\n              reinterpret_cast<const float *>(normalize_buffer_.data()),\n              original_dimension_, type_, false, &buffer_[0]);\n\n          ::memcpy(reinterpret_cast<uint8_t *>(&buffer_[0]) + element_size -\n                       NORM_SIZE,\n                   &norm, NORM_SIZE);\n        }\n      }\n    }\n\n    //! Members\n    const CosineConverterHolder *owner_{nullptr};\n    std::string buffer_{};\n    std::string normalize_buffer_{};\n    IndexHolder::Iterator::Pointer front_iter_{};\n    size_t dimension_{0u};\n    size_t original_dimension_{0u};\n    IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};\n    IndexMeta::DataType type_{IndexMeta::DataType::DT_UNDEFINED};\n  };\n\n  //! Constructor\n  CosineConverterHolder(IndexHolder::Pointer front,\n                        IndexMeta::DataType original_type,\n                        IndexMeta::DataType type)\n      : front_(std::move(front)),\n        original_type_(original_type),\n        type_(type),\n        dimension_(front_->dimension()) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_ + ExtraDimension(type_);\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return type_;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(this->data_type(), this->dimension());\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! 
Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n\n    return iter ? IndexHolder::Iterator::Pointer(\n                      new CosineConverterHolder::Iterator(this, std::move(iter),\n                                                          this->original_type_,\n                                                          this->type_))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n  static size_t ExtraDimension(IndexMeta::DataType type) {\n    // The extra quantized params storage size to save for each vector\n    if (type == IndexMeta::DataType::DT_INT4)\n      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)\n    else if (type == IndexMeta::DataType::DT_INT8)\n      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)\n    else if (type == IndexMeta::DataType::DT_FP16)\n      return 2;  // sizeof(float) / sizeof(FT_FP16)\n    else if (type == IndexMeta::DataType::DT_FP32) {\n      return 1;  // sizeof(float) / sizeof(FT_FP32)\n    } else {\n      return 0;\n    }\n  }\n\n private:\n  //! Members\n  IndexHolder::Pointer front_{};\n  IndexMeta::DataType original_type_{};\n  IndexMeta::DataType type_{};\n  uint32_t dimension_{0};\n};\n\n/*! Converter of Cosine\n */\nclass CosineConverter : public IndexConverter {\n public:\n  static constexpr size_t NORM_SIZE = sizeof(float);\n\n public:\n  //! Constructor\n  CosineConverter(IndexMeta::DataType original_type,\n                  IndexMeta::DataType dst_type)\n      : original_type_(original_type), dst_type_(dst_type) {}\n\n  //! Constructor\n  CosineConverter(IndexMeta::DataType dst_type)\n      : original_type_(IndexMeta::DataType::DT_FP32), dst_type_(dst_type) {}\n\n  CosineConverter()\n      : original_type_(IndexMeta::DataType::DT_UNDEFINED),\n        dst_type_(IndexMeta::DataType::DT_UNDEFINED) {}\n\n  //! Destructor\n  ~CosineConverter() override {}\n\n  //! 
Initialize Converter\n  int init(const IndexMeta &index_meta, const ailego::Params &params) override {\n    meta_ = index_meta;\n\n    IndexMeta::DataType type = meta_.data_type();\n\n    if (type != original_type_) {\n      LOG_ERROR(\"Original Type Not Matched: (%d, %d)\", type, original_type_);\n      return IndexError_Mismatch;\n    }\n\n    if (meta_.unit_size() != IndexMeta::UnitSizeof(type)) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u\", type,\n                meta_.unit_size());\n      return IndexError_Unsupported;\n    }\n\n    ailego::Params reformer_params;\n\n    if (dst_type_ == IndexMeta::DataType::DT_INT8) {\n      meta_.set_converter(\"CosineInt8Converter\", 0, params);\n      meta_.set_reformer(\"CosineInt8Reformer\", 0, reformer_params);\n\n      ailego::Params metric_params;\n      metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,\n                        index_meta.metric_name());\n      metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,\n                        index_meta.metric_params());\n      meta_.set_metric(\"QuantizedInteger\", 0, metric_params);\n    } else if (dst_type_ == IndexMeta::DataType::DT_INT4) {\n      if (index_meta.dimension() % 2) {\n        LOG_ERROR(\"Unsupported dimension %u for INT4 type\",\n                  index_meta.dimension());\n        return IndexError_Unsupported;\n      }\n\n      meta_.set_converter(\"CosineInt4Converter\", 0, params);\n      meta_.set_reformer(\"CosineInt4Reformer\", 0, reformer_params);\n\n      ailego::Params metric_params;\n      metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,\n                        index_meta.metric_name());\n      metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,\n                        index_meta.metric_params());\n      meta_.set_metric(\"QuantizedInteger\", 0, metric_params);\n    } else if (dst_type_ == IndexMeta::DataType::DT_FP16) {\n      if (original_type_ == IndexMeta::DataType::DT_FP16) 
{\n        meta_.set_reformer(\"CosineHalfFloatReformer\", 0, reformer_params);\n        meta_.set_converter(\"CosineHalfFloatConverter\", 0, params);\n      } else {\n        meta_.set_reformer(\"CosineFp16Reformer\", 0, reformer_params);\n        meta_.set_converter(\"CosineFp16Converter\", 0, params);\n      }\n    } else {\n      dst_type_ = type;\n\n      meta_.set_reformer(\"CosineFp32Reformer\", 0, reformer_params);\n      meta_.set_converter(\"CosineFp32Converter\", 0, params);\n    }\n\n    meta_.set_meta(dst_type_, meta_.dimension() + ExtraDimension(dst_type_));\n\n    return 0;\n  }\n\n  //! Cleanup Converter\n  virtual int cleanup(void) override {\n    *stats_.mutable_transformed_count() = 0;\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexHolder::Pointer /*holder*/) override {\n    return 0;\n  }\n\n  //! Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->data_type() != original_type_ ||\n        holder->dimension() != meta_.dimension() - ExtraDimension(dst_type_)) {\n      return IndexError_Mismatch;\n    }\n\n    *stats_.mutable_transformed_count() += holder->count();\n\n    holder_ = std::make_shared<CosineConverterHolder>(\n        holder, holder->data_type(), dst_type_);\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer & /*dumper*/) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! 
Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n  static size_t ExtraDimension(IndexMeta::DataType type) {\n    // The extra quantized params storage size to save for each vector\n    if (type == IndexMeta::DataType::DT_INT4)\n      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)\n    else if (type == IndexMeta::DataType::DT_INT8)\n      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)\n    else if (type == IndexMeta::DataType::DT_FP16)\n      return 2;  // sizeof(float) / sizeof(FT_FP16)\n    else if (type == IndexMeta::DataType::DT_FP32) {\n      return 1;  // sizeof(float) / sizeof(FT_FP32)\n    } else {\n      return 0;\n    }\n  }\n\n  //! Members\n  IndexMeta meta_{};\n  Stats stats_{};\n  IndexHolder::Pointer holder_{};\n  IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};\n  IndexMeta::DataType dst_type_{IndexMeta::DataType::DT_UNDEFINED};\n};\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineNormalizeConverter,\n                                       CosineConverter,\n                                       IndexMeta::DataType::DT_FP32);\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineFp32Converter, CosineConverter,\n                                       IndexMeta::DataType::DT_FP32);\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineFp16Converter, CosineConverter,\n                                       IndexMeta::DataType::DT_FP16);\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineInt8Converter, CosineConverter,\n                                       IndexMeta::DataType::DT_INT8);\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineInt4Converter, CosineConverter,\n                                       IndexMeta::DataType::DT_INT4);\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineHalfFloatConverter,\n                                       CosineConverter,\n                                       IndexMeta::DataType::DT_FP16,\n                                       
IndexMeta::DataType::DT_FP16);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/cosine_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <memory>\n#include <ailego/algorithm/integer_quantizer.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"record_quantizer.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Reformer of Cosine\n */\nclass CosineReformer : public IndexReformer {\n public:\n  static constexpr size_t NORM_SIZE = sizeof(float);\n\n  //! Constructor\n  CosineReformer(IndexMeta::DataType original_type,\n                 IndexMeta::DataType dst_type)\n      : original_type_(original_type), dst_type_(dst_type) {}\n\n  //! Constructor\n  CosineReformer(IndexMeta::DataType dst_type)\n      : original_type_(IndexMeta::DataType::DT_FP32), dst_type_(dst_type) {}\n\n  //! Constructor\n  CosineReformer()\n      : original_type_(IndexMeta::DataType::DT_UNDEFINED),\n        dst_type_(IndexMeta::DataType::DT_UNDEFINED) {}\n\n  //! Initialize Reformer\n  int init(const ailego::Params & /*params*/) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType type = qmeta.data_type();\n\n    if (type == IndexMeta::DataType::DT_FP32) {\n      if (dst_type_ != IndexMeta::DataType::DT_FP32 &&\n          dst_type_ != IndexMeta::DataType::DT_FP16 &&\n          dst_type_ != IndexMeta::DataType::DT_INT4 &&\n          dst_type_ != IndexMeta::DataType::DT_INT8) {\n        return IndexError_Unsupported;\n      }\n\n      if (qmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n\n      *ometa = qmeta;\n      ometa->set_meta(dst_type_, qmeta.dimension() + ExtraDimension(dst_type_));\n      out->resize(ometa->element_size());\n\n      float norm = 0.0f;\n      size_t origin_dimension = qmeta.dimension();\n      std::string normalized_buffer(reinterpret_cast<const char *>(query),\n                                    qmeta.element_size());\n\n      float *buf = reinterpret_cast<float *>(&normalized_buffer[0]);\n\n      ailego::Normalizer<float>::L2(buf, origin_dimension, &norm);\n\n      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]) + ometa->element_size() -\n                   NORM_SIZE,\n               &norm, NORM_SIZE);\n\n      if (dst_type_ == IndexMeta::DataType::DT_FP32) {\n        ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]), buf,\n                 ometa->element_size() - NORM_SIZE);\n      } else if (dst_type_ == IndexMeta::DataType::DT_FP16) {\n        RecordQuantizer::quantize_record(buf, origin_dimension, dst_type_,\n                                         false, &(*out)[0]);\n      } else if (dst_type_ == IndexMeta::DataType::DT_INT4 ||\n                 dst_type_ == IndexMeta::DataType::DT_INT8) {\n        RecordQuantizer::quantize_record(buf, qmeta.dimension(), dst_type_,\n                                         false, &(*out)[0]);\n      }\n    } else if (type == IndexMeta::DataType::DT_FP16) {\n   
   if (dst_type_ != IndexMeta::DataType::DT_FP16) {\n        return IndexError_Unsupported;\n      }\n\n      if (qmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n\n      *ometa = qmeta;\n      ometa->set_meta(\n          IndexMeta::DataType::DT_FP16,\n          qmeta.dimension() + ExtraDimension(IndexMeta::DataType::DT_FP16));\n      out->resize(ometa->element_size());\n\n      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]), query,\n               ometa->element_size() - NORM_SIZE);\n\n      float norm = 0.0f;\n      auto data = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);\n      ailego::Normalizer<ailego::Float16>::L2(\n          data,\n          ometa->dimension() - ExtraDimension(IndexMeta::DataType::DT_FP16),\n          &norm);\n\n      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]) + ometa->element_size() -\n                   NORM_SIZE,\n               &norm, NORM_SIZE);\n    } else {\n      return IndexError_Unsupported;\n    }\n\n    return 0;\n  }\n\n  //! Transform queries\n  int transform(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                uint32_t /*count*/, std::string * /*out*/,\n                IndexQueryMeta * /*ometa*/) const override {\n    return IndexError_Unsupported;\n  }\n\n  //! Convert records\n  int convert(const void * /*records*/, const IndexQueryMeta & /*rmeta*/,\n              uint32_t /*count*/, std::string * /*out*/,\n              IndexQueryMeta * /*ometa*/) const override {\n    return IndexError_Unsupported;\n  }\n\n  //! 
Normalize results\n  int normalize(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                IndexDocumentList & /*result*/) const override {\n    return 0;\n  }\n\n  bool need_revert() const override {\n    return true;\n  }\n\n  int revert(const void *in, const IndexQueryMeta &qmeta,\n             std::string *out) const override {\n    IndexMeta::DataType type = qmeta.data_type();\n\n    if (type != IndexMeta::DataType::DT_FP32 &&\n        type != IndexMeta::DataType::DT_INT8 &&\n        type != IndexMeta::DataType::DT_INT4 &&\n        type != IndexMeta::DataType::DT_FP16) {\n      return IndexError_Unsupported;\n    }\n\n    size_t dimension = qmeta.dimension() - ExtraDimension(dst_type_);\n    out->resize(dimension * IndexMeta::UnitSizeof(original_type_));\n\n    float norm;\n    ::memcpy(&norm,\n             reinterpret_cast<const uint8_t *>(in) + qmeta.element_size() -\n                 NORM_SIZE,\n             NORM_SIZE);\n\n    if (type == IndexMeta::DataType::DT_FP32) {\n      if (dst_type_ != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n\n      float *out_buf = reinterpret_cast<float *>(&(*out)[0]);\n      const float *in_buf = reinterpret_cast<const float *>(in);\n\n      this->denormalize(in_buf, out_buf, qmeta, norm);\n    } else if (type == IndexMeta::DataType::DT_FP16) {\n      if (dst_type_ != IndexMeta::DataType::DT_FP16) {\n        return IndexError_Unsupported;\n      }\n\n      if (original_type_ != IndexMeta::DataType::DT_FP16 &&\n          original_type_ != IndexMeta::DataType::DT_FP32) {\n        return IndexError_Unsupported;\n      }\n\n      if (original_type_ == IndexMeta::DataType::DT_FP32) {\n        float *out_buf = reinterpret_cast<float *>(&(*out)[0]);\n        RecordQuantizer::unquantize_record(in, dimension, dst_type_, out_buf);\n\n        this->denormalize(out_buf, out_buf, qmeta, norm);\n      } else {\n        ailego::Float16 *out_buf =\n            
reinterpret_cast<ailego::Float16 *>(&(*out)[0]);\n        const ailego::Float16 *in_buf =\n            reinterpret_cast<const ailego::Float16 *>(in);\n        this->denormalize(in_buf, out_buf, qmeta, norm);\n      }\n    } else if (type == IndexMeta::DataType::DT_INT8 ||\n               type == IndexMeta::DataType::DT_INT4) {\n      if (dst_type_ != IndexMeta::DataType::DT_INT8 &&\n          dst_type_ != IndexMeta::DataType::DT_INT4) {\n        return IndexError_Unsupported;\n      }\n\n      float *out_buf = reinterpret_cast<float *>(&(*out)[0]);\n      RecordQuantizer::unquantize_record(in, dimension, dst_type_, out_buf);\n\n      this->denormalize(out_buf, out_buf, qmeta, norm);\n    }\n\n    return 0;\n  }\n\n private:\n  template <typename T>\n  void denormalize(const T *in, T *out, const IndexQueryMeta &qmeta,\n                   float norm) const {\n    size_t origin_dim = qmeta.dimension() - ExtraDimension(dst_type_);\n\n    for (size_t d = 0; d < origin_dim; ++d) {\n      out[d] = in[d] * norm;\n    }\n  }\n\n  static size_t ExtraDimension(IndexMeta::DataType type) {\n    // The extra quantized params storage size to save for each vector\n    if (type == IndexMeta::DataType::DT_INT4)\n      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)\n    else if (type == IndexMeta::DataType::DT_INT8)\n      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)\n    else if (type == IndexMeta::DataType::DT_FP16)\n      return 2;  // sizeof(float) / sizeof(FT_FP16)\n    else if (type == IndexMeta::DataType::DT_FP32) {\n      return 1;  // sizeof(float) / sizeof(FT_FP32)\n    } else {\n      return 0;\n    }\n  }\n\n  //! 
Members\n  IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};\n  IndexMeta::DataType dst_type_{IndexMeta::DataType::DT_UNDEFINED};\n};\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineNormalizeReformer, CosineReformer,\n                                      IndexMeta::DataType::DT_FP32);\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineFp32Reformer, CosineReformer,\n                                      IndexMeta::DataType::DT_FP32);\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineFp16Reformer, CosineReformer,\n                                      IndexMeta::DataType::DT_FP16);\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineInt8Reformer, CosineReformer,\n                                      IndexMeta::DataType::DT_INT8);\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineInt4Reformer, CosineReformer,\n                                      IndexMeta::DataType::DT_INT4);\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineHalfFloatReformer, CosineReformer,\n                                      IndexMeta::DataType::DT_FP16,\n                                      IndexMeta::DataType::DT_FP16);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/half_float_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/core/framework/index_framework.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Half Float Holder\n */\nclass HalfFloatHolder : public IndexHolder {\n public:\n  /*! Half Float Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(const HalfFloatHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(owner->dimension(), 0), front_iter_(std::move(iter)) {\n      this->transform_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! 
Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->transform_record();\n    }\n\n   private:\n    inline void transform_record(void) {\n      if (front_iter_->is_valid()) {\n        ailego::FloatHelper::ToFP16(\n            reinterpret_cast<const float *>(front_iter_->data()),\n            buffer_.size(), buffer_.data());\n      }\n    }\n\n    std::vector<uint16_t> buffer_{};\n    IndexHolder::Iterator::Pointer front_iter_{};\n  };\n\n  //! Constructor\n  HalfFloatHolder(IndexHolder::Pointer front) : front_(std::move(front)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return front_->dimension();\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16,\n                                    front_->dimension());\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter ? IndexHolder::Iterator::Pointer(\n                      new HalfFloatHolder::Iterator(this, std::move(iter)))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Disable them\n  HalfFloatHolder(void) = delete;\n\n  //! Members\n  IndexHolder::Pointer front_{};\n};\n\n/*! Half Float Converter\n */\nclass HalfFloatConverter : public IndexConverter {\n public:\n  //! Destructor\n  virtual ~HalfFloatConverter(void) {}\n\n  //! 
Initialize Converter\n  int init(const IndexMeta &mt, const ailego::Params &) override {\n    if (ailego_unlikely(mt.data_type() != IndexMeta::DataType::DT_FP32 ||\n                        mt.unit_size() != sizeof(float))) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u.\", mt.data_type(),\n                mt.unit_size());\n      return IndexError_Unsupported;\n    }\n\n    meta_ = mt;\n    meta_.set_meta(IndexMeta::DataType::DT_FP16, mt.dimension());\n    meta_.set_converter(\"HalfFloatConverter\", 0, ailego::Params());\n    meta_.set_reformer(\"HalfFloatReformer\", 0, ailego::Params());\n    return 0;\n  }\n\n  //! Cleanup Converter\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexHolder::Pointer) override {\n    return 0;\n  }\n\n  //! Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||\n        holder->dimension() != meta_.dimension()) {\n      return IndexError_Mismatch;\n    }\n    holder_ = std::make_shared<HalfFloatHolder>(std::move(holder));\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  IndexMeta meta_{};\n  IndexHolder::Pointer holder_{};\n  Stats stats_{};\n};\n\n/*! Half Float Sparse Holder\n */\nclass HalfFloatSparseHolder : public IndexSparseHolder {\n public:\n  /*! Half Float Holder Iterator\n   */\n  class Iterator : public IndexSparseHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! 
Constructor\n    Iterator(const HalfFloatSparseHolder * /*owner*/,\n             IndexSparseHolder::Iterator::Pointer &&iter)\n        : sparse_buffer_(MAX_DIM_COUNT * sizeof(uint16_t), 0),\n          front_iter_(std::move(iter)) {\n      this->transform_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! Retrieve sparse count\n    uint32_t sparse_count() const override {\n      return front_iter_->sparse_count();\n    }\n\n    //! Retrieve sparse indices\n    const uint32_t *sparse_indices() const override {\n      return front_iter_->sparse_indices();\n    }\n\n    //! Retrieve sparse data\n    const void *sparse_data() const override {\n      return sparse_buffer_.data();\n    }\n\n    //! Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->transform_record();\n    }\n\n   private:\n    inline void transform_record(void) {\n      if (front_iter_->is_valid()) {\n        ailego::FloatHelper::ToFP16(\n            reinterpret_cast<const float *>(front_iter_->sparse_data()),\n            front_iter_->sparse_count(), sparse_buffer_.data());\n      }\n    }\n\n    constexpr static uint32_t MAX_DIM_COUNT = 4096;\n    std::vector<uint16_t> sparse_buffer_{};\n\n    IndexSparseHolder::Iterator::Pointer front_iter_{};\n  };\n\n  //! Constructor\n  HalfFloatSparseHolder(IndexSparseHolder::Pointer front)\n      : front_(std::move(front)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n\n  //! 
Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {\n    IndexSparseHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter\n               ? IndexSparseHolder::Iterator::Pointer(\n                     new HalfFloatSparseHolder::Iterator(this, std::move(iter)))\n               : IndexSparseHolder::Iterator::Pointer();\n  }\n\n  size_t total_sparse_count(void) const override {\n    return front_->total_sparse_count();\n  }\n\n private:\n  //! Disable them\n  HalfFloatSparseHolder(void) = delete;\n\n  //! Members\n  IndexSparseHolder::Pointer front_{};\n};\n\n/*! Half Float Sparse Converter\n */\nclass HalfFloatSparseConverter : public IndexConverter {\n public:\n  //! Destructor\n  virtual ~HalfFloatSparseConverter(void) {}\n\n  //! Initialize Converter\n  int init(const IndexMeta &mt, const ailego::Params &) override {\n    if (ailego_unlikely(mt.data_type() != IndexMeta::DataType::DT_FP32 ||\n                        mt.unit_size() != sizeof(float))) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u.\", mt.data_type(),\n                mt.unit_size());\n      return IndexError_Unsupported;\n    }\n\n    meta_ = mt;\n    meta_.set_data_type(IndexMeta::DataType::DT_FP16);\n    meta_.set_converter(\"HalfFloatSparseConverter\", 0, ailego::Params());\n    meta_.set_reformer(\"HalfFloatSparseReformer\", 0, ailego::Params());\n    return 0;\n  }\n\n  //! Cleanup Converter\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexSparseHolder::Pointer) override {\n    return 0;\n  }\n\n  //! 
Transform the data\n  int transform(IndexSparseHolder::Pointer holder) override {\n    if (holder->data_type() != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Mismatch;\n    }\n\n    holder_ = std::make_shared<HalfFloatSparseHolder>(std::move(holder));\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexSparseHolder::Pointer sparse_result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Sparse Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  IndexMeta meta_{};\n  IndexSparseHolder::Pointer holder_{};\n  Stats stats_{};\n};\n\nINDEX_FACTORY_REGISTER_CONVERTER(HalfFloatConverter);\nINDEX_FACTORY_REGISTER_CONVERTER(HalfFloatSparseConverter);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/quantizer/half_float_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"record_quantizer.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Half Float Reformer\n */\nclass HalfFloatReformer : public IndexReformer {\n public:\n  //! Initialize Reformer\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    switch (qmeta.data_type()) {\n      case IndexMeta::DataType::DT_FP16:\n        out->assign(reinterpret_cast<const char *>(query),\n                    qmeta.element_size());\n        *ometa = qmeta;\n        break;\n\n      case IndexMeta::DataType::DT_FP32:\n        if (qmeta.unit_size() != sizeof(float)) {\n          return IndexError_Unsupported;\n        }\n        out->resize(qmeta.dimension() * sizeof(ailego::Float16));\n        ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),\n                                    qmeta.dimension(),\n                                    reinterpret_cast<uint16_t *>(&(*out)[0]));\n        *ometa = qmeta;\n        ometa->set_meta(IndexMeta::DataType::DT_FP16, qmeta.dimension());\n        break;\n\n      default:\n        return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! 
Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    switch (qmeta.data_type()) {\n      case IndexMeta::DataType::DT_FP16:\n        out->assign(reinterpret_cast<const char *>(query),\n                    qmeta.element_size() * count);\n        *ometa = qmeta;\n        break;\n\n      case IndexMeta::DataType::DT_FP32:\n        if (qmeta.unit_size() != sizeof(float)) {\n          return IndexError_Unsupported;\n        }\n        out->resize(qmeta.dimension() * count * sizeof(ailego::Float16));\n        ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),\n                                    qmeta.dimension() * count,\n                                    reinterpret_cast<uint16_t *>(&(*out)[0]));\n        *ometa = qmeta;\n        ometa->set_meta(IndexMeta::DataType::DT_FP16, qmeta.dimension());\n        break;\n\n      default:\n        return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Normalize results\n  int normalize(const void *, const IndexQueryMeta &,\n                IndexDocumentList &) const override {\n    return 0;\n  }\n\n  bool need_revert() const override {\n    return true;\n  }\n\n  int revert(const void *in, const IndexQueryMeta &qmeta,\n             std::string *out) const override {\n    IndexMeta::DataType type = qmeta.data_type();\n\n    if (type != IndexMeta::DataType::DT_FP16) {\n      return IndexError_Unsupported;\n    }\n\n    if (type == IndexMeta::DataType::DT_FP16) {\n      size_t dimension = qmeta.dimension();\n\n      out->resize(dimension * sizeof(float));\n      float *out_buf = reinterpret_cast<float *>(out->data());\n\n      RecordQuantizer::unquantize_record(in, dimension,\n                                         IndexMeta::DataType::DT_FP16, out_buf);\n    }\n\n    return 0;\n  }\n};\n\n/*! 
Half Float Sparse Reformer\n */\nclass HalfFloatSparseReformer : public IndexReformer {\n public:\n  //! Initialize Reformer\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! Transform query\n  int transform(uint32_t sparse_count, const uint32_t * /*sparse_indices*/,\n                const void *sparse_query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    switch (qmeta.data_type()) {\n      case IndexMeta::DataType::DT_FP16:\n        out->assign(reinterpret_cast<const char *>(sparse_query),\n                    qmeta.unit_size() * sparse_count);\n        *ometa = qmeta;\n\n        break;\n\n      case IndexMeta::DataType::DT_FP32:\n        if (qmeta.unit_size() != sizeof(float)) {\n          return IndexError_Unsupported;\n        }\n\n        out->resize(sparse_count * sizeof(ailego::Float16));\n        ailego::FloatHelper::ToFP16(\n            reinterpret_cast<const float *>(sparse_query), sparse_count,\n            reinterpret_cast<uint16_t *>(&(*out)[0]));\n\n        *ometa = qmeta;\n        ometa->set_data_type(IndexMeta::DataType::DT_FP16);\n\n        break;\n\n      default:\n        return IndexError_Unsupported;\n    }\n\n    return 0;\n  }\n\n  //! 
Transform queries\n  int transform(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                const void *sparse_query, const IndexQueryMeta &qmeta,\n                uint32_t count, std::string *out,\n                IndexQueryMeta *ometa) const override {\n    size_t sparse_count_total = 0;\n    for (size_t i = 0; i < count; i++) {\n      sparse_count_total += sparse_count[i];\n    }\n\n    if (sparse_count_total > std::numeric_limits<uint32_t>::max()) {\n      return IndexError_OutOfRange;\n    }\n\n    return this->transform((uint32_t)sparse_count_total, sparse_indices,\n                           sparse_query, qmeta, out, ometa);\n  }\n\n  bool need_revert() const override {\n    return true;\n  }\n\n  int revert(const uint32_t sparse_count, const uint32_t * /*sparse_indices*/,\n             const void *sparse_query, const IndexQueryMeta &qmeta,\n             std::string *sparse_query_out) const override {\n    IndexMeta::DataType data_type = qmeta.data_type();\n\n    if (data_type != IndexMeta::DataType::DT_FP16) {\n      return IndexError_Unsupported;\n    }\n\n    if (data_type == IndexMeta::DataType::DT_FP16) {\n      sparse_query_out->resize(sparse_count * sizeof(float));\n\n      float *out_buf = reinterpret_cast<float *>(&(*sparse_query_out)[0]);\n      RecordQuantizer::unquantize_sparse_record(\n          sparse_query, sparse_count, IndexMeta::DataType::DT_FP16, out_buf);\n    }\n\n    return 0;\n  }\n};\n\nINDEX_FACTORY_REGISTER_REFORMER(HalfFloatReformer);\nINDEX_FACTORY_REGISTER_REFORMER(HalfFloatSparseReformer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/integer_quantizer_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iterator>\n#include <ailego/algorithm/integer_quantizer.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <ailego/pattern/defer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"record_quantizer.h\"\n#include \"../metric/metric_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Integer Quantizer Converter Holder\n */\ntemplate <class Quantizer>\nclass IntegerQuantizerConverterHolder : public IndexHolder {\n public:\n  /*! Integer Quantizer Converter Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Constructor\n    Iterator(const IntegerQuantizerConverterHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(owner->element_size(), 0),\n          front_iter_(std::move(iter)),\n          quantizer_(owner->quantizer_),\n          dim_(owner->dimension()) {\n      this->encode_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! 
Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->encode_record();\n    }\n\n   private:\n    //! Encode the data by quantizer\n    inline void encode_record(void) {\n      if (front_iter_->is_valid()) {\n        const float *vec = reinterpret_cast<const float *>(front_iter_->data());\n        quantizer_->encode(\n            vec, dim_,\n            reinterpret_cast<typename Quantizer::ValueType *>(buffer_.data()));\n      }\n    }\n\n    //! Members\n    std::vector<uint8_t> buffer_{};\n    IndexHolder::Iterator::Pointer front_iter_{};\n    std::shared_ptr<Quantizer> quantizer_{};\n    size_t dim_{0u};\n  };\n\n  //! Constructor\n  IntegerQuantizerConverterHolder(IndexHolder::Pointer front,\n                                  std::shared_ptr<Quantizer> quantizer,\n                                  IndexMeta::DataType tp)\n      : front_(std::move(front)),\n        quantizer_(std::move(quantizer)),\n        data_type_(tp) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return front_->dimension();\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return data_type_;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(this->data_type(), front_->dimension());\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter ? 
IndexHolder::Iterator::Pointer(\n                      new IntegerQuantizerConverterHolder::Iterator(\n                          this, std::move(iter)))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Members\n  IndexHolder::Pointer front_{};\n  std::shared_ptr<Quantizer> quantizer_{};\n  IndexMeta::DataType data_type_{};\n};\n\n\n/*! Integer Quantizer Converter\n */\ntemplate <class Quantizer>\nclass IntegerQuantizerConverter : public IndexConverter {\n public:\n  //! Constructor\n  IntegerQuantizerConverter(IndexMeta::DataType dst_type)\n      : data_type_(dst_type) {}\n\n  //! Destructor\n  virtual ~IntegerQuantizerConverter() {}\n\n//! Get param name\n#define P_NAME(NAME)                                                 \\\n  data_type_ == IndexMeta::DataType::DT_INT8 ? INT8_QUANTIZER_##NAME \\\n                                             : INT4_QUANTIZER_##NAME\n\n  //! Initialize Converter\n  int init(const IndexMeta &mt, const ailego::Params &params) override {\n    if (mt.data_type() != IndexMeta::DataType::DT_FP32 ||\n        mt.unit_size() != IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u\", mt.data_type(),\n                mt.unit_size());\n      return IndexError_Unsupported;\n    }\n    quantizer_ = std::make_shared<Quantizer>();\n    if (!quantizer_) {\n      LOG_ERROR(\"Malloc EntropyIntegerQuantizer failed\");\n      return IndexError_NoMemory;\n    }\n\n    size_t count;\n    if (params.get(P_NAME(CONVERTER_HISTOGRAM_BINS_COUNT), &count)) {\n      quantizer_->set_histogram_bins(count);\n      LOG_DEBUG(\"Init Converter with bins=%zu\", count);\n    }\n    float scale;\n    if (params.get(P_NAME(CONVERTER_SCALE), &scale)) {\n      quantizer_->set_scale(scale);\n      LOG_DEBUG(\"Init Converter with scale=%f\", scale);\n    }\n    float bias = 0.0f;\n    if (params.get(P_NAME(CONVERTER_BIAS), &bias)) {\n      quantizer_->set_bias(bias);\n      
LOG_DEBUG(\"Init Converter with bias=%f\", bias);\n    }\n\n    meta_ = mt;\n    meta_.set_meta(data_type_, meta_.dimension());\n    meta_.set_converter(data_type_ == IndexMeta::DataType::DT_INT8\n                            ? \"Int8QuantizerConverter\"\n                            : \"Int4QuantizerConverter\",\n                        0, params);\n\n    bool disable_bias = false;\n    if (meta_.metric_name() == \"InnerProduct\" ||\n        meta_.metric_name() == \"MipsSquaredEuclidean\") {\n      disable_bias = true;\n    }\n    params.get(P_NAME(CONVERTER_DISABLE_BIAS), &disable_bias);\n    quantizer_->set_non_bias(disable_bias);\n\n    return 0;\n  }\n\n  //! Cleanup Converter\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexHolder::Pointer holder) override {\n    if (holder->dimension() != meta_.dimension() ||\n        holder->data_type() != IndexMeta::DataType::DT_FP32) {\n      return IndexError_Mismatch;\n    }\n\n    ailego::ElapsedTime timer;\n    AILEGO_DEFER([&]() { stats_.set_trained_costtime(timer.milli_seconds()); });\n\n    if (holder->multipass()) {\n      {\n        //! step1: compute max/min value\n        auto iter = holder->create_iterator();\n        if (!iter) {\n          LOG_ERROR(\"Failed to create iterator of holder\");\n          return IndexError_Runtime;\n        }\n        float max = -std::numeric_limits<float>::max();\n        float min = std::numeric_limits<float>::max();\n        for (; iter->is_valid(); iter->next()) {\n          const float *vec = reinterpret_cast<const float *>(iter->data());\n          for (size_t i = 0; i < meta_.dimension(); ++i) {\n            max = std::max(max, vec[i]);\n            min = std::min(min, vec[i]);\n          }\n        }\n        quantizer_->set_max(max);\n        quantizer_->set_min(min);\n\n        //! 
step2: feed quantizer with training data\n        iter = holder->create_iterator();\n        if (!iter) {\n          LOG_ERROR(\"Failed to create iterator of holder\");\n          return IndexError_Runtime;\n        }\n        for (; iter->is_valid(); iter->next()) {\n          (*stats_.mutable_trained_count())++;\n          quantizer_->feed(reinterpret_cast<const float *>(iter->data()),\n                           meta_.dimension());\n        }\n      }\n    } else {\n      //! step1: compute max/min value\n      auto iter = holder->create_iterator();\n      if (!iter) {\n        LOG_ERROR(\"Failed to create iterator of holder\");\n        return IndexError_Runtime;\n      }\n      std::vector<float> features;\n      float max = -std::numeric_limits<float>::max();\n      float min = std::numeric_limits<float>::max();\n      for (; iter->is_valid(); iter->next()) {\n        const float *vec = reinterpret_cast<const float *>(iter->data());\n        for (size_t i = 0; i < meta_.dimension(); ++i) {\n          max = std::max(max, vec[i]);\n          min = std::min(min, vec[i]);\n          features.emplace_back(vec[i]);\n        }\n      }\n      quantizer_->set_max(max);\n      quantizer_->set_min(min);\n\n      //! step2: feed quantizer with training data\n      for (size_t i = 0; i < features.size(); i += meta_.dimension()) {\n        quantizer_->feed(&features[i], meta_.dimension());\n        (*stats_.mutable_trained_count())++;\n      }\n    }\n\n    //! step3: feed quantizer with training data\n    if (!quantizer_->train()) {\n      LOG_ERROR(\"Quantizer train failed\");\n      return IndexError_Runtime;\n    }\n\n    //! 
Setting of Integer Reformer\n    ailego::Params reformer_params;\n    float scale = quantizer_->scale();\n    float bias = quantizer_->bias();\n    float inf = std::numeric_limits<float>::infinity();\n    if (scale == inf || bias == inf) {\n      reformer_params.set(P_NAME(REFORMER_SCALE), std::to_string(scale));\n      reformer_params.set(P_NAME(REFORMER_BIAS), std::to_string(bias));\n    } else {\n      reformer_params.set(P_NAME(REFORMER_SCALE), scale);\n      reformer_params.set(P_NAME(REFORMER_BIAS), bias);\n    }\n    reformer_params.set(P_NAME(REFORMER_METRIC), meta_.metric_name());\n    meta_.set_reformer(data_type_ == IndexMeta::DataType::DT_INT8\n                           ? \"Int8QuantizerReformer\"\n                           : \"Int4QuantizerReformer\",\n                       0, reformer_params);\n\n    ailego::Params params = meta_.converter_params();\n    if (scale == inf || bias == inf) {\n      params.set(P_NAME(CONVERTER_SCALE), std::to_string(scale));\n      params.set(P_NAME(CONVERTER_BIAS), std::to_string(bias));\n    } else {\n      params.set(P_NAME(CONVERTER_SCALE), scale);\n      params.set(P_NAME(CONVERTER_BIAS), bias);\n    }\n    meta_.set_converter(meta_.converter_name(), 0, params);\n\n    LOG_DEBUG(\n        \"IntegerQuantizerConverter train done, costtime %zums, scale %f, bias \"\n        \"%f\",\n        (size_t)timer.milli_seconds(), quantizer_->scale(), quantizer_->bias());\n\n    return 0;\n  }\n\n  //! Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||\n        holder->dimension() != meta_.dimension()) {\n      return IndexError_Mismatch;\n    }\n\n    if (holder->count() > 0) {\n      *stats_.mutable_transformed_count() += holder->count();\n    }\n    holder_ = std::make_shared<IntegerQuantizerConverterHolder<Quantizer>>(\n        holder, quantizer_, data_type_);\n    return 0;\n  }\n\n  //! 
Dump index into storage\n  int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  //! Members\n  IndexMeta meta_{};\n  IndexHolder::Pointer holder_{};\n  std::shared_ptr<Quantizer> quantizer_{};\n  Stats stats_{};\n  IndexMeta::DataType data_type_{};\n};\n\n\n/*! Converter of Integer Streaming Quantizer\n */\nclass IntegerStreamingConverter : public IndexConverter {\n public:\n  //! Constructor\n  IntegerStreamingConverter(IndexMeta::DataType dst_type)\n      : data_type_(dst_type) {}\n\n  //! Destructor\n  ~IntegerStreamingConverter() override {}\n\n  //! Initialize Converter\n  int init(const IndexMeta &index_meta, const ailego::Params &params) override {\n    meta_ = index_meta;\n    params.get(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE,\n               &enable_normalize_);\n    ailego::Params reformer_params;\n    if (enable_normalize_) {\n      reformer_params.set(INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE, true);\n    }\n\n    is_euclidean_ = index_meta.metric_name() == \"MipsSquaredEuclidean\" ||\n                    index_meta.metric_name() == \"SquaredEuclidean\" ||\n                    index_meta.metric_name() == \"Euclidean\";\n    if (is_euclidean_) {\n      reformer_params.set(INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN, true);\n    }\n\n\n    if (data_type_ == IndexMeta::DataType::DT_INT8) {\n      meta_.set_converter(\"Int8StreamingConverter\", 0, params);\n      meta_.set_reformer(\"Int8StreamingReformer\", 0, reformer_params);\n    } else {\n      if (index_meta.dimension() % 2) {\n        LOG_ERROR(\"Unsupported dimension %u for INT4 type\",\n                  index_meta.dimension());\n        return 
IndexError_Unsupported;\n      }\n      meta_.set_converter(\"Int4StreamingConverter\", 0, params);\n      meta_.set_reformer(\"Int4StreamingReformer\", 0, reformer_params);\n    }\n    ailego::Params metric_params;\n    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,\n                      index_meta.metric_name());\n    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,\n                      index_meta.metric_params());\n    meta_.set_metric(\"QuantizedInteger\", 0, metric_params);\n    meta_.set_meta(data_type_, meta_.dimension() + ExtraDimension(data_type_));\n    return 0;\n  }\n\n  //! Cleanup Converter\n  virtual int cleanup(void) override {\n    *stats_.mutable_transformed_count() = 0;\n    return 0;\n  }\n\n  //! Train the data\n  int train(IndexHolder::Pointer /*holder*/) override {\n    return 0;\n  }\n\n  //! Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||\n        holder->dimension() != meta_.dimension() - ExtraDimension(data_type_)) {\n      return IndexError_Mismatch;\n    }\n\n    *stats_.mutable_transformed_count() += holder->count();\n    holder_ = std::make_shared<IntegerStreamingConverterHolder>(\n        holder, data_type_, enable_normalize_, is_euclidean_);\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer & /*dumper*/) override {\n    return 0;\n  }\n\n  //! Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  //! IndexHolder for IntegerStreamingConverter\n  class IntegerStreamingConverterHolder : public IndexHolder {\n   public:\n    class Iterator : public IndexHolder::Iterator {\n     public:\n      //! 
Constructor\n      Iterator(const IntegerStreamingConverterHolder *owner,\n               IndexHolder::Iterator::Pointer &&iter)\n          : owner_(owner),\n            buffer_(owner->element_size(), 0),\n            normalize_buffer_(owner->front_->element_size(), 0),\n            front_iter_(std::move(iter)) {\n        this->encode_record();\n      }\n\n      //! Destructor\n      virtual ~Iterator(void) {}\n\n      //! Retrieve pointer of data\n      const void *data(void) const override {\n        return buffer_.data();\n      }\n\n      //! Test if the iterator is valid\n      bool is_valid(void) const override {\n        return front_iter_->is_valid();\n      }\n\n      //! Retrieve primary key\n      uint64_t key(void) const override {\n        return front_iter_->key();\n      }\n\n      //! Next iterator\n      void next(void) override {\n        front_iter_->next();\n        this->encode_record();\n      }\n\n     private:\n      //! Encode the data by quantizer\n      void encode_record(void) {\n        if (front_iter_->is_valid()) {\n          const float *vec =\n              reinterpret_cast<const float *>(front_iter_->data());\n          if (owner_->enable_normalize_) {\n            float norm = 0.0;\n            memcpy((void *)normalize_buffer_.data(), vec,\n                   owner_->front_->element_size());\n            ailego::Normalizer<float>::L2((float *)normalize_buffer_.data(),\n                                          owner_->dimension_, &norm);\n            vec = (float *)normalize_buffer_.data();\n          }\n\n          RecordQuantizer::quantize_record(\n              vec, owner_->dimension_, owner_->data_type(),\n              owner_->is_euclidean_, buffer_.data());\n        }\n      }\n\n      //! Members\n      const IntegerStreamingConverterHolder *owner_{nullptr};\n      std::vector<uint8_t> buffer_{};\n      std::string normalize_buffer_{};\n      IndexHolder::Iterator::Pointer front_iter_{};\n    };\n\n    //! 
Constructor\n    IntegerStreamingConverterHolder(IndexHolder::Pointer front,\n                                    IndexMeta::DataType tp,\n                                    bool enable_normalize, bool is_euclidean)\n        : front_(std::move(front)),\n          data_type_(tp),\n          dimension_(front_->dimension()),\n          enable_normalize_(enable_normalize),\n          is_euclidean_(is_euclidean) {}\n\n    //! Retrieve count of elements in holder (-1 indicates unknown)\n    size_t count(void) const override {\n      return front_->count();\n    }\n\n    //! Retrieve dimension\n    size_t dimension(void) const override {\n      return dimension_ + ExtraDimension(data_type_);\n    }\n\n    //! Retrieve type information\n    IndexMeta::DataType data_type(void) const override {\n      return data_type_;\n    }\n\n    //! Retrieve element size in bytes\n    size_t element_size(void) const override {\n      return IndexMeta::ElementSizeof(this->data_type(), this->dimension());\n    }\n\n    //! Retrieve if it can multi-pass\n    bool multipass(void) const override {\n      return front_->multipass();\n    }\n\n    //! Create a new iterator\n    IndexHolder::Iterator::Pointer create_iterator(void) override {\n      IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n      return iter ? IndexHolder::Iterator::Pointer(\n                        new IntegerStreamingConverterHolder::Iterator(\n                            this, std::move(iter)))\n                  : IndexHolder::Iterator::Pointer();\n    }\n\n   private:\n    //! 
Members\n    IndexHolder::Pointer front_{};\n    IndexMeta::DataType data_type_{};\n    uint32_t dimension_{0};\n    bool enable_normalize_{false};\n    bool is_euclidean_{false};\n  };\n\n  static size_t ExtraDimension(IndexMeta::DataType type) {\n    // The extra quantized params storage size to save for each vector\n    constexpr size_t kExtraSize = 4 * sizeof(float);\n    constexpr size_t kAdditionalInt32 = sizeof(int32_t);\n    return type == IndexMeta::DataType::DT_INT8\n               ? (kExtraSize + kAdditionalInt32)\n               : (kExtraSize * 2);\n  }\n\n  //! Members\n  IndexMeta meta_{};\n  Stats stats_{};\n  IndexHolder::Pointer holder_{};\n  IndexMeta::DataType data_type_{};\n  bool enable_normalize_{false};\n  bool is_euclidean_{false};\n};\n\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(\n    Int8QuantizerConverter,\n    IntegerQuantizerConverter<ailego::EntropyInt8Quantizer>,\n    IndexMeta::DataType::DT_INT8);\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(\n    Int4QuantizerConverter,\n    IntegerQuantizerConverter<ailego::EntropyInt4Quantizer>,\n    IndexMeta::DataType::DT_INT4);\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(Int8StreamingConverter,\n                                       IntegerStreamingConverter,\n                                       IndexMeta::DataType::DT_INT8);\nINDEX_FACTORY_REGISTER_CONVERTER_ALIAS(Int4StreamingConverter,\n                                       IntegerStreamingConverter,\n                                       IndexMeta::DataType::DT_INT4);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/integer_quantizer_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/algorithm/integer_quantizer.h>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <ailego/pattern/defer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n#include \"record_quantizer.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Integer Quantizer Reformer\n */\ntemplate <class Quantizer>\nclass IntegerQuantizerReformer : public IndexReformer {\n public:\n  using IndexReformer::transform;\n\n  //! Constructor\n  IntegerQuantizerReformer(IndexMeta::DataType dst_type)\n      : data_type_(dst_type) {}\n\n//! Get param name\n#define P_NAME(NAME)                                                 \\\n  data_type_ == IndexMeta::DataType::DT_INT8 ? INT8_QUANTIZER_##NAME \\\n                                             : INT4_QUANTIZER_##NAME\n\n  //! 
Initialize Reformer\n  int init(const ailego::Params &params) override {\n    float bias;\n    float scale;\n    if (!params.get(P_NAME(REFORMER_BIAS), &bias) ||\n        !params.get(P_NAME(REFORMER_SCALE), &scale)) {\n      LOG_ERROR(\"Init IntegerReformer failed, required params bias and scale\");\n      return IndexError_InvalidArgument;\n    }\n\n    quantizer_.set_bias(bias);\n    quantizer_.set_scale(scale);\n\n    auto metric = params.get_as_string(P_NAME(REFORMER_METRIC));\n    auto reciprocal = scale == 0.0 ? 1.0f : (1.0f / scale);\n    if (metric == \"SquaredEuclidean\") {\n      scale_reciprocal_ = reciprocal * reciprocal;\n    } else if (metric == \"Euclidean\") {\n      scale_reciprocal_ = reciprocal;\n    } else if (metric == \"Manhattan\") {\n      scale_reciprocal_ = reciprocal;\n    } else if (metric == \"InnerProduct\" || metric == \"MipsSquaredEuclidean\") {\n      inner_product_ = true;\n      scale_reciprocal_ = reciprocal;  // missing query part\n    } else {\n      LOG_WARN(\"Unsupported normalize the score for %s\", metric.c_str());\n      scale_reciprocal_ = 1.0f;\n    }\n    LOG_DEBUG(\"Init integer reformer, bias %f, scale %f\", bias, scale);\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    inner_product_ = false;\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = qmeta;\n    ometa->set_meta(data_type_, qmeta.dimension());\n    out->resize(\n        IndexMeta::ElementSizeof(ometa->data_type(), ometa->dimension()));\n    const float *vec = reinterpret_cast<const float *>(query);\n    auto ovec = reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]);\n\n    if (!inner_product_) {\n      quantizer_.encode(vec, qmeta.dimension(), ovec);\n    } else {\n      this->transform(vec, qmeta.dimension(), ovec);\n    }\n    return 0;\n  }\n\n  //! Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = qmeta;\n    ometa->set_meta(data_type_, qmeta.dimension());\n    out->resize(count * IndexMeta::ElementSizeof(ometa->data_type(),\n                                                 ometa->dimension()));\n    const float *vec = reinterpret_cast<const float *>(query);\n\n    if (!inner_product_) {\n      quantizer_.encode(\n          vec, qmeta.dimension() * count,\n          reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]));\n    } else if (ometa->data_type() == IndexMeta::DataType::DT_INT8) {\n      int8_t *ovec = reinterpret_cast<int8_t *>(&(*out)[0]);\n      for (size_t i = 0; i < count; ++i) {\n        this->transform(&vec[i * qmeta.dimension()], 
qmeta.dimension(),\n                        &ovec[i * qmeta.dimension()]);\n      }\n    } else {\n      uint8_t *ovec = reinterpret_cast<uint8_t *>(&(*out)[0]);\n      for (size_t i = 0; i < count; ++i) {\n        this->transform(&vec[i * qmeta.dimension()], qmeta.dimension(),\n                        &ovec[i * qmeta.dimension() / 2]);\n      }\n    }\n\n    return 0;\n  }\n\n  //! Convert a record\n  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,\n              IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = rmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        rmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = rmeta;\n    ometa->set_meta(data_type_, rmeta.dimension());\n    out->resize(ometa->element_size());\n    const float *vec = reinterpret_cast<const float *>(record);\n    auto ovec = reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]);\n\n    quantizer_.encode(vec, rmeta.dimension(), ovec);\n\n    return 0;\n  }\n\n  //! Convert records\n  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,\n              std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = rmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        rmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = rmeta;\n    ometa->set_meta(data_type_, rmeta.dimension());\n    out->resize(count * ometa->element_size());\n    const float *vec = reinterpret_cast<const float *>(records);\n    quantizer_.encode(\n        vec, rmeta.dimension() * count,\n        reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]));\n\n    return 0;\n  }\n\n  //! 
Normalize results\n  int normalize(const void *query, const IndexQueryMeta &qmeta,\n                IndexDocumentList &result) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    auto scale = scale_reciprocal_;\n    if (inner_product_) {\n      float abs_max = 0.0f;\n      const float *vec = static_cast<const float *>(query);\n      if (data_type_ == IndexMeta::DataType::DT_INT8) {\n        for (size_t i = 0; i < qmeta.dimension(); ++i) {\n          float abs = std::abs(vec[i]);\n          abs_max = std::max(abs, abs_max);\n        }\n        scale *= abs_max / 127;\n      } else {\n        float max = -std::numeric_limits<float>::max();\n        for (size_t i = 0; i < qmeta.dimension(); ++i) {\n          float abs = std::abs(vec[i]);\n          abs_max = std::max(abs_max, abs);\n          max = std::max(max, vec[i]);\n        }\n        scale *= abs_max / ((7 * abs_max > 8 * max) ? 8 : 7);\n      }\n    }\n    for (auto &it : result) {\n      *it.mutable_score() *= scale;\n    }\n\n    return 0;\n  }\n\n private:\n  //! Quantize the query to int8 in InnerProduct\n  void transform(const float *in, size_t dim, int8_t *out) const {\n    float abs_max = 0.0f;\n    for (size_t i = 0; i < dim; ++i) {\n      float abs = std::abs(in[i]);\n      abs_max = std::max(abs, abs_max);\n    }\n    float scale = 127 / abs_max;\n    for (size_t i = 0; i < dim; ++i) {\n      out[i] = static_cast<int8_t>(std::round(in[i] * scale));\n    }\n  }\n\n  //! 
Quantize the query to int4 in InnerProduct\n  void transform(const float *in, size_t dim, uint8_t *out) const {\n    float abs_max = 0.0f;\n    float max = -std::numeric_limits<float>::max();\n    for (size_t i = 0; i < dim; ++i) {\n      float abs = std::abs(in[i]);\n      abs_max = std::max(abs_max, abs);\n      max = std::max(max, in[i]);\n    }\n    float scale = ((7 * abs_max > 8 * max) ? 8 : 7) / abs_max;\n    for (size_t i = 0; i < dim; i += 2) {\n      auto lo = std::round(in[i] * scale);\n      auto hi = std::round(in[i + 1] * scale);\n      out[i / 2] = (static_cast_from_float_to_uint8(hi) << 4) |\n                   (static_cast_from_float_to_uint8(lo) & 0xF);\n    }\n  }\n\n private:\n  //! Members\n  Quantizer quantizer_;\n  float scale_reciprocal_{1.0};\n  bool inner_product_{false};\n  IndexMeta::DataType data_type_{};\n};\n\n\n/*! Reformer of Integer Streaming Quantizer\n */\nclass IntegerStreamingReformer : public IndexReformer {\n public:\n  //! Constructor\n  IntegerStreamingReformer(IndexMeta::DataType dst_type)\n      : data_type_(dst_type),\n        extra_dimension_(data_type_ == IndexMeta::DataType::DT_INT8 ? 20 : 32) {\n  }\n\n  //! Initialize Reformer\n  int init(const ailego::Params &params) override {\n    params.get(INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE, &enable_normalize_);\n    params.get(INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN, &is_euclidean_);\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = qmeta;\n    ometa->set_meta(data_type_, qmeta.dimension() + extra_dimension_);\n    out->resize(ometa->element_size());\n    const float *vec = reinterpret_cast<const float *>(query);\n    std::unique_ptr<float[]> normalized;\n    if (enable_normalize_) {\n      normalized.reset(new float[qmeta.dimension()]);\n      vec = normalize(query, qmeta, normalized.get());\n    }\n\n    RecordQuantizer::quantize_record(vec, qmeta.dimension(), data_type_,\n                                     is_euclidean_, &(*out)[0]);\n\n    return 0;\n  }\n\n  //! Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = qmeta.data_type();\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        qmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = qmeta;\n    ometa->set_meta(data_type_, qmeta.dimension() + extra_dimension_);\n    out->resize(count * ometa->element_size());\n    std::unique_ptr<float[]> normalized;\n    if (enable_normalize_) {\n      normalized.reset(new float[qmeta.dimension()]);\n    }\n    for (size_t i = 0; i < count; ++i) {\n      const float *vec =\n          reinterpret_cast<const float *>(query) + i * qmeta.dimension();\n      if (enable_normalize_) {\n        vec = normalize(vec, qmeta, normalized.get());\n      }\n\n      RecordQuantizer::quantize_record(vec, qmeta.dimension(), data_type_,\n                              
         is_euclidean_,\n                                       &(*out)[i * ometa->element_size()]);\n    }\n\n    return 0;\n  }\n\n  //! Convert a record\n  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,\n              IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = rmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        rmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = rmeta;\n    ometa->set_meta(data_type_, rmeta.dimension() + extra_dimension_);\n    out->resize(ometa->element_size());\n    const float *vec = reinterpret_cast<const float *>(record);\n    std::unique_ptr<float[]> normalized;\n    if (enable_normalize_) {\n      normalized.reset(new float[rmeta.dimension()]);\n      vec = normalize(record, rmeta, normalized.get());\n    }\n\n    RecordQuantizer::quantize_record(vec, rmeta.dimension(), data_type_,\n                                     is_euclidean_, &(*out)[0]);\n\n    return 0;\n  }\n\n  //! 
Convert records\n  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,\n              std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType ft = rmeta.data_type();\n\n    if (ft != IndexMeta::DataType::DT_FP32 ||\n        rmeta.unit_size() !=\n            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {\n      return IndexError_Unsupported;\n    }\n\n    *ometa = rmeta;\n    ometa->set_meta(data_type_, rmeta.dimension() + extra_dimension_);\n    out->resize(count * ometa->element_size());\n    std::unique_ptr<float[]> normalized;\n    if (enable_normalize_) {\n      normalized.reset(new float[rmeta.dimension()]);\n    }\n    for (size_t i = 0; i < count; ++i) {\n      const float *vec =\n          reinterpret_cast<const float *>(records) + i * rmeta.dimension();\n      if (enable_normalize_) {\n        vec = normalize(vec, rmeta, normalized.get());\n      }\n\n      RecordQuantizer::quantize_record(vec, rmeta.dimension(), data_type_,\n                                       is_euclidean_,\n                                       &(*out)[i * ometa->element_size()]);\n    }\n\n    return 0;\n  }\n\n  //! Normalize results\n  int normalize(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,\n                IndexDocumentList & /*result*/) const override {\n    return 0;\n  }\n\n private:\n  //! 
Normalize a query to `normalized`\n  float *normalize(const void *query, const IndexQueryMeta &qmeta,\n                   float *normalized) const {\n    memcpy(normalized, query, qmeta.element_size());\n    float norm = 0.0;\n    ailego::Normalizer<float>::L2(normalized, qmeta.dimension(), &norm);\n    return normalized;\n  }\n\n  bool need_revert() const override {\n    return true;\n  }\n\n  int revert(const void *in, const IndexQueryMeta &qmeta,\n             std::string *out) const override {\n    if (enable_normalize_) {\n      LOG_ERROR(\"Unsupported revert for normalized value\");\n\n      return IndexError_Unsupported;\n    }\n\n    out->resize((qmeta.dimension() - extra_dimension_) * sizeof(float));\n    float *out_buf = reinterpret_cast<float *>(out->data());\n\n    RecordQuantizer::unquantize_record(in, qmeta.dimension() - extra_dimension_,\n                                       data_type_, out_buf);\n\n    return 0;\n  }\n\n  //! Members\n  IndexMeta::DataType data_type_{};\n  uint32_t extra_dimension_{0};\n  bool enable_normalize_{false};\n  bool is_euclidean_{false};\n};\n\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(\n    Int8QuantizerReformer,\n    IntegerQuantizerReformer<ailego::EntropyInt8Quantizer>,\n    IndexMeta::DataType::DT_INT8);\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(\n    Int4QuantizerReformer,\n    IntegerQuantizerReformer<ailego::EntropyInt4Quantizer>,\n    IndexMeta::DataType::DT_INT4);\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(Int8StreamingReformer,\n                                      IntegerStreamingReformer,\n                                      IndexMeta::DataType::DT_INT8);\nINDEX_FACTORY_REGISTER_REFORMER_ALIAS(Int4StreamingReformer,\n                                      IntegerStreamingReformer,\n                                      IndexMeta::DataType::DT_INT4);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/mips_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/norm2_matrix.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Convert the vector By Mips RepeatedQuadraticInjection\n */\ntemplate <typename T1, typename T2,\n          typename =\n              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&\n                                      ailego::IsFloatingPoint<T2>::value>::type>\nstatic inline void ConvertRepeatedQuadraticInjection(const T1 *src, size_t dim,\n                                                     size_t m_value,\n                                                     float u_value,\n                                                     float l2_norm, T2 *dst) {\n  float squared_norm = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = src[i] * u_value / l2_norm;\n    dst[i] = val;\n    squared_norm += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    dst[i] = 0.5f - squared_norm;\n    squared_norm *= squared_norm;\n  }\n}\n\n/*! 
Convert the vector By Mips SphericalInjection\n */\ntemplate <typename T1, typename T2,\n          typename =\n              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&\n                                      ailego::IsFloatingPoint<T2>::value>::type>\nstatic inline void ConvertSphericalInjection(const T1 *src, size_t dim,\n                                             float u_value, float l2_norm,\n                                             T2 *dst) {\n  float squared_norm = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = src[i] * u_value / l2_norm;\n    dst[i] = val;\n    squared_norm += val * val;\n  }\n  dst[dim] = squared_norm < 1.0\n                 ? (1.0 - std::sqrt(1.0 - static_cast<double>(squared_norm)))\n                 : 1.0f;\n}\n\n/*! MIPS Holder (Float)\n */\nclass MipsConverterHolder : public IndexHolder {\n public:\n  /*! MIPS Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(const MipsConverterHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(owner->dimension()),\n          m_value_(owner->m_value_),\n          u_value_(owner->u_value_),\n          l2_norm_(owner->l2_norm_),\n          spherical_injection_(owner->spherical_injection_),\n          front_iter_(std::move(iter)) {\n      this->transform_data();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! 
Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->transform_data();\n    }\n\n   private:\n    //! Transform the data\n    void transform_data(void) {\n      if (!front_iter_->is_valid()) {\n        return;\n      }\n\n      const float *src = reinterpret_cast<const float *>(front_iter_->data());\n      float *dst = buffer_.data();\n      if (!spherical_injection_) {\n        ConvertRepeatedQuadraticInjection(src, buffer_.size() - m_value_,\n                                          m_value_, u_value_, l2_norm_, dst);\n      } else {\n        ConvertSphericalInjection(src, buffer_.size() - m_value_, u_value_,\n                                  l2_norm_, dst);\n      }\n    }\n\n    std::vector<float> buffer_{};\n    uint32_t m_value_{0u};\n    float u_value_{0.0f};\n    float l2_norm_{0.0f};\n    bool spherical_injection_{false};\n    IndexHolder::Iterator::Pointer front_iter_{};\n  };\n\n  //! Constructor\n  MipsConverterHolder(IndexHolder::Pointer front, uint32_t m_val, float u_val,\n                      float l2_norm, bool spherical_injection)\n      : m_value_(m_val),\n        u_value_(u_val),\n        l2_norm_(l2_norm),\n        spherical_injection_(spherical_injection),\n        front_(std::move(front)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return front_->dimension() + m_value_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP32,\n                                    front_->dimension() + m_value_);\n  }\n\n  //! 
Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter ? IndexHolder::Iterator::Pointer(\n                      new MipsConverterHolder::Iterator(this, std::move(iter)))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Disable them\n  MipsConverterHolder(void) = delete;\n\n  //! Members\n  uint32_t m_value_{0u};\n  float u_value_{0.0f};\n  float l2_norm_{0.0f};\n  bool spherical_injection_{false};\n  IndexHolder::Pointer front_{};\n};\n\n/*! MIPS Holder (Forced Half Float)\n */\nclass MipsConverterForcedHalfHolder : public IndexHolder {\n public:\n  /*! MIPS Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(const MipsConverterForcedHalfHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(owner->dimension()),\n          m_value_(owner->m_value_),\n          u_value_(owner->u_value_),\n          l2_norm_(owner->l2_norm_),\n          spherical_injection_(owner->spherical_injection_),\n          front_iter_(std::move(iter)) {\n      this->transform_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! 
Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->transform_record();\n    }\n\n   private:\n    void transform_record(void) {\n      if (!front_iter_->is_valid()) {\n        return;\n      }\n\n      const float *src = reinterpret_cast<const float *>(front_iter_->data());\n      ailego::Float16 *dst = buffer_.data();\n      if (!spherical_injection_) {\n        ConvertRepeatedQuadraticInjection(src, buffer_.size() - m_value_,\n                                          m_value_, u_value_, l2_norm_, dst);\n      } else {\n        ConvertSphericalInjection(src, buffer_.size() - m_value_, u_value_,\n                                  l2_norm_, dst);\n      }\n    }\n\n    std::vector<ailego::Float16> buffer_{};\n    uint32_t m_value_{0u};\n    float u_value_{0.0f};\n    float l2_norm_{0.0f};\n    bool spherical_injection_{false};\n    IndexHolder::Iterator::Pointer front_iter_{};\n  };\n\n  //! Constructor\n  MipsConverterForcedHalfHolder(IndexHolder::Pointer front, uint32_t m_val,\n                                float u_val, float l2_norm,\n                                bool spherical_injection)\n      : m_value_(m_val),\n        u_value_(u_val),\n        l2_norm_(l2_norm),\n        spherical_injection_(spherical_injection),\n        front_(std::move(front)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return front_->dimension() + m_value_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16,\n                                    front_->dimension() + m_value_);\n  }\n\n  //! 
Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter ? IndexHolder::Iterator::Pointer(\n                      new MipsConverterForcedHalfHolder::Iterator(\n                          this, std::move(iter)))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Disable them\n  MipsConverterForcedHalfHolder(void) = delete;\n\n  //! Members\n  uint32_t m_value_{0u};\n  float u_value_{0.0f};\n  float l2_norm_{0.0f};\n  bool spherical_injection_{false};\n  IndexHolder::Pointer front_{};\n};\n\n/*! MIPS Holder (Half Float)\n */\nclass MipsConverterHalfHolder : public IndexHolder {\n public:\n  /*! MIPS Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(const MipsConverterHalfHolder *owner,\n             IndexHolder::Iterator::Pointer &&iter)\n        : buffer_(owner->dimension()),\n          m_value_(owner->m_value_),\n          u_value_(owner->u_value_),\n          l2_norm_(owner->l2_norm_),\n          spherical_injection_(owner->spherical_injection_),\n          front_iter_(std::move(iter)) {\n      this->transform_record();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return buffer_.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return front_iter_->is_valid();\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return front_iter_->key();\n    }\n\n    //! 
Next iterator\n    void next(void) override {\n      front_iter_->next();\n      this->transform_record();\n    }\n\n   private:\n    void transform_record(void) {\n      if (!front_iter_->is_valid()) {\n        return;\n      }\n\n      const ailego::Float16 *src =\n          reinterpret_cast<const ailego::Float16 *>(front_iter_->data());\n      ailego::Float16 *dst = buffer_.data();\n      if (!spherical_injection_) {\n        ConvertRepeatedQuadraticInjection(src, buffer_.size() - m_value_,\n                                          m_value_, u_value_, l2_norm_, dst);\n      } else {\n        ConvertSphericalInjection(src, buffer_.size() - m_value_, u_value_,\n                                  l2_norm_, dst);\n      }\n    }\n\n    std::vector<ailego::Float16> buffer_{};\n    uint32_t m_value_{0u};\n    float u_value_{0.0f};\n    float l2_norm_{0.0f};\n    bool spherical_injection_{false};\n    IndexHolder::Iterator::Pointer front_iter_{};\n  };\n\n  //! Constructor\n  MipsConverterHalfHolder(IndexHolder::Pointer front, uint32_t m_val,\n                          float u_val, float l2_norm, bool spherical_injection)\n      : m_value_(m_val),\n        u_value_(u_val),\n        l2_norm_(l2_norm),\n        spherical_injection_(spherical_injection),\n        front_(std::move(front)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return front_->count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return front_->dimension() + m_value_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16,\n                                    front_->dimension() + m_value_);\n  }\n\n  //! 
Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return front_->multipass();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    IndexHolder::Iterator::Pointer iter = front_->create_iterator();\n    return iter ? IndexHolder::Iterator::Pointer(\n                      new MipsConverterHalfHolder::Iterator(this,\n                                                            std::move(iter)))\n                : IndexHolder::Iterator::Pointer();\n  }\n\n private:\n  //! Disable them\n  MipsConverterHalfHolder(void) = delete;\n\n  //! Members\n  uint32_t m_value_{0u};\n  float u_value_{0.0f};\n  float l2_norm_{0.0f};\n  bool spherical_injection_{false};\n  IndexHolder::Pointer front_{};\n};\n\n/*! MIPS Converter\n */\nclass MipsConverter : public IndexConverter {\n public:\n  //! Destructor\n  virtual ~MipsConverter(void) {}\n\n  //! Initialize Converter\n  int init(const IndexMeta &mt, const ailego::Params &params) override {\n    IndexMeta::DataType dt = mt.data_type();\n    if (ailego_unlikely((dt != IndexMeta::DataType::DT_FP32 &&\n                         dt != IndexMeta::DataType::DT_FP16) ||\n                        mt.unit_size() != IndexMeta::UnitSizeof(dt))) {\n      LOG_ERROR(\"Unsupported type %d with unit size %u.\", dt, mt.unit_size());\n      return IndexError_Unsupported;\n    }\n\n    params.get(MIPS_CONVERTER_FORCED_HALF_FLOAT, &forced_half_float_);\n    params.get(MIPS_CONVERTER_SPHERICAL_INJECTION, &spherical_injection_);\n    params.get(MIPS_CONVERTER_M_VALUE, &m_value_);\n    params.get(MIPS_CONVERTER_U_VALUE, &u_value_);\n    params.get(MIPS_CONVERTER_L2_NORM, &l2_norm_);\n\n    if (!spherical_injection_) {\n      if (!m_value_) {\n        static const uint32_t m_values[4] = {4, 3, 6, 5};\n        m_value_ = m_values[mt.dimension() % 4];\n      }\n      if (u_value_ <= std::numeric_limits<float>::epsilon() ||\n          u_value_ >= 1.0) {\n        // Try computing a 
default U value\n        constexpr float kLogError = -5.0;  // log_10(distance_error)\n        u_value_ = std::pow(10, kLogError / (1 << (m_value_ + 1)));\n      }\n      if (std::pow(u_value_, (1 << m_value_)) <\n          std::numeric_limits<float>::epsilon()) {\n        LOG_WARN(\"U value %f too small, may cause loss of distance precision.\",\n                 u_value_);\n      }\n    } else {\n      if (m_value_ != 0u || u_value_ != 0.0f) {\n        LOG_WARN(\n            \"Ignore invalid M value or U value if spherical_injection enabled\");\n      }\n      // SphericalInjection requires ||x{i}|| <= 1 for the computation\n      // std::sqrt(1 - ||x{i}||^2), so let the u_value be a little less\n      // than 1.0 for its precision loss in float computation\n      u_value_ = 1.0f - 1e-2;\n      m_value_ = 1;\n    }\n\n    // Setting of MIPS Converter\n    meta_ = mt;\n    if (forced_half_float_) {\n      meta_.set_meta(IndexMeta::DataType::DT_FP16, mt.dimension() + m_value_);\n    } else {\n      meta_.set_meta(dt, mt.dimension() + m_value_);\n    }\n    meta_.set_converter(\"MipsConverter\", 0, params);\n    return 0;\n  }\n\n  //! Cleanup Converter\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! 
Train the data\n  int train(IndexHolder::Pointer holder) override {\n    if (holder->dimension() + m_value_ != meta_.dimension()) {\n      return IndexError_Mismatch;\n    }\n\n    ailego::ElapsedTime timer;\n    auto iter = holder->create_iterator();\n    if (!iter) {\n      LOG_ERROR(\"Failed to create iterator of holder\");\n      return IndexError_Runtime;\n    }\n\n    size_t dim = holder->dimension();\n    switch (holder->data_type()) {\n      case IndexMeta::DataType::DT_FP16:\n        for (; iter->is_valid(); iter->next()) {\n          float score;\n          ailego::Norm2Matrix<ailego::Float16, 1>::Compute(\n              reinterpret_cast<const ailego::Float16 *>(iter->data()), dim,\n              &score);\n\n          if (score > l2_norm_) {\n            l2_norm_ = score;\n            if (l2_norm_ < 1.0 && l2_norm_ > u_value_) {\n              u_value_ = l2_norm_;\n            }\n          }\n          (*stats_.mutable_trained_count())++;\n        }\n        break;\n\n      case IndexMeta::DataType::DT_FP32:\n        for (; iter->is_valid(); iter->next()) {\n          float score;\n          ailego::Norm2Matrix<float, 1>::Compute(\n              reinterpret_cast<const float *>(iter->data()), dim, &score);\n\n          if (score > l2_norm_) {\n            l2_norm_ = score;\n            if (l2_norm_ < 1.0 && l2_norm_ > u_value_) {\n              u_value_ = l2_norm_;\n            }\n          }\n          (*stats_.mutable_trained_count())++;\n        }\n        break;\n\n      default:\n        return IndexError_Mismatch;\n    }\n\n    // Setting of MIPS Reformer\n    ailego::Params reformer_params;\n    reformer_params.set(MIPS_REFORMER_M_VALUE, m_value_);\n    reformer_params.set(MIPS_REFORMER_U_VALUE, u_value_);\n    reformer_params.set(MIPS_REFORMER_L2_NORM, l2_norm_);\n    reformer_params.set(MIPS_REFORMER_FORCED_HALF_FLOAT, forced_half_float_);\n    reformer_params.set(MIPS_REFORMER_NORMALIZE, true);\n    
reformer_params.set(MIPS_REFORMER_SPHERICAL_INJECTION,\n                        spherical_injection_);\n    meta_.set_reformer(\"MipsReformer\", 0, reformer_params);\n    if (meta_.metric_name() == \"InnerProduct\") {\n      LOG_INFO(\"Convert IndexMeasure from InnerProduct to SquaredEuclidean\");\n      meta_.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n    }\n\n    // Setting of MIPS Converter Params\n    ailego::Params params = meta_.converter_params();\n    params.set(MIPS_CONVERTER_FORCED_HALF_FLOAT, forced_half_float_);\n    params.set(MIPS_CONVERTER_M_VALUE, m_value_);\n    params.set(MIPS_CONVERTER_U_VALUE, u_value_);\n    params.set(MIPS_CONVERTER_L2_NORM, l2_norm_);\n    params.set(MIPS_CONVERTER_SPHERICAL_INJECTION, spherical_injection_);\n    meta_.set_converter(\"MipsConverter\", 0, params);\n\n    stats_.set_trained_costtime(timer.milli_seconds());\n    return 0;\n  }\n\n  //! Transform the data\n  int transform(IndexHolder::Pointer holder) override {\n    if (holder->dimension() + m_value_ != meta_.dimension()) {\n      return IndexError_Mismatch;\n    }\n\n    switch (holder->data_type()) {\n      case IndexMeta::DataType::DT_FP16:\n        holder_ = std::make_shared<MipsConverterHalfHolder>(\n            holder, m_value_, u_value_, l2_norm_, spherical_injection_);\n        break;\n\n      case IndexMeta::DataType::DT_FP32:\n        if (forced_half_float_) {\n          holder_ = std::make_shared<MipsConverterForcedHalfHolder>(\n              holder, m_value_, u_value_, l2_norm_, spherical_injection_);\n        } else {\n          holder_ = std::make_shared<MipsConverterHolder>(\n              holder, m_value_, u_value_, l2_norm_, spherical_injection_);\n        }\n        break;\n\n      default:\n        return IndexError_Mismatch;\n    }\n    return 0;\n  }\n\n  //! Dump index into storage\n  int dump(const IndexDumper::Pointer &) override {\n    return 0;\n  }\n\n  //! 
Retrieve statistics\n  const Stats &stats(void) const override {\n    return stats_;\n  }\n\n  //! Retrieve a holder as result\n  IndexHolder::Pointer result(void) const override {\n    return holder_;\n  }\n\n  //! Retrieve Index Meta\n  const IndexMeta &meta(void) const override {\n    return meta_;\n  }\n\n private:\n  uint32_t m_value_{0u};\n  float u_value_{0.0f};\n  float l2_norm_{0.0f};\n  bool forced_half_float_{false};\n  bool spherical_injection_{false};\n  IndexMeta meta_{};\n  IndexHolder::Pointer holder_{};\n  Stats stats_{};\n};\n\nINDEX_FACTORY_REGISTER_CONVERTER(MipsConverter);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/mips_reformer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <ailego/math/normalizer.h>\n#include <core/quantizer/quantizer_params.h>\n#include <zvec/core/framework/index_factory.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Convert the vector By Mips RepeatedQuadraticInjection\n */\ntemplate <typename T1, typename T2,\n          typename =\n              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&\n                                      ailego::IsFloatingPoint<T2>::value>::type>\nstatic inline void ConvertRepeatedQuadraticInjection(const T1 *src, size_t dim,\n                                                     size_t m_value,\n                                                     float u_value,\n                                                     float l2_norm, T2 *dst) {\n  float squared_norm = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = src[i] * u_value / l2_norm;\n    dst[i] = val;\n    squared_norm += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    dst[i] = 0.5f - squared_norm;\n    squared_norm *= squared_norm;\n  }\n}\n\n/*! 
Convert the vector By Mips SphericalInjection\n */\ntemplate <typename T1, typename T2,\n          typename =\n              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&\n                                      ailego::IsFloatingPoint<T2>::value>::type>\nstatic inline void ConvertSphericalInjection(const T1 *src, size_t dim,\n                                             float u_value, float l2_norm,\n                                             T2 *dst) {\n  float squared_norm = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = src[i] * u_value / l2_norm;\n    dst[i] = val;\n    squared_norm += val * val;\n  }\n  dst[dim] = squared_norm < 1.0\n                 ? (1.0 - std::sqrt(1.0 - static_cast<double>(squared_norm)))\n                 : 1.0f;\n}\n\n/*! MIPS Reformer\n */\nclass MipsReformer : public IndexReformer {\n public:\n  //! Initialize Reformer\n  int init(const ailego::Params &params) override {\n    params.get(MIPS_REFORMER_M_VALUE, &m_value_);\n    params.get(MIPS_REFORMER_U_VALUE, &u_value_);\n    params.get(MIPS_REFORMER_L2_NORM, &l2_norm_);\n    params.get(MIPS_REFORMER_NORMALIZE, &normalize_);\n    params.get(MIPS_REFORMER_FORCED_HALF_FLOAT, &forced_half_float_);\n    params.get(MIPS_REFORMER_SPHERICAL_INJECTION, &spherical_injection_);\n    if (spherical_injection_) {\n      if (m_value_ != 1u) {\n        LOG_WARN(\"Invalid M value or U value if spherical_injection enabled\");\n      }\n      m_value_ = 1;\n    }\n    return 0;\n  }\n\n  //! Cleanup Reformer\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load index from container\n  int load(IndexStorage::Pointer) override {\n    return 0;\n  }\n\n  //! Unload index\n  int unload(void) override {\n    return 0;\n  }\n\n  //! 
Transform query\n  int transform(const void *query, const IndexQueryMeta &qmeta,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType dt = qmeta.data_type();\n\n    if (dt == IndexMeta::DataType::DT_FP32) {\n      if (qmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n\n      if (forced_half_float_) {\n        out->clear();\n        out->resize((qmeta.dimension() + m_value_) * sizeof(ailego::Float16));\n\n        if (normalize_) {\n          float norm;\n          ailego::Norm2Matrix<float, 1>::Compute(\n              reinterpret_cast<const float *>(query), qmeta.dimension(), &norm);\n\n          ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),\n                                      qmeta.dimension(), norm,\n                                      reinterpret_cast<uint16_t *>(&(*out)[0]));\n        } else {\n          ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),\n                                      qmeta.dimension(),\n                                      reinterpret_cast<uint16_t *>(&(*out)[0]));\n        }\n        if (spherical_injection_) {\n          reinterpret_cast<ailego::Float16 *>(&(*out)[0])[qmeta.dimension()] =\n              1.0f;\n        }\n        *ometa = qmeta;\n        ometa->set_meta(IndexMeta::DataType::DT_FP16,\n                        qmeta.dimension() + m_value_);\n\n      } else {\n        out->assign(reinterpret_cast<const char *>(query),\n                    qmeta.element_size());\n        out->resize((qmeta.dimension() + m_value_) * sizeof(float));\n\n        if (normalize_) {\n          float norm;\n          ailego::Normalizer<float>::L2(reinterpret_cast<float *>(&(*out)[0]),\n                                        qmeta.dimension(), &norm);\n        }\n        if (spherical_injection_) {\n          reinterpret_cast<float *>(&(*out)[0])[qmeta.dimension()] = 1.0f;\n        }\n        *ometa = qmeta;\n        
ometa->set_dimension(qmeta.dimension() + m_value_);\n      }\n    } else if (dt == IndexMeta::DataType::DT_FP16) {\n      if (qmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n      out->assign(reinterpret_cast<const char *>(query), qmeta.element_size());\n      out->resize((qmeta.dimension() + m_value_) * sizeof(ailego::Float16));\n\n      if (normalize_) {\n        float norm;\n        ailego::Normalizer<ailego::Float16>::L2(\n            reinterpret_cast<ailego::Float16 *>(&(*out)[0]), qmeta.dimension(),\n            &norm);\n      }\n      if (spherical_injection_) {\n        reinterpret_cast<ailego::Float16 *>(&(*out)[0])[qmeta.dimension()] =\n            1.0f;\n      }\n      *ometa = qmeta;\n      ometa->set_dimension(qmeta.dimension() + m_value_);\n    } else {\n      return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Transform queries\n  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,\n                std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType dt = qmeta.data_type();\n\n    if (dt == IndexMeta::DataType::DT_FP32) {\n      if (qmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n      out->clear();\n\n      if (forced_half_float_) {\n        for (uint32_t i = 0; i < count; ++i) {\n          size_t offset = out->size();\n          out->resize(offset +\n                      (qmeta.dimension() + m_value_) * sizeof(ailego::Float16));\n\n          const float *sub_query =\n              reinterpret_cast<const float *>(query) + i * qmeta.dimension();\n\n          if (normalize_) {\n            float norm;\n            ailego::Norm2Matrix<float, 1>::Compute(sub_query, qmeta.dimension(),\n                                                   &norm);\n            ailego::FloatHelper::ToFP16(\n                sub_query, qmeta.dimension(), norm,\n                reinterpret_cast<uint16_t 
*>(&(*out)[offset]));\n          } else {\n            ailego::FloatHelper::ToFP16(\n                sub_query, qmeta.dimension(),\n                reinterpret_cast<uint16_t *>(&(*out)[offset]));\n          }\n          if (spherical_injection_) {\n            reinterpret_cast<ailego::Float16 *>(\n                &(*out)[offset])[qmeta.dimension()] = 1.0f;\n          }\n        }\n        *ometa = qmeta;\n        ometa->set_meta(IndexMeta::DataType::DT_FP16,\n                        qmeta.dimension() + m_value_);\n\n      } else {\n        for (uint32_t i = 0; i < count; ++i) {\n          size_t offset = out->size();\n          out->append(\n              reinterpret_cast<const char *>(query) + i * qmeta.element_size(),\n              qmeta.element_size());\n          out->resize(offset + (qmeta.dimension() + m_value_) * sizeof(float));\n\n          if (normalize_) {\n            float norm;\n            ailego::Normalizer<float>::L2(\n                reinterpret_cast<float *>(&(*out)[offset]), qmeta.dimension(),\n                &norm);\n          }\n          if (spherical_injection_) {\n            reinterpret_cast<float *>(&(*out)[offset])[qmeta.dimension()] =\n                1.0f;\n          }\n        }\n        *ometa = qmeta;\n        ometa->set_dimension(qmeta.dimension() + m_value_);\n      }\n    } else if (dt == IndexMeta::DataType::DT_FP16) {\n      if (qmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n      out->clear();\n\n      for (uint32_t i = 0; i < count; ++i) {\n        size_t offset = out->size();\n        out->append(\n            reinterpret_cast<const char *>(query) + i * qmeta.element_size(),\n            qmeta.element_size());\n        out->resize(offset +\n                    (qmeta.dimension() + m_value_) * sizeof(ailego::Float16));\n\n        if (normalize_) {\n          float norm;\n          ailego::Normalizer<ailego::Float16>::L2(\n              reinterpret_cast<ailego::Float16 
*>(&(*out)[offset]),\n              qmeta.dimension(), &norm);\n        }\n        if (spherical_injection_) {\n          reinterpret_cast<ailego::Float16 *>(\n              &(*out)[offset])[qmeta.dimension()] = 1.0f;\n        }\n      }\n      *ometa = qmeta;\n      ometa->set_dimension(qmeta.dimension() + m_value_);\n\n    } else {\n      return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Convert a record\n  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,\n              IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType dt = rmeta.data_type();\n\n    if (dt == IndexMeta::DataType::DT_FP32) {\n      if (rmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n\n      const float *vec = reinterpret_cast<const float *>(record);\n      if (forced_half_float_) {\n        *ometa = rmeta;\n        ometa->set_meta(IndexMeta::DataType::DT_FP16,\n                        rmeta.dimension() + m_value_);\n        out->resize(ometa->element_size());\n\n        ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);\n        if (!spherical_injection_) {\n          ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,\n                                            u_value_, l2_norm_, dst);\n        } else {\n          ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,\n                                    dst);\n        }\n      } else {\n        *ometa = rmeta;\n        ometa->set_dimension(rmeta.dimension() + m_value_);\n        out->resize(ometa->element_size());\n\n        float *dst = reinterpret_cast<float *>(&(*out)[0]);\n        if (!spherical_injection_) {\n          ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,\n                                            u_value_, l2_norm_, dst);\n        } else {\n          ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,\n                                   
 dst);\n        }\n      }\n    } else if (dt == IndexMeta::DataType::DT_FP16) {\n      if (rmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n      *ometa = rmeta;\n      ometa->set_dimension(rmeta.dimension() + m_value_);\n      out->resize(ometa->element_size());\n\n      const auto *vec = reinterpret_cast<const ailego::Float16 *>(record);\n      ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);\n      if (!spherical_injection_) {\n        ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,\n                                          u_value_, l2_norm_, dst);\n      } else {\n        ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,\n                                  dst);\n      }\n    } else {\n      return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Convert records\n  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,\n              std::string *out, IndexQueryMeta *ometa) const override {\n    IndexMeta::DataType dt = rmeta.data_type();\n\n    if (dt == IndexMeta::DataType::DT_FP32) {\n      if (rmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n      *ometa = rmeta;\n\n      if (forced_half_float_) {\n        ometa->set_meta(IndexMeta::DataType::DT_FP16,\n                        rmeta.dimension() + m_value_);\n        out->resize(ometa->element_size() * count);\n        for (uint32_t i = 0; i < count; ++i) {\n          const float *sub_query =\n              reinterpret_cast<const float *>(records) + i * rmeta.dimension();\n          ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(\n              &(*out)[i * ometa->element_size()]);\n          if (!spherical_injection_) {\n            ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),\n                                              m_value_, u_value_, l2_norm_,\n                                           
   dst);\n          } else {\n            ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,\n                                      l2_norm_, dst);\n          }\n        }\n      } else {\n        ometa->set_dimension(rmeta.dimension() + m_value_);\n        out->resize(ometa->element_size() * count);\n        for (uint32_t i = 0; i < count; ++i) {\n          const float *sub_query =\n              reinterpret_cast<const float *>(records) + i * rmeta.dimension();\n          float *dst =\n              reinterpret_cast<float *>(&(*out)[i * ometa->element_size()]);\n          if (!spherical_injection_) {\n            ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),\n                                              m_value_, u_value_, l2_norm_,\n                                              dst);\n          } else {\n            ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,\n                                      l2_norm_, dst);\n          }\n        }\n      }\n    } else if (dt == IndexMeta::DataType::DT_FP16) {\n      if (rmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n      *ometa = rmeta;\n      ometa->set_dimension(rmeta.dimension() + m_value_);\n      out->resize(ometa->element_size() * count);\n\n      for (uint32_t i = 0; i < count; ++i) {\n        const ailego::Float16 *sub_query =\n            reinterpret_cast<const ailego::Float16 *>(records) +\n            i * rmeta.dimension();\n        ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(\n            &(*out)[i * ometa->element_size()]);\n        if (!spherical_injection_) {\n          ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),\n                                            m_value_, u_value_, l2_norm_, dst);\n        } else {\n          ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,\n                                    l2_norm_, dst);\n        }\n      }\n    } else 
{\n      return IndexError_Unsupported;\n    }\n    return 0;\n  }\n\n  //! Normalize results\n  int normalize(const void *query, const IndexQueryMeta &qmeta,\n                IndexDocumentList &result) const override {\n    IndexMeta::DataType dt = qmeta.data_type();\n    float norm = 1.0f;\n\n    if (dt == IndexMeta::DataType::DT_FP32) {\n      if (qmeta.unit_size() != sizeof(float)) {\n        return IndexError_Unsupported;\n      }\n      if (normalize_) {\n        ailego::Norm2Matrix<float, 1>::Compute(\n            reinterpret_cast<const float *>(query), qmeta.dimension(), &norm);\n      }\n    } else if (dt == IndexMeta::DataType::DT_FP16) {\n      if (qmeta.unit_size() != sizeof(ailego::Float16)) {\n        return IndexError_Unsupported;\n      }\n      if (normalize_) {\n        ailego::Norm2Matrix<ailego::Float16, 1>::Compute(\n            reinterpret_cast<const ailego::Float16 *>(query), qmeta.dimension(),\n            &norm);\n      }\n    } else {\n      return IndexError_Unsupported;\n    }\n\n    if (!spherical_injection_) {\n      const float a = 1.0f + m_value_ * 0.25f;\n      const float lamba = 0.5f * norm * l2_norm_ / u_value_;\n      for (auto &it : result) {\n        *it.mutable_score() = (a - it.score()) * lamba;\n      }\n    } else {\n      const float lambda = norm * l2_norm_ / u_value_;\n      for (auto &it : result) {\n        *it.mutable_score() = (1.0f - 0.5f * it.score()) * lambda;\n      }\n    }\n    return 0;\n  }\n\n private:\n  bool normalize_{false};\n  bool forced_half_float_{false};\n  bool spherical_injection_{false};\n  uint32_t m_value_{0u};\n  float u_value_{0.0f};\n  float l2_norm_{0.0f};\n};\n\nINDEX_FACTORY_REGISTER_REFORMER(MipsReformer);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/quantizer/quantizer_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n//! MipsConverter\nstatic const std::string MIPS_CONVERTER_M_VALUE = \"mips.converter.m_value\";\nstatic const std::string MIPS_CONVERTER_U_VALUE = \"mips.converter.u_value\";\nstatic const std::string MIPS_CONVERTER_L2_NORM = \"mips.converter.l2_norm\";\nstatic const std::string MIPS_CONVERTER_FORCED_HALF_FLOAT =\n    \"mips.converter.forced_half_float\";\nstatic const std::string MIPS_CONVERTER_SPHERICAL_INJECTION =\n    \"mips.converter.spherical_injection\";\n\n//! MipsReverseConverter\nstatic const std::string MIPS_REVERSE_CONVERTER_M_VALUE =\n    \"mips_reverse.converter.m_value\";\nstatic const std::string MIPS_REVERSE_CONVERTER_U_VALUE =\n    \"mips_reverse.converter.u_value\";\nstatic const std::string MIPS_REVERSE_CONVERTER_L2_NORM =\n    \"mips_reverse.converter.l2_norm\";\nstatic const std::string MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT =\n    \"mips_reverse.converter.forced_single_float\";\nstatic const std::string MIPS_REVERSE_CONVERTER_SPHERICAL_INJECTION =\n    \"mips_reverse.converter.spherical_injection\";\n\n//! 
MipsReformer\nstatic const std::string MIPS_REFORMER_M_VALUE = \"mips.reformer.m_value\";\nstatic const std::string MIPS_REFORMER_U_VALUE = \"mips.reformer.u_value\";\nstatic const std::string MIPS_REFORMER_L2_NORM = \"mips.reformer.l2_norm\";\nstatic const std::string MIPS_REFORMER_NORMALIZE = \"mips.reformer.normalize\";\nstatic const std::string MIPS_REFORMER_FORCED_HALF_FLOAT =\n    \"mips.reformer.forced_half_float\";\nstatic const std::string MIPS_REFORMER_SPHERICAL_INJECTION =\n    \"mips.reformer.spherical_injection\";\n\n//! NormalizeConverter\nstatic const std::string NORMALIZE_CONVERTER_FORCED_HALF_FLOAT =\n    \"normalize.converter.forced_half_float\";\nstatic const std::string NORMALIZE_CONVERTER_P_VALUE =\n    \"normalize.converter.p_value\";\n\n//! NormalizeReformer\nstatic const std::string NORMALIZE_REFORMER_FORCED_HALF_FLOAT =\n    \"normalize.reformer.forced_half_float\";\nstatic const std::string NORMALIZE_REFORMER_P_VALUE =\n    \"normalize.reformer.p_value\";\n\n//! Int8Converter\nstatic const std::string INT8_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =\n    \"int8_quantizer.converter.histogram_bins_count\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_DISABLE_BIAS =\n    \"int8_quantizer.converter.disable_bias\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_BIAS =\n    \"int8_quantizer.converter.bias\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_SCALE =\n    \"int8_quantizer.converter.scale\";\n\n//! Int4Converter\nstatic const std::string INT4_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =\n    \"int4_quantizer.converter.histogram_bins_count\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_DISABLE_BIAS =\n    \"int4_quantizer.converter.disable_bias\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_BIAS =\n    \"int4_quantizer.converter.bias\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_SCALE =\n    \"int4_quantizer.converter.scale\";\n\n//! 
Int8Reformer\nstatic const std::string INT8_QUANTIZER_REFORMER_BIAS =\n    \"int8_quantizer.reformer.bias\";\nstatic const std::string INT8_QUANTIZER_REFORMER_SCALE =\n    \"int8_quantizer.reformer.scale\";\nstatic const std::string INT8_QUANTIZER_REFORMER_METRIC =\n    \"int8_quantizer.reformer.metric\";\n\n//! Int4Reformer\nstatic const std::string INT4_QUANTIZER_REFORMER_BIAS =\n    \"int4_quantizer.reformer.bias\";\nstatic const std::string INT4_QUANTIZER_REFORMER_SCALE =\n    \"int4_quantizer.reformer.scale\";\nstatic const std::string INT4_QUANTIZER_REFORMER_METRIC =\n    \"int4_quantizer.reformer.metric\";\n\n//! CosineConverter\nstatic const std::string COSINE_CONVERTER_FORCED_HALF_FLOAT =\n    \"cosine.converter.forced_half_float\";\n\n//! CosineReformer\nstatic const std::string COSINE_REFORMER_FORCED_HALF_FLOAT =\n    \"cosine.reformer.forced_half_float\";\n\n//! IntegerStreamingConverter\nstatic const std::string INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE =\n    \"integer_streaming.converter.enable_normalize\";\n\n//! IntegerStreamingReformer\nstatic const std::string INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE =\n    \"integer_streaming.reformer.enable_normalize\";\nstatic const std::string INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN =\n    \"integer_streaming.reformer.is_euclidean\";\n\n//! DoubleBitConverter\nstatic const std::string DOUBLE_BIT_CONVERTER_TRAIN_SAMPLE_COUNT =\n    \"double_bit.converter.train_sample_count\";\nstatic const std::string DOUBLE_BIT_CONVERTER_A_VALUE =\n    \"double_bit.converter.a_value\";\nstatic const std::string DOUBLE_BIT_CONVERTER_B_VALUE =\n    \"double_bit.converter.b_value\";\n\n//! DoubleBitReformer\nstatic const std::string DOUBLE_BIT_REFORMER_A_VALUE =\n    \"double_bit.reformer.a_value\";\nstatic const std::string DOUBLE_BIT_REFORMER_B_VALUE =\n    \"double_bit.reformer.b_value\";\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/quantizer/record_quantizer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/core/framework/index_meta.h>\n\n#pragma once\n\nnamespace zvec {\nnamespace core {\n\nclass RecordQuantizer {\n public:\n  //! Convert the float feature to int8 or int4 feature\n  static inline void quantize_record(const float *vec, size_t dim,\n                                     IndexMeta::DataType type,\n                                     bool is_euclidean, void *out) {\n    if (type == IndexMeta::DataType::DT_FP16) {\n      ailego::FloatHelper::ToFP16(vec, dim, reinterpret_cast<uint16_t *>(out));\n    } else if (type == IndexMeta::DataType::DT_INT4 ||\n               type == IndexMeta::DataType::DT_INT8) {\n      float min = std::numeric_limits<float>::max();\n      float max = std::numeric_limits<float>::lowest();\n      constexpr float epsilon = std::numeric_limits<float>::epsilon();\n      for (size_t i = 0; i < dim; ++i) {\n        min = std::min(min, vec[i]);\n        max = std::max(max, vec[i]);\n      }\n\n      float sum = 0.0f;\n      float squared_sum = 0.0f;\n      int int8_sum = 0;\n      float *extras, scale, bias;\n      if (type == IndexMeta::DataType::DT_INT8) {\n        scale = 254 / std::max(max - min, epsilon);\n        bias = -min * scale - 127;\n        for (size_t i = 0; i < dim; ++i) {\n          float v = vec[i] * scale + bias;\n          squared_sum += v * v;\n          sum += v;\n          
(reinterpret_cast<int8_t *>(out))[i] =\n              static_cast<int8_t>(std::round(v));\n          int8_sum += (reinterpret_cast<int8_t *>(out))[i];\n        }\n        extras = reinterpret_cast<float *>(static_cast<int8_t *>(out) + dim);\n      } else {\n        scale = 15 / std::max(max - min, epsilon);\n        bias = -min * scale - 8;\n        for (size_t i = 0; i < dim; i += 2) {\n          float lo = vec[i] * scale + bias;\n          float hi = vec[i + 1] * scale + bias;\n          squared_sum += lo * lo;\n          sum += lo;\n          squared_sum += hi * hi;\n          sum += hi;\n          (reinterpret_cast<uint8_t *>(out))[i / 2] =\n              (static_cast_from_float_to_uint8(std::round(hi)) << 4) |\n              (static_cast_from_float_to_uint8(std::round(lo)) & 0xF);\n        }\n        extras =\n            reinterpret_cast<float *>(static_cast<uint8_t *>(out) + dim / 2);\n      }\n\n      // Save the feature quantization params for IndexMeasure\n      extras[0] = 1.0f / scale;\n      extras[1] = -bias / scale;\n      extras[2] = sum;\n\n      if (type == IndexMeta::DataType::DT_INT8) {\n        extras[3] = squared_sum;\n        reinterpret_cast<int32_t *>(extras + 4)[0] = int8_sum;\n      } else {\n        if (is_euclidean) {\n          extras[3] = squared_sum;\n        } else {\n          reinterpret_cast<int *>(extras)[3] = int8_sum;\n        }\n      }\n    }\n  }\n\n  static inline void unquantize_record(const void *vec, size_t origin_dim,\n                                       IndexMeta::DataType type, float *out) {\n    if (type == IndexMeta::DataType::DT_INT8) {\n      const float *extras = reinterpret_cast<const float *>(\n          static_cast<const int8_t *>(vec) + origin_dim);\n\n      const int8_t *buf = reinterpret_cast<const int8_t *>(vec);\n      for (size_t i = 0; i < origin_dim; ++i) {\n        out[i] = buf[i] * extras[0] + extras[1];\n      }\n\n    } else if (type == IndexMeta::DataType::DT_INT4) {\n      const float *extras 
= reinterpret_cast<const float *>(\n          static_cast<const uint8_t *>(vec) + origin_dim / 2);\n\n      const uint8_t *buf = reinterpret_cast<const uint8_t *>(vec);\n\n      for (size_t i = 0; i < origin_dim / 2; ++i) {\n        int8_t lo = (static_cast<int8_t>(buf[i] << 4) >> 4);\n        int8_t hi = (static_cast<int8_t>(buf[i] & 0xf0) >> 4);\n\n        out[2 * i] = lo * extras[0] + extras[1];\n        out[2 * i + 1] = hi * extras[0] + extras[1];\n      }\n    } else if (type == IndexMeta::DataType::DT_FP16) {\n      const uint16_t *in_buf = reinterpret_cast<const uint16_t *>(vec);\n      for (size_t i = 0; i < origin_dim; ++i) {\n        out[i] = ailego::FloatHelper::ToFP32(in_buf[i]);\n      }\n    }\n  }\n\n  static inline void unquantize_sparse_record(const void *sparse_value,\n                                              size_t sparse_count,\n                                              IndexMeta::DataType type,\n                                              float *sparse_value_out) {\n    if (type == IndexMeta::DataType::DT_FP16) {\n      const uint16_t *in_buf = reinterpret_cast<const uint16_t *>(sparse_value);\n      for (size_t i = 0; i < sparse_count; ++i) {\n        sparse_value_out[i] = ailego::FloatHelper::ToFP32(in_buf[i]);\n      }\n    }\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME core_utility \n    STATIC SHARED STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS zvec_ailego core_framework\n    INCS . ${PROJECT_ROOT_DIR}/src/core\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/core/utility/basic_refiner.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_refiner.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Basic Refiner\n */\nclass BasicRefiner : public IndexRefiner {\n public:\n  const uint32_t kScaleFactor = 10;\n\n public:\n  class BasicRefinerContext : public Context {\n   public:\n    //! Construct\n    BasicRefinerContext() = default;\n    ~BasicRefinerContext() = default;\n\n    int set_contexts(IndexRunner::Context::Pointer base_ctx,\n                     IndexRunner::Context::Pointer refine_ctx) override {\n      base_ctx_ = std::move(base_ctx);\n      refine_ctx_ = std::move(refine_ctx);\n\n      return 0;\n    }\n\n    //! Set topk of search result\n    void set_topk(uint32_t topk) override {\n      topk_ = topk;\n    }\n\n    uint32_t topk() const override {\n      return topk_;\n    }\n\n    //! Retrieve search result with index\n    const IndexDocumentList &result(void) const override {\n      return results_[0];\n    }\n\n    //! Retrieve search result with index\n    const IndexDocumentList &result(size_t idx) const override {\n      return results_[idx];\n    }\n\n    //! 
Retrieve mutable result with index\n    IndexDocumentList *mutable_result(size_t idx) override {\n      ailego_assert_with(idx < results_.size(), \"invalid idx\");\n      return &results_[idx];\n    }\n\n    void resize_results(size_t size) {\n      results_.resize(size);\n    }\n\n    IndexRunner::Context::Pointer &base_context() {\n      return base_ctx_;\n    }\n\n    IndexRunner::Context::Pointer &refine_context() {\n      return refine_ctx_;\n    }\n\n   private:\n    uint32_t topk_{0};\n    std::vector<IndexDocumentList> results_{};\n    std::vector<IndexGroupDocumentList> group_results_{};\n\n    IndexRunner::Context::Pointer base_ctx_{nullptr};\n    IndexRunner::Context::Pointer refine_ctx_{nullptr};\n  };\n\n public:\n  //! Create a context\n  Context::Pointer create_context(void) const override {\n    auto base_ctx = base_runner_->create_context();\n    auto refine_ctx = refine_runner_->create_context();\n\n    BasicRefinerContext *ctx = new (std::nothrow) BasicRefinerContext();\n\n    ctx->set_contexts(std::move(base_ctx), std::move(refine_ctx));\n\n    return Context::Pointer(ctx);\n  }\n\n  //! Initialize refiner with streamer\n  int init(IndexRunner::Pointer base_runner, IndexRunner::Pointer refine_runner,\n           const ailego::Params &params) override {\n    base_runner_ = base_runner;\n    refine_runner_ = refine_runner;\n\n    params_ = params;\n\n    return 0;\n  }\n\n  //! Cleanup\n  int cleanup() override {\n    return 0;\n  }\n\n  //! 
Add a vector into index\n  virtual int add_impl(uint64_t key, const void *base_query,\n                       const IndexQueryMeta &base_qmeta,\n                       const void *refine_query,\n                       const IndexQueryMeta &refine_qmeta,\n                       Context::Pointer &context) override {\n    BasicRefinerContext *ctx =\n        dynamic_cast<BasicRefinerContext *>(context.get());\n\n    int ret = base_runner_->add_impl(key, base_query, base_qmeta,\n                                     ctx->base_context());\n    if (ret != 0) {\n      LOG_ERROR(\"Error in adding vector to base index\");\n\n      return ret;\n    }\n\n    ret = refine_runner_->add_impl(key, refine_query, refine_qmeta,\n                                   ctx->refine_context());\n    if (ret != 0) {\n      LOG_ERROR(\"Error in adding vector to refine index\");\n\n      return ret;\n    }\n\n    return 0;\n  }\n\n  //! Similarity search\n  virtual int search_impl(const void *base_query,\n                          const IndexQueryMeta &base_qmeta,\n                          const void *refine_query,\n                          const IndexQueryMeta &refine_qmeta, uint32_t count,\n                          Context::Pointer &context) const override {\n    BasicRefinerContext *ctx =\n        dynamic_cast<BasicRefinerContext *>(context.get());\n\n    uint32_t topk = ctx->topk();\n\n    ctx->resize_results(count);\n\n    int ret;\n    for (size_t q = 0; q < count; ++q) {\n      auto &base_ctx = ctx->base_context();\n      auto &refine_ctx = ctx->refine_context();\n\n      base_ctx->set_topk(topk * scale_factor_);\n      ret = base_runner_->search_impl(base_query, base_qmeta, base_ctx);\n      if (ret != 0) {\n        LOG_ERROR(\"Error in searching vector from base index\");\n\n        return ret;\n      }\n\n      auto base_result = base_ctx->result();\n\n      std::vector<uint64_t> keys;\n      for (size_t i = 0; i < base_result.size(); ++i) {\n        
keys.push_back(base_result[i].key());\n      }\n\n      std::vector<std::vector<uint64_t>> keys_array;\n      keys_array.push_back(std::move(keys));\n\n      refine_ctx->set_topk(topk);\n      ret = refine_runner_->search_bf_by_p_keys_impl(refine_query, keys_array,\n                                                     refine_qmeta, refine_ctx);\n      if (ret != 0) {\n        LOG_ERROR(\"Error in searching vector from refine index\");\n\n        return ret;\n      }\n\n      auto refine_result = refine_ctx->result();\n      *ctx->mutable_result(q) = refine_result;\n\n      base_query =\n          static_cast<const char *>(base_query) + base_qmeta.element_size();\n      refine_query =\n          static_cast<const char *>(refine_query) + refine_qmeta.element_size();\n    }\n\n    return 0;\n  }\n\n  //! Similarity search\n  virtual int search_impl(const void *base_query,\n                          const IndexQueryMeta &base_qmeta,\n                          const void *refine_query,\n                          const IndexQueryMeta &refine_qmeta,\n                          Context::Pointer &context) const override {\n    return search_impl(base_query, base_qmeta, refine_query, refine_qmeta, 1,\n                       context);\n  }\n\n  //! 
Similarity brute force search\n  virtual int search_bf_impl(const void *base_query,\n                             const IndexQueryMeta &base_qmeta,\n                             const void *refine_query,\n                             const IndexQueryMeta &refine_qmeta, uint32_t count,\n                             Context::Pointer &context) const override {\n    BasicRefinerContext *ctx =\n        dynamic_cast<BasicRefinerContext *>(context.get());\n\n    for (size_t q = 0; q < count; ++q) {\n      int ret;\n\n      auto &base_ctx = ctx->base_context();\n      auto &refine_ctx = ctx->refine_context();\n\n      ret = base_runner_->search_impl(base_query, base_qmeta, base_ctx);\n      if (ret != 0) {\n        LOG_ERROR(\"Error in searching vector from base index\");\n\n        return ret;\n      }\n\n      auto results = base_ctx->result();\n      std::vector<std::vector<uint64_t>> keys;\n\n      ret = refine_runner_->search_bf_by_p_keys_impl(refine_query, keys,\n                                                     refine_qmeta, refine_ctx);\n      if (ret != 0) {\n        LOG_ERROR(\"Error in searching vector from refine index\");\n\n        return ret;\n      }\n      auto refine_result = refine_ctx->result();\n      *ctx->mutable_result(q) = refine_result;\n\n      base_query =\n          static_cast<const char *>(base_query) + base_qmeta.element_size();\n      refine_query =\n          static_cast<const char *>(refine_query) + refine_qmeta.element_size();\n    }\n\n    return 0;\n  }\n\n  //! 
Similarity brute force search\n  virtual int search_bf_impl(const void *base_query,\n                             const IndexQueryMeta &base_qmeta,\n                             const void *refine_query,\n                             const IndexQueryMeta &refine_qmeta,\n                             Context::Pointer &context) const override {\n    return search_bf_impl(base_query, base_qmeta, refine_query, refine_qmeta, 1,\n                          context);\n  }\n\n private:\n  uint32_t scale_factor_{kScaleFactor};\n  ailego::Params params_;\n\n  IndexRunner::Pointer base_runner_{nullptr};\n  IndexRunner::Pointer refine_runner_{nullptr};\n};\n\nINDEX_FACTORY_REGISTER_REFINER(BasicRefiner);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/buffer_storage.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <algorithm>\n#include <mutex>\n#include <zvec/ailego/buffer/buffer_pool.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_mapping.h>\n#include <zvec/core/framework/index_version.h>\n#include \"utility_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! MMap File Storage\n */\nclass BufferStorage : public IndexStorage {\n public:\n  /*! Index Storage Segment\n   */\n  class WrappedSegment : public IndexStorage::Segment,\n                         public std::enable_shared_from_this<Segment> {\n   public:\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    WrappedSegment(BufferStorage *owner, IndexMapping::Segment *segment,\n                   uint64_t segment_header_start_offset,\n                   IndexFormat::MetaHeader *segment_header, size_t segment_id)\n        : segment_(segment),\n          owner_(owner),\n          segment_id_(segment_id),\n          capacity_(static_cast<size_t>(segment->meta()->data_size +\n                                        segment->meta()->padding_size)),\n          segment_header_start_offset_(segment_header_start_offset),\n          segment_header_(segment_header) {}\n    //! 
Destructor\n    virtual ~WrappedSegment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return static_cast<size_t>(segment_->meta()->data_size);\n    }\n\n    //! Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return segment_->meta()->data_crc;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const override {\n      return static_cast<size_t>(segment_->meta()->padding_size);\n    }\n\n    //! Retrieve capacity of segment\n    size_t capacity(void) const override {\n      return capacity_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      size_t buffer_offset = segment_header_start_offset_ +\n                             segment_header_->content_offset +\n                             segment_->meta()->data_index;\n      auto *raw = owner_->get_buffer(buffer_offset, capacity_, segment_id_);\n      if (!raw) {\n        return 0;\n      }\n      auto *data = raw + offset;\n      memmove(buf, data, len);\n      return len;\n    }\n\n    //! 
Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      size_t buffer_offset = segment_header_start_offset_ +\n                             segment_header_->content_offset +\n                             segment_->meta()->data_index;\n      auto *raw = owner_->get_buffer(buffer_offset, capacity_, segment_id_);\n      if (!raw) {\n        return 0;\n      }\n      *data = raw + offset;\n      return len;\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      size_t buffer_offset = segment_header_start_offset_ +\n                             segment_header_->content_offset +\n                             segment_->meta()->data_index;\n      auto *raw = owner_->get_buffer(buffer_offset, capacity_, segment_id_);\n      if (!raw) {\n        return 0;\n      }\n\n      data.reset(owner_->buffer_pool_handle_.get(), segment_id_, raw + offset);\n      if (data.data()) {\n        return len;\n      } else {\n        LOG_ERROR(\"read error.\");\n        return -1;\n      }\n    }\n\n    //! Write data into the storage with offset\n    size_t write(size_t /*offset*/, const void * /*data*/,\n                 size_t len) override {\n      return len;\n    }\n\n    //! Resize size of data\n    size_t resize(size_t /*size*/) override {\n      return 0;\n    }\n\n    //! Update crc of data\n    void update_data_crc(uint32_t /*crc*/) override {}\n\n    //! 
Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   protected:\n    friend BufferStorage;\n    IndexMapping::Segment *segment_{};\n\n   private:\n    BufferStorage *owner_{nullptr};\n    size_t segment_id_{};\n    size_t capacity_{};\n    uint64_t segment_header_start_offset_;\n    IndexFormat::MetaHeader *segment_header_;\n  };\n\n  //! Destructor\n  virtual ~BufferStorage(void) {\n    this->cleanup();\n  }\n\n  //! Initialize storage\n  int init(const ailego::Params &params) override {\n    params.get(BUFFER_STORAGE_MEMORY_SIZE, &buffer_size_);\n    LOG_INFO(\"buffer size: %lu\", buffer_size_);\n    return 0;\n  }\n\n  //! Cleanup storage\n  int cleanup(void) override {\n    this->close_index();\n    return 0;\n  }\n\n  //! Open storage\n  int open(const std::string &path, bool /*create*/) override {\n    file_name_ = path;\n    buffer_pool_ = std::make_shared<ailego::VecBufferPool>(path);\n    buffer_pool_handle_ = std::make_shared<ailego::VecBufferPoolHandle>(\n        buffer_pool_->get_handle());\n    int ret = ParseToMapping();\n    if (ret != 0) {\n      return ret;\n    }\n    ret = buffer_pool_->init(buffer_size_, max_segment_size_, segments_.size());\n    // for (auto iter = segments_.begin(); iter != segments_.end(); iter++) {\n    //   auto seg = this->get(iter->first, 0);\n    //   MemoryBlock block;\n    //   int len = seg->read(0, block, 1);\n    //   LOG_ERROR(\"segment %s: %d\", iter->first.c_str(), len);\n    // }\n    if (ret != 0) {\n      return ret;\n    }\n    return 0;\n  }\n\n  char *get_buffer(size_t offset, size_t length, size_t block_id) {\n    return buffer_pool_handle_->get_block(offset, length, block_id);\n  }\n\n  int get_meta(size_t offset, size_t length, char *out) {\n    return buffer_pool_handle_->get_meta(offset, length, out);\n  }\n\n  int ParseHeader(size_t offset) {\n    std::unique_ptr<char[]> buffer(new char[sizeof(header_)]);\n    if (get_meta(offset, 
sizeof(header_), buffer.get()) != 0) {\n      LOG_ERROR(\"Get segment header failed.\");\n      return IndexError_Runtime;\n    }\n    uint8_t *header_ptr = reinterpret_cast<uint8_t *>(buffer.get());\n    memcpy(&header_, header_ptr, sizeof(header_));\n    if (header_.meta_header_size != sizeof(IndexFormat::MetaHeader)) {\n      LOG_ERROR(\"Header meta size is invalid.\");\n      return IndexError_InvalidLength;\n    }\n    if (ailego::Crc32c::Hash(&header_, sizeof(header_), header_.header_crc) !=\n        header_.header_crc) {\n      LOG_ERROR(\"Header meta checksum is invalid.\");\n      return IndexError_InvalidChecksum;\n    }\n    return 0;\n  }\n\n  int ParseFooter(size_t offset) {\n    std::unique_ptr<char[]> buffer(new char[sizeof(footer_)]);\n    if (get_meta(offset, sizeof(footer_), buffer.get()) != 0) {\n      LOG_ERROR(\"Get segment footer failed.\");\n      return IndexError_Runtime;\n    }\n    uint8_t *footer_ptr = reinterpret_cast<uint8_t *>(buffer.get());\n    memcpy(&footer_, footer_ptr, sizeof(footer_));\n    if (offset < (size_t)footer_.segments_meta_size) {\n      LOG_ERROR(\"Footer meta size is invalid.\");\n      return IndexError_InvalidLength;\n    }\n    if (ailego::Crc32c::Hash(&footer_, sizeof(footer_), footer_.footer_crc) !=\n        footer_.footer_crc) {\n      LOG_ERROR(\"Footer meta checksum is invalid.\");\n      return IndexError_InvalidChecksum;\n    }\n    return 0;\n  }\n\n  int ParseSegment(size_t offset) {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    std::unique_ptr<char[]> segment_buffer =\n        std::make_unique<char[]>(footer_.segments_meta_size);\n    if (get_meta(offset, footer_.segments_meta_size, segment_buffer.get()) !=\n        0) {\n      LOG_ERROR(\"Get segment meta failed.\");\n      return IndexError_Runtime;\n    }\n    if (ailego::Crc32c::Hash(segment_buffer.get(), footer_.segments_meta_size,\n                             0u) != footer_.segments_meta_crc) {\n      LOG_ERROR(\"Index segments 
meta checksum is invalid.\");\n      return IndexError_InvalidChecksum;\n    }\n    IndexFormat::SegmentMeta *segment_start =\n        reinterpret_cast<IndexFormat::SegmentMeta *>(segment_buffer.get());\n    uint32_t segment_ids_offset = footer_.segments_meta_size;\n    for (IndexFormat::SegmentMeta *iter = segment_start,\n                                  *end = segment_start + footer_.segment_count;\n         iter != end; ++iter) {\n      if (iter->segment_id_offset > footer_.segments_meta_size) {\n        return IndexError_InvalidValue;\n      }\n      if (iter->data_index > footer_.content_size) {\n        return IndexError_InvalidValue;\n      }\n      if (iter->data_index + iter->data_size > footer_.content_size) {\n        return IndexError_InvalidLength;\n      }\n\n      if (iter->segment_id_offset < segment_ids_offset) {\n        segment_ids_offset = iter->segment_id_offset;\n      }\n      id_hash_.emplace(\n          std::string(reinterpret_cast<const char *>(segment_start) +\n                      iter->segment_id_offset),\n          segments_.size());\n      segments_.emplace(\n          std::string(reinterpret_cast<const char *>(segment_start) +\n                      iter->segment_id_offset),\n          IndexMapping::SegmentInfo{IndexMapping::Segment{iter},\n                                    current_header_start_offset_, &header_});\n      max_segment_size_ =\n          std::max(max_segment_size_, iter->data_size + iter->padding_size);\n      if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >\n          footer_.segments_meta_size) {\n        return IndexError_InvalidLength;\n      }\n    }\n    buffer_pool_buffers_.push_back(std::move(segment_buffer));\n    return 0;\n  }\n\n  int ParseToMapping() {\n    while (true) {\n      int ret;\n      ret = ParseHeader(current_header_start_offset_);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to parse header, errno %d, %s\", ret,\n                  IndexError::What(ret));\n        return 
ret;\n      }\n\n      switch (header_.version) {\n        case IndexFormat::FORMAT_VERSION:\n          break;\n        default:\n          LOG_ERROR(\"Unsupported index version: %u\", header_.version);\n          return IndexError_Unsupported;\n      }\n\n      // Unpack footer\n      if (header_.meta_footer_size != sizeof(IndexFormat::MetaFooter)) {\n        return IndexError_InvalidLength;\n      }\n      if ((int32_t)header_.meta_footer_offset < 0) {\n        return IndexError_Unsupported;\n      }\n      uint64_t footer_offset =\n          header_.meta_footer_offset + current_header_start_offset_;\n      ret = ParseFooter(footer_offset);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to parse footer, errno %d, %s\", ret,\n                  IndexError::What(ret));\n        return ret;\n      }\n\n      // Unpack segment table\n      if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >\n          footer_.segments_meta_size) {\n        return IndexError_InvalidLength;\n      }\n      const uint64_t segment_start_offset =\n          footer_offset - footer_.segments_meta_size;\n      ret = ParseSegment(segment_start_offset);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to parse segment, errno %d, %s\", ret,\n                  IndexError::What(ret));\n        return ret;\n      }\n\n      if (footer_.next_meta_header_offset == 0) {\n        break;\n      }\n      current_header_start_offset_ = footer_.next_meta_header_offset;\n    }\n    return 0;\n  }\n\n  //! Flush storage\n  int flush(void) override {\n    return this->flush_index();\n  }\n\n  //! Close storage\n  int close(void) override {\n    this->close_index();\n    return 0;\n  }\n\n  //! Append a segment into storage\n  int append(const std::string &id, size_t size) override {\n    return this->append_segment(id, size);\n  }\n\n  //! Refresh meta information (checksum, update time, etc.)\n  void refresh(uint64_t chkp) override {\n    this->refresh_index(chkp);\n  }\n\n  //! 
Retrieve check point of storage\n  uint64_t check_point(void) const override {\n    return footer_.check_point;\n  }\n\n  //! Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id, int) override {\n    auto segment_info = this->get_segment_info(id);\n    if (!segment_info) {\n      return WrappedSegment::Pointer{};\n    }\n    return std::make_shared<WrappedSegment>(\n        this, &segment_info->segment, segment_info->segment_header_start_offset,\n        segment_info->segment_header, id_hash_[id]);\n  }\n\n  //! Test if it a segment exists\n  bool has(const std::string &id) const override {\n    return this->has_segment(id);\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return header_.magic;\n  }\n\n protected:\n  //! Initialize index version segment\n  int init_version_segment(void) {\n    size_t data_size = std::strlen(IndexVersion::Details());\n    int error_code =\n        this->append_segment(INDEX_VERSION_SEGMENT_NAME, data_size);\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    auto segment = &get_segment_info(INDEX_VERSION_SEGMENT_NAME)->segment;\n    if (!segment) {\n      return IndexError_MMapFile;\n    }\n    auto meta = segment->meta();\n    size_t capacity = static_cast<size_t>(meta->padding_size + meta->data_size);\n    memcpy(segment->data(), IndexVersion::Details(), data_size);\n    segment->set_dirty();\n    meta->data_crc = ailego::Crc32c::Hash(segment->data(), data_size, 0);\n    meta->data_size = data_size;\n    meta->padding_size = capacity - data_size;\n    return 0;\n  }\n\n  //! Initialize index file\n  int init_index(const std::string & /*path*/) {\n    // Add index version\n    int error_code = this->init_version_segment();\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    // Refresh mapping\n    this->refresh_index(0);\n    return 0;\n  }\n\n  //! 
Set the index file as dirty\n  void set_as_dirty(void) {\n    index_dirty_ = true;\n  }\n\n  //! Refresh meta information (checksum, update time, etc.)\n  void refresh_index(uint64_t /*chkp*/) {}\n\n  //! Flush index storage\n  int flush_index(void) {\n    return 0;\n  }\n\n  //! Close index storage\n  void close_index(void) {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    file_name_.clear();\n    id_hash_.clear();\n    segments_.clear();\n    memset(&header_, 0, sizeof(header_));\n    memset(&footer_, 0, sizeof(footer_));\n    buffer_pool_handle_.reset();\n    buffer_pool_.reset();\n    max_segment_size_ = 0;\n    buffer_pool_buffers_.clear();\n  }\n\n  //! Append a segment into storage\n  int append_segment(const std::string & /*id*/, size_t /*size*/) {\n    return 0;\n  }\n\n  //! Test if a segment exists\n  bool has_segment(const std::string &id) const {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! 
Get a segment from storage\n  IndexMapping::SegmentInfo *get_segment_info(const std::string &id) {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    auto iter = segments_.find(id);\n    if (iter == segments_.end()) {\n      return nullptr;\n    }\n    return &iter->second;\n  }\n\n private:\n  bool index_dirty_{false};\n  mutable std::mutex mapping_mutex_{};\n\n  // buffer manager\n  std::string file_name_;\n  IndexFormat::MetaHeader header_{};\n  IndexFormat::MetaFooter footer_{};\n  std::unordered_map<std::string, IndexMapping::SegmentInfo> segments_{};\n  std::unordered_map<std::string, size_t> id_hash_{};\n  uint64_t max_segment_size_{0};\n  std::vector<std::unique_ptr<char[]>> buffer_pool_buffers_{};\n\n  ailego::VecBufferPool::Pointer buffer_pool_{nullptr};\n  ailego::VecBufferPoolHandle::Pointer buffer_pool_handle_{nullptr};\n  uint64_t current_header_start_offset_{0u};\n  uint64_t buffer_size_{2lu * 1024 * 1024 * 1024};  // 2G\n};\n\nINDEX_FACTORY_REGISTER_STORAGE(BufferStorage);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/file_dumper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cerrno>\n#include <zvec/ailego/io/file.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_packer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! File Dumper\n */\nstruct FileDumper : public IndexDumper {\n public:\n  //! Constructor\n  FileDumper(void) {}\n\n  //! Destructor\n  virtual ~FileDumper(void) {\n    this->cleanup();\n  }\n\n  //! Initialize dumper\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup dumper\n  int cleanup(void) override {\n    if (!this->close_index()) {\n      return IndexError_PackIndex;\n    }\n    return 0;\n  }\n\n  //! 
Create a file for dumping\n  int create(const std::string &path) override {\n    size_t last_slash = path.rfind('/');\n    if (last_slash != std::string::npos) {\n      ailego::File::MakePath(path.substr(0, last_slash));\n    }\n\n    if (!file_.create(path.c_str(), sizeof(IndexFormat::MetaHeader))) {\n      LOG_ERROR(\"Failed to create file %s, errno %d, %s\", path.c_str(), errno,\n                std::strerror(errno));\n      return IndexError_CreateFile;\n    }\n\n    auto write_data = [this](const void *buf, size_t size) {\n      return this->file_.write(buf, size);\n    };\n    if (!packer_.setup(write_data)) {\n      LOG_ERROR(\"Failed to setup index package, errno %d, %s\", errno,\n                std::strerror(errno));\n      return IndexError_WriteData;\n    }\n    return 0;\n  }\n\n  //! Close file\n  int close(void) override {\n    if (!this->close_index()) {\n      return IndexError_PackIndex;\n    }\n    return 0;\n  }\n\n  //! Append a segment meta into table\n  int append(const std::string &id, size_t data_size, size_t padding_size,\n             uint32_t crc) override {\n    stab_.emplace_back(id, data_size, padding_size, crc);\n    return 0;\n  }\n\n  //! Write data to the storage\n  size_t write(const void *data, size_t len) override {\n    return packer_.pack(\n        [this](const void *buf, size_t size) {\n          return this->file_.write(buf, size);\n        },\n        data, len);\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return packer_.magic();\n  }\n\n protected:\n  //! 
Close index file\n  bool close_index(void) {\n    if (file_.is_valid()) {\n      auto write_data = [this](const void *buf, size_t size) {\n        return this->file_.write(buf, size);\n      };\n\n      if (!packer_.finish(write_data, stab_)) {\n        LOG_ERROR(\"Failed to finish packing index package\");\n        return false;\n      }\n      stab_.clear();\n      file_.close();\n      packer_.reset();\n    }\n    return true;\n  }\n\n private:\n  std::vector<IndexPacker::SegmentMeta> stab_{};\n  ailego::File file_{};\n  IndexPacker packer_{};\n};\n\nINDEX_FACTORY_REGISTER_DUMPER(FileDumper);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/file_read_storage.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cerrno>\n#include <ailego/utility/memory_helper.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_unpacker.h>\n#include \"utility_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! File Storage\n */\nclass FileReadStorage : public IndexStorage {\n public:\n  /*! File Storage Segment\n   */\n  class Segment : public IndexStorage::Segment {\n   public:\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    Segment(const FileReadStorage &container,\n            const IndexUnpacker::SegmentMeta &segment,\n            const std::shared_ptr<ailego::File> &file_ptr, size_t offset)\n        : data_offset_(offset + segment.data_offset()),\n          data_size_(segment.data_size()),\n          padding_size_(segment.padding_size()),\n          region_size_(segment.data_size() + segment.padding_size()),\n          data_crc_(segment.data_crc()),\n          enable_direct_io_(container.enable_direct_io_),\n          alone_file_handle_(container.alone_file_handle_),\n          file_ptr_(file_ptr) {\n      if (alone_file_handle_) {\n        file_path_ = container.file_path_;\n      }\n    }\n\n    //! 
Constructor\n    Segment(const Segment &rhs, const std::shared_ptr<ailego::File> &file_ptr)\n        : data_offset_(rhs.data_offset_),\n          data_size_(rhs.data_size_),\n          padding_size_(rhs.padding_size_),\n          region_size_(rhs.region_size_),\n          data_crc_(rhs.data_crc_),\n          file_ptr_(file_ptr),\n          file_path_(rhs.file_path_) {}\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return data_size_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const override {\n      return padding_size_;\n    }\n\n    //! Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return data_crc_;\n    }\n\n    size_t capacity(void) const override {\n      return region_size_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      return file_ptr_->read(data_offset_ + offset, buf, len);\n    }\n\n    //! 
Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      buffer_.reserve(len);\n      *data = buffer_.data();\n      return file_ptr_->read(data_offset_ + offset, (void *)*data, len);\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      buffer_.reserve(len);\n      data.reset(buffer_.data());\n      return file_ptr_->read(data_offset_ + offset, (void *)data.data(), len);\n    }\n\n    //! Read data from segment\n    bool read(SegmentData *iovec, size_t count) override {\n      size_t total = 0u;\n      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {\n        ailego_false_if_false(it->offset + it->length <= region_size_);\n        total += it->length;\n      }\n      ailego_false_if_false(total != 0);\n\n      buffer_.reserve(total);\n      uint8_t *buf = buffer_.data();\n      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {\n        ailego_false_if_false(file_ptr_->read(data_offset_ + it->offset, buf,\n                                              it->length) == it->length);\n        it->data = buf;\n        buf += it->length;\n      }\n      return true;\n    }\n\n    size_t write(size_t, const void *, size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    size_t resize(size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    void update_data_crc(uint32_t) override {\n      return;\n    }\n\n    //! 
Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return this->clone_segment<FileReadStorage>();\n    }\n\n   protected:\n    //! Clone the segment\n    template <typename T>\n    inline IndexStorage::Segment::Pointer clone_segment(void) {\n      auto file_ptr = alone_file_handle_ ? FileReadStorage::OpenFile(\n                                               file_path_, enable_direct_io_)\n                                         : file_ptr_;\n      if (file_ptr) {\n        return std::make_shared<typename T::Segment>(\n            *(static_cast<typename T::Segment *>(this)), file_ptr);\n      }\n      return IndexStorage::Segment::Pointer();\n    }\n\n   protected:\n    size_t data_offset_{0u};\n    size_t data_size_{0u};\n    size_t padding_size_{0u};\n    size_t region_size_{0u};\n    uint32_t data_crc_{0u};\n    bool enable_direct_io_{false};\n    bool alone_file_handle_{false};\n    std::vector<uint8_t> buffer_{};\n    std::shared_ptr<ailego::File> file_ptr_{};\n    std::string file_path_{};\n  };\n\n  /*! MMapFile Storage Segment\n   */\n  class MMapSegment : public Segment,\n                      public std::enable_shared_from_this<Segment> {\n   public:\n    //! Constructor\n    MMapSegment(const FileReadStorage &container,\n                const IndexUnpacker::SegmentMeta &segment,\n                const std::shared_ptr<ailego::File> &file_ptr, size_t offset,\n                const void *data, std::function<void()> &&cb)\n        : Segment(container, segment, file_ptr, offset),\n          data_(static_cast<const char *>(data)),\n          cleanup_(std::move(cb)) {\n      ailego_assert_with(data_, \"Null Pointer\");\n    }\n\n    virtual ~MMapSegment(void) {\n      cleanup_();\n    }\n\n    //! 
Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      memcpy(buf, data_ + offset, len);\n      return len;\n    }\n\n    //! Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      *data = data_ + offset;\n      return len;\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      data.reset((void *)(data_ + offset));\n      return len;\n    }\n\n    //! Read data from segment\n    bool read(SegmentData *iovec, size_t count) override {\n      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {\n        ailego_false_if_false(it->offset + it->length <= region_size_);\n        it->data = data_ + it->offset;\n      }\n      return true;\n    }\n\n    size_t write(size_t, const void *, size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    size_t resize(size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    void update_data_crc(uint32_t) override {\n      return;\n    }\n\n    //! Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   private:\n    const char *data_{nullptr};\n    std::function<void()> cleanup_{nullptr};\n  };\n\n  //! Destructor\n  virtual ~FileReadStorage(void) {}\n\n  //! 
Initialize container\n  int init(const ailego::Params &params) override {\n    params.get(FILE_READ_STORAGE_CHECKSUM_VALIDATION, &checksum_validation_);\n    params.get(FILE_READ_STORAGE_ENABLE_DIRECT_IO, &enable_direct_io_);\n    params.get(FILE_READ_STORAGE_ALONE_FILE_HANDLE, &alone_file_handle_);\n    params.get(FILE_READ_STORAGE_MEMORY_LOCKED, &memory_locked_);\n    params.get(FILE_READ_STORAGE_MEMORY_WARMUP, &memory_warmup_);\n    params.get(FILE_READ_STORAGE_MEMORY_SHARED, &memory_shared_);\n    params.get(FILE_READ_STORAGE_HEADER_OFFSET, &header_offset_);\n    params.get(FILE_READ_STORAGE_FOOTER_OFFSET, &footer_offset_);\n    return 0;\n  }\n\n  int flush(void) override {\n    return IndexError_NotImplemented;\n  }\n\n  int append(const std::string & /*id*/, size_t /*size*/) override {\n    return IndexError_NotImplemented;\n  }\n\n  void refresh(uint64_t) override {\n    return;\n  }\n\n  uint64_t check_point(void) const override {\n    return 0;\n  }\n\n  //! Cleanup container\n  int cleanup(void) override {\n    return this->close();\n  }\n\n  //! Load a index file into container\n  int open(const std::string &path, bool) override {\n    auto file_ptr = FileReadStorage::OpenFile(path, enable_direct_io_);\n    if (!file_ptr) {\n      return IndexError_OpenFile;\n    }\n\n    index_offset_ =\n        (header_offset_ >= 0 ? 0 : file_ptr->size()) + header_offset_;\n    size_t end_offset =\n        (footer_offset_ > 0 ? 0 : file_ptr->size()) + footer_offset_;\n    size_t size = end_offset > index_offset_ ? 
end_offset - index_offset_ : 0;\n    auto read_data = [this, &file_ptr, end_offset](\n                         size_t offset, const void **data, size_t len) {\n      buffer_.reserve(len);\n      *data = buffer_.data();\n      size_t off = index_offset_ + offset;\n      if (off + len > end_offset) {\n        if (off > end_offset) {\n          off = end_offset;\n        }\n        len = end_offset - off;\n      }\n      return file_ptr->read(off, (void *)*data, len);\n    };\n\n    IndexUnpacker unpacker;\n    if (!unpacker.unpack(read_data, size, checksum_validation_)) {\n      LOG_ERROR(\"Failed to unpack file: %s\", path.c_str());\n      return IndexError_UnpackIndex;\n    }\n    segments_ = std::move(*unpacker.mutable_segments());\n    magic_ = unpacker.magic();\n    file_path_ = path;\n    file_ptr_ = alone_file_handle_ ? nullptr : file_ptr;\n    return 0;\n  }\n\n  int close(void) override {\n    file_ptr_ = nullptr;\n    segments_.clear();\n    return 0;\n  }\n\n  //! Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id,\n                                     int level) override {\n    return level == 0 ? this->get_mmap_segment<FileReadStorage>(id)\n                      : this->get_segment<FileReadStorage>(id);\n  }\n\n  //! Retrieve all segments\n  std::map<std::string, IndexStorage::Segment::Pointer> get_all(\n      void) const override {\n    std::map<std::string, IndexStorage::Segment::Pointer> result;\n    auto file_ptr =\n        alone_file_handle_ && !file_path_.empty()\n            ? FileReadStorage::OpenFile(file_path_, enable_direct_io_)\n            : file_ptr_;\n    if (file_ptr) {\n      for (const auto &it : segments_) {\n        result.emplace(it.first,\n                       std::make_shared<FileReadStorage::Segment>(\n                           *(static_cast<const FileReadStorage *>(this)),\n                           it.second, file_ptr, index_offset_));\n      }\n    }\n    return result;\n  }\n\n  //! 
Test if it a segment exists\n  bool has(const std::string &id) const override {\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n protected:\n  //! Open a index file\n  static inline std::shared_ptr<ailego::File> OpenFile(const std::string &path,\n                                                       bool direct_io) {\n    auto file_ptr = std::make_shared<ailego::File>();\n    if (!file_ptr) {\n      LOG_ERROR(\"Failed to create file object, errno %d, %s\", errno,\n                std::strerror(errno));\n      return nullptr;\n    }\n    if (!file_ptr->open(path, true, direct_io)) {\n      LOG_ERROR(\"Failed to open file %s, errno %d, %s\", path.c_str(), errno,\n                std::strerror(errno));\n      return nullptr;\n    }\n    return file_ptr;\n  }\n\n  //! Retrieve a segment by id\n  template <typename T>\n  inline IndexStorage::Segment::Pointer get_segment(\n      const std::string &id) const {\n    auto it = segments_.find(id);\n    if (it == segments_.end()) {\n      return IndexStorage::Segment::Pointer();\n    }\n    auto file_ptr =\n        alone_file_handle_ && !file_path_.empty()\n            ? FileReadStorage::OpenFile(file_path_, enable_direct_io_)\n            : file_ptr_;\n    if (!file_ptr) {\n      return IndexStorage::Segment::Pointer();\n    }\n    return std::make_shared<typename T::Segment>(\n        *(static_cast<const T *>(this)), it->second, file_ptr, index_offset_);\n  }\n\n  //! Retrieve a mmap segment by id\n  template <typename T>\n  inline IndexStorage::Segment::Pointer get_mmap_segment(\n      const std::string &id) const {\n    auto it = segments_.find(id);\n    if (it == segments_.end()) {\n      return IndexStorage::Segment::Pointer();\n    }\n    const auto &segment = it->second;\n    auto file_ptr =\n        alone_file_handle_ && !file_path_.empty()\n            ? 
FileReadStorage::OpenFile(file_path_, enable_direct_io_)\n            : file_ptr_;\n    if (!file_ptr) {\n      return IndexStorage::Segment::Pointer();\n    }\n\n    int opt = memory_locked_ ? ailego::File::MMAP_LOCKED : 0;\n    opt |= memory_warmup_ ? ailego::File::MMAP_WARMUP : 0;\n    opt |= memory_shared_ ? ailego::File::MMAP_SHARED : 0;\n    size_t size = segment.data_size() + segment.padding_size();\n    size_t segment_offset = index_offset_ + segment.data_offset();\n    size_t offset = segment_offset / ailego::MemoryHelper::PageSize() *\n                    ailego::MemoryHelper::PageSize();\n    size_t bias = segment_offset - offset;\n\n    size += bias;\n    void *data = file_ptr->map(offset, size, opt);\n    if (data == nullptr) {\n      LOG_ERROR(\"Failed to mmap file: %s, offset: %zu, size: %zu\",\n                file_path_.c_str(), offset, size);\n      return IndexStorage::Segment::Pointer();\n    }\n    return std::make_shared<typename T::MMapSegment>(\n        *(static_cast<const T *>(this)), segment, file_ptr, index_offset_,\n        static_cast<char *>(data) + bias,\n        [=]() { ailego::File::MemoryUnmap(data, size); });\n  }\n\n  //! Retrieve all segments\n  template <typename T>\n  inline std::map<std::string, IndexStorage::Segment::Pointer> get_all_segments(\n      void) const {\n    std::map<std::string, IndexStorage::Segment::Pointer> result;\n    auto file_ptr =\n        alone_file_handle_ && !file_path_.empty()\n            ? 
FileReadStorage::OpenFile(file_path_, enable_direct_io_)\n            : file_ptr_;\n    if (file_ptr) {\n      for (const auto &it : segments_) {\n        result.emplace(it.first, std::make_shared<typename T::Segment>(\n                                     *(static_cast<const T *>(this)), it.second,\n                                     file_ptr, index_offset_));\n      }\n    }\n    return result;\n  }\n\n protected:\n  bool checksum_validation_{false};\n  bool enable_direct_io_{false};\n  bool alone_file_handle_{false};\n  bool memory_locked_{false};\n  bool memory_warmup_{false};\n  bool memory_shared_{false};\n  uint32_t magic_{0};\n  int64_t header_offset_{0};\n  int64_t footer_offset_{0};\n  size_t index_offset_{0};\n  std::vector<uint8_t> buffer_{};\n  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};\n  std::shared_ptr<ailego::File> file_ptr_{nullptr};\n  std::string file_path_{};\n};\n\nINDEX_FACTORY_REGISTER_STORAGE(FileReadStorage);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/memory_dumper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cerrno>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_memory.h>\n#include <zvec/core/framework/index_packer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Memory Dumper\n */\nstruct MemoryDumper : public IndexDumper {\n public:\n  //! Constructor\n  MemoryDumper(void) {}\n\n  //! Destructor\n  virtual ~MemoryDumper(void) {}\n\n  //! Initialize dumper\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup dumper\n  int cleanup(void) override {\n    stab_.clear();\n    packer_.reset();\n    rope_ = nullptr;\n    return 0;\n  }\n\n  //! 
Create a memory block for dumping\n  int create(const std::string &path) override {\n    rope_ = IndexMemory::Instance()->create(path);\n    if (!rope_) {\n      LOG_ERROR(\"Failed to create memory block %s, errno %d, %s\", path.c_str(),\n                errno, std::strerror(errno));\n      return IndexError_CreateFile;\n    }\n    // Append a memory block\n    rope_->append(0);\n\n    auto write_data = [this](const void *buf, size_t size) {\n      return (*this->rope_)[0].append(buf, size);\n    };\n    if (!packer_.setup(write_data)) {\n      LOG_ERROR(\"Failed to setup index package, errno %d, %s\", errno,\n                std::strerror(errno));\n      return IndexError_WriteData;\n    }\n    return 0;\n  }\n\n  //! Close memory block\n  int close(void) override {\n    auto write_data = [this](const void *buf, size_t size) {\n      return (*this->rope_)[0].append(buf, size);\n    };\n\n    if (!packer_.finish(write_data, stab_)) {\n      LOG_ERROR(\"Failed to finish packing index package\");\n      return IndexError_PackIndex;\n    }\n    stab_.clear();\n    packer_.reset();\n    rope_ = nullptr;\n    return 0;\n  }\n\n  //! Append a segment meta into table\n  int append(const std::string &id, size_t data_size, size_t padding_size,\n             uint32_t crc) override {\n    stab_.emplace_back(id, data_size, padding_size, crc);\n    return 0;\n  }\n\n  //! Append data to the storage\n  size_t write(const void *data, size_t len) override {\n    return packer_.pack(\n        [this](const void *buf, size_t size) {\n          return (*this->rope_)[0].append(buf, size);\n        },\n        data, len);\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return packer_.magic();\n  }\n\n private:\n  std::vector<IndexPacker::SegmentMeta> stab_{};\n  IndexMemory::Rope::Pointer rope_{};\n  IndexPacker packer_{};\n};\n\nINDEX_FACTORY_REGISTER_DUMPER(MemoryDumper);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/memory_read_storage.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cerrno>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_memory.h>\n#include <zvec/core/framework/index_unpacker.h>\n#include \"utility_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Memory Storage\n */\nclass MemoryReadStorage : public IndexStorage {\n public:\n  /*! Memory Storage Segment\n   */\n  class Segment : public IndexStorage::Segment,\n                  public std::enable_shared_from_this<Segment> {\n   public:\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    Segment(const IndexMemory::Rope::Pointer &rope,\n            const IndexUnpacker::SegmentMeta &segment)\n        : data_offset_(segment.data_offset()),\n          data_size_(segment.data_size()),\n          padding_size_(segment.padding_size()),\n          region_size_(segment.data_size() + segment.padding_size()),\n          data_crc_(segment.data_crc()),\n          block_(&(*rope)[0]),\n          rope_(rope) {}\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return data_size_;\n    }\n\n    //! 
Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return data_crc_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const override {\n      return padding_size_;\n    }\n\n    size_t capacity(void) const override {\n      return region_size_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      return block_->fetch(data_offset_ + offset, buf, len);\n    }\n\n    //! Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      return block_->read(data_offset_ + offset, data, len);\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      const void *data_ptr = nullptr;\n      size_t return_value = block_->read(data_offset_ + offset, &data_ptr, len);\n      data.reset((void *)data_ptr);\n      return return_value;\n    }\n\n    //! 
Read data from segment\n    bool read(SegmentData *iovec, size_t count) override {\n      for (auto *end = iovec + count; iovec != end; ++iovec) {\n        ailego_false_if_false(iovec->offset + iovec->length <= region_size_);\n        block_->read(data_offset_ + iovec->offset, &iovec->data, iovec->length);\n      }\n      return true;\n    }\n\n    size_t write(size_t, const void *, size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    size_t resize(size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    void update_data_crc(uint32_t) override {\n      return;\n    }\n\n    //! Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   private:\n    size_t data_offset_{0u};\n    size_t data_size_{0u};\n    size_t padding_size_{0u};\n    size_t region_size_{0u};\n    uint32_t data_crc_{0u};\n    IndexMemory::Block *block_{nullptr};\n    IndexMemory::Rope::Pointer rope_{};\n  };\n\n  //! Destructor\n  virtual ~MemoryReadStorage(void) {}\n\n  //! Initialize container\n  int init(const ailego::Params &params) override {\n    params.get(MEMORY_CONTAINER_CHECKSUM_VALIDATION, &checksum_validation_);\n    return 0;\n  }\n\n  //! Cleanup container\n  int flush(void) override {\n    return IndexError_NotImplemented;\n  }\n\n  int append(const std::string &, size_t) override {\n    return IndexError_NotImplemented;\n  }\n\n  void refresh(uint64_t) override {\n    return;\n  }\n\n  uint64_t check_point(void) const override {\n    return 0;\n  }\n\n  //! Cleanup container\n  int cleanup(void) override {\n    return this->close();\n  }\n\n  //! 
Load a index file into container\n  int open(const std::string &path, bool) override {\n    rope_ = IndexMemory::Instance()->open(path);\n    if (!rope_) {\n      LOG_ERROR(\"Failed to open memory rope %s\", path.c_str());\n      return IndexError_NoExist;\n    }\n    if (rope_->empty()) {\n      LOG_ERROR(\"The memory rope %s is empty.\", path.c_str());\n      return IndexError_NoExist;\n    }\n\n    auto read_data = [this](size_t offset, const void **data, size_t len) {\n      return (*this->rope_)[0].read(offset, data, len);\n    };\n\n    IndexUnpacker unpacker;\n    if (!unpacker.unpack(read_data, (*rope_)[0].size(), checksum_validation_)) {\n      LOG_ERROR(\"Failed to unpack memory block: %s\", path.c_str());\n      return IndexError_UnpackIndex;\n    }\n    segments_ = std::move(*unpacker.mutable_segments());\n    magic_ = unpacker.magic();\n    return 0;\n  }\n\n  //! Unload all indexes\n  int close(void) override {\n    rope_ = nullptr;\n    segments_.clear();\n    return 0;\n  }\n\n  //! Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id, int) override {\n    if (!rope_) {\n      return IndexStorage::Segment::Pointer();\n    }\n    auto it = segments_.find(id);\n    if (it == segments_.end()) {\n      return IndexStorage::Segment::Pointer();\n    }\n    return std::make_shared<Segment>(rope_, it->second);\n  }\n\n  //! Retrieve all segments\n  std::map<std::string, IndexStorage::Segment::Pointer> get_all(\n      void) const override {\n    std::map<std::string, IndexStorage::Segment::Pointer> result;\n    if (rope_) {\n      for (const auto &it : segments_) {\n        result.emplace(it.first, std::make_shared<Segment>(rope_, it.second));\n      }\n    }\n    return result;\n  }\n\n  //! Test if it a segment exists\n  bool has(const std::string &id) const override {\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! 
Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n private:\n  bool checksum_validation_{false};\n  uint32_t magic_{0};\n  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};\n  IndexMemory::Rope::Pointer rope_{};\n};\n\nINDEX_FACTORY_REGISTER_STORAGE(MemoryReadStorage);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/mmap_file_read_storage.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cerrno>\n#include <zvec/ailego/io/mmap_file.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_unpacker.h>\n#include \"utility_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! MMap File Storage\n */\nclass MMapFileReadStorage : public IndexStorage {\n public:\n  /*! MMap File Storage Segment\n   */\n  class Segment : public IndexStorage::Segment,\n                  public std::enable_shared_from_this<Segment> {\n   public:\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    Segment(const std::shared_ptr<ailego::MMapFile> &file_ptr, size_t offset,\n            const IndexUnpacker::SegmentMeta &segment)\n        : data_ptr_(reinterpret_cast<uint8_t *>(file_ptr->region()) + offset +\n                    segment.data_offset()),\n          data_size_(segment.data_size()),\n          padding_size_(segment.padding_size()),\n          region_size_(segment.data_size() + segment.padding_size()),\n          data_crc_(segment.data_crc()),\n          file_ptr_(file_ptr) {}\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return data_size_;\n    }\n\n    //! 
Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return data_crc_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const override {\n      return padding_size_;\n    }\n\n    size_t capacity(void) const override {\n      return region_size_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      memcpy(buf, data_ptr_ + offset, len);\n      return len;\n    }\n\n    //! Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      *data = data_ptr_ + offset;\n      return len;\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > region_size_)) {\n        if (offset > region_size_) {\n          offset = region_size_;\n        }\n        len = region_size_ - offset;\n      }\n      data.reset((void *)(data_ptr_ + offset));\n      return len;\n    }\n\n    //! Read data from segment\n    bool read(SegmentData *iovec, size_t count) override {\n      for (auto *end = iovec + count; iovec != end; ++iovec) {\n        ailego_false_if_false(iovec->offset + iovec->length <= region_size_);\n        iovec->data = data_ptr_ + iovec->offset;\n      }\n      return true;\n    }\n\n    size_t write(size_t, const void *, size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    size_t resize(size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    void update_data_crc(uint32_t) override {\n      return;\n    }\n\n    //! 
Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   private:\n    const uint8_t *data_ptr_{nullptr};\n    size_t data_size_{0u};\n    size_t padding_size_{0u};\n    size_t region_size_{0u};\n    uint32_t data_crc_{0u};\n    std::shared_ptr<ailego::MMapFile> file_ptr_{nullptr};\n  };\n\n  //! Destructor\n  virtual ~MMapFileReadStorage(void) {}\n\n  //! Initialize container\n  int init(const ailego::Params &params) override {\n    params.get(MMAPFILE_READ_STORAGE_MEMORY_LOCKED, &memory_locked_);\n    params.get(MMAPFILE_READ_STORAGE_MEMORY_WARMUP, &memory_warmup_);\n    params.get(MMAPFILE_READ_STORAGE_MEMORY_SHARED, &memory_shared_);\n    params.get(MMAPFILE_READ_STORAGE_CHECKSUM_VALIDATION,\n               &checksum_validation_);\n    params.get(MMAPFILE_READ_STORAGE_HEADER_OFFSET, &header_offset_);\n    params.get(MMAPFILE_READ_STORAGE_FOOTER_OFFSET, &footer_offset_);\n    return 0;\n  }\n\n  int flush(void) override {\n    return 0;\n  }\n\n  int append(const std::string &, size_t) override {\n    return IndexError_NotImplemented;\n  }\n\n  void refresh(uint64_t) override {\n    return;\n  }\n\n  uint64_t check_point(void) const override {\n    return 0;\n  }\n\n  //! Cleanup container\n  int cleanup(void) override {\n    return this->close();\n  }\n\n  //! Load a index file into container\n  int open(const std::string &path, bool) override {\n    file_ptr_ = std::make_shared<ailego::MMapFile>();\n    if (!file_ptr_) {\n      LOG_ERROR(\"Failed to create mmap file object, errno %d, %s\", errno,\n                std::strerror(errno));\n      return IndexError_NoMemory;\n    }\n\n    if (!file_ptr_->open(path.c_str(), true, memory_shared_)) {\n      LOG_ERROR(\"Failed to open file %s, errno %d, %s\", path.c_str(), errno,\n                std::strerror(errno));\n      return IndexError_OpenFile;\n    }\n\n    index_offset_ =\n        (header_offset_ >= 0 ? 
0 : file_ptr_->size()) + header_offset_;\n    size_t end_offset =\n        (footer_offset_ > 0 ? 0 : file_ptr_->size()) + footer_offset_;\n    size_t size = end_offset > index_offset_ ? end_offset - index_offset_ : 0;\n    if (memory_locked_ && !file_ptr_->lock()) {\n      LOG_WARN(\"Failed to lock pages with size %zu, errno %d, %s\",\n               file_ptr_->size(), errno, std::strerror(errno));\n    }\n    if (memory_warmup_ && !checksum_validation_) {\n      ailego::File::MemoryWarmup(\n          static_cast<char *>(file_ptr_->region()) + index_offset_, size);\n    }\n\n    auto read_data = [this, end_offset](size_t offset, const void **data,\n                                        size_t len) {\n      size_t off = offset + index_offset_;\n      if (off + len > end_offset) {\n        if (off > end_offset) {\n          off = end_offset;\n        }\n        len = end_offset - off;\n      }\n      *data = (uint8_t *)file_ptr_->region() + off;\n      return len;\n    };\n\n    IndexUnpacker unpacker;\n    if (!unpacker.unpack(read_data, size, checksum_validation_)) {\n      LOG_ERROR(\"Failed to unpack file: %s\", path.c_str());\n      return IndexError_UnpackIndex;\n    }\n    segments_ = std::move(*unpacker.mutable_segments());\n    magic_ = unpacker.magic();\n    return 0;\n  }\n\n  int close(void) override {\n    file_ptr_ = nullptr;\n    segments_.clear();\n    return 0;\n  }\n\n  //! 
Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id, int) override {\n    if (!file_ptr_) {\n      return IndexStorage::Segment::Pointer();\n    }\n    auto it = segments_.find(id);\n    if (it == segments_.end()) {\n      return IndexStorage::Segment::Pointer();\n    }\n    return std::make_shared<MMapFileReadStorage::Segment>(\n        file_ptr_, index_offset_, it->second);\n  }\n\n  std::map<std::string, IndexStorage::Segment::Pointer> get_all(\n      void) const override {\n    std::map<std::string, IndexStorage::Segment::Pointer> result;\n    if (file_ptr_) {\n      for (const auto &it : segments_) {\n        result.emplace(it.first, std::make_shared<MMapFileReadStorage::Segment>(\n                                     file_ptr_, index_offset_, it.second));\n      }\n    }\n    return result;\n  }\n\n  //! Test if it a segment exists\n  bool has(const std::string &id) const override {\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n private:\n  bool memory_locked_{false};\n  bool memory_warmup_{false};\n  bool memory_shared_{false};\n  bool checksum_validation_{false};\n  int64_t header_offset_{0};\n  int64_t footer_offset_{0};\n  size_t index_offset_{0};\n  uint32_t magic_{0};\n  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};\n  std::shared_ptr<ailego::MMapFile> file_ptr_{nullptr};\n};\n\nINDEX_FACTORY_REGISTER_STORAGE(MMapFileReadStorage);\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/utility/mmap_file_storage.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <mutex>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_mapping.h>\n#include <zvec/core/framework/index_version.h>\n#include \"utility_params.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! MMap File Storage\n */\nclass MMapFileStorage : public IndexStorage {\n public:\n  /*! Index Storage Segment\n   */\n  class Segment : public IndexStorage::Segment,\n                  public std::enable_shared_from_this<Segment> {\n   public:\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    Segment(MMapFileStorage *owner, IndexMapping::Segment *segment)\n        : segment_(segment),\n          owner_(owner),\n          capacity_(static_cast<size_t>(segment->meta()->data_size +\n                                        segment->meta()->padding_size)) {}\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return static_cast<size_t>(segment_->meta()->data_size);\n    }\n\n    //! Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return segment_->meta()->data_crc;\n    }\n\n    //! 
Retrieve size of padding\n    size_t padding_size(void) const override {\n      return static_cast<size_t>(segment_->meta()->padding_size);\n    }\n\n    //! Retrieve capacity of segment\n    size_t capacity(void) const override {\n      return capacity_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      memmove(buf, (const uint8_t *)segment_->data() + offset, len);\n      return len;\n    }\n\n    //! Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      *data = (uint8_t *)segment_->data() + offset;\n      return len;\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      if (ailego_unlikely(offset + len > segment_->meta()->data_size)) {\n        auto meta = segment_->meta();\n        if (offset > meta->data_size) {\n          offset = meta->data_size;\n        }\n        len = meta->data_size - offset;\n      }\n      data.reset((uint8_t *)segment_->data() + offset);\n      return len;\n    }\n\n    //! 
Write data into the storage with offset\n    size_t write(size_t offset, const void *data, size_t len) override {\n      size_t data_tail = offset + len;\n      ailego_zero_if_false(data_tail <= capacity_);\n      auto meta = segment_->meta();\n      if (data_tail > meta->data_size) {\n        meta->data_size = data_tail;\n        meta->padding_size = capacity_ - data_tail;\n        owner_->set_as_dirty();\n      }\n      memmove((uint8_t *)segment_->data() + offset, data, len);\n      segment_->set_dirty();\n      return len;\n    }\n\n    //! Resize size of data\n    size_t resize(size_t size) override {\n      auto meta = segment_->meta();\n      if (meta->data_size != size) {\n        if (size > capacity_) {\n          size = capacity_;\n        }\n        meta->data_size = size;\n        meta->padding_size = capacity_ - size;\n        owner_->set_as_dirty();\n      }\n      return size;\n    }\n\n    //! Update crc of data\n    void update_data_crc(uint32_t crc) override {\n      segment_->meta()->data_crc = crc;\n    }\n\n    //! Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   private:\n    IndexMapping::Segment *segment_{};\n    MMapFileStorage *owner_{nullptr};\n    size_t capacity_{};\n  };\n\n  //! Destructor\n  virtual ~MMapFileStorage(void) {\n    this->cleanup();\n  }\n\n  //! Initialize storage\n  int init(const ailego::Params &params) override {\n    uint32_t val = params.get_as_uint32(MMAPFILE_STORAGE_SEGMENT_META_CAPACITY);\n    if (val != 0) {\n      segment_meta_capacity_ = val;\n    }\n    params.get(MMAPFILE_STORAGE_COPY_ON_WRITE, &copy_on_write_);\n    params.get(MMAPFILE_STORAGE_FORCE_FLUSH, &force_flush_);\n    params.get(MMAPFILE_STORAGE_MEMORY_LOCKED, &memory_locked_);\n    params.get(MMAPFILE_STORAGE_MEMORY_WARMUP, &memory_warmup_);\n    return 0;\n  }\n\n  //! Cleanup storage\n  int cleanup(void) override {\n    this->close_index();\n    return 0;\n  }\n\n  //! 
Open storage\n  int open(const std::string &path, bool create) override {\n    if (!ailego::File::IsExist(path) && create) {\n      size_t last_slash = path.rfind('/');\n      if (last_slash != std::string::npos) {\n        ailego::File::MakePath(path.substr(0, last_slash));\n      }\n\n      int error_code = this->init_index(path);\n      if (error_code != 0) {\n        return error_code;\n      }\n    }\n    return mapping_.open(path, copy_on_write_, force_flush_);\n  }\n\n  //! Flush storage\n  int flush(void) override {\n    return this->flush_index();\n  }\n\n  //! Close storage\n  int close(void) override {\n    this->close_index();\n    return 0;\n  }\n\n  //! Append a segment into storage\n  int append(const std::string &id, size_t size) override {\n    return this->append_segment(id, size);\n  }\n\n  //! Refresh meta information (checksum, update time, etc.)\n  void refresh(uint64_t chkp) override {\n    this->refresh_index(chkp);\n  }\n\n  //! Retrieve check point of storage\n  uint64_t check_point(void) const override {\n    return mapping_.footer().check_point;\n  }\n\n  //! Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id, int) override {\n    IndexMapping::Segment *segment = this->get_segment(id);\n    if (!segment) {\n      return MMapFileStorage::Segment::Pointer();\n    }\n    return std::make_shared<MMapFileStorage::Segment>(this, segment);\n  }\n\n  //! Test if it a segment exists\n  bool has(const std::string &id) const override {\n    return this->has_segment(id);\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return mapping_.magic();\n  }\n\n protected:\n  //! 
Initialize index version segment\n  int init_version_segment(void) {\n    size_t data_size = std::strlen(IndexVersion::Details());\n    int error_code =\n        this->append_segment(INDEX_VERSION_SEGMENT_NAME, data_size);\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    IndexMapping::Segment *segment = get_segment(INDEX_VERSION_SEGMENT_NAME);\n    if (!segment) {\n      return IndexError_MMapFile;\n    }\n    auto meta = segment->meta();\n    size_t capacity = static_cast<size_t>(meta->padding_size + meta->data_size);\n    memcpy(segment->data(), IndexVersion::Details(), data_size);\n    segment->set_dirty();\n    meta->data_crc = ailego::Crc32c::Hash(segment->data(), data_size, 0);\n    meta->data_size = data_size;\n    meta->padding_size = capacity - data_size;\n    return 0;\n  }\n\n  //! Initialize index file\n  int init_index(const std::string &path) {\n    int error_code = mapping_.create(path, segment_meta_capacity_);\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    // Add index version\n    error_code = this->init_version_segment();\n    if (error_code != 0) {\n      return error_code;\n    }\n\n    // Refresh mapping\n    this->refresh_index(0);\n\n    // Close mapping\n    mapping_.close();\n    return 0;\n  }\n\n  bool isHugePage(void) const override {\n    return mapping_.huge_page();\n  }\n\n  //! Set the index file as dirty\n  void set_as_dirty(void) {\n    index_dirty_ = true;\n  }\n\n  //! Refresh meta information (checksum, update time, etc.)\n  void refresh_index(uint64_t chkp) {\n    mapping_.refresh(chkp);\n    index_dirty_ = false;\n  }\n\n  //! Flush index storage\n  int flush_index(void) {\n    if (index_dirty_) {\n      this->refresh_index(0);\n    }\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    return mapping_.flush();\n  }\n\n  //! 
Close index storage\n  void close_index(void) {\n    if (index_dirty_) {\n      this->refresh_index(0);\n    }\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    mapping_.close();\n  }\n\n  //! Append a segment into storage\n  int append_segment(const std::string &id, size_t size) {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    return mapping_.append(id, size);\n  }\n\n  //! Test if a segment exists\n  bool has_segment(const std::string &id) const {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    return mapping_.has(id);\n  }\n\n  //! Get a segment from storage\n  IndexMapping::Segment *get_segment(const std::string &id) {\n    std::lock_guard<std::mutex> latch(mapping_mutex_);\n    return mapping_.map(id, memory_warmup_, memory_locked_);\n  }\n\n private:\n  uint32_t segment_meta_capacity_{1024 * 1024};\n  bool copy_on_write_{false};\n  bool force_flush_{false};\n  bool memory_locked_{false};\n  bool memory_warmup_{false};\n  bool index_dirty_{false};\n  mutable IndexMapping mapping_{};\n  mutable std::mutex mapping_mutex_{};\n};\n\nINDEX_FACTORY_REGISTER_STORAGE(MMapFileStorage);\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/sparse_utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <algorithm>\n#include <cmath>\n#include <iostream>\n#include <numeric>\n#include <vector>\n#include <zvec/core/framework/index_document.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\nnamespace core {\n\nconstexpr uint32_t SEGMENT_ID_BITS = 16;\nconstexpr uint32_t SEGMENT_ID_MASK = 0xFFFF;\n\nstruct SparseSegmentInfo {\n public:\n  uint32_t seg_id_{-1U};\n  uint32_t vec_cnt_{0};\n\n public:\n  SparseSegmentInfo() : seg_id_{-1U}, vec_cnt_{0} {}\n\n  SparseSegmentInfo(uint32_t seg_id, uint32_t vec_cnt)\n      : seg_id_{seg_id}, vec_cnt_{vec_cnt} {}\n};\n\nstruct VectorItem {\n  key_t pkey_{0};\n  std::vector<uint8_t> vec_{};\n  // TODO: drop support for hybrid vectors\n  std::string sparse_buffer_{};\n  uint32_t sparse_unit_size_{0};\n\n  VectorItem() {}\n  VectorItem(key_t pkey, std::vector<uint8_t> vec)\n      : pkey_(pkey), vec_(std::move(vec)) {}\n  // TODO: drop support for hybrid vectors\n  VectorItem(key_t pkey, std::vector<uint8_t> vec, std::string sparse_buffer,\n             uint32_t sparse_unit_size)\n      : pkey_(pkey),\n        vec_(std::move(vec)),\n        sparse_buffer_(std::move(sparse_buffer)),\n        sparse_unit_size_{sparse_unit_size} {}\n};\n\nstruct SparseVectorItem {\n  key_t pkey_{0};\n  std::vector<uint32_t> 
sparse_indices_{};\n  std::string sparse_values_{};\n\n  SparseVectorItem() {}\n  SparseVectorItem(key_t pkey, std::vector<uint32_t> sparse_indices,\n                   std::string sparse_values)\n      : pkey_(pkey),\n        sparse_indices_(std::move(sparse_indices)),\n        sparse_values_(std::move(sparse_values)) {}\n};\n\nclass SparseUtility {\n public:\n  //! Check whether arr is an arithmetic sequence,\n  //! For example: 1,3,5,7,9,11...\n  template <typename T>\n  static bool IsArithmeticSequence(T *arr, size_t size) {\n    static_assert(std::is_integral<T>::value, \"Integral required\");\n    if (size <= 2) return true;\n\n    T step = arr[1] - arr[0];\n    for (size_t i = 2; i < size; ++i) {\n      if (arr[i] - arr[i - 1] != step) {\n        return false;\n      }\n    }\n    return true;\n  }\n\n  //! Sort arr with size in ascending order, and keep the index position\n  //! o2n keeps the mapping: original position => new position\n  //! n2o keeps the mapping: new position => original position\n  //! For example, the input arr = [5, 3, 9, 6, 7], size = 5, after sort\n  //      arr = [3, 5, 6, 7, 9]\n  //      o2n = [1, 0, 4, 2, 3]\n  //      n2o = [1, 0, 3, 4, 2]\n  //! To save memory, no extra memory is allocated\n  //! return false if arr is already in order and does not need sorting\n  template <typename T, typename I>\n  static bool Sort(T *arr, std::vector<I> *o2n, std::vector<I> *n2o,\n                   size_t size) {\n    {  //! checking the arr is already in ascending order\n      size_t i = 1;\n      for (; i < size; ++i) {\n        if (arr[i - 1] > arr[i]) {\n          break;\n        }\n      }\n      if (i >= size) {\n        return false;\n      }\n    }\n    o2n->resize(size);\n    n2o->resize(size);\n\n    std::iota(n2o->begin(), n2o->end(), 0U);\n    std::sort(n2o->begin(), n2o->end(),\n              [&](I i, I j) { return arr[i] < arr[j]; });\n    for (I i = 0U; i < size; ++i) {\n      (*o2n)[(*n2o)[i]] = i;\n    }\n    //! 
reorder arr in place, according to given n2o index\n    for (I i = 0; i < size; ++i) {\n      if (i != (*n2o)[i]) {\n        T tmp = arr[i];\n        I j = i, k;\n        while (i != (k = (*n2o)[j])) {\n          arr[j] = arr[k];\n          (*n2o)[j] = j;\n          j = k;\n        }\n        arr[j] = tmp;\n        (*n2o)[j] = j;\n      }\n    }\n\n    for (I i = 0U; i < size; ++i) {\n      (*n2o)[(*o2n)[i]] = i;\n    }\n\n    return true;\n  }\n\n  static inline bool filter_sparse_query_fp16(\n      const uint32_t sparse_count, const uint32_t *sparse_indices,\n      const ailego::Float16 *sparse_query, uint32_t &new_sparse_count,\n      std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,\n      float filtering_budget) {\n    ailego::Float16 max_sparse_dim_value{0.0f};\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      if (ailego::Float16::Absolute(sparse_query[i]) > max_sparse_dim_value) {\n        max_sparse_dim_value = ailego::Float16::Absolute(sparse_query[i]);\n      }\n    }\n\n    ailego::Float16 threshold{max_sparse_dim_value};\n    threshold *= filtering_budget;\n\n    size_t unit_size = IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP16);\n\n    new_sparse_count = 0;\n\n    std::vector<ailego::Float16> temp_sparse_query;\n    for (size_t i = 0; i < sparse_count; i++) {\n      if (ailego::Float16::Absolute(sparse_query[i]) > threshold) {\n        new_sparse_indices.push_back(sparse_indices[i]);\n        temp_sparse_query.push_back(sparse_query[i]);\n\n        new_sparse_count++;\n      }\n    }\n\n    size_t buffer_size = new_sparse_count * unit_size;\n    new_sparse_query.reserve(buffer_size);\n    new_sparse_query.append(\n        reinterpret_cast<const char *>(temp_sparse_query.data()), buffer_size);\n\n    return true;\n  }\n\n  static inline bool filter_sparse_query_fp32(\n      const uint32_t sparse_count, const uint32_t *sparse_indices,\n      const float *sparse_query, uint32_t &new_sparse_count,\n      
std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,\n      float filtering_budget) {\n    float max_sparse_dim_value{0.0f};\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      if (std::fabs(sparse_query[i]) > max_sparse_dim_value) {\n        max_sparse_dim_value = std::fabs(sparse_query[i]);\n      }\n    }\n\n    float threshold = max_sparse_dim_value * filtering_budget;\n\n    size_t unit_size = IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32);\n\n    new_sparse_count = 0;\n\n    std::vector<float> temp_sparse_query;\n    for (size_t i = 0; i < sparse_count; i++) {\n      if (std::fabs(sparse_query[i]) > threshold) {\n        new_sparse_indices.push_back(sparse_indices[i]);\n        temp_sparse_query.push_back(sparse_query[i]);\n\n        new_sparse_count++;\n      }\n    }\n\n    size_t buffer_size = new_sparse_count * unit_size;\n    new_sparse_query.reserve(buffer_size);\n    new_sparse_query.append(\n        reinterpret_cast<const char *>(temp_sparse_query.data()), buffer_size);\n\n    return true;\n  }\n\n  static inline bool filter_sparse_query_impl(\n      const uint32_t sparse_count, const uint32_t *sparse_indices,\n      const void *sparse_query, uint32_t &new_sparse_count,\n      std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,\n      float filtering_budget, IndexMeta::DataType type) {\n    switch (type) {\n      case IndexMeta::DataType::DT_FP32:\n        return filter_sparse_query_fp32(\n            sparse_count, sparse_indices,\n            reinterpret_cast<const float *>(sparse_query), new_sparse_count,\n            new_sparse_indices, new_sparse_query, filtering_budget);\n      case IndexMeta::DataType::DT_FP16:\n        return filter_sparse_query_fp16(\n            sparse_count, sparse_indices,\n            reinterpret_cast<const ailego::Float16 *>(sparse_query),\n            new_sparse_count, new_sparse_indices, new_sparse_query,\n            filtering_budget);\n      default:\n        
LOG_ERROR(\"Data type not supported\");\n        return false;\n    }\n\n    return false;\n  }\n\n  static int FilterSparseQuery(uint32_t sparse_count,\n                               const uint32_t *sparse_index,\n                               const void *sparse_value,\n                               IndexMeta::DataType type, uint32_t unit_size,\n                               float filtering_ratio,\n                               std::string *filtered_buffer) {\n    uint32_t new_sparse_count;\n    std::vector<uint32_t> new_sparse_indices;\n    std::string new_sparse_query;\n\n    bool ret = filter_sparse_query_impl(\n        sparse_count, sparse_index, sparse_value, new_sparse_count,\n        new_sparse_indices, new_sparse_query, filtering_ratio, type);\n    if (!ret) {\n      LOG_ERROR(\"sparse query filter failed\");\n      return false;\n    }\n\n    SparseUtility::TransSparseFormat(\n        new_sparse_count, new_sparse_indices.data(), new_sparse_query.data(),\n        unit_size, *filtered_buffer);\n\n    return true;\n  }\n\n  static void TransSparseFormat(uint32_t sparse_count,\n                                const uint32_t *sparse_index,\n                                const void *sparse_value, uint32_t unit_size,\n                                std::string &buffer) {\n    uint32_t seg_count = 0;\n    if (sparse_count == 0) {\n      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                    sizeof(uint32_t));\n\n      buffer.append(reinterpret_cast<const char *>(&seg_count),\n                    sizeof(uint32_t));\n\n      return;\n    }\n\n    std::vector<SparseSegmentInfo> seg_infos;\n\n    uint32_t cur_seg_id = -1U;\n    uint32_t cur_vec_cnt = 0;\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;\n      if (cur_seg_id == -1U) {\n        cur_seg_id = seg_id;\n        cur_vec_cnt++;\n      } else {\n        
if (seg_id == cur_seg_id) {\n          cur_vec_cnt++;\n        } else if (seg_id > cur_seg_id) {\n          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n\n          cur_seg_id = seg_id;\n          cur_vec_cnt = 1;\n        } else {\n          // std::abort();\n        }\n      }\n    }\n\n    if (cur_vec_cnt > 0) {\n      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);\n    }\n\n    uint32_t buffer_len = 2 * sizeof(uint32_t) +\n                          seg_infos.size() * 2 * sizeof(uint32_t) +\n                          sparse_count * (sizeof(uint16_t) + sizeof(float));\n\n    buffer.reserve(buffer_len);\n\n    buffer.append(reinterpret_cast<const char *>(&sparse_count),\n                  sizeof(uint32_t));\n\n    seg_count = seg_infos.size();\n    buffer.append(reinterpret_cast<const char *>(&seg_count), sizeof(uint32_t));\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t seg_id = seg_infos[i].seg_id_;\n      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t vec_cnt = seg_infos[i].vec_cnt_;\n      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));\n    }\n\n    for (size_t i = 0; i < sparse_count; ++i) {\n      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;\n      buffer.append(reinterpret_cast<const char *>(&temp_dim),\n                    sizeof(uint16_t));\n    }\n\n    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);\n    for (size_t i = 0; i < sparse_count; ++i) {\n      buffer.append(sparse_value_ptr, unit_size);\n      sparse_value_ptr += unit_size;\n    }\n  }\n\n  static void ReverseSparseFormat(const void *buffer, uint32_t *sparse_count,\n                                  std::string *sparse_indices_buffer,\n                                  std::string *sparse_values_buffer,\n                                  uint32_t unit_size) {\n    const uint8_t *buffer_data = 
reinterpret_cast<const uint8_t *>(buffer);\n\n    *sparse_count = *reinterpret_cast<const uint32_t *>(buffer_data);\n\n    if (*sparse_count == 0) return;\n\n    uint32_t sparse_count_value = *sparse_count;\n\n    sparse_indices_buffer->reserve(sparse_count_value * sizeof(uint32_t));\n    sparse_values_buffer->reserve(sparse_count_value * unit_size);\n\n    const uint32_t seg_count =\n        *reinterpret_cast<const uint32_t *>(buffer_data + sizeof(uint32_t));\n    const uint32_t *seg_id =\n        reinterpret_cast<const uint32_t *>(buffer_data + 2 * sizeof(uint32_t));\n    const uint32_t *seg_vec_cnt = reinterpret_cast<const uint32_t *>(\n        buffer_data + 2 * sizeof(uint32_t) + seg_count * sizeof(uint32_t));\n    const uint16_t *sparse_indices = reinterpret_cast<const uint16_t *>(\n        buffer_data + 2 * sizeof(uint32_t) + seg_count * 2 * sizeof(uint32_t));\n    const char *sparse_value = reinterpret_cast<const char *>(\n        buffer_data + 2 * sizeof(uint32_t) + seg_count * 2 * sizeof(uint32_t) +\n        sparse_count_value * sizeof(uint16_t));\n\n    uint32_t cnt = 0;\n    for (size_t i = 0; i < seg_count; ++i) {\n      uint32_t cur_seg_id = *(seg_id + i);\n      uint32_t cur_seg_vec_cnt = *(seg_vec_cnt + i);\n\n      for (size_t j = 0; j < cur_seg_vec_cnt; ++j) {\n        uint32_t cur_sparse_index = *(sparse_indices + cnt);\n\n        cur_sparse_index = cur_sparse_index + (cur_seg_id << SEGMENT_ID_BITS);\n        sparse_indices_buffer->append(\n            reinterpret_cast<const char *>(&cur_sparse_index),\n            sizeof(uint32_t));\n\n        cnt++;\n      }\n    }\n\n    sparse_values_buffer->append(sparse_value, unit_size * sparse_count_value);\n  }\n\n  static void ReverseSparseFormat(const std::string &buffer,\n                                  uint32_t *sparse_count,\n                                  std::string *sparse_indices_buffer,\n                                  std::string *sparse_values_buffer,\n                                  
uint32_t unit_size) {\n    return ReverseSparseFormat(buffer.data(), sparse_count,\n                               sparse_indices_buffer, sparse_values_buffer,\n                               unit_size);\n  }\n\n  static void ReverseSparseFormat(const void *buffer,\n                                  IndexSparseDocument &sparse_doc,\n                                  uint32_t unit_size) {\n    return ReverseSparseFormat(buffer, sparse_doc.mutable_sparse_count(),\n                               sparse_doc.mutable_sparse_indices(),\n                               sparse_doc.mutable_sparse_values(), unit_size);\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/core/utility/utility_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\nstatic const std::string INDEX_META_SEGMENT_NAME = \"IndexMeta\";\nstatic const std::string INDEX_VERSION_SEGMENT_NAME = \"IndexVersion\";\n\n//! FileLogger\nstatic const std::string FILE_LOGGER_PATH = \"proxima.file.logger.path\";\n\n//! FileContainer\nstatic const std::string FILE_READ_STORAGE_CHECKSUM_VALIDATION =\n    \"proxima.file.read_storage.checksum_validation\";\nstatic const std::string FILE_READ_STORAGE_ENABLE_DIRECT_IO =\n    \"proxima.file.read_storage.enable_direct_io\";\nstatic const std::string FILE_READ_STORAGE_ALONE_FILE_HANDLE =\n    \"proxima.file.read_storage.alone_file_handle\";\nstatic const std::string FILE_READ_STORAGE_MEMORY_LOCKED =\n    \"proxima.file.read_storage.memory_locked\";\nstatic const std::string FILE_READ_STORAGE_MEMORY_WARMUP =\n    \"proxima.file.read_storage.memory_warmup\";\nstatic const std::string FILE_READ_STORAGE_MEMORY_SHARED =\n    \"proxima.file.read_storage.memory_shared\";\nstatic const std::string FILE_READ_STORAGE_HEADER_OFFSET =\n    \"proxima.file.read_storage.header_offset\";\nstatic const std::string FILE_READ_STORAGE_FOOTER_OFFSET =\n    \"proxima.file.read_storage.footer_offset\";\n\n//! 
MemoryContainer\nstatic const std::string MEMORY_CONTAINER_CHECKSUM_VALIDATION =\n    \"proxima.memory.container.checksum_validation\";\n\n//! MMapFileContainer\nstatic const std::string MMAPFILE_READ_STORAGE_MEMORY_LOCKED =\n    \"proxima.mmap_file.container.memory_locked\";\nstatic const std::string MMAPFILE_READ_STORAGE_MEMORY_WARMUP =\n    \"proxima.mmap_file.container.memory_warmup\";\nstatic const std::string MMAPFILE_READ_STORAGE_MEMORY_SHARED =\n    \"proxima.mmap_file.container.memory_shared\";\nstatic const std::string MMAPFILE_READ_STORAGE_CHECKSUM_VALIDATION =\n    \"proxima.mmap_file.container.checksum_validation\";\nstatic const std::string MMAPFILE_READ_STORAGE_HEADER_OFFSET =\n    \"proxima.mmap_file.container.header_offset\";\nstatic const std::string MMAPFILE_READ_STORAGE_FOOTER_OFFSET =\n    \"proxima.mmap_file.container.footer_offset\";\n\n//! MMapFileStorage\nstatic const std::string MMAPFILE_STORAGE_MEMORY_LOCKED =\n    \"proxima.mmap_file.storage.memory_locked\";\nstatic const std::string MMAPFILE_STORAGE_MEMORY_WARMUP =\n    \"proxima.mmap_file.storage.memory_warmup\";\nstatic const std::string MMAPFILE_STORAGE_COPY_ON_WRITE =\n    \"proxima.mmap_file.storage.copy_on_write\";\nstatic const std::string MMAPFILE_STORAGE_FORCE_FLUSH =\n    \"proxima.mmap_file.storage.force_flush\";\nstatic const std::string MMAPFILE_STORAGE_SEGMENT_META_CAPACITY =\n    \"proxima.mmap_file.storage.segment_meta_capacity\";\n\n//! BufferStorage\nstatic const std::string BUFFER_STORAGE_MEMORY_SIZE =\n    \"proxima.buffer.storage.memory_size\";\n\n//! 
MipsConverter\nstatic const std::string MIPS_CONVERTER_M_VALUE =\n    \"proxima.mips.converter.m_value\";\nstatic const std::string MIPS_CONVERTER_U_VALUE =\n    \"proxima.mips.converter.u_value\";\nstatic const std::string MIPS_CONVERTER_L2_NORM =\n    \"proxima.mips.converter.l2_norm\";\nstatic const std::string MIPS_CONVERTER_FORCED_HALF_FLOAT =\n    \"proxima.mips.converter.forced_half_float\";\nstatic const std::string MIPS_CONVERTER_SPHERICAL_INJECTION =\n    \"proxima.mips.converter.spherical_injection\";\n\n//! MipsReverseConverter\nstatic const std::string MIPS_REVERSE_CONVERTER_M_VALUE =\n    \"proxima.mips_reverse.converter.m_value\";\nstatic const std::string MIPS_REVERSE_CONVERTER_U_VALUE =\n    \"proxima.mips_reverse.converter.u_value\";\nstatic const std::string MIPS_REVERSE_CONVERTER_L2_NORM =\n    \"proxima.mips_reverse.converter.l2_norm\";\nstatic const std::string MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT =\n    \"proxima.mips_reverse.converter.forced_single_float\";\nstatic const std::string MIPS_REVERSE_CONVERTER_SPHERICAL_INJECTION =\n    \"proxima.mips_reverse.converter.spherical_injection\";\n\n//! MipsReformer\nstatic const std::string MIPS_REFORMER_M_VALUE =\n    \"proxima.mips.reformer.m_value\";\nstatic const std::string MIPS_REFORMER_U_VALUE =\n    \"proxima.mips.reformer.u_value\";\nstatic const std::string MIPS_REFORMER_L2_NORM =\n    \"proxima.mips.reformer.l2_norm\";\nstatic const std::string MIPS_REFORMER_NORMALIZE =\n    \"proxima.mips.reformer.normalize\";\nstatic const std::string MIPS_REFORMER_FORCED_HALF_FLOAT =\n    \"proxima.mips.reformer.forced_half_float\";\nstatic const std::string MIPS_REFORMER_SPHERICAL_INJECTION =\n    \"proxima.mips.reformer.spherical_injection\";\n\n//! 
MipsEuclideanMeasure\nstatic const std::string MIPS_EUCLIDEAN_METRIC_M_VALUE =\n    \"proxima.mips_euclidean.metric.m_value\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_U_VALUE =\n    \"proxima.mips_euclidean.metric.u_value\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM =\n    \"proxima.mips_euclidean.metric.max_l2_norm\";\nstatic const std::string MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE =\n    \"proxima.mips_euclidean.metric.injection_type\";\n\n//! NormalizeConverter\nstatic const std::string NORMALIZE_CONVERTER_FORCED_HALF_FLOAT =\n    \"proxima.normalize.converter.forced_half_float\";\nstatic const std::string NORMALIZE_CONVERTER_P_VALUE =\n    \"proxima.normalize.converter.p_value\";\n\n//! NormalizeReformer\nstatic const std::string NORMALIZE_REFORMER_FORCED_HALF_FLOAT =\n    \"proxima.normalize.reformer.forced_half_float\";\nstatic const std::string NORMALIZE_REFORMER_P_VALUE =\n    \"proxima.normalize.reformer.p_value\";\n\n//! Int8Converter\nstatic const std::string INT8_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =\n    \"proxima.int8_quantizer.converter.histogram_bins_count\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_DISABLE_BIAS =\n    \"proxima.int8_quantizer.converter.disable_bias\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_BIAS =\n    \"proxima.int8_quantizer.converter.bias\";\nstatic const std::string INT8_QUANTIZER_CONVERTER_SCALE =\n    \"proxima.int8_quantizer.converter.scale\";\n\n//! Int4Converter\nstatic const std::string INT4_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =\n    \"proxima.int4_quantizer.converter.histogram_bins_count\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_DISABLE_BIAS =\n    \"proxima.int4_quantizer.converter.disable_bias\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_BIAS =\n    \"proxima.int4_quantizer.converter.bias\";\nstatic const std::string INT4_QUANTIZER_CONVERTER_SCALE =\n    \"proxima.int4_quantizer.converter.scale\";\n\n//! 
Int8Reformer\nstatic const std::string INT8_QUANTIZER_REFORMER_BIAS =\n    \"proxima.int8_quantizer.reformer.bias\";\nstatic const std::string INT8_QUANTIZER_REFORMER_SCALE =\n    \"proxima.int8_quantizer.reformer.scale\";\nstatic const std::string INT8_QUANTIZER_REFORMER_METRIC =\n    \"proxima.int8_quantizer.reformer.metric\";\n\n//! Int4Reformer\nstatic const std::string INT4_QUANTIZER_REFORMER_BIAS =\n    \"proxima.int4_quantizer.reformer.bias\";\nstatic const std::string INT4_QUANTIZER_REFORMER_SCALE =\n    \"proxima.int4_quantizer.reformer.scale\";\nstatic const std::string INT4_QUANTIZER_REFORMER_METRIC =\n    \"proxima.int4_quantizer.reformer.metric\";\n\n//! CosineConverter\nstatic const std::string COSINE_CONVERTER_FORCED_HALF_FLOAT =\n    \"proxima.cosine.converter.forced_half_float\";\n\n//! CosineReformer\nstatic const std::string COSINE_REFORMER_FORCED_HALF_FLOAT =\n    \"proxima.cosine.reformer.forced_half_float\";\n\n//! QuantizedInteger Metric\nstatic const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME =\n    \"proxima.quantized_integer.metric.origin_metric_name\";\nstatic const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS =\n    \"proxima.quantized_integer.metric.origin_metric_params\";\n\n//! IntegerStreamingConverter\nstatic const std::string INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE =\n    \"proxima.integer_streaming.converter.enable_normalize\";\n\n//! IntegerStreamingReformer\nstatic const std::string INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE =\n    \"proxima.integer_streaming.reformer.enable_normalize\";\n\n//! DoubleBitConverter\nstatic const std::string DOUBLE_BIT_CONVERTER_TRAIN_SAMPLE_COUNT =\n    \"proxima.double_bit.converter.train_sample_count\";\nstatic const std::string DOUBLE_BIT_CONVERTER_A_VALUE =\n    \"proxima.double_bit.converter.a_value\";\nstatic const std::string DOUBLE_BIT_CONVERTER_B_VALUE =\n    \"proxima.double_bit.converter.b_value\";\n\n//! 
DoubleBitReformer\nstatic const std::string DOUBLE_BIT_REFORMER_A_VALUE =\n    \"proxima.double_bit.reformer.a_value\";\nstatic const std::string DOUBLE_BIT_REFORMER_B_VALUE =\n    \"proxima.double_bit.reformer.b_value\";\n\n//! SimpleForward\nstatic const std::string SIMPLE_FORWARD_DATA_BLOCK_SIZE =\n    \"proxima.simple.forward.data_block_size\";\nstatic const std::string SIMPLE_FORWARD_INDEX_BLOCK_SIZE =\n    \"proxima.simple.forward.index_block_size\";\n\n//! SimpleCloset\nstatic const std::string SIMPLE_CLOSET_DATA_BLOCK_SIZE =\n    \"proxima.simple.closet.data_block_size\";\nstatic const std::string SIMPLE_CLOSET_INDEX_BLOCK_SIZE =\n    \"proxima.simple.closet.index_block_size\";\n\n//! ChainCloset\nstatic const std::string CHAIN_CLOSET_SLOT_SIZE =\n    \"proxima.chain.closet.slot_size\";\nstatic const std::string CHAIN_CLOSET_INDEX_BLOCK_SIZE =\n    \"proxima.chain.closet.index_block_size\";\nstatic const std::string CHAIN_CLOSET_DATA_BLOCK_SIZE =\n    \"proxima.chain.closet.data_block_size\";\n\n//! IndexForward\nstatic const std::string PARAM_FORWARD_MULTI_VALUE =\n    \"proxima.param.forward.multi_value\";\nstatic const std::string PARAM_FORWARD_MULTI_COUNT =\n    \"proxima.param.forward.multi_count\";\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/core/utility/visit_filter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <algorithm>\n#include <chrono>\n#include <cstdint>\n#include <limits>\n#include <random>\n#include <tuple>\n#include <vector>\n#include <ailego/container/bloom_filter.h>\n#include <ailego/utility/bitset_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_logger.h>\n\nnamespace zvec {\nnamespace core {\n\nstruct VisitFilterHeader {\n  VisitFilterHeader() : maxDocCnt(0), maxScanNum(0) {}\n  uint64_t maxDocCnt;\n  uint64_t maxScanNum;\n};\n\nconstexpr int PROXIMA_HNSW_VISITFILTER_CUSTOM_PARAMS_INDEX_NEGPROB = 0;\n\nclass VisitBloomFilter {\n public:\n  static constexpr int mode = 1;\n\n  static constexpr int N = 5;\n  struct Context {\n    Context()\n        : mt(std::chrono::system_clock::now().time_since_epoch().count()) {};\n    VisitFilterHeader h;\n    std::mt19937 mt;\n    ailego::BloomFilter<N> *filter{nullptr};\n    int offset[N] = {0};\n  };\n#define BLOOM_FILTER_HASH_BITS_OFFSETS(i)                                 \\\n  i + c->offset[0], i + c->offset[1], i + c->offset[2], i + c->offset[3], \\\n      i + c->offset[4]\n\n  VisitBloomFilter() = delete;\n\n  inline static void set_visited(Context *c, id_t idx) {\n    c->filter->force_insert(BLOOM_FILTER_HASH_BITS_OFFSETS(idx));\n    return;\n  }\n\n  inline static void *get_visited(Context *, id_t) {\n    // TODO\n    return 
nullptr;\n  }\n\n  inline static bool visited(Context *c, id_t idx) {\n    return c->filter->has(BLOOM_FILTER_HASH_BITS_OFFSETS(idx));\n  }\n\n  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {\n    if (maxScanNum == c->h.maxScanNum) {\n      return 0;\n    }\n    c->h.maxScanNum = maxScanNum;\n    if (c->filter->reset(maxScanNum, c->filter->probability()) != 0) {\n      LOG_ERROR(\"reset BloomFilter failed\");\n      return IndexError_Runtime;\n    }\n    genRandomHashBits(c);\n    return 0;\n  }\n\n  inline static void clear(Context *c) {\n    c->filter->clear();\n    return;\n  }\n\n  inline static bool reset(Context *c, uint64_t maxDocCnt,\n                           uint64_t max_scan_num) {\n    if (ailego_unlikely(maxDocCnt > c->h.maxDocCnt ||\n                        max_scan_num > c->h.maxScanNum)) {\n      // Create a new one, if failed, we can reuse the old one\n      auto filter = new (std::nothrow) ailego::BloomFilter<VisitBloomFilter::N>(\n          max_scan_num, c->filter->probability());\n      if (ailego_unlikely(filter == nullptr)) {\n        LOG_ERROR(\"reset bloomfilter failed, maxScanNum %zu prob %f\",\n                  (size_t)max_scan_num, c->filter->probability());\n        c->filter->clear();\n        return false;\n      }\n\n      delete c->filter;\n      c->filter = filter;\n      c->h.maxScanNum = max_scan_num;\n      c->h.maxDocCnt = maxDocCnt;\n      genRandomHashBits(c);\n    }\n    return true;\n  }\n\n  inline static void genRandomHashBits(Context *c) {\n    std::uniform_int_distribution<int> dt(0, c->h.maxDocCnt);\n    for (size_t i = 0; i < sizeof(c->offset) / sizeof(c->offset[0]); ++i) {\n      int r = dt(c->mt);\n      size_t j = 0;\n      do {  // gen distinct number\n        for (j = 0; j < i; ++j) {\n          if (c->offset[j] == r) {\n            r = dt(c->mt);\n            break;\n          }\n        }\n      } while (j < i);\n      c->offset[i] = r;\n    }\n    std::sort(c->offset, c->offset + N);\n  
}\n\n  template <class... T>\n  static int init(Context *, void **ctx, uint64_t maxDocCnt,\n                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {\n    Context *c = new (std::nothrow) Context;\n    if (c == nullptr) {\n      LOG_ERROR(\"New memory in initVisitBitMap failed\");\n      return IndexError_NoMemory;\n    }\n    c->h.maxDocCnt = maxDocCnt;\n    c->h.maxScanNum = maxScanNum;\n    float p =\n        std::get<PROXIMA_HNSW_VISITFILTER_CUSTOM_PARAMS_INDEX_NEGPROB>(tpl);\n    c->filter = new (std::nothrow)\n        ailego::BloomFilter<VisitBloomFilter::N>(maxScanNum, p);\n    if (c->filter == nullptr) {\n      LOG_ERROR(\"New BloomFilter failed, reuse old one\");\n      return IndexError_NoMemory;\n    }\n    genRandomHashBits(c);\n    *ctx = c;\n    return 0;\n  }\n\n  inline static void destroy(Context *c) {\n    delete c->filter;\n    delete c;\n  }\n#undef BLOOM_FILTER_HASH_BITS_OFFSETS\n};  // end of VisitBloomFilter\n\nclass VisitBitMap {\n public:\n  static constexpr int mode = 2;\n\n  struct Context {\n    VisitFilterHeader h;\n    ailego::BitsetHelper bitset;\n    char *buf{nullptr};\n  };\n\n  VisitBitMap() = delete;\n\n  inline static void set_visited(Context *c, id_t idx) {\n    c->bitset.set(idx);\n    return;\n  }\n\n  inline static void *get_visited(Context *c, id_t idx) {\n    return &c->buf[idx >> 3];\n  }\n\n  inline static bool visited(Context *c, id_t idx) {\n    return c->bitset.test(idx);\n  }\n\n  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {\n    c->h.maxScanNum = maxScanNum;\n    return 0;\n  }\n\n  inline static void clear(Context *c) {\n    c->bitset.clear();\n    return;\n  }\n\n  inline static bool reset(Context *c, uint64_t maxDocCnt,\n                           uint64_t maxScanNum) {\n    if (ailego_unlikely(maxDocCnt > c->h.maxDocCnt ||\n                        maxScanNum > c->h.maxScanNum)) {\n      uint64_t len = ((maxDocCnt + 31) >> 5) << 2;  // round to uint32_t\n      auto buf = new 
(std::nothrow) char[len];\n      if (buf == nullptr) {\n        LOG_ERROR(\"New memory in initVisitBitMap failed\");\n        c->bitset.clear();\n        return false;\n      }\n\n      c->h.maxDocCnt = maxDocCnt;\n      c->h.maxScanNum = maxScanNum;\n      delete[] c->buf;\n      c->buf = buf;\n      memset(c->buf, 0, len);\n      c->bitset.mount(c->buf, len);\n    }\n    return true;\n  }\n\n  template <class... T>\n  static int init(Context *, void **ctx, uint64_t maxDocCnt,\n                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {\n    (void)tpl;  // unused warning\n    Context *c = new (std::nothrow) Context;\n    if (c == nullptr) {\n      LOG_ERROR(\"New memory in initVisitBitMap failed\");\n      return IndexError_NoMemory;\n    }\n    c->h.maxDocCnt = maxDocCnt;\n    c->h.maxScanNum = maxScanNum;\n    uint64_t len = ((maxDocCnt + 31) >> 5) << 2;  // round to uint32_t\n    c->buf = new (std::nothrow) char[len];\n    if (c->buf == nullptr) {\n      LOG_ERROR(\"New memory in initVisitBitMap failed, reuse old one\");\n      delete c;\n      return IndexError_NoMemory;\n    }\n    memset(c->buf, 0, len);\n    c->bitset.mount(c->buf, len);\n    *ctx = c;\n    return 0;\n  }\n\n  inline static void destroy(Context *c) {\n    delete[] c->buf;\n    delete c;\n  }\n};  // end of VisitBitMap\n\nclass VisitByteMap {\n public:\n  static constexpr int mode = 3;\n  struct Context {\n    VisitFilterHeader h;\n    uint8_t curNum{0};\n    std::vector<uint8_t> buf;\n  };\n\n  VisitByteMap() = delete;\n\n  inline static void set_visited(Context *c, id_t idx) {\n    if (ailego_unlikely(idx > c->h.maxDocCnt)) {\n      c->h.maxDocCnt = idx + 1024;  // reserved\n      c->buf.resize(c->h.maxDocCnt);\n    }\n    c->buf[idx] = c->curNum;\n    return;\n  }\n\n  inline static void *get_visited(Context *c, id_t idx) {\n    return c->buf.data() + idx;\n  }\n\n  inline static bool visited(Context *c, id_t idx) {\n    if (ailego_unlikely(idx > c->h.maxDocCnt)) {\n      return 
false;\n    }\n    return c->buf[idx] == c->curNum;\n  }\n\n  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {\n    c->h.maxScanNum = maxScanNum;\n    return 0;\n  }\n\n  inline static void clear(Context *c) {\n    c->curNum++;\n    if (c->curNum == 0) {\n      memset(c->buf.data(), 0, c->h.maxDocCnt * sizeof(uint8_t));\n      c->curNum = 1;\n    }\n    return;\n  }\n\n  inline static bool reset(Context *c, uint64_t maxDocCnt,\n                           uint64_t maxScanNum) {\n    if (ailego_unlikely(maxDocCnt > c->h.maxDocCnt ||\n                        maxScanNum > c->h.maxScanNum)) {\n      try {\n        c->buf.resize(maxDocCnt);\n      } catch (const std::exception &e) {\n        LOG_ERROR(\"New memory in initVisitByteMap failed, reuse old one\");\n        return false;\n      }\n      memset(c->buf.data(), 0, maxDocCnt * sizeof(uint8_t));\n      c->curNum = 1;\n      c->h.maxDocCnt = maxDocCnt;\n      c->h.maxScanNum = maxScanNum;\n      return true;\n    }\n    return true;\n  }\n\n  template <class... T>\n  static int init(Context *, void **ctx, uint64_t maxDocCnt,\n                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {\n    (void)tpl;  // unused warning\n    Context *c = new (std::nothrow) Context;\n    if (c == nullptr) {\n      LOG_ERROR(\"New memory in initVisitByteMap failed\");\n      return IndexError_NoMemory;\n    }\n    c->h.maxDocCnt = maxDocCnt;\n    c->h.maxScanNum = maxScanNum;\n    try {\n      c->buf.resize(maxDocCnt);\n    } catch (const std::exception &e) {\n      LOG_ERROR(\"New memory in initVisitByteMap failed\");\n      delete c;\n      return IndexError_NoMemory;\n    }\n    memset(c->buf.data(), 0, maxDocCnt * sizeof(uint8_t));\n    c->curNum = 1;\n    *ctx = c;\n    return 0;\n  }\n\n  inline static void destroy(Context *c) {\n    delete c;\n  }\n};  // end of VisitByteMap\n\n\n#define PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(cls, impl, ctx, ...) 
\\\n  case cls::mode:                                                 \\\n    return cls::impl(static_cast<cls::Context *>(ctx), ##__VA_ARGS__);\n\n#define PROXIMA_HNSW_VISITFILTER_CALL_IMPL(impl, ...)                  \\\n  switch (mode_) {                                                     \\\n    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitBloomFilter, impl, ctx_, \\\n                                         ##__VA_ARGS__)                \\\n    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitBitMap, impl, ctx_,      \\\n                                         ##__VA_ARGS__)                \\\n    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitByteMap, impl, ctx_,     \\\n                                         ##__VA_ARGS__)                \\\n  }\n\n\n// visit list will be called with high frequency,\n// so using switch instead of std::function or virtual class\n// funtion point, lambda, virtual class all cannot be inlined\nclass VisitFilter {\n public:\n  enum Mode {\n    Default = 0,\n    BloomFilter = VisitBloomFilter::mode,\n    BitMap = VisitBitMap::mode,\n    ByteMap = VisitByteMap::mode\n  };\n\n  VisitFilter() : mode_(0), ctx_(nullptr) {};\n\n  inline bool visited(id_t idx) {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(visited, idx);\n    return true;  // place holder\n  }\n\n  inline void set_visited(id_t idx) {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(set_visited, idx);\n  }\n\n  inline void *get_visited(id_t idx) {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(get_visited, idx);\n    return nullptr;  // place holder\n  }\n\n  inline int set_max_scan_num(id_t idx) {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(set_max_scan_num, idx);\n    return 0;  // place holder\n  }\n\n  inline void clear() {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(clear);\n  }\n\n  inline bool reset(uint64_t maxDocCnt, uint64_t maxScanNum) {\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(reset, maxDocCnt, maxScanNum);\n    return true;\n  }\n\n  inline void destroy() {\n    if (ctx_ != nullptr) {\n      
PROXIMA_HNSW_VISITFILTER_CALL_IMPL(destroy);\n    }\n  }\n\n  int init(int mode, uint64_t maxDocCnt, uint64_t maxScanNum,\n           float negativeProbability) {\n    mode_ = mode;\n    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(init, &ctx_, maxDocCnt, maxScanNum,\n                                       std::make_tuple(negativeProbability));\n    return 0;  // place holder\n  }\n\n  int get_mode(void) const {\n    return mode_;\n  }\n\n\n private:\n  VisitFilter(const VisitFilter &) = delete;\n  VisitFilter &operator=(const VisitFilter &) = delete;\n\n  int mode_{0U};  // custom data for each method\n  void *ctx_{nullptr};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_proto_library(\n  NAME zvec_proto STATIC\n  SRCS proto/*.proto\n  PROTOROOT ./\n)\n\ncc_directory(common)\ncc_directory(index)\ncc_directory(sqlengine)\n\nfile(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h)\n\ncc_library(\n  NAME zvec_db STATIC STRICT SRCS_NO_GLOB\n  SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc\n  INCS . ${CMAKE_CURRENT_BINARY_DIR}\n  PUBINCS ${PROJECT_ROOT_DIR}/src/include\n  LIBS \n    zvec_ailego\n    zvec_core\n    glog\n    roaring\n    rocksdb\n    antlr4\n    libprotobuf\n    Arrow::arrow_static\n    Arrow::arrow_compute\n    Arrow::arrow_dataset\n    Arrow::arrow_acero\n  DEPS zvec_proto\n  VERSION \"${PROXIMA_ZVEC_VERSION}\"\n)"
  },
  {
    "path": "src/db/collection.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <atomic>\n#include <cstdint>\n#include <memory>\n#include <mutex>\n#include <shared_mutex>\n#include <string>\n#include <variant>\n#include <vector>\n#include <ailego/io/file_lock.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/file_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/collection.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/options.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include \"db/common/constants.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/common/profiler.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/common/delete_store.h\"\n#include \"db/index/common/id_map.h\"\n#include \"db/index/common/index_filter.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/index/segment/segment_helper.h\"\n#include \"db/index/segment/segment_manager.h\"\n#include \"db/sqlengine/sqlengine.h\"\n\nnamespace zvec {\n\nenum class WriteMode : uint8_t {\n  UNDEFINED = 0,\n  INSERT,\n  UPDATE,\n  UPSERT,\n};\n\nCollection::~Collection() = default;\n\nclass CollectionImpl : public Collection {\n  friend class Collection;\n\n public:\n  explicit CollectionImpl(const std::string &path,\n                         
 const CollectionSchema &schema);\n\n  explicit CollectionImpl(const std::string &path);\n\n  ~CollectionImpl() override;\n\n private:\n  Status Open(const CollectionOptions &options);\n\n  Status Close();\n\n public:\n  Status Destroy() override;\n\n  Status Flush() override;\n\n  Result<std::string> Path() const override;\n\n  Result<CollectionStats> Stats() const override;\n\n  Result<CollectionSchema> Schema() const override;\n\n  Result<CollectionOptions> Options() const override;\n\n public:\n  Status CreateIndex(const std::string &column_name,\n                     const IndexParams::Ptr &index_params,\n                     const CreateIndexOptions &options) override;\n\n  Status DropIndex(const std::string &column_name) override;\n\n  Status Optimize(const OptimizeOptions &options) override;\n\n  Status AddColumn(const FieldSchema::Ptr &column_schema,\n                   const std::string &expression,\n                   const AddColumnOptions &options) override;\n\n  Status DropColumn(const std::string &column_name) override;\n\n  Status AlterColumn(\n      const std::string &column_name, const std::string &rename,\n      const FieldSchema::Ptr &new_column_schema = nullptr,\n      const AlterColumnOptions &options = AlterColumnOptions()) override;\n\n  Result<WriteResults> Insert(std::vector<Doc> &docs) override;\n\n  Result<WriteResults> Upsert(std::vector<Doc> &docs) override;\n\n  Result<WriteResults> Update(std::vector<Doc> &docs) override;\n\n  Result<WriteResults> Delete(const std::vector<std::string> &pks) override;\n\n  Status DeleteByFilter(const std::string &filter) override;\n\n  Result<DocPtrList> Query(const VectorQuery &query) const override;\n\n  Result<GroupResults> GroupByQuery(\n      const GroupByVectorQuery &query) const override;\n\n  Result<DocPtrMap> Fetch(const std::vector<std::string> &pks) const override;\n\n private:\n  void prepare_schema();\n\n  Status close_unsafe();\n\n  Status flush_unsafe();\n\n  Status create();\n\n  
Status recovery();\n\n  Status create_idmap_and_delete_store();\n\n  Status recover_idmap_and_delete_store();\n\n  Status acquire_file_lock(bool create = false);\n\n  Status init_version_manager();\n\n  Status init_writing_segment();\n\n  bool need_switch_to_new_segment() const;\n\n  Status switch_to_new_segment_for_writing(\n      const CollectionSchema::Ptr &schema = nullptr);\n\n  Result<WriteResults> write_impl(std::vector<Doc> &docs, WriteMode mode);\n\n  std::vector<Segment::Ptr> get_all_segments() const;\n\n  std::vector<Segment::Ptr> get_all_persist_segments() const;\n\n  Segment::Ptr local_segment_by_doc_id(\n      uint64_t doc_id, const std::vector<Segment::Ptr> &segments) const;\n\n  SegmentID allocate_segment_id() {\n    return segment_id_allocator_.fetch_add(1);\n  }\n\n  SegmentID allocate_segment_id_for_tmp_segment() {\n    return tmp_segment_id_allocator_.fetch_add(1);\n  }\n\n  std::vector<SegmentTask::Ptr> build_compact_task(\n      const CollectionSchema::Ptr &schema,\n      const std::vector<Segment::Ptr> &segments, int concurrency,\n      const IndexFilter::Ptr filter);\n\n  Status execute_compact_task(std::vector<SegmentTask::Ptr> &tasks) const;\n\n  std::vector<SegmentTask::Ptr> build_create_vector_index_task(\n      const std::vector<Segment::Ptr> &segments, const std::string &column,\n      const IndexParams::Ptr &index_params, int concurrency);\n\n  std::vector<SegmentTask::Ptr> build_create_scalar_index_task(\n      const std::vector<Segment::Ptr> &segments, const std::string &column,\n      const IndexParams::Ptr &index_params, int concurrency);\n\n  std::vector<SegmentTask::Ptr> build_drop_vector_index_task(\n      const std::vector<Segment::Ptr> &segments, const std::string &column);\n\n  std::vector<SegmentTask::Ptr> build_drop_scalar_index_task(\n      const std::vector<Segment::Ptr> &segments, const std::string &column);\n\n  Status execute_tasks(std::vector<SegmentTask::Ptr> &tasks) const;\n\n private:\n  Status handle_upsert(Doc 
&doc);\n\n  Status handle_update(Doc &doc);\n\n  Status handle_insert(Doc &doc);\n\n  Status internal_fetch_by_doc(const Doc &doc, Doc::Ptr *doc_out);\n\n private:\n  // Helper functions for add/alter/drop column\n  Status validate(const std::string &column, const FieldSchema::Ptr &schema,\n                  const std::string &expression, const std::string &rename,\n                  ColumnOp op);\n\n private:\n  std::string path_;\n\n  bool destroyed_{false};\n\n  CollectionSchema::Ptr schema_;\n\n  CollectionOptions options_;\n\n  mutable std::shared_mutex schema_handle_mtx_;\n  mutable std::shared_mutex write_mtx_;\n\n  std::atomic<SegmentID> segment_id_allocator_;\n  std::atomic<SegmentID> tmp_segment_id_allocator_;\n\n  // writing segment\n  Segment::Ptr writing_segment_;\n  // non-writing segments, sort by doc_id range\n  SegmentManager::Ptr segment_manager_;\n\n  // latest version: std::vector<SegmentMeta>\n  VersionManager::Ptr version_manager_;\n\n  // file lock\n  ailego::File lock_file_;\n\n  IDMap::Ptr id_map_;\n  DeleteStore::Ptr delete_store_;\n\n  sqlengine::SQLEngine::Ptr sql_engine_;\n};\n\nResult<Collection::Ptr> Collection::CreateAndOpen(\n    const std::string &path, const CollectionSchema &schema,\n    const CollectionOptions &options) {\n  auto collection = std::make_shared<CollectionImpl>(path, schema);\n\n  auto s = collection->Open(options);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  return collection;\n}\n\nResult<Collection::Ptr> Collection::Open(const std::string &path,\n                                         const CollectionOptions &options) {\n  auto collection = std::make_shared<CollectionImpl>(path);\n\n  auto s = collection->Open(options);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  return collection;\n}\n\nCollectionImpl::CollectionImpl(const std::string &path,\n                               const CollectionSchema &schema)\n    : path_(path), schema_(std::make_shared<CollectionSchema>(schema)) {\n  prepare_schema();\n}\n\nvoid 
CollectionImpl::prepare_schema() {\n  // set default index params for vector fields\n  for (auto &field : schema_->fields()) {\n    if (field->is_vector_field()) {\n      if (field->index_params() == nullptr) {\n        field->set_index_params(DefaultVectorIndexParams);\n      }\n    }\n  }\n}\n\nCollectionImpl::CollectionImpl(const std::string &path) : path_(path) {}\n\nCollectionImpl::~CollectionImpl() {\n  if (!destroyed_) {\n    Close();\n  }\n}\n\nStatus CollectionImpl::Open(const CollectionOptions &options) {\n  options_ = options;\n\n  if (schema_ != nullptr && options_.read_only_) {\n    return Status::InvalidArgument(\n        \"Unable to create collection with read-only mode.\");\n  }\n\n  Status s;\n  if (schema_ == nullptr) {\n    // recovery from disk\n    s = recovery();\n  } else {\n    // create new collection with existing schema\n    s = create();\n  }\n\n  auto profiler = std::make_shared<Profiler>();\n  sql_engine_ = sqlengine::SQLEngine::create(profiler);\n\n  return s;\n}\n\nStatus CollectionImpl::Close() {\n  // only called in deconstructor\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  return close_unsafe();\n}\n\nStatus CollectionImpl::close_unsafe() {\n  // flush\n  if (!options_.read_only_) {\n    auto s = flush_unsafe();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // reset\n  writing_segment_.reset();\n  segment_manager_.reset();\n  version_manager_.reset();\n  id_map_.reset();\n  delete_store_.reset();\n\n  lock_file_.close();\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::Destroy() {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  auto s = close_unsafe();\n  CHECK_RETURN_STATUS(s);\n\n  ailego::FileHelper::RemoveDirectory(path_.c_str());\n\n  destroyed_ = true;\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::Flush() {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  
std::lock_guard lock(schema_handle_mtx_);\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  return flush_unsafe();\n}\n\nStatus CollectionImpl::flush_unsafe() {\n  if (!writing_segment_) {\n    return Status::InternalError(\n        \"flush writing segment failed because writing segment is nullptr\");\n  }\n  return writing_segment_->flush();\n}\n\nResult<std::string> CollectionImpl::Path() const {\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  return path_;\n}\n\nResult<CollectionStats> CollectionImpl::Stats() const {\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  auto segments = get_all_segments();\n\n  CollectionStats stats;\n  auto vector_fields = schema_->vector_fields();\n  if (segments.empty()) {\n    stats.doc_count = 0;\n    for (auto &field : vector_fields) {\n      stats.index_completeness[field->name()] =\n          1;  // if no doc, completeness is 1\n    }\n    return stats;\n  }\n\n  for (auto &segment : segments) {\n    stats.doc_count += segment->doc_count(delete_store_->make_filter());\n  }\n\n  for (auto &field : vector_fields) {\n    if (stats.doc_count == 0) {\n      stats.index_completeness[field->name()] = 1;\n      continue;\n    }\n\n    uint32_t indexed_doc_count{0};\n    for (auto &segment : segments) {\n      if (segment->meta()->vector_indexed(field->name())) {\n        indexed_doc_count += segment->doc_count(delete_store_->make_filter());\n      }\n    }\n    stats.index_completeness[field->name()] =\n        indexed_doc_count * 1.0 / stats.doc_count;\n  }\n\n  return stats;\n}\n\nResult<CollectionSchema> CollectionImpl::Schema() const {\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  return *schema_;\n}\n\nResult<CollectionOptions> CollectionImpl::Options() const {\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  return 
options_;\n}\n\nStatus CollectionImpl::CreateIndex(const std::string &column_name,\n                                   const IndexParams::Ptr &index_params,\n                                   const CreateIndexOptions &options) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema_);\n  auto s = new_schema->add_index(column_name, index_params);\n  CHECK_RETURN_STATUS(s);\n  s = new_schema->validate();\n  CHECK_RETURN_STATUS(s);\n\n  auto field = schema_->get_field(column_name);\n  if (field->index_params() != nullptr &&\n      *field->index_params() == *index_params) {\n    // equal index params\n    return Status::OK();\n  }\n\n  // forbidden writing until index is ready\n  std::lock_guard write_lock(write_mtx_);\n\n  Version new_version = version_manager_->get_current_version();\n\n  if (writing_segment_->doc_count() > 0) {\n    s = writing_segment_->dump();\n    CHECK_RETURN_STATUS(s);\n\n    s = segment_manager_->add_segment(writing_segment_);\n    CHECK_RETURN_STATUS(s);\n\n    auto seg_options =\n        SegmentOptions{false, options_.enable_mmap_, options_.max_buffer_size_};\n    auto new_segment = Segment::CreateAndOpen(\n        path_, *new_schema, allocate_segment_id(),\n        writing_segment_->meta()->max_doc_id() + 1, id_map_, delete_store_,\n        version_manager_, seg_options);\n    if (!new_segment) {\n      return new_segment.error();\n    }\n\n    s = new_version.add_persisted_segment_meta(writing_segment_->meta());\n    CHECK_RETURN_STATUS(s);\n\n    writing_segment_ = new_segment.value();\n    new_version.set_next_segment_id(segment_id_allocator_.load());\n\n  } else {\n    // TODO: allocate new segment id and clear current writing segment at last\n    // recreate writing segment\n    s = writing_segment_->destroy();\n    CHECK_RETURN_STATUS(s);\n    auto id = writing_segment_->id();\n   
 auto min_doc_id = writing_segment_->meta()->min_doc_id();\n    writing_segment_.reset();\n    SegmentOptions seg_options;\n    seg_options.enable_mmap_ = options_.enable_mmap_;\n    seg_options.max_buffer_size_ = options_.max_buffer_size_;\n    seg_options.read_only_ = options_.read_only_;\n    auto writing_segment =\n        Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,\n                               delete_store_, version_manager_, seg_options);\n    if (!writing_segment) {\n      return writing_segment.error();\n    }\n    writing_segment_ = writing_segment.value();\n  }\n  new_version.reset_writing_segment_meta(writing_segment_->meta());\n\n  // get_all_segment will return writing segment if it has docs\n  auto persist_segments = get_all_persist_segments();\n\n  bool is_vector_field = field->is_vector_field();\n\n  std::vector<SegmentTask::Ptr> tasks;\n  if (is_vector_field) {\n    tasks = build_create_vector_index_task(persist_segments, column_name,\n                                           index_params, options.concurrency_);\n\n  } else {\n    tasks = build_create_scalar_index_task(persist_segments, column_name,\n                                           index_params, options.concurrency_);\n  }\n\n  if (tasks.empty()) {\n    new_version.set_schema(*new_schema);\n\n    s = version_manager_->apply(new_version);\n    CHECK_RETURN_STATUS(s);\n\n    // persist manifest\n    s = version_manager_->flush();\n    CHECK_RETURN_STATUS(s);\n\n    schema_ = new_schema;\n    return Status::OK();\n  }\n\n  s = execute_tasks(tasks);\n  CHECK_RETURN_STATUS(s);\n\n  new_version.set_schema(*new_schema);\n\n  for (auto &task : tasks) {\n    auto task_info = task->task_info();\n\n    if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {\n      auto create_index_task = std::get<CreateVectorIndexTask>(task_info);\n      s = new_version.update_persisted_segment_meta(\n          create_index_task.output_segment_meta_);\n    } else if 
(std::holds_alternative<CreateScalarIndexTask>(task_info)) {\n      auto create_index_task = std::get<CreateScalarIndexTask>(task_info);\n      s = new_version.update_persisted_segment_meta(\n          create_index_task.output_segment_meta_);\n    }\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // 2. update version\n  s = version_manager_->apply(new_version);\n  CHECK_RETURN_STATUS(s);\n\n  // 3. persist version\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  // 4. remove old segments or block\n  for (auto &task : tasks) {\n    auto task_info = task->task_info();\n\n    if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {\n      auto create_index_task = std::get<CreateVectorIndexTask>(task_info);\n      s = create_index_task.input_segment_->reload_vector_index(\n          *new_schema, create_index_task.output_segment_meta_,\n          create_index_task.output_vector_indexers_,\n          create_index_task.output_quant_vector_indexers_);\n    } else if (std::holds_alternative<CreateScalarIndexTask>(task_info)) {\n      auto create_index_task = std::get<CreateScalarIndexTask>(task_info);\n      s = create_index_task.input_segment_->reload_scalar_index(\n          *new_schema, create_index_task.output_segment_meta_,\n          create_index_task.output_scalar_indexer_);\n    }\n    CHECK_RETURN_STATUS(s);\n  }\n\n  schema_ = new_schema;\n\n  return Status::OK();\n}\n\nstd::vector<SegmentTask::Ptr> CollectionImpl::build_create_vector_index_task(\n    const std::vector<Segment::Ptr> &segments, const std::string &column,\n    const IndexParams::Ptr &index_params, int concurrency) {\n  std::vector<SegmentTask::Ptr> tasks;\n  for (auto &segment : segments) {\n    if (!segment->vector_index_ready(column, index_params)) {\n      tasks.push_back(SegmentTask::CreateCreateVectorIndexTask(\n          CreateVectorIndexTask{segment, column, index_params, concurrency}));\n    }\n  }\n  return tasks;\n}\n\nstd::vector<SegmentTask::Ptr> 
CollectionImpl::build_create_scalar_index_task(\n    const std::vector<Segment::Ptr> &segments, const std::string &column,\n    const IndexParams::Ptr &index_params, int concurrency) {\n  std::vector<SegmentTask::Ptr> tasks;\n  for (auto &segment : segments) {\n    tasks.push_back(SegmentTask::CreateCreateScalarIndexTask(\n        CreateScalarIndexTask{segment, {column}, index_params, concurrency}));\n  }\n  return tasks;\n}\n\nStatus CollectionImpl::execute_tasks(\n    std::vector<SegmentTask::Ptr> &tasks) const {\n  Status s;\n  for (auto &task : tasks) {\n    s = SegmentHelper::Execute(task);\n    if (!s.ok()) {\n      return s;\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::DropIndex(const std::string &column_name) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema_);\n  auto s = new_schema->drop_index(column_name);\n  CHECK_RETURN_STATUS(s);\n\n  auto field = schema_->get_field(column_name);\n  if (field->index_params() == nullptr) {\n    return Status::OK();  // return ok if not indexed\n  }\n\n  if (field->is_vector_field() &&\n      *field->index_params() == DefaultVectorIndexParams) {\n    return Status::OK();\n  }\n\n  // forbidden writing until index is ready\n  std::lock_guard write_lock(write_mtx_);\n\n  Version new_version = version_manager_->get_current_version();\n\n  bool is_vector_field = field->is_vector_field();\n\n  if (writing_segment_->doc_count() > 0) {\n    s = writing_segment_->dump();\n    CHECK_RETURN_STATUS(s);\n\n    s = segment_manager_->add_segment(writing_segment_);\n    CHECK_RETURN_STATUS(s);\n\n    auto new_segment =\n        Segment::CreateAndOpen(path_, *new_schema, allocate_segment_id(),\n                               writing_segment_->meta()->max_doc_id() + 1,\n                               id_map_, delete_store_, version_manager_,\n                
               SegmentOptions{false, options_.enable_mmap_,\n                                              options_.max_buffer_size_});\n    if (!new_segment) {\n      return new_segment.error();\n    }\n\n    s = new_version.add_persisted_segment_meta(writing_segment_->meta());\n    CHECK_RETURN_STATUS(s);\n\n    writing_segment_ = new_segment.value();\n    new_version.set_next_segment_id(segment_id_allocator_.load());\n\n  } else {\n    // recreate writing segment\n    s = writing_segment_->destroy();\n    CHECK_RETURN_STATUS(s);\n    auto id = writing_segment_->id();\n    auto min_doc_id = writing_segment_->meta()->min_doc_id();\n    writing_segment_.reset();\n    SegmentOptions seg_options;\n    seg_options.enable_mmap_ = options_.enable_mmap_;\n    seg_options.max_buffer_size_ = options_.max_buffer_size_;\n    seg_options.read_only_ = options_.read_only_;\n    auto writing_segment =\n        Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,\n                               delete_store_, version_manager_, seg_options);\n    if (!writing_segment) {\n      return writing_segment.error();\n    }\n\n    writing_segment_ = writing_segment.value();\n  }\n  new_version.reset_writing_segment_meta(writing_segment_->meta());\n\n  auto persist_semgents = get_all_persist_segments();\n\n  std::vector<SegmentTask::Ptr> tasks;\n  if (is_vector_field) {\n    tasks = build_drop_vector_index_task(persist_semgents, column_name);\n  } else {\n    tasks = build_drop_scalar_index_task(persist_semgents, column_name);\n  }\n\n  if (tasks.empty()) {\n    new_version.set_schema(*new_schema);\n\n    s = version_manager_->apply(new_version);\n    CHECK_RETURN_STATUS(s);\n\n    // persist manifest\n    s = version_manager_->flush();\n    CHECK_RETURN_STATUS(s);\n\n    schema_ = new_schema;\n    return Status::OK();\n  }\n\n  s = execute_tasks(tasks);\n  CHECK_RETURN_STATUS(s);\n\n  new_version.set_schema(*new_schema);\n\n  for (auto &task : tasks) {\n    auto task_info = 
task->task_info();\n\n    if (std::holds_alternative<DropVectorIndexTask>(task_info)) {\n      auto drop_index_task = std::get<DropVectorIndexTask>(task_info);\n      s = new_version.update_persisted_segment_meta(\n          drop_index_task.output_segment_meta_);\n    } else if (std::holds_alternative<DropScalarIndexTask>(task_info)) {\n      auto drop_index_task = std::get<DropScalarIndexTask>(task_info);\n      s = new_version.update_persisted_segment_meta(\n          drop_index_task.output_segment_meta_);\n    }\n    CHECK_RETURN_STATUS(s);\n  }\n\n  s = version_manager_->apply(new_version);\n  CHECK_RETURN_STATUS(s);\n\n  // persist manifest\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  // 4. remove old segments or block\n  for (auto &task : tasks) {\n    auto task_info = task->task_info();\n\n    if (std::holds_alternative<DropVectorIndexTask>(task_info)) {\n      auto drop_index_task = std::get<DropVectorIndexTask>(task_info);\n      s = drop_index_task.input_segment_->reload_vector_index(\n          *new_schema, drop_index_task.output_segment_meta_,\n          drop_index_task.output_vector_indexers_);\n    } else if (std::holds_alternative<DropScalarIndexTask>(task_info)) {\n      auto drop_index_task = std::get<DropScalarIndexTask>(task_info);\n      s = drop_index_task.input_segment_->reload_scalar_index(\n          *new_schema, drop_index_task.output_segment_meta_,\n          drop_index_task.output_scalar_indexer_);\n    }\n    CHECK_RETURN_STATUS(s);\n  }\n\n  schema_ = new_schema;\n\n  return Status::OK();\n}\n\nstd::vector<SegmentTask::Ptr> CollectionImpl::build_drop_vector_index_task(\n    const std::vector<Segment::Ptr> &segments, const std::string &column) {\n  std::vector<SegmentTask::Ptr> tasks;\n  for (auto &segment : segments) {\n    tasks.emplace_back(SegmentTask::CreateDropVectorIndexTask(\n        DropVectorIndexTask{segment, column}));\n  }\n  return tasks;\n}\n\nstd::vector<SegmentTask::Ptr> 
CollectionImpl::build_drop_scalar_index_task(\n    const std::vector<Segment::Ptr> &segments, const std::string &column) {\n  std::vector<SegmentTask::Ptr> tasks;\n  for (auto &segment : segments) {\n    tasks.emplace_back(SegmentTask::CreateDropScalarIndexTask(\n        DropScalarIndexTask(segment, {column})));\n  }\n  return tasks;\n}\n\nStatus CollectionImpl::Optimize(const OptimizeOptions &options) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n  // when optimizing, schema operations(include another optimize) are not\n  // allowed\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  std::vector<Segment::Ptr> persist_segments;\n\n  {\n    // forbidden writing for a while\n    std::lock_guard write_lock(write_mtx_);\n\n    if (writing_segment_->doc_count() != 0) {\n      // flush and create new segment\n      auto s = switch_to_new_segment_for_writing();\n      if (!s.ok()) {\n        return s;\n      }\n    }\n\n    persist_segments =\n        get_all_persist_segments();  // will not return writing segment\n    // after leave this scope, writing action is allowed\n  }\n\n  if (persist_segments.size() == 0) {\n    // no need to optimize\n    return Status::OK();\n  }\n\n  // build segment compact task\n  auto delete_store_clone = delete_store_->clone();\n  auto tasks =\n      build_compact_task(schema_, persist_segments, options.concurrency_,\n                         delete_store_clone->make_filter());\n\n  // execute segment compact task\n  auto s = execute_compact_task(tasks);\n  CHECK_RETURN_STATUS(s);\n\n  {\n    // forbidden writing for updating version\n    // writing action may trigger updating version where confict occurs\n    std::lock_guard write_lock(write_mtx_);\n\n    Version new_version = version_manager_->get_current_version();\n\n    for (auto &task : tasks) {\n      auto task_info = task->task_info();\n\n      if (std::holds_alternative<CompactTask>(task_info)) {\n        auto compact_task = 
std::get<CompactTask>(task_info);\n\n        // 0. check if has output segment meta\n        if (compact_task.output_segment_meta_) {\n          // 1. rename built tmp segments\n          auto tmp_segment_id = compact_task.output_segment_id_;\n          auto tmp_segment_path =\n              FileHelper::MakeTempSegmentPath(path_, tmp_segment_id);\n\n          auto new_segment_id = allocate_segment_id();\n          auto new_segment_path =\n              FileHelper::MakeSegmentPath(path_, new_segment_id);\n\n          if (!FileHelper::MoveDirectory(tmp_segment_path, new_segment_path)) {\n            return Status::InternalError(\"move segment directory failed\");\n          }\n\n          // update output_segment_meta_'s segment id\n          compact_task.output_segment_meta_->set_id(new_segment_id);\n\n          s = new_version.add_persisted_segment_meta(\n              compact_task.output_segment_meta_);\n          CHECK_RETURN_STATUS(s);\n          new_version.set_next_segment_id(segment_id_allocator_.load());\n        }\n\n        for (auto input_segment : compact_task.input_segments_) {\n          s = new_version.remove_persisted_segment_meta(input_segment->id());\n          CHECK_RETURN_STATUS(s);\n        }\n      } else if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {\n        auto create_index_task = std::get<CreateVectorIndexTask>(task_info);\n        s = new_version.update_persisted_segment_meta(\n            create_index_task.output_segment_meta_);\n        CHECK_RETURN_STATUS(s);\n      }\n    }\n\n    // 2. update version\n    s = version_manager_->apply(new_version);\n    CHECK_RETURN_STATUS(s);\n\n    // 3. persist version\n    s = version_manager_->flush();\n    CHECK_RETURN_STATUS(s);\n\n    // 4. 
remove old segments or block\n    for (auto &task : tasks) {\n      auto task_info = task->task_info();\n\n      if (std::holds_alternative<CompactTask>(task_info)) {\n        auto compact_task = std::get<CompactTask>(task_info);\n\n        if (compact_task.output_segment_meta_) {\n          auto new_segment =\n              Segment::Open(path_, *schema_, *compact_task.output_segment_meta_,\n                            id_map_, delete_store_, version_manager_,\n                            SegmentOptions{true, options_.enable_mmap_});\n          if (!new_segment.has_value()) {\n            return new_segment.error();\n          }\n          s = segment_manager_->add_segment(new_segment.value());\n          CHECK_RETURN_STATUS(s);\n        }\n\n        for (auto input_segment : compact_task.input_segments_) {\n          s = segment_manager_->destroy_segment(input_segment->id());\n          CHECK_RETURN_STATUS(s);\n        }\n      } else if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {\n        auto create_index_task = std::get<CreateVectorIndexTask>(task_info);\n\n        s = create_index_task.input_segment_->reload_vector_index(\n            *schema_, create_index_task.output_segment_meta_,\n            create_index_task.output_vector_indexers_,\n            create_index_task.output_quant_vector_indexers_);\n        CHECK_RETURN_STATUS(s);\n      }\n    }\n  }\n\n  return Status::OK();\n}\n\nstd::vector<SegmentTask::Ptr> CollectionImpl::build_compact_task(\n    const CollectionSchema::Ptr &schema,\n    const std::vector<Segment::Ptr> &segments, int concurrency,\n    const IndexFilter::Ptr filter) {\n  std::vector<SegmentTask::Ptr> tasks;\n  if (segments.empty()) return tasks;\n\n  bool rebuild = false;\n  size_t current_doc_count = 0;\n  size_t current_actual_doc_count = 0;\n  for (auto &segment : segments) {\n    current_doc_count += segment->doc_count();\n    current_actual_doc_count += segment->doc_count(filter);\n  }\n  if 
(current_actual_doc_count <\n      current_doc_count * (1 - COMPACT_DELETE_RATIO_THRESHOLD)) {\n    // if delete ratio is large enough, rebuild\n    rebuild = true;\n  }\n\n  auto max_doc_count_per_segment = schema->max_doc_count_per_segment();\n\n  std::vector<Segment::Ptr> current_group;\n  current_doc_count = 0;\n  current_actual_doc_count = 0;\n\n  for (const auto &seg : segments) {\n    uint64_t doc_count = seg->doc_count();\n    uint64_t actual_doc_count = seg->doc_count(filter);\n\n    if (!current_group.empty()) {\n      SegmentTask::Ptr task;\n      bool skip_task{false};\n      if (rebuild) {\n        if (current_actual_doc_count + actual_doc_count >\n            max_doc_count_per_segment) {\n          // only create SegmentCompactTask when rebuild=true\n          task = SegmentTask::CreateComapctTask(\n              CompactTask{path_, schema, current_group,\n                          allocate_segment_id_for_tmp_segment(), filter,\n                          !options_.enable_mmap_, concurrency});\n        }\n      } else {\n        if (current_doc_count + doc_count > max_doc_count_per_segment) {\n          // check current_group size\n          if (current_group.size() == 1) {\n            task =\n                SegmentTask::CreateCreateVectorIndexTask(CreateVectorIndexTask{\n                    current_group[0], \"\", nullptr, concurrency});\n            skip_task = current_group[0]->all_vector_index_ready();\n          } else {\n            task = SegmentTask::CreateComapctTask(\n                CompactTask{path_, schema, current_group,\n                            allocate_segment_id_for_tmp_segment(), nullptr,\n                            !options_.enable_mmap_, concurrency});\n          }\n        }\n      }\n\n      if (task) {\n        current_group.clear();\n        current_doc_count = 0;\n        current_actual_doc_count = 0;\n        if (!skip_task) {\n          tasks.push_back(task);\n        }\n      }\n    }\n\n    
current_group.push_back(seg);\n    current_doc_count += doc_count;\n    current_actual_doc_count += actual_doc_count;\n  }\n\n  if (current_group.size() > 0) {\n    SegmentTask::Ptr task;\n    if (current_group.size() == 1 && !rebuild) {\n      task = SegmentTask::CreateCreateVectorIndexTask(\n          CreateVectorIndexTask{current_group[0], \"\", nullptr, concurrency});\n    } else {\n      task = SegmentTask::CreateComapctTask(CompactTask{\n          path_, schema, current_group, allocate_segment_id_for_tmp_segment(),\n          rebuild ? filter : nullptr, !options_.enable_mmap_, concurrency});\n    }\n    tasks.push_back(task);\n  }\n\n  return tasks;\n}\n\nStatus CollectionImpl::execute_compact_task(\n    std::vector<SegmentTask::Ptr> &tasks) const {\n  Status s;\n  for (auto &task : tasks) {\n    s = SegmentHelper::Execute(task);\n    if (!s.ok()) {\n      return s;\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::validate(const std::string &column,\n                                const FieldSchema::Ptr &schema,\n                                const std::string &expression,\n                                const std::string &rename, ColumnOp op) {\n  auto check_data_type = [&](const FieldSchema *field) -> Status {\n    if (field->data_type() < DataType::INT32 ||\n        field->data_type() > DataType::DOUBLE) {\n      return Status::InvalidArgument(\n          \"Only support basic numeric data type [int32, int64, uint32, uint64, \"\n          \"float, double]: \",\n          field->to_string());\n    }\n    return Status::OK();\n  };\n\n  switch (op) {\n    case ColumnOp::ADD: {\n      if (schema == nullptr) {\n        return Status::InvalidArgument(\"Column schema is null\");\n      }\n\n      if (schema->name().empty()) {\n        return Status::InvalidArgument(\"Column name is empty\");\n      }\n      if (schema_->has_field(schema->name())) {\n        return Status::InvalidArgument(\"column already exists\");\n      }\n\n      auto s = 
schema->validate();\n      CHECK_RETURN_STATUS(s);\n\n      s = check_data_type(schema.get());\n      CHECK_RETURN_STATUS(s);\n\n      if (expression.empty() && !schema->nullable()) {\n        return Status::InvalidArgument(\n            \"Add column is not supported for non-nullable column\");\n      }\n\n      break;\n    }\n    case ColumnOp::ALTER: {\n      if (column.empty()) {\n        return Status::InvalidArgument(\"column name is empty\");\n      }\n\n      if (!schema_->has_field(column)) {\n        return Status::InvalidArgument(\"column \", column, \" not found\");\n      }\n\n      if (!rename.empty() && schema) {\n        return Status::InvalidArgument(\n            \"cannot specify both rename and new column schema\");\n      }\n\n      auto *old_field_schema = schema_->get_field(column);\n      auto s = check_data_type(old_field_schema);\n      CHECK_RETURN_STATUS(s);\n\n      if (!rename.empty()) {\n        // rename case\n        if (schema_->has_field(rename)) {\n          return Status::InvalidArgument(\"new column name \", rename,\n                                         \" already exists\");\n        }\n      } else {\n        // schema change case\n        if (!schema) {\n          return Status::InvalidArgument(\"New column schema is null\");\n        }\n\n        s = schema->validate();\n        CHECK_RETURN_STATUS(s);\n\n        if (schema->name().empty()) {\n          return Status::InvalidArgument(\"new column schema name is empty\");\n        }\n\n        if (!schema->nullable() && old_field_schema->nullable()) {\n          return Status::InvalidArgument(\n              \"new column schema is not nullable, but old column schema is \"\n              \"nullable\");\n        }\n\n        if (*old_field_schema == *schema) {\n          // equal schema\n          return Status::OK();\n        }\n\n        s = check_data_type(schema.get());\n        CHECK_RETURN_STATUS(s);\n      }\n\n      break;\n    }\n    case ColumnOp::DROP: {\n      if 
(!schema_->has_field(column)) {\n        return Status::InvalidArgument(\"Column not exists: \", column);\n      }\n\n      auto *old_field_schema = schema_->get_field(column);\n      auto s = check_data_type(old_field_schema);\n      CHECK_RETURN_STATUS(s);\n      break;\n    }\n    default:\n      break;\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::AddColumn(const FieldSchema::Ptr &column_schema,\n                                 const std::string &expression,\n                                 const AddColumnOptions &options) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  // validate\n  auto s = validate(\"\", column_schema, expression, \"\", ColumnOp::ADD);\n  CHECK_RETURN_STATUS(s);\n\n  // forbidden writing until index is ready\n  std::lock_guard write_lock(write_mtx_);\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema_);\n  s = new_schema->add_field(column_schema);\n  CHECK_RETURN_STATUS(s);\n\n  if (writing_segment_->doc_count() > 0) {\n    s = switch_to_new_segment_for_writing();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  Version new_version = version_manager_->get_current_version();\n\n  // add column on segment manager\n  s = segment_manager_->add_column(column_schema, expression,\n                                   options.concurrency_);\n  CHECK_RETURN_STATUS(s);\n\n  // reset writing segment with new schema\n  auto id = writing_segment_->id();\n  auto min_doc_id = writing_segment_->meta()->min_doc_id();\n\n  s = writing_segment_->destroy();\n  CHECK_RETURN_STATUS(s);\n  writing_segment_.reset();\n\n  SegmentOptions seg_options;\n  seg_options.enable_mmap_ = options_.enable_mmap_;\n  seg_options.max_buffer_size_ = options_.max_buffer_size_;\n  seg_options.read_only_ = options_.read_only_;\n  auto writing_segment =\n      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,\n                             
delete_store_, version_manager_, seg_options);\n  if (!writing_segment) {\n    return writing_segment.error();\n  }\n  writing_segment_ = writing_segment.value();\n\n  // update new version\n  new_version.set_schema(*new_schema);\n  new_version.reset_writing_segment_meta(writing_segment_->meta());\n\n  auto new_segment_metas = segment_manager_->get_segments_meta();\n  for (auto meta : new_segment_metas) {\n    s = new_version.update_persisted_segment_meta(meta);\n    CHECK_RETURN_STATUS(s);\n  }\n\n  s = version_manager_->apply(new_version);\n  CHECK_RETURN_STATUS(s);\n\n  // persist manifest\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  schema_ = new_schema;\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::DropColumn(const std::string &column_name) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  // validate\n  auto s = validate(column_name, nullptr, \"\", \"\", ColumnOp::DROP);\n  CHECK_RETURN_STATUS(s);\n\n  // forbidden writing until index is ready\n  std::lock_guard write_lock(write_mtx_);\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema_);\n  s = new_schema->drop_field(column_name);\n  CHECK_RETURN_STATUS(s);\n\n  if (writing_segment_->doc_count() > 0) {\n    s = switch_to_new_segment_for_writing();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  Version new_version = version_manager_->get_current_version();\n\n  // drop column on segment manager\n  s = segment_manager_->drop_column(column_name);\n  CHECK_RETURN_STATUS(s);\n\n  // reset writing segment with new schema\n  auto id = writing_segment_->id();\n  auto min_doc_id = writing_segment_->meta()->min_doc_id();\n\n  s = writing_segment_->destroy();\n  CHECK_RETURN_STATUS(s);\n  writing_segment_.reset();\n\n  SegmentOptions seg_options;\n  seg_options.enable_mmap_ = options_.enable_mmap_;\n  seg_options.max_buffer_size_ = options_.max_buffer_size_;\n  seg_options.read_only_ 
= options_.read_only_;\n  auto writing_segment =\n      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,\n                             delete_store_, version_manager_, seg_options);\n  if (!writing_segment) {\n    return writing_segment.error();\n  }\n  writing_segment_ = writing_segment.value();\n\n  // update new version\n  new_version.set_schema(*new_schema);\n  new_version.reset_writing_segment_meta(writing_segment_->meta());\n\n  auto new_segment_metas = segment_manager_->get_segments_meta();\n  for (auto meta : new_segment_metas) {\n    s = new_version.update_persisted_segment_meta(meta);\n    CHECK_RETURN_STATUS(s);\n  }\n\n  s = version_manager_->apply(new_version);\n  CHECK_RETURN_STATUS(s);\n\n  // persist manifest\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  schema_ = new_schema;\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::AlterColumn(const std::string &column_name,\n                                   const std::string &rename,\n                                   const FieldSchema::Ptr &new_column_schema,\n                                   const AlterColumnOptions &options) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::lock_guard lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  // validate\n  auto s =\n      validate(column_name, new_column_schema, \"\", rename, ColumnOp::ALTER);\n  CHECK_RETURN_STATUS(s);\n\n  // forbidden writing until index is ready\n  std::lock_guard write_lock(write_mtx_);\n\n  std::shared_ptr<FieldSchema> new_field_schema{nullptr};\n  if (!rename.empty()) {\n    new_field_schema =\n        std::make_shared<FieldSchema>(*schema_->get_field(column_name));\n    new_field_schema->set_name(rename);\n  } else {\n    new_field_schema = std::make_shared<FieldSchema>(*new_column_schema);\n  }\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema_);\n  s = new_schema->alter_field(column_name, new_field_schema);\n  
CHECK_RETURN_STATUS(s);\n\n  if (writing_segment_->doc_count() > 0) {\n    s = switch_to_new_segment_for_writing();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  Version new_version = version_manager_->get_current_version();\n\n  // alter column on segment manager\n  s = segment_manager_->alter_column(column_name, new_field_schema,\n                                     options.concurrency_);\n  CHECK_RETURN_STATUS(s);\n\n  // reset writing segment with new schema\n  auto id = writing_segment_->id();\n  auto min_doc_id = writing_segment_->meta()->min_doc_id();\n\n  s = writing_segment_->destroy();\n  CHECK_RETURN_STATUS(s);\n  writing_segment_.reset();\n\n  SegmentOptions seg_options;\n  seg_options.enable_mmap_ = options_.enable_mmap_;\n  seg_options.max_buffer_size_ = options_.max_buffer_size_;\n  seg_options.read_only_ = options_.read_only_;\n  auto writing_segment =\n      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,\n                             delete_store_, version_manager_, seg_options);\n  if (!writing_segment) {\n    return writing_segment.error();\n  }\n  writing_segment_ = writing_segment.value();\n\n  // update new version\n  new_version.set_schema(*new_schema);\n  new_version.reset_writing_segment_meta(writing_segment_->meta());\n\n  auto new_segment_metas = segment_manager_->get_segments_meta();\n  for (auto meta : new_segment_metas) {\n    s = new_version.update_persisted_segment_meta(meta);\n    CHECK_RETURN_STATUS(s);\n  }\n\n  s = version_manager_->apply(new_version);\n  CHECK_RETURN_STATUS(s);\n\n  // persist manifest\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  schema_ = new_schema;\n\n  return Status::OK();\n}\n\nResult<WriteResults> CollectionImpl::Insert(std::vector<Doc> &docs) {\n  return write_impl(docs, WriteMode::INSERT);\n}\n\nResult<WriteResults> CollectionImpl::Update(std::vector<Doc> &docs) {\n  return write_impl(docs, WriteMode::UPDATE);\n}\n\nResult<WriteResults> 
CollectionImpl::Upsert(std::vector<Doc> &docs) {\n  return write_impl(docs, WriteMode::UPSERT);\n}\n\nStatus CollectionImpl::internal_fetch_by_doc(const Doc &doc,\n                                             Doc::Ptr *doc_out) {\n  auto segments = get_all_segments();\n  uint64_t doc_id;\n  bool has = id_map_->has(doc.pk(), &doc_id);\n  if (!has) {\n    return Status::NotFound(\"Document not found\");\n  }\n  if (delete_store_->is_deleted(doc_id)) {\n    return Status::NotFound(\"Document already deleted\");\n  }\n\n  auto segment = local_segment_by_doc_id(doc_id, segments);\n  if (!segment) {\n    LOG_WARN(\"doc_id: %zu segment not found\", (size_t)doc_id);\n    return Status::InternalError(\"Segment not found\");\n  }\n\n  auto old_doc = segment->Fetch(doc_id);\n  if (!old_doc) {\n    LOG_WARN(\"doc_id: %zu fetch doc failed\", (size_t)doc_id);\n    return Status::InternalError(\"Fetch doc failed\");\n  }\n  *doc_out = old_doc;\n  return Status::OK();\n}\n\nStatus CollectionImpl::handle_upsert(Doc &doc) {\n  return writing_segment_->Upsert(doc);\n}\n\nStatus CollectionImpl::handle_update(Doc &doc) {\n  Doc::Ptr old_doc{nullptr};\n  auto s = internal_fetch_by_doc(doc, &old_doc);\n  CHECK_RETURN_STATUS(s);\n\n  old_doc->merge(doc);\n  return writing_segment_->Update(*old_doc);\n}\n\nStatus CollectionImpl::handle_insert(Doc &doc) {\n  return writing_segment_->Insert(doc);\n}\n\nResult<WriteResults> CollectionImpl::write_impl(std::vector<Doc> &docs,\n                                                WriteMode mode) {\n  CHECK_READONLY_RETURN_STATUS_EXPECTED();\n\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  for (auto &&doc : docs) {\n    auto validate = doc.validate(schema_, mode == WriteMode::UPDATE);\n    CHECK_RETURN_STATUS_EXPECTED(validate);\n  }\n\n  // TODO: The granularity of the write_lock is too coarse.\n  std::lock_guard write_lock(write_mtx_);\n\n  WriteResults results;\n  // validate write 
batch size\n  if (docs.size() > kMaxWriteBatchSize) {\n    CHECK_RETURN_STATUS_EXPECTED(Status::InvalidArgument(\"Too many docs\"));\n  }\n\n  // validate docs\n  for (auto &&doc : docs) {\n    if (need_switch_to_new_segment()) {\n      auto s = switch_to_new_segment_for_writing();\n      CHECK_RETURN_STATUS_EXPECTED(s);\n    }\n\n    Status s;\n\n    switch (mode) {\n      case WriteMode::UPSERT:\n        s = handle_upsert(doc);\n        break;\n      case WriteMode::UPDATE:\n        s = handle_update(doc);\n        break;\n      case WriteMode::INSERT:\n        s = handle_insert(doc);\n        break;\n      default:\n        s = Status::InvalidArgument(\"Invalid write mode\");\n    }\n\n    results.push_back(s);\n  }\n\n  return results;\n}\n\nbool CollectionImpl::need_switch_to_new_segment() const {\n  return writing_segment_->doc_count() >= schema_->max_doc_count_per_segment();\n}\n\nStatus CollectionImpl::switch_to_new_segment_for_writing(\n    const CollectionSchema::Ptr &schema) {\n  auto s = writing_segment_->dump();\n  CHECK_RETURN_STATUS(s);\n\n  s = segment_manager_->add_segment(writing_segment_);\n  CHECK_RETURN_STATUS(s);\n\n  // when create new segment, segment meta should create a first new block\n  // meta\n  auto new_segment = Segment::CreateAndOpen(\n      path_, schema == nullptr ? 
*schema_ : *schema, allocate_segment_id(),\n      writing_segment_->meta()->max_doc_id() + 1, id_map_, delete_store_,\n      version_manager_,\n      SegmentOptions{false, options_.enable_mmap_, options_.max_buffer_size_});\n  if (!new_segment) {\n    return new_segment.error();\n  }\n\n  Version version = version_manager_->get_current_version();\n  auto writing_segment_meta = writing_segment_->meta();\n  writing_segment_meta->remove_writing_forward_block();\n  s = version.add_persisted_segment_meta(writing_segment_meta);\n  CHECK_RETURN_STATUS(s);\n\n  writing_segment_ = new_segment.value();\n  version.reset_writing_segment_meta(writing_segment_->meta());\n  version.set_next_segment_id(segment_id_allocator_.load());\n\n  s = version_manager_->apply(version);\n  CHECK_RETURN_STATUS(s);\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  return Status::OK();\n}\n\nResult<WriteResults> CollectionImpl::Delete(\n    const std::vector<std::string> &pks) {\n  CHECK_READONLY_RETURN_STATUS_EXPECTED();\n\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  // TODO: The granularity of the write_lock is too coarse.\n  std::lock_guard write_lock(write_mtx_);\n  WriteResults results;\n  for (auto &&pk : pks) {\n    Status s = writing_segment_->Delete(pk);\n    results.push_back(s);\n  }\n\n  return results;\n}\n\nStatus CollectionImpl::DeleteByFilter(const std::string &filter) {\n  CHECK_COLLECTION_READONLY_RETURN_STATUS;\n\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);\n\n  auto segments = get_all_segments();\n\n  VectorQuery query;\n  query.filter_ = filter;\n  query.topk_ = INT32_MAX;\n  query.output_fields_ = std::vector<std::string>{};\n  query.include_doc_id_ = true;\n\n  auto ret = sql_engine_->execute(schema_, query, get_all_segments());\n  if (!ret.has_value()) {\n    return ret.error();\n  }\n\n  // TODO: The granularity of the write_lock is 
too coarse.\n  std::lock_guard write_lock(write_mtx_);\n  for (auto &doc : ret.value()) {\n    Status s = writing_segment_->Delete(doc->doc_id());\n    if (!s.ok()) {\n      LOG_ERROR(\"Delete doc_id failed\");\n      return s;\n    }\n  }\n\n  return Status::OK();\n}\n\nResult<DocPtrList> CollectionImpl::Query(const VectorQuery &query) const {\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  auto s = query.validate(schema_->get_vector_field(query.field_name_));\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  auto segments = get_all_segments();\n  if (segments.empty()) {\n    return DocPtrList();\n  }\n\n  return sql_engine_->execute(schema_, query, segments);\n}\n\nResult<GroupResults> CollectionImpl::GroupByQuery(\n    const GroupByVectorQuery &query) const {\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  auto segments = get_all_segments();\n  if (segments.empty()) {\n    return GroupResults();\n  }\n\n  return sql_engine_->execute_group_by(schema_, query, segments);\n}\n\nResult<DocPtrMap> CollectionImpl::Fetch(\n    const std::vector<std::string> &pks) const {\n  std::shared_lock lock(schema_handle_mtx_);\n\n  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);\n\n  auto segments = get_all_segments();\n\n  DocPtrMap results;\n\n  for (auto &pk : pks) {\n    uint64_t doc_id;\n    bool has = id_map_->has(pk, &doc_id);\n    if (!has) {\n      results.insert({pk, nullptr});\n      continue;\n    }\n    if (delete_store_->is_deleted(doc_id)) {\n      results.insert({pk, nullptr});\n      continue;\n    }\n    auto segment = local_segment_by_doc_id(doc_id, segments);\n    if (!segment) {\n      LOG_WARN(\"doc_id: %zu segment not found\", (size_t)doc_id);\n      results.insert({pk, nullptr});\n      continue;\n    }\n    results.insert({pk, segment->Fetch(doc_id)});\n  }\n\n  return results;\n}\n\nStatus CollectionImpl::recovery() {\n  if 
(!FileHelper::DirectoryExists(path_.c_str())) {\n    return Status::InvalidArgument(\"collection path{\", path_, \"} not exist.\");\n  }\n\n  // get lock file\n  auto s = acquire_file_lock(false);\n  CHECK_RETURN_STATUS(s);\n\n  // recovery version first\n  auto version_manager = VersionManager::Recovery(path_);\n  if (!version_manager.has_value()) {\n    return version_manager.error();\n  }\n\n  version_manager_ = version_manager.value();\n  const auto v = version_manager_->get_current_version();\n  schema_ = std::make_shared<CollectionSchema>(v.schema());\n  options_.enable_mmap_ = v.enable_mmap();\n  s = recover_idmap_and_delete_store();\n  CHECK_RETURN_STATUS(s);\n\n  // recover persist segments\n  segment_manager_ = std::make_shared<SegmentManager>();\n\n  auto segment_metas = v.persisted_segment_metas();\n\n  SegmentOptions seg_options;\n  seg_options.read_only_ = true;\n  seg_options.enable_mmap_ = options_.enable_mmap_;\n  for (size_t i = 0; i < segment_metas.size(); ++i) {\n    auto segment = Segment::Open(path_, *schema_, *segment_metas[i], id_map_,\n                                 delete_store_, version_manager_, seg_options);\n    if (!segment) {\n      return segment.error();\n    }\n\n    segment_manager_->add_segment(segment.value());\n  }\n\n  seg_options.read_only_ = options_.read_only_;\n  seg_options.max_buffer_size_ = options_.max_buffer_size_;\n\n  // recover writing segment\n  auto writing_segment =\n      Segment::Open(path_, *schema_, *v.writing_segment_meta(), id_map_,\n                    delete_store_, version_manager_, seg_options);\n  if (!writing_segment) {\n    return writing_segment.error();\n  }\n\n  writing_segment_ = writing_segment.value();\n  segment_id_allocator_.store(v.next_segment_id());\n\n  // recover id map & delete store\n  return Status::OK();\n}\n\nStatus CollectionImpl::recover_idmap_and_delete_store() {\n  const auto v = version_manager_->get_current_version();\n\n  // idmap\n  std::string idmap_path =\n      
FileHelper::MakeFilePath(path_, FileID::ID_FILE, v.id_map_path_suffix());\n  id_map_ = IDMap::CreateAndOpen(schema_->name(), idmap_path, false,\n                                 options_.read_only_);\n  if (!id_map_) {\n    return Status::InternalError(\"recovery idmap failed\");\n  }\n\n  // delete store\n  std::string delete_store_path = FileHelper::MakeFilePath(\n      path_, FileID::DELETE_FILE, v.delete_snapshot_path_suffix());\n  delete_store_ =\n      DeleteStore::CreateAndLoad(schema_->name(), delete_store_path);\n  if (!delete_store_) {\n    return Status::InternalError(\"recovery delete store failed\");\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::create() {\n  // check path\n  if (path_.empty()) {\n    return Status::InvalidArgument(\"path validate failed: path is empty\");\n  }\n  if (!std::regex_match(path_, COLLECTION_PATH_REGEX)) {\n    return Status::InvalidArgument(\"path validate failed: path[\", path_,\n                                   \"] cannot pass the regex verification\");\n  }\n  if (ailego::FileHelper::IsExist(path_.c_str())) {\n    return Status::InvalidArgument(\"path validate failed: path[\", path_,\n                                   \"] exists\");\n  }\n\n  // check schema\n  auto s = schema_->validate();\n  CHECK_RETURN_STATUS(s);\n\n  if (!ailego::FileHelper::MakePath(path_.c_str())) {\n    return Status::InvalidArgument(\"create collection path failed: \", path_,\n                                   \", error: \", strerror(errno));\n  }\n\n  // init lock file\n  s = acquire_file_lock(true);\n  CHECK_RETURN_STATUS(s);\n\n  // init idmap & delete store\n  s = create_idmap_and_delete_store();\n  CHECK_RETURN_STATUS(s);\n\n  // init version manager\n  s = init_version_manager();\n  CHECK_RETURN_STATUS(s);\n\n  // create segment\n  s = init_writing_segment();\n  CHECK_RETURN_STATUS(s);\n\n  // init version\n  Version version;\n  version.set_schema(*schema_);\n  version.set_enable_mmap(options_.enable_mmap_);\n  
version.reset_writing_segment_meta(writing_segment_->meta());\n  version.set_id_map_path_suffix(0);\n  version.set_delete_snapshot_path_suffix(0);\n  version.set_next_segment_id(1);\n\n  version_manager_->apply(version);\n  s = version_manager_->flush();\n  CHECK_RETURN_STATUS(s);\n\n  segment_id_allocator_.store(1);\n  segment_manager_ = std::make_unique<SegmentManager>();\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::create_idmap_and_delete_store() {\n  // idmap\n  std::string idmap_path = FileHelper::MakeFilePath(path_, FileID::ID_FILE, 0);\n  id_map_ = IDMap::CreateAndOpen(schema_->name(), idmap_path, true,\n                                 options_.read_only_);\n  if (!id_map_) {\n    return Status::InternalError(\"create id map failed\");\n  }\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(path_, FileID::DELETE_FILE, 0);\n  delete_store_ = std::make_shared<DeleteStore>(schema_->name());\n  // when first create collection, delete store will flush a empty snapshot\n  delete_store_->flush(delete_store_path);\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::init_version_manager() {\n  // use empty version to init version manager\n  auto version_manager = VersionManager::Create(path_, Version{});\n  if (!version_manager.has_value()) {\n    return version_manager.error();\n  }\n\n  version_manager_ = version_manager.value();\n  return Status::OK();\n}\n\nStatus CollectionImpl::init_writing_segment() {\n  SegmentOptions options;\n  options.enable_mmap_ = options_.enable_mmap_;\n  options.max_buffer_size_ = options_.max_buffer_size_;\n  options.read_only_ = options_.read_only_;\n\n  auto writing_segment = Segment::CreateAndOpen(\n      path_, *schema_, 0, 0, id_map_, delete_store_, version_manager_, options);\n\n  if (!writing_segment) {\n    return writing_segment.error();\n  }\n\n  writing_segment_ = writing_segment.value();\n\n  return Status::OK();\n}\n\nStatus CollectionImpl::acquire_file_lock(bool create) {\n  std::string 
lock_file_path = ailego::StringHelper::Concat(path_, \"/\", \"LOCK\");\n\n  if (create) {\n    if (!lock_file_.create(lock_file_path.c_str(), 0)) {\n      return Status::InternalError(\"Can't create lock file\");\n    }\n  } else {\n    if (!lock_file_.open(lock_file_path.c_str(), false)) {\n      return Status::InternalError(\"Can't open lock file\");\n    }\n  }\n\n  if (options_.read_only_) {\n    if (!ailego::FileLock::TryLockShared(lock_file_.native_handle())) {\n      return Status::InternalError(\"Can't lock read-only collection\");\n    }\n  } else {\n    if (!ailego::FileLock::TryLock(lock_file_.native_handle())) {\n      return Status::InternalError(\"Can't lock read-write collection\");\n    }\n  }\n\n  return Status::OK();\n}\n\nSegment::Ptr CollectionImpl::local_segment_by_doc_id(\n    uint64_t doc_id, const std::vector<Segment::Ptr> &segments) const {\n  size_t left = 0;\n  size_t right = segments.size();\n\n  while (left < right) {\n    size_t mid = left + (right - left) / 2;\n    uint64_t min_id = segments[mid]->meta()->min_doc_id();\n    uint64_t max_id = segments[mid]->meta()->max_doc_id();\n\n    if (doc_id < min_id) {\n      right = mid;\n    } else if (doc_id > max_id) {\n      left = mid + 1;\n    } else {\n      return segments[mid];\n    }\n  }\n\n  return nullptr;\n}\n\nstd::vector<Segment::Ptr> CollectionImpl::get_all_segments() const {\n  std::vector<Segment::Ptr> segments = get_all_persist_segments();\n  if (writing_segment_->doc_count() > 0) {\n    segments.push_back(writing_segment_);\n  }\n  return segments;\n}\n\nstd::vector<Segment::Ptr> CollectionImpl::get_all_persist_segments() const {\n  return segment_manager_->get_segments();\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\n# proxima_zvec_store library\ncc_library(\n    NAME zvec_common STATIC STRICT ALWAYS_LINK\n    SRCS *.cc\n    LIBS    glog\n            zvec_ailego\n            roaring\n            rocksdb\n    INCS .\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n)\n"
  },
  {
    "path": "src/db/common/cgroup_util.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/common/cgroup_util.h\"\n\nnamespace zvec {\n\n// Static member definitions\nint CgroupUtil::cpu_cores_ = 0;\nuint64_t CgroupUtil::memory_limit_ = 0;\nbool CgroupUtil::initialized_ = false;\nunsigned long long CgroupUtil::last_idle_time_ = 0;\nunsigned long long CgroupUtil::last_total_time_ = 0;\nstd::chrono::steady_clock::time_point CgroupUtil::last_cpu_check_;\n\n#define ZVEC_CGROUP_MEMORY_UNLIMITED (9223372036854771712ULL)\n\n// Static initialization method\nvoid CgroupUtil::initialize() {\n  if (initialized_) {\n    return;\n  }\n\n  updateCpuCores();\n  updateMemoryLimit();\n  initializeCpuStats();\n\n  initialized_ = true;\n}\n\nint CgroupUtil::getCpuLimit() {\n  initialize();\n  return cpu_cores_;\n}\n\nuint64_t CgroupUtil::getMemoryLimit() {\n  initialize();\n  return memory_limit_;\n}\n\n// Other static methods implementation\ndouble CgroupUtil::getCpuUsage() {\n  initialize();\n  return calculateCpuUsage();\n}\n\nuint64_t CgroupUtil::getMemoryUsage() {\n  initialize();\n  return getCurrentMemoryUsage();\n}\n\nuint64_t CgroupUtil::getUptime() {\n#if defined(PLATFORM_LINUX)\n  struct sysinfo info;\n  if (sysinfo(&info) == 0) {\n    return info.uptime;\n  }\n#elif defined(PLATFORM_MACOS)\n  struct timeval boottime;\n  size_t len = sizeof(boottime);\n  int mib[2] = {CTL_KERN, KERN_BOOTTIME};\n  if (sysctl(mib, 2, 
&boottime, &len, NULL, 0) == 0) {\n    time_t bsec = boottime.tv_sec;\n    time_t csec = time(NULL);\n    return csec - bsec;\n  }\n#endif\n  return 0;\n}\n\nvoid CgroupUtil::updateCpuCores() {\n  if (readCpuCgroup()) {\n    return;\n  }\n\n#if defined(PLATFORM_MACOS)\n  int cores;\n  size_t len = sizeof(cores);\n  if (sysctlbyname(\"hw.ncpu\", &cores, &len, nullptr, 0) == 0) {\n    cpu_cores_ = cores;\n  } else {\n    cpu_cores_ = 1;\n  }\n#elif defined(PLATFORM_LINUX)\n  cpu_cores_ = sysconf(_SC_NPROCESSORS_ONLN);\n  if (cpu_cores_ <= 0) {\n    cpu_cores_ = 1;\n  }\n#endif\n}\n\nbool CgroupUtil::readCpuCgroup() {\n#if defined(PLATFORM_LINUX)\n  // cgroup v2\n  std::ifstream file(\"/sys/fs/cgroup/cpu.max\");\n  if (file.is_open()) {\n    uint64_t quota, period;\n    char slash;\n    file >> quota >> slash >> period;\n    file.close();\n\n    if (quota != std::numeric_limits<uint64_t>::max() && quota != 0 &&\n        period > 0) {\n      cpu_cores_ =\n          static_cast<int>(std::ceil(static_cast<double>(quota) / period));\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n  // cgroup v1\n  std::ifstream quota_file(\"/sys/fs/cgroup/cpu/cpu.cfs_quota_us\");\n  std::ifstream period_file(\"/sys/fs/cgroup/cpu/cpu.cfs_period_us\");\n\n  if (quota_file.is_open() && period_file.is_open()) {\n    long long quota, period;\n    quota_file >> quota;\n    period_file >> period;\n    quota_file.close();\n    period_file.close();\n\n    if (quota > 0 && period > 0) {\n      cpu_cores_ =\n          static_cast<int>(std::ceil(static_cast<double>(quota) / period));\n      return true;\n    }\n  }\n#endif\n  return false;\n}\n\nvoid CgroupUtil::updateMemoryLimit() {\n  if (readMemoryCgroup()) {\n    return;\n  }\n\n#if defined(PLATFORM_MACOS)\n  uint64_t mem;\n  size_t len = sizeof(mem);\n  if (sysctlbyname(\"hw.memsize\", &mem, &len, nullptr, 0) == 0) {\n    memory_limit_ = mem;\n  } else {\n    memory_limit_ = 0;\n  }\n#elif defined(PLATFORM_LINUX)\n  long 
pages = sysconf(_SC_PHYS_PAGES);\n  long page_size = sysconf(_SC_PAGE_SIZE);\n  if (pages > 0 && page_size > 0) {\n    memory_limit_ = static_cast<uint64_t>(pages) * page_size;\n  } else {\n    memory_limit_ = 0;\n  }\n#endif\n}\n\nbool CgroupUtil::readMemoryCgroup() {\n#if defined(PLATFORM_LINUX)\n  // cgroup v2\n  std::ifstream file(\"/sys/fs/cgroup/memory.max\");\n  if (file.is_open()) {\n    uint64_t limit;\n    file >> limit;\n    file.close();\n\n    if (limit != std::numeric_limits<uint64_t>::max() && limit != 0 &&\n        limit != ZVEC_CGROUP_MEMORY_UNLIMITED) {\n      memory_limit_ = limit;\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n  // cgroup v1\n  std::ifstream v1_file(\"/sys/fs/cgroup/memory/memory.limit_in_bytes\");\n  if (v1_file.is_open()) {\n    uint64_t limit;\n    v1_file >> limit;\n    v1_file.close();\n\n    if (limit < std::numeric_limits<uint64_t>::max() &&\n        limit != ZVEC_CGROUP_MEMORY_UNLIMITED) {\n      memory_limit_ = limit;\n      return true;\n    }\n  }\n#endif\n  return false;\n}\n\nvoid CgroupUtil::initializeCpuStats() {\n  last_cpu_check_ = std::chrono::steady_clock::now();\n#if defined(PLATFORM_LINUX)\n  readProcStat();\n#endif\n}\n\n#if defined(PLATFORM_LINUX)\nbool CgroupUtil::readProcStat() {\n  std::ifstream file(\"/proc/stat\");\n  if (!file.is_open()) {\n    return false;\n  }\n\n  std::string line;\n  if (!std::getline(file, line)) {\n    return false;\n  }\n\n  std::istringstream iss(line);\n  std::string cpu_label;\n  iss >> cpu_label;\n\n  if (cpu_label != \"cpu\") {\n    return false;\n  }\n\n  unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;\n  iss >> user >> nice >> system >> idle >> iowait >> irq >> softirq >> steal;\n\n  unsigned long long idle_time = idle + iowait;\n  unsigned long long total_time =\n      user + nice + system + irq + softirq + steal + idle_time;\n\n  last_idle_time_ = idle_time;\n  last_total_time_ = total_time;\n\n  return 
true;\n}\n#endif\n\nuint64_t CgroupUtil::getCurrentMemoryUsage() {\n#if defined(PLATFORM_LINUX)\n  // cgroup\n  uint64_t usage = readMemoryUsageCgroup();\n  if (usage > 0) {\n    return usage;\n  }\n\n  // back to /proc/meminfo\n  return readMemoryUsageProc();\n#elif defined(PLATFORM_MACOS)\n  return getMacOSMemoryUsage();\n#endif\n}\n\n#if defined(PLATFORM_LINUX)\nuint64_t CgroupUtil::readMemoryUsageCgroup() {\n  // cgroup v2\n  std::ifstream file(\"/sys/fs/cgroup/memory.current\");\n  if (file.is_open()) {\n    uint64_t usage;\n    file >> usage;\n    file.close();\n    return usage;\n  }\n\n  // cgroup v1\n  std::ifstream v1_file(\"/sys/fs/cgroup/memory/memory.usage_in_bytes\");\n  if (v1_file.is_open()) {\n    uint64_t usage;\n    v1_file >> usage;\n    v1_file.close();\n    return usage;\n  }\n\n  return 0;\n}\n\nuint64_t CgroupUtil::readMemoryUsageProc() {\n  std::ifstream file(\"/proc/meminfo\");\n  if (!file.is_open()) {\n    return 0;\n  }\n\n  std::string line;\n  uint64_t total_mem = 0;\n  uint64_t free_mem = 0;\n  uint64_t available_mem = 0;\n  uint64_t buffers = 0;\n  uint64_t cached = 0;\n\n  while (std::getline(file, line)) {\n    if (line.find(\"MemTotal:\") == 0) {\n      total_mem = extractMemoryValue(line);\n    } else if (line.find(\"MemFree:\") == 0) {\n      free_mem = extractMemoryValue(line);\n    } else if (line.find(\"MemAvailable:\") == 0) {\n      available_mem = extractMemoryValue(line);\n    } else if (line.find(\"Buffers:\") == 0) {\n      buffers = extractMemoryValue(line);\n    } else if (line.find(\"Cached:\") == 0) {\n      cached = extractMemoryValue(line);\n    }\n  }\n\n  if (available_mem > 0 && total_mem > available_mem) {\n    return total_mem - available_mem;\n  }\n\n  if (total_mem > 0 && free_mem > 0) {\n    return total_mem - free_mem - buffers - cached;\n  }\n\n  return 0;\n}\n#endif\n\n#if defined(PLATFORM_MACOS)\nuint64_t CgroupUtil::getMacOSMemoryUsage() {\n  mach_port_t host_port = mach_host_self();\n  
mach_msg_type_number_t host_size =\n      sizeof(vm_statistics64_data_t) / sizeof(integer_t);\n  vm_size_t page_size;\n  vm_statistics64_data_t vm_stat;\n\n  if (host_page_size(host_port, &page_size) != KERN_SUCCESS) {\n    return 0;\n  }\n\n  if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat,\n                        &host_size) != KERN_SUCCESS) {\n    return 0;\n  }\n\n  uint64_t used_memory =\n      ((vm_stat.active_count + vm_stat.inactive_count + vm_stat.wire_count) *\n       page_size);\n\n  return used_memory;\n}\n#endif\n\nuint64_t CgroupUtil::extractMemoryValue(const std::string &line) {\n  size_t colon_pos = line.find(':');\n  if (colon_pos == std::string::npos) {\n    return 0;\n  }\n\n  std::string value_str = line.substr(colon_pos + 1);\n  std::istringstream iss(value_str);\n  uint64_t value;\n  std::string unit;\n\n  iss >> value;\n  if (iss >> unit) {\n    if (unit == \"kB\") {\n      value *= 1024;\n    }\n  }\n\n  return value;\n}\n\ndouble CgroupUtil::calculateCpuUsage() {\n#if defined(PLATFORM_LINUX)\n  return calculateLinuxCpuUsage();\n#elif defined(PLATFORM_MACOS)\n  return calculateMacOSCpuUsage();\n#endif\n  return 0.0;\n}\n\n#if defined(PLATFORM_LINUX)\ndouble CgroupUtil::calculateLinuxCpuUsage() {\n  if (!readProcStat()) {\n    return 0.0;\n  }\n\n  std::this_thread::sleep_for(std::chrono::milliseconds(100));\n\n  std::ifstream file(\"/proc/stat\");\n  if (!file.is_open()) {\n    return 0.0;\n  }\n\n  std::string line;\n  if (!std::getline(file, line)) {\n    return 0.0;\n  }\n\n  std::istringstream iss(line);\n  std::string cpu_label;\n  iss >> cpu_label;\n\n  if (cpu_label != \"cpu\") {\n    return 0.0;\n  }\n\n  unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;\n  iss >> user >> nice >> system >> idle >> iowait >> irq >> softirq >> steal;\n\n  unsigned long long current_idle = idle + iowait;\n  unsigned long long current_total =\n      user + nice + system + irq + softirq + steal + 
current_idle;\n\n  unsigned long long idle_delta = current_idle - last_idle_time_;\n  unsigned long long total_delta = current_total - last_total_time_;\n\n  last_idle_time_ = current_idle;\n  last_total_time_ = current_total;\n\n  if (total_delta == 0) {\n    return 0.0;\n  }\n\n  double cpu_usage =\n      100.0 * (1.0 - static_cast<double>(idle_delta) / total_delta);\n  return std::max(0.0, std::min(100.0, cpu_usage));\n}\n#endif\n\n#if defined(PLATFORM_MACOS)\ndouble CgroupUtil::calculateMacOSCpuUsage() {\n  host_cpu_load_info_data_t cpuinfo;\n  mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;\n\n  if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO,\n                      (host_info_t)&cpuinfo, &count) != KERN_SUCCESS) {\n    return 0.0;\n  }\n\n  unsigned long long total_tick =\n      cpuinfo.cpu_ticks[CPU_STATE_USER] + cpuinfo.cpu_ticks[CPU_STATE_SYSTEM] +\n      cpuinfo.cpu_ticks[CPU_STATE_NICE] + cpuinfo.cpu_ticks[CPU_STATE_IDLE];\n\n  unsigned long long idle_tick = cpuinfo.cpu_ticks[CPU_STATE_IDLE];\n\n  static unsigned long long prev_total = 0;\n  static unsigned long long prev_idle = 0;\n\n  if (prev_total == 0) {\n    prev_total = total_tick;\n    prev_idle = idle_tick;\n    std::this_thread::sleep_for(std::chrono::milliseconds(100));\n    return calculateMacOSCpuUsage();\n  }\n\n  unsigned long long total_delta = total_tick - prev_total;\n  unsigned long long idle_delta = idle_tick - prev_idle;\n\n  prev_total = total_tick;\n  prev_idle = idle_tick;\n\n  if (total_delta == 0) {\n    return 0.0;\n  }\n\n  double cpu_usage =\n      100.0 * (1.0 - static_cast<double>(idle_delta) / total_delta);\n  return std::max(0.0, std::min(100.0, cpu_usage));\n}\n#endif\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/cgroup_util.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <chrono>\n#include <cmath>\n#include <cstdint>\n#include <fstream>\n#include <iostream>\n#include <memory>\n#include <sstream>\n#include <stdexcept>\n#include <string>\n#include <thread>\n#include <unordered_map>\n#include <vector>\n\n#if defined(__APPLE__)\n#define PLATFORM_MACOS 1\n#include <mach/mach.h>\n#include <sys/sysctl.h>\n#elif defined(__linux__)\n#define PLATFORM_LINUX 1\n#include <sys/sysinfo.h>\n#include <unistd.h>\n#endif\n\nnamespace zvec {\n\nclass CgroupUtil {\n public:\n  // Static methods to get CPU and memory limits\n  static int getCpuLimit();\n  static uint64_t getMemoryLimit();\n\n  // Static methods to get other resources\n  static double getCpuUsage();\n  static uint64_t getMemoryUsage();\n  static uint64_t getUptime();\n\n private:\n  CgroupUtil() = default;\n  ~CgroupUtil() = default;\n\n  // Static member variables to store the computed values\n  static int cpu_cores_;\n  static uint64_t memory_limit_;\n  static bool initialized_;\n\n  // Other member variables for tracking state\n  static unsigned long long last_idle_time_;\n  static unsigned long long last_total_time_;\n  static std::chrono::steady_clock::time_point last_cpu_check_;\n\n  // Static initialization method\n  static void initialize();\n\n  // Helper methods (also made static)\n  static void updateCpuCores();\n  static bool readCpuCgroup();\n  
static void updateMemoryLimit();\n  static bool readMemoryCgroup();\n  static void initializeCpuStats();\n\n#if defined(PLATFORM_LINUX)\n  static bool readProcStat();\n#endif\n\n  static uint64_t getCurrentMemoryUsage();\n\n#if defined(PLATFORM_LINUX)\n  static uint64_t readMemoryUsageCgroup();\n  static uint64_t readMemoryUsageProc();\n#endif\n\n#if defined(PLATFORM_MACOS)\n  static uint64_t getMacOSMemoryUsage();\n#endif\n\n  static uint64_t extractMemoryValue(const std::string &line);\n  static double calculateCpuUsage();\n\n#if defined(PLATFORM_LINUX)\n  static double calculateLinuxCpuUsage();\n#endif\n\n#if defined(PLATFORM_MACOS)\n  static double calculateMacOSCpuUsage();\n#endif\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/concurrent_roaring_bitmap.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"concurrent_roaring_bitmap.h\"\n#include <zvec/ailego/hash/crc32c.h>\n\n\nnamespace zvec {\n\n\nStatus ConcurrentRoaringBitmap32::serialize(std::string *out) {\n  std::unique_lock<std::shared_mutex> lock(mutex_);\n  size_t bitmap_size = roaring_bitmap_portable_size_in_bytes(bitmap_);\n  out->resize(bitmap_size);\n  size_t written_size = roaring_bitmap_portable_serialize(bitmap_, out->data());\n  if (written_size == bitmap_size) {\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to serialize bitmap\");\n    return Status::InternalError();\n  }\n}\n\n\nStatus ConcurrentRoaringBitmap32::deserialize(const std::string &in) {\n  std::unique_lock<std::shared_mutex> lock(mutex_);\n  roaring_bitmap_free(bitmap_);\n  bitmap_ = nullptr;\n  bitmap_ = roaring_bitmap_portable_deserialize_safe(in.data(), in.size());\n  if (bitmap_) {\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to deserialize bitmap\");\n    return Status::InternalError();\n  }\n}\n\n\nStatus ConcurrentRoaringBitmap64::serialize(const std::string &file_path,\n                                            bool overwrite) {\n  std::unique_lock<std::shared_mutex> lock(mutex_);\n  FILE file;\n  const std::string file_str = \"[\" + file_path + \"]\";\n\n  if (FILE::IsExist(file_path)) {\n    if (!FILE::IsRegular(file_path)) {\n      auto msg = 
debug_str(file_str, \" is not a regular file\");\n      LOG_ERROR(\"%s\", msg.c_str());\n      return Status::InvalidArgument(msg);\n    }\n    if (!overwrite) {\n      auto msg = debug_str(file_str, \" already exists\");\n      LOG_ERROR(\"%s\", msg.c_str());\n      return Status::AlreadyExists(msg);\n    }\n    if (!FILE::RemovePath(file_path)) {\n      auto msg = debug_str(\"failed to remove \", file_str);\n      LOG_ERROR(\"%s\", msg.c_str());\n      return Status::InternalError(msg);\n    }\n  }\n  if (!file.create(file_path.c_str(), 0)) {\n    auto msg = debug_str(\"failed to create \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n\n  // Serialize bitmap to buffer\n  BitmapMetaHeader header;\n  size_t bitmap_size;\n  std::vector<char> bitmap_buffer;\n  if (is_32bit_) {\n    bitmap_size = bitmap32_->getSizeInBytes();\n    bitmap_buffer.resize(bitmap_size);\n    if (bitmap32_->write(bitmap_buffer.data()) != bitmap_size) {\n      auto msg = debug_str(\"failed to serialize bitmap to buffer\");\n      LOG_ERROR(\"%s\", msg.c_str());\n      return Status::InternalError(msg);\n    }\n    header.is_32bit = 1;\n  } else {\n    bitmap_size = bitmap64_->getSizeInBytes();\n    bitmap_buffer.resize(bitmap_size);\n    if (bitmap64_->write(bitmap_buffer.data()) != bitmap_size) {\n      auto msg = debug_str(\"failed to serialize bitmap to buffer\");\n      LOG_ERROR(\"%s\", msg.c_str());\n      return Status::InternalError(msg);\n    }\n    header.is_32bit = 0;\n  }\n  header.magic = roaring_magic_number;\n  header.checksum = ailego::Crc32c::Hash(bitmap_buffer.data(), bitmap_size);\n  header.timestamp = time(nullptr);\n\n  // Write meta header to file\n  if (file.write(&header, sizeof(header)) != sizeof(header)) {\n    auto msg = debug_str(\"failed to serialize header to \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n\n  // Write serialized bitmap to file\n  if 
(file.write(bitmap_buffer.data(), bitmap_size) != bitmap_size) {\n    auto msg = debug_str(\"failed to write bitmap data to \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  };\n\n  LOG_DEBUG(\"%s: serialized bitmap to file[%s], checksum[%u], timestamp[%zu]\",\n            identifier_.c_str(), file_path.c_str(), header.checksum,\n            (size_t)header.timestamp);\n  return Status::OK();\n}\n\n\nStatus ConcurrentRoaringBitmap64::deserialize(const std::string &file_path) {\n  std::unique_lock<std::shared_mutex> lock(mutex_);\n  FILE file;\n  const std::string file_str = \"[\" + file_path + \"]\";\n\n  if (!FILE::IsExist(file_path)) {\n    auto msg = debug_str(file_str, \" does not exist\");\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::NotFound(msg);\n  }\n  if (!FILE::IsRegular(file_path)) {\n    auto msg = debug_str(file_str, \" is not a regular file\");\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InvalidArgument(msg);\n  }\n  if (!file.open(file_path.c_str(), true, false)) {\n    auto msg = debug_str(\"failed to open \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n\n  // Deserialize and verify the meta header\n  BitmapMetaHeader header;\n  if (file.size() < sizeof(BitmapMetaHeader)) {\n    auto msg =\n        debug_str(file_str, \" is too small to to contain a valid bitmap\");\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n  if (file.read(&header, sizeof(header)) != sizeof(header)) {\n    auto msg = debug_str(\"failed to read meta header from \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n  if (header.magic != roaring_magic_number) {\n    auto msg = debug_str(\"magic number mismatch, \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n  if (header.is_32bit != 0 && header.is_32bit != 1) {\n    auto msg = 
debug_str(\"bitmap type mismatch, \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n  is_32bit_ = header.is_32bit == 1 ? true : false;\n\n  // Read from file to buffer\n  size_t bitmap_size = file.size() - sizeof(BitmapMetaHeader);\n  std::vector<char> bitmap_buffer(bitmap_size);\n  if (file.read(bitmap_buffer.data(), bitmap_size) != bitmap_size) {\n    auto msg = debug_str(\"failed to read bitmap data from \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n  if (header.checksum !=\n      ailego::Crc32c::Hash(bitmap_buffer.data(), bitmap_size)) {\n    auto msg = debug_str(\"checksum mismatch, \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n\n  // Deserialize from buffer\n  try {\n    if (is_32bit_) {\n      bitmap32_ = std::make_unique<roaring::Roaring>(\n          roaring::Roaring::readSafe(bitmap_buffer.data(), bitmap_size));\n    } else {\n      bitmap64_ = std::make_unique<roaring::Roaring64Map>(\n          roaring::Roaring64Map::readSafe(bitmap_buffer.data(), bitmap_size));\n    }\n  } catch (...) {\n    auto msg = debug_str(\"failed to deserialize bitmap from \", file_str);\n    LOG_ERROR(\"%s\", msg.c_str());\n    return Status::InternalError(msg);\n  }\n\n  LOG_DEBUG(\n      \"%s: deserialized bitmap from file[%s], checksum[%u], timestamp[%zu]\",\n      identifier_.c_str(), file_path.c_str(), header.checksum,\n      (size_t)header.timestamp);\n  return Status::OK();\n}\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/concurrent_roaring_bitmap.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <mutex>\n#include <shared_mutex>\n#include <sstream>\n#include <roaring.hh>\n#include <roaring64map.hh>\n#include <roaring/roaring.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/status.h>\n\n\nnamespace zvec {\n\n\n/*\n * A thread-safe 32-bit Roaring bitmap implementation.\n */\nclass ConcurrentRoaringBitmap32 {\n public:\n  using Ptr = std::shared_ptr<ConcurrentRoaringBitmap32>;\n\n  explicit ConcurrentRoaringBitmap32()\n      : identifier_(\"Roaring bitmap[32-bit]\"),\n        bitmap_(roaring_bitmap_create()) {}\n\n  ~ConcurrentRoaringBitmap32() {\n    roaring_bitmap_free(bitmap_);\n  }\n\n  ConcurrentRoaringBitmap32(const ConcurrentRoaringBitmap32 &) = delete;\n  ConcurrentRoaringBitmap32 &operator=(const ConcurrentRoaringBitmap32 &) =\n      delete;\n  ConcurrentRoaringBitmap32 &operator=(ConcurrentRoaringBitmap32 &&) = delete;\n\n\n  /*****  Serialization and Deserialization - Start  *****/\n public:\n  Status serialize(std::string *out);\n\n  Status deserialize(const std::string &in);\n  /*****  Serialization and Deserialization - End  *****/\n\n\n public:\n  bool contains(uint32_t pos) const {\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    return roaring_bitmap_contains(bitmap_, pos);\n  }\n\n\n  size_t 
cardinality() const {\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    return roaring_bitmap_get_cardinality(bitmap_);\n  }\n\n\n  size_t range_cardinality(uint32_t min_doc_id, uint32_t max_doc_id) const {\n    if (ailego_unlikely(min_doc_id > max_doc_id)) {\n      LOG_WARN(\"%s: input range min_doc_id[%u] > max_doc_id[%u]\",\n               identifier_.c_str(), min_doc_id, max_doc_id);\n      return 0;\n    }\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    uint64_t max_rank{0}, min_rank{0};\n    max_rank = roaring_bitmap_rank(bitmap_, max_doc_id);\n    min_rank = roaring_bitmap_rank(bitmap_, min_doc_id - 1);\n    return max_rank - min_rank;\n  }\n\n\n  void add(uint32_t pos) {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    roaring_bitmap_add(bitmap_, pos);\n  }\n\n\n  void clear() {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    roaring_bitmap_clear(bitmap_);\n  }\n\n\n  //! Remove all values in the closed interval [min, max]\n  void remove_range_closed(uint32_t min, uint32_t max) {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    roaring_bitmap_remove_range_closed(bitmap_, min, max);\n  }\n\n\n  size_t storage_size_in_bytes() const {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    return roaring_bitmap_portable_size_in_bytes(bitmap_);\n  }\n\n\n  roaring_bitmap_t *bitmap() const {\n    return bitmap_;\n  }\n\n\n  roaring_bitmap_t *copy() const {\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    return roaring_bitmap_copy(bitmap_);\n  }\n\n\n private:\n  std::string identifier_;\n  roaring_bitmap_t *bitmap_{nullptr};\n  mutable std::shared_mutex mutex_;\n};\n\n\n/*\n * A thread-safe Roaring bitmap implementation supporting both 32-bit and 64-bit\n * bitmaps with transparent conversion between them.\n */\nclass ConcurrentRoaringBitmap64 {\n public:\n  using Ptr = std::shared_ptr<ConcurrentRoaringBitmap64>;\n\n\n  explicit ConcurrentRoaringBitmap64()\n      : is_32bit_(true),\n        
identifier_(\"Roaring bitmap[32-bit]\"),\n        bitmap32_(std::make_unique<roaring::Roaring>()) {}\n\n  explicit ConcurrentRoaringBitmap64(const std::string &name)\n      : name_(name),\n        is_32bit_(true),\n        identifier_(\"Roaring bitmap[\" + name_ + \", 32-bit]\"),\n        bitmap32_(std::make_unique<roaring::Roaring>()) {}\n\n  ~ConcurrentRoaringBitmap64() = default;\n\n  ConcurrentRoaringBitmap64 &operator=(const ConcurrentRoaringBitmap64 &other) {\n    if (this != &other) {\n      std::unique_lock<std::shared_mutex> lock(mutex_, std::defer_lock);\n      std::shared_lock<std::shared_mutex> other_lock(other.mutex_,\n                                                     std::defer_lock);\n      std::lock(lock, other_lock);\n\n      name_ = other.name_;\n      is_32bit_ = other.is_32bit_;\n      identifier_ = other.identifier_;\n\n      if (other.is_32bit_) {\n        bitmap32_ = std::make_unique<roaring::Roaring>(*other.bitmap32_);\n        bitmap64_.reset();\n      } else {\n        bitmap64_ = std::make_unique<roaring::Roaring64Map>(*other.bitmap64_);\n        bitmap32_.reset();\n      }\n    }\n    return *this;\n  }\n\n  /*****  Serialization and Deserialization - Start  *****/\n public:\n  Status serialize(const std::string &file_path, bool overwrite);\n\n  Status deserialize(const std::string &file_path);\n\n private:\n  static const uint64_t roaring_magic_number{0x362DDA444AC1B99A};\n\n  struct BitmapMetaHeader {\n    uint64_t magic;\n    uint32_t is_32bit;\n    uint32_t checksum;\n    uint64_t timestamp;\n    uint32_t reserved_[10];\n  };\n  /*****  Serialization and Deserialization - End  *****/\n\n\n public:\n  bool contains(size_t pos) const {\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    if (is_32bit_) {\n      return bitmap32_->contains(static_cast<uint32_t>(pos));\n    } else {\n      return bitmap64_->contains(static_cast<uint64_t>(pos));\n    }\n  }\n\n\n  size_t cardinality() const {\n    
std::shared_lock<std::shared_mutex> lock(mutex_);\n    if (is_32bit_) {\n      return bitmap32_->cardinality();\n    } else {\n      return bitmap64_->cardinality();\n    }\n  }\n\n\n  size_t range_cardinality(uint64_t min_doc_id, uint64_t max_doc_id) const {\n    if (ailego_unlikely(min_doc_id > max_doc_id)) {\n      LOG_WARN(\"%s: input range min_doc_id[%zu] > max_doc_id[%zu]\",\n               identifier_.c_str(), static_cast<size_t>(min_doc_id),\n               static_cast<size_t>(max_doc_id));\n      return 0;\n    }\n    std::shared_lock<std::shared_mutex> lock(mutex_);\n    uint64_t max_rank{0}, min_rank{0};\n    if (is_32bit_) {\n      max_rank = bitmap32_->rank(max_doc_id);\n      min_rank = min_doc_id <= 0 ? 0 : bitmap32_->rank(min_doc_id - 1);\n    } else {\n      max_rank = bitmap64_->rank(max_doc_id);\n      min_rank = min_doc_id <= 0 ? 0 : bitmap64_->rank(min_doc_id - 1);\n    }\n    return max_rank - min_rank;\n  }\n\n\n  void add(size_t pos) {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    if (ailego_unlikely(pos > std::numeric_limits<uint32_t>::max() &&\n                        is_32bit_)) {\n      upgrade_from_32_to_64();\n    }\n    if (is_32bit_) {\n      return bitmap32_->add(static_cast<uint32_t>(pos));\n    } else {\n      return bitmap64_->add(static_cast<uint64_t>(pos));\n    }\n  }\n\n\n  void clear() {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    bitmap32_.reset();\n    bitmap64_.reset();\n    if (is_32bit_) {\n      bitmap32_ = std::make_unique<roaring::Roaring>();\n    } else {\n      bitmap64_ = std::make_unique<roaring::Roaring64Map>();\n    }\n  }\n\n\n  //! 
Remove all values in the closed interval [min, max]\n  void remove_range_closed(uint64_t min, uint64_t max) {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    if (!is_32bit_) {\n      return bitmap64_->removeRangeClosed(min, max);\n    }\n    if (min > std::numeric_limits<uint32_t>::max()) {\n      return;  // No valid values in the 32-bit range that can be removed\n    }\n    if (max > std::numeric_limits<uint32_t>::max()) {\n      max = std::numeric_limits<uint32_t>::max();\n    }\n    bitmap32_->removeRangeClosed(min, max);\n  }\n\n\n  size_t storage_size_in_bytes() const {\n    std::unique_lock<std::shared_mutex> lock(mutex_);\n    if (is_32bit_) {\n      return bitmap32_->getSizeInBytes() + sizeof(BitmapMetaHeader);\n    } else {\n      return bitmap64_->getSizeInBytes() + sizeof(BitmapMetaHeader);\n    }\n  }\n\n\n private:\n  using FILE = ailego::File;\n\n\n  template <typename... Args>\n  std::string debug_str(Args &&...args) {\n    std::ostringstream oss;\n    oss << identifier_ << \": \";\n    (oss << ... << args);\n    return oss.str();\n  }\n\n\n  void upgrade_from_32_to_64() {\n    if (ailego_unlikely(!is_32bit_)) {\n      LOG_WARN(\"%s: bitmap is already 64-bit\", identifier_.c_str());\n      return;\n    }\n    bitmap64_ = std::make_unique<roaring::Roaring64Map>(\n        roaring::Roaring64Map{std::move(*bitmap32_)});\n    is_32bit_ = false;\n    bitmap32_.reset();\n    identifier_ = \"Roaring bitmap[\" + name_ + \", 64-bit]\";\n    LOG_DEBUG(\"%s: upgraded to 64-bit\", identifier_.c_str());\n  }\n\n\n  std::string name_;\n  bool is_32bit_;\n  std::string identifier_;\n  std::unique_ptr<roaring::Roaring> bitmap32_{nullptr};\n  std::unique_ptr<roaring::Roaring64Map> bitmap64_{nullptr};\n  mutable std::shared_mutex mutex_;\n};\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/config.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <memory>\n#include <zvec/db/config.h>\n#include <zvec/db/status.h>\n#include \"db/common/constants.h\"\n#include \"db/common/global_resource.h\"\n#include \"cgroup_util.h\"\n#include \"global_resource.h\"\n#include \"glogger.h\"\n#include \"logger.h\"\n#include \"typedef.h\"\n\nnamespace zvec {\n\nstatic void ExitLogHandler() {\n  LogUtil::Shutdown();\n}\n\nGlobalConfig::ConfigData::ConfigData()\n    : memory_limit_bytes(CgroupUtil::getMemoryLimit() *\n                         DEFAULT_MEMORY_LIMIT_RATIO),\n      log_config(std::make_shared<ConsoleLogConfig>()),\n      query_thread_count(CgroupUtil::getCpuLimit()),\n      invert_to_forward_scan_ratio(0.9),\n      brute_force_by_keys_ratio(0.1),\n      optimize_thread_count(CgroupUtil::getCpuLimit()) {}\n\nStatus GlobalConfig::Validate(const ConfigData &config) const {\n  if (config.memory_limit_bytes < MIN_MEMORY_LIMIT_BYTES) {\n    return Status::InvalidArgument(\"memory_limit_bytes must be greater than \",\n                                   MIN_MEMORY_LIMIT_BYTES);\n  }\n\n  if (config.memory_limit_bytes > CgroupUtil::getMemoryLimit()) {\n    return Status::InvalidArgument(\"memory_limit_bytes must be less than \",\n                                   CgroupUtil::getMemoryLimit());\n  }\n\n  // Validate query thread count\n  if (config.query_thread_count == 0) 
{\n    return Status::InvalidArgument(\"query_thread_count must be greater than 0\");\n  }\n\n  // Validate invert_to_forward_scan_ratio (should be between 0 and 1)\n  if (config.invert_to_forward_scan_ratio < 0.0f ||\n      config.invert_to_forward_scan_ratio > 1.0f) {\n    return Status::InvalidArgument(\n        \"invert_to_forward_scan_ratio must be between 0 and 1\");\n  }\n\n  // Validate brute_force_by_keys_ratio (should be between 0 and 1)\n  if (config.brute_force_by_keys_ratio < 0.0f ||\n      config.brute_force_by_keys_ratio > 1.0f) {\n    return Status::InvalidArgument(\n        \"brute_force_by_keys_ratio must be between 0 and 1\");\n  }\n\n  // Validate optimize thread count\n  if (config.optimize_thread_count == 0) {\n    return Status::InvalidArgument(\n        \"optimize_thread_count must be greater than 0\");\n  }\n\n  // Validate log configuration\n  if (config.log_config->GetLoggerType() == FILE_LOG_TYPE_NAME) {\n    auto log_config =\n        std::dynamic_pointer_cast<FileLogConfig>(config.log_config);\n\n    // Validate file log specific configurations\n    if (log_config->dir.empty()) {\n      return Status::InvalidArgument(\n          \"log_dir cannot be empty when set to FileLogger\");\n    }\n\n    if (log_config->basename.empty()) {\n      return Status::InvalidArgument(\n          \"log_file basename cannot be empty when set to FileLogger\");\n    }\n\n    if (log_config->file_size <= MIN_LOG_FILE_SIZE) {\n      return Status::InvalidArgument(\"log file_size must be greater than \",\n                                     MIN_LOG_FILE_SIZE,\n                                     \" when set to FileLogger\");\n    }\n\n    if (log_config->overdue_days == 0) {\n      return Status::InvalidArgument(\n          \"log_overdue_days must be greater than 0 when set to FileLogger\");\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus GlobalConfig::Initialize(const ConfigData &config) {\n  // Use atomic compare-exchange to ensure only one 
initialization\n  bool expected = false;\n  if (!initialized_.compare_exchange_strong(expected, true)) {\n    return Status::OK();\n  }\n\n  auto s = Validate(config);\n  CHECK_RETURN_STATUS(s);\n\n  config_ = config;\n\n  s = LogUtil::Init(log_dir(), log_file_basename(), int(log_level()),\n                    log_type(), log_file_size(), log_overdue_days());\n  CHECK_RETURN_STATUS(s);\n\n  if (std::atexit(ExitLogHandler) != 0) {\n    std::cerr << \"Failed to register exit handler\" << std::endl;\n    return Status::InternalError(\"Failed to register exit handler\");\n  }\n\n  GlobalResource::Instance().initialize();\n  return Status::OK();\n}\n\nuint64_t GlobalConfig::memory_limit_bytes() const noexcept {\n  return config_.memory_limit_bytes;\n}\n\nFACTORY_REGISTER_LOGGER(AppendLogger);\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/constants.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <regex>\n#include <string>\n\nnamespace zvec {\n\n// global config\nconst float DEFAULT_MEMORY_LIMIT_RATIO = 0.8f;\n\nconst uint32_t MIN_MEMORY_LIMIT_BYTES = 100 * 1024 * 1024;\n\nconst uint64_t INVALID_DOC_ID = -1UL;\n\nconst std::string LOCAL_ROW_ID = \"_zvec_row_id_\";\n\nconst std::string GLOBAL_DOC_ID = \"_zvec_g_doc_id_\";\n\nconst std::string USER_ID = \"_zvec_uid_\";\n\nconst int kSparseMaxDimSize = 16384;\n\nconst int64_t kMaxRecordBatchNumRows = 4096;\n\nconstexpr uint32_t MAX_ARRAY_FIELD_LEN = 32;\n\nconst float COMPACT_DELETE_RATIO_THRESHOLD = 0.3f;\n\nconst std::regex COLLECTION_NAME_REGEX(\"^[a-zA-Z0-9_-]{3,64}$\");\n\nconst std::regex FIELD_NAME_REGEX(\"^[a-zA-Z0-9_-]{1,32}$\");\n\nconst std::regex DOC_PK_REGEX(\"^[a-zA-Z0-9_!@#$%+=.-]{1,64}$\");\n\nconst std::regex COLLECTION_PATH_REGEX(\n    R\"(^/?(?:[a-zA-Z0-9_.\\-]+/)*[a-zA-Z0-9_.\\-]+$)\");\n\nconstexpr uint32_t kMaxDenseDimSize = 20000;\n\nconstexpr uint32_t kMaxScalarFieldSize = 1024;\n\nconstexpr uint32_t kMaxVectorFieldSize = 5;\n\nconstexpr uint32_t kMaxQueryTopk = 1024;\n\nconstexpr uint32_t kMaxOutputFieldSize = 1024;\n\nconstexpr uint32_t kMaxWriteBatchSize = 1024;\n\nconstexpr uint32_t kMinRabitqDimSize = 64;\nconstexpr uint32_t kMaxRabitqDimSize = 4095;\n\n// Inverted index\nconst std::string 
INVERT_SUFFIX_TERMS{\"$TERMS\"};\n\nconst std::string INVERT_SUFFIX_REVERSED_TERMS{\"$SMRET\"};\n\nconst std::string INVERT_SUFFIX_ARRAY_LEN{\"$ARRAY_LEN\"};\n\nconst std::string INVERT_SUFFIX_RANGES{\"$RANGES\"};\n\nconst std::string INVERT_CDF{\"$CDF\"};\n\nconst std::string INVERT_KEY_MAX_ID{\"$ZVEC$MAX_ID\"};\n\nconst std::string INVERT_KEY_NULL{\"$ZVEC$NULL\"};\n\nconst std::string INVERT_KEY_SEALED{\"$ZVEC$SEALED\"};\n\nconst uint32_t INVERT_ID_LIST_SIZE_THRESHOLD = 3;\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/error_code.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"error_code.h\"\n\nnamespace zvec {\n\n// 0~999  [Builtin]\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(Success, 0, \"Success\");\n\n// 1000~1999 [Common Error]\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(RuntimeError, 1000, \"Runtime Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(LogicError, 1001, \"Logic Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(StatusError, 1002, \"Status Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(LoadConfig, 1003, \"Load Config Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ConfigError, 1004, \"Config Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidArgument, 1005, \"Invalid Argument\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(NotInitialized, 1006, \"Not Initialized\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(OpenFile, 1007, \"Open File Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ReadData, 1008, \"Read Data Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(WriteData, 1009, \"Write Data Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedLimit, 1010, \"Exceed Limit\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(SerializeError, 1011, \"Serialize Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DeserializeError, 1012, \"Deserialize Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(StartServer, 1013, \"Start Server Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(StoppedService, 1014, \"Visit Stopped Service\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(FileSystem, 1015, \"File System 
Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(RpcError, 1016, \"RPC Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InitChannelError, 1017,\n                               \"Init brpc channel Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(AddSubChannelError, 1018,\n                               \"Add sub channel Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(NoNeedProcess, 1019, \"No need process\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EtcdError, 1020, \"Etcd Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(MessageQueueError, 1021, \"Message Queue Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaSubTopicExistErr, 1022,\n                               \"Kafka topic subscribe already exist Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaUnSubTopicNotExistErr, 1023,\n                               \"Kafka topic unsubscribe not exist Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InitKafkaError, 1024, \"Init kafka error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaPublishError, 1025, \"Kafka publish error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedKafkaMessageSizeLimit, 1026,\n                               \"Exceed kafka message size limit\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(NotImplemented, 1027,\n                               \"The function is not implemented\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(Timeout, 1028, \"Timeout\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(MasterNoLeader, 1029, \"Master no leader\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(NeedRetry, 1030, \"Need retry\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(Abort, 1031, \"Abort\");\n\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyCollectionName, 2000,\n                               \"Empty Collection Name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyColumnName, 2001, \"Empty Column Name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyPartitionName, 2002,\n                               \"Empty collection partition name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyColumns, 2003, \"Empty Columns\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyPrimaryKey, 2004, \"Empty primary 
key\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyDocList, 2005, \"Empty doc list\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyDocFields, 2006, \"Empty doc fields\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyIndexField, 2007, \"Empty index field\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidRecord, 2008, \"Invalid Record\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidQuery, 2009, \"Invalid Query\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidWriteRequest, 2010,\n                               \"Invalid Write Request\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidVectorFormat, 2011,\n                               \"Invalid Vector Format\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidDataType, 2012, \"Invalid Data Type\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidIndexType, 2013, \"Invalid Index Type\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFeature, 2014, \"Invalid Feature\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFilter, 2015, \"Invalid Filter\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidPrimaryKey, 2016, \"Invalid primary key\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidField, 2017, \"Invalid field\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedIndexColumn, 2018,\n                               \"Mismatched Index Column\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedDimension, 2019,\n                               \"Mismatched Dimension\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedDataType, 2020,\n                               \"Mismatched Data Type\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentCollection, 2021,\n                               \"Collection Not Exist\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentPartition, 2022,\n                               \"Inexistent collection partition\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentColumn, 2023, \"Column Not Exist\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentKey, 2024, \"Key Not Exist\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateCollection, 2025,\n                               \"Duplicate 
Collection\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicatePartition, 2026,\n                               \"Duplicate collection partition\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateKey, 2027, \"Duplicate Key\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateField, 2028, \"Duplicate field\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyPartition, 2029,\n                               \"Status of collection partition is incorrect\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyCollection, 2030,\n                               \"Status of collection is incorrect\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(UnsupportedCondition, 2031,\n                               \"Query condition has error or not supported\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(\n    OrderbyNotInSelectItems, 2032,\n    \"Order by column must exists in select item list\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(PbToSqlInfoError, 2033, \"Pb to sql info error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedRateLimit, 2034, \"Exceed Rate Limit\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidSparseValues, 2035,\n                               \"Invalid Sparse Values\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidBatchSize, 2036, \"Invalid batch size\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidDimension, 2037, \"Invalid dimension\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidExtraParam, 2038, \"Invalid extra param\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidRadius, 2039, \"Invalid radius\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidLinear, 2040, \"Invalid is linear\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidTopk, 2041, \"Invalid topk\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidCollectionName, 2042,\n                               \"Invalid collection name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidPartitionName, 2043,\n                               \"Invalid partition name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFieldName, 2044, \"Invalid field name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidChannelCount, 2045, \"Invalid field 
name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidReplicaCount, 2046, \"Invalid field name\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidJson, 2047, \"Invalid json\");\n// used by master\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidClusterConfig, 2048,\n                               \"Invalid cluster config\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateCluster, 2049, \"Duplicate Cluster\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentCluster, 2050, \"Inexistent Cluster\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidClusterStatus, 2051,\n                               \"Invalid Cluster Status\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(RpcTimedout, 2052, \"Rpc Timedout\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidGroupBy, 2053, \"Invalid GroupBy Request\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyVectorField, 2054, \"Empty vector field\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(VectorNotAllowed, 2055, \"Vector not allowed\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidReferenceCollection, 2056,\n                               \"Invalid reference collection\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidOrderBy, 2057, \"Invalid OrderBy field\");\n\n\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyQueue, 5002,\n                               \"Compute Queue Is Unready Yet\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(ScheduleError, 5003, \"Schedule Task Error\");\nPROXIMA_ZVEC_ERROR_CODE_DEFINE(TaskIsRunning, 5004,\n                               \"Task is running in other coroutine\");\n\nconst char *ErrorCode::What(int val) {\n  return ErrorCode::Instance()->what(val);\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/error_code.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <map>\n#include <zvec/ailego/pattern/expected.hpp>\nnamespace zvec {\n\n/*! Error\n */\n\nclass ErrorCode;\n\nclass ErrorCode {\n public:\n  /*! Error Code\n   */\n  class Code {\n   public:\n    //! Constructor\n    Code(int val, const char *str) : value_(-val), desc_(str) {\n      ErrorCode::Instance()->emplace(this);\n    }\n\n    //! Retrieve the value of code\n    operator int() const {\n      return (this->value_);\n    }\n\n    //! Retrieve the value of code\n    int value() const {\n      return (this->value_);\n    }\n\n    //! Retrieve the description of code\n    const char *desc() const {\n      return (this->desc_);\n    }\n\n   private:\n    int value_;\n    const char *desc_;\n  };\n\n  //! Retrieve the description of code\n  static const char *What(int val);\n\n protected:\n  //! Constructor\n  ErrorCode(void) : map_() {}\n\n  //! Inserts a new code into map\n  void emplace(const ErrorCode::Code *code) {\n    map_.emplace(code->value(), code);\n  }\n\n  //! Retrieve the description of code\n  const char *what(int val) const {\n    auto iter = map_.find(val);\n    if (iter != map_.end()) {\n      return iter->second->desc();\n    }\n    return \"\";\n  }\n\n  //! Retrieve the singleton\n  static ErrorCode *Instance(void) {\n    static ErrorCode error;\n    return (&error);\n  }\n\n private:\n  //! 
Disable them\n  ErrorCode(const ErrorCode &) = delete;\n  ErrorCode(ErrorCode &&) = delete;\n  ErrorCode &operator=(const ErrorCode &) = delete;\n\n  //! Error code map\n  std::map<int, const ErrorCode::Code *> map_;\n};\n\n//! Error Code Define\n#define PROXIMA_ZVEC_ERROR_CODE_DEFINE(__NAME__, __VAL__, __DESC__)        \\\n  const zvec::ErrorCode::Code ErrorCode_##__NAME__((__VAL__), (__DESC__)); \\\n  const zvec::ErrorCode::Code &_ErrorCode_##__VAL__##_Register(            \\\n      ErrorCode_##__NAME__)\n\n//! Proxima SE Error Code Declare\n#define PROXIMA_ZVEC_ERROR_CODE_DECLARE(__NAME__) \\\n  extern const zvec::ErrorCode::Code ErrorCode_##__NAME__\n\n//! Error code helper\n#define PROXIMA_ZVEC_ERROR_CODE(__NAME__) zvec::ErrorCode_##__NAME__\n\n// 0~999  [Builtin]\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(Success);\n\n// 1000~1999 [Common Error]\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(RuntimeError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(LogicError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(StatusError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(LoadConfig);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ConfigError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidArgument);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(NotInitialized);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(OpenFile);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ReadData);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(WriteData);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedLimit);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(SerializeError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DeserializeError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(StartServer);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(StoppedService);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(FileSystem);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(RpcError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InitChannelError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(AddSubChannelError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(NoNeedProcess);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EtcdError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(MessageQueueError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(KafkaSubTopicExistErr);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE
(KafkaUnSubTopicNotExistErr);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InitKafkaError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(KafkaPublishError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedKafkaMessageSizeLimit);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(NotImplemented);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(Timeout);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(MasterNoLeader);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(NeedRetry);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(Abort);\n\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyQueue);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ScheduleError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(TaskIsRunning);\n\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DirectoryAlreadyExists);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DirectoryNotExists);\n\n// 2000~2999 [Client Check]\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyCollectionName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyColumnName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyPartitionName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyColumns);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyPrimaryKey);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyDocList);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyDocFields);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyIndexField);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidRecord);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidQuery);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidWriteRequest);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidVectorFormat);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidDataType);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidIndexType);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFeature);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFilter);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidPrimaryKey);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidField);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedIndexColumn);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedDimension);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedDataType);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentCollection);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentPartition);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentColumn);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(In
existentKey);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateCollection);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicatePartition);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateKey);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateField);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyPartition);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyCollection);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(UnsupportedCondition);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(OrderbyNotInSelectItems);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(PbToSqlInfoError);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedRateLimit);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidSparseValues);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidBatchSize);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidDimension);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidExtraParam);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidRadius);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidLinear);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidTopk);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidCollectionName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidPartitionName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFieldName);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidChannelCount);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidReplicaCount);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidJson);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidClusterConfig);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateCluster);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentCluster);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidClusterStatus);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(RpcTimedout);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidGroupBy);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyVectorField);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(VectorNotAllowed);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidReferenceCollection);\nPROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidOrderBy);\n\n\n// 40000~49999 [De Admin]\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/file_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"file_helper.h\"\n#include <sys/stat.h>\n#include <dirent.h>\n#include <errno.h>\n#include <fcntl.h>\n#include <string.h>\n#include <unistd.h>\n#include <algorithm>\n#include <cstdio>\n#include <ailego/pattern/defer.h>\n\nnamespace zvec {\n\n\n// keep consistent with MANIFEST_BACKUP_FILE\nconst std::string FileHelper::BACKUP_SUFFIX = \".backup_\";\nconst std::string FileHelper::RECOVER_SUFFIX = \".recovering\";\n\nbool FileHelper::CopyFile(const std::string &src_file_path,\n                          const std::string &dst_file_path) {\n  int src_fd = open(src_file_path.c_str(), O_RDONLY, 0);\n  if (src_fd < 0) {\n    return false;\n  }\n  AILEGO_DEFER([src_fd] { close(src_fd); });\n\n  std::string dst_file_path_tmp = dst_file_path + \".tmp\";\n  int dst_fd =\n      open(dst_file_path_tmp.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);\n  if (dst_fd < 0) {\n    return false;\n  }\n  AILEGO_DEFER([dst_fd] { close(dst_fd); });\n\n  ssize_t size;\n  char buf[BUFSIZ];\n  while ((size = read(src_fd, buf, BUFSIZ)) > 0) {\n    if (size != write(dst_fd, buf, size)) {\n      return false;\n    }\n  }\n  return rename(dst_file_path_tmp.c_str(), dst_file_path.c_str()) == 0;\n}\n\nbool FileHelper::CopyDirectory(const std::string &src_dir_path,\n                               const std::string &dst_dir_path) {\n  DIR *dir = 
opendir(src_dir_path.c_str());\n  if (!dir) {\n    return false;\n  }\n  AILEGO_DEFER([dir] { closedir(dir); });\n\n  if (!ailego::FileHelper::IsExist(dst_dir_path.c_str())) {\n    if (!ailego::FileHelper::MakePath(dst_dir_path.c_str())) {\n      return false;\n    }\n  }\n\n  struct dirent *dent;\n  while ((dent = readdir(dir)) != nullptr) {\n    if (!strcmp(dent->d_name, \".\") || !strcmp(dent->d_name, \"..\")) {\n      continue;\n    }\n    std::string src_full_path =\n        ailego::StringHelper::Concat(src_dir_path, \"/\", dent->d_name);\n    std::string dst_full_path =\n        ailego::StringHelper::Concat(dst_dir_path, \"/\", dent->d_name);\n\n    if (ailego::FileHelper::IsDirectory(src_full_path.c_str())) {\n      if (!CopyDirectory(src_full_path, dst_full_path)) {\n        return false;\n      }\n    } else {\n      if (!CopyFile(src_full_path, dst_full_path)) {\n        return false;\n      }\n    }\n  }\n  return true;\n}\n\nvoid FileHelper::CleanupDirectory(const std::string &backup_dir,\n                                  size_t max_backup_count,\n                                  const char *prefix_name) {\n  if (max_backup_count <= 0) {\n    return;\n  }\n\n  DIR *dir = opendir(backup_dir.c_str());\n  if (!dir) {\n    return;\n  }\n\n  AILEGO_DEFER([dir] { closedir(dir); });\n\n  size_t prefix_len = strlen(prefix_name);\n  std::vector<std::string> candidates;\n  struct dirent *dent;\n  while ((dent = readdir(dir)) != nullptr) {\n    if (strncmp(dent->d_name, prefix_name, prefix_len) == 0) {\n      candidates.emplace_back(dent->d_name);\n    }\n  }\n  if (candidates.size() <= max_backup_count) {\n    return;\n  }\n  std::sort(candidates.begin(), candidates.end());\n  for (size_t i = 0; i < candidates.size() - max_backup_count; ++i) {\n    std::string path =\n        ailego::StringHelper::Concat(backup_dir, \"/\", candidates[i].c_str());\n    ailego::FileHelper::RemovePath(path.c_str());\n  }\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/file_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <stdint.h>\n#include <cstdint>\n#include <string>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/utility/file_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec {\n\n/*\n * File type and id\n */\nenum class FileID : uint32_t {\n  UNDEFINED = 0,\n  ID_FILE,\n  DELETE_FILE,\n  FORWARD_FILE,\n  PROXIMA_FILE,\n  SEGMENT_FILE,\n  LSN_FILE,\n  MANIFEST_FILE,\n  WAL_FILE,\n  RESHARD_STATE,\n};\n\n/*\n * File name coresponding to file id\n */\nstatic const char *GetFileName(FileID t) {\n  switch (t) {\n    case FileID::ID_FILE:\n      return \"idmap\";\n    case FileID::DELETE_FILE:\n      return \"del\";\n    case FileID::FORWARD_FILE:\n      return \"data.fwd\";\n    case FileID::PROXIMA_FILE:\n      return \"data.pxa\";\n    case FileID::SEGMENT_FILE:\n      return \"data.seg\";\n    case FileID::LSN_FILE:\n      return \"data.lsn\";\n    case FileID::MANIFEST_FILE:\n      return \"manifest\";\n    case FileID::WAL_FILE:\n      return \"data.wal\";\n    case FileID::RESHARD_STATE:\n      return \"reshard.state\";\n    default:\n      return \"UnknownFile\";\n  };\n}\n\n/*\n * This helper class is mainly to wrapper filesystem operations.\n */\nclass FileHelper {\n public:\n  static const std::string MakeWalPath(const std::string &path, uint32_t seg_id,\n                                      
 uint32_t block_id) {\n    return ailego::StringHelper::Concat(path, \"/\", seg_id, \"/\", block_id,\n                                        \".wal\");\n  }\n\n  static std::string MakeSegmentPath(const std::string &path, uint32_t id,\n                                     const std::string &suffix = \"\") {\n    if (suffix.empty()) {\n      return ailego::StringHelper::Concat(path, \"/\", id);\n    }\n    return ailego::StringHelper::Concat(path, \"/\", id, \".\", suffix);\n  }\n\n  static std::string MakeTempSegmentPath(const std::string &path, uint32_t id) {\n    return MakeSegmentPath(path, id, \"tmp\");\n  }\n\n  // e.g.: **/seg1/scalar.block.1.ipc, **/seg1/scalar.block.1.parquet\n  static const std::string MakeForwardBlockPath(const std::string &path,\n                                                uint32_t seg_id,\n                                                uint32_t block_id,\n                                                bool use_parquet = false) {\n    return use_parquet ? MakeForwardBlockPath(path, seg_id, block_id,\n                                              std::string(\"parquet\"))\n                       : MakeForwardBlockPath(path, seg_id, block_id,\n                                              std::string(\"ipc\"));\n  }\n\n  static const std::string MakeForwardBlockPath(const std::string &path,\n                                                uint32_t seg_id,\n                                                uint32_t block_id,\n                                                const std::string &suffix) {\n    return ailego::StringHelper::Concat(path, \"/\", seg_id, \"/scalar.\", block_id,\n                                        \".\", suffix);\n  }\n\n  static const std::string MakeForwardBlockPath(const std::string &seg_path,\n                                                uint32_t block_id,\n                                                bool use_parquet = false) {\n    return use_parquet ? 
ailego::StringHelper::Concat(seg_path, \"/scalar.\",\n                                                      block_id, \".parquet\")\n                       : ailego::StringHelper::Concat(seg_path, \"/scalar.\",\n                                                      block_id, \".ipc\");\n  }\n\n  static const std::string MakeForwardBlockPath(const std::string &seg_path,\n                                                uint32_t block_id,\n                                                const std::string &suffix) {\n    return ailego::StringHelper::Concat(seg_path, \"/scalar.\", block_id, \".\",\n                                        suffix);\n  }\n\n  // e.g.: **/seg1/scalar.index.block.1.rocksdb\n  static const std::string MakeInvertIndexPath(const std::string &path,\n                                               uint32_t seg_id,\n                                               uint32_t block_id) {\n    return ailego::StringHelper::Concat(path, \"/\", seg_id, \"/scalar.index.\",\n                                        block_id, \".rocksdb\");\n  }\n\n  static const std::string MakeInvertIndexPath(const std::string &seg_path,\n                                               uint32_t block_id) {\n    return ailego::StringHelper::Concat(seg_path, \"/scalar.index.\", block_id,\n                                        \".rocksdb\");\n  }\n\n  static const std::string MakeVectorIndexPath(const std::string &path,\n                                               const std::string &column,\n                                               uint32_t seg_id,\n                                               uint32_t block_id) {\n    return ailego::StringHelper::Concat(path, \"/\", seg_id, \"/\", column,\n                                        \".index.\", block_id, \".proxima\");\n  }\n\n  static const std::string MakeVectorIndexPath(const std::string &seg_path,\n                                               const std::string &column,\n                                            
   uint32_t block_id) {\n    return ailego::StringHelper::Concat(seg_path, \"/\", column, \".index.\",\n                                        block_id, \".proxima\");\n  }\n\n  // e.g.: **/{seg_id}/{column}.index.block.{block_id}.proxima\n  static const std::string MakeQuantizeVectorIndexPath(\n      const std::string &path, const std::string &column, uint32_t seg_id,\n      uint32_t block_id) {\n    return ailego::StringHelper::Concat(path, \"/\", seg_id, \"/\", column,\n                                        \".qindex.\", block_id, \".proxima\");\n  }\n\n  static const std::string MakeQuantizeVectorIndexPath(\n      const std::string &seg_path, const std::string &column,\n      uint32_t block_id) {\n    return ailego::StringHelper::Concat(seg_path, \"/\", column, \".qindex.\",\n                                        block_id, \".proxima\");\n  }\n\n  //! Make file path with ${prefix_path}/${file_name}\n  static std::string MakeFilePath(const std::string &prefix_path,\n                                  FileID file_id) {\n    return ailego::StringHelper::Concat(prefix_path, \"/\", GetFileName(file_id));\n  }\n\n  //! Make file path with ${prefix_path}/${file_name}.${number}\n  static std::string MakeFilePath(const std::string &prefix_path,\n                                  FileID file_id, uint32_t number) {\n    return ailego::StringHelper::Concat(prefix_path, \"/\", GetFileName(file_id),\n                                        \".\", number);\n  }\n\n  //! Make file path with ${prefix_path}/${file_name}.${suffix_name}.${number}\n  static std::string MakeFilePath(const std::string &prefix_path,\n                                  FileID file_id, uint32_t number,\n                                  const std::string &suffix_name) {\n    return ailego::StringHelper::Concat(prefix_path, \"/\", GetFileName(file_id),\n                                        \".\", suffix_name, \".\", number);\n  }\n\n  //! 
Create directory\n  static bool CreateDirectory(const std::string &dir_path) {\n    return ailego::File::MakePath(dir_path);\n  }\n\n  //! Remove directory\n  static bool RemoveDirectory(const std::string &dir_path) {\n    return ailego::File::RemoveDirectory(dir_path);\n  }\n\n  //! Remove file\n  static bool RemoveFile(const std::string &file_path) {\n    return ailego::File::Delete(file_path);\n  }\n\n  //! Move file\n  static bool MoveFile(const std::string &src_path,\n                       const std::string &dest_path) {\n    return ailego::File::Rename(src_path, dest_path);\n  }\n\n  //! Move directory\n  static bool MoveDirectory(const std::string &src_path,\n                            const std::string &dest_path) {\n    return ailego::File::Rename(src_path, dest_path);\n  }\n\n  //! Check if file exists\n  static bool FileExists(const std::string &file_path) {\n    return ailego::File::IsExist(file_path);\n  }\n\n  //! Check if directory exists\n  static bool DirectoryExists(const std::string &dir_path) {\n    return ailego::File::IsExist(dir_path);\n  }\n\n  //! Return file size\n  static size_t FileSize(const std::string &file_path) {\n    return ailego::FileHelper::FileSize(file_path.c_str());\n  }\n\n  //! Copy file\n  //! src_file_path and dst_file_path must be the full path\n  //! dst_file_path/.. must exist\n  static bool CopyFile(const std::string &src_file_path,\n                       const std::string &dst_file_path);\n\n  //! Copy directory recursively\n  //! src_dir_path and dst_dir_path must be the full path\n  //! dst_dir_path will be created if not exist\n  static bool CopyDirectory(const std::string &src_dir_path,\n                            const std::string &dst_dir_path);\n\n  //! Clean up file or directory with the prefix `prefix_name` under\n  //! `backup_dir`, keep at most `max_backup_count` file or directory.\n  //! If `max_backup_count` is 0, nothing is performed.\n  //!\n  //! 
The name pattern must be `prefix_name`_`number`, comparable by name.\n  static void CleanupDirectory(const std::string &backup_dir,\n                               size_t max_backup_count,\n                               const char *prefix_name);\n\n  static const std::string BACKUP_SUFFIX;\n  static const std::string RECOVER_SUFFIX;\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/global_resource.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"db/common/global_resource.h\"\n#include <mutex>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/db/config.h>\n\nnamespace zvec {\n\nvoid GlobalResource::initialize() {\n  static std::once_flag flag;\n  std::call_once(flag, [this]() mutable {\n    this->query_thread_pool_.reset(\n        new ailego::ThreadPool(GlobalConfig::Instance().query_thread_count()));\n    this->optimize_thread_pool_.reset(new ailego::ThreadPool(\n        GlobalConfig::Instance().optimize_thread_count()));\n    ailego::BufferManager::Instance().init(\n        GlobalConfig::Instance().memory_limit_bytes(), 1);\n  });\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/global_resource.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/pattern/singleton.h>\n\nnamespace zvec {\n\nclass GlobalResource : public ailego::Singleton<GlobalResource> {\n public:\n  void initialize();\n\n  ailego::ThreadPool *query_thread_pool() {\n    initialize();\n    return query_thread_pool_.get();\n  }\n\n  ailego::ThreadPool *optimize_thread_pool() {\n    initialize();\n    return optimize_thread_pool_.get();\n  }\n\n private:\n  std::unique_ptr<ailego::ThreadPool> query_thread_pool_;\n  std::unique_ptr<ailego::ThreadPool> optimize_thread_pool_;\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/glogger.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n\n#ifdef __GNUC__\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wunused-parameter\"\n#endif\n\n#include <glog/logging.h>\n\n#ifdef __GNUC__\n#pragma GCC diagnostic pop\n#endif\n\nnamespace google {\nnamespace glog_internal_namespace_ {\nextern bool IsGoogleLoggingInitialized(void);\nextern bool ShutdownGoogleLoggingUtilities(void);\n}  // namespace glog_internal_namespace_\n}  // namespace google\n\nnamespace zvec {\n\nclass AppendLogger : public ailego::Logger {\n public:\n  AppendLogger() = default;\n\n  ~AppendLogger() {\n    this->cleanup();\n  }\n\n public:\n  int init(const ailego::Params &params) override {\n    if (!google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {\n      std::string log_dir = params.get_as_string(\"proxima.file.logger.log_dir\");\n      std::string log_file =\n          params.get_as_string(\"proxima.file.logger.log_file\");\n      uint32_t log_file_size =\n          params.get_as_uint32(\"proxima.file.logger.file_size\");\n      uint32_t log_overdue_days =\n          params.get_as_uint32(\"proxima.file.logger.overdue_days\");\n\n      if (!ailego::File::IsExist(log_dir)) {\n        ailego::File::MakePath(log_dir);\n      }\n\n      FLAGS_log_dir = log_dir;\n      
FLAGS_max_log_size = log_file_size;\n      FLAGS_logbufsecs = 1;\n      // it's really a bad feature for glog\n      // logs <= LOG_FATAL will also output to stderr\n      // and we can only set FATAL at most\n      // and so we should avoid to use LOG_FATAL\n      FLAGS_stderrthreshold = google::GLOG_FATAL;\n\n      static std::string new_log_file = log_file;\n      google::InitGoogleLogging(new_log_file.c_str());\n      google::EnableLogCleaner(log_overdue_days);\n    }\n    return 0;\n  }\n\n  int cleanup() override {\n    if (google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {\n      google::DisableLogCleaner();\n      google::ShutdownGoogleLogging();\n    }\n    return 0;\n  }\n\n  void log(int level, const char *file, int line, const char *format,\n           va_list args) override {\n    static google::LogSeverity severities[] = {\n        google::GLOG_INFO, google::GLOG_INFO, google::GLOG_WARNING,\n        google::GLOG_ERROR, google::GLOG_FATAL};\n    char buf[2048];\n    vsnprintf(buf, sizeof(buf), format, args);\n    google::LogMessage(file, line, severities[level]).stream() << buf;\n    // NOTE: glog will flush WARN and above immediately, flush INFO every\n    // `FLAGS_logbufsecs` or every 1M bytes. FlushLogFiles not needed.\n    // google::FlushLogFiles(severities[level]);\n  }\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/logger.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <gflags/gflags.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/factory.h>\n#include <zvec/db/status.h>\n#include \"db/common/constants.h\"\n#include \"error_code.h\"\n\nnamespace zvec {\n\nclass LogUtil {\n public:\n  static Status Init(const std::string &log_dir, const std::string &log_file,\n                     int log_level, const std::string &logger_type,\n                     int log_file_size, int log_overdue_days) {\n    if (logger_type == FILE_LOG_TYPE_NAME) {\n      if (log_dir.empty() || log_file.empty()) {\n        return Status::InvalidArgument(\"log_dir or log_file is empty\");\n      }\n\n      if (!ailego::File::IsExist(log_dir)) {\n        ailego::File::MakePath(log_dir);\n      }\n    }\n\n    auto logger =\n        ailego::Factory<ailego::Logger>::MakeShared(logger_type.c_str());\n    if (!logger) {\n      LOG_FATAL(\"Invalid logger_type[%s]\", logger_type.c_str());\n      return Status::InvalidArgument(\"Invalid logger_type: \", logger_type);\n    }\n\n    ailego::Params params;\n    if (logger_type == FILE_LOG_TYPE_NAME) {\n      params.set(\"proxima.file.logger.log_dir\", log_dir);\n      params.set(\"proxima.file.logger.log_file\", log_file);\n      params.set(\"proxima.file.logger.path\", log_dir + \"/\" + log_file);\n      
std::string program_name = ailego::File::BaseName(gflags::GetArgv0());\n      params.set(\"proxima.program.program_name\", program_name);\n      params.set(\"proxima.file.logger.file_size\", log_file_size);\n      params.set(\"proxima.file.logger.overdue_days\", log_overdue_days);\n    }\n\n    int ret = logger->init(params);\n    if (ret != 0) {\n      return Status::InternalError(ErrorCode::What(ret));\n    }\n\n    zvec::ailego::LoggerBroker::SetLevel(log_level);\n    zvec::ailego::LoggerBroker::Register(logger);\n    return Status::OK();\n  }\n\n  static void Shutdown() {\n    zvec::ailego::LoggerBroker::Unregister();\n  }\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/profiler.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"error_code.h\"\n\nnamespace zvec {\n\n//! Profiler collecting all the latency and other information during query\nclass Profiler {\n public:\n  using Ptr = std::shared_ptr<Profiler>;\n\n private:\n  //! Stage object\n  struct Stage {\n    //! Constructor\n    explicit Stage(ailego::JsonObject *node) : node_(node) {}\n    //! Stage node, which stored in JsonTree held by Profiler\n    ailego::JsonObject *node_{nullptr};\n    //! Stage latency, started when creating Stage object\n    ailego::ElapsedTime latency_;\n  };\n\n public:\n  //! Constructor\n  explicit Profiler(bool enable = false) : enable_(enable) {\n    if (enabled()) {\n      root_.assign(ailego::JsonObject());\n    }\n  }\n\n  //! Check enabled\n  bool enabled() const {\n    return (enabled_debug() || enabled_trace());\n  }\n\n  bool enabled_debug() const {\n    return enable_;\n  }\n\n  bool enabled_trace() const {\n    return !trace_id_.empty();\n  }\n\n  //! Start profiler\n  void start() {\n    if (enabled() && path_.empty()) {\n      path_.emplace_back(Stage(&root_.as_object()));\n    }\n  }\n\n  //! 
Stop profiler\n  void stop() {\n    if (enabled()) {\n      if (path_.size() == 1) {\n        // Root always held in path_[0]\n        close_stage();\n      } else {\n        LOG_WARN(\"There are stages have not been closed, stages[%zu]\",\n                 path_.size());\n        // Manually set latency to root, which should not be normal way\n        root_[\"latency\"] = path_.begin()->latency_.micro_seconds();\n      }\n    }\n  }\n\n  //! Open stage, start timer of stage\n  int open_stage(const std::string &name) {\n    if (enabled()) {\n      if (path_.empty()) {\n        LOG_ERROR(\"Profiler did not start yet. name[%s]\", name.c_str());\n        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);\n      }\n      if (name.empty()) {\n        LOG_ERROR(\"Can't open stage with empty name\");\n        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);\n      }\n      ailego::JsonString key(name);\n      ailego::JsonObject child;\n\n      current_path()->set(key, child);  // add child\n      path_.emplace_back(Stage(\n          &((*current_path())[name.c_str()].as_object())));  // move to child\n    }\n    return 0;\n  }\n\n  //! Close stage and stop timer of stage(represent by stage.latency)\n  int close_stage() {\n    if (enabled()) {\n      if (path_.empty()) {\n        LOG_ERROR(\"No available stage can be closed\");\n        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);\n      }\n      ailego::JsonValue latency(current()->latency_.micro_seconds());\n      current_path()->set(\"latency\", latency);\n      path_.pop_back();\n    }\n    return 0;\n  }\n\n  //! add value to profiler\n  template <typename VALUE_TYPE>\n  int add(const std::string &name, const VALUE_TYPE &v) {\n    if (enabled()) {\n      if (path_.empty()) {\n        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);\n      }\n\n      ailego::JsonString key(name);\n      ailego::JsonValue value(v);\n      current_path()->set(key, value);\n    }\n    return 0;\n  }\n\n  //! 
Serialize profiler to string(Json Format)\n  std::string as_json_string() const {\n    return enabled() ? root_.as_json_string().as_stl_string()\n                     : std::string(\"{}\");\n  }\n\n\n  void set_trace_id(const std::string &trace_id) {\n    trace_id_ = trace_id;\n    if (enabled()) {\n      root_.assign(ailego::JsonObject());\n    }\n  }\n\n\n  const std::string &trace_id() const {\n    return trace_id_;\n  }\n\n  const ailego::JsonValue &root() const {\n    return root_;\n  }\n\n private:\n  Stage *current() {\n    return path_.rbegin().operator->();\n  }\n\n  ailego::JsonObject *current_path() {\n    return current()->node_;\n  }\n\n private:\n  //! enable flag\n  bool enable_{false};\n\n  std::string trace_id_{};\n  //! root handler\n  ailego::JsonValue root_;\n\n  //! Depth-First paths\n  std::vector<Stage> path_;\n};\n\n//! Helper for latency\nclass ScopedLatency {\n public:\n  //! Constructor\n  explicit ScopedLatency(const char *name, Profiler::Ptr profiler)\n      : name_(name), profiler_(std::move(profiler)) {}\n\n  //! Destructor\n  ~ScopedLatency() {\n    profiler_->add(name_, latency_.micro_seconds());\n  }\n\n private:\n  //! Name of latency\n  const char *name_{nullptr};\n\n  //! Timer handler\n  ailego::ElapsedTime latency_;\n\n  //! Profiler handler\n  Profiler::Ptr profiler_;\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/common/rocbsdb_context.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <rocksdb/filter_policy.h>\n#include <rocksdb/statistics.h>\n#include <rocksdb/table.h>\n#include <rocksdb/utilities/checkpoint.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"rocksdb_context.h\"\n\n\nnamespace zvec {\n\n\nStatus RocksdbContext::create(\n    const std::string &db_path,\n    std::shared_ptr<rocksdb::MergeOperator> merge_op) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (db_) {\n    LOG_ERROR(\"RocksDB[%s] is already opened\", db_path_.c_str());\n    return Status::PermissionDenied();\n  }\n\n  if (auto s = validate_and_set_db_path(db_path, false); !s.ok()) {\n    return s;\n  }\n\n  create_opts_.create_if_missing = true;\n  prepare_options(merge_op);\n\n  // Open RocksDB\n  rocksdb::DB *db;\n  if (auto s = rocksdb::DB::Open(create_opts_, db_path, &db); !s.ok()) {\n    LOG_ERROR(\"Failed to create RocksDB[%s], code[%d], reason[%s]\",\n              db_path.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n\n  db_.reset(db);\n  read_only_ = false;\n  write_opts_.disableWAL = true;\n  LOG_DEBUG(\"Created RocksDB[%s]\", db_path.c_str());\n  return Status::OK();\n}\n\n\nStatus RocksdbContext::create(\n    const std::string &db_path, const std::vector<std::string> &column_names,\n    std::shared_ptr<rocksdb::MergeOperator> merge_op) {\n  std::lock_guard<std::mutex> 
lock(mutex_);\n\n  if (db_) {\n    LOG_ERROR(\"RocksDB[%s] is already opened\", db_path_.c_str());\n    return Status::PermissionDenied();\n  }\n\n  if (auto s = validate_and_set_db_path(db_path, false); !s.ok()) {\n    return s;\n  }\n\n  create_opts_.create_if_missing = true;\n  prepare_options(merge_op);\n\n  // Open RocksDB\n  rocksdb::DB *db;\n  rocksdb::Status s = rocksdb::DB::Open(create_opts_, db_path, &db);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to create RocksDB[%s], code[%d], reason[%s]\",\n              db_path.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n  db_.reset(db);\n\n  // Create column families\n  bool has_default = false;\n  for (auto const &column_name : column_names) {\n    if (column_name == rocksdb::kDefaultColumnFamilyName) {\n      cf_handles_.push_back(db->DefaultColumnFamily());\n      has_default = true;\n      continue;\n    }\n    rocksdb::ColumnFamilyHandle *cf_handle{nullptr};\n    rocksdb::ColumnFamilyOptions cf_options(create_opts_);\n    s = db->CreateColumnFamily(cf_options, column_name, &cf_handle);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]\",\n                column_name.c_str(), db_path.c_str(), s.code(),\n                s.ToString().c_str());\n      delete_cf_handles();\n      db->Close();\n      db_.reset();\n      return Status::InternalError();\n    }\n    cf_handles_.push_back(cf_handle);\n  }\n  if (!has_default) {\n    cf_handles_.push_back(db->DefaultColumnFamily());\n  }\n\n  read_only_ = false;\n  write_opts_.disableWAL = true;\n  LOG_DEBUG(\"Created RocksDB[%s]\", db_path.c_str());\n  return Status::OK();\n}\n\n\nStatus RocksdbContext::open(const std::string &db_path, bool read_only,\n                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (db_) {\n    LOG_ERROR(\"RocksDB[%s] is already opened\", db_path_.c_str());\n    return 
Status::PermissionDenied();\n  }\n\n  if (auto s = validate_and_set_db_path(db_path, true); !s.ok()) {\n    return s;\n  }\n\n  create_opts_.create_if_missing = false;\n  prepare_options(merge_op);\n\n  // Open RocksDB\n  rocksdb::DB *db;\n  rocksdb::Status s;\n  if (read_only) {\n    s = rocksdb::DB::OpenForReadOnly(create_opts_, db_path, &db);\n  } else {\n    s = rocksdb::DB::Open(create_opts_, db_path, &db);\n  }\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to open RocksDB[%s], code[%d], reason[%s]\",\n              db_path.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n\n  db_.reset(db);\n  read_only_ = read_only;\n  write_opts_.disableWAL = true;\n  LOG_DEBUG(\"Opened RocksDB[%s]\", db_path.c_str());\n  return Status::OK();\n}\n\n\nStatus RocksdbContext::open(const std::string &db_path,\n                            const std::vector<std::string> &column_names,\n                            bool read_only,\n                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (db_) {\n    LOG_ERROR(\"RocksDB[%s] is already opened\", db_path_.c_str());\n    return Status::PermissionDenied();\n  }\n\n  if (auto s = validate_and_set_db_path(db_path, true); !s.ok()) {\n    return s;\n  }\n\n  create_opts_.create_if_missing = false;\n  prepare_options(merge_op);\n\n  // Set up column families\n  rocksdb::Status s;\n  std::vector<std::string> existing_cf_names{};\n  std::vector<rocksdb::ColumnFamilyDescriptor> cf_descriptors{};\n  s = rocksdb::DB::ListColumnFamilies(create_opts_, db_path,\n                                      &existing_cf_names);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to list cf in RocksDB[%s], code[%d], reason[%s]\",\n              db_path.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n  rocksdb::ColumnFamilyOptions cf_options(create_opts_);\n  if (column_names.empty()) {  // Get all column families from DB\n    
for (auto const &column_name : existing_cf_names) {\n      cf_descriptors.emplace_back(column_name, cf_options);\n    }\n  } else {\n    bool has_default = false;\n    for (const auto &column_name : column_names) {\n      if (std::find(existing_cf_names.begin(), existing_cf_names.end(),\n                    column_name) == existing_cf_names.end()) {\n        LOG_ERROR(\"Column family[%s] does not exist in RocksDB[%s]\",\n                  column_name.c_str(), db_path.c_str());\n        return Status::InvalidArgument();\n      }\n      if (column_name == rocksdb::kDefaultColumnFamilyName) {\n        has_default = true;\n      }\n    }\n    if (read_only) {\n      for (const auto &column_name : column_names) {\n        cf_descriptors.emplace_back(column_name, cf_options);\n      }\n      if (!has_default) {\n        cf_descriptors.emplace_back(rocksdb::kDefaultColumnFamilyName,\n                                    cf_options);\n      }\n    } else {  // Rocksdb must be opened with all column families in write mode\n      for (auto const &column_name : existing_cf_names) {\n        cf_descriptors.emplace_back(column_name, cf_options);\n      }\n    }\n  }\n\n  // Open RocksDB\n  rocksdb::DB *db;\n  if (read_only) {\n    s = rocksdb::DB::OpenForReadOnly(create_opts_, db_path, cf_descriptors,\n                                     &cf_handles_, &db);\n  } else {\n    s = rocksdb::DB::Open(create_opts_, db_path, cf_descriptors, &cf_handles_,\n                          &db);\n  }\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to open RocksDB[%s], code[%d], reason[%s]\",\n              db_path.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n\n  db_.reset(db);\n  read_only_ = read_only;\n  write_opts_.disableWAL = true;\n  LOG_DEBUG(\"Opened RocksDB[%s]\", db_path.c_str());\n  return Status::OK();\n}\n\n\nStatus RocksdbContext::validate_and_set_db_path(const std::string &db_path,\n                                                bool 
should_exist) {\n  if (db_path.empty()) {\n    LOG_ERROR(\"RocksDB path cannot be empty\");\n    return Status::InvalidArgument();\n  }\n\n  if (FILE::IsExist(db_path)) {\n    if (!should_exist) {\n      LOG_ERROR(\"RocksDB path[%s] already exists\", db_path.c_str());\n      return Status::InvalidArgument();\n    }\n    if (!FILE::IsDirectory(db_path)) {\n      LOG_ERROR(\"RocksDB path[%s] is not a directory\", db_path.c_str());\n      return Status::InvalidArgument();\n    }\n  } else {\n    if (should_exist) {\n      LOG_ERROR(\"RocksDB path[%s] does not exist\", db_path.c_str());\n      return Status::NotFound();\n    }\n  }\n\n  db_path_ = db_path;\n  return Status::OK();\n}\n\n\nvoid RocksdbContext::prepare_options(\n    std::shared_ptr<rocksdb::MergeOperator> merge_op) {\n  // Increase parallelism with default thread count (typically 16)\n  create_opts_.IncreaseParallelism();\n\n  // Optimize for level-based compaction style with default setting\n  create_opts_.OptimizeLevelStyleCompaction();\n\n  // TODO: enable compression?\n\n  // Setting this to 1 means that when a memtable is full, it will be flushed\n  // to disk immediately rather than being merged with other memtables\n  create_opts_.min_write_buffer_number_to_merge = 1;\n\n  // Set the block size for the arena memory allocator to 64KB, which controls\n  // how much memory is allocated at a time for internal operations\n  create_opts_.arena_block_size = 1024 * 64;\n\n  // Do not create LOG.old when reopen\n  create_opts_.keep_log_file_num = 1;\n\n  // Warnings and errors only\n  create_opts_.info_log_level = rocksdb::WARN_LEVEL;\n\n  rocksdb::BlockBasedTableOptions table_options;\n\n  // Turn on bloom filters\n  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));\n\n  // Merge operator\n  if (merge_op) {\n    create_opts_.merge_operator = merge_op;\n    create_opts_.max_successive_merges = 100;\n    create_opts_.write_buffer_size = 8 << 20;\n  }\n\n  // Create default cache\n  
table_options.block_cache = nullptr;\n\n  auto table_factory = NewBlockBasedTableFactory(table_options);\n  create_opts_.table_factory.reset(table_factory);\n\n  // Enable statistics\n  create_opts_.statistics = rocksdb::CreateDBStatistics();\n\n  // Disable external write buffer manager, let RocksDB manage it\n  create_opts_.write_buffer_manager = nullptr;\n\n  // Reduce preallocation size for manifest file to 512KB to save disk space\n  create_opts_.manifest_preallocation_size = 512 * 1024;\n\n  // Disable direct reads (use buffered I/O instead)\n  create_opts_.use_direct_reads = false;\n}\n\n\nStatus RocksdbContext::close() {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (db_ == nullptr) {\n    LOG_ERROR(\"RocksDB[%s] is not opened\", db_path_.c_str());\n    return Status::InternalError();\n  }\n\n  if (!read_only_) {\n    if (auto s = flush_unlocked(); !s.ok()) {\n      LOG_ERROR(\"Failed to close RocksDB[%s] due to flush failure\",\n                db_path_.c_str());\n      return s;\n    }\n  }\n\n  delete_cf_handles();\n\n  if (auto s = db_->Close(); s.ok()) {\n    LOG_DEBUG(\"Closed RocksDB[%s]\", db_path_.c_str());\n    db_.reset();\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to close RocksDB[%s], code[%d], reason[%s]\",\n              db_path_.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus RocksdbContext::flush_unlocked() {\n  if (read_only_) {\n    LOG_ERROR(\"Cannot flush RocksDB[%s] in read-only mode\", db_path_.c_str());\n    return Status::PermissionDenied();\n  }\n\n  for (const auto &cf : cf_handles_) {\n    if (auto s = db_->Flush(flush_opts_, cf); !s.ok()) {\n      LOG_ERROR(\"Failed to flush cf[%s] of RocksDB[%s], code[%d], reason[%s]\",\n                cf->GetName().c_str(), db_path_.c_str(), s.code(),\n                s.ToString().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  if (auto s = db_->Flush(flush_opts_); s.ok()) {\n    LOG_DEBUG(\"Flushed 
RocksDB[%s]\", db_path_.c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to flush Rocksdb[%s], code[%d], reason[%s]\",\n              db_path_.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus RocksdbContext::flush() {\n  std::lock_guard<std::mutex> lock(mutex_);\n  return flush_unlocked();\n}\n\n\nStatus RocksdbContext::create_checkpoint(const std::string &checkpoint_dir) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  rocksdb::Checkpoint *cp{nullptr};\n  if (auto s = rocksdb::Checkpoint::Create(db_.get(), &cp); !s.ok()) {\n    LOG_ERROR(\n        \"Failed to create a checkpoint object of Rocksdb[%s], code[%d], \"\n        \"reason[%s]\",\n        db_path_.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n\n  if (auto s = cp->CreateCheckpoint(checkpoint_dir); s.ok()) {\n    LOG_DEBUG(\"Created a checkpoint of Rocksdb[%s] to [%s]\", db_path_.c_str(),\n              checkpoint_dir.c_str());\n    delete cp;\n    return Status::OK();\n  } else {\n    LOG_ERROR(\n        \"Failed to create a checkpoint of Rocksdb[%s], code[%d], reason[%s]\",\n        db_path_.c_str(), s.code(), s.ToString().c_str());\n    delete cp;\n    return Status::InternalError();\n  }\n}\n\n\nrocksdb::ColumnFamilyHandle *RocksdbContext::get_cf(\n    const std::string &cf_name) {\n  std::lock_guard<std::mutex> lock(mutex_);\n  for (auto cf_handle : cf_handles_) {\n    if (cf_handle->GetName() == cf_name) {\n      return cf_handle;\n    }\n  }\n  return nullptr;\n}\n\n\nStatus RocksdbContext::create_cf(const std::string &cf_name) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (cf_name == rocksdb::kDefaultColumnFamilyName) {\n    LOG_ERROR(\"Forbidden to create default cf in RocksDB[%s]\",\n              db_path_.c_str());\n    return Status::InvalidArgument();\n  }\n\n  for (auto cf_handle : cf_handles_) {\n    if (cf_handle->GetName() == cf_name) {\n      LOG_ERROR(\"Column 
family[%s] already exists in RocksDB[%s]\",\n                cf_name.c_str(), db_path_.c_str());\n      return Status::InvalidArgument();\n    }\n  }\n\n  rocksdb::ColumnFamilyHandle *cf_handle{nullptr};\n  auto s = db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(create_opts_),\n                                   cf_name, &cf_handle);\n  if (s.ok()) {\n    cf_handles_.push_back(cf_handle);\n    LOG_DEBUG(\"Created cf[%s] in RocksDB[%s]\", cf_name.c_str(),\n              db_path_.c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]\",\n              cf_name.c_str(), db_path_.c_str(), s.code(),\n              s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus RocksdbContext::drop_cf(const std::string &cf_name) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (cf_name == rocksdb::kDefaultColumnFamilyName) {\n    LOG_ERROR(\"Forbidden to drop default cf in RocksDB[%s]\", db_path_.c_str());\n    return Status::InvalidArgument();\n  }\n\n  auto it = std::find_if(cf_handles_.begin(), cf_handles_.end(),\n                         [&cf_name](rocksdb::ColumnFamilyHandle *handle) {\n                           return handle->GetName() == cf_name;\n                         });\n  if (it == cf_handles_.end()) {\n    LOG_WARN(\"Failed to find column family[%s] in RocksDB[%s]\", cf_name.c_str(),\n             db_path_.c_str());\n    return Status::OK();\n  }\n\n  auto s = db_->DropColumnFamily(*it);\n  if (s.ok()) {\n    delete *it;\n    cf_handles_.erase(it);\n    LOG_DEBUG(\"Dropped cf[%s] in RocksDB[%s]\", cf_name.c_str(),\n              db_path_.c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to drop cf[%s] in RocksDB[%s], code[%d], reason[%s]\",\n              cf_name.c_str(), db_path_.c_str(), s.code(),\n              s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus RocksdbContext::reset_cf(const std::string 
&cf_name) {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  if (cf_name == rocksdb::kDefaultColumnFamilyName) {\n    LOG_ERROR(\"Forbidden to reset default cf in RocksDB[%s]\", db_path_.c_str());\n    return Status::InvalidArgument();\n  }\n\n  rocksdb::ColumnFamilyHandle *cf_handle{nullptr};\n  size_t index;\n  for (size_t i = 0; i < cf_handles_.size(); ++i) {\n    if (cf_handles_[i]->GetName() == cf_name) {\n      cf_handle = cf_handles_[i];\n      index = i;\n      break;\n    }\n  }\n  if (cf_handle == nullptr) {\n    LOG_ERROR(\"Column family[%s] does not exist in RocksDB[%s]\",\n              cf_name.c_str(), db_path_.c_str());\n    return Status::InvalidArgument();\n  }\n\n  auto options = db_->GetOptions(cf_handle);\n  auto s = db_->DropColumnFamily(cf_handle);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to drop cf[%s] in RocksDB[%s], code[%d], reason[%s]\",\n              cf_name.c_str(), db_path_.c_str(), s.code(),\n              s.ToString().c_str());\n    return Status::InternalError();\n  }\n  delete cf_handle;\n\n  rocksdb::ColumnFamilyHandle *new_cf_handle{nullptr};\n  s = db_->CreateColumnFamily(options, cf_name, &new_cf_handle);\n  if (s.ok()) {\n    cf_handles_[index] = new_cf_handle;\n    LOG_DEBUG(\"Reset cf[%s] in RocksDB[%s]\", cf_name.c_str(), db_path_.c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]\",\n              cf_name.c_str(), db_path_.c_str(), s.code(),\n              s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nvoid RocksdbContext::delete_cf_handles() {\n  for (auto cf : cf_handles_) {\n    db_->DestroyColumnFamilyHandle(cf);\n  }\n  cf_handles_.clear();\n}\n\n\nStatus RocksdbContext::compact() {\n  std::lock_guard<std::mutex> lock(mutex_);\n\n  for (auto cf : cf_handles_) {\n    auto s = db_->CompactRange(compact_range_opts_, cf, nullptr, nullptr);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to compact cf[%s] in RocksDB[%s], 
code[%d], reason[%s]\",\n                cf->GetName().c_str(), db_path_.c_str(), s.code(),\n                s.ToString().c_str());\n    }\n  }\n  auto s = db_->CompactRange(compact_range_opts_, nullptr, nullptr);\n  if (s.ok()) {\n    LOG_DEBUG(\"Compacted RocksDB[%s]\", db_path_.c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to compact RocksDB[%s], code[%d], reason[%s]\",\n              db_path_.c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nsize_t RocksdbContext::sst_file_size() {\n  uint64_t int_num = 0;\n  if (db_->GetIntProperty(\"rocksdb.live-sst-files-size\", &int_num)) {\n    return int_num;\n  } else {\n    return 0;\n  }\n}\n\n\nsize_t RocksdbContext::count() {\n  uint64_t int_num = 0;\n  if (db_->GetIntProperty(\"rocksdb.estimate-num-keys\", &int_num)) {\n    return int_num;\n  } else {\n    return 0;\n  }\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/rocksdb_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <rocksdb/db.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/db/status.h>\n\n\nnamespace zvec {\n\n\n// A very thin wrapper around RocksDB\nstruct RocksdbContext {\n public:\n  std::unique_ptr<rocksdb::DB> db_{nullptr};\n  std::string db_path_;\n  bool read_only_;\n  std::vector<rocksdb::ColumnFamilyHandle *> cf_handles_;\n  rocksdb::Options create_opts_;\n  rocksdb::WriteOptions write_opts_;\n  rocksdb::ReadOptions read_opts_;\n  rocksdb::FlushOptions flush_opts_;\n  rocksdb::CompactRangeOptions compact_range_opts_;\n  std::mutex mutex_;\n\n\n public:\n  // Create a Rocksdb instance\n  Status create(const std::string &db_path,\n                std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);\n\n\n  // Create a Rocksdb instance\n  Status create(const std::string &db_path,\n                const std::vector<std::string> &column_names,\n                std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);\n\n\n  // Open an existing Rocksdb instance\n  Status open(const std::string &db_path, bool read_only = false,\n              std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);\n\n\n  // Open an existing Rocksdb instance\n  Status open(const std::string &db_path,\n              const std::vector<std::string> &column_names,\n              bool read_only = false,\n              
std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);\n\n\n  // Close and flush data if needed\n  Status close();\n\n\n  // Flush data\n  Status flush();\n\n\n  // Create a checkpoint\n  Status create_checkpoint(const std::string &checkpoint_dir);\n\n\n  // Get a column family\n  rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name);\n\n\n  // Create a column family\n  Status create_cf(const std::string &cf_name);\n\n\n  // Drop a column family\n  Status drop_cf(const std::string &cf_name);\n\n\n  // Reset a column family\n  Status reset_cf(const std::string &cf_name);\n\n\n  // Compact db\n  Status compact();\n\n\n  // Get the size of the SST files\n  size_t sst_file_size();\n\n\n  // Get the estimated number of keys in the database\n  size_t count();\n\n\n private:\n  using FILE = ailego::File;\n\n\n  Status validate_and_set_db_path(const std::string &db_path,\n                                  bool should_exist);\n\n  void prepare_options(std::shared_ptr<rocksdb::MergeOperator> merge_op);\n\n  Status flush_unlocked();\n\n  void delete_cf_handles();\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/status.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <unordered_map>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/status.h>\n\nnamespace zvec {\n\nconst char *GetDefaultMessage(StatusCode code) {\n  static const std::unordered_map<StatusCode, const char *> kMessages = {\n      {StatusCode::OK, \"OK\"},\n      {StatusCode::NOT_FOUND, \"Not found\"},\n      {StatusCode::ALREADY_EXISTS, \"Already exists\"},\n      {StatusCode::INVALID_ARGUMENT, \"Invalid argument\"},\n      {StatusCode::PERMISSION_DENIED, \"Permission denied\"},\n      {StatusCode::FAILED_PRECONDITION, \"Failed precondition\"},\n      {StatusCode::RESOURCE_EXHAUSTED, \"Resource exhausted\"},\n      {StatusCode::UNAVAILABLE, \"Unavailable\"},\n      {StatusCode::INTERNAL_ERROR, \"Internal error\"},\n      {StatusCode::NOT_SUPPORTED, \"Not supported\"},\n      {StatusCode::UNKNOWN, \"Unknown error\"}};\n  auto it = kMessages.find(code);\n  return it != kMessages.end() ? 
it->second : \"Unknown status code\";\n}\n\n// Implementation of operator<<\nstd::ostream &operator<<(std::ostream &os, const Status &s) {\n  if (s.ok()) {\n    os << \"OK\";\n  } else {\n    os << \"Status(\" << GetDefaultMessage(s.code()) << \", \" << s.message()\n       << \")\";\n  }\n  return os;\n}\n\n// Implementation of comparison\nbool Status::operator==(const Status &other) const noexcept {\n  if (code_ != other.code_) return false;\n  if (code_ == StatusCode::OK) return true;\n  return msg_ == other.msg_;\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/typedef.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/logger/logger.h>\n#include \"error_code.h\"\n\nusing idx_t = uint64_t;\n\n#define PROXIMA_DISALLOW_COPY_AND_ASSIGN(TypeName) \\\n  TypeName(const TypeName &) = delete;             \\\n  TypeName &operator=(const TypeName &) = delete;\n\n\n#define COLLECTION_FORMAT \" collection[%s] \"\n\n#define CLOG_DEBUG(format, ...) \\\n  LOG_DEBUG(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())\n\n#define CLOG_INFO(format, ...) \\\n  LOG_INFO(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())\n\n#define CLOG_WARN(format, ...) \\\n  LOG_WARN(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())\n\n#define CLOG_ERROR(format, ...) \\\n  LOG_ERROR(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())\n\n#define CLOG_FATAL(format, ...) \\\n  LOG_FATAL(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())\n\n#define ELOG_ERROR(format, ...) \\\n  LOG_ERROR(format \" errno[%s] \", ##__VA_ARGS__, std::strerror(errno))\n\n#define WAL_FORMAT \" wal_path_[%s] \"\n\n#define WLOG_DEBUG(format, ...) \\\n  LOG_DEBUG(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())\n\n#define WLOG_INFO(format, ...) \\\n  LOG_INFO(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())\n\n\n#define WLOG_WARN(format, ...) 
\\\n  LOG_WARN(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())\n\n#define WLOG_ERROR(format, ...) \\\n  LOG_ERROR(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())\n\n#define WLOG_FATAL(format, ...) \\\n  LOG_FATAL(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())\n\n#define SEGMENT_FORMAT \" segment[%zu] collection[%s] \"\n\n#define SLOG_DEBUG(format, ...)                                         \\\n  LOG_DEBUG(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \\\n            collection_name().c_str())\n\n#define SLOG_INFO(format, ...)                                         \\\n  LOG_INFO(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \\\n           collection_name().c_str())\n\n#define SLOG_WARN(format, ...)                                         \\\n  LOG_WARN(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \\\n           collection_name().c_str())\n\n#define SLOG_ERROR(format, ...)                                         \\\n  LOG_ERROR(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \\\n            collection_name().c_str())\n\n#define SLOG_FATAL(format, ...)                                         \\\n  LOG_FATAL(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \\\n            collection_name().c_str())\n\n#define COLUMN_FORMAT \" column[%s] segment[%zu] collection[%s] \"\n\n#define LLOG_DEBUG(format, ...)                                         \\\n  LOG_DEBUG(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \\\n            (size_t)segment_id(), collection_name().c_str())\n\n#define LLOG_INFO(format, ...)                                         \\\n  LOG_INFO(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \\\n           (size_t)segment_id(), collection_name().c_str())\n\n#define LLOG_WARN(format, ...)                                         
\\\n  LOG_WARN(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \\\n           (size_t)segment_id(), collection_name().c_str())\n\n#define LLOG_ERROR(format, ...)                                         \\\n  LOG_ERROR(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \\\n            (size_t)segment_id(), collection_name().c_str())\n\n#define LLOG_FATAL(format, ...)                                         \\\n  LOG_FATAL(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \\\n            (size_t)segment_id(), collection_name().c_str())\n\n#define CHECK_STATUS(status, expect)                                         \\\n  if (status != expect) {                                                    \\\n    LOG_ERROR(\"Check status failed. status[%d] expect[%d]\", status, expect); \\\n    return PROXIMA_ZVEC_ERROR_CODE(StatusError);                             \\\n  }\n\n#define CHECK_STATUS_CLOSURE(status, expect)                                 \\\n  if (status != expect) {                                                    \\\n    LOG_ERROR(\"Check status failed. status[%d] expect[%d]\", status, expect); \\\n    done->set_code(PROXIMA_ZVEC_ERROR_CODE(StatusError));                    \\\n    return;                                                                  \\\n  }\n\n#define CHECK_RETURN(ret, expect_ret) \\\n  if (ret != expect_ret) {            \\\n    return ret;                       \\\n  }\n\n#define CHECK_RETURN_WITH_LOG(ret, expect_ret, format, ...) \\\n  if (ret != expect_ret) {                                  \\\n    LOG_ERROR(format, ##__VA_ARGS__);                       \\\n    return ret;                                             \\\n  }\n\n#define CHECK_RETURN_WITH_CLOG(ret, expect_ret, format, ...) 
\\\n  if (ret != expect_ret) {                                   \\\n    CLOG_ERROR(format, ##__VA_ARGS__);                       \\\n    return ret;                                              \\\n  }\n\n#define CHECK_RETURN_WITH_SLOG(ret, expect_ret, format, ...) \\\n  if (ret != expect_ret) {                                   \\\n    SLOG_ERROR(format, ##__VA_ARGS__);                       \\\n    return ret;                                              \\\n  }\n\n#define CHECK_RETURN_WITH_LLOG(ret, expect_ret, format, ...) \\\n  if (ret != expect_ret) {                                   \\\n    LLOG_ERROR(format, ##__VA_ARGS__);                       \\\n    return ret;                                              \\\n  }\n\n#define CHECK_DESTROY_RETURN_STATUS(status, expect)                     \\\n  if (status != expect) {                                               \\\n    LOG_ERROR(\"Collection[%s] is already destroyed.\",                   \\\n              schema_->name().c_str());                                 \\\n    return Status::InvalidArgument(\"collection is already destroyed.\"); \\\n  }\n\n#define CHECK_DESTROY_RETURN_STATUS_EXPECTED(status, expect)          \\\n  if (status != expect) {                                             \\\n    LOG_ERROR(\"Collection[%s] is already destroyed.\",                 \\\n              schema_->name().c_str());                               \\\n    return tl::make_unexpected(                                       \\\n        Status::InvalidArgument(\"collection is already destroyed.\")); \\\n  }\n\n#define CHECK_RETURN_STATUS(status) \\\n  if (!status.ok()) {               \\\n    return status;                  \\\n  }\n\n#define CHECK_RETURN_STATUS_EXPECTED(status) \\\n  if (!status.ok()) {                        \\\n    return tl::make_unexpected(status);      \\\n  }\n\n#define CHECK_COLLECTION_READONLY_RETURN_STATUS \\\n  CHECK_READONLY_RETURN_STATUS(Collection)\n\n#define 
CHECK_SEGMENT_READONLY_RETURN_STATUS \\\n  CHECK_READONLY_RETURN_STATUS(Segment)\n\n#define CHECK_READONLY_RETURN_STATUS(type)                \\\n  if (options_.read_only_) {                              \\\n    return Status::InvalidArgument(#type                  \\\n                                   \" is \"                 \\\n                                   \"opened in read-only \" \\\n                                   \"mode\");               \\\n  }\n\n#define CHECK_COLLECTION_READONLY_RETURN_STATUS_EXPECTED \\\n  CHECK_READONLY_RETURN_STATUS_EXPECTED(Collection)\n\n#define CHECK_SEGMENT_READONLY_RETURN_STATUS_EXPECTED \\\n  CHECK_READONLY_RETURN_STATUS_EXPECTED(Segment)\n\n#define CHECK_READONLY_RETURN_STATUS_EXPECTED(type)                           \\\n  if (options_.read_only_) {                                                  \\\n    return tl::make_unexpected(Status::InvalidArgument(#type                  \\\n                                                       \" is \"                 \\\n                                                       \"opened in read-only \" \\\n                                                       \"mode\"));              \\\n  }\n"
  },
  {
    "path": "src/db/common/utils.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"utils.h\"\n\nnamespace zvec {\n\nstd::string indent(int level) {\n  return std::string(level * 2, ' ');\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/common/utils.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <string>\n\nnamespace zvec {\nstd::string indent(int level);\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME zvec_index STATIC STRICT\n    SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc storage/*.cc storage/wal/*.cc common/*.cc\n    LIBS zvec_common\n         zvec_proto\n         rocksdb\n         core_interface\n         Arrow::arrow_static\n         Arrow::arrow_compute\n         Arrow::arrow_dataset\n    INCS .  ${PROJECT_ROOT_DIR}/src\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/db/index/column/column_indexer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n\nnamespace zvec {\nclass ColumnIndexer {\n public:\n  using Ptr = std::shared_ptr<ColumnIndexer>;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/common/index_results.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include \"db/common/typedef.h\"\n#include \"db/index/column/vector_column/vector_column_params.h\"\n\nnamespace zvec {\n\nclass IndexResults {\n public:\n  using Ptr = std::shared_ptr<IndexResults>;\n  class Iterator {\n   public:\n    virtual ~Iterator() = default;\n\n    virtual idx_t doc_id() const = 0;\n\n    virtual float score() const = 0;\n\n    virtual void next() = 0;\n\n    virtual bool valid() const = 0;\n\n    virtual const std::string &group_id() const {\n      return kEmpty;\n    }\n\n    virtual const vector_column_params::VectorData vector() const {\n      return vector_column_params::VectorData{};\n    }\n\n    bool is_sparse() const {\n      return is_sparse_;\n    }\n    bool set_is_sparse(bool is_sparse) {\n      is_sparse_ = is_sparse;\n      return true;\n    }\n\n   private:\n    bool is_sparse_{false};\n    inline static const std::string kEmpty{\"\"};\n  };\n  using IteratorUPtr = std::unique_ptr<IndexResults::Iterator>;\n\n public:\n  virtual ~IndexResults() = default;\n\n  virtual size_t count() const = 0;\n\n  virtual IteratorUPtr create_iterator() = 0;\n};\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_codec.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <string>\n#include <roaring/roaring.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n\n\nnamespace zvec {\n\n\n/*\n *\n * This class provides encoding/decoding functionality for inverted index data.\n *\n *\n * RocksDB stores key-value pairs as byte strings. 
Therefore, all data types\n * must be converted to a string format for storage.\n *\n * To ensure correct lexicographic sorting of numeric values (integers and\n * floating-point numbers), we need to transform them so that their byte\n * representation sorts in the same order as their numerical values.\n *\n *\n * The codec also manages storage of mapped document IDs, using either list or\n * bitmap structures depending on the data size for optimal storage efficiency.\n *\n */\nclass InvertedIndexCodec {\n public:\n  static std::string Encode(const std::string &term, DataType type) {\n    switch (type) {\n      case DataType::INT32:\n      case DataType::INT64: {\n        auto result = term;\n        convert_to_big_endian(result.data(), result.size());\n        // For signed, reverse sign bit, set positive to 1, negative to 0\n        result[0] ^= 0x80;\n        return result;\n      }\n      case DataType::UINT32:\n      case DataType::UINT64: {\n        auto result = term;\n        convert_to_big_endian(result.data(), result.size());\n        return result;\n      }\n      case DataType::FLOAT:\n      case DataType::DOUBLE: {\n        auto result = term;\n        convert_to_big_endian(result.data(), result.size());\n        // If float is negative, negate each byte; else reverse sign bit\n        if ((result[0] & 0x80) > 0) {\n          for (size_t i = 0; i < result.size(); i++) {\n            result[i] = ~result[i];\n          }\n        } else {\n          result[0] ^= 0x80;\n        }\n        return result;\n      }\n      default:\n        return term;\n    }\n  }\n\n\n  static std::string Encode(const std::string_view &term, DataType type) {\n    switch (type) {\n      case DataType::INT32:\n      case DataType::INT64: {\n        std::string result(term);\n        convert_to_big_endian(result.data(), result.size());\n        // For signed, reverse sign bit, set positive to 1, negative to 0\n        result[0] ^= 0x80;\n        return result;\n      }\n      
case DataType::UINT32:\n      case DataType::UINT64: {\n        std::string result(term);\n        convert_to_big_endian(result.data(), result.size());\n        return result;\n      }\n      case DataType::FLOAT:\n      case DataType::DOUBLE: {\n        std::string result(term);\n        convert_to_big_endian(result.data(), result.size());\n        // If float is negative, negate each byte; else reverse sign bit\n        if ((result[0] & 0x80) > 0) {\n          for (size_t i = 0; i < result.size(); i++) {\n            result[i] = ~result[i];\n          }\n        } else {\n          result[0] ^= 0x80;\n        }\n        return result;\n      }\n      default:\n        return std::string(term);\n    }\n  }\n\n\n  static std::string Encode(bool value) {\n    if (value) {\n      return \"true\";\n    } else {\n      return \"false\";\n    }\n  }\n\n\n  static std::string Encode_Reversed(const std::string &term) {\n    std::string reversed = term;\n    std::reverse(reversed.begin(), reversed.end());\n    return reversed;\n  }\n\n\n  // Format of range key:\n  // [range_begin_key][separator_byte][range_end_key][range_begin_key_size]\n  static void Decode_Range_Key(const char *range_key_ptr, size_t range_key_size,\n                               char **range_begin_pos,\n                               size_t *range_begin_key_size,\n                               char **range_end_pos,\n                               size_t *range_end_key_size) {\n    *range_begin_key_size =\n        *(uint64_t *)(range_key_ptr + (range_key_size - sizeof(uint64_t)));\n    *range_begin_pos = (char *)range_key_ptr;\n\n    *range_end_key_size =\n        range_key_size - sizeof(uint64_t) - (*range_begin_key_size) - 1;\n    *range_end_pos = (char *)(range_key_ptr + (*range_begin_key_size) + 1);\n  }\n\n\n  // Return negative number if s1 < s2, positive number if s1 > s2, 0 if equal\n  static int CMP(const char *s1, size_t s1_len, const char *s2, size_t s2_len) {\n    size_t min_len = 
std::min(s1_len, s2_len);\n    int r = memcmp(s1, s2, min_len);\n    if (r == 0) {\n      if (s1_len < s2_len)\n        r = -1;\n      else if (s1_len > s2_len)\n        r = +1;\n    }\n    return r;\n  }\n\n\n  static bool Has_Prefix(const char *value, size_t value_len,\n                         const char *prefix, size_t prefix_len) {\n    if (value_len < prefix_len) {\n      return false;\n    }\n    return memcmp(value, prefix, prefix_len) == 0;\n  }\n\n\n  static Status Serialize(roaring_bitmap_t *bitmap, std::string *out) {\n    if (!bitmap) {\n      LOG_ERROR(\"Invalid bitmap pointer\");\n      return Status::InvalidArgument();\n    }\n    if (!out) {\n      LOG_ERROR(\"Invalid output pointer\");\n      return Status::InvalidArgument();\n    }\n    out->clear();\n\n    uint64_t count = roaring_bitmap_get_cardinality(bitmap);\n    if (count == 0) {\n      LOG_ERROR(\"Bitmap is empty\");\n      return Status::InternalError();\n    } else if (count > INVERT_ID_LIST_SIZE_THRESHOLD) {\n      return serialize_bitmap(bitmap, out);\n    } else {\n      return serialize_docid_list(bitmap, out);\n    }\n  }\n\n\n  static Status Deserialize(const char *data, size_t size,\n                            roaring_bitmap_t **bitmap) {\n    if (!data || size == 0) {\n      LOG_ERROR(\"Input data is invalid\");\n      return Status::InvalidArgument();\n    }\n\n    unsigned char header = data[0];\n    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {\n      LOG_ERROR(\"Invalid header found in inverted data\");\n      return Status::InternalError();\n    }\n\n    if (header == 0) {  // This is a bitmap\n      *bitmap = roaring_bitmap_portable_deserialize_safe(data + 1, size - 1);\n      if (*bitmap) {\n        return Status::OK();\n      } else {\n        LOG_ERROR(\"Failed to deserialize bitmap\");\n        return Status::InternalError();\n      }\n    }\n\n    // This is a id list\n    if ((size - 1) != header * sizeof(uint32_t)) {\n      LOG_ERROR(\"Failed to deserialize 
docid_list\");\n      return Status::InternalError();\n    }\n    *bitmap = roaring_bitmap_create();\n    if (*bitmap == nullptr) {\n      LOG_ERROR(\"Failed to create bitmap\");\n      return Status::InternalError();\n    }\n    for (size_t i = 1; i < size; i += sizeof(uint32_t)) {\n      roaring_bitmap_add(*bitmap,\n                         *reinterpret_cast<const uint32_t *>(data + i));\n    }\n    return Status::OK();\n  }\n\n\n  static Status Merge_OR(const char *data, size_t size, bool lazy,\n                         roaring_bitmap_t *bitmap) {\n    if (!data || size == 0) {\n      LOG_ERROR(\"Input data is invalid\");\n      return Status::InvalidArgument();\n    }\n\n    unsigned char header = data[0];\n    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {\n      LOG_ERROR(\"Invalid header found in inverted data\");\n      return Status::InternalError();\n    }\n\n    if (header == 0) {  // This is a bitmap\n      auto bitmap_other = roaring_bitmap_portable_deserialize_frozen(data + 1);\n      if (!bitmap_other) {\n        LOG_ERROR(\"Failed to deserialize bitmap\");\n        return Status::InternalError();\n      }\n      if (lazy) {\n        roaring_bitmap_lazy_or_inplace(bitmap, bitmap_other, true);\n      } else {\n        roaring_bitmap_or_inplace(bitmap, bitmap_other);\n      }\n      roaring_bitmap_free(bitmap_other);\n      return Status::OK();\n    }\n\n    // This is a id list\n    if ((size - 1) != header * sizeof(uint32_t)) {\n      LOG_ERROR(\"Failed to deserialize docid_list\");\n      return Status::InternalError();\n    }\n    auto doc_list = reinterpret_cast<const uint32_t *>(data + 1);\n    for (size_t i = 0; i < header; ++i) {\n      roaring_bitmap_add(bitmap, doc_list[i]);\n    }\n    return Status::OK();\n  }\n\n\n  static Status Merge_AND(const char *data, size_t size,\n                          roaring_bitmap_t *bitmap) {\n    if (!data || size == 0) {\n      LOG_ERROR(\"Input data is invalid\");\n      return Status::InvalidArgument();\n 
   }\n\n    unsigned char header = data[0];\n    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {\n      LOG_ERROR(\"Invalid header found in inverted data\");\n      return Status::InternalError();\n    }\n\n    if (header == 0) {  // This is a bitmap\n      auto bitmap_other = roaring_bitmap_portable_deserialize_frozen(data + 1);\n      if (!bitmap_other) {\n        LOG_ERROR(\"Failed to deserialize bitmap\");\n        return Status::InternalError();\n      }\n      roaring_bitmap_and_inplace(bitmap, bitmap_other);\n      roaring_bitmap_free(bitmap_other);\n      return Status::OK();\n    }\n\n    // This is a id list\n    if ((size - 1) != header * sizeof(uint32_t)) {\n      LOG_ERROR(\"Failed to deserialize docid_list\");\n      return Status::InternalError();\n    }\n    auto doc_list = reinterpret_cast<const uint32_t *>(data + 1);\n    uint32_t tmp = 0;\n    for (size_t i = 0; i < header; ++i) {\n      tmp |= roaring_bitmap_contains(bitmap, doc_list[i]) << i;\n    }\n    roaring_bitmap_clear(bitmap);\n    for (size_t i = 0; i < header; ++i) {\n      if (tmp & (1 << i)) {\n        roaring_bitmap_add(bitmap, doc_list[i]);\n      }\n    }\n    return Status::OK();\n  }\n\n\n private:\n  static void convert_to_big_endian(char *in, size_t size) {\n    static const bool isBigEndianSystem = []() {\n      int i = 0x1243;\n      char *ch = (char *)&i;\n      return (*ch == 0x12);\n    }();\n\n    if (isBigEndianSystem) {\n      return;\n    }\n\n    char *p = in;\n    for (size_t i = 0; i < size / 2; ++i) {\n      std::swap(p[i], p[size - i - 1]);\n    }\n  }\n\n\n  static Status serialize_bitmap(const roaring_bitmap_t *bitmap,\n                                 std::string *out) {\n    size_t bitmap_size = roaring_bitmap_portable_size_in_bytes(bitmap);\n    out->resize(1 + bitmap_size);\n\n    // Set the first byte with value 0, indicating the data is a bitmap\n    (*out)[0] = static_cast<char>(0);\n    size_t written_size = roaring_bitmap_portable_serialize(\n        
bitmap, const_cast<char *>(out->data()) + 1);\n    if (written_size == bitmap_size) {\n      return Status::OK();\n    } else {\n      LOG_ERROR(\"Failed to serialize bitmap\");\n      return Status::InternalError();\n    }\n  }\n\n\n  static Status serialize_docid_list(const roaring_bitmap_t *bitmap,\n                                     std::string *out) {\n    auto doc_count = roaring_bitmap_get_cardinality(bitmap);\n    out->reserve(1 + doc_count * sizeof(uint32_t));\n    // Adds a single byte at the beginning indicating the count of document IDs\n    out->append(1, static_cast<unsigned char>(doc_count));\n\n    auto iter = roaring_create_iterator(bitmap);\n    while (iter->has_value) {\n      out->append(reinterpret_cast<const char *>(&(iter->current_value)),\n                  sizeof(uint32_t));\n      roaring_advance_uint32_iterator(iter);\n    }\n    roaring_free_uint32_iterator(iter);\n    return Status::OK();\n  }\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_column_indexer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/db/schema.h>\n#include \"db/common/concurrent_roaring_bitmap.h\"\n#include \"db/common/rocksdb_context.h\"\n#include \"inverted_codec.h\"\n#include \"inverted_doc_range.h\"\n#include \"inverted_search_result.h\"\n\n\nnamespace zvec {\n\n\n//\n// An inverted column indexer manages document-term indexing in two states:\n// 1. Streaming:\n//      - allows insertion of new terms and indexing of document-term pairs\n// 2. Sealed:\n//      - read-only, no further writes permitted\n//      - statistical index generated for optimized search performance\n//\n//\n// InvertedColumnIndexer requires document IDs to be sequential integers\n// starting from 0 without any gaps.\n// While documents can be inserted in any order, the complete sequence from 0 to\n// max id must be present before sealing the index.\n//\n// Multiple inverted column indexers share the same RocksDB instance but each\n// indexer uses its own RocksDB column families within that shared RocksDB\n// instance. 
This design allows for efficient resource utilization while\n// maintaining data separation between different columns.\n//\nclass InvertedColumnIndexer {\n public:\n  using Ptr = std::shared_ptr<InvertedColumnIndexer>;\n\n\n  static Ptr CreateAndOpen(const std::string &collection_name,\n                           const FieldSchema &field, RocksdbContext &context,\n                           bool read_only = false);\n\n\n  virtual ~InvertedColumnIndexer();\n\n\n protected:\n  explicit InvertedColumnIndexer(const std::string &collection_name,\n                                 const FieldSchema &field,\n                                 RocksdbContext &context, bool read_only)\n      : collection_name_(collection_name),\n        field_(field),\n        path_(context.db_path_),\n        ctx_(context),\n        read_only_(read_only) {};\n\n  InvertedColumnIndexer(const InvertedColumnIndexer &) = delete;\n  InvertedColumnIndexer(InvertedColumnIndexer &&) = delete;\n  InvertedColumnIndexer &operator=(const InvertedColumnIndexer &) = delete;\n  InvertedColumnIndexer &operator=(InvertedColumnIndexer &&) = delete;\n\n\n  // TODO: for unit tests only, remove this\n  InvertedColumnIndexer(RocksdbContext &ctx) : ctx_(ctx) {}\n\n\n public:\n  /*!\n   * \\brief Search for documents matching the given value and operation\n   * \\param value The value to compare against (e.g., \"5\", \"10\")\n   * \\param op The comparison operation (e.g., EQ, GT, LT)\n   * \\return Pointer to search results containing matching documents\n   */\n  virtual InvertedSearchResult::Ptr search(const std::string &value,\n                                           CompareOp op) const;\n\n\n  /*!\n   * \\brief Search for documents matching multiple values\n   * \\param values List of values to compare against (e.g., {\"5\", \"10\", \"15\"})\n   * \\param op The comparison operation to apply (e.g., CONTAIN_ANY)\n   * \\return Pointer to search results containing matching documents\n   */\n  virtual 
InvertedSearchResult::Ptr multi_search(\n      const std::vector<std::string> &values, CompareOp op) const;\n\n\n  /*!\n   * \\brief Search for documents matching array length\n   * \\param len The array length value to compare against\n   * \\param op The comparison operation to apply (e.g., EQ)\n   * \\return Pointer to search results containing matching documents\n   */\n  virtual InvertedSearchResult::Ptr search_array_len(uint32_t len,\n                                                     CompareOp op) const;\n\n\n  /*!\n   * \\brief Search for documents that have null values\n   * \\return Pointer to search results containing matching documents\n   */\n  virtual InvertedSearchResult::Ptr search_null() const;\n\n\n  /*!\n   * \\brief Search for documents that have non-null values\n   * \\return Pointer to search results containing matching documents\n   */\n  virtual InvertedSearchResult::Ptr search_non_null() const;\n\n\n  /*!\n   * \\brief Evaluate the ratio of matching documents compared to total documents\n   * \\param value The value to compare against (e.g., \"5\", \"10\")\n   * \\param op The comparison operation (e.g., EQ, GT, LT)\n   * \\param total_size Pointer to store the total number of documents\n   * \\param range_size Pointer to store the number of matching documents\n   * \\return Status indicating success or failure of the evaluation\n   */\n  virtual Status evaluate_ratio(const std::string &value, CompareOp op,\n                                uint64_t *total_size,\n                                uint64_t *range_size) const;\n\n\n  /*!\n   * \\brief Insert a document-term pair into the inverted index\n   * \\param id The document ID to insert\n   * \\param value The string-encoded representation of the value to index. This\n   *              parameter may contain either a single value or an array of\n   *              values depending on the field type. 
The underlying data type\n   *              might differ from std::string - the string serves as a generic\n   *              serialization buffer for the actual typed data.\n   * \\return Status indicating success or failure of the insert operation\n   */\n  Status insert(uint32_t id, const std::string &value);\n\n\n  /*!\n   * \\brief Insert a document with multiple strings into the inverted index\n   * \\param id The document ID to insert\n   * \\param values Multiple string values to index\n   * \\return Status indicating success or failure of the insert operation\n   */\n  Status insert(uint32_t id, const std::vector<std::string> &values);\n\n\n  /*!\n   * \\brief Insert a document with a boolean value into the inverted index\n   * \\param id The document ID to insert\n   * \\param value The boolean value to index\n   * \\return Status indicating success or failure of the insert operation\n   */\n  Status insert(uint32_t id, bool value);\n\n\n  /*!\n   * \\brief Insert a document with multiple booleans into the inverted index\n   * \\param id The document ID to insert\n   * \\param values Multiple boolean values to index\n   * \\return Status indicating success or failure of the insert operation\n   */\n  Status insert(uint32_t id, const std::vector<bool> &values);\n\n\n  /*!\n   * \\brief Insert a document with null value into the inverted index\n   * \\param id The document ID to insert\n   * \\return Status indicating success or failure of the insert operation\n   */\n  Status insert_null(uint32_t id);\n\n\n  /*!\n   * \\brief Serialize special values, e.g., null-value bitmap and max doc id\n   * \\return Status indicating success or failure of the serialization\n   */\n  Status flush_special_values();\n\n\n  /*!\n   * \\brief Seal the index and generate statistical indexes\n   * \\return Status indicating success or failure of the operation\n   */\n  Status seal();\n\n\n  /*!\n   * \\brief Check if the index is 
sealed, false otherwise\n   */\n  inline bool is_sealed() const {\n    return sealed_;\n  }\n\n\n  /*!\n   * \\brief Drop the index storage\n   * \\return Status indicating success or failure of the operation\n   */\n  Status drop_storage();\n\n\n  /*!\n   * \\brief Get the name of the corresponding collection\n   * \\return The name of the corresponding collection\n   */\n  inline const std::string &collection_name() const {\n    return collection_name_;\n  }\n\n\n  inline const std::string ID() const {\n    return \"InvertedColumnIndexer[collection:\" + collection_name_ +\n           \"|field:\" + field_.name() + \"|path:'\" + path_ + \"']\";\n  }\n\n\n private:\n  using Slice = rocksdb::Slice;\n  using PinnableSlice = rocksdb::PinnableSlice;\n\n\n  Status open();\n\n  inline std::string encode(const std::string &term) const {\n    return InvertedIndexCodec::Encode(term, field_.element_data_type());\n  }\n\n  inline std::vector<std::string> encode_array(const std::string &terms) const {\n    std::vector<std::string> result{};\n    size_t s = field_.element_data_size();\n    if (s == 0) {\n      return result;\n    }\n    size_t num_terms = terms.size() / s;\n    result.reserve(num_terms);\n    for (size_t i = 0; i < num_terms; ++i) {\n      std::string_view sv(terms.data() + (i * s), s);\n      result.emplace_back(\n          InvertedIndexCodec::Encode(sv, field_.element_data_type()));\n    }\n    return result;\n  }\n\n  inline std::vector<std::string> encode(\n      const std::vector<std::string> &terms) const {\n    std::vector<std::string> result;\n    result.reserve(terms.size());\n    for (auto &term : terms) {\n      result.emplace_back(encode(term));\n    }\n    return result;\n  }\n\n  inline std::string encode(bool value) {\n    return InvertedIndexCodec::Encode(value);\n  }\n\n  inline std::string encode_reversed(const std::string &term) const {\n    return InvertedIndexCodec::Encode_Reversed(term);\n  }\n\n  inline int cmp(const char *s1, size_t 
s1_len, const char *s2,\n                 size_t s2_len) const {\n    return InvertedIndexCodec::CMP(s1, s1_len, s2, s2_len);\n  }\n\n  inline bool cmp_lt(const char *s1, size_t s1_len, const char *s2,\n                     size_t s2_len, bool include_eq) const {\n    int ret = InvertedIndexCodec::CMP(s1, s1_len, s2, s2_len);\n    return (include_eq && ret <= 0) || (!include_eq && ret < 0);\n  }\n\n  inline bool has_prefix(const char *value, size_t value_len,\n                         const char *prefix, size_t prefix_len) const {\n    return InvertedIndexCodec::Has_Prefix(value, value_len, prefix, prefix_len);\n  }\n\n  inline void update_max_id(uint32_t id) {\n    uint32_t expected_id = max_id_.load();\n    while (expected_id < id &&\n           !max_id_.compare_exchange_weak(expected_id, id)) {\n      ;\n    }\n  }\n\n  inline Status estimate_range_ratio(const std::string &term, CompareOp op,\n                                     uint64_t *total_count,\n                                     uint64_t *matching_count) const;\n\n  inline bool range_covers_most_values(const std::string &term,\n                                       CompareOp op) const;\n\n  inline roaring_bitmap_t *flip_bitmap(roaring_bitmap_t *bitmap) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_eq(const std::string &term) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_contain(\n      const std::vector<std::string> &terms, bool is_any) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_ne(const std::string &term) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_not_contain(\n      const std::vector<std::string> &terms, bool is_any) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_lt(const std::string &term,\n                                           bool include_eq) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_gt(const std::string &term,\n                                           bool include_eq) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_array_len_eq(uint32_t len) const;\n\n  
Result<roaring_bitmap_t *> get_bitmap_array_len_ne(uint32_t len) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_array_len_lt(uint32_t len,\n                                                     bool include_eq) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_array_len_gt(uint32_t len,\n                                                     bool include_eq) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_like(std::string term) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_prefix(const std::string &term) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_suffix(const std::string &term) const;\n\n  Result<roaring_bitmap_t *> get_bitmap_null() const;\n\n  Result<roaring_bitmap_t *> get_bitmap_non_null() const;\n\n  rocksdb::Status index_array_len(uint32_t id, uint32_t len);\n\n  Status generate_statistical_indexes();\n\n\n private:\n  inline std::string cf_name_terms() const {\n    return field_.name() + INVERT_SUFFIX_TERMS;\n  };\n\n  inline std::string cf_name_reversed_terms() const {\n    return field_.name() + INVERT_SUFFIX_REVERSED_TERMS;\n  };\n\n  inline std::string cf_name_array_len() const {\n    return field_.name() + INVERT_SUFFIX_ARRAY_LEN;\n  };\n\n  inline std::string cf_name_ranges() const {\n    return field_.name() + INVERT_SUFFIX_RANGES;\n  };\n\n  inline std::string cf_name_cdf() const {\n    return INVERT_CDF;\n  };\n\n  inline std::string key_max_id() const {\n    return field_.name() + INVERT_KEY_MAX_ID;\n  };\n\n  inline std::string key_null() const {\n    return field_.name() + INVERT_KEY_NULL;\n  };\n\n  inline std::string key_sealed() const {\n    return field_.name() + INVERT_KEY_SEALED;\n  };\n\n  inline bool allow_range_optimization(const FieldSchema &field) const {\n    bool not_allowed =\n        field.is_array_type() || field.data_type() == DataType::BOOL;\n    return !not_allowed;\n  }\n\n  inline bool allow_extended_wildcard(const FieldSchema &field) const {\n    return field.data_type() == DataType::STRING;\n  }\n\n\n 
private:\n  const std::string collection_name_{};\n  const FieldSchema field_{};\n  const std::string path_{};\n\n\n  // Column families:\n  // 1. cf_terms_:              Inverted index for terms\n  // 2. cf_reversed_terms_:     Inverted index for reversed terms\n  // 3. cf_array_len_:          Inverted index for array length\n  // 4. cf_ranges_:             Range index\n  // 5. cf_cdf_:                Cumulative distribution function\n  // 6. default cf:             Stores special values\n  //                              - null-value bitmap\n  //                              - max id\n  //                              - is_sealed\n  //\n  // Some column families are optional and may be nullptr.\n  // For example, cf_ranges_ is nullptr when the indexer is not sealed (range\n  // index not yet generated) or when range optimization is explicitly disabled.\n  RocksdbContext &ctx_;\n  rocksdb::ColumnFamilyHandle *cf_terms_{nullptr};\n  rocksdb::ColumnFamilyHandle *cf_reversed_terms_{nullptr};\n  rocksdb::ColumnFamilyHandle *cf_array_len_{nullptr};\n  rocksdb::ColumnFamilyHandle *cf_ranges_{nullptr};\n  rocksdb::ColumnFamilyHandle *cf_cdf_{nullptr};\n\n\n  bool read_only_{false};\n  bool sealed_{false};\n  bool enable_range_optimization_{false};\n  bool enable_extended_wildcard_{false};\n  std::atomic<uint32_t> max_id_{0};\n  ConcurrentRoaringBitmap32 null_bitmap_{};\n  SegmentDocRangeStat::Ptr doc_range_stat_{nullptr};\n};\n\n\n};  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_column_indexer_search.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <optional>\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include \"inverted_codec.h\"\n#include \"inverted_column_indexer.h\"\n\n\nnamespace zvec {\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_eq(\n    const std::string &term) const {\n  PinnableSlice bitmap_slice;\n  auto s = ctx_.db_->Get(ctx_.read_opts_, cf_terms_, term, &bitmap_slice);\n  if (!s.ok()) {\n    if (s.code() == rocksdb::Status::kNotFound) {\n      return nullptr;\n    }\n    LOG_ERROR(\n        \"Failed to retrieve data for term[%s] from %s, code[%d], reason[%s]\",\n        term.c_str(), ID().c_str(), s.code(), s.ToString().c_str());\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  roaring_bitmap_t *bitmap{nullptr};\n  Status status = InvertedIndexCodec::Deserialize(bitmap_slice.data(),\n                                                  bitmap_slice.size(), &bitmap);\n  if (status.ok()) {\n    return bitmap;\n  } else {\n    LOG_ERROR(\n        \"Failed to deserialize bitmap for term[%s] from %s, bitmap size[%zu]\",\n        term.c_str(), ID().c_str(), bitmap_slice.size());\n    return tl::make_unexpected(Status::InternalError());\n  }\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_contain(\n    const std::vector<std::string> &terms, bool is_any) const {\n  if 
(terms.empty()) {\n    LOG_ERROR(\"Terms is empty\");\n    return tl::make_unexpected(Status::InvalidArgument());\n  }\n\n  // Shall we sort the terms here? Does it make any difference in performance?\n  std::vector<Slice> slice_terms(terms.begin(), terms.end());\n  std::vector<PinnableSlice> bitmap_slices;\n  bitmap_slices.resize(terms.size());\n  std::vector<rocksdb::Status> statuses;\n  statuses.resize(terms.size());\n  ctx_.db_->MultiGet(ctx_.read_opts_, cf_terms_, slice_terms.size(),\n                     slice_terms.data(), bitmap_slices.data(), statuses.data(),\n                     false);\n\n  roaring_bitmap_t *bitmap{nullptr};\n  Status s = Status::OK();\n  AILEGO_DEFER([&]() {\n    if (!s.ok() && bitmap) {\n      roaring_bitmap_free(bitmap);\n    }\n  });\n\n  auto init_or_merge_at_i = [&](size_t i) {\n    if (bitmap == nullptr) {\n      s = InvertedIndexCodec::Deserialize(bitmap_slices[i].data(),\n                                          bitmap_slices[i].size(), &bitmap);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to deserialize bitmap for term[%s] from %s\",\n                  terms[i].c_str(), ID().c_str());\n      }\n      return;\n    }\n\n    if (is_any) {\n      s = InvertedIndexCodec::Merge_OR(bitmap_slices[i].data(),\n                                       bitmap_slices[i].size(), true, bitmap);\n    } else {\n      s = InvertedIndexCodec::Merge_AND(bitmap_slices[i].data(),\n                                        bitmap_slices[i].size(), bitmap);\n    }\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to merge bitmap for term[%s] from %s\", terms[i].c_str(),\n                ID().c_str());\n    }\n  };\n\n  for (size_t i = 0; i < terms.size(); i++) {\n    if (statuses[i].ok()) {\n      init_or_merge_at_i(i);\n      if (!s.ok()) {\n        return tl::make_unexpected(s);\n      }\n    } else if (statuses[i].code() == rocksdb::Status::kNotFound) {\n      if (!is_any) {  // For contain_all, if any term is not found, return empty\n        s = 
Status::NotFound();\n        return nullptr;\n      }\n    } else {\n      LOG_ERROR(\n          \"Failed to retrieve data for term[%s] from %s, code[%d], reason[%s]\",\n          terms[i].c_str(), ID().c_str(), statuses[i].code(),\n          statuses[i].ToString().c_str());\n      s = Status::InternalError();\n      return tl::make_unexpected(s);\n    }\n  }\n\n  if (is_any && bitmap) {\n    roaring_bitmap_repair_after_lazy(bitmap);\n  }\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_ne(\n    const std::string &term) const {\n  if (sealed_) {\n    auto ret = get_bitmap_eq(term);\n    if (ret) {\n      ret = flip_bitmap(ret.value());\n    } else {\n      LOG_ERROR(\"Failed to retrieve bitmap for term[%s] from %s\", term.c_str(),\n                ID().c_str());\n    }\n    return ret;\n  } else {\n    roaring_bitmap_t *bitmap = roaring_bitmap_create();\n    if (!bitmap) {\n      LOG_ERROR(\"Failed to create bitmap\");\n      return tl::make_unexpected(Status::InternalError());\n    }\n    auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n    AILEGO_DEFER([&]() { delete iter; });\n    Status s;\n    iter->SeekToFirst();\n    while (iter->Valid()) {\n      if (iter->key() == term) {\n        iter->Next();\n        continue;\n      }\n      s = InvertedIndexCodec::Merge_OR(iter->value().data(),\n                                       iter->value().size(), true, bitmap);\n      if (s.ok()) {\n        iter->Next();\n      } else {\n        roaring_bitmap_free(bitmap);\n        LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n        return tl::make_unexpected(s);\n      }\n    }\n    roaring_bitmap_repair_after_lazy(bitmap);\n    return bitmap;\n  }\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_not_contain(\n    const std::vector<std::string> &terms, bool is_any) const {\n  if (terms.empty()) {\n    LOG_ERROR(\"Terms is empty\");\n    return 
tl::make_unexpected(Status::InvalidArgument());\n  }\n\n  roaring_bitmap_t *non_null_bitmap{nullptr};\n  AILEGO_DEFER([&]() {\n    if (non_null_bitmap) {\n      roaring_bitmap_free(non_null_bitmap);\n    }\n  });\n\n  if (sealed_) {\n    non_null_bitmap = null_bitmap_.copy();\n    roaring_bitmap_flip_inplace(non_null_bitmap, 0, max_id_ + 1);\n  } else {\n    Status s;\n    non_null_bitmap = roaring_bitmap_create();\n    if (!non_null_bitmap) {\n      LOG_ERROR(\"Failed to create bitmap\");\n      return tl::make_unexpected(Status::InternalError());\n    }\n    auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n    AILEGO_DEFER([&]() { delete iter; });\n    iter->SeekToFirst();\n    while (iter->Valid()) {\n      s = InvertedIndexCodec::Merge_OR(\n          iter->value().data(), iter->value().size(), true, non_null_bitmap);\n      if (s.ok()) {\n        iter->Next();\n      } else {\n        LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n        return tl::make_unexpected(s);\n      }\n    }\n    roaring_bitmap_repair_after_lazy(non_null_bitmap);\n  }\n\n  auto ret = get_bitmap_contain(terms, is_any);\n  if (ret) {\n    if (ret.value() == nullptr) {\n      ret = roaring_bitmap_create();\n    }\n    roaring_bitmap_flip_inplace(ret.value(), 0, max_id_ + 1);\n  } else {\n    LOG_ERROR(\"Failed to retrieve bitmap[%s] from %s, term size[%zu]\",\n              is_any ? 
\"contain_any\" : \"contain_all\", ID().c_str(),\n              terms.size());\n    return ret;\n  }\n\n  roaring_bitmap_and_inplace(ret.value(), non_null_bitmap);\n  return ret;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_lt(\n    const std::string &term, bool include_eq) const {\n  if (field_.element_data_type() == DataType::BOOL) {\n    LOG_ERROR(\"Bool type is not supported for range query\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  // For range queries that match most values, it's more efficient to compute\n  // the result by getting the complement and flipping bits.\n  if (range_covers_most_values(term, CompareOp::LT)) {\n    auto ret = get_bitmap_gt(term, !include_eq);\n    if (ret) {\n      ret = flip_bitmap(ret.value());\n    } else {\n      LOG_ERROR(\"Failed to retrieve range bitmap for term[%s] from %s\",\n                term.c_str(), ID().c_str());\n    }\n    return ret;\n  }\n\n  Status s = Status::OK();\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n  AILEGO_DEFER([&]() {\n    if (!s.ok()) {\n      roaring_bitmap_free(bitmap);\n    }\n  });\n\n  rocksdb::Iterator *iter_point, *iter_range;\n  iter_point = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n  iter_point->SeekToFirst();\n  AILEGO_DEFER([&]() { delete iter_point; });\n  if (sealed_ && cf_ranges_) {\n    iter_range = ctx_.db_->NewIterator(ctx_.read_opts_, cf_ranges_);\n    iter_range->SeekToFirst();\n  } else {\n    iter_range = nullptr;\n  }\n  AILEGO_DEFER([&]() {\n    if (iter_range) {\n      delete iter_range;\n    }\n  });\n\n  bool lt;  // True if the current range or term is \"less than\" the search term\n\n  // Process pre-aggregated range entries and merge matching bitmaps\n  if (iter_range && iter_range->Valid()) {\n    std::optional<std::string> point_seek_start_pos;\n    // 1. 
Merge ranges where the end boundary is less than the search term\n    while (iter_range->Valid()) {\n      char *range_begin, *range_end;\n      size_t range_begin_key_size, range_end_key_size;\n      InvertedIndexCodec::Decode_Range_Key(\n          iter_range->key().data(), iter_range->key().size(), &range_begin,\n          &range_begin_key_size, &range_end, &range_end_key_size);\n      lt = cmp_lt(range_end, range_end_key_size, term.data(), term.length(),\n                  include_eq);\n      if (!lt) {\n        point_seek_start_pos.emplace(range_begin, range_begin_key_size);\n        break;\n      }\n      s = InvertedIndexCodec::Merge_OR(\n          iter_range->value().data(), iter_range->value().size(), true, bitmap);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to merge range bitmap from %s\", ID().c_str());\n        return tl::make_unexpected(s);\n      }\n      iter_range->Next();\n    }\n    // 2. Change the start position of the point iterator\n    if (point_seek_start_pos) {\n      iter_point->Seek(*point_seek_start_pos);\n      if (iter_point->Valid() && iter_point->key() != *point_seek_start_pos) {\n        LOG_ERROR(\n            \"Failed to initialize the point iterator, seek_pos[%s], \"\n            \"first_key_found[%s], term[%s]\",\n            (*point_seek_start_pos).c_str(),\n            iter_point->key().ToStringView().data(), term.c_str());\n        s = Status::InternalError();\n        return tl::make_unexpected(s);\n      }\n    } else {\n      iter_point->SeekToLast();\n    }\n  }\n\n  // Process individual point entries\n  while (iter_point->Valid()) {\n    lt = cmp_lt(iter_point->key().data(), iter_point->key().size(), term.data(),\n                term.size(), include_eq);\n    if (!lt) {\n      break;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter_point->value().data(),\n                                     iter_point->value().size(), true, bitmap);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to merge range bitmap from %s\", 
ID().c_str());\n      return tl::make_unexpected(s);\n    }\n    iter_point->Next();\n  }\n\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_gt(\n    const std::string &term, bool include_eq) const {\n  if (field_.element_data_type() == DataType::BOOL) {\n    LOG_ERROR(\"Bool type is not supported for range query\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  // For range queries that match most values, it's more efficient to compute\n  // the result by getting the complement and flipping bits.\n  if (range_covers_most_values(term, CompareOp::GT)) {\n    auto ret = get_bitmap_lt(term, !include_eq);\n    if (ret) {\n      ret = flip_bitmap(ret.value());\n    } else {\n      LOG_ERROR(\"Failed to retrieve range bitmap for term[%s] from %s\",\n                term.c_str(), ID().c_str());\n    }\n    return ret;\n  }\n\n  Status s = Status::OK();\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n  AILEGO_DEFER([&]() {\n    if (!s.ok()) {\n      roaring_bitmap_free(bitmap);\n    }\n  });\n\n  rocksdb::Iterator *iter_point, *iter_range;\n  iter_point = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n  AILEGO_DEFER([&]() { delete iter_point; });\n  if (sealed_ && cf_ranges_) {\n    iter_range = ctx_.db_->NewIterator(ctx_.read_opts_, cf_ranges_);\n  } else {\n    iter_range = nullptr;\n  }\n  AILEGO_DEFER([&]() {\n    if (iter_range) {\n      delete iter_range;\n    }\n  });\n\n  std::optional<std::string> point_seek_end_pos;\n\n  // Process pre-aggregated range entries and merge matching bitmaps\n  if (iter_range) {\n    // 1. 
Seek to the first range entry that is greater than the search term\n    iter_range->Seek(term);\n    if (iter_range->Valid()) {\n      char *range_begin, *range_end;\n      size_t range_begin_key_size, range_end_key_size;\n      InvertedIndexCodec::Decode_Range_Key(\n          iter_range->key().data(), iter_range->key().size(), &range_begin,\n          &range_begin_key_size, &range_end, &range_end_key_size);\n      int ret =\n          cmp(range_begin, range_begin_key_size, term.data(), term.size());\n      if (ret == 0 && !include_eq) {\n        iter_range->Next();\n        if (iter_range->Valid()) {\n          InvertedIndexCodec::Decode_Range_Key(\n              iter_range->key().data(), iter_range->key().size(), &range_begin,\n              &range_begin_key_size, &range_end, &range_end_key_size);\n          point_seek_end_pos.emplace(range_begin, range_begin_key_size);\n        }\n      } else {\n        point_seek_end_pos.emplace(range_begin, range_begin_key_size);\n      }\n    }\n    // 2. 
Merge ranges where the begin boundary is greater than the search term\n    while (iter_range->Valid()) {\n      s = InvertedIndexCodec::Merge_OR(\n          iter_range->value().data(), iter_range->value().size(), true, bitmap);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to merge range bitmap from %s\", ID().c_str());\n        return tl::make_unexpected(s);\n      }\n      iter_range->Next();\n    }\n  }\n\n  // Process individual point entries\n  iter_point->Seek(term);\n  if (!include_eq) {\n    if (iter_point->Valid() && iter_point->key() == term) {\n      iter_point->Next();\n    }\n  }\n  while (iter_point->Valid()) {\n    if (point_seek_end_pos &&\n        cmp(iter_point->key().data(), iter_point->key().size(),\n            (*point_seek_end_pos).data(), (*point_seek_end_pos).size()) >= 0) {\n      break;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter_point->value().data(),\n                                     iter_point->value().size(), true, bitmap);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to merge range bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n    iter_point->Next();\n  }\n\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_eq(\n    uint32_t len) const {\n  std::string encoded_len = InvertedIndexCodec::Encode(\n      std::string((char *)&len, sizeof(uint32_t)), DataType::UINT32);\n\n  PinnableSlice bitmap_slice;\n  auto rs =\n      ctx_.db_->Get(ctx_.read_opts_, cf_array_len_, encoded_len, &bitmap_slice);\n  if (!rs.ok()) {\n    if (rs.code() == rocksdb::Status::kNotFound) {\n      return nullptr;\n    }\n    LOG_ERROR(\n        \"Failed to retrieve data for len[%u] from %s, code[%d], reason[%s]\",\n        len, ID().c_str(), rs.code(), rs.ToString().c_str());\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  roaring_bitmap_t *bitmap{nullptr};\n  Status status = 
InvertedIndexCodec::Deserialize(bitmap_slice.data(),\n                                                  bitmap_slice.size(), &bitmap);\n  if (status.ok()) {\n    return bitmap;\n  } else {\n    LOG_ERROR(\n        \"Failed to deserialize bitmap for len[%u] from %s, bitmap size[%zu]\",\n        len, ID().c_str(), bitmap_slice.size());\n    return tl::make_unexpected(Status::InternalError());\n  }\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_ne(\n    uint32_t len) const {\n  std::string encoded_len = InvertedIndexCodec::Encode(\n      std::string((char *)&len, sizeof(uint32_t)), DataType::UINT32);\n\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);\n  AILEGO_DEFER([&]() { delete iter; });\n  Status s;\n  iter->SeekToFirst();\n  while (iter->Valid()) {\n    if (iter->key() == encoded_len) {\n      iter->Next();\n      continue;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter->value().data(), iter->value().size(),\n                                     true, bitmap);\n    if (s.ok()) {\n      iter->Next();\n    } else {\n      roaring_bitmap_free(bitmap);\n      LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n  }\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_lt(\n    uint32_t len, bool include_eq) const {\n  std::string encoded_len = InvertedIndexCodec::Encode(\n      std::string((char *)&len, sizeof(uint32_t)), DataType::UINT32);\n\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);\n  
AILEGO_DEFER([&]() { delete iter; });\n  Status s;\n  iter->SeekToFirst();\n  while (iter->Valid()) {\n    bool lt = cmp_lt(iter->key().data(), iter->key().size(), encoded_len.data(),\n                     encoded_len.size(), include_eq);\n    if (!lt) {\n      break;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter->value().data(), iter->value().size(),\n                                     true, bitmap);\n    if (s.ok()) {\n      iter->Next();\n    } else {\n      roaring_bitmap_free(bitmap);\n      LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n  }\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_gt(\n    uint32_t len, bool include_eq) const {\n  std::string encoded_len = InvertedIndexCodec::Encode(\n      std::string((char *)&len, sizeof(uint32_t)), DataType::UINT32);\n\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);\n  AILEGO_DEFER([&]() { delete iter; });\n  Status s;\n  iter->Seek(encoded_len);\n  if (!include_eq) {\n    if (iter->Valid() && iter->key() == encoded_len) {\n      iter->Next();\n    }\n  }\n  while (iter->Valid()) {\n    s = InvertedIndexCodec::Merge_OR(iter->value().data(), iter->value().size(),\n                                     true, bitmap);\n    if (s.ok()) {\n      iter->Next();\n    } else {\n      roaring_bitmap_free(bitmap);\n      LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n  }\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_like(\n    std::string term) const {\n  // convert to `is not null` if `%` is the only character\n  if (term == 
\"%\") {\n    return get_bitmap_non_null();\n  }\n  size_t percent_loc = std::string::npos;\n  size_t size = 0;\n  int percent_count = 0;\n  // unescape \\% and \\_, detect % location\n  for (size_t i = 0; i < term.size(); i++) {\n    if (term[i] == '\\\\') {\n      i++;\n      if (i < term.size()) {\n        term[size++] = term[i];\n      }\n      continue;\n    }\n    if (term[i] == '%') {\n      percent_loc = size;\n      percent_count += 1;\n    }\n    term[size++] = term[i];\n  }\n  term.resize(size);\n  // convert to `=` filter if no percent\n  if (percent_count == 0) {\n    return get_bitmap_eq(term);\n  } else if (percent_count != 1) {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"like should have exactly one percent, unescaped:\", term));\n  }\n  if (percent_loc == 0) {\n    return get_bitmap_suffix(term);\n  } else if (percent_loc == size - 1) {\n    return get_bitmap_prefix(term.substr(0, percent_loc));\n  } else {\n    std::string prefix = term.substr(0, percent_loc - 1);\n    std::string suffix = term.substr(percent_loc + 1, size - percent_loc - 1);\n    auto prefix_bitmap = get_bitmap_prefix(prefix);\n    if (!prefix_bitmap.has_value()) {\n      return tl::make_unexpected(\n          Status::InternalError(\"Get bitmap prefix failed, unescaped:\", term));\n    }\n    auto suffix_bitmap = get_bitmap_suffix(suffix);\n    if (!suffix_bitmap.has_value()) {\n      return tl::make_unexpected(\n          Status::InternalError(\"Get bitmap suffix failed, unescaped:\", term));\n    }\n    auto *result = prefix_bitmap.value();\n    roaring_bitmap_and_inplace(result, suffix_bitmap.value());\n    roaring_bitmap_free(suffix_bitmap.value());\n    return result;\n  }\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_prefix(\n    const std::string &term) const {\n  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n  AILEGO_DEFER([&]() { delete iter; });\n\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  
if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  Status s;\n  iter->Seek(term);\n  while (iter->Valid()) {\n    if (!has_prefix(iter->key().data(), iter->key().size(), term.data(),\n                    term.size())) {\n      break;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter->value().data(), iter->value().size(),\n                                     true, bitmap);\n    if (!s.ok()) {\n      roaring_bitmap_free(bitmap);\n      LOG_ERROR(\"Failed to merge range bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n    iter->Next();\n  }\n\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_suffix(\n    const std::string &term) const {\n  if (!cf_reversed_terms_) {\n    LOG_ERROR(\"%s doesn't support suffix matching\", ID().c_str());\n    return tl::make_unexpected(Status::PermissionDenied());\n  }\n\n  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_reversed_terms_);\n  AILEGO_DEFER([&]() { delete iter; });\n\n  roaring_bitmap_t *bitmap = roaring_bitmap_create();\n  if (!bitmap) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return tl::make_unexpected(Status::InternalError());\n  }\n\n  Status s;\n  auto reversed_term = encode_reversed(term);\n  iter->Seek(reversed_term);\n  while (iter->Valid()) {\n    if (!has_prefix(iter->key().data(), iter->key().size(),\n                    reversed_term.data(), reversed_term.size())) {\n      break;\n    }\n    s = InvertedIndexCodec::Merge_OR(iter->value().data(), iter->value().size(),\n                                     true, bitmap);\n    if (!s.ok()) {\n      roaring_bitmap_free(bitmap);\n      LOG_ERROR(\"Failed to merge range bitmap from %s\", ID().c_str());\n      return tl::make_unexpected(s);\n    }\n    iter->Next();\n  }\n\n  roaring_bitmap_repair_after_lazy(bitmap);\n  return bitmap;\n}\n\n\nResult<roaring_bitmap_t 
*> InvertedColumnIndexer::get_bitmap_null() const {\n  return null_bitmap_.copy();\n}\n\n\nResult<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_non_null() const {\n  if (sealed_) {\n    roaring_bitmap_t *bitmap = null_bitmap_.copy();\n    roaring_bitmap_flip_inplace(bitmap, 0, max_id_ + 1);\n    return bitmap;\n  } else {\n    Status s = Status::OK();\n    auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n    AILEGO_DEFER([&]() { delete iter; });\n    roaring_bitmap_t *bitmap = roaring_bitmap_create();\n    if (!bitmap) {\n      LOG_ERROR(\"Failed to create bitmap\");\n      return tl::make_unexpected(Status::InternalError());\n    }\n    iter->SeekToFirst();\n    while (iter->Valid()) {\n      s = InvertedIndexCodec::Merge_OR(iter->value().data(),\n                                       iter->value().size(), true, bitmap);\n      if (s.ok()) {\n        iter->Next();\n      } else {\n        roaring_bitmap_free(bitmap);\n        LOG_ERROR(\"Failed to merge bitmap from %s\", ID().c_str());\n        return tl::make_unexpected(s);\n      }\n    }\n    roaring_bitmap_repair_after_lazy(bitmap);\n    return bitmap;\n  }\n}\n\n\nInvertedSearchResult::Ptr InvertedColumnIndexer::search(\n    const std::string &value, CompareOp op) const {\n  if (field_.is_array_type()) {\n    LOG_ERROR(\"%s: array type doesn't support single value search\",\n              ID().c_str());\n    return nullptr;\n  }\n\n  std::string encoded_value = encode(value);\n  auto search_res = std::make_shared<InvertedSearchResult>();\n  Result<roaring_bitmap_t *> bitmap_res;\n\n  switch (op) {\n    case CompareOp::EQ: {\n      bitmap_res = get_bitmap_eq(encoded_value);\n      break;\n    }\n    case CompareOp::NE: {\n      bitmap_res = get_bitmap_ne(encoded_value);\n      break;\n    }\n    case CompareOp::LT: {\n      bitmap_res = get_bitmap_lt(encoded_value, false);\n      break;\n    }\n    case CompareOp::LE: {\n      bitmap_res = get_bitmap_lt(encoded_value, true);\n      
break;\n    }\n    case CompareOp::GT: {\n      bitmap_res = get_bitmap_gt(encoded_value, false);\n      break;\n    }\n    case CompareOp::GE: {\n      bitmap_res = get_bitmap_gt(encoded_value, true);\n      break;\n    }\n    case CompareOp::LIKE: {\n      bitmap_res = get_bitmap_like(std::move(encoded_value));\n      break;\n    }\n    case CompareOp::HAS_PREFIX: {\n      bitmap_res = get_bitmap_prefix(std::move(encoded_value));\n      break;\n    }\n    case CompareOp::HAS_SUFFIX: {\n      bitmap_res = get_bitmap_suffix(std::move(encoded_value));\n      break;\n    }\n    default:\n      LOG_ERROR(\"%s: unsupported operator[%u]\", ID().c_str(),\n                static_cast<uint32_t>(op));\n      return nullptr;\n  }\n\n  if (bitmap_res) {\n    search_res->set_and_own_bitmap(bitmap_res.value());\n    return search_res;\n  } else {\n    LOG_ERROR(\"%s: failed to search, code[%d]\", ID().c_str(),\n              static_cast<int>(bitmap_res.error().code()));\n    return nullptr;\n  }\n}\n\n\nInvertedSearchResult::Ptr InvertedColumnIndexer::multi_search(\n    const std::vector<std::string> &values, CompareOp op) const {\n  auto encoded_values = encode(values);\n  auto search_res = std::make_shared<InvertedSearchResult>();\n  Result<roaring_bitmap_t *> bitmap_res;\n\n  switch (op) {\n    case CompareOp::CONTAIN_ANY: {\n      bitmap_res = get_bitmap_contain(encoded_values, true);\n      break;\n    }\n    case CompareOp::CONTAIN_ALL: {\n      bitmap_res = get_bitmap_contain(encoded_values, false);\n      break;\n    }\n    case CompareOp::NOT_CONTAIN_ANY: {\n      bitmap_res = get_bitmap_not_contain(encoded_values, true);\n      break;\n    }\n    case CompareOp::NOT_CONTAIN_ALL: {\n      bitmap_res = get_bitmap_not_contain(encoded_values, false);\n      break;\n    }\n    default:\n      LOG_ERROR(\"%s: unsupported operator[%u]\", ID().c_str(),\n                static_cast<uint32_t>(op));\n      return nullptr;\n  }\n\n  if (bitmap_res) {\n    
search_res->set_and_own_bitmap(bitmap_res.value());\n    return search_res;\n  } else {\n    LOG_ERROR(\"%s: failed to search, code[%d]\", ID().c_str(),\n              static_cast<int>(bitmap_res.error().code()));\n    return nullptr;\n  }\n}\n\n\nInvertedSearchResult::Ptr InvertedColumnIndexer::search_array_len(\n    uint32_t len, CompareOp op) const {\n  if (!field_.is_array_type()) {\n    LOG_ERROR(\"%s: non-array type doesn't array length search\", ID().c_str());\n    return nullptr;\n  }\n\n  auto search_res = std::make_shared<InvertedSearchResult>();\n  Result<roaring_bitmap_t *> bitmap_res;\n\n  switch (op) {\n    case CompareOp::EQ: {\n      bitmap_res = get_bitmap_array_len_eq(len);\n      break;\n    }\n    case CompareOp::NE: {\n      bitmap_res = get_bitmap_array_len_ne(len);\n      break;\n    }\n    case CompareOp::LT: {\n      bitmap_res = get_bitmap_array_len_lt(len, false);\n      break;\n    }\n    case CompareOp::LE: {\n      bitmap_res = get_bitmap_array_len_lt(len, true);\n      break;\n    }\n    case CompareOp::GT: {\n      bitmap_res = get_bitmap_array_len_gt(len, false);\n      break;\n    }\n    case CompareOp::GE: {\n      bitmap_res = get_bitmap_array_len_gt(len, true);\n      break;\n    }\n    default:\n      LOG_ERROR(\"%s: unsupported operator[%u]\", ID().c_str(),\n                static_cast<uint32_t>(op));\n      return nullptr;\n  }\n\n  if (bitmap_res) {\n    search_res->set_and_own_bitmap(bitmap_res.value());\n    return search_res;\n  } else {\n    LOG_ERROR(\"%s: failed to search, code[%d]\", ID().c_str(),\n              static_cast<int>(bitmap_res.error().code()));\n    return nullptr;\n  }\n}\n\n\nInvertedSearchResult::Ptr InvertedColumnIndexer::search_null() const {\n  auto search_res = std::make_shared<InvertedSearchResult>();\n  auto bitmap_res = get_bitmap_null();\n  if (bitmap_res) {\n    search_res->set_and_own_bitmap(bitmap_res.value());\n    return search_res;\n  } else {\n    LOG_ERROR(\"%s: failed to search, 
code[%d]\", ID().c_str(),\n              static_cast<int>(bitmap_res.error().code()));\n    return nullptr;\n  }\n}\n\n\nInvertedSearchResult::Ptr InvertedColumnIndexer::search_non_null() const {\n  auto search_res = std::make_shared<InvertedSearchResult>();\n  auto bitmap_res = get_bitmap_non_null();\n  if (bitmap_res) {\n    search_res->set_and_own_bitmap(bitmap_res.value());\n    return search_res;\n  } else {\n    LOG_ERROR(\"%s: failed to search, code[%d]\", ID().c_str(),\n              static_cast<int>(bitmap_res.error().code()));\n    return nullptr;\n  }\n}\n\n\nStatus InvertedColumnIndexer::evaluate_ratio(const std::string &value,\n                                             CompareOp op, uint64_t *total_size,\n                                             uint64_t *range_size) const {\n  if (field_.is_array_type()) {\n    LOG_ERROR(\"%s: array type doesn't support ratio evaluation\", ID().c_str());\n    return Status::PermissionDenied();\n  }\n\n  if (sealed_ && doc_range_stat_) {\n    std::string encoded_value = encode(value);\n    doc_range_stat_->evaluate_ratio(encoded_value, op, total_size, range_size);\n  } else {\n    *range_size = 0;\n    *total_size = 1;\n  }\n  return Status::OK();\n}\n\n\ninline Status InvertedColumnIndexer::estimate_range_ratio(\n    const std::string &term, CompareOp op, uint64_t *total_count,\n    uint64_t *matching_count) const {\n  if (field_.is_array_type() || field_.element_data_type() == DataType::BOOL) {\n    LOG_ERROR(\"%s: type[%d] doesn't support range ratio estimation\",\n              ID().c_str(), (int)field_.data_type());\n    return Status::PermissionDenied();\n  }\n\n  if (sealed_ && doc_range_stat_) {\n    doc_range_stat_->evaluate_ratio(term, op, total_count, matching_count);\n  } else {\n    *matching_count = 0;\n    *total_count = 1;\n  }\n  return Status::OK();\n}\n\n\ninline bool InvertedColumnIndexer::range_covers_most_values(\n    const std::string &term, CompareOp op) const {\n  constexpr float 
HIGH_SELECTIVITY_THRESHOLD = 0.7;\n\n  // Estimation is only available for sealed indexes as they have the cumulative\n  // distribution index.\n  if (!sealed_) {\n    return false;\n  }\n\n  uint64_t total_cnt{0}, matching_cnt{0};\n  if (auto s = estimate_range_ratio(term, op, &total_cnt, &matching_cnt);\n      s.ok()) {\n    return (total_cnt != 0) &&\n           ((1.0f * matching_cnt / total_cnt) > HIGH_SELECTIVITY_THRESHOLD);\n\n  } else {\n    return false;\n  }\n}\n\n\ninline roaring_bitmap_t *InvertedColumnIndexer::flip_bitmap(\n    roaring_bitmap_t *bitmap) const {\n  roaring_bitmap_t *ret;\n  if (ret = bitmap; ret == nullptr) {\n    ret = null_bitmap_.copy();\n  } else {\n    roaring_bitmap_or_inplace(ret, null_bitmap_.bitmap());\n  }\n  roaring_bitmap_flip_inplace(ret, 0, max_id_ + 1);\n  return ret;\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_column_indexer_util.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <ailego/pattern/defer.h>\n#include \"inverted_column_indexer.h\"\n\n\nnamespace zvec {\n\n\nInvertedColumnIndexer::~InvertedColumnIndexer() {\n  LOG_INFO(\"Closed %s\", ID().c_str());\n}\n\n\nStatus InvertedColumnIndexer::open() {\n  if (field_.index_type() != IndexType::INVERT) {\n    return Status::InvalidArgument();\n  }\n  auto params =\n      std::dynamic_pointer_cast<InvertIndexParams>(field_.index_params());\n  enable_range_optimization_ =\n      allow_range_optimization(field_) && params->enable_range_optimization();\n  enable_extended_wildcard_ =\n      allow_extended_wildcard(field_) && params->enable_extended_wildcard();\n\n  rocksdb::Status s;\n  std::string value{};\n\n  cf_terms_ = ctx_.get_cf(cf_name_terms());\n  if (!cf_terms_) {\n    LOG_ERROR(\"Failed to get cf_terms for %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  if (field_.is_array_type()) {\n    cf_array_len_ = ctx_.get_cf(cf_name_array_len());\n    if (!cf_array_len_) {\n      LOG_ERROR(\"Failed to get cf_array_len for %s\", ID().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  if (enable_range_optimization_) {\n    cf_ranges_ = ctx_.get_cf(cf_name_ranges());\n    if (!cf_ranges_) {\n      LOG_ERROR(\"Failed to get cf_ranges for %s\", ID().c_str());\n      return Status::InternalError();\n    }\n    cf_cdf_ = 
ctx_.get_cf(cf_name_cdf());\n    if (!cf_cdf_) {\n      LOG_ERROR(\"Failed to get cf_cdf for %s\", ID().c_str());\n      return Status::InternalError();\n    }\n    s = ctx_.db_->Get(ctx_.read_opts_, cf_cdf_, field_.name(), &value);\n    if (s.ok()) {\n      doc_range_stat_ = SegmentDocRangeStat::Create(value);\n      if (!doc_range_stat_) {\n        LOG_ERROR(\"Failed to create doc range stats from %s\", ID().c_str());\n        return Status::InternalError();\n      }\n    } else if (s.code() != rocksdb::Status::kNotFound) {\n      LOG_ERROR(\"Failed to retrieve cdf from %s\", ID().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  if (enable_extended_wildcard_) {\n    cf_reversed_terms_ = ctx_.get_cf(cf_name_reversed_terms());\n    if (!cf_reversed_terms_) {\n      LOG_ERROR(\"Failed to get cf_reversed_terms for %s\", ID().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  // Get max id if exists\n  s = ctx_.db_->Get(ctx_.read_opts_, key_max_id(), &value);\n  if (s.ok()) {\n    try {\n      max_id_ = std::stoul(value);\n    } catch (const std::exception &e) {\n      LOG_ERROR(\"Failed to parse max id from %s for %s, exception[%s]\",\n                value.c_str(), ID().c_str(), e.what());\n      return Status::InternalError();\n    }\n  } else if (s.code() != rocksdb::Status::kNotFound) {\n    LOG_ERROR(\"Failed to retrieve max id from %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  // Get null bitmap if exists\n  s = ctx_.db_->Get(ctx_.read_opts_, key_null(), &value);\n  if (s.ok()) {\n    if (auto status = null_bitmap_.deserialize(value); !status.ok()) {\n      LOG_ERROR(\"Failed to deserialize null bitmap from %s\", ID().c_str());\n      return status;\n    }\n  } else if (s.code() != rocksdb::Status::kNotFound) {\n    LOG_ERROR(\"Failed to retrieve null bitmap from %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  // Get indexer state\n  s = ctx_.db_->Get(ctx_.read_opts_, key_sealed(), 
&value);\n  if (s.ok()) {\n    sealed_ = true;\n    read_only_ = true;\n  } else if (s.code() == rocksdb::Status::kNotFound) {\n    sealed_ = false;\n  } else {\n    LOG_ERROR(\"Failed to retrieve indexer state from %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  LOG_INFO(\"Opened %s\", ID().c_str());\n  return Status::OK();\n}\n\n\nInvertedColumnIndexer::Ptr InvertedColumnIndexer::CreateAndOpen(\n    const std::string &collection_name, const FieldSchema &field,\n    RocksdbContext &context, bool read_only) {\n  auto ptr =\n      new InvertedColumnIndexer(collection_name, field, context, read_only);\n  auto indexer = std::shared_ptr<InvertedColumnIndexer>(ptr);\n  if (indexer->open().ok()) {\n    return indexer;\n  } else {\n    return nullptr;\n  }\n}\n\n\nStatus InvertedColumnIndexer::drop_storage() {\n  Status s = Status::OK();\n  rocksdb::Status rs;\n  AILEGO_DEFER([&]() {\n    if (s.ok()) {\n      LOG_INFO(\"Dropped storage of %s\", ID().c_str());\n    } else {\n      LOG_ERROR(\"Failed to drop storage of %s\", ID().c_str());\n    }\n  });\n\n  if (s = ctx_.drop_cf(cf_name_terms()); !s.ok()) {\n    return s;\n  }\n  if (field_.is_array_type()) {\n    if (s = ctx_.drop_cf(cf_name_array_len()); !s.ok()) {\n      return s;\n    }\n  }\n  if (enable_range_optimization_) {\n    if (s = ctx_.drop_cf(cf_name_ranges()); !s.ok()) {\n      return s;\n    }\n    rs = ctx_.db_->Delete(ctx_.write_opts_, cf_cdf_, field_.name());\n    if (!rs.ok()) {\n      LOG_ERROR(\"Failed to delete cdf of %s\", ID().c_str());\n      s = Status::InternalError();\n      return s;\n    }\n  }\n  if (enable_extended_wildcard_) {\n    if (s = ctx_.drop_cf(cf_name_reversed_terms()); !s.ok()) {\n      return s;\n    }\n  }\n\n  rs = ctx_.db_->Delete(ctx_.write_opts_, key_max_id());\n  if (!rs.ok()) {\n    LOG_ERROR(\"Failed to delete max_id of %s\", ID().c_str());\n    s = Status::InternalError();\n    return s;\n  }\n\n  rs = ctx_.db_->Delete(ctx_.write_opts_, 
key_null());\n  if (!rs.ok()) {\n    LOG_ERROR(\"Failed to delete null bitmap of %s\", ID().c_str());\n    s = Status::InternalError();\n    return s;\n  }\n\n  rs = ctx_.db_->Delete(ctx_.write_opts_, key_sealed());\n  if (!rs.ok()) {\n    LOG_ERROR(\"Failed to delete indexer state of %s\", ID().c_str());\n    s = Status::InternalError();\n    return s;\n  }\n\n  return s;\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_column_indexer_write.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/encoding/json.h>\n#include \"inverted_codec.h\"\n#include \"inverted_column_indexer.h\"\n\n\nnamespace zvec {\n\n\nStatus InvertedColumnIndexer::insert(uint32_t id, const std::string &value) {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  std::string encoded_id = std::string{1}.append(\n      reinterpret_cast<const char *>(&id), sizeof(uint32_t));\n\n  rocksdb::Status s;\n  AILEGO_DEFER([&]() {\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to insert terms of id[%u] to %s, code[%d], reason[%s]\",\n                id, ID().c_str(), s.code(), s.ToString().c_str());\n    }\n  });\n\n  if (field_.is_array_type()) {\n    std::vector<std::string> encoded_values = encode_array(value);\n    std::sort(encoded_values.begin(), encoded_values.end());\n    rocksdb::WriteBatch write_batch;\n    for (auto encoded_value : encoded_values) {\n      s = write_batch.Merge(cf_terms_, encoded_value, encoded_id);\n      if (!s.ok()) {\n        return Status::InternalError();\n      }\n    }\n    if (s = ctx_.db_->Write(ctx_.write_opts_, &write_batch); !s.ok()) {\n      return Status::InternalError();\n    }\n    if (s = index_array_len(id, encoded_values.size()); !s.ok()) {\n      return Status::InternalError();\n    }\n  } else {\n    std::string encoded_value = encode(value);\n    
s = ctx_.db_->Merge(ctx_.write_opts_, cf_terms_, encoded_value, encoded_id);\n    if (!s.ok()) {\n      return Status::InternalError();\n    }\n    if (cf_reversed_terms_) {\n      s = ctx_.db_->Merge(ctx_.write_opts_, cf_reversed_terms_,\n                          encode_reversed(value), encoded_id);\n      if (!s.ok()) {\n        return Status::InternalError();\n      }\n    }\n  }\n\n  update_max_id(id);\n  return Status::OK();\n}\n\n\nStatus InvertedColumnIndexer::insert(uint32_t id,\n                                     const std::vector<std::string> &values) {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  std::string encoded_id = std::string{1}.append(\n      reinterpret_cast<const char *>(&id), sizeof(uint32_t));\n  auto encoded_values = encode(values);\n\n  rocksdb::Status s;\n  AILEGO_DEFER([&]() {\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to insert terms of id[%u] to %s, code[%d], reason[%s]\",\n                id, ID().c_str(), s.code(), s.ToString().c_str());\n    }\n  });\n\n  if (s = index_array_len(id, encoded_values.size()); !s.ok()) {\n    return Status::InternalError();\n  }\n\n  std::sort(encoded_values.begin(), encoded_values.end());\n  rocksdb::WriteBatch write_batch;\n  for (auto encoded_value : encoded_values) {\n    s = write_batch.Merge(cf_terms_, encoded_value, encoded_id);\n    if (!s.ok()) {\n      return Status::InternalError();\n    }\n  }\n  s = ctx_.db_->Write(ctx_.write_opts_, &write_batch);\n  if (s.ok()) {\n    update_max_id(id);\n    return Status::OK();\n  } else {\n    return Status::InternalError();\n  }\n}\n\n\nStatus InvertedColumnIndexer::insert(uint32_t id, bool value) {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  std::string encoded_id = std::string{1}.append(\n      reinterpret_cast<const char *>(&id), sizeof(uint32_t));\n  std::string encoded_value = encode(value);\n\n  auto s =\n      ctx_.db_->Merge(ctx_.write_opts_, cf_terms_, encoded_value, encoded_id);\n  if 
(s.ok()) {\n    update_max_id(id);\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to insert terms of id[%u] to %s, code[%d], reason[%s]\",\n              id, ID().c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus InvertedColumnIndexer::insert(uint32_t id,\n                                     const std::vector<bool> &values) {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  std::string encoded_id = std::string{1}.append(\n      reinterpret_cast<const char *>(&id), sizeof(uint32_t));\n\n  rocksdb::Status rs;\n  if (rs = index_array_len(id, values.size()); !rs.ok()) {\n    LOG_ERROR(\"Failed to index array length for %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  bool has_true = false;\n  bool has_false = false;\n  for (bool value : values) {\n    if (value) {\n      has_true = true;\n    } else {\n      has_false = true;\n    }\n  }\n\n  rocksdb::WriteBatch write_batch;\n  if (has_true) {\n    write_batch.Merge(cf_terms_, encode(true), encoded_id);\n  }\n  if (has_false) {\n    write_batch.Merge(cf_terms_, encode(false), encoded_id);\n  }\n  rs = ctx_.db_->Write(ctx_.write_opts_, &write_batch);\n  if (rs.ok()) {\n    update_max_id(id);\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to insert terms of id[%u] to %s, code[%d], reason[%s]\",\n              id, ID().c_str(), rs.code(), rs.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nStatus InvertedColumnIndexer::insert_null(uint32_t id) {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n  null_bitmap_.add(id);\n  update_max_id(id);\n  return Status::OK();\n}\n\n\nStatus InvertedColumnIndexer::flush_special_values() {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  std::string value;\n  if (null_bitmap_.cardinality() != 0) {\n    if (!null_bitmap_.serialize(&value).ok()) {\n      LOG_ERROR(\"Failed to serialize null bitmap\");\n      
return Status::InternalError();\n    }\n    auto s = ctx_.db_->Put(ctx_.write_opts_, key_null(), value);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to insert null bitmap to %s, code[%d], reason[%s]\",\n                ID().c_str(), s.code(), s.ToString().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  auto s =\n      ctx_.db_->Put(ctx_.write_opts_, key_max_id(), std::to_string(max_id_));\n  if (s.ok()) {\n    LOG_DEBUG(\"Special values flushed to %s\", ID().c_str());\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to insert max_id to %s, code[%d], reason[%s]\",\n              ID().c_str(), s.code(), s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nrocksdb::Status InvertedColumnIndexer::index_array_len(uint32_t id,\n                                                       uint32_t len) {\n  if (!cf_array_len_) {\n    LOG_ERROR(\"%s doesn't support array length index\", ID().c_str());\n    return rocksdb::Status::NotSupported();\n  }\n\n  std::string encoded_id = std::string{1}.append(\n      reinterpret_cast<const char *>(&id), sizeof(uint32_t));\n  std::string encoded_len = InvertedIndexCodec::Encode(\n      std::string((char *)&len, sizeof(uint32_t)), DataType::UINT32);\n\n  return ctx_.db_->Merge(ctx_.write_opts_, cf_array_len_, encoded_len,\n                         encoded_id);\n}\n\n\nStatus InvertedColumnIndexer::generate_statistical_indexes() {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n  if (!enable_range_optimization_) {\n    return Status::PermissionDenied();\n  }\n\n  if (!ctx_.reset_cf(cf_name_ranges()).ok()) {\n    // Reset the range index in case it is corrupted\n    LOG_ERROR(\"Failed to reset range index\");\n    return Status::InternalError();\n  }\n  cf_ranges_ = ctx_.get_cf(cf_name_ranges());\n  if (!cf_ranges_) {\n    LOG_ERROR(\"Failed to get column families for %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  // TODO: make them configurable\n  
const uint32_t num_range_slot = 1000;\n  const uint32_t num_cdf_slot = 100;\n\n  const uint32_t num_doc_per_range_slot = (max_id_ + 1) / num_range_slot;\n  const uint32_t num_doc_per_cdf_slot = (max_id_ + 1) / num_cdf_slot;\n\n  // Iterator for terms in the inverted index\n  auto iter_term = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);\n  iter_term->SeekToFirst();\n  AILEGO_DEFER([&]() { delete iter_term; });\n\n  size_t doc_count = 0, term_count = 0;\n  Status s;\n  rocksdb::Status rs;\n\n  // Range tracking variables\n  std::string range_begin_key{\"\"}, range_end_key{\"\"};\n  size_t range_slot_doc_count{0};\n  size_t num_range_slot_created = 0;\n  roaring_bitmap_t *bitmap_range = roaring_bitmap_create();\n  if (bitmap_range == nullptr) {\n    LOG_ERROR(\"Failed to create bitmap\");\n    return Status::InternalError();\n  }\n  AILEGO_DEFER([&]() { roaring_bitmap_free(bitmap_range); });\n\n  // Function to create a range slot\n  auto create_range_slot = [&]() -> Status {\n    std::string range_key = range_begin_key;\n    range_key.append(1, '\\0');  // Separator byte\n    range_key.append(range_end_key.data(), range_end_key.size());\n    uint64_t range_key_begin_size = range_begin_key.size();\n    range_key.append((char *)&range_key_begin_size, sizeof(uint64_t));\n    std::string range_value_str;\n    s = InvertedIndexCodec::Serialize(bitmap_range, &range_value_str);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to serialize bitmap\");\n      return Status::InternalError();\n    }\n    rs =\n        ctx_.db_->Put(ctx_.write_opts_, cf_ranges_, range_key, range_value_str);\n    if (!rs.ok()) {\n      LOG_ERROR(\"Failed to put range slot: %s\", rs.ToString().c_str());\n      return Status::InternalError();\n    }\n    num_range_slot_created++;\n    return Status::OK();\n  };\n\n  // CDF tracking variables\n  ailego::JsonArray cdf_json_array;\n  size_t cdf_slot_doc_count = 0;\n\n  // Function to create a CDF slot\n  auto create_cdf_slot = [&]() {\n    
ailego::JsonObject json_obj;\n    json_obj.set(ailego::JsonString(\"key\").encode(),\n                 ailego::JsonString(iter_term->key().ToString()).encode());\n    json_obj.set(ailego::JsonString(\"doc_count\").encode(),\n                 ailego::JsonValue(doc_count));\n    cdf_json_array.push(json_obj);\n  };\n\n  // Is the current slot initialized?\n  bool range_slot_initialized{false}, cdf_slot_initialized{false};\n\n\n  // Scan\n  roaring_bitmap_t *bitmap_cur{nullptr};\n  AILEGO_DEFER([&]() {\n    if (bitmap_cur) {\n      roaring_bitmap_free(bitmap_cur);\n    }\n  });\n\n  while (iter_term->Valid()) {\n    term_count++;\n    s = InvertedIndexCodec::Deserialize(iter_term->value().data(),\n                                        iter_term->value().size(), &bitmap_cur);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to deserialize bitmap for term[%s] from %s\",\n                iter_term->key().ToString().c_str(), ID().c_str());\n      return Status::InternalError();\n    }\n    // The count of documents for the current term\n    auto term_doc_count = roaring_bitmap_get_cardinality(bitmap_cur);\n    doc_count += term_doc_count;\n\n    // Range\n    if (!range_slot_initialized) {\n      range_slot_initialized = true;\n      range_slot_doc_count = 0;\n      range_begin_key = iter_term->key().ToString();\n      roaring_bitmap_clear(bitmap_range);\n    }\n    range_end_key = iter_term->key().ToString();\n    range_slot_doc_count += term_doc_count;\n    roaring_bitmap_or_inplace(bitmap_range, bitmap_cur);\n    if (range_slot_doc_count >= num_doc_per_range_slot) {\n      if (create_range_slot().ok()) {\n        range_slot_initialized = false;\n      } else {\n        return Status::InternalError();\n      }\n    }\n\n    // CDF\n    if (!cdf_slot_initialized) {\n      cdf_slot_initialized = true;\n      cdf_slot_doc_count = 0;\n    }\n    cdf_slot_doc_count += term_doc_count;\n    if (cdf_slot_doc_count >= num_doc_per_cdf_slot) {\n      create_cdf_slot();\n      
cdf_slot_initialized = false;\n    }\n\n    roaring_bitmap_free(bitmap_cur);\n    bitmap_cur = nullptr;\n    iter_term->Next();\n  }\n\n\n  // Finalize\n  if (range_slot_initialized) {\n    if (!create_range_slot().ok()) {\n      return Status::InternalError();\n    }\n  }\n  if (num_range_slot_created >= term_count) {\n    LOG_DEBUG(\n        \"Drop range index in %s, range_slot_count[%ld] vs term_count[%ld].\",\n        ID().c_str(), num_range_slot_created, term_count);\n    if (!ctx_.reset_cf(cf_name_ranges()).ok()) {\n      LOG_ERROR(\"Failed to drop range index\");\n      return Status::InternalError();\n    }\n    cf_ranges_ = ctx_.get_cf(cf_name_ranges());\n    if (!cf_ranges_) {\n      LOG_ERROR(\"Failed to get cf_ranges for %s\", ID().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  if (cdf_slot_initialized) {\n    iter_term->SeekToLast();\n    create_cdf_slot();\n  }\n  ailego::JsonObject cdf_json_obj;\n  cdf_json_obj.set(\"field_value_histogram\", cdf_json_array);\n  cdf_json_obj.set(\"total_doc_count\", ailego::JsonValue(doc_count));\n  ailego::JsonValue cdf_json(std::move(cdf_json_obj));\n  rs = ctx_.db_->Put(ctx_.write_opts_, cf_cdf_, field_.name(),\n                     cdf_json.as_json_string().as_stl_string());\n  if (!rs.ok()) {\n    LOG_ERROR(\"Failed to insert CDF of field[%s] to %s, code[%d], reason[%s]\",\n              field_.name().c_str(), ID().c_str(), rs.code(),\n              rs.ToString().c_str());\n    return Status::InternalError();\n  }\n\n  doc_range_stat_ =\n      SegmentDocRangeStat::Create(cdf_json.as_json_string().as_stl_string());\n  if (!doc_range_stat_) {\n    LOG_ERROR(\"Failed to create doc range stats from %s\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  LOG_INFO(\"Generated statistical indexes in %s\", ID().c_str());\n  return Status::OK();\n}\n\n\nStatus InvertedColumnIndexer::seal() {\n  if (read_only_) {\n    return Status::PermissionDenied();\n  }\n\n  Status status = 
flush_special_values();\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to flush special values to %s\", ID().c_str());\n    return status;\n  }\n\n  if (enable_range_optimization_) {\n    status = generate_statistical_indexes();\n    if (!status.ok()) {\n      LOG_ERROR(\"Failed to generate statistical indexes in %s\", ID().c_str());\n      return status;\n    }\n  }\n\n  auto rs = ctx_.db_->Put(ctx_.write_opts_, key_sealed(), \"sealed\");\n  if (rs.ok()) {\n    sealed_ = true;\n    read_only_ = true;\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to seal %s\", ID().c_str());\n    return Status::InternalError();\n  }\n}\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_doc_range.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <zvec/ailego/encoding/json/mod_json_plus.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/type.h>\n\n\nnamespace zvec {\n\n\nstruct DocRange {\n  std::string key_{\"\"};\n  size_t doc_count_{0};\n\n  DocRange() {}\n\n  DocRange(const std::string &key, int count) : key_(key), doc_count_(count) {}\n\n  bool operator<(const std::string &key) const {\n    return key_ < key;\n  }\n};\n\n\nclass SegmentDocRangeStat {\n public:\n  using Ptr = std::shared_ptr<SegmentDocRangeStat>;\n\n\n  SegmentDocRangeStat(std::vector<DocRange> &&doc_ranges,\n                      uint64_t total_doc_count)\n      : doc_ranges_(std::move(doc_ranges)), total_doc_count_(total_doc_count) {\n    std::sort(\n        doc_ranges_.begin(), doc_ranges_.end(),\n        [](const DocRange &a, const DocRange &b) { return a.key_ < b.key_; });\n  }\n\n\n  static Ptr Create(const std::string &stat_json_str) {\n    ailego::JsonValue stat_json_value;\n    ailego::JsonParser parser;\n    if (!parser.parse(stat_json_str.c_str(), &stat_json_value)) {\n      LOG_ERROR(\"Failed to parse json string\");\n      return nullptr;\n    }\n\n    ailego::JsonObject stat_json_obj = stat_json_value.as_object();\n    ailego::JsonArray stat_json_array;\n    if 
(!stat_json_obj.get(\"field_value_histogram\", &stat_json_array)) {\n      LOG_ERROR(\"Failed to get histogram\");\n      return nullptr;\n    }\n\n    ailego::JsonValue stat_total_doc_count;\n    if (!stat_json_obj.get(\"total_doc_count\", &stat_total_doc_count)) {\n      LOG_ERROR(\"Failed to get total doc count\");\n      return nullptr;\n    }\n\n    std::vector<DocRange> doc_ranges;\n    for (auto it = stat_json_array.begin(); it != stat_json_array.end(); ++it) {\n      ailego::JsonString stat_key;\n      ailego::JsonValue stat_doc_count;\n      if (!it->as_object().get(\"key\", &stat_key)) {\n        LOG_ERROR(\"Failed to get key\");\n        return nullptr;\n      }\n      if (!it->as_object().get(\"doc_count\", &stat_doc_count)) {\n        LOG_ERROR(\"Failed to get doc count\");\n        return nullptr;\n      }\n      doc_ranges.emplace_back(stat_key.decode().as_stl_string(),\n                              stat_doc_count.as_integer());\n    }\n\n    return std::make_shared<SegmentDocRangeStat>(\n        std::move(doc_ranges), stat_total_doc_count.as_integer());\n  }\n\n\n  void evaluate_ratio(const std::string &value, CompareOp op,\n                      uint64_t *total_size, uint64_t *range_size) const {\n    if (doc_ranges_.size() == 0) {\n      *range_size = 0;\n      *total_size = total_doc_count_;\n    }\n\n    // Is greater than?\n    bool is_gt = (op == CompareOp::GT) || (op == CompareOp::GE);\n\n    auto it = std::lower_bound(doc_ranges_.begin(), doc_ranges_.end(), value);\n\n    if (it == doc_ranges_.end()) {\n      *range_size = is_gt ? 0 : total_doc_count_;\n    } else {\n      *range_size = is_gt ? total_doc_count_ - it->doc_count_ : it->doc_count_;\n    }\n    *total_size = total_doc_count_;\n  }\n\n\n private:\n  std::vector<DocRange> doc_ranges_;\n  uint64_t total_doc_count_;\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_indexer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include \"inverted_indexer.h\"\n#include <zvec/ailego/encoding/json.h>\n#include \"inverted_rocksdb_merger.h\"\n\n\nnamespace zvec {\n\n\nStatus InvertedIndexer::open(bool create_dir_if_missing, bool read_only) {\n  std::vector<std::string> cf_names{};  // Column families\n  for (const auto &field : fields_) {\n    if (field.index_type() != IndexType::INVERT) {\n      LOG_ERROR(\"Field[%s] is not an inverted field\", field.name().c_str());\n      return Status::InvalidArgument();\n    }\n    auto params =\n        std::dynamic_pointer_cast<InvertIndexParams>(field.index_params());\n    cf_names.emplace_back(field.name() + INVERT_SUFFIX_TERMS);\n    if (field.is_array_type()) {\n      cf_names.emplace_back(field.name() + INVERT_SUFFIX_ARRAY_LEN);\n    }\n    if (allow_range_optimization(field) &&\n        params->enable_range_optimization()) {\n      cf_names.emplace_back(field.name() + INVERT_SUFFIX_RANGES);\n    }\n    if (allow_extended_wildcard(field) && params->enable_extended_wildcard()) {\n      cf_names.emplace_back(field.name() + INVERT_SUFFIX_REVERSED_TERMS);\n    }\n  }\n  cf_names.emplace_back(INVERT_CDF);\n\n  Status s;\n  if (FILE::IsExist(working_dir_)) {\n    if (!FILE::IsDirectory(working_dir_)) {\n      LOG_ERROR(\"InvertedIndexer path[%s] is not a directory\",\n                working_dir_.c_str());\n      return 
Status::InvalidArgument();\n    }\n    s = rocksdb_context_.open(working_dir_, cf_names, read_only,\n                              std::make_shared<InvertedRocksdbValueMerger>());\n  } else {\n    if (!create_dir_if_missing) {\n      LOG_ERROR(\"InvertedIndexer path[%s] does not exist\",\n                working_dir_.c_str());\n      return Status::NotFound();\n    }\n    s = rocksdb_context_.create(working_dir_, cf_names,\n                                std::make_shared<InvertedRocksdbValueMerger>());\n  }\n\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to open %s\", ID().c_str());\n    return s;\n  }\n\n  for (const auto &field : fields_) {\n    auto column_indexer = InvertedColumnIndexer::CreateAndOpen(\n        collection_name_, field, rocksdb_context_, read_only);\n    if (column_indexer == nullptr) {\n      LOG_ERROR(\"Failed to create InvertedColumnIndexer[%s]\",\n                field.name().c_str());\n      return Status::InternalError();\n    }\n    indexers_.emplace(field.name(), std::move(column_indexer));\n  }\n\n  LOG_INFO(\"Opened %s\", ID().c_str());\n  return s;\n}\n\n\nStatus InvertedIndexer::flush() {\n  for (auto &[_, indexer] : indexers_) {\n    if (indexer->is_sealed()) {\n      continue;\n    }\n    if (!indexer->flush_special_values().ok()) {\n      LOG_ERROR(\"Failed to flush %s\", indexer->ID().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  auto s = rocksdb_context_.flush();\n  if (s.ok()) {\n    LOG_INFO(\"Flushed %s\", ID().c_str());\n  } else {\n    LOG_ERROR(\"Failed to flush %s\", ID().c_str());\n  }\n  return s;\n}\n\n\nStatus InvertedIndexer::create_snapshot(const std::string &snapshot_dir) {\n  Status s;\n  if (s = flush(); !s.ok()) {\n    LOG_ERROR(\"Failed to flush %s during creating a snapshot\", ID().c_str());\n    return s;\n  }\n\n  if (s = rocksdb_context_.create_checkpoint(snapshot_dir); s.ok()) {\n    LOG_INFO(\"Created snapshot[%s] of %s\", snapshot_dir.c_str(), ID().c_str());\n  } else {\n    
LOG_ERROR(\"Failed to create snapshot[%s] of %s\", snapshot_dir.c_str(),\n              ID().c_str());\n  }\n  return s;\n}\n\n\nStatus InvertedIndexer::seal() {\n  Status s;\n  for (const auto &[_, indexer] : indexers_) {\n    if (indexer->is_sealed()) {\n      continue;\n    }\n    if (s = indexer->seal(); !s.ok()) {\n      LOG_ERROR(\"Failed to seal %s\", indexer->ID().c_str());\n    }\n  }\n\n  if (s = flush(); !s.ok()) {\n    LOG_ERROR(\"Failed to flush %s during sealing\", ID().c_str());\n    return s;\n  }\n\n  if (s = rocksdb_context_.compact(); s.ok()) {\n    LOG_INFO(\"Sealed %s\", ID().c_str());\n  } else {\n    LOG_ERROR(\"Failed to compact %s during sealing\", ID().c_str());\n  }\n  return s;\n}\n\n\nStatus InvertedIndexer::create_column_indexer(const FieldSchema &field) {\n  if (field.index_type() != IndexType::INVERT) {\n    return Status::InvalidArgument();\n  }\n  auto it = std::find_if(fields_.begin(), fields_.end(),\n                         [&field](FieldSchema &cur_field) {\n                           return cur_field.name() == field.name();\n                         });\n  if (it != fields_.end()) {\n    LOG_ERROR(\"InvertedColumnIndexer[%s] already exists in %s\",\n              field.name().c_str(), ID().c_str());\n    return Status::InvalidArgument();\n  }\n  auto params =\n      std::dynamic_pointer_cast<InvertIndexParams>(field.index_params());\n\n  Status s;\n  bool cf_terms_created{false};\n  bool cf_array_len_created{false};\n  bool cf_ranges_created{false};\n  bool cf_reversed_terms_created{false};\n  AILEGO_DEFER([&]() {\n    if (s.ok()) {\n      LOG_INFO(\"Created a new InvertedColumnIndexer[%s] in %s\",\n               field.name().c_str(), ID().c_str());\n    } else {\n      if (cf_terms_created) {\n        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_TERMS);\n      }\n      if (cf_array_len_created) {\n        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_ARRAY_LEN);\n      }\n      if (cf_ranges_created) {\n     
   rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_RANGES);\n      }\n      if (cf_reversed_terms_created) {\n        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_REVERSED_TERMS);\n      }\n      LOG_ERROR(\"Failed to create InvertedColumnIndexer[%s] in %s\",\n                field.name().c_str(), ID().c_str());\n    }\n  });\n\n  s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_TERMS);\n  if (s.ok()) {\n    cf_terms_created = true;\n  } else {\n    return s;\n  }\n  if (field.is_array_type()) {\n    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_ARRAY_LEN);\n    if (s.ok()) {\n      cf_array_len_created = true;\n    } else {\n      return s;\n    }\n  }\n  if (allow_range_optimization(field) && params->enable_range_optimization()) {\n    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_RANGES);\n    if (s.ok()) {\n      cf_ranges_created = true;\n    } else {\n      return s;\n    }\n  }\n  if (allow_extended_wildcard(field) && params->enable_extended_wildcard()) {\n    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_REVERSED_TERMS);\n    if (s.ok()) {\n      cf_reversed_terms_created = true;\n    } else {\n      return s;\n    }\n  }\n\n  auto column_indexer = InvertedColumnIndexer::CreateAndOpen(\n      collection_name_, field, rocksdb_context_);\n  if (column_indexer) {\n    fields_.emplace_back(field);\n    indexers_.emplace(field.name(), std::move(column_indexer));\n    s = Status::OK();\n  } else {\n    s = Status::InternalError();\n  }\n  return s;\n}\n\n\nStatus InvertedIndexer::remove_column_indexer(const std::string &field_name) {\n  auto it = std::find_if(fields_.begin(), fields_.end(),\n                         [&field_name](FieldSchema &cur_field) {\n                           return cur_field.name() == field_name;\n                         });\n  auto column_indexer = (*this)[field_name];\n  if (it == fields_.end() && !column_indexer) {\n    LOG_ERROR(\"InvertedColumnIndexer[%s] doesn't 
exists in %s\",\n              field_name.c_str(), ID().c_str());\n    return Status::NotFound();\n  }\n  if (it == fields_.end() || !column_indexer) {\n    LOG_ERROR(\"%s is in corrupted state\", ID().c_str());\n    return Status::InternalError();\n  }\n\n  if (auto s = column_indexer->drop_storage(); !s.ok()) {\n    LOG_ERROR(\"Failed to remove InvertedColumnIndexer[%s] in %s\",\n              field_name.c_str(), ID().c_str());\n    return s;\n  }\n\n  fields_.erase(it);\n  indexers_.erase(field_name);\n  LOG_INFO(\"Removed InvertedColumnIndexer[%s] in %s\", field_name.c_str(),\n           ID().c_str());\n  return Status::OK();\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_indexer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <rocksdb/utilities/checkpoint.h>\n#include \"inverted_column_indexer.h\"\n\n\nnamespace zvec {\n\n\nclass InvertedIndexer {\n public:\n  using Ptr = std::shared_ptr<InvertedIndexer>;\n\n\n  explicit InvertedIndexer(const std::string &collection_name,\n                           const std::string &working_dir,\n                           const std::vector<FieldSchema> &fields)\n      : collection_name_(collection_name),\n        working_dir_(working_dir),\n        fields_(fields) {};\n\n\n  virtual ~InvertedIndexer() {\n    rocksdb_context_.close();\n    LOG_INFO(\"Closed %s\", ID().c_str());\n  }\n\n\n  static Ptr CreateAndOpen(const std::string &collection_name,\n                           const std::string &working_dir,\n                           const bool create_dir_if_missing,\n                           const std::vector<FieldSchema> &fields,\n                           bool read_only) {\n    Ptr indexer =\n        std::make_shared<InvertedIndexer>(collection_name, working_dir, fields);\n    if (indexer->open(create_dir_if_missing, read_only).ok()) {\n      return indexer;\n    } else {\n      return nullptr;\n    }\n  }\n\n\n  InvertedColumnIndexer::Ptr operator[](const std::string &field_name) {\n    auto it = indexers_.find(field_name);\n    if (it != indexers_.end()) {\n      return it->second;\n    
}\n    return nullptr;\n  }\n\n\n  Status flush();\n\n  Status create_snapshot(const std::string &snapshot_dir);\n\n  Status seal();\n\n  Status create_column_indexer(const FieldSchema &field);\n\n  Status remove_column_indexer(const std::string &field_name);\n\n  inline std::string collection() const {\n    return collection_name_;\n  }\n\n  inline std::string working_dir() const {\n    return working_dir_;\n  }\n\n  inline const std::string ID() const {\n    return \"InvertedIndexer[collection:\" + collection_name_ + \"|path:'\" +\n           working_dir_ + \"']\";\n  }\n\n\n private:\n  using FILE = ailego::File;\n\n  Status open(bool create_dir_if_missing, bool read_only);\n\n  inline bool allow_range_optimization(const FieldSchema &field) const {\n    bool not_allowed =\n        field.is_array_type() || field.data_type() == DataType::BOOL;\n    return !not_allowed;\n  }\n\n  inline bool allow_extended_wildcard(const FieldSchema &field) const {\n    return field.data_type() == DataType::STRING;\n  }\n\n\n private:\n  const std::string collection_name_;\n  const std::string working_dir_;\n  std::vector<FieldSchema> fields_;\n\n  std::unordered_map<std::string, InvertedColumnIndexer::Ptr> indexers_;\n  RocksdbContext rocksdb_context_{};\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_rocksdb_merger.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <ailego/pattern/defer.h>\n#include <rocksdb/merge_operator.h>\n#include \"inverted_codec.h\"\n\n\nnamespace zvec {\n\n\nclass InvertedRocksdbValueMerger : public rocksdb::MergeOperator {\n public:\n  virtual bool FullMergeV2(const MergeOperationInput &merge_in,\n                           MergeOperationOutput *merge_out) const override {\n    if (merge_in.existing_value == nullptr &&\n        merge_in.operand_list.size() == 1) {\n      merge_out->new_value = std::string(merge_in.operand_list[0].data(),\n                                         merge_in.operand_list[0].size());\n      return true;\n    }\n\n    merge_out->new_value.clear();\n\n    Status s;\n    roaring_bitmap_t *bitmap{nullptr};\n    if (merge_in.existing_value != nullptr) {\n      s = InvertedIndexCodec::Deserialize(merge_in.existing_value->data(),\n                                          merge_in.existing_value->size(),\n                                          &bitmap);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to deserialize existing value\");\n        return false;\n      }\n    } else {\n      bitmap = roaring_bitmap_create();\n      if (!bitmap) {\n        LOG_ERROR(\"Failed to create bitmap\");\n        return false;\n      }\n    }\n    AILEGO_DEFER([&]() { roaring_bitmap_free(bitmap); });\n\n    for (const rocksdb::Slice &m : 
merge_in.operand_list) {\n      s = InvertedIndexCodec::Merge_OR(m.data(), m.size(), true, bitmap);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to merge bitmap\");\n        return false;\n      }\n    }\n    roaring_bitmap_repair_after_lazy(bitmap);\n\n    s = InvertedIndexCodec::Serialize(bitmap, &(merge_out->new_value));\n    if (s.ok()) {\n      return true;\n    } else {\n      LOG_ERROR(\"Failed to serialize bitmap\");\n      return false;\n    }\n  }\n\n\n  virtual bool PartialMerge(const rocksdb::Slice & /*key*/,\n                            const rocksdb::Slice &left_operand,\n                            const rocksdb::Slice &right_operand,\n                            std::string *new_value,\n                            rocksdb::Logger * /*logger*/) const override {\n    roaring_bitmap_t *bitmap{nullptr};\n    auto s = InvertedIndexCodec::Deserialize(left_operand.data(),\n                                             left_operand.size(), &bitmap);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to deserialize existing value\");\n      return false;\n    }\n    AILEGO_DEFER([&]() { roaring_bitmap_free(bitmap); });\n\n    s = InvertedIndexCodec::Merge_OR(right_operand.data(), right_operand.size(),\n                                     false, bitmap);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to merge bitmap\");\n      return false;\n    }\n\n    s = InvertedIndexCodec::Serialize(bitmap, new_value);\n    if (s.ok()) {\n      return true;\n    } else {\n      LOG_ERROR(\"Failed to serialize bitmap\");\n      return false;\n    }\n  }\n\n\n  const char *Name() const override {\n    return \"InvertedRocksdbValueMerger\";\n  }\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/inverted_column/inverted_search_result.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <roaring/roaring.h>\n#include \"db/common/constants.h\"\n#include \"db/index/column/common/index_results.h\"\n\n\nnamespace zvec {\n\n\nclass InvertedSearchResult\n    : public IndexResults,\n      public std::enable_shared_from_this<InvertedSearchResult> {\n public:\n  using Ptr = std::shared_ptr<InvertedSearchResult>;\n\n\n  class Filter : public IndexFilter {\n   public:\n    explicit Filter(std::shared_ptr<const InvertedSearchResult> result)\n        : result_(std::move(result)) {};\n\n    bool is_filtered(uint64_t id) const override {\n      return !result_->contains(id);\n    }\n\n   private:\n    const std::shared_ptr<const InvertedSearchResult> result_{};\n  };\n\n\n  const IndexFilter::Ptr make_filter() const {\n    return bitmap_ ? 
std::make_shared<Filter>(shared_from_this()) : nullptr;\n  }\n\n\n  explicit InvertedSearchResult() {}\n\n\n  explicit InvertedSearchResult(roaring_bitmap_t *bitmap) : bitmap_(bitmap) {}\n\n\n  ~InvertedSearchResult() {\n    destroy_bitmap();\n  }\n\n\n  InvertedSearchResult(const InvertedSearchResult &) = delete;\n  InvertedSearchResult(InvertedSearchResult &&) = delete;\n  InvertedSearchResult &operator=(const InvertedSearchResult &) = delete;\n  InvertedSearchResult &operator=(InvertedSearchResult &&) = delete;\n\n\n  bool contains(uint32_t id) const {\n    if (bitmap_) {\n      return roaring_bitmap_contains(bitmap_, id);\n    } else {\n      return false;\n    }\n  }\n\n\n  size_t count() const override {\n    if (bitmap_) {\n      return roaring_bitmap_get_cardinality(bitmap_);\n    } else {\n      return 0;\n    }\n  }\n\n\n  class InvertedIndexIterator : public Iterator {\n   public:\n    explicit InvertedIndexIterator(\n        std::shared_ptr<const InvertedSearchResult> result)\n        : result_(result) {\n      if (result_->bitmap_) {\n        iter_ = roaring_create_iterator(result_->bitmap_);\n      }\n    }\n\n    virtual ~InvertedIndexIterator() {\n      if (iter_) {\n        roaring_free_uint32_iterator(iter_);\n      }\n    }\n\n    virtual idx_t doc_id() const {\n      if (!iter_) {\n        return INVALID_DOC_ID;\n      }\n      if (iter_->has_value) {\n        return iter_->current_value;\n      } else {\n        return INVALID_DOC_ID;\n      }\n    }\n\n    virtual float score() const {\n      return 0.0f;\n    }\n\n    virtual void next() {\n      if (iter_ && iter_->has_value) {\n        roaring_advance_uint32_iterator(iter_);\n      }\n    }\n\n    virtual bool valid() const {\n      return iter_ ? 
iter_->has_value : false;\n    }\n\n   private:\n    const std::shared_ptr<const InvertedSearchResult> result_{};\n    roaring_uint32_iterator_t *iter_{nullptr};\n  };\n\n\n  IteratorUPtr create_iterator() override {\n    return std::make_unique<InvertedIndexIterator>(shared_from_this());\n  }\n\n\n  void extract_ids(std::vector<uint32_t> *ids) const {\n    if (!ids) {\n      LOG_ERROR(\"Failed to extract ids: ids pointer is null\");\n      return;\n    }\n    if (!bitmap_) {\n      return;\n    }\n\n    ids->reserve(static_cast<size_t>(count()));\n    roaring_uint32_iterator_t *iter = roaring_create_iterator(bitmap_);\n    while (iter->has_value) {\n      ids->push_back(iter->current_value);\n      roaring_advance_uint32_iterator(iter);\n    }\n    roaring_free_uint32_iterator(iter);\n  }\n\n\n  void set_and_own_bitmap(roaring_bitmap_t *bitmap) {\n    destroy_bitmap();\n    bitmap_ = bitmap;\n  }\n\n\n  void destroy_bitmap() {\n    if (bitmap_) {\n      roaring_bitmap_free(bitmap_);\n      bitmap_ = nullptr;\n    }\n  }\n\n\n  void AND(const InvertedSearchResult &other) {\n    if (!bitmap_ || !other.bitmap_) {\n      destroy_bitmap();\n    } else {\n      roaring_bitmap_and_inplace(bitmap_, other.bitmap_);\n    }\n  }\n\n\n  void OR(const InvertedSearchResult &other) {\n    if (!other.bitmap_) {\n      return;\n    }\n    if (!bitmap_) {\n      bitmap_ = roaring_bitmap_copy(other.bitmap_);\n      return;\n    }\n    roaring_bitmap_or_inplace(bitmap_, other.bitmap_);\n  }\n\n\n private:\n  roaring_bitmap_t *bitmap_{nullptr};\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/vector_column/combined_vector_column_indexer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"combined_vector_column_indexer.h\"\n#include <algorithm>\n#include <cstdint>\n\nnamespace zvec {\n\nCombinedVectorColumnIndexer::CombinedVectorColumnIndexer(\n    const std::vector<VectorColumnIndexer::Ptr> &indexers,\n    const std::vector<VectorColumnIndexer::Ptr> &normal_indexers,\n    const FieldSchema &field_schema, const SegmentMeta &segment_meta,\n    std::vector<BlockMeta> blocks, MetricType metric_type, bool is_quantized)\n    : field_schema_(field_schema),\n      indexers_(std::move(indexers)),\n      normal_indexers_(std::move(normal_indexers)),\n      blocks_(std::move(blocks)),\n      metric_type_(metric_type),\n      is_quantized_(is_quantized) {\n  if (segment_meta.has_writing_forward_block()) {\n    if (is_quantized_) {\n      BlockMeta quant_block = segment_meta.writing_forward_block().value();\n      quant_block.set_type(BlockType::VECTOR_INDEX_QUANTIZE);\n      blocks_.push_back(std::move(quant_block));\n    } else {\n      BlockMeta block = segment_meta.writing_forward_block().value();\n      block.set_type(BlockType::VECTOR_INDEX);\n      blocks_.push_back(std::move(block));\n    }\n  }\n\n  int block_offset = 0;\n  for (size_t i = 0; i < indexers_.size(); ++i) {\n    auto &block_meta = blocks_[i];\n    block_offsets_.push_back(block_offset);\n    block_offset += block_meta.doc_count_;\n  }\n\n  min_doc_id_ = 
segment_meta.min_doc_id();\n}\n\nResult<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(\n    const vector_column_params::VectorData &vector_data,\n    const vector_column_params::QueryParams &query_params) {\n  core::IndexDocumentList doc_list;\n  std::vector<std::string> reverted_vector_list;\n  std::vector<std::string> reverted_sparse_values_list;\n\n  // query_params.bf_pks is segment level, here we need to convert it to block\n  // level\n  std::vector<std::vector<uint64_t>> block_bf_pks(indexers_.size());\n\n  if (!query_params.bf_pks.empty()) {\n    // dispatcher pks to corresponding block_bf_pks\n    for (auto &pk : query_params.bf_pks[0]) {\n      for (size_t i = 0; i < block_offsets_.size(); ++i) {\n        if (pk >= block_offsets_[i] &&\n            pk < block_offsets_[i] + blocks_[i].doc_count_) {\n          block_bf_pks[i].push_back(\n              static_cast<uint64_t>(pk - block_offsets_[i]));\n          break;\n        }\n      }\n    }\n  }\n\n  auto q_params = query_params.query_params;\n  for (size_t i = 0; i < indexers_.size(); ++i) {\n    if (!query_params.bf_pks.empty() && block_bf_pks[i].empty()) {\n      LOG_DEBUG(\n          \"query_params has bf_pks, but block_bf_pks[%zu] is empty, just skip \"\n          \"this indexer\",\n          i);\n      continue;\n    }\n    zvec::Result<zvec::IndexResults::Ptr> result{nullptr};\n    float scale_factor{};\n    bool need_refine{false};\n    if (q_params && q_params->is_using_refiner()) {\n      if (normal_indexers_.size() != indexers_.size()) {\n        return tl::make_unexpected(Status::InvalidArgument(\n            \"normal indexers size[\", normal_indexers_.size(),\n            \"] not match indexers size[\", indexers_.size(), \"]\"));\n      }\n      // query_params of HNSW doesn't have scale_factor\n      if (q_params->type() == IndexType::FLAT) {\n        scale_factor = std::dynamic_pointer_cast<FlatQueryParams>(q_params)\n                           ->scale_factor();\n      } else if 
(q_params->type() == IndexType::IVF) {\n        scale_factor =\n            std::dynamic_pointer_cast<IVFQueryParams>(q_params)->scale_factor();\n      }\n      need_refine = true;\n    }\n\n    const IndexFilter *filter{nullptr};\n    auto per_block_filter =\n        BlockOffsetFilter{query_params.filter, block_offsets_[i]};\n    if (query_params.filter) {\n      if (block_offsets_[i] > 0) {\n        filter = &per_block_filter;\n      } else {\n        filter = query_params.filter;\n      }\n    }\n\n    vector_column_params::QueryParams modified_query_params{\n        query_params.data_type,\n        query_params.dimension,\n        query_params.topk,\n        filter,\n        query_params.fetch_vector,\n        query_params.query_params,\n        query_params.group_by\n            ? std::make_unique<vector_column_params::GroupByParams>(\n                  query_params.group_by->group_topk,\n                  query_params.group_by->group_count,\n                  query_params.group_by->group_by)\n            : nullptr,\n        {},\n        need_refine ? 
std::shared_ptr<vector_column_params::RefinerParam>(\n                          new vector_column_params::RefinerParam{\n                              scale_factor, normal_indexers_[i]})\n                    : nullptr,\n        query_params.extra_params};\n\n    if (!query_params.bf_pks.empty()) {\n      modified_query_params.bf_pks.emplace_back(block_bf_pks[i]);\n    }\n\n    result = indexers_[i]->Search(vector_data, modified_query_params);\n    if (!result) {\n      return tl::make_unexpected(result.error());\n    }\n\n    auto index_results = result.value();\n    VectorIndexResults *vector_index_results =\n        dynamic_cast<VectorIndexResults *>(index_results.get());\n\n    const auto &sub_docs = vector_index_results->docs();\n    for (size_t j = 0; j < sub_docs.size(); ++j) {\n      auto doc = sub_docs[j];\n      doc.set_key(block_offsets_[i] + sub_docs[j].key());\n      doc_list.emplace_back(std::move(doc));\n    }\n\n    auto &&temp_vector_list = vector_index_results->reverted_vector_list();\n    reverted_vector_list.insert(\n        reverted_vector_list.end(),\n        std::make_move_iterator(temp_vector_list.begin()),\n        std::make_move_iterator(temp_vector_list.end()));\n\n    auto &&temp_sparse_list =\n        vector_index_results->reverted_sparse_values_list();\n    reverted_sparse_values_list.insert(\n        reverted_sparse_values_list.end(),\n        std::make_move_iterator(temp_sparse_list.begin()),\n        std::make_move_iterator(temp_sparse_list.end()));\n  }\n\n  if (doc_list.empty()) {\n    // return empty result\n    return std::make_unique<VectorIndexResults>(\n        field_schema_.is_sparse_vector(), std::move(doc_list),\n        std::move(reverted_vector_list),\n        std::move(reverted_sparse_values_list));\n  }\n\n  std::vector<size_t> indices(doc_list.size());\n  std::iota(indices.begin(), indices.end(), 0);\n\n  std::sort(indices.begin(), indices.end(),\n            [this, &doc_list](size_t lhs, size_t rhs) {\n              
const auto &lhs_doc = doc_list[lhs];\n              const auto &rhs_doc = doc_list[rhs];\n\n              if (this->metric_type_ == MetricType::L2) {\n                return lhs_doc.score() < rhs_doc.score();\n              } else if (this->metric_type_ == MetricType::IP) {\n                return lhs_doc.score() > rhs_doc.score();\n              } else if (this->metric_type_ == MetricType::COSINE) {\n                return lhs_doc.score() < rhs_doc.score();\n              } else {\n                // default\n                return lhs_doc.score() < rhs_doc.score();\n              }\n            });\n\n  // doc_list\n  std::vector<core::IndexDocument> sorted_doc_list(doc_list.size());\n  for (size_t i = 0; i < indices.size(); ++i) {\n    sorted_doc_list[i] = std::move(doc_list[indices[i]]);\n  }\n  doc_list = std::move(sorted_doc_list);\n\n  // reverted_vector_list\n  if (!reverted_vector_list.empty()) {\n    std::vector<std::string> sorted_reverted_vector_list(\n        reverted_vector_list.size());\n    for (size_t i = 0; i < indices.size(); ++i) {\n      if (indices[i] < reverted_vector_list.size()) {\n        sorted_reverted_vector_list[i] =\n            std::move(reverted_vector_list[indices[i]]);\n      }\n    }\n    reverted_vector_list = std::move(sorted_reverted_vector_list);\n  }\n\n  // reverted_sparse_values_list\n  if (!reverted_sparse_values_list.empty()) {\n    std::vector<std::string> sorted_reverted_sparse_vector_list(\n        reverted_sparse_values_list.size());\n    for (size_t i = 0; i < indices.size(); ++i) {\n      if (indices[i] < reverted_sparse_values_list.size()) {\n        sorted_reverted_sparse_vector_list[i] =\n            std::move(reverted_sparse_values_list[indices[i]]);\n      }\n    }\n    reverted_sparse_values_list = std::move(sorted_reverted_sparse_vector_list);\n  }\n\n  // truncate to topk\n  if (doc_list.size() > query_params.topk) doc_list.resize(query_params.topk);\n  if (reverted_vector_list.size() > query_params.topk)\n 
   reverted_vector_list.resize(query_params.topk);\n  if (reverted_sparse_values_list.size() > query_params.topk)\n    reverted_sparse_values_list.resize(query_params.topk);\n\n  return std::make_unique<VectorIndexResults>(\n      field_schema_.is_sparse_vector(), std::move(doc_list),\n      std::move(reverted_vector_list), std::move(reverted_sparse_values_list));\n}\n\nResult<vector_column_params::VectorDataBuffer>\nCombinedVectorColumnIndexer::Fetch(uint32_t segment_doc_id) const {\n  int32_t target_block_doc_id = -1;\n  size_t target_block_idx = 0;\n\n  uint32_t block_offset = 0;\n  for (size_t i = 0; i < blocks_.size(); ++i) {\n    auto &block_meta = blocks_[i];\n    if (block_offset <= segment_doc_id &&\n        segment_doc_id < block_offset + block_meta.doc_count_) {\n      target_block_doc_id = segment_doc_id - block_offset;\n      target_block_idx = i;\n      break;\n    }\n    block_offset += block_meta.doc_count_;\n  }\n\n  if (target_block_doc_id == -1) {\n    LOG_ERROR(\"Can't find block for doc_id[%u]\", segment_doc_id);\n    return tl::make_unexpected(\n        Status::NotFound(\"Can't find block for doc_id:\", segment_doc_id));\n  }\n\n  auto indexer = indexers_[target_block_idx];\n  return indexer->Fetch(target_block_doc_id);\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/vector_column/combined_vector_column_indexer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <vector>\n#include \"db/index/common/index_filter.h\"\n#include \"vector_column_indexer.h\"\n#include \"vector_column_params.h\"\n\nnamespace zvec {\n\nclass CombinedVectorColumnIndexer {\n public:\n  using Ptr = std::shared_ptr<CombinedVectorColumnIndexer>;\n\n  explicit CombinedVectorColumnIndexer(\n      const std::vector<VectorColumnIndexer::Ptr> &indexers,\n      const std::vector<VectorColumnIndexer::Ptr> &normal_indexers,\n      const FieldSchema &field_schema, const SegmentMeta &segment_meta,\n      std::vector<BlockMeta> blocks, MetricType metric_type,\n      bool is_quantized = false);\n\n  virtual ~CombinedVectorColumnIndexer() = default;\n\n  virtual Result<IndexResults::Ptr> Search(\n      const vector_column_params::VectorData &vector_data,\n      const vector_column_params::QueryParams &query_params);\n\n  // doc_id is segment local id\n  virtual Result<vector_column_params::VectorDataBuffer> Fetch(\n      uint32_t segment_doc_id) const;\n\n\n protected:\n  /**\n   * A filter wrapper that applies an offset to document IDs before\n   * delegating to an inner filter.\n   *\n   * This is used when multiple blocks with different ID offsets are stored.\n   * Each block has its own local ID space, and this filter translates\n   * block-level IDs to segment-level IDs before checking the inner 
filter.\n   */\n  class BlockOffsetFilter : public IndexFilter {\n   public:\n    BlockOffsetFilter(const IndexFilter *inner_filter, uint64_t offset)\n        : inner_filter_(inner_filter), offset_(offset) {}\n\n    bool is_filtered(uint64_t id) const override {\n      return inner_filter_->is_filtered(id + offset_);\n    }\n\n   private:\n    const IndexFilter *inner_filter_;\n    uint64_t offset_;\n  };\n\n  // for ut\n  CombinedVectorColumnIndexer() = default;\n\n\n private:\n  FieldSchema field_schema_;\n  std::vector<VectorColumnIndexer::Ptr> indexers_;\n  std::vector<VectorColumnIndexer::Ptr> normal_indexers_;\n  std::vector<BlockMeta> blocks_;\n  std::vector<uint32_t> block_offsets_;\n  MetricType metric_type_{MetricType::UNDEFINED};\n  bool is_quantized_{false};\n  uint64_t min_doc_id_{0};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/column/vector_column/engine_helper.hpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/core/interface/index.h>\n#include <zvec/core/interface/index_param_builders.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/query_params.h>\n#include <zvec/db/status.h>\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/type.h\"\n#include \"vector_column_params.h\"\n\n\nnamespace zvec {\n// TODO: rename file extension\nclass ProximaEngineHelper {\n public:\n  static Result<vector_column_params::VectorDataBuffer>\n  move_from_engine_vector_buffer(\n      const core_interface::VectorDataBuffer &&vector_data_buffer,\n      bool is_sparse) {\n    if (is_sparse) {\n      auto sparse_vector_buffer = std::get<core_interface::SparseVectorBuffer>(\n          vector_data_buffer.vector_buffer);\n      return vector_column_params::VectorDataBuffer{\n          vector_column_params::SparseVectorBuffer{\n              std::move(sparse_vector_buffer.indices),\n              std::move(sparse_vector_buffer.values)}};\n    }\n    auto dense_vector_buffer = std::get<core_interface::DenseVectorBuffer>(\n        vector_data_buffer.vector_buffer);\n    return vector_column_params::VectorDataBuffer{\n        vector_column_params::DenseVectorBuffer{\n            std::move(dense_vector_buffer.data)}};\n  }\n\n  static Result<vector_column_params::VectorData> 
convert_from_engine_vector(\n      const core_interface::VectorData &vector_data, bool is_sparse) {\n    if (is_sparse) {\n      auto engine_vector =\n          std::get<core_interface::SparseVector>(vector_data.vector);\n      return vector_column_params::VectorData{\n          vector_column_params::SparseVector{engine_vector.count,\n                                             engine_vector.indices,\n                                             engine_vector.values}};\n    }\n    auto engine_vector =\n        std::get<core_interface::DenseVector>(vector_data.vector);\n    return vector_column_params::VectorData{\n        vector_column_params::DenseVector{engine_vector.data}};\n  }\n\n  // convert to engine vector\n  static Result<core_interface::VectorData> convert_to_engine_vector(\n      const vector_column_params::VectorData &vector_data, bool is_sparse) {\n    if (is_sparse) {\n      auto db_vector =\n          std::get<vector_column_params::SparseVector>(vector_data.vector);\n      auto engine_vector = core_interface::SparseVector{\n          db_vector.count, const_cast<void *>(db_vector.indices),\n          const_cast<void *>(db_vector.values)};\n      return core_interface::VectorData{engine_vector};\n    }\n\n    auto db_vector =\n        std::get<vector_column_params::DenseVector>(vector_data.vector);\n    auto engine_vector =\n        core_interface::DenseVector{const_cast<void *>(db_vector.data)};\n    return core_interface::VectorData{engine_vector};\n  }\n\n  // convert_filter\n  static std::shared_ptr<core_interface::IndexFilter> convert_to_engine_filter(\n      const IndexFilter *filter) {\n    auto engine_filter = std::make_shared<core_interface::IndexFilter>();\n    if (filter != nullptr) {\n      engine_filter->set(\n          [filter](uint64_t id) { return filter->is_filtered(id); });\n    }\n    return engine_filter;\n  }\n\n private:\n  template <typename EngineQueryParamType>\n  static Result<std::unique_ptr<EngineQueryParamType>>\n  
_build_common_query_param(\n      const vector_column_params::QueryParams &db_query_params) {\n    auto engine_query_param = std::make_unique<EngineQueryParamType>();\n    engine_query_param->topk = db_query_params.topk;\n    engine_query_param->fetch_vector = db_query_params.fetch_vector;\n\n    engine_query_param->filter =\n        convert_to_engine_filter(db_query_params.filter);\n\n    if (db_query_params.query_params) {\n      engine_query_param->radius = db_query_params.query_params->radius();\n      engine_query_param->is_linear = db_query_params.query_params->is_linear();\n    }\n    if (db_query_params.refiner_param) {\n      engine_query_param->refiner_param =\n          std::make_shared<core_interface::RefinerParam>(\n              core_interface::RefinerParam{\n                  .scale_factor_ = db_query_params.refiner_param->scale_factor_,\n                  .reference_index =\n                      db_query_params.refiner_param->reference_indexer->index});\n    }\n\n    return engine_query_param;\n  }\n\n public:\n  static Result<std::unique_ptr<core_interface::BaseIndexQueryParam>>\n  convert_to_engine_query_param(\n      const FieldSchema &field_schema,\n      const vector_column_params::QueryParams &query_params) {\n    if (!field_schema.index_params()) {\n      return tl::make_unexpected(Status::InvalidArgument(\"nullptr\"));\n    }\n    switch (field_schema.index_params()->type()) {\n      case IndexType::FLAT: {\n        // auto db_index_params =\n        //     dynamic_cast<const FlatIndexParams\n        //     *>(field_schema.index_params());\n        auto flat_query_param_result =\n            _build_common_query_param<core_interface::FlatQueryParam>(\n                query_params);\n        if (!flat_query_param_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build query param: \" +\n              flat_query_param_result.error().message()));\n        }\n        return 
std::move(flat_query_param_result.value());\n      }\n\n      case IndexType::HNSW: {\n        auto hnsw_query_param_result =\n            _build_common_query_param<core_interface::HNSWQueryParam>(\n                query_params);\n        if (!hnsw_query_param_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build query param: \" +\n              hnsw_query_param_result.error().message()));\n        }\n        auto &hnsw_query_param = hnsw_query_param_result.value();\n        if (query_params.query_params) {\n          auto db_hnsw_query_params = dynamic_cast<const HnswQueryParams *>(\n              query_params.query_params.get());\n          hnsw_query_param->ef_search = db_hnsw_query_params->ef();\n        }\n        return std::move(hnsw_query_param);\n      }\n\n      case IndexType::HNSW_RABITQ: {\n        auto hnsw_query_param_result =\n            _build_common_query_param<core_interface::HNSWRabitqQueryParam>(\n                query_params);\n        if (!hnsw_query_param_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build query param: \" +\n              hnsw_query_param_result.error().message()));\n        }\n        auto &hnsw_query_param = hnsw_query_param_result.value();\n        if (query_params.query_params) {\n          auto db_hnsw_rabitq_query_params =\n              dynamic_cast<const HnswRabitqQueryParams *>(\n                  query_params.query_params.get());\n          hnsw_query_param->ef_search = db_hnsw_rabitq_query_params->ef();\n        }\n        return std::move(hnsw_query_param);\n      }\n\n      case IndexType::IVF: {\n        auto ivf_query_param_result =\n            _build_common_query_param<core_interface::IVFQueryParam>(\n                query_params);\n        if (!ivf_query_param_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build 
query param: \" +\n              ivf_query_param_result.error().message()));\n        }\n        auto &ivf_query_param = ivf_query_param_result.value();\n        if (query_params.query_params) {\n          auto db_ivf_query_params = dynamic_cast<const IVFQueryParams *>(\n              query_params.query_params.get());\n          ivf_query_param->nprobe = db_ivf_query_params->nprobe();\n        }\n        return std::move(ivf_query_param);\n      }\n      default:\n        return tl::make_unexpected(Status::InvalidArgument(\"not supported\"));\n    }\n  }\n\n  static Result<core_interface::MetricType> convert_to_engine_metric_type(\n      MetricType metric_type) {\n    switch (metric_type) {\n      case MetricType::MIPSL2:\n        return core_interface::MetricType::kMIPSL2sq;\n      case MetricType::IP:\n        return core_interface::MetricType::kInnerProduct;\n      case MetricType::L2:\n        return core_interface::MetricType::kL2sq;\n      case MetricType::COSINE:\n        return core_interface::MetricType::kCosine;\n      default:\n        return tl::make_unexpected(\n            Status::InvalidArgument(\"unsupported metric type\"));\n    }\n  }\n\n  static Result<core_interface::QuantizerType> convert_to_engine_quantize_type(\n      QuantizeType quantize_type) {\n    switch (quantize_type) {\n      case QuantizeType::UNDEFINED:\n        return core_interface::QuantizerType::kNone;\n      case QuantizeType::FP16:\n        return core_interface::QuantizerType::kFP16;\n      case QuantizeType::INT8:\n        return core_interface::QuantizerType::kInt8;\n      case QuantizeType::INT4:\n        return core_interface::QuantizerType::kInt4;\n      case QuantizeType::RABITQ:\n        return core_interface::QuantizerType::kRabitq;\n      default:\n        return tl::make_unexpected(\n            Status::InvalidArgument(\"unsupported quantize type\"));\n    }\n  }\n\n  static Result<core_interface::DataType> convert_to_engine_data_type(\n      DataType data_type) {\n 
   switch (data_type) {\n      case DataType::VECTOR_FP32:\n      case DataType::SPARSE_VECTOR_FP32:\n        return core_interface::DataType::DT_FP32;\n\n      case DataType::VECTOR_FP16:\n      case DataType::SPARSE_VECTOR_FP16:\n        return core_interface::DataType::DT_FP16;\n\n      case DataType::VECTOR_INT8:\n        return core_interface::DataType::DT_INT8;\n\n      default:\n        return tl::make_unexpected(\n            Status::InvalidArgument(\"unsupported data type\"));\n    }\n  }\n\n private:\n  template <typename DBIndexParamType, typename IndexParamBuilderType>\n  static Result<std::shared_ptr<IndexParamBuilderType>>\n  _build_common_index_param(const FieldSchema &field_schema) {\n    auto db_index_params = dynamic_cast<const DBIndexParamType *>(\n        field_schema.index_params().get());\n    if (db_index_params == nullptr) {\n      return tl::make_unexpected(Status::InvalidArgument(\"bad_cast\"));\n    }\n    auto index_param_builder = std::make_shared<IndexParamBuilderType>();\n\n    // db will ensure the id is consecutive\n    index_param_builder->WithUseIDMap(false);\n\n    index_param_builder->WithIsSparse(field_schema.is_sparse_vector())\n        .WithDimension(field_schema.dimension());\n    if (auto data_type_result =\n            convert_to_engine_data_type(field_schema.data_type());\n        data_type_result.has_value()) {\n      index_param_builder->WithDataType(data_type_result.value());\n    } else {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"unsupported data type\"));\n    }\n    if (auto metric_type_result =\n            convert_to_engine_metric_type(db_index_params->metric_type());\n        metric_type_result.has_value()) {\n      index_param_builder->WithMetricType(metric_type_result.value());\n    } else {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"unsupported metric type\"));\n    }\n    if (auto quantize_type =\n            
convert_to_engine_quantize_type(db_index_params->quantize_type());\n        quantize_type.has_value()) {\n      index_param_builder->WithQuantizerParam(\n          core_interface::QuantizerParam(quantize_type.value()));\n    } else {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"unsupported quantize type\"));\n    }\n    return index_param_builder;\n  }\n\n public:\n  static Result<core_interface::BaseIndexParam::Pointer>\n  convert_to_engine_index_param(const FieldSchema &field_schema) {\n    if (!field_schema.index_params()) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"field_schema.index_params nullptr\"));\n    }\n\n    switch (field_schema.index_params()->type()) {\n      case IndexType::FLAT: {\n        auto index_param_builder =\n            _build_common_index_param<FlatIndexParams,\n                                      core_interface::FlatIndexParamBuilder>(\n                field_schema);\n        if (!index_param_builder.has_value()) {\n          return tl::make_unexpected(\n              Status::InvalidArgument(\"failed to build index param: \" +\n                                      index_param_builder.error().message()));\n        }\n        return index_param_builder.value()->Build();\n      }\n\n      case IndexType::HNSW: {\n        auto index_param_builder_result =\n            _build_common_index_param<HnswIndexParams,\n                                      core_interface::HNSWIndexParamBuilder>(\n                field_schema);\n        if (!index_param_builder_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build index param: \" +\n              index_param_builder_result.error().message()));\n        }\n        auto index_param_builder = index_param_builder_result.value();\n\n        auto db_index_params = dynamic_cast<const HnswIndexParams *>(\n            field_schema.index_params().get());\n        
index_param_builder->WithM(db_index_params->m());\n        index_param_builder->WithEFConstruction(\n            db_index_params->ef_construction());\n\n        return index_param_builder->Build();\n      }\n\n      case IndexType::HNSW_RABITQ: {\n        auto index_param_builder_result = _build_common_index_param<\n            HnswRabitqIndexParams, core_interface::HNSWRabitqIndexParamBuilder>(\n            field_schema);\n        if (!index_param_builder_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build index param: \" +\n              index_param_builder_result.error().message()));\n        }\n        auto index_param_builder = index_param_builder_result.value();\n\n        auto db_index_params = dynamic_cast<const HnswRabitqIndexParams *>(\n            field_schema.index_params().get());\n        index_param_builder->WithM(db_index_params->m());\n        index_param_builder->WithEFConstruction(\n            db_index_params->ef_construction());\n        index_param_builder->WithTotalBits(db_index_params->total_bits());\n        index_param_builder->WithNumClusters(db_index_params->num_clusters());\n        index_param_builder->WithSampleCount(db_index_params->sample_count());\n        index_param_builder->WithProvider(\n            db_index_params->raw_vector_provider());\n        index_param_builder->WithReformer(db_index_params->rabitq_reformer());\n\n        return index_param_builder->Build();\n      }\n\n      case IndexType::IVF: {\n        auto index_param_builder_result = _build_common_index_param<\n            IVFIndexParams, core_interface::IVFIndexParamBuilder>(field_schema);\n        if (!index_param_builder_result.has_value()) {\n          return tl::make_unexpected(Status::InvalidArgument(\n              \"failed to build index param: \" +\n              index_param_builder_result.error().message()));\n        }\n        auto index_param_builder = 
index_param_builder_result.value();\n\n        auto db_index_params = dynamic_cast<const IVFIndexParams *>(\n            field_schema.index_params().get());\n        index_param_builder->WithNList(db_index_params->n_list());\n        index_param_builder->WithNiters(db_index_params->n_iters());\n        index_param_builder->WithUseSoar(db_index_params->use_soar());\n\n        return index_param_builder->Build();\n      }\n\n      default:\n        return tl::make_unexpected(Status::InvalidArgument(\"not supported\"));\n    }\n  }\n};\n};  // namespace zvec"
  },
  {
    "path": "src/db/index/column/vector_column/vector_column_indexer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"vector_column_indexer.h\"\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/core/interface/index_factory.h>\n#include <zvec/db/status.h>\n#include \"engine_helper.hpp\"\n\n\nnamespace zvec {\n\nStatus VectorColumnIndexer::Open(\n    const vector_column_params::ReadOptions &read_options) {\n  if (index != nullptr) {\n    return Status::InvalidArgument(\"Index already opened\");\n  }\n\n  // TODO: pass read_options to proxima index\n  if (engine_name_ == \"proxima\") {\n    return CreateProximaIndex(read_options);\n  } else {\n    return Status::InvalidArgument(\"Engine name not supported\");\n  }\n}\n\nStatus VectorColumnIndexer::CreateProximaIndex(\n    const vector_column_params::ReadOptions &read_options) {\n  auto index_param_result =\n      ProximaEngineHelper::convert_to_engine_index_param(field_schema_);\n  if (!index_param_result.has_value()) {\n    return Status::InvalidArgument(index_param_result.error().message());\n  }\n  auto &index_param = index_param_result.value();\n\n  index = core_interface::IndexFactory::CreateAndInitIndex(*index_param);\n  if (index == nullptr) {\n    return Status::InternalError(\"Failed to create index\");\n  }\n\n  auto storage_type =\n      read_options.use_mmap\n          ? 
core_interface::StorageOptions::StorageType::kMMAP\n          : core_interface::StorageOptions::StorageType::kBufferPool;\n\n  if (0 != index->Open(this->index_file_path(),\n                       {storage_type, read_options.create_new,\n                        read_options.read_only})) {\n    return Status::InternalError(\"Failed to open index\");\n  }\n\n  return Status::OK();\n}\n\nStatus VectorColumnIndexer::Flush() {\n  if (index == nullptr) {\n    return Status::InvalidArgument(\"Index not opened\");\n  }\n\n  if (0 != index->Flush()) {\n    return Status::InternalError(\"Failed to flush index\");\n  }\n  return Status::OK();\n}\n\n\nStatus VectorColumnIndexer::Close() {\n  if (index == nullptr) {\n    return Status::InvalidArgument(\"Index not opened\");\n  }\n\n  if (0 != index->Close()) {\n    return Status::InternalError(\"Failed to close index\");\n  }\n  index.reset();\n  return Status::OK();\n}\n\nStatus VectorColumnIndexer::Destroy() {\n  if (index == nullptr) {\n    return Status::InvalidArgument(\"Index not opened\");\n  }\n\n  if (Close() != Status::OK()) {\n    return Status::InternalError(\"Failed to close index\");\n  }\n  if (!ailego::File::RemovePath(index_file_path_)) {\n    return Status::InternalError(\"Failed to remove index file\");\n  }\n  return Status::OK();\n}\n\nStatus VectorColumnIndexer::Merge(\n    const std::vector<VectorColumnIndexer::Ptr> &indexers,\n    const IndexFilter::Ptr &filter,\n    const vector_column_params::MergeOptions &merge_options) {\n  if (index == nullptr) {\n    return Status::InvalidArgument(\"Index not opened\");\n  }\n\n  if (indexers.empty()) {\n    return Status::OK();\n  }\n\n  auto engine_indexers = std::vector<core_interface::Index::Pointer>();\n\n  for (auto &indexer : indexers) {\n    if (indexer->index_file_path() == this->index_file_path()) {\n      continue;\n    }\n    engine_indexers.push_back(indexer->index);\n  }\n  auto engine_filter =\n      
ProximaEngineHelper::convert_to_engine_filter(filter.get());\n  if (engine_filter == nullptr) {\n    return Status::InvalidArgument(\"Failed to convert filter\");\n  }\n  if (0 !=\n      index->Merge(engine_indexers, *engine_filter,\n                   {merge_options.write_concurrency, merge_options.pool})) {\n    return Status::InternalError(\"Failed to merge index\");\n  }\n  return Status::OK();\n}\n\nStatus VectorColumnIndexer::Insert(\n    const vector_column_params::VectorData &vector_data, uint32_t doc_id) {\n  if (index == nullptr) {\n    return Status::InvalidArgument(\"Index not opened\");\n  }\n\n  auto engine_vector_data =\n      ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_);\n  if (0 != index->Add(engine_vector_data.value(), doc_id)) {\n    return Status::InternalError(\"Failed to add vector to index\");\n  }\n  return Status::OK();\n}\n\nResult<vector_column_params::VectorDataBuffer> VectorColumnIndexer::Fetch(\n    uint32_t doc_id) const {\n  if (index == nullptr) {\n    return tl::make_unexpected(Status::InvalidArgument(\"Index not opened\"));\n  }\n\n  auto vector_data_buffer = core_interface::VectorDataBuffer();\n\n  if (0 != index->Fetch(doc_id, &vector_data_buffer)) {\n    return tl::make_unexpected(\n        Status::InternalError(\"Failed to fetch vector from index\"));\n  }\n  return ProximaEngineHelper::move_from_engine_vector_buffer(\n             std::move(vector_data_buffer), is_sparse_)\n      .value();\n}\n\nResult<IndexResults::Ptr> VectorColumnIndexer::Search(\n    const vector_column_params::VectorData &vector_data,\n    const vector_column_params::QueryParams &query_params) {\n  if (index == nullptr) {\n    return tl::make_unexpected(Status::InvalidArgument(\"Index not opened\"));\n  }\n\n  auto engine_vector_data =\n      ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_);\n  core_interface::SearchResult search_result;\n  auto engine_query_param_result =\n      
ProximaEngineHelper::convert_to_engine_query_param(field_schema_,\n                                                         query_params);\n  if (!engine_query_param_result.has_value()) {\n    return tl::make_unexpected(engine_query_param_result.error());\n  }\n  auto &engine_query_param = engine_query_param_result.value();\n  if (query_params.bf_pks.size() > 1) {\n    LOG_ERROR(\"bf_pks size > 1 is not supported\");\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"bf_pks size > 1 is not supported\"));\n  } else if (query_params.bf_pks.size() == 1) {\n    auto &bf_pks = query_params.bf_pks[0];\n    engine_query_param->bf_pks =\n        std::make_shared<std::vector<uint64_t>>(std::move(bf_pks));\n  } else {\n    engine_query_param->bf_pks = nullptr;\n  }\n  if (0 != index->Search(engine_vector_data.value(),\n                         std::move(engine_query_param), &search_result)) {\n    return tl::make_unexpected(\n        Status::InternalError(\"Failed to search vector\"));\n  }\n\n  auto result = std::make_shared<VectorIndexResults>(\n      is_sparse_, std::move(search_result.doc_list_),\n      std::move(search_result.reverted_vector_list_),\n      std::move(search_result.reverted_sparse_values_list_));\n  return result;\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/column/vector_column/vector_column_indexer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n#include <string>\n#include <utility>\n#include <variant>\n#include <ailego/parallel/lock.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/interface/index.h>\n#include <zvec/core/interface/index_param.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include \"db/common/constants.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/column/common/index_results.h\"\n#include \"db/index/common/meta.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"vector_column_params.h\"\n#include \"vector_index_results.h\"\n\nnamespace zvec {\n\nclass ProximaEngineHelper;\n\nclass VectorColumnIndexer {\n public:\n  using Ptr = std::shared_ptr<VectorColumnIndexer>;\n  PROXIMA_DISALLOW_COPY_AND_ASSIGN(VectorColumnIndexer);\n\n  VectorColumnIndexer(const std::string &index_file_path,\n                      const FieldSchema &field_schema,\n                      const std::string &engine_name = \"proxima\")\n      : field_schema_(field_schema),\n        index_file_path_(index_file_path),\n        engine_name_(engine_name) {\n    // assert(field_schema.is_dense_vector() ||\n    // field_schema.is_sparse_vector());\n    is_sparse_ = field_schema.is_sparse_vector();\n  }\n\n  virtual ~VectorColumnIndexer() = default;\n\n public:\n  Status Open(const 
vector_column_params::ReadOptions &read_options);\n\n  Status Flush();\n\n  // Close will call Flush()\n  Status Close();\n\n  // Destroy will call Close() and remove index file\n  Status Destroy();\n\n\n  // If HNSWIndexer.merge([FlatIndexer1, FlatIndexer2])\n  // then the merged indexer is a HNSWIndexer\n  Status Merge(const std::vector<VectorColumnIndexer::Ptr> &indexers,\n               const IndexFilter::Ptr &filter = nullptr,\n               const vector_column_params::MergeOptions &merge_options = {});\n  // TODO: should we use this function? or a Reducer?\n  //  TODO: sstatic reduce, optimize; iterator/scan\n\n\n  //! Insert vector\n  Status Insert(const vector_column_params::VectorData &vector_data,\n                uint32_t doc_id);\n  // TODO: batch insert\n\n  virtual Result<IndexResults::Ptr> Search(\n      const vector_column_params::VectorData &vector_data,\n      const vector_column_params::QueryParams &query_params);\n  // Result<std::vector<IndexResults::Ptr>> BatchSearch(\n  //     const VectorDataset &vector_data,\n  //     const  vector_column_params::QueryParams &query_params);\n\n  Result<vector_column_params::VectorDataBuffer> Fetch(uint32_t doc_id) const;\n  // Result<VectorDataset> BatchFetch(const std::vector<uint32_t> &doc_ids)\n  // const;\n\n  core::IndexProvider::Pointer create_index_provider() const {\n    return index->create_index_provider();\n  }\n\n public:\n  std::string index_file_path() const {\n    return index_file_path_;\n  }\n\n  size_t doc_count() const {\n    if (index == nullptr) {\n      return -1;\n    }\n    return index->GetDocCount();\n  }\n\n  // for ut\n protected:\n  VectorColumnIndexer() = default;\n\n private:\n  // protected:\n  //  virtual bool init_proxima_params() = 0;\n\n  // proxima or other engine index param like VSAGE\n  // build proxima index\n  Status CreateProximaIndex(\n      const vector_column_params::ReadOptions &read_options);\n\n protected:\n  friend ProximaEngineHelper;\n  
core_interface::Index::Pointer index;\n  FieldSchema field_schema_{};\n  std::string index_file_path_{};\n\n\n  std::string engine_name_ = \"proxima\";\n  bool is_sparse_{false};  // TODO: eliminate the dynamic flag and make it\n                           // static/template/separate class\n};\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/column/vector_column/vector_column_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <variant>\n#include <vector>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/interface/index_param.h>\n#include <zvec/db/query_params.h>\n#include <zvec/db/type.h>\n#include \"db/index/common/index_filter.h\"\n\nnamespace zvec {\nclass VectorColumnIndexer;\n\nnamespace vector_column_params {\nstruct DenseVector {\n  const void *data;\n};\n\nstruct SparseVector {\n  uint32_t count;\n  const void *indices;  // uint32\n  const void *values;   // FP16/FP32\n};\n\nstruct VectorData {\n  std::variant<DenseVector, SparseVector> vector;\n};\n\n\n// VectorData with memory management\nstruct DenseVectorBuffer {\n  std::string data;  // use string to manage memory\n};\n\nstruct SparseVectorBuffer {\n  std::string indices;  // uint32_t\n  std::string values;\n\n  using IndexType = uint32_t;\n  uint32_t count() const {\n    return indices.size() / sizeof(IndexType);\n  }\n};\n\nstruct VectorDataBuffer {\n  std::variant<DenseVectorBuffer, SparseVectorBuffer> vector_buffer;\n};\n\n\nstruct ReadOptions {\n  bool use_mmap{true};\n  bool create_new{false};\n  bool read_only{false};\n};\n\nstruct MergeOptions {\n  uint32_t write_concurrency{1};\n  ailego::ThreadPool *pool{nullptr};\n};\n\nstruct GroupByParams {\n  
GroupByParams(uint32_t group_topk, uint32_t group_count,\n                std::function<std::string(uint64_t key)> group_by)\n      : group_topk(group_topk),\n        group_count(group_count),\n        group_by(std::move(group_by)) {}\n\n  uint32_t group_topk{0};\n  uint32_t group_count{0};\n  std::function<std::string(uint64_t key)> group_by{};\n};\n\nstruct RefinerParam {\n  float scale_factor_{10};\n  std::shared_ptr<VectorColumnIndexer> reference_indexer{nullptr};\n};\n\n// This is an internal version, while QueryParams in doc.h is an interface ver\nstruct QueryParams {\n  DataType data_type{DataType::UNDEFINED};\n  uint32_t dimension{0U};\n  uint32_t topk{0U};\n  mutable const IndexFilter *filter{nullptr};\n  bool fetch_vector{false};\n  zvec::QueryParams::Ptr query_params;\n  std::unique_ptr<GroupByParams> group_by;\n  // TODO: 1. should be uint32? 2. if no batch mode, change to optional<vector>\n  std::vector<std::vector<uint64_t>> bf_pks{};\n\n  std::shared_ptr<RefinerParam> refiner_param{nullptr};\n\n  ailego::Params extra_params{};\n};\n}  // namespace vector_column_params\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/column/vector_column/vector_index_results.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstddef>\n#include <memory>\n#include <zvec/core/framework/index_document.h>\n#include \"db/common/typedef.h\"\n#include \"db/index/column/common/index_results.h\"\n\n// TODO: eliminate aitheta2 dependency for decoupling\n\nnamespace zvec {\n\nclass VectorIndexResults : public IndexResults {\n public:\n  class VectorIterator : public IndexResults::Iterator {\n   public:\n    VectorIterator(const VectorIndexResults *rs) : rs_(rs) {}\n\n    VectorIterator(const VectorIndexResults *rs, uint32_t index)\n        : rs_(rs), index_(index) {}\n\n   public:\n    idx_t doc_id() const override {\n      return rs_->document(index_).key();\n    }\n\n    float score() const override {\n      return rs_->document(index_).score();\n    }\n\n    void next() override {\n      index_++;\n    }\n\n    bool valid() const override {\n      return (index_ < rs_->count());\n    }\n\n    const vector_column_params::VectorData vector() const override {\n      if (is_sparse()) {\n        return vector_column_params::VectorData{\n            vector_column_params::SparseVector{sparse_count(),\n                                               sparse_indices().data(),\n                                               sparse_values().data()}};\n      }\n      return vector_column_params::VectorData{\n          
vector_column_params::DenseVector{dense_vector()}};\n    }\n\n   private:\n    const void *dense_vector() const {\n      if (!rs_->reverted_vector_list_.empty()) {\n        return rs_->reverted_vector_list_[index_].data();\n      }\n      return rs_->document(index_).vector();\n    }\n    uint32_t sparse_count() const {\n      return rs_->document(index_).sparse_doc().sparse_count();\n    }\n\n    const std::string &sparse_indices() const {\n      return rs_->document(index_).sparse_doc().sparse_indices();\n    }\n\n    const std::string &sparse_values() const {\n      if (!rs_->reverted_sparse_values_list_.empty()) {\n        return rs_->reverted_sparse_values_list_[index_];\n      }\n      return rs_->document(index_).sparse_doc().sparse_values();\n    }\n\n   private:\n    const VectorIndexResults *rs_{nullptr};\n    uint32_t index_{0U};\n  };\n\n  friend class VectorIterator;\n\n public:\n  // VectorIndexResults(core::IndexDocumentList &&doc_list)\n  //     : docs_(std::move(doc_list)) {}\n  //\n  // VectorIndexResults(core::IndexDocumentList &&doc_list,\n  //                    std::vector<std::string> &&reverted_vector_list)\n  //     : docs_(std::move(doc_list)),\n  //       reverted_vector_list_(std::move(reverted_vector_list)) {}\n  VectorIndexResults(bool is_sparse, core::IndexDocumentList &&doc_list,\n                     std::vector<std::string> &&reverted_vector_list,\n                     std::vector<std::string> &&reverted_sparse_values_list)\n      : is_sparse_(is_sparse),\n        docs_(std::move(doc_list)),\n        reverted_vector_list_(std::move(reverted_vector_list)),\n        reverted_sparse_values_list_(std::move(reverted_sparse_values_list)) {}\n\n public:\n  IndexResults::IteratorUPtr create_iterator() override {\n    auto ret = std::unique_ptr<VectorIterator>(new VectorIterator(this));\n    ret->set_is_sparse(is_sparse_);\n    return ret;\n  }\n\n  size_t count() const override {\n    return docs_.size();\n  }\n\n public:  // unique 
method\n  core::IndexDocumentList &docs() {\n    return docs_;\n  }\n\n  std::vector<std::string> &reverted_vector_list() {\n    return reverted_vector_list_;\n  }\n\n  std::vector<std::string> &reverted_sparse_values_list() {\n    return reverted_sparse_values_list_;\n  }\n\n\n private:\n  const core::IndexDocument &document(size_t index) const {\n    return docs_[index];\n  }\n\n private:\n  bool is_sparse_;\n  core::IndexDocumentList docs_{};\n  std::vector<std::string> reverted_vector_list_{};\n  std::vector<std::string> reverted_sparse_values_list_{};\n};\n\nclass GroupVectorIndexResults : public IndexResults {\n public:\n  class GroupVectorIterator : public IndexResults::Iterator {\n   public:\n    GroupVectorIterator(const GroupVectorIndexResults *rs) : rs_(rs) {}\n\n   public:\n    idx_t doc_id() const override {\n      return rs_->document(group_index_, doc_index_).key();\n    }\n\n    float score() const override {\n      return rs_->document(group_index_, doc_index_).score();\n    }\n\n    void next() override {\n      doc_index_++;\n      if (doc_index_ >= rs_->groups_[group_index_].docs().size()) {\n        group_index_++;\n        doc_index_ = 0;\n      }\n    }\n\n    bool valid() const override {\n      return group_index_ < rs_->groups_.size();\n    }\n\n    const vector_column_params::VectorData vector() const override {\n      if (is_sparse()) {\n        return vector_column_params::VectorData{\n            vector_column_params::SparseVector{sparse_count(),\n                                               sparse_indices().data(),\n                                               sparse_values().data()}};\n      }\n      return vector_column_params::VectorData{\n          vector_column_params::DenseVector{dense_vector()}};\n    }\n\n   private:\n    const void *dense_vector() const {\n      if (!rs_->reverted_vector_list_.empty()) {\n        return rs_->reverted_vector_list_[group_index_][doc_index_].data();\n      }\n      return 
rs_->document(group_index_, doc_index_).vector();\n    }\n\n    uint32_t sparse_count() const {\n      return rs_->document(group_index_, doc_index_)\n          .sparse_doc()\n          .sparse_count();\n    }\n\n    const std::string &sparse_indices() const {\n      return rs_->document(group_index_, doc_index_)\n          .sparse_doc()\n          .sparse_indices();\n    }\n\n    const std::string &sparse_values() const {\n      if (!rs_->reverted_sparse_values_list_.empty()) {\n        return rs_->reverted_sparse_values_list_[group_index_][doc_index_];\n      }\n      return rs_->document(group_index_, doc_index_)\n          .sparse_doc()\n          .sparse_values();\n    }\n\n    const std::string &group_id() const override {\n      return rs_->groups_[group_index_].group_id();\n    }\n\n   private:\n    const GroupVectorIndexResults *rs_{nullptr};\n    uint32_t group_index_{0U};\n    uint32_t doc_index_{0U};\n  };\n\n  friend class GroupVectorIterator;\n\n public:\n  GroupVectorIndexResults(core::IndexGroupDocumentList &&group_list)\n      : groups_(std::move(group_list)) {\n    init_count();\n  }\n\n  GroupVectorIndexResults(\n      core::IndexGroupDocumentList &&group_list,\n      std::vector<std::vector<std::string>> &&reverted_vector_list)\n      : groups_(std::move(group_list)),\n        reverted_vector_list_(std::move(reverted_vector_list)) {\n    init_count();\n  }\n\n  GroupVectorIndexResults(\n      core::IndexGroupDocumentList &&group_list,\n      std::vector<std::vector<std::string>> &&reverted_vector_list,\n      std::vector<std::vector<std::string>> &&reverted_sparse_values_list)\n      : groups_(std::move(group_list)),\n        reverted_vector_list_(std::move(reverted_vector_list)),\n        reverted_sparse_values_list_(std::move(reverted_sparse_values_list)) {}\n\n public:\n  IndexResults::IteratorUPtr create_iterator() override {\n    return std::unique_ptr<GroupVectorIterator>(new GroupVectorIterator(this));\n  }\n\n\n  size_t count() const 
override {\n    return count_;\n  }\n\n public:  // unique method\n  core::IndexGroupDocumentList &groups() {\n    return groups_;\n  }\n\n private:\n  const core::IndexDocument &document(size_t group_index,\n                                      size_t doc_index) const {\n    return groups_[group_index].docs()[doc_index];\n  }\n\n  void init_count() {\n    count_ = 0;\n    for (const auto &group : groups_) {\n      count_ += group.docs().size();\n    }\n  }\n\n private:\n  core::IndexGroupDocumentList groups_{};\n  std::vector<std::vector<std::string>> reverted_vector_list_{};\n  std::vector<std::vector<std::string>> reverted_sparse_values_list_{};\n  size_t count_{0};\n};\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/common/delete_store.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n\n#include <memory>\n#include <string>\n#include \"db/common/concurrent_roaring_bitmap.h\"\n#include \"index_filter.h\"\n\n\nnamespace zvec {\n\n\nclass DeleteStore : public std::enable_shared_from_this<DeleteStore> {\n public:\n  using Ptr = std::shared_ptr<DeleteStore>;\n\n  explicit DeleteStore(std::string collection_name)\n      : collection_name_(std::move(collection_name)) {};\n\n  ~DeleteStore() {\n    LOG_INFO(\"Closed delete store\");\n  }\n\n  static Ptr CreateAndLoad(std::string collection_name,\n                           const std::string &file_path) {\n    if (file_path.empty()) {\n      LOG_ERROR(\"File path is empty\");\n      return nullptr;\n    }\n    DeleteStore::Ptr ptr =\n        std::make_shared<DeleteStore>(std::move(collection_name));\n    if (ptr->load(file_path).ok()) {\n      return ptr;\n    } else {\n      return nullptr;\n    }\n  }\n\n\n private:\n  DeleteStore(const DeleteStore &) = delete;\n  DeleteStore &operator=(const DeleteStore &) = delete;\n  DeleteStore &operator=(DeleteStore &&) = delete;\n\n\n public:\n  class Filter : public IndexFilter {\n   public:\n    explicit Filter(std::shared_ptr<const DeleteStore> delete_store)\n        : delete_store_(std::move(delete_store)) {}\n\n    bool is_filtered(uint64_t id) const override {\n      return delete_store_->is_deleted(id);\n    }\n\n   
private:\n    const std::shared_ptr<const DeleteStore> delete_store_;\n  };\n\n  Status load(const std::string &file_path) {\n    Status status = bitmap_.deserialize(file_path);\n    if (status.ok()) {\n      empty_ = bitmap_.cardinality() == 0 ? true : false;\n      LOG_INFO(\"Opened delete store, count[%lu]\", bitmap_.cardinality());\n    } else {\n      LOG_ERROR(\"Failed to load delete store from file[%s]\", file_path.c_str());\n    }\n    return status;\n  }\n\n  Status flush(const std::string &file_path) {\n    Status status = bitmap_.serialize(file_path, true);\n    if (status.ok()) {\n      LOG_DEBUG(\"Flushed delete store to file[%s]\", file_path.c_str());\n      modified_since_last_flush_ = false;\n    } else {\n      LOG_ERROR(\"Failed to flush delete store to file[%s]\", file_path.c_str());\n    }\n    return status;\n  }\n\n  void mark_deleted(uint64_t doc_id) {\n    bitmap_.add(doc_id);\n    empty_ = false;\n    modified_since_last_flush_ = true;\n  }\n\n  bool is_deleted(uint64_t doc_id) const {\n    return bitmap_.contains(doc_id);\n  }\n\n  std::shared_ptr<IndexFilter> make_filter() const {\n    return empty_ ? nullptr : std::make_shared<Filter>(shared_from_this());\n  };\n\n  size_t storage_size_in_bytes() const {\n    return bitmap_.storage_size_in_bytes();\n  }\n\n  size_t count() const {\n    return bitmap_.cardinality();\n  }\n\n  size_t range_count(uint64_t min_doc_id, uint64_t max_doc_id) const {\n    return bitmap_.range_cardinality(min_doc_id, max_doc_id);\n  }\n\n  const std::string &collection_name() const {\n    return collection_name_;\n  }\n\n  bool modified_since_last_flush() const {\n    return modified_since_last_flush_;\n  }\n\n  Ptr clone() const {\n    auto ptr = std::make_shared<DeleteStore>(collection_name_);\n    ptr->bitmap_ = bitmap_;\n    ptr->empty_ = bitmap_.cardinality() == 0 ? 
true : false;\n    ptr->modified_since_last_flush_ = false;\n    return ptr;\n  }\n\n  bool empty() const {\n    return empty_;\n  }\n\n private:\n  using FILE = ailego::File;\n\n  const std::string collection_name_{};\n  ConcurrentRoaringBitmap64 bitmap_{};\n  bool empty_{true};\n  bool modified_since_last_flush_{false};\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/doc.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cmath>\n#include <cstdint>\n#include <cstdlib>\n#include <cstring>\n#include <regex>\n#include <stdexcept>\n#include <zvec/db/doc.h>\n#include \"db/common/constants.h\"\n#include \"db/index/common/type_helper.h\"\n\n#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__\n#define IS_BIG_ENDIAN 1\n#else\n#define IS_BIG_ENDIAN 0\n#endif\n\n\nnamespace zvec {\n\nenum ValueType : uint8_t {\n  TYPE_EMPTY = 0,\n  TYPE_BOOL = 1,\n  TYPE_INT32 = 2,\n  TYPE_UINT32 = 3,\n  TYPE_INT64 = 4,\n  TYPE_UINT64 = 5,\n  TYPE_FLOAT = 6,\n  TYPE_DOUBLE = 7,\n  TYPE_STRING = 8,\n  TYPE_VECTOR_BOOL = 9,\n  TYPE_VECTOR_INT8 = 10,\n  TYPE_VECTOR_INT16 = 11,\n  TYPE_VECTOR_INT32 = 12,\n  TYPE_VECTOR_INT64 = 13,\n  TYPE_VECTOR_UINT32 = 14,\n  TYPE_VECTOR_UINT64 = 15,\n  TYPE_VECTOR_FLOAT16 = 16,\n  TYPE_VECTOR_FLOAT = 17,\n  TYPE_VECTOR_DOUBLE = 18,\n  TYPE_VECTOR_STRING = 19,\n  TYPE_VECTOR_PAIR_INT_FLOAT = 20,\n  TYPE_VECTOR_PAIR_INT_FLOAT16 = 21,\n};\n\nstd::string get_value_type_name(const Doc::Value &value, bool is_vector) {\n  return std::visit(\n      [&](const auto &v) -> std::string {\n        using T = std::decay_t<decltype(v)>;\n        if constexpr (std::is_same_v<T, std::monostate>) {\n          return \"EMPTY\";\n        } else if constexpr (std::is_same_v<T, bool>) {\n          return \"BOOL\";\n        } else if constexpr 
(std::is_same_v<T, int32_t>) {\n          return \"INT32\";\n        } else if constexpr (std::is_same_v<T, uint32_t>) {\n          return \"UINT32\";\n        } else if constexpr (std::is_same_v<T, int64_t>) {\n          return \"INT64\";\n        } else if constexpr (std::is_same_v<T, uint64_t>) {\n          return \"UINT64\";\n        } else if constexpr (std::is_same_v<T, float>) {\n          return \"FLOAT\";\n        } else if constexpr (std::is_same_v<T, double>) {\n          return \"DOUBLE\";\n        } else if constexpr (std::is_same_v<T, std::string>) {\n          return \"STRING\";\n        } else if constexpr (std::is_same_v<T, std::vector<bool>>) {\n          return \"ARRAY_BOOL\";\n        } else if constexpr (std::is_same_v<T, std::vector<int8_t>>) {\n          return \"VECTOR_INT8\";\n        } else if constexpr (std::is_same_v<T, std::vector<int16_t>>) {\n          return \"VECTOR_INT16\";\n        } else if constexpr (std::is_same_v<T, std::vector<int32_t>>) {\n          return is_vector ? \"VECTOR_INT32\" : \"ARRAY_INT32\";\n        } else if constexpr (std::is_same_v<T, std::vector<int64_t>>) {\n          return is_vector ? \"VECTOR_INT64\" : \"ARRAY_INT64\";\n        } else if constexpr (std::is_same_v<T, std::vector<uint32_t>>) {\n          return is_vector ? \"VECTOR_UINT32\" : \"ARRAY_UINT32\";\n        } else if constexpr (std::is_same_v<T, std::vector<uint64_t>>) {\n          return is_vector ? 
\"VECTOR_UINT64\" : \"ARRAY_UINT64\";\n        } else if constexpr (std::is_same_v<T, std::vector<float16_t>>) {\n          return \"VECTOR_FP16\";\n        } else if constexpr (std::is_same_v<T, std::vector<float>>) {\n          return \"VECTOR_FP32\";\n        } else if constexpr (std::is_same_v<T, std::vector<double>>) {\n          return \"VECTOR_FP64\";\n        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {\n          return \"ARRAY_STRING\";\n        } else if constexpr (std::is_same_v<T, std::pair<std::vector<uint32_t>,\n                                                         std::vector<float>>>) {\n          return \"SPARSE_VECTOR_FP32\";\n        } else if constexpr (std::is_same_v<\n                                 T, std::pair<std::vector<uint32_t>,\n                                              std::vector<float16_t>>>) {\n          return \"SPARSE_VECTOR_FP16\";\n        } else {\n          return \"unknown type\";\n        }\n      },\n      value);\n}\n\ntemplate <typename T>\nT byte_swap(T value) {\n  if constexpr (std::is_same_v<T, float16_t>) {\n    uint16_t val = *reinterpret_cast<uint16_t *>(&value);\n    val = __builtin_bswap16(val);\n    return *reinterpret_cast<float16_t *>(&val);\n  } else if constexpr (sizeof(T) == 1) {\n    return value;\n  } else if constexpr (sizeof(T) == 2) {\n    return (value << 8) | ((value >> 8) & 0xFF);\n  } else if constexpr (sizeof(T) == 4) {\n    return __builtin_bswap32(value);\n  } else if constexpr (sizeof(T) == 8) {\n    return __builtin_bswap64(value);\n  } else {\n    T result = 0;\n    for (size_t i = 0; i < sizeof(T); ++i) {\n      result |= ((value >> (i * 8)) & 0xFF) << ((sizeof(T) - 1 - i) * 8);\n    }\n    return result;\n  }\n}\n\ntemplate <typename T>\nvoid write_value_to_buffer(std::vector<uint8_t> &buffer, const T &value) {\n  T write_value = value;\n  if (IS_BIG_ENDIAN) {\n    write_value = byte_swap<T>(value);\n  }\n  const uint8_t *bytes = reinterpret_cast<const uint8_t 
*>(&write_value);\n  buffer.insert(buffer.end(), bytes, bytes + sizeof(T));\n}\n\ntemplate <typename T>\nT read_value_from_buffer(const uint8_t *&data) {\n  T value;\n  std::memcpy(&value, data, sizeof(T));\n  data += sizeof(T);\n\n  if (IS_BIG_ENDIAN) {\n    value = byte_swap<T>(value);\n  }\n  return value;\n}\n\n\nvoid Doc::write_to_buffer(std::vector<uint8_t> &buffer, const void *src,\n                          size_t size) {\n  const uint8_t *bytes = static_cast<const uint8_t *>(src);\n  buffer.insert(buffer.end(), bytes, bytes + size);\n}\n\nvoid Doc::read_from_buffer(const uint8_t *&data, void *dest, size_t size) {\n  std::memcpy(dest, data, size);\n  data += size;\n}\n\nvoid Doc::serialize_value(std::vector<uint8_t> &buffer, const Value &value) {\n  std::visit(\n      [&buffer](const auto &v) {\n        using T = std::decay_t<decltype(v)>;\n\n        if constexpr (std::is_same_v<T, std::monostate>) {\n          uint8_t type = TYPE_EMPTY;\n          write_to_buffer(buffer, &type, sizeof(type));\n        } else if constexpr (std::is_same_v<T, bool>) {\n          uint8_t type = TYPE_BOOL;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_to_buffer(buffer, &v, sizeof(v));\n        } else if constexpr (std::is_same_v<T, int32_t>) {\n          uint8_t type = TYPE_INT32;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_value_to_buffer<int32_t>(buffer, v);\n        } else if constexpr (std::is_same_v<T, int64_t>) {\n          uint8_t type = TYPE_INT64;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_value_to_buffer<int64_t>(buffer, v);\n        } else if constexpr (std::is_same_v<T, uint32_t>) {\n          uint8_t type = TYPE_UINT32;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_value_to_buffer<uint32_t>(buffer, v);\n        } else if constexpr (std::is_same_v<T, uint64_t>) {\n          uint8_t type = TYPE_UINT64;\n          write_to_buffer(buffer, &type, 
sizeof(type));\n          write_value_to_buffer<uint64_t>(buffer, v);\n        } else if constexpr (std::is_same_v<T, float>) {\n          uint8_t type = TYPE_FLOAT;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_value_to_buffer<float>(buffer, v);\n        } else if constexpr (std::is_same_v<T, double>) {\n          uint8_t type = TYPE_DOUBLE;\n          write_to_buffer(buffer, &type, sizeof(type));\n          write_value_to_buffer<double>(buffer, v);\n        } else if constexpr (std::is_same_v<T, std::string>) {\n          uint8_t type = TYPE_STRING;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          write_to_buffer(buffer, v.data(), len);\n        } else if constexpr (std::is_same_v<T, std::vector<bool>>) {\n          uint8_t type = TYPE_VECTOR_BOOL;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          for (bool b : v) {\n            write_to_buffer(buffer, &b, sizeof(b));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<int8_t>>) {\n          uint8_t type = TYPE_VECTOR_INT8;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          write_to_buffer(buffer, v.data(), len * sizeof(int8_t));\n        } else if constexpr (std::is_same_v<T, std::vector<int16_t>>) {\n          uint8_t type = TYPE_VECTOR_INT16;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              int16_t swapped = byte_swap<int16_t>(val);\n              
write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(int16_t));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<int32_t>>) {\n          uint8_t type = TYPE_VECTOR_INT32;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              int32_t swapped = byte_swap<int32_t>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(int32_t));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<int64_t>>) {\n          uint8_t type = TYPE_VECTOR_INT64;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              int64_t swapped = byte_swap<int64_t>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(int64_t));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<uint32_t>>) {\n          uint8_t type = TYPE_VECTOR_UINT32;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              uint32_t swapped = byte_swap<uint32_t>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(uint32_t));\n          
}\n        } else if constexpr (std::is_same_v<T, std::vector<uint64_t>>) {\n          uint8_t type = TYPE_VECTOR_UINT64;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              uint64_t swapped = byte_swap<uint64_t>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(uint64_t));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<float>>) {\n          uint8_t type = TYPE_VECTOR_FLOAT;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              float swapped = byte_swap<float>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(float));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<double>>) {\n          uint8_t type = TYPE_VECTOR_DOUBLE;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              double swapped = byte_swap<double>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(double));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<float16_t>>) {\n          uint8_t type = TYPE_VECTOR_FLOAT16;\n          write_to_buffer(buffer, &type, 
sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &val : v) {\n              float16_t swapped = byte_swap<float16_t>(val);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            write_to_buffer(buffer, v.data(), len * sizeof(float16_t));\n          }\n        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {\n          uint8_t type = TYPE_VECTOR_STRING;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          for (const auto &s : v) {\n            uint32_t str_len = static_cast<uint32_t>(s.size());\n            write_value_to_buffer<uint32_t>(buffer, str_len);\n            write_to_buffer(buffer, s.data(), str_len);\n          }\n        } else if constexpr (std::is_same_v<T, std::pair<std::vector<uint32_t>,\n                                                         std::vector<float>>>) {\n          uint8_t type = TYPE_VECTOR_PAIR_INT_FLOAT;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.first.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &p : v.first) {\n              uint32_t swapped = byte_swap<uint32_t>(p);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            for (const auto &p : v.first) {\n              write_to_buffer(buffer, &p, sizeof(p));\n            }\n          }\n          len = static_cast<uint32_t>(v.second.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &p : v.second) {\n              float swapped = byte_swap<float>(p);\n         
     write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            for (const auto &p : v.second) {\n              write_to_buffer(buffer, &p, sizeof(p));\n            }\n          }\n        } else if constexpr (std::is_same_v<\n                                 T, std::pair<std::vector<uint32_t>,\n                                              std::vector<float16_t>>>) {\n          uint8_t type = TYPE_VECTOR_PAIR_INT_FLOAT16;\n          write_to_buffer(buffer, &type, sizeof(type));\n          uint32_t len = static_cast<uint32_t>(v.first.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &p : v.first) {\n              uint32_t swapped = byte_swap<uint32_t>(p);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            for (const auto &p : v.first) {\n              write_to_buffer(buffer, &p, sizeof(p));\n            }\n          }\n          len = static_cast<uint32_t>(v.second.size());\n          write_value_to_buffer<uint32_t>(buffer, len);\n          if (IS_BIG_ENDIAN) {\n            for (const auto &p : v.second) {\n              float16_t swapped = byte_swap<float16_t>(p);\n              write_to_buffer(buffer, &swapped, sizeof(swapped));\n            }\n          } else {\n            for (const auto &p : v.second) {\n              write_to_buffer(buffer, &p, sizeof(p));\n            }\n          }\n        }\n      },\n      value);\n}\n\n\nDoc::Value Doc::deserialize_value(const uint8_t *&data) {\n  uint8_t type;\n  read_from_buffer(data, &type, sizeof(type));\n\n  switch (type) {\n    case TYPE_EMPTY: {\n      return std::monostate{};\n    }\n    case TYPE_BOOL: {\n      bool v;\n      read_from_buffer(data, &v, sizeof(v));\n      return v;\n    }\n    case TYPE_INT32: {\n      return read_value_from_buffer<int32_t>(data);\n    }\n    case TYPE_INT64: {\n      return 
read_value_from_buffer<int64_t>(data);\n    }\n    case TYPE_UINT32: {\n      return read_value_from_buffer<uint32_t>(data);\n    }\n    case TYPE_UINT64: {\n      return read_value_from_buffer<uint64_t>(data);\n    }\n    case TYPE_FLOAT: {\n      return read_value_from_buffer<float>(data);\n    }\n    case TYPE_DOUBLE: {\n      return read_value_from_buffer<double>(data);\n    }\n    case TYPE_STRING: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::string v(reinterpret_cast<const char *>(data), len);\n      data += len;\n      return v;\n    }\n    case TYPE_VECTOR_BOOL: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<bool> v;\n      v.reserve(len);\n      for (uint32_t i = 0; i < len; ++i) {\n        bool b;\n        read_from_buffer(data, &b, sizeof(b));\n        v.push_back(b);\n      }\n      return v;\n    }\n    case TYPE_VECTOR_INT8: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<int8_t> v(len);\n      read_from_buffer(data, v.data(), len * sizeof(int8_t));\n      return v;\n    }\n    case TYPE_VECTOR_INT16: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<int16_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<int16_t>(read_value_from_buffer<int16_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(int16_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_INT32: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<int32_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<int32_t>(read_value_from_buffer<int32_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(int32_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_INT64: {\n      uint32_t len = 
read_value_from_buffer<uint32_t>(data);\n      std::vector<int64_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<int64_t>(read_value_from_buffer<int64_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(int64_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_UINT32: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<uint32_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(uint32_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_UINT64: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<uint64_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<uint64_t>(read_value_from_buffer<uint64_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(uint64_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_FLOAT: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<float> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<float>(read_value_from_buffer<float>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(float));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_DOUBLE: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<double> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<double>(read_value_from_buffer<double>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(double));\n      }\n      return v;\n    }\n    case 
TYPE_VECTOR_FLOAT16: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<float16_t> v(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v[i] = byte_swap<float16_t>(read_value_from_buffer<float16_t>(data));\n        }\n      } else {\n        read_from_buffer(data, v.data(), len * sizeof(float16_t));\n      }\n      return v;\n    }\n    case TYPE_VECTOR_STRING: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::vector<std::string> v;\n      v.reserve(len);\n      for (uint32_t i = 0; i < len; ++i) {\n        uint32_t str_len = read_value_from_buffer<uint32_t>(data);\n        std::string s(reinterpret_cast<const char *>(data), str_len);\n        data += str_len;\n        v.push_back(s);\n      }\n      return v;\n    }\n    case TYPE_VECTOR_PAIR_INT_FLOAT: {\n      uint32_t len = read_value_from_buffer<uint32_t>(data);\n      std::pair<std::vector<uint32_t>, std::vector<float>> v;\n      v.first.reserve(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v.first.push_back(\n              byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data)));\n        }\n      } else {\n        for (uint32_t i = 0; i < len; ++i) {\n          uint32_t first;\n          read_from_buffer(data, &first, sizeof(first));\n          v.first.push_back(first);\n        }\n      }\n      len = read_value_from_buffer<uint32_t>(data);\n      v.second.reserve(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v.second.push_back(\n              byte_swap<float>(read_value_from_buffer<float>(data)));\n        }\n      } else {\n        for (uint32_t i = 0; i < len; ++i) {\n          float second;\n          read_from_buffer(data, &second, sizeof(second));\n          v.second.push_back(second);\n        }\n      }\n      return v;\n    }\n    case TYPE_VECTOR_PAIR_INT_FLOAT16: {\n      uint32_t len = 
read_value_from_buffer<uint32_t>(data);\n      std::pair<std::vector<uint32_t>, std::vector<float16_t>> v;\n      v.first.reserve(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v.first.push_back(\n              byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data)));\n        }\n      } else {\n        for (uint32_t i = 0; i < len; ++i) {\n          uint32_t first;\n          read_from_buffer(data, &first, sizeof(first));\n          v.first.push_back(first);\n        }\n      }\n      len = read_value_from_buffer<uint32_t>(data);\n      v.second.reserve(len);\n      if (IS_BIG_ENDIAN) {\n        for (uint32_t i = 0; i < len; ++i) {\n          v.second.push_back(\n              byte_swap<float16_t>(read_value_from_buffer<float16_t>(data)));\n        }\n      } else {\n        for (uint32_t i = 0; i < len; ++i) {\n          float16_t second;\n          read_from_buffer(data, &second, sizeof(second));\n          v.second.push_back(second);\n        }\n      }\n      return v;\n    }\n\n    default:\n      throw std::runtime_error(\"Unknown value type: \" + std::to_string(type));\n  }\n}\n\nstd::vector<uint8_t> Doc::serialize() const {\n  std::vector<uint8_t> buffer;\n  uint32_t pk_len = static_cast<uint32_t>(pk_.size());\n  write_to_buffer(buffer, &pk_len, sizeof(pk_len));\n  write_to_buffer(buffer, pk_.data(), pk_len);\n\n  write_to_buffer(buffer, &score_, sizeof(score_));\n  write_to_buffer(buffer, &doc_id_, sizeof(doc_id_));\n  write_to_buffer(buffer, &op_, sizeof(op_));\n\n  uint32_t field_count = static_cast<uint32_t>(fields_.size());\n  write_to_buffer(buffer, &field_count, sizeof(field_count));\n\n  for (const auto &[field_name, value] : fields_) {\n    uint32_t name_len = static_cast<uint32_t>(field_name.size());\n    write_to_buffer(buffer, &name_len, sizeof(name_len));\n    write_to_buffer(buffer, field_name.data(), name_len);\n\n    serialize_value(buffer, value);\n  }\n\n  return buffer;\n}\n\nDoc::Ptr 
Doc::deserialize(const uint8_t *data, size_t /*size*/) {\n  const uint8_t *ptr = data;\n  Doc::Ptr doc = std::make_shared<Doc>();\n\n  uint32_t pk_len = read_value_from_buffer<uint32_t>(ptr);\n  std::string pk(reinterpret_cast<const char *>(ptr), pk_len);\n  ptr += pk_len;\n  doc->set_pk(pk);\n\n  float score = read_value_from_buffer<float>(ptr);\n  doc->set_score(score);\n\n  uint64_t doc_id = read_value_from_buffer<uint64_t>(ptr);\n  doc->set_doc_id(doc_id);\n\n  Operator op;\n  read_from_buffer(ptr, &op, sizeof(op));\n  doc->set_operator(op);\n\n  uint32_t field_count = read_value_from_buffer<uint32_t>(ptr);\n\n  for (uint32_t i = 0; i < field_count; ++i) {\n    uint32_t name_len = read_value_from_buffer<uint32_t>(ptr);\n    std::string field_name(reinterpret_cast<const char *>(ptr), name_len);\n    ptr += name_len;\n\n    Doc::Value value = deserialize_value(ptr);\n    doc->fields_[field_name] = value;\n  }\n\n  return doc;\n}\n\nStatus Doc::validate(const CollectionSchema::Ptr &schema,\n                     bool is_update) const {\n  if (!schema) {\n    return Status::InternalError(\"doc validate failed: schema is null\");\n  }\n\n  if (pk_.empty()) {\n    return Status::InvalidArgument(\"doc validate failed: doc_id is not set\");\n  }\n\n  if (!std::regex_match(pk_, DOC_PK_REGEX)) {\n    return Status::InvalidArgument(\"doc validate failed: doc_id[\", pk_,\n                                   \"] cannot pass the regex verification\");\n  }\n\n  // check doc fields match schema\n  for (auto &[name, value] : fields_) {\n    if (!schema->has_field(name)) {\n      return Status::InvalidArgument(\"doc validate failed: field[\", name,\n                                     \"] does not exist in collection's schema\");\n    }\n  }\n\n  const auto &fields = schema->fields();\n  for (auto const &field_schema : fields) {\n    auto field_name = field_schema->name();\n    auto field_pair = fields_.find(field_name);\n    if (field_pair == fields_.end()) {\n      if 
(field_schema->nullable() || is_update) {\n        continue;\n      }\n      return Status::InvalidArgument(\n          \"doc validate failed: field[\", field_name,\n          \"] is configured not nullable, but doc does not contain this field\");\n    } else {\n      if (std::holds_alternative<std::monostate>(field_pair->second)) {\n        if (field_schema->nullable()) {\n          continue;\n        }\n        return Status::InvalidArgument(\n            \"doc validate failed: field[\", field_name,\n            \"] is configured not nullable, but doc's field value is empty\");\n      }\n    }\n\n    const Value &field_value = field_pair->second;\n    DataType expected_type = field_schema->data_type();\n    bool type_match = true;\n    uint32_t value_dimension = 0;\n\n    switch (expected_type) {\n      case DataType::BINARY:\n        type_match = std::holds_alternative<std::string>(field_value);\n        break;\n      case DataType::STRING:\n        type_match = std::holds_alternative<std::string>(field_value);\n        break;\n      case DataType::BOOL:\n        type_match = std::holds_alternative<bool>(field_value);\n        break;\n      case DataType::INT32:\n        type_match = std::holds_alternative<int32_t>(field_value);\n        break;\n      case DataType::UINT32:\n        type_match = std::holds_alternative<uint32_t>(field_value);\n        break;\n      case DataType::INT64:\n        type_match = std::holds_alternative<int64_t>(field_value);\n        break;\n      case DataType::UINT64:\n        type_match = std::holds_alternative<uint64_t>(field_value);\n        break;\n      case DataType::FLOAT:\n        type_match = std::holds_alternative<float>(field_value);\n        break;\n      case DataType::DOUBLE:\n        type_match = std::holds_alternative<double>(field_value);\n        break;\n      case DataType::ARRAY_BINARY:\n        type_match =\n            std::holds_alternative<std::vector<std::string>>(field_value);\n        break;\n      case 
DataType::ARRAY_STRING:\n        type_match =\n            std::holds_alternative<std::vector<std::string>>(field_value);\n        break;\n      case DataType::ARRAY_BOOL:\n        type_match = std::holds_alternative<std::vector<bool>>(field_value);\n        break;\n      case DataType::ARRAY_INT32:\n        type_match = std::holds_alternative<std::vector<int32_t>>(field_value);\n        break;\n      case DataType::ARRAY_INT64:\n        type_match = std::holds_alternative<std::vector<int64_t>>(field_value);\n        break;\n      case DataType::ARRAY_UINT32:\n        type_match = std::holds_alternative<std::vector<uint32_t>>(field_value);\n        break;\n      case DataType::ARRAY_UINT64:\n        type_match = std::holds_alternative<std::vector<uint64_t>>(field_value);\n        break;\n      case DataType::ARRAY_FLOAT:\n        type_match = std::holds_alternative<std::vector<float>>(field_value);\n        break;\n      case DataType::ARRAY_DOUBLE:\n        type_match = std::holds_alternative<std::vector<double>>(field_value);\n        break;\n      case DataType::VECTOR_BINARY32: {\n        type_match = std::holds_alternative<std::vector<uint32_t>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<uint32_t>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::VECTOR_BINARY64: {\n        type_match = std::holds_alternative<std::vector<uint64_t>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<uint64_t>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::VECTOR_FP16: {\n        type_match =\n            std::holds_alternative<std::vector<float16_t>>(field_value);\n        if (type_match) {\n          value_dimension =\n              std::get<std::vector<float16_t>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::VECTOR_FP32: {\n        type_match = 
std::holds_alternative<std::vector<float>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<float>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::VECTOR_FP64: {\n        type_match = std::holds_alternative<std::vector<double>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<double>>(field_value).size();\n        }\n        break;\n      }\n      // case DataType::VECTOR_INT4:\n      //   type_match =\n      //   std::holds_alternative<std::vector<int8_t>>(field_value); break;\n      case DataType::VECTOR_INT8: {\n        type_match = std::holds_alternative<std::vector<int8_t>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<int8_t>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::VECTOR_INT16: {\n        type_match = std::holds_alternative<std::vector<int16_t>>(field_value);\n        if (type_match) {\n          value_dimension = std::get<std::vector<int16_t>>(field_value).size();\n        }\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP16: {\n        type_match = std::holds_alternative<\n            std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(\n            field_value);\n        if (type_match) {\n          auto [sparse_indices, sparse_values] = std::get<\n              std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(\n              field_value);\n          if (sparse_values.size() != sparse_indices.size()) {\n            return Status::InvalidArgument(\n                \"doc validate failed: field[\", field_name,\n                \"]'s sparse vector indices and values size not match\");\n          }\n          if (sparse_indices.size() > kSparseMaxDimSize) {\n            return Status::InvalidArgument(\n                \"doc validate failed: vector[\", field_name,\n                \"], the number of sparse indices exceeds the maximum 
limit \",\n                kSparseMaxDimSize);\n          }\n        }\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP32: {\n        type_match = std::holds_alternative<\n            std::pair<std::vector<uint32_t>, std::vector<float>>>(field_value);\n        if (type_match) {\n          auto &[sparse_indices, sparse_values] =\n              std::get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                  field_value);\n          if (sparse_values.size() != sparse_indices.size()) {\n            return Status::InvalidArgument(\n                \"doc validate failed: field[\", field_name,\n                \"]'s sparse vector indices and values size not match\");\n          }\n          if (sparse_indices.size() > kSparseMaxDimSize) {\n            return Status::InvalidArgument(\n                \"doc validate failed: vector[\", field_name,\n                \"], the number of sparse indices exceeds the maximum limit \",\n                kSparseMaxDimSize);\n          }\n        }\n        break;\n      }\n      default:\n        return Status::InvalidArgument(\"doc validate failed: field[\",\n                                       field_name,\n                                       \"]'s value type is not supported\");\n        break;\n    }\n\n    if (!type_match) {\n      return Status::InvalidArgument(\n          \"doc validate failed: field[\", field_name,\n          \"]'s value type mismatch, it should be \",\n          DataTypeCodeBook::AsString(expected_type), \", but got type: \",\n          get_value_type_name(field_value, field_schema->is_vector_field()));\n    }\n    if (field_schema->is_dense_vector()) {\n      if (value_dimension != field_schema->dimension()) {\n        return Status::InvalidArgument(\n            \"doc validate failed: field[\", field_name,\n            \"]'s dimension mismatch, it should be \", field_schema->dimension(),\n            \", but got dimension: \", value_dimension);\n      }\n    }\n  }\n  
return Status::OK();\n}\n\nsize_t Doc::memory_usage() const {\n  // Base size of the object itself\n  size_t usage = sizeof(Doc);\n\n  // Calculate memory used by pk_ string\n  usage += pk_.capacity();\n\n  // Calculate memory used by fields_ hash map structure\n  usage += fields_.bucket_count() *\n           sizeof(std::unordered_map<std::string, Value>::value_type *);\n\n  // Iterate through all fields to calculate their actual memory usage\n  for (const auto &pair : fields_) {\n    const auto &key = pair.first;\n    const auto &value = pair.second;\n\n    // Memory for the key (string)\n    usage += key.capacity();\n\n    // Memory for the value (based on actual variant type)\n    usage += [&value]() -> size_t {\n      switch (value.index()) {\n        case 0:      // std::monostate\n          return 0;  // No additional memory needed\n\n        case 1:      // bool\n        case 2:      // int32_t\n        case 3:      // uint32_t\n        case 4:      // int64_t\n        case 5:      // uint64_t\n        case 6:      // float\n        case 7:      // double\n          return 0;  // Basic types are already allocated within the variant\n\n        case 8:  // std::string\n          return std::get<std::string>(value).capacity();\n\n        case 9:  // std::vector<bool>\n          return std::get<std::vector<bool>>(value).size() * sizeof(bool);\n\n        case 10:  // std::vector<int8_t>\n          return std::get<std::vector<int8_t>>(value).capacity() *\n                 sizeof(int8_t);\n\n        case 11:  // std::vector<int16_t>\n          return std::get<std::vector<int16_t>>(value).capacity() *\n                 sizeof(int16_t);\n\n        case 12:  // std::vector<int32_t>\n          return std::get<std::vector<int32_t>>(value).capacity() *\n                 sizeof(int32_t);\n\n        case 13:  // std::vector<int64_t>\n          return std::get<std::vector<int64_t>>(value).capacity() *\n                 sizeof(int64_t);\n\n        case 14:  // 
std::vector<uint32_t>\n          return std::get<std::vector<uint32_t>>(value).capacity() *\n                 sizeof(uint32_t);\n\n        case 15:  // std::vector<uint64_t>\n          return std::get<std::vector<uint64_t>>(value).capacity() *\n                 sizeof(uint64_t);\n\n        case 16:  // std::vector<float16_t>\n          return std::get<std::vector<float16_t>>(value).capacity() *\n                 sizeof(float16_t);\n\n        case 17:  // std::vector<float>\n          return std::get<std::vector<float>>(value).capacity() * sizeof(float);\n\n        case 18:  // std::vector<double>\n          return std::get<std::vector<double>>(value).capacity() *\n                 sizeof(double);\n\n        case 19:  // std::vector<std::string>\n        {\n          size_t vec_usage =\n              std::get<std::vector<std::string>>(value).capacity() *\n              sizeof(std::string);\n          for (const auto &str : std::get<std::vector<std::string>>(value)) {\n            vec_usage += str.capacity();\n          }\n          return vec_usage;\n        }\n\n        case 20:  // std::pair<std::vector<uint32_t>, std::vector<float>>\n        {\n          const auto &pair_val =\n              std::get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                  value);\n          return pair_val.first.capacity() * sizeof(uint32_t) +\n                 pair_val.second.capacity() * sizeof(float);\n        }\n\n        case 21:  // std::pair<std::vector<uint32_t>, std::vector<float16_t>>\n        {\n          const auto &pair_val = std::get<\n              std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(value);\n          return pair_val.first.capacity() * sizeof(uint32_t) +\n                 pair_val.second.capacity() * sizeof(float16_t);\n        }\n\n        default:\n          return 0;\n      }\n    }();\n  }\n\n  return usage;\n}\n\ntemplate <typename T>\nstd::string vec_to_string(const std::vector<T> &v) {\n  std::ostringstream oss;\n  
oss << \"[\";\n  for (size_t i = 0; i < v.size(); ++i) {\n    if (i > 0) oss << \", \";\n    oss << +v[i];  // + from print as char\n  }\n  oss << \"]\";\n  return oss.str();\n}\n\ntemplate <class... Ts>\nstruct overloaded : Ts... {\n  using Ts::operator()...;\n};\ntemplate <class... Ts>\noverloaded(Ts...) -> overloaded<Ts...>;\n\nstd::string Doc::to_detail_string() const {\n  std::stringstream oss;\n  oss << \"[op:\" << (uint32_t)op_ << \", doc_id: \" << doc_id_\n      << \", score: \" << score_ << \", pk: \" << pk_\n      << \", fields: \" << fields_.size() << \"]\";\n  oss << \"{\";\n  bool first_field = true;\n  for (const auto &[key, val] : fields_) {\n    if (!first_field) oss << \", \";\n    first_field = false;\n\n    oss << \"\\\"\" << key << \"\\\": \";\n\n    std::visit(\n        overloaded{\n            [&](std::monostate) { oss << \"null\"; },\n            [&](bool b) { oss << (b ? \"true\" : \"false\"); },\n            [&](int32_t i) { oss << i; },\n            [&](uint32_t u) { oss << u; },\n            [&](int64_t i) { oss << i; },\n            [&](uint64_t u) { oss << u; },\n            [&](float f) { oss << f; },\n            [&](double d) { oss << d; },\n            [&](const std::string &s) { oss << \"\\\"\" << s << \"\\\"\"; },\n            [&](const std::vector<bool> &vb) { oss << vec_to_string(vb); },\n            [&](const std::vector<int32_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<int8_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<int16_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<uint32_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<int64_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<uint64_t> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<float> &v) { oss << vec_to_string(v); },\n            [&](const std::vector<double> &v) { oss << vec_to_string(v); },\n            [&](const 
std::vector<std::string> &v) {\n              oss << \"[\";\n              for (size_t i = 0; i < v.size(); ++i) {\n                if (i > 0) oss << \", \";\n                oss << \"\\\"\" << v[i] << \"\\\"\";\n              }\n              oss << \"]\";\n            },\n            [&](const std::vector<float16_t> &v) {\n              oss << \"[\";\n              for (size_t i = 0; i < v.size(); ++i) {\n                if (i > 0) oss << \", \";\n                oss << static_cast<float>(v[i]);  // print in float\n              }\n              oss << \"]\";\n            },\n            [&](const std::pair<std::vector<uint32_t>, std::vector<float>> &p) {\n              oss << \"{first:\" << vec_to_string(p.first)\n                  << \", second:\" << vec_to_string(p.second) << \"}\";\n            },\n            [&](const std::pair<std::vector<uint32_t>, std::vector<float16_t>>\n                    &p) {\n              oss << \"{first:\" << vec_to_string(p.first) << \", second:[\";\n              for (size_t i = 0; i < p.second.size(); ++i) {\n                if (i > 0) oss << \", \";\n                oss << static_cast<float>(p.second[i]);\n              }\n              oss << \"]}\";\n            }},\n        val);\n  }\n  oss << \"}\";\n  return oss.str();\n}\n\nstruct Doc::ValueEqual {\n  template <typename T, typename U>\n  bool operator()(const T &, const U &) const {\n    return false;\n  }\n\n  template <typename T>\n  bool operator()(const T &a, const T &b) const {\n    return a == b;\n  }\n\n  bool operator()(float a, float b) const {\n    return std::fabs(a - b) < 1e-6f;\n  }\n\n  bool operator()(double a, double b) const {\n    return std::fabs(a - b) < 1e-9;\n  }\n\n  bool operator()(const std::vector<float16_t> &a,\n                  const std::vector<float16_t> &b) const {\n    if (a.size() != b.size()) return false;\n    for (size_t i = 0; i < a.size(); ++i)\n      if (std::fabs(static_cast<float>(a[i]) - static_cast<float>(b[i])) >=\n          
1e-3f)\n        return false;\n    return true;\n  }\n\n  bool operator()(const std::vector<float> &a,\n                  const std::vector<float> &b) const {\n    if (a.size() != b.size()) return false;\n    for (size_t i = 0; i < a.size(); ++i)\n      if (std::fabs(a[i] - b[i]) >= 1e-6f) return false;\n    return true;\n  }\n\n  bool operator()(const std::vector<double> &a,\n                  const std::vector<double> &b) const {\n    if (a.size() != b.size()) return false;\n    for (size_t i = 0; i < a.size(); ++i)\n      if (std::fabs(a[i] - b[i]) >= 1e-9) return false;\n    return true;\n  }\n};\n\nbool Doc::operator==(const Doc &other) const {\n  // Compare basic fields\n  if (pk_ != other.pk_) {\n    return false;\n  }\n\n  // Compare fields map sizes\n  if (fields_.size() != other.fields_.size()) {\n    return false;\n  }\n\n  // Compare each field\n  for (const auto &pair : fields_) {\n    const auto &field_name = pair.first;\n    const auto &field_value = pair.second;\n\n    auto it = other.fields_.find(field_name);\n    if (it == other.fields_.end()) {\n      return false;\n    }\n\n    // Compare variant values\n    if (field_value.index() != it->second.index()) {\n      return false;\n    }\n\n    // Use visitor to compare the actual values\n    if (!std::visit(ValueEqual{}, field_value, it->second)) return false;\n  }\n\n  return true;\n}\n\nStatus VectorQuery::validate(const FieldSchema *schema) const {\n  if ((uint32_t)topk_ > kMaxQueryTopk) {\n    return Status::InvalidArgument(\"query validate failed: topk[\", topk_,\n                                   \"] is too large, max is \", kMaxQueryTopk);\n  }\n  if (output_fields_.has_value() &&\n      output_fields_->size() > kMaxOutputFieldSize) {\n    return Status::InvalidArgument(\n        \"query validate failed: output_fields is too large, max is \",\n        kMaxOutputFieldSize);\n  }\n\n  if (schema == nullptr) {\n    // support query with vector\n    if (query_vector_.empty() && 
query_sparse_indices_.empty()) {\n      return Status::OK();\n    }\n\n    return Status::InvalidArgument(\"query validate failed:  vector_field[\",\n                                   field_name_,\n                                   \"] not defined in the collection schema\");\n  }\n  // validate dense/sparse vector\n  if (schema->is_dense_vector()) {\n    // validate dimension\n    auto dim = schema->dimension();\n    switch (schema->data_type()) {\n      case DataType::VECTOR_FP16:\n        if (dim * sizeof(float16_t) != query_vector_.size()) {\n          return Status::InvalidArgument(\n              \"query validate failed: dimension is invalid\");\n        }\n        break;\n      case DataType::VECTOR_FP32:\n        if (dim * sizeof(float) != query_vector_.size()) {\n          return Status::InvalidArgument(\n              \"query validate failed: dimension is invalid\");\n        }\n        break;\n      case DataType::VECTOR_FP64:\n        if (dim * sizeof(double) != query_vector_.size()) {\n          return Status::InvalidArgument(\n              \"query validate failed: dimension is invalid\");\n        }\n        break;\n      case DataType::VECTOR_INT8:\n        if (dim * sizeof(int8_t) != query_vector_.size()) {\n          return Status::InvalidArgument(\n              \"query validate failed: dimension is invalid\");\n        }\n        break;\n      case DataType::VECTOR_INT16:\n      case DataType::VECTOR_INT4:\n      case DataType::VECTOR_BINARY32:\n      case DataType::VECTOR_BINARY64:\n        return Status::NotSupported(\n            \"query validate failed: unsupported dense vector type\");\n      default:\n        return Status::InvalidArgument(\n            \"query validate failed: field is not dense vector\");\n    }\n  } else if (schema->is_sparse_vector()) {\n    // validate sparse indices size\n    if (query_sparse_indices_.size() > kSparseMaxDimSize * sizeof(uint32_t)) {\n      return Status::InvalidArgument(\n          \"query validate 
failed: the number of sparse indices exceeds the \"\n          \"maximum limit \",\n          kSparseMaxDimSize);\n    }\n  } else {\n    return Status::InvalidArgument(\n        \"query validate failed: field is not vector\");\n  }\n  return Status::OK();\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/common/id_map.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"id_map.h\"\n#include <zvec/ailego/logger/logger.h>\n#include \"db/common/constants.h\"\n\n\nnamespace zvec {\n\n\nStatus IDMap::open(const std::string &working_dir, bool create_if_missing,\n                   bool read_only) {\n  if (opened_) {\n    LOG_ERROR(\"IDMap is already opened\");\n    return Status::InternalError();\n  }\n\n  Status s;\n  if (FILE::IsExist(working_dir)) {\n    if (!FILE::IsDirectory(working_dir)) {\n      LOG_ERROR(\"IDMap path[%s] is not a directory\", working_dir.c_str());\n      return Status::InvalidArgument();\n    }\n    s = rocksdb_context_.open(working_dir, read_only);\n  } else {\n    if (!create_if_missing) {\n      LOG_ERROR(\"IDMap path[%s] does not exist\", working_dir.c_str());\n      return Status::NotFound();\n    }\n    s = rocksdb_context_.create(working_dir);\n  }\n  if (s.ok()) {\n    LOG_INFO(\"Opened IDMap[%s]\", working_dir.c_str());\n    working_dir_ = working_dir;\n    opened_ = true;\n  } else {\n    LOG_ERROR(\"Failed to open IDMap[%s]\", working_dir.c_str());\n  }\n  return s;\n}\n\n\nIDMap::Ptr IDMap::CreateAndOpen(const std::string &collection_name,\n                                const std::string &working_dir,\n                                bool create_if_missing, bool read_only) {\n  IDMap::Ptr id_map = std::make_shared<IDMap>(collection_name);\n  if 
(id_map->open(working_dir, create_if_missing, read_only).ok()) {\n    return id_map;\n  } else {\n    return nullptr;\n  }\n}\n\n\nStatus IDMap::close() {\n  if (!opened_) {\n    return Status::OK();\n  }\n\n  Status status = rocksdb_context_.close();\n  if (status.ok()) {\n    LOG_INFO(\"Closed IDMap[%s]\", working_dir_.c_str());\n  } else {\n    LOG_ERROR(\"Failed to close IDMap[%s]\", working_dir_.c_str());\n  }\n  return status;\n}\n\n\nStatus IDMap::flush() {\n  if (!opened_) {\n    return Status::InternalError();\n  }\n\n  auto s = rocksdb_context_.flush();\n  if (s.ok()) {\n    LOG_INFO(\"Flushed IDMap[%s]\", working_dir_.c_str());\n  } else {\n    LOG_ERROR(\"Failed to flush IDMap[%s]\", working_dir_.c_str());\n  }\n  return s;\n}\n\n\nStatus IDMap::upsert(const std::string &key, uint64_t doc_id) {\n  if (!opened_) {\n    return Status::InternalError();\n  }\n\n  rocksdb::Slice value((const char *)&doc_id, sizeof(uint64_t));\n  auto s = rocksdb_context_.db_->Put(rocksdb_context_.write_opts_, key, value);\n  if (s.ok()) {\n    return Status::OK();\n  } else {\n    LOG_ERROR(\"Failed to put [%s, %zu] into IDMap[%s], code[%d], reason[%s]\",\n              key.c_str(), (size_t)doc_id, working_dir_.c_str(), s.code(),\n              s.ToString().c_str());\n    return Status::InternalError();\n  }\n}\n\n\nvoid IDMap::remove(const std::string &key) {\n  rocksdb_context_.db_->Delete(rocksdb_context_.write_opts_, key);\n}\n\n\nbool IDMap::has(const std::string &key, uint64_t *doc_id) const {\n  std::string value;\n  auto s = rocksdb_context_.db_->Get(rocksdb_context_.read_opts_, key, &value);\n  if (s.ok()) {\n    if (doc_id) {\n      *doc_id = *(uint64_t *)(value.data());\n    }\n    return true;\n  } else {\n    if (doc_id) {\n      *doc_id = INVALID_DOC_ID;\n    }\n    return false;\n  }\n}\n\n\nStatus IDMap::multi_get(const std::vector<std::string> &keys,\n                        std::vector<uint64_t> *doc_ids) const {\n  if (keys.empty()) {\n    
doc_ids->clear();\n    return Status::InvalidArgument();\n  }\n\n  std::vector<rocksdb::Slice> slice_keys(keys.begin(), keys.end());\n  std::vector<rocksdb::PinnableSlice> pinnable_values;\n  pinnable_values.resize(keys.size());\n  std::vector<rocksdb::Status> statuses;\n  statuses.resize(keys.size());\n\n  auto db = rocksdb_context_.db_.get();\n\n  db->MultiGet(rocksdb_context_.read_opts_, db->DefaultColumnFamily(),\n               slice_keys.size(), slice_keys.data(), pinnable_values.data(),\n               statuses.data(), false);\n\n  doc_ids->resize(keys.size());\n  for (size_t i = 0; i < keys.size(); i++) {\n    if (statuses[i].ok()) {\n      (*doc_ids)[i] = *(uint64_t *)(pinnable_values[i].data());\n    } else if (statuses[i].code() == rocksdb::Status::kNotFound) {\n      (*doc_ids)[i] = INVALID_DOC_ID;\n    } else {\n      LOG_ERROR(\"Failed to get key[%s] from IDMap[%s], code[%d], reason[%s]\",\n                keys[i].c_str(), working_dir_.c_str(), statuses[i].code(),\n                statuses[i].ToString().c_str());\n      return Status::InternalError();\n    }\n  }\n\n  return Status::OK();\n}\n\n\nsize_t IDMap::storage_size_in_bytes() {\n  return rocksdb_context_.sst_file_size();\n}\n\n\nsize_t IDMap::count() {\n  return rocksdb_context_.count();\n}\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/common/id_map.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <zvec/ailego/io/file.h>\n#include <zvec/db/status.h>\n#include \"db/common/rocksdb_context.h\"\n\n\nnamespace zvec {\n\n\nclass IDMap {\n public:\n  using Ptr = std::shared_ptr<IDMap>;\n\n  explicit IDMap(std::string collection_name)\n      : collection_name_(std::move(collection_name)) {};\n\n  ~IDMap() {\n    if (opened_) {\n      close();\n    }\n  }\n\n  static Ptr CreateAndOpen(const std::string &collection_name,\n                           const std::string &working_dir,\n                           bool create_if_missing, bool read_only);\n\n\n private:\n  IDMap(const IDMap &) = delete;\n  IDMap &operator=(const IDMap &) = delete;\n  IDMap &operator=(IDMap &&) = delete;\n\n\n public:\n  Status open(const std::string &working_dir, bool create_if_missing,\n              bool read_only);\n\n  Status close();\n\n  Status create_snapshot(const std::string &snapshot_dir);\n\n  Status flush();\n\n  Status upsert(const std::string &key, uint64_t doc_id);\n\n  void remove(const std::string &key);\n\n  bool has(const std::string &key, uint64_t *doc_id = nullptr) const;\n\n  Status multi_get(const std::vector<std::string> &keys,\n                   std::vector<uint64_t> *doc_ids) const;\n\n  size_t storage_size_in_bytes();\n\n  size_t count();\n\n\n  const 
std::string &collection_name() const {\n    return collection_name_;\n  }\n\n  const std::string &working_dir() const {\n    return working_dir_;\n  }\n\n\n private:\n  using FILE = ailego::File;\n\n\n  const std::string collection_name_{};\n  std::string working_dir_{};\n\n  RocksdbContext rocksdb_context_{};\n  bool opened_{false};\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/index_filter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <functional>\n#include <memory>\n\n\nnamespace zvec {\n\n\nclass IndexFilter {\n public:\n  using Ptr = std::shared_ptr<IndexFilter>;\n\n  IndexFilter() = default;\n\n  virtual ~IndexFilter() = default;\n\n  IndexFilter(const IndexFilter &) = delete;\n\n  IndexFilter &operator=(const IndexFilter &) = delete;\n\n  /**\n   * @return true if the document is filtered (should be excluded)\n   * @return false if the document is not filtered (should be included)\n   */\n  virtual bool is_filtered(uint64_t id) const = 0;\n};\n\nclass EasyIndexFilter : public IndexFilter {\n public:\n  using FilterFunction = std::function<bool(uint64_t)>;\n\n  /**\n   * Create an IndexFilter::Ptr from a lambda expression or function\n   * @param filter_func A function that takes a uint64_t id and returns true\n   *                    if the document should be filtered (excluded)\n   */\n  static IndexFilter::Ptr Create(FilterFunction filter_func) {\n    return std::make_shared<EasyIndexFilter>(std::move(filter_func));\n  }\n\n  /**\n   * Constructor that takes a filter function\n   * @param filter_func A function that takes a uint64_t id and returns true\n   *                    if the document should be filtered (excluded)\n   */\n  explicit EasyIndexFilter(FilterFunction filter_func)\n      : 
filter_func_(std::move(filter_func)) {}\n\n  /**\n   * Check if a document should be filtered\n   * @param id The document ID\n   * @return true if the document should be filtered (excluded)\n   * @return false if the document should not be filtered (included)\n   */\n  bool is_filtered(uint64_t id) const override {\n    return filter_func_(id);\n  }\n\n private:\n  FilterFunction filter_func_;\n};\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/index_params.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <sstream>\n#include <zvec/db/index_params.h>\n#include \"type_helper.h\"\n\nnamespace zvec {\n\nstd::string InvertIndexParams::to_string() const {\n  std::ostringstream oss;\n  oss << \"InvertIndexParams{\"\n      << \"enable_range_optimization:\"\n      << (enable_range_optimization_ ? \"true\" : \"false\")\n      << \", enable_extended_wildcard:\"\n      << (enable_extended_wildcard_ ? \"true\" : \"false\") << \"}\";\n  return oss.str();\n}\n\nstd::string VectorIndexParams::vector_index_params_to_string(\n    const std::string &class_name, MetricType metric_type,\n    QuantizeType quantize_type) const {\n  std::ostringstream oss;\n  oss << class_name << \"{\"\n      << \"metric:\" << MetricTypeCodeBook::AsString(metric_type)\n      << \",quantize:\" << QuantizeTypeCodeBook::AsString(quantize_type);\n  return oss.str();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/meta.h",
    "content": "\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <memory>\n#include <optional>\n#include <set>\n#include <sstream>\n#include <vector>\n#include \"db/common/utils.h\"\n#include \"db/index/common/type_helper.h\"\n\nnamespace zvec {\n\nusing SegmentID = uint32_t;\nusing BlockID = uint32_t;\n\nclass BlockMeta {\n public:\n  using Ptr = std::shared_ptr<BlockMeta>;\n\n public:\n  BlockMeta() = default;\n\n  BlockMeta(uint32_t id, BlockType type, uint64_t min_doc_id,\n            uint64_t max_doc_id, uint32_t doc_count,\n            const std::vector<std::string> &columns)\n      : id_(id),\n        type_(type),\n        min_doc_id_(min_doc_id),\n        max_doc_id_(max_doc_id),\n        doc_count_(doc_count),\n        columns_(columns) {}\n\n  BlockMeta(uint32_t id, BlockType type, uint64_t min_doc_id,\n            uint64_t max_doc_id)\n      : id_(id),\n        type_(type),\n        min_doc_id_(min_doc_id),\n        max_doc_id_(max_doc_id) {}\n  uint32_t id() const {\n    return id_;\n  }\n\n  void set_id(uint32_t id) {\n    id_ = id;\n  }\n\n  BlockType type() const {\n    return type_;\n  }\n\n  void set_type(BlockType type) {\n    type_ = type;\n  }\n\n  uint64_t min_doc_id() const {\n    return min_doc_id_;\n  }\n\n  void set_min_doc_id(uint64_t min_doc_id) {\n    min_doc_id_ = min_doc_id;\n  }\n\n  uint64_t max_doc_id() 
const {\n    return max_doc_id_;\n  }\n\n  void set_max_doc_id(uint64_t max_doc_id) {\n    max_doc_id_ = max_doc_id;\n  }\n\n  uint32_t doc_count() const {\n    return doc_count_;\n  }\n\n  void set_doc_count(uint32_t doc_count) {\n    doc_count_ = doc_count;\n  }\n\n  const std::vector<std::string> &columns() const {\n    return columns_;\n  }\n\n  void set_columns(const std::vector<std::string> &columns) {\n    columns_ = columns;\n  }\n\n  void add_column(const std::string &column) {\n    columns_.push_back(column);\n  }\n\n  void del_column(const std::string &column) {\n    columns_.erase(std::remove(columns_.begin(), columns_.end(), column),\n                   columns_.end());\n  }\n\n  bool contain_column(const std::string &column) const {\n    return std::find(columns_.begin(), columns_.end(), column) !=\n           columns_.end();\n  }\n\n public:\n  bool operator==(const BlockMeta &other) const {\n    return id_ == other.id_ && type_ == other.type_ &&\n           min_doc_id_ == other.min_doc_id_ &&\n           max_doc_id_ == other.max_doc_id_ && columns_ == other.columns_ &&\n           doc_count_ == other.doc_count_;\n  }\n\n  std::string to_string() const {\n    std::ostringstream oss;\n    oss << \"BlockMeta{\"\n        << \"id:\" << id_ << \",type:\" << BlockTypeCodeBook::AsString(type_)\n        << \",min_doc_id:\" << min_doc_id_ << \",max_doc_id:\" << max_doc_id_\n        << \",doc_count:\" << doc_count_ << \",columns:[\";\n\n    for (size_t i = 0; i < columns_.size(); ++i) {\n      if (i > 0) oss << \",\";\n      oss << \"'\" << columns_[i] << \"'\";\n    }\n\n    oss << \"]}\";\n    return oss.str();\n  }\n\n  std::string to_string_formatted(int indent_level = 0) const {\n    std::ostringstream oss;\n    oss << indent(indent_level) << \"BlockMeta{\\n\"\n        << indent(indent_level + 1) << \"id: \" << id_ << \",\\n\"\n        << indent(indent_level + 1)\n        << \"type: \" << BlockTypeCodeBook::AsString(type_) << \",\\n\"\n        << 
indent(indent_level + 1) << \"min_doc_id: \" << min_doc_id_ << \",\\n\"\n        << indent(indent_level + 1) << \"max_doc_id: \" << max_doc_id_ << \",\\n\"\n        << indent(indent_level + 1) << \"doc_count: \" << doc_count_ << \",\\n\"\n        << indent(indent_level + 1) << \"columns: [\";\n\n    if (!columns_.empty()) {\n      oss << \"\\n\";\n      for (size_t i = 0; i < columns_.size(); ++i) {\n        oss << indent(indent_level + 2) << \"'\" << columns_[i] << \"'\";\n        if (i < columns_.size() - 1) {\n          oss << \",\";\n        }\n        oss << \"\\n\";\n      }\n      oss << indent(indent_level + 1);\n    }\n\n    oss << \"]\\n\" << indent(indent_level) << \"}\";\n    return oss.str();\n  }\n\n public:\n  uint32_t id_{0};\n  BlockType type_{BlockType::UNDEFINED};\n  uint64_t min_doc_id_{0};\n  uint64_t max_doc_id_{0};\n  uint32_t doc_count_{0};\n  std::vector<std::string> columns_{};\n};\n\nclass SegmentMeta {\n public:\n  using Ptr = std::shared_ptr<SegmentMeta>;\n\n public:\n  SegmentMeta() {};\n\n  explicit SegmentMeta(SegmentID id) : id_(id) {}\n\n  void set_id(SegmentID id) {\n    id_ = id;\n  }\n\n  SegmentID id() const {\n    return id_;\n  }\n\n  void add_persisted_block(const BlockMeta &block) {\n    persisted_blocks_.push_back(block);\n  }\n\n  void set_persisted_blocks(const std::vector<BlockMeta> &blocks) {\n    persisted_blocks_ = blocks;\n  }\n\n  bool remove_block(BlockID block_id) {\n    auto it = std::remove_if(\n        persisted_blocks_.begin(), persisted_blocks_.end(),\n        [block_id](const BlockMeta &block) { return block.id() == block_id; });\n    bool found = (it != persisted_blocks_.end());\n    persisted_blocks_.erase(it, persisted_blocks_.end());\n    return found;\n  }\n\n  void remove_vector_persisted_block(const std::string &column, bool quantize) {\n    std::vector<BlockMeta> new_persisted_blocks;\n    for (auto &b : persisted_blocks_) {\n      if (quantize) {\n        if (!(b.type() == 
BlockType::VECTOR_INDEX_QUANTIZE &&\n              b.contain_column(column))) {\n          new_persisted_blocks.push_back(b);\n        }\n      } else {\n        if (!(b.type() == BlockType::VECTOR_INDEX &&\n              b.contain_column(column))) {\n          new_persisted_blocks.push_back(b);\n        }\n      }\n    }\n    persisted_blocks_ = new_persisted_blocks;\n  }\n\n  void remove_vector_persisted_block(const std::string &column) {\n    std::vector<BlockMeta> new_persisted_blocks;\n    for (auto &b : persisted_blocks_) {\n      if (!b.contain_column(column)) {\n        new_persisted_blocks.push_back(b);\n      }\n    }\n    persisted_blocks_ = new_persisted_blocks;\n  }\n\n  void remove_scalar_index_block() {\n    std::vector<BlockMeta> new_persisted_blocks;\n    for (auto &b : persisted_blocks_) {\n      if (b.type() != BlockType::SCALAR_INDEX) {\n        new_persisted_blocks.push_back(b);\n      }\n    }\n    persisted_blocks_ = new_persisted_blocks;\n  }\n\n  void set_writing_forward_block(const BlockMeta &writing_forward_block) {\n    writing_forward_block_ = writing_forward_block;\n  }\n\n  void remove_writing_forward_block() {\n    writing_forward_block_ = std::nullopt;\n  }\n\n  void update_max_doc_id(uint64_t max_doc_id) {\n    if (writing_forward_block_.has_value()) {\n      writing_forward_block_->set_max_doc_id(max_doc_id);\n    }\n  }\n\n  uint64_t min_doc_id() const {\n    if (persisted_blocks_.empty()) {\n      if (writing_forward_block_.has_value()) {\n        return writing_forward_block_->min_doc_id();\n      }\n      return 0;\n    }\n    uint64_t min_doc_id{std::numeric_limits<uint64_t>::max()};\n    for (const auto &block : persisted_blocks_) {\n      if (block.type() == BlockType::SCALAR) {\n        min_doc_id = std::min(min_doc_id, block.min_doc_id());\n      }\n    }\n    if (min_doc_id == std::numeric_limits<uint64_t>::max() &&\n        writing_forward_block_.has_value()) {\n      min_doc_id = writing_forward_block_->min_doc_id();\n 
   }\n    return min_doc_id;\n  }\n\n  uint64_t max_doc_id() const {\n    if (writing_forward_block_.has_value() &&\n        writing_forward_block_->doc_count_ != 0) {\n      return writing_forward_block_->max_doc_id();\n    }\n    uint64_t max_doc_id{0};\n    for (const auto &block : persisted_blocks_) {\n      if (block.type() == BlockType::SCALAR) {\n        max_doc_id = std::max(max_doc_id, block.max_doc_id());\n      }\n    }\n    return max_doc_id;\n  }\n\n  uint32_t doc_count() const {\n    uint32_t count{0};\n    if (writing_forward_block_.has_value()) {\n      count = writing_forward_block_->doc_count();\n    }\n    for (const auto &block : persisted_blocks_) {\n      if (block.type() == BlockType::SCALAR) {\n        count += block.doc_count();\n      }\n    }\n    return count;\n  }\n\n  std::vector<BlockMeta> &persisted_blocks() {\n    return persisted_blocks_;\n  }\n\n  const std::vector<BlockMeta> &persisted_blocks() const {\n    return persisted_blocks_;\n  }\n\n  std::optional<BlockMeta> &writing_forward_block() {\n    return writing_forward_block_;\n  }\n\n  const std::optional<BlockMeta> &writing_forward_block() const {\n    return writing_forward_block_;\n  }\n\n  bool has_writing_forward_block() const {\n    return writing_forward_block_.has_value();\n  }\n\n  bool vector_indexed(const std::string &field) const {\n    return indexed_vector_fields_.count(field) > 0;\n  }\n\n  void add_indexed_vector_field(const std::string &field) {\n    indexed_vector_fields_.insert(field);\n  }\n\n  std::set<std::string> indexed_vector_fields() const {\n    return indexed_vector_fields_;\n  }\n\n  void set_indexed_vector_fields(const std::set<std::string> &fields) {\n    indexed_vector_fields_ = fields;\n  }\n\n public:\n  bool operator==(const SegmentMeta &other) const {\n    return id_ == other.id_ && persisted_blocks_ == other.persisted_blocks_ &&\n           writing_forward_block_ == other.writing_forward_block_ &&\n           indexed_vector_fields_ == 
other.indexed_vector_fields_;\n  }\n\n  bool operator!=(const SegmentMeta &other) const {\n    return !(*this == other);\n  }\n\n  // Add these methods to SegmentMeta class in meta.h\n\n  std::string to_string() const {\n    std::ostringstream oss;\n    oss << \"SegmentMeta{\"\n        << \"id:\" << id_ << \",persisted_blocks:[\";\n\n    for (size_t i = 0; i < persisted_blocks_.size(); ++i) {\n      if (i > 0) oss << \",\";\n      oss << persisted_blocks_[i].to_string();\n    }\n\n    oss << \"],writing_forward_block:\";\n    if (writing_forward_block_.has_value()) {\n      oss << writing_forward_block_->to_string();\n    } else {\n      oss << \"null\";\n    }\n\n    oss << \",indexed_vector_fields:[\";\n\n    size_t i = 0;\n    for (const auto &field : indexed_vector_fields_) {\n      if (i > 0) oss << \",\";\n      oss << \"'\" << field << \"'\";\n      ++i;\n    }\n\n    oss << \"]}\";\n    return oss.str();\n  }\n\n  std::string to_string_formatted(int indent_level = 0) const {\n    std::ostringstream oss;\n    oss << indent(indent_level) << \"SegmentMeta{\\n\"\n        << indent(indent_level + 1) << \"id: \" << id_ << \",\\n\"\n        << indent(indent_level + 1) << \"persisted_blocks: [\\n\";\n\n    for (size_t i = 0; i < persisted_blocks_.size(); ++i) {\n      oss << persisted_blocks_[i].to_string_formatted(indent_level + 2);\n      if (i < persisted_blocks_.size() - 1) {\n        oss << \",\";\n      }\n      oss << \"\\n\";\n    }\n\n    oss << \"\\n\"\n        << indent(indent_level + 1) << \"],\\n\"\n        << indent(indent_level + 1) << \"writing_forward_block: \";\n\n    if (writing_forward_block_.has_value()) {\n      oss << \"\\n\"\n          << writing_forward_block_->to_string_formatted(indent_level + 2)\n          << \"\\n\";\n    } else {\n      oss << \"null\\n\";\n    }\n\n    oss << indent(indent_level + 1) << \"indexed_vector_fields: [\";\n\n    size_t i = 0;\n    for (const auto &field : indexed_vector_fields_) {\n      if (i > 0) oss << 
\",\";\n      oss << \"'\" << field << \"'\";\n      ++i;\n    }\n\n    oss << \"]\\n\" << indent(indent_level) << \"}\";\n    return oss.str();\n  }\n\n private:\n  SegmentID id_{0};\n  std::vector<BlockMeta> persisted_blocks_;\n  std::optional<BlockMeta> writing_forward_block_;\n  std::set<std::string> indexed_vector_fields_;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/proto_converter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"proto_converter.h\"\n\nnamespace zvec {\n\nHnswIndexParams::OPtr ProtoConverter::FromPb(\n    const proto::HnswIndexParams &params_pb) {\n  auto params = std::make_shared<HnswIndexParams>(\n      MetricTypeCodeBook::Get(params_pb.base().metric_type()), params_pb.m(),\n      params_pb.ef_construction(),\n      QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()));\n\n  return params;\n}\n\nproto::HnswIndexParams ProtoConverter::ToPb(const HnswIndexParams *params) {\n  proto::HnswIndexParams params_pb;\n  params_pb.mutable_base()->set_metric_type(\n      MetricTypeCodeBook::Get(params->metric_type()));\n  params_pb.mutable_base()->set_quantize_type(\n      QuantizeTypeCodeBook::Get(params->quantize_type()));\n  params_pb.set_ef_construction(params->ef_construction());\n  params_pb.set_m(params->m());\n  return params_pb;\n}\n\n// HnswRabitqIndexParams\nHnswRabitqIndexParams::OPtr ProtoConverter::FromPb(\n    const proto::HnswRabitqIndexParams &params_pb) {\n  auto params = std::make_shared<HnswRabitqIndexParams>(\n      MetricTypeCodeBook::Get(params_pb.base().metric_type()),\n      params_pb.total_bits(), params_pb.num_clusters(), params_pb.m(),\n      params_pb.ef_construction(), params_pb.sample_count());\n\n  return params;\n}\n\nproto::HnswRabitqIndexParams ProtoConverter::ToPb(\n    const HnswRabitqIndexParams *params) 
{\n  proto::HnswRabitqIndexParams params_pb;\n  params_pb.mutable_base()->set_metric_type(\n      MetricTypeCodeBook::Get(params->metric_type()));\n  params_pb.mutable_base()->set_quantize_type(\n      QuantizeTypeCodeBook::Get(params->quantize_type()));\n  params_pb.set_m(params->m());\n  params_pb.set_ef_construction(params->ef_construction());\n  params_pb.set_total_bits(params->total_bits());\n  params_pb.set_num_clusters(params->num_clusters());\n  params_pb.set_sample_count(params->sample_count());\n  return params_pb;\n}\n\n// FlatIndexParams\nFlatIndexParams::OPtr ProtoConverter::FromPb(\n    const proto::FlatIndexParams &params_pb) {\n  return std::make_shared<FlatIndexParams>(\n      MetricTypeCodeBook::Get(params_pb.base().metric_type()),\n      QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()));\n}\n\nproto::FlatIndexParams ProtoConverter::ToPb(const FlatIndexParams *params) {\n  proto::FlatIndexParams params_pb;\n  params_pb.mutable_base()->set_metric_type(\n      MetricTypeCodeBook::Get(params->metric_type()));\n  params_pb.mutable_base()->set_quantize_type(\n      QuantizeTypeCodeBook::Get(params->quantize_type()));\n  return params_pb;\n}\n\n// IVFIndexParams\nIVFIndexParams::OPtr ProtoConverter::FromPb(\n    const proto::IVFIndexParams &params_pb) {\n  return std::make_shared<IVFIndexParams>(\n      MetricTypeCodeBook::Get(params_pb.base().metric_type()),\n      params_pb.n_list(), params_pb.n_iters(), params_pb.use_soar(),\n      QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()));\n}\n\nproto::IVFIndexParams ProtoConverter::ToPb(const IVFIndexParams *params) {\n  proto::IVFIndexParams params_pb;\n  params_pb.mutable_base()->set_metric_type(\n      MetricTypeCodeBook::Get(params->metric_type()));\n  params_pb.mutable_base()->set_quantize_type(\n      QuantizeTypeCodeBook::Get(params->quantize_type()));\n  params_pb.set_n_list(params->n_list());\n  params_pb.set_n_iters(params->n_iters());\n  
params_pb.set_use_soar(params->use_soar());\n  return params_pb;\n}\n\n// InvertIndexParams\nInvertIndexParams::OPtr ProtoConverter::FromPb(\n    const proto::InvertIndexParams &params_pb) {\n  auto params = std::make_shared<InvertIndexParams>(\n      params_pb.enable_range_optimization());\n\n  return params;\n}\n\nproto::InvertIndexParams ProtoConverter::ToPb(const InvertIndexParams *params) {\n  proto::InvertIndexParams params_pb;\n  params_pb.set_enable_range_optimization(params->enable_range_optimization());\n  return params_pb;\n}\n\n// FieldSchema\nFieldSchema::Ptr ProtoConverter::FromPb(const proto::FieldSchema &schema_pb) {\n  auto schema = std::make_shared<FieldSchema>();\n\n  schema->set_name(schema_pb.name());\n  schema->set_data_type(DataTypeCodeBook::Get(schema_pb.data_type()));\n  schema->set_dimension(schema_pb.dimension());\n  schema->set_nullable(schema_pb.nullable());\n  if (schema_pb.has_index_params()) {\n    schema->set_index_params(ProtoConverter::FromPb(schema_pb.index_params()));\n  }\n  return schema;\n}\nproto::FieldSchema ProtoConverter::ToPb(const FieldSchema &schema) {\n  proto::FieldSchema schema_pb;\n\n  schema_pb.set_name(schema.name());\n  schema_pb.set_data_type(DataTypeCodeBook::Get(schema.data_type()));\n  schema_pb.set_dimension(schema.dimension());\n  schema_pb.set_nullable(schema.nullable());\n  auto index_params = schema.index_params();\n  if (index_params) {\n    auto index_params_pb = schema_pb.mutable_index_params();\n    index_params_pb->MergeFrom(ProtoConverter::ToPb(index_params.get()));\n  }\n  return schema_pb;\n}\n\n// CollectionSchema\nCollectionSchema::Ptr ProtoConverter::FromPb(\n    const proto::CollectionSchema &schema_pb) {\n  CollectionSchema::Ptr schema = std::make_shared<CollectionSchema>();\n\n  schema->set_name(schema_pb.name());\n\n  for (auto &column_schema_pb : schema_pb.fields()) {\n    FieldSchema::Ptr column_schema = ProtoConverter::FromPb(column_schema_pb);\n    schema->add_field(column_schema);\n  
}\n\n  schema->set_max_doc_count_per_segment(schema_pb.max_doc_count_per_segment());\n\n  return schema;\n}\n\nproto::CollectionSchema ProtoConverter::ToPb(const CollectionSchema &schema) {\n  proto::CollectionSchema schema_pb;\n  schema_pb.set_name(schema.name());\n  for (auto &column_schema : schema.fields()) {\n    proto::FieldSchema *column_schema_pb = schema_pb.add_fields();\n    column_schema_pb->MergeFrom(ProtoConverter::ToPb(*column_schema));\n  }\n\n  schema_pb.set_max_doc_count_per_segment(schema.max_doc_count_per_segment());\n\n  return schema_pb;\n}\n\nIndexParams::Ptr ProtoConverter::FromPb(const proto::IndexParams &params_pb) {\n  if (params_pb.has_hnsw()) {\n    return ProtoConverter::FromPb(params_pb.hnsw());\n  } else if (params_pb.has_invert()) {\n    return ProtoConverter::FromPb(params_pb.invert());\n  } else if (params_pb.has_ivf()) {\n    return ProtoConverter::FromPb(params_pb.ivf());\n  } else if (params_pb.has_flat()) {\n    return ProtoConverter::FromPb(params_pb.flat());\n  } else if (params_pb.has_hnsw_rabitq()) {\n    return ProtoConverter::FromPb(params_pb.hnsw_rabitq());\n  }\n\n  return nullptr;\n}\n\n// BlockMeta\nBlockMeta::Ptr ProtoConverter::FromPb(const proto::BlockMeta &meta_pb) {\n  auto block_meta = std::make_shared<BlockMeta>();\n\n  block_meta->set_id(meta_pb.block_id());\n  block_meta->set_type(BlockTypeCodeBook::Get(meta_pb.block_type()));\n  block_meta->set_min_doc_id(meta_pb.min_doc_id());\n  block_meta->set_max_doc_id(meta_pb.max_doc_id());\n  block_meta->set_doc_count(meta_pb.doc_count());\n  for (auto &column : meta_pb.columns()) {\n    block_meta->add_column(column);\n  }\n\n  return block_meta;\n}\n\nproto::IndexParams ProtoConverter::ToPb(const IndexParams *params) {\n  proto::IndexParams params_pb;\n\n  switch (params->type()) {\n    case IndexType::INVERT: {\n      auto invert_params = dynamic_cast<const InvertIndexParams *>(params);\n      if (invert_params) {\n        params_pb.mutable_invert()->CopyFrom(\n    
        ProtoConverter::ToPb(invert_params));\n      }\n      break;\n    }\n    case IndexType::HNSW: {\n      auto hnsw_params = dynamic_cast<const HnswIndexParams *>(params);\n      if (hnsw_params) {\n        params_pb.mutable_hnsw()->CopyFrom(ProtoConverter::ToPb(hnsw_params));\n      }\n      break;\n    }\n    case IndexType::IVF: {\n      auto ivf_params = dynamic_cast<const IVFIndexParams *>(params);\n      if (ivf_params) {\n        params_pb.mutable_ivf()->CopyFrom(ProtoConverter::ToPb(ivf_params));\n      }\n      break;\n    }\n    case IndexType::FLAT: {\n      auto flat_params = dynamic_cast<const FlatIndexParams *>(params);\n      if (flat_params) {\n        params_pb.mutable_flat()->CopyFrom(ProtoConverter::ToPb(flat_params));\n      }\n      break;\n    }\n    case IndexType::HNSW_RABITQ: {\n      auto hnsw_rabitq_params =\n          dynamic_cast<const HnswRabitqIndexParams *>(params);\n      if (hnsw_rabitq_params) {\n        params_pb.mutable_hnsw_rabitq()->CopyFrom(\n            ProtoConverter::ToPb(hnsw_rabitq_params));\n      }\n    }\n    default:\n      break;\n  }\n\n  return params_pb;\n}\n\nproto::BlockMeta ProtoConverter::ToPb(const BlockMeta &meta) {\n  proto::BlockMeta meta_pb;\n  meta_pb.set_block_id(meta.id());\n  meta_pb.set_block_type(BlockTypeCodeBook::Get(meta.type()));\n  meta_pb.set_min_doc_id(meta.min_doc_id());\n  meta_pb.set_max_doc_id(meta.max_doc_id());\n  meta_pb.set_doc_count(meta.doc_count());\n  for (auto &column : meta.columns()) {\n    meta_pb.add_columns(column);\n  }\n\n  return meta_pb;\n}\n\n// SegmentMeta\nSegmentMeta::Ptr ProtoConverter::FromPb(const proto::SegmentMeta &meta_pb) {\n  auto meta = std::make_shared<SegmentMeta>(meta_pb.segment_id());\n\n  auto persisted_blocks = meta_pb.persisted_blocks();\n\n  for (auto &persisted_block_pb : persisted_blocks) {\n    BlockMeta::Ptr persisted_block = ProtoConverter::FromPb(persisted_block_pb);\n    meta->add_persisted_block(*persisted_block);\n  }\n\n  if 
(meta_pb.has_writing_forward_block()) {\n    meta->set_writing_forward_block(\n        *ProtoConverter::FromPb(meta_pb.writing_forward_block()));\n  }\n\n  auto indexed_vector_fields = meta_pb.indexed_vector_fields();\n  for (auto &indexed_vector_field : indexed_vector_fields) {\n    meta->add_indexed_vector_field(indexed_vector_field);\n  }\n\n  return meta;\n}\n\nproto::SegmentMeta ProtoConverter::ToPb(const SegmentMeta &meta) {\n  proto::SegmentMeta meta_pb;\n  meta_pb.set_segment_id(meta.id());\n\n  auto persisted_blocks = meta.persisted_blocks();\n  for (auto &persisted_block : persisted_blocks) {\n    auto persisted_block_pb = ProtoConverter::ToPb(persisted_block);\n    meta_pb.add_persisted_blocks()->MergeFrom(persisted_block_pb);\n  }\n\n  if (meta.has_writing_forward_block()) {\n    meta_pb.mutable_writing_forward_block()->MergeFrom(\n        ProtoConverter::ToPb(meta.writing_forward_block().value()));\n  }\n\n  auto indexed_vector_fields = meta.indexed_vector_fields();\n  for (auto &field : indexed_vector_fields) {\n    meta_pb.add_indexed_vector_fields(field);\n  }\n\n  return meta_pb;\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/proto_converter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/db/index_params.h>\n#include <zvec/db/schema.h>\n#include \"db/index/common/meta.h\"\n\nnamespace zvec {\n\nstruct ProtoConverter {\n  // HnswIndexParams\n  static HnswIndexParams::OPtr FromPb(const proto::HnswIndexParams &params_pb);\n\n  static proto::HnswIndexParams ToPb(const HnswIndexParams *params);\n\n  // HnswRabitqIndexParams\n  static HnswRabitqIndexParams::OPtr FromPb(\n      const proto::HnswRabitqIndexParams &params_pb);\n  static proto::HnswRabitqIndexParams ToPb(const HnswRabitqIndexParams *params);\n\n  // FlatIndexParams\n  static FlatIndexParams::OPtr FromPb(const proto::FlatIndexParams &params_pb);\n  static proto::FlatIndexParams ToPb(const FlatIndexParams *params);\n\n  // IVFIndexParams\n  static IVFIndexParams::OPtr FromPb(const proto::IVFIndexParams &params_pb);\n  static proto::IVFIndexParams ToPb(const IVFIndexParams *params);\n\n  // InvertIndexParams\n  static InvertIndexParams::OPtr FromPb(\n      const proto::InvertIndexParams &params_pb);\n  static proto::InvertIndexParams ToPb(const InvertIndexParams *params);\n\n  // IndexParams\n  static IndexParams::Ptr FromPb(const proto::IndexParams &params_pb);\n  static proto::IndexParams ToPb(const IndexParams *params);\n\n  // FieldSchema\n  static FieldSchema::Ptr FromPb(const proto::FieldSchema &field_pb);\n  static proto::FieldSchema 
ToPb(const FieldSchema &field);\n\n  // CollectionSchema\n  static CollectionSchema::Ptr FromPb(const proto::CollectionSchema &schema_pb);\n  static proto::CollectionSchema ToPb(const CollectionSchema &schema);\n\n  // BlockMeta\n  static BlockMeta::Ptr FromPb(const proto::BlockMeta &meta_pb);\n  static proto::BlockMeta ToPb(const BlockMeta &meta);\n\n  // SegmentMeta\n  static SegmentMeta::Ptr FromPb(const proto::SegmentMeta &meta_pb);\n  static proto::SegmentMeta ToPb(const SegmentMeta &meta);\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/schema.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <regex>\n#include <set>\n#include <unordered_map>\n#include <unordered_set>\n#include <zvec/db/index_params.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#include \"ailego/internal/cpu_features.h\"\n#include \"db/common/constants.h\"\n#include \"db/common/typedef.h\"\n#include \"db/common/utils.h\"\n#include \"db/index/common/type_helper.h\"\n\nnamespace zvec {\n\n#if defined(RABITQ_COMPILED_AVX512)\nconstexpr const int kRabitqCompiledAvx512 = RABITQ_COMPILED_AVX512;\n#else\nconstexpr const int kRabitqCompiledAvx512 = 0;\n#endif\n\nstd::unordered_map<DataType, std::set<QuantizeType>> quantize_type_map = {\n    {DataType::VECTOR_FP32,\n     {QuantizeType::FP16, QuantizeType::INT4, QuantizeType::INT8,\n      QuantizeType::RABITQ}},\n    // {DataType::VECTOR_FP64, {QuantizeType::FP16}},\n    {DataType::SPARSE_VECTOR_FP32, {QuantizeType::FP16}},\n};\n\nstd::unordered_set<DataType> support_dense_vector_type = {\n    DataType::VECTOR_FP32,\n    DataType::VECTOR_FP16,\n    DataType::VECTOR_INT8,\n};\n\nstd::unordered_set<DataType> support_sparse_vector_type = {\n    DataType::SPARSE_VECTOR_FP32,\n    DataType::SPARSE_VECTOR_FP16,\n};\n\nstd::unordered_set<IndexType> support_dense_vector_index = {\n    IndexType::FLAT, IndexType::HNSW, IndexType::HNSW_RABITQ, 
IndexType::IVF};\n\nstd::unordered_set<IndexType> support_sparse_vector_index = {IndexType::FLAT,\n                                                             IndexType::HNSW};\n\nStatus FieldSchema::validate() const {\n  if (data_type_ == DataType::UNDEFINED) {\n    return Status::InvalidArgument(\"schema validate failed: field[\", name_,\n                                   \"]'s data_type is not defined\");\n  }\n  if (name_.empty()) {\n    return Status::InvalidArgument(\"schema validate failed: field[\", name_,\n                                   \"]'s name is empty\");\n  }\n  if (!std::regex_match(name_, FIELD_NAME_REGEX)) {\n    return Status::InvalidArgument(\n        \"schema validate failed: field[\", name_,\n        \"]'s name cannot pass the regex verification\");\n  }\n  if (is_vector_field()) {\n    auto is_sparse = is_sparse_vector();\n    if (!is_sparse && (dimension_ == 0 || dimension() > kMaxDenseDimSize)) {\n      return Status::InvalidArgument(\"schema validate failed: field[\", name_,\n                                     \"]'s dimension must be in (0,20000]\");\n    }\n\n    if (!is_sparse) {\n      if (support_dense_vector_type.find(data_type_) ==\n          support_dense_vector_type.end()) {\n        return Status::InvalidArgument(\n            \"schema validate failed: dense_vector's data type only \"\n            \"support FP32/FP16/INT8, \"\n            \"but field[\",\n            name_, \"]'s data type is \", DataTypeCodeBook::AsString(data_type_));\n      }\n    } else {\n      if (support_sparse_vector_type.find(data_type_) ==\n          support_sparse_vector_type.end()) {\n        return Status::InvalidArgument(\n            \"schema validate failed: sparse_vector's data type only \"\n            \"support FP32/FP16, \"\n            \"but field[\",\n            name_, \"]'s data type is \", DataTypeCodeBook::AsString(data_type_));\n      }\n    }\n\n    if (index_params_) {\n      auto vector_index_params =\n          
std::dynamic_pointer_cast<VectorIndexParams>(index_params_);\n\n      if (is_sparse) {\n        if (support_sparse_vector_index.find(index_params_->type()) ==\n            support_sparse_vector_index.end()) {\n          return Status::InvalidArgument(\n              \"schema validate failed: sparse_vector's index_params only \"\n              \"support FLAT|HNSW index, \"\n              \"but field[\",\n              name_, \"]'s index_type is \",\n              IndexTypeCodeBook::AsString(index_params_->type()));\n        }\n        if (vector_index_params->metric_type() != MetricType::IP) {\n          return Status::InvalidArgument(\n              \"schema validate failed: sparse_vector's index_params only \"\n              \"support IP metric, but \"\n              \"field[\",\n              name_, \"]'s metric is \",\n              MetricTypeCodeBook::AsString(vector_index_params->metric_type()));\n        }\n\n      } else {\n        if (support_dense_vector_index.find(index_params_->type()) ==\n            support_dense_vector_index.end()) {\n          return Status::InvalidArgument(\n              \"schema validate failed: dense_vector's index_params only \"\n              \"support FLAT|HNSW|HNSW_RABITQ|IVF index, but field[\",\n              name_, \"]'s index_type is \",\n              IndexTypeCodeBook::AsString(index_params_->type()));\n        }\n      }\n\n      if (index_params_->type() == IndexType::HNSW_RABITQ) {\n        if (dimension_ < kMinRabitqDimSize || dimension_ > kMaxRabitqDimSize) {\n          return Status::InvalidArgument(\n              \"schema validate failed: HNSW_RABITQ index only support \"\n              \"dimension in [\",\n              kMinRabitqDimSize, \", \", kMaxRabitqDimSize, \"]\");\n        }\n        if (data_type_ != DataType::VECTOR_FP32) {\n          return Status::InvalidArgument(\n              \"schema validate failed: HNSW_RABITQ index only support FP32 \"\n              \"data types\");\n        }\n        auto metric_type 
= vector_index_params->metric_type();\n        if (metric_type != MetricType::L2 && metric_type != MetricType::IP &&\n            metric_type != MetricType::COSINE) {\n          return Status::InvalidArgument(\n              \"schema validate failed: HNSW_RABITQ index only support \"\n              \"L2/IP/COSINE metric\");\n        }\n#if !RABITQ_SUPPORTED\n        return Status::NotSupported(\n            \"RabitQ is not supported on this platform (Linux x86_64 only)\");\n#endif\n        auto &flags = zvec::ailego::internal::CpuFeatures::static_flags_;\n        if (!flags.AVX2 && !flags.AVX512F) {\n          return Status::NotSupported(\n              \"RabitQ requires AVX2/AVX512F to be supported\");\n        }\n\n        if (kRabitqCompiledAvx512 && !flags.AVX512F) {\n          return Status::NotSupported(\n              \"RabitQ compiled with AVX512F while runtime does not support\");\n        }\n      }\n\n\n      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n        auto iter = quantize_type_map.find(data_type_);\n        if (iter == quantize_type_map.end()) {\n          return Status::InvalidArgument(\n              \"schema validate failed: \",\n              is_sparse ? \"sparse_vector\" : \"dense_vector\",\n              \"'s index_params of \", DataTypeCodeBook::AsString(data_type_),\n              \" do not support quantize, but field[\", name_,\n              \"]'s quantize_type is \",\n              QuantizeTypeCodeBook::AsString(\n                  vector_index_params->quantize_type()));\n        } else {\n          if (iter->second.find(vector_index_params->quantize_type()) ==\n              iter->second.end()) {\n            return Status::InvalidArgument(\n                \"schema validate failed: \",\n                is_sparse ? 
\"sparse_vector\" : \"dense_vector\",\n                \"'s index_params of \", DataTypeCodeBook::AsString(data_type_),\n                \" support \", QuantizeTypeCodeBook::AsString(iter->second),\n                \" quantize, but field[\", name_, \"]'s quantize_type is \",\n                QuantizeTypeCodeBook::AsString(\n                    vector_index_params->quantize_type()));\n          }\n        }\n      }\n      if (index_params_->type() == IndexType::IVF &&\n          vector_index_params->metric_type() == MetricType::IP) {\n        if (data_type_ != DataType::VECTOR_FP16 &&\n            data_type_ != DataType::VECTOR_FP32) {\n          return Status::InvalidArgument(\n              \"schema validate failed: IVF index only support FP32/FP16 data \"\n              \"types according to the IP metric\");\n        }\n      }\n      if (vector_index_params->metric_type() == MetricType::COSINE) {\n        if (data_type_ != DataType::VECTOR_FP16 &&\n            data_type_ != DataType::VECTOR_FP32) {\n          return Status::InvalidArgument(\n              \"schema validate failed: cosine metric only supports FP32/FP16 \"\n              \"data types, but field[\",\n              name_, \"]'s data type is \",\n              DataTypeCodeBook::AsString(data_type_));\n        }\n      }\n    }\n  } else {\n    if (index_params_) {\n      if (index_params_->is_vector_index_type()) {\n        return Status::InvalidArgument(\n            \"schema validate failed: scalar_field's index_params only support \"\n            \"INVERT \"\n            \"index, \"\n            \"but field[\",\n            name_, \"]'s index_type is \",\n            IndexTypeCodeBook::AsString(index_params_->type()));\n      }\n    }\n  }\n  return Status::OK();\n}\n\nstd::string FieldSchema::to_string() const {\n  std::ostringstream oss;\n  oss << \"FieldSchema{\"\n      << \"name:'\" << name_ << \"'\"\n      << \",data_type:\" << DataTypeCodeBook::AsString(data_type_)\n      << \",nullable:\" 
<< (nullable_ ? \"true\" : \"false\")\n      << \",dimension:\" << dimension_;\n\n  if (index_params_) {\n    oss << \",index_params:\" << index_params_->to_string();\n  } else {\n    oss << \",index_params:null\";\n  }\n\n  oss << \"}\";\n  return oss.str();\n}\n\nstd::string FieldSchema::to_string_formatted(int indent_level) const {\n  std::ostringstream oss;\n  if (is_vector_field()) {\n    oss << indent(indent_level) << \"FieldSchema[vector]{\\n\";\n  } else {\n    oss << indent(indent_level) << \"FieldSchema[scalar]{\\n\";\n  }\n\n  oss << indent(indent_level + 1) << \"name: '\" << name_ << \"',\\n\"\n      << indent(indent_level + 1)\n      << \"data_type: \" << DataTypeCodeBook::AsString(data_type_) << \",\\n\";\n\n  if (is_vector_field()) {\n    if (is_dense_vector()) {\n      oss << indent(indent_level + 1) << \"dimension: \" << dimension_ << \",\\n\";\n    }\n  } else {\n    oss << indent(indent_level + 1)\n        << \"nullable: \" << (nullable_ ? \"true\" : \"false\") << \",\\n\";\n  }\n\n  if (index_params_) {\n    oss << indent(indent_level + 1)\n        << \"index_params: \" << index_params_->to_string() << \"\\n\";\n  } else {\n    oss << indent(indent_level + 1) << \"index_params: null\\n\";\n  }\n\n  oss << indent(indent_level) << \"}\";\n  return oss.str();\n}\n\nStatus CollectionSchema::validate() const {\n  if (name_.empty()) {\n    return Status::InvalidArgument(\"schema validate failed: name is empty\");\n  }\n  if (!std::regex_match(name_, COLLECTION_NAME_REGEX)) {\n    return Status::InvalidArgument(\n        \"schema validate failed: collection[\", name_,\n        \"]'s name cannot pass the regex verification\");\n  }\n  if (forward_fields().size() > kMaxScalarFieldSize) {\n    return Status::InvalidArgument(\n        \"schema validate failed: collection[\", name_,\n        \"]'s field size must <= \", kMaxScalarFieldSize);\n  }\n  if (max_doc_count_per_segment_ < MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD) {\n    return 
Status::InvalidArgument(\n        \"schema validate failed: max_doc_count_per_segment must >= \",\n        MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD);\n  }\n  auto v_fields = vector_fields();\n  if (v_fields.empty()) {\n    return Status::InvalidArgument(\n        \"schema validate failed: vector fields is empty\");\n  }\n  if (v_fields.size() > kMaxVectorFieldSize) {\n    return Status::InvalidArgument(\n        \"schema validate failed: collection[\", name_,\n        \"]'s vector field size must <= \", kMaxVectorFieldSize);\n  }\n  for (auto &field : fields_) {\n    auto s = field->validate();\n    CHECK_RETURN_STATUS(s);\n  }\n  return Status::OK();\n}\n\nstd::string CollectionSchema::to_string() const {\n  std::ostringstream oss;\n  oss << \"CollectionSchema{\"\n      << \"name:'\" << name_ << \"'\"\n      << \",max_doc_count_per_segment:\" << max_doc_count_per_segment_\n      << \",fields:[\";\n\n  for (size_t i = 0; i < fields_.size(); ++i) {\n    if (i > 0) oss << \",\";\n    oss << fields_[i]->to_string();\n  }\n\n  oss << \"]}\";\n  return oss.str();\n}\n\n\nstd::string CollectionSchema::to_string_formatted(int indent_level) const {\n  std::ostringstream oss;\n  oss << indent(indent_level) << \"CollectionSchema{\\n\"\n      << indent(indent_level + 1) << \"name: '\" << name_ << \"',\\n\"\n      << indent(indent_level + 1)\n      << \"max_doc_count_per_segment: \" << max_doc_count_per_segment_ << \",\\n\"\n      << indent(indent_level + 1) << \"fields: [\\n\";\n\n  for (size_t i = 0; i < fields_.size(); ++i) {\n    oss << fields_[i]->to_string_formatted(indent_level + 2);\n    if (i < fields_.size() - 1) {\n      oss << \",\";\n    }\n    oss << \"\\n\";\n  }\n\n  oss << indent(indent_level + 1) << \"]\\n\" << indent(indent_level) << \"}\";\n  return oss.str();\n}\n\nStatus CollectionSchema::add_field(FieldSchema::Ptr column_schema) {\n  // Check if field already exists\n  if (has_field(column_schema->name())) {\n    return Status::AlreadyExists(\"field[\", 
column_schema->name(),\n                                 \"] already exists in schema\");\n  }\n\n  // Add field to list and map\n  if (column_schema->is_vector_field()) {\n    if (column_schema->index_params() == nullptr) {\n      column_schema->set_index_params(DefaultVectorIndexParams);\n    }\n  }\n\n  fields_.push_back(column_schema);\n  fields_map_[column_schema->name()] = column_schema;\n\n  return Status::OK();\n}\n\nStatus CollectionSchema::alter_field(\n    const std::string &column_name,\n    const FieldSchema::Ptr &new_column_options) {\n  // Check if field exists\n  if (!has_field(column_name)) {\n    return Status::NotFound(\"field[\", column_name, \"] not found in schema\");\n  }\n\n  std::string new_column_name = new_column_options->name();\n\n  // If renaming to an existing field name (and it's not the same field)\n  if (new_column_name != column_name && has_field(new_column_name)) {\n    return Status::AlreadyExists(\"field[\", new_column_name,\n                                 \"] already exists in schema\");\n  }\n\n  // Update map: remove old entry if name changed, add new entry\n  if (new_column_name != column_name) {\n    fields_map_.erase(column_name);\n  }\n  fields_map_[new_column_name] = new_column_options;\n\n  // Update list\n  for (auto &field : fields_) {\n    if (field->name() == column_name) {\n      field = new_column_options;\n      break;\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionSchema::drop_field(const std::string &column_name) {\n  // Check if field exists\n  if (!has_field(column_name)) {\n    return Status::NotFound(\"field[\", column_name, \"] not found in schema\");\n  }\n\n  // Remove from map\n  fields_map_.erase(column_name);\n\n  // Remove from list\n  fields_.erase(std::remove_if(fields_.begin(), fields_.end(),\n                               [&column_name](const FieldSchema::Ptr &field) {\n                                 return field->name() == column_name;\n                               }),\n    
            fields_.end());\n\n  return Status::OK();\n}\n\nbool CollectionSchema::has_field(const std::string &column) const {\n  return fields_map_.find(column) != fields_map_.end();\n}\n\nconst FieldSchema *CollectionSchema::get_field(\n    const std::string &column) const {\n  auto it = fields_map_.find(column);\n  if (it != fields_map_.end()) {\n    return it->second.get();\n  }\n  return nullptr;\n}\n\nFieldSchema *CollectionSchema::get_field(const std::string &column) {\n  auto it = fields_map_.find(column);\n  if (it != fields_map_.end()) {\n    return it->second.get();\n  }\n  return nullptr;\n}\n\nconst FieldSchema *CollectionSchema::get_forward_field(\n    const std::string &column) const {\n  // Forward fields are typically non-vector fields\n  auto field = get_field(column);\n  if (field && !field->is_vector_field()) {\n    return field;\n  }\n  return nullptr;\n}\n\nFieldSchema *CollectionSchema::get_forward_field(const std::string &column) {\n  // Forward fields are typically non-vector fields\n  auto field = get_field(column);\n  if (field && !field->is_vector_field()) {\n    return field;\n  }\n  return nullptr;\n}\n\nconst FieldSchema *CollectionSchema::get_vector_field(\n    const std::string &column) const {\n  // Vector fields are fields with vector data types\n  auto field = get_field(column);\n  if (field && field->is_vector_field()) {\n    return field;\n  }\n  return nullptr;\n}\n\nFieldSchema *CollectionSchema::get_vector_field(const std::string &column) {\n  // Vector fields are fields with vector data types\n  auto field = get_field(column);\n  if (field && field->is_vector_field()) {\n    return field;\n  }\n  return nullptr;\n}\n\nFieldSchemaPtrList CollectionSchema::fields() const {\n  return fields_;\n}\n\nFieldSchemaPtrList CollectionSchema::forward_fields() const {\n  FieldSchemaPtrList forward_fields;\n  for (const auto &field : fields_) {\n    if (!field->is_vector_field()) {\n      forward_fields.push_back(field);\n    }\n  }\n  
return forward_fields;\n}\n\nFieldSchemaPtrList CollectionSchema::forward_fields_with_index() const {\n  FieldSchemaPtrList forward_fields;\n  for (const auto &field : fields_) {\n    if (!field->is_vector_field() && field->index_params() != nullptr) {\n      forward_fields.push_back(field);\n    }\n  }\n  return forward_fields;\n}\n\nstd::vector<std::string> CollectionSchema::forward_field_names() const {\n  std::vector<std::string> names;\n  for (const auto &field : fields_) {\n    if (!field->is_vector_field()) {\n      names.push_back(field->name());\n    }\n  }\n  return names;\n}\n\nstd::vector<std::string> CollectionSchema::forward_field_names_with_index()\n    const {\n  std::vector<std::string> names;\n  for (const auto &field : fields_) {\n    if (!field->is_vector_field() && field->index_params() != nullptr) {\n      names.push_back(field->name());\n    }\n  }\n  return names;\n}\n\nstd::vector<std::string> CollectionSchema::all_field_names() const {\n  std::vector<std::string> names;\n  for (const auto &field : fields_) {\n    names.push_back(field->name());\n  }\n  return names;\n}\n\nFieldSchemaPtrList CollectionSchema::vector_fields() const {\n  FieldSchemaPtrList vector_fields;\n  for (const auto &field : fields_) {\n    if (field->is_vector_field()) {\n      vector_fields.push_back(field);\n    }\n  }\n  return vector_fields;\n}\n\nuint64_t CollectionSchema::max_doc_count_per_segment() const {\n  return max_doc_count_per_segment_;\n}\n\nvoid CollectionSchema::set_max_doc_count_per_segment(\n    uint64_t max_doc_count_per_segment) {\n  max_doc_count_per_segment_ = max_doc_count_per_segment;\n}\n\nStatus CollectionSchema::add_index(const std::string &column,\n                                   const IndexParams::Ptr &index_params) {\n  // Get field and set index params\n  auto field = get_field(column);\n  if (field) {\n    field->set_index_params(index_params);\n  } else {\n    return Status::NotFound(\"field[\", column, \"] not found in 
schema\");\n  }\n\n  return Status::OK();\n}\n\nStatus CollectionSchema::drop_index(const std::string &column) {\n  // Get field and clear index params\n  auto field = get_field(column);\n  if (field) {\n    if (field->is_vector_field()) {\n      field->set_index_params(DefaultVectorIndexParams);\n    } else {\n      field->set_index_params(nullptr);\n    }\n  } else {\n    return Status::NotFound(\"field[\", column, \"] not found in schema\");\n  }\n\n  return Status::OK();\n}\n\nbool CollectionSchema::has_index(const std::string &column) const {\n  auto field = get_field(column);\n  if (field) {\n    if (field->is_vector_field()) {\n      if (field->index_params() == nullptr) {\n        return false;\n      } else {\n        return *field->index_params() != DefaultVectorIndexParams;\n      }\n    }\n    return field->index_params() != nullptr;\n  }\n  return false;\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/stats.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <sstream>\n#include <zvec/db/stats.h>\n#include \"db/common/utils.h\"\n\nnamespace zvec {\nstd::string CollectionStats::to_string() const {\n  std::ostringstream oss;\n  oss << \"CollectionStats{\"\n      << \"doc_count:\" << doc_count << \",index_completeness:{\";\n\n  size_t i = 0;\n  for (const auto &pair : index_completeness) {\n    if (i > 0) oss << \",\";\n    oss << pair.first << \":\" << pair.second;\n    ++i;\n  }\n\n  oss << \"}}\";\n  return oss.str();\n}\n\nstd::string CollectionStats::to_string_formatted(int indent_level) const {\n  std::ostringstream oss;\n  oss << indent(indent_level) << \"CollectionStats{\\n\"\n      << indent(indent_level + 1) << \"doc_count: \" << doc_count << \",\\n\"\n      << indent(indent_level + 1) << \"index_completeness: {\\n\";\n\n  size_t i = 0;\n  for (const auto &pair : index_completeness) {\n    if (i > 0) oss << \",\\n\";\n    oss << indent(indent_level + 2) << pair.first << \": \" << pair.second;\n    ++i;\n  }\n\n  if (!index_completeness.empty()) {\n    oss << \"\\n\";\n  }\n  oss << indent(indent_level + 1) << \"}\\n\" << indent(indent_level) << \"}\";\n\n  return oss.str();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/type_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"type_helper.h\"\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\n\ncore::IndexMeta::DataType DataTypeCodeBook::to_data_type(DataType type) {\n  switch (type) {\n    case DataType::VECTOR_FP32:\n      return core::IndexMeta::DataType::DT_FP32;\n    case DataType::VECTOR_FP64:\n      return core::IndexMeta::DataType::DT_FP64;\n    case DataType::VECTOR_FP16:\n      return core::IndexMeta::DataType::DT_FP16;\n    case DataType::VECTOR_INT8:\n      return core::IndexMeta::DataType::DT_INT8;\n    case DataType::VECTOR_INT16:\n      return core::IndexMeta::DataType::DT_INT16;\n    case DataType::VECTOR_INT4:\n      return core::IndexMeta::DataType::DT_INT4;\n    case DataType::VECTOR_BINARY32:\n      return core::IndexMeta::DataType::DT_BINARY32;\n    case DataType::VECTOR_BINARY64:\n      return core::IndexMeta::DataType::DT_BINARY64;\n\n    case DataType::SPARSE_VECTOR_FP16:\n      return core::IndexMeta::DataType::DT_FP16;\n    case DataType::SPARSE_VECTOR_FP32:\n      return core::IndexMeta::DataType::DT_FP32;\n\n    default:\n      return core::IndexMeta::DataType::DT_UNDEFINED;\n  }\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/type_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/db/type.h>\n#include \"proto/zvec.pb.h\"\n\nnamespace zvec {\n\n//! Index Type Codebook\nstruct IndexTypeCodeBook {\n  //! convert protobuf IndexType to C++ IndexType\n  static IndexType Get(proto::IndexType type) {\n    switch (type) {\n      case proto::IT_HNSW:\n        return IndexType::HNSW;\n      case proto::IT_HNSW_RABITQ:\n        return IndexType::HNSW_RABITQ;\n      case proto::IT_FLAT:\n        return IndexType::FLAT;\n      case proto::IT_IVF:\n        return IndexType::IVF;\n      case proto::IT_INVERT:\n        return IndexType::INVERT;\n      default:\n        break;\n    }\n    return IndexType::UNDEFINED;\n  }\n\n  //! Convert C++ IndexType to protobuf IndexType\n  static proto::IndexType Get(IndexType type) {\n    switch (type) {\n      case IndexType::HNSW:\n        return proto::IT_HNSW;\n      case IndexType::HNSW_RABITQ:\n        return proto::IT_HNSW_RABITQ;\n      case IndexType::FLAT:\n        return proto::IT_FLAT;\n      case IndexType::IVF:\n        return proto::IT_IVF;\n      case IndexType::INVERT:\n        return proto::IT_INVERT;\n      default:\n        break;\n    }\n    return proto::IT_UNDEFINED;\n  }\n\n  //! 
Convert C++ IndexType to C++ String\n  static std::string AsString(IndexType type) {\n    switch (type) {\n      case IndexType::HNSW:\n        return \"HNSW\";\n      case IndexType::HNSW_RABITQ:\n        return \"HNSW_RABITQ\";\n      case IndexType::FLAT:\n        return \"FLAT\";\n      case IndexType::IVF:\n        return \"IVF\";\n      case IndexType::INVERT:\n        return \"INVERT\";\n      default:\n        break;\n    }\n    return \"UNDEFINED\";\n  }\n};\n\nstruct DataTypeCodeBook {\n  static bool IsArrayType(proto::DataType type) {\n    return proto::DataType::DT_ARRAY_BINARY <= type &&\n           type <= proto::DataType::DT_ARRAY_DOUBLE;\n  }\n\n  static DataType Get(proto::DataType type) {\n    DataType data_types = DataType::UNDEFINED;\n    switch (type) {\n      case proto::DataType::DT_BINARY:\n        data_types = DataType::BINARY;\n        break;\n      case proto::DataType::DT_STRING:\n        data_types = DataType::STRING;\n        break;\n      case proto::DataType::DT_BOOL:\n        data_types = DataType::BOOL;\n        break;\n      case proto::DataType::DT_INT32:\n        data_types = DataType::INT32;\n        break;\n      case proto::DataType::DT_INT64:\n        data_types = DataType::INT64;\n        break;\n      case proto::DataType::DT_UINT32:\n        data_types = DataType::UINT32;\n        break;\n      case proto::DataType::DT_UINT64:\n        data_types = DataType::UINT64;\n        break;\n      case proto::DataType::DT_FLOAT:\n        data_types = DataType::FLOAT;\n        break;\n      case proto::DataType::DT_DOUBLE:\n        data_types = DataType::DOUBLE;\n        break;\n      case proto::DataType::DT_VECTOR_BINARY32:\n        data_types = DataType::VECTOR_BINARY32;\n        break;\n      case proto::DataType::DT_VECTOR_BINARY64:\n        data_types = DataType::VECTOR_BINARY64;\n        break;\n      case proto::DataType::DT_VECTOR_FP16:\n        data_types = DataType::VECTOR_FP16;\n        break;\n      case 
proto::DataType::DT_VECTOR_FP32:\n        data_types = DataType::VECTOR_FP32;\n        break;\n      case proto::DataType::DT_VECTOR_FP64:\n        data_types = DataType::VECTOR_FP64;\n        break;\n      case proto::DataType::DT_VECTOR_INT4:\n        data_types = DataType::VECTOR_INT4;\n        break;\n      case proto::DataType::DT_VECTOR_INT8:\n        data_types = DataType::VECTOR_INT8;\n        break;\n      case proto::DataType::DT_VECTOR_INT16:\n        data_types = DataType::VECTOR_INT16;\n        break;\n      case proto::DataType::DT_SPARSE_VECTOR_FP16:\n        data_types = DataType::SPARSE_VECTOR_FP16;\n        break;\n      case proto::DataType::DT_SPARSE_VECTOR_FP32:\n        data_types = DataType::SPARSE_VECTOR_FP32;\n        break;\n      case proto::DataType::DT_ARRAY_BINARY:\n        data_types = DataType::ARRAY_BINARY;\n        break;\n      case proto::DataType::DT_ARRAY_STRING:\n        data_types = DataType::ARRAY_STRING;\n        break;\n      case proto::DataType::DT_ARRAY_BOOL:\n        data_types = DataType::ARRAY_BOOL;\n        break;\n      case proto::DataType::DT_ARRAY_INT32:\n        data_types = DataType::ARRAY_INT32;\n        break;\n      case proto::DataType::DT_ARRAY_INT64:\n        data_types = DataType::ARRAY_INT64;\n        break;\n      case proto::DataType::DT_ARRAY_UINT32:\n        data_types = DataType::ARRAY_UINT32;\n        break;\n      case proto::DataType::DT_ARRAY_UINT64:\n        data_types = DataType::ARRAY_UINT64;\n        break;\n      case proto::DataType::DT_ARRAY_FLOAT:\n        data_types = DataType::ARRAY_FLOAT;\n        break;\n      case proto::DataType::DT_ARRAY_DOUBLE:\n        data_types = DataType::ARRAY_DOUBLE;\n        break;\n\n      default:\n        break;\n    }\n    return data_types;\n  }\n\n  static proto::DataType Get(const DataType type) {\n    proto::DataType data_type = proto::DataType::DT_UNDEFINED;\n    switch (type) {\n      case DataType::BINARY:\n        data_type = 
proto::DataType::DT_BINARY;\n        break;\n      case DataType::STRING:\n        data_type = proto::DataType::DT_STRING;\n        break;\n      case DataType::BOOL:\n        data_type = proto::DataType::DT_BOOL;\n        break;\n      case DataType::INT32:\n        data_type = proto::DataType::DT_INT32;\n        break;\n      case DataType::INT64:\n        data_type = proto::DataType::DT_INT64;\n        break;\n      case DataType::UINT32:\n        data_type = proto::DataType::DT_UINT32;\n        break;\n      case DataType::UINT64:\n        data_type = proto::DataType::DT_UINT64;\n        break;\n      case DataType::FLOAT:\n        data_type = proto::DataType::DT_FLOAT;\n        break;\n      case DataType::DOUBLE:\n        data_type = proto::DataType::DT_DOUBLE;\n        break;\n      case DataType::VECTOR_BINARY32:\n        data_type = proto::DataType::DT_VECTOR_BINARY32;\n        break;\n      case DataType::VECTOR_BINARY64:\n        data_type = proto::DataType::DT_VECTOR_BINARY64;\n        break;\n      case DataType::VECTOR_FP16:\n        data_type = proto::DataType::DT_VECTOR_FP16;\n        break;\n      case DataType::VECTOR_FP32:\n        data_type = proto::DataType::DT_VECTOR_FP32;\n        break;\n      case DataType::VECTOR_FP64:\n        data_type = proto::DataType::DT_VECTOR_FP64;\n        break;\n      case DataType::VECTOR_INT4:\n        data_type = proto::DataType::DT_VECTOR_INT4;\n        break;\n      case DataType::VECTOR_INT8:\n        data_type = proto::DataType::DT_VECTOR_INT8;\n        break;\n      case DataType::VECTOR_INT16:\n        data_type = proto::DataType::DT_VECTOR_INT16;\n        break;\n      case DataType::SPARSE_VECTOR_FP16:\n        data_type = proto::DataType::DT_SPARSE_VECTOR_FP16;\n        break;\n      case DataType::SPARSE_VECTOR_FP32:\n        data_type = proto::DataType::DT_SPARSE_VECTOR_FP32;\n        break;\n      case DataType::ARRAY_BINARY:\n        data_type = proto::DataType::DT_ARRAY_BINARY;\n        break;\n  
    case DataType::ARRAY_BOOL:\n        data_type = proto::DataType::DT_ARRAY_BOOL;\n        break;\n      case DataType::ARRAY_DOUBLE:\n        data_type = proto::DataType::DT_ARRAY_DOUBLE;\n        break;\n      case DataType::ARRAY_FLOAT:\n        data_type = proto::DataType::DT_ARRAY_FLOAT;\n        break;\n      case DataType::ARRAY_INT32:\n        data_type = proto::DataType::DT_ARRAY_INT32;\n        break;\n      case DataType::ARRAY_INT64:\n        data_type = proto::DataType::DT_ARRAY_INT64;\n        break;\n      case DataType::ARRAY_STRING:\n        data_type = proto::DataType::DT_ARRAY_STRING;\n        break;\n      case DataType::ARRAY_UINT32:\n        data_type = proto::DataType::DT_ARRAY_UINT32;\n        break;\n      case DataType::ARRAY_UINT64:\n        data_type = proto::DataType::DT_ARRAY_UINT64;\n        break;\n      default:\n        break;\n    }\n\n    return data_type;\n  }\n\n  static std::string AsString(DataType type) {\n    std::string data_type;\n\n    switch (type) {\n      case DataType::BINARY:\n        data_type = \"BINARY\";\n        break;\n      case DataType::STRING:\n        data_type = \"STRING\";\n        break;\n      case DataType::BOOL:\n        data_type = \"BOOL\";\n        break;\n      case DataType::INT32:\n        data_type = \"INT32\";\n        break;\n      case DataType::INT64:\n        data_type = \"INT64\";\n        break;\n      case DataType::UINT32:\n        data_type = \"UINT32\";\n        break;\n      case DataType::UINT64:\n        data_type = \"UINT64\";\n        break;\n      case DataType::FLOAT:\n        data_type = \"FLOAT\";\n        break;\n      case DataType::DOUBLE:\n        data_type = \"DOUBLE\";\n        break;\n      case DataType::VECTOR_BINARY32:\n        data_type = \"VECTOR_BINARY32\";\n        break;\n      case DataType::VECTOR_BINARY64:\n        data_type = \"VECTOR_BINARY64\";\n        break;\n      case DataType::VECTOR_FP16:\n        data_type = \"VECTOR_FP16\";\n        break;\n  
    case DataType::VECTOR_FP32:\n        data_type = \"VECTOR_FP32\";\n        break;\n      case DataType::VECTOR_FP64:\n        data_type = \"VECTOR_FP64\";\n        break;\n      case DataType::VECTOR_INT4:\n        data_type = \"VECTOR_INT4\";\n        break;\n      case DataType::VECTOR_INT8:\n        data_type = \"VECTOR_INT8\";\n        break;\n      case DataType::VECTOR_INT16:\n        data_type = \"VECTOR_INT16\";\n        break;\n      case DataType::SPARSE_VECTOR_FP16:\n        data_type = \"SPARSE_VECTOR_FP16\";\n        break;\n      case DataType::SPARSE_VECTOR_FP32:\n        data_type = \"SPARSE_VECTOR_FP32\";\n        break;\n      case DataType::ARRAY_BINARY:\n        data_type = \"ARRAY_BINARY\";\n        break;\n      case DataType::ARRAY_BOOL:\n        data_type = \"ARRAY_BOOL\";\n        break;\n      case DataType::ARRAY_DOUBLE:\n        data_type = \"ARRAY_DOUBLE\";\n        break;\n      case DataType::ARRAY_FLOAT:\n        data_type = \"ARRAY_FLOAT\";\n        break;\n      case DataType::ARRAY_INT32:\n        data_type = \"ARRAY_INT32\";\n        break;\n      case DataType::ARRAY_INT64:\n        data_type = \"ARRAY_INT64\";\n        break;\n      case DataType::ARRAY_STRING:\n        data_type = \"ARRAY_STRING\";\n        break;\n      case DataType::ARRAY_UINT32:\n        data_type = \"ARRAY_UINT32\";\n        break;\n      case DataType::ARRAY_UINT64:\n        data_type = \"ARRAY_UINT64\";\n        break;\n      default:\n        break;\n    }\n\n    return data_type;\n  }\n\n  static core::IndexMeta::DataType to_data_type(DataType type);\n};\n\nstruct MetricTypeCodeBook {\n  static MetricType Get(proto::MetricType type) {\n    switch (type) {\n      case proto::MetricType::MT_IP:\n        return MetricType::IP;\n      case proto::MetricType::MT_L2:\n        return MetricType::L2;\n      case proto::MetricType::MT_COSINE:\n        return MetricType::COSINE;\n      default:\n        return MetricType::UNDEFINED;\n    }\n  }\n\n  static 
proto::MetricType Get(MetricType type) {\n    switch (type) {\n      case MetricType::IP:\n        return proto::MetricType::MT_IP;\n      case MetricType::L2:\n        return proto::MetricType::MT_L2;\n      case MetricType::COSINE:\n        return proto::MetricType::MT_COSINE;\n      default:\n        return proto::MetricType::MT_UNDEFINED;\n    }\n  }\n\n  static std::string AsString(MetricType type) {\n    switch (type) {\n      case MetricType::IP:\n        return \"IP\";\n      case MetricType::L2:\n        return \"L2\";\n      case MetricType::COSINE:\n        return \"COSINE\";\n      default:\n        return \"UNDEFINED\";\n    }\n  }\n};\n\nstruct QuantizeTypeCodeBook {\n  static QuantizeType Get(proto::QuantizeType type) {\n    switch (type) {\n      case proto::QuantizeType::QT_FP16:\n        return QuantizeType::FP16;\n      case proto::QuantizeType::QT_INT4:\n        return QuantizeType::INT4;\n      case proto::QuantizeType::QT_INT8:\n        return QuantizeType::INT8;\n      case proto::QuantizeType::QT_RABITQ:\n        return QuantizeType::RABITQ;\n      default:\n        return QuantizeType::UNDEFINED;\n    }\n  }\n\n  static proto::QuantizeType Get(QuantizeType type) {\n    switch (type) {\n      case QuantizeType::FP16:\n        return proto::QuantizeType::QT_FP16;\n      case QuantizeType::INT4:\n        return proto::QuantizeType::QT_INT4;\n      case QuantizeType::INT8:\n        return proto::QuantizeType::QT_INT8;\n      case QuantizeType::RABITQ:\n        return proto::QuantizeType::QT_RABITQ;\n      default:\n        return proto::QuantizeType::QT_UNDEFINED;\n    }\n  }\n\n  static std::string AsString(QuantizeType type) {\n    switch (type) {\n      case QuantizeType::FP16:\n        return \"FP16\";\n      case QuantizeType::INT4:\n        return \"INT4\";\n      case QuantizeType::INT8:\n        return \"INT8\";\n      case QuantizeType::RABITQ:\n        return \"RABITQ\";\n      default:\n        return \"UNDEFINED\";\n    }\n  }\n\n  
static std::string AsString(std::set<QuantizeType> type) {\n    std::string str;\n    for (auto t : type) {\n      str += QuantizeTypeCodeBook::AsString(t) + \",\";\n    }\n    return str.substr(0, str.size() - 1);\n  }\n};\n\nstruct BlockTypeCodeBook {\n  static BlockType Get(proto::BlockType type) {\n    BlockType block_types = BlockType::UNDEFINED;\n    switch (type) {\n      case proto::BlockType::BT_SCALAR:\n        block_types = BlockType::SCALAR;\n        break;\n      case proto::BlockType::BT_SCALAR_INDEX:\n        block_types = BlockType::SCALAR_INDEX;\n        break;\n      case proto::BlockType::BT_VECTOR_INDEX:\n        block_types = BlockType::VECTOR_INDEX;\n        break;\n      case proto::BlockType::BT_VECTOR_INDEX_QUANTIZE:\n        block_types = BlockType::VECTOR_INDEX_QUANTIZE;\n        break;\n      default:\n        break;\n    }\n    return block_types;\n  }\n\n  static proto::BlockType Get(BlockType type) {\n    proto::BlockType block_types = proto::BlockType::BT_UNDEFINED;\n    switch (type) {\n      case BlockType::SCALAR:\n        block_types = proto::BlockType::BT_SCALAR;\n        break;\n      case BlockType::SCALAR_INDEX:\n        block_types = proto::BlockType::BT_SCALAR_INDEX;\n        break;\n      case BlockType::VECTOR_INDEX:\n        block_types = proto::BlockType::BT_VECTOR_INDEX;\n        break;\n      case BlockType::VECTOR_INDEX_QUANTIZE:\n        block_types = proto::BlockType::BT_VECTOR_INDEX_QUANTIZE;\n        break;\n      default:\n        break;\n    }\n\n    return block_types;\n  }\n\n  static std::string AsString(BlockType type) {\n    switch (type) {\n      case BlockType::SCALAR:\n        return \"SCALAR\";\n      case BlockType::SCALAR_INDEX:\n        return \"SCALAR_INDEX\";\n      case BlockType::VECTOR_INDEX:\n        return \"VECTOR_INDEX\";\n      case BlockType::VECTOR_INDEX_QUANTIZE:\n        return \"VECTOR_INDEX_QUANTIZE\";\n      default:\n        return \"UNDEFINED\";\n    }\n  }\n};\n\n}  // namespace 
zvec"
  },
  {
    "path": "src/db/index/common/version_manager.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"version_manager.h\"\n#include <cerrno>\n#include <cstdint>\n#include <cstring>\n#include <filesystem>\n#include <fstream>\n#include <mutex>\n#include <regex>\n#include <string>\n#include <proto/zvec.pb.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/status.h>\n#include \"db/common/file_helper.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/common/proto_converter.h\"\n#include \"db/index/common/type_helper.h\"\n\nnamespace zvec {\n\nStatus Version::Load(const std::string &path, Version *version) {\n  std::ifstream ifs(path, std::ios::binary);\n  if (!ifs.is_open()) {\n    LOG_ERROR(\"Failed to open file: %s\", path.c_str());\n    return Status::InternalError(\"Failed to open file\");\n  }\n\n  proto::Manifest manifest;\n\n  if (!manifest.ParseFromIstream(&ifs)) {\n    LOG_ERROR(\"Failed to parse manifest from file: %s\", path.c_str());\n    return Status::InternalError(\"Failed to parse manifest\");\n  }\n\n  CollectionSchema::Ptr schema = ProtoConverter::FromPb(manifest.schema());\n  version->set_schema(*schema);\n\n  version->set_enable_mmap(manifest.enable_mmap());\n\n  for (int i = 0; i < manifest.persisted_segment_metas_size(); ++i) {\n    SegmentMeta::Ptr meta =\n        
ProtoConverter::FromPb(manifest.persisted_segment_metas(i));\n    version->add_persisted_segment_meta(meta);\n  }\n\n  if (manifest.has_writing_segment_meta()) {\n    SegmentMeta::Ptr meta =\n        ProtoConverter::FromPb(manifest.writing_segment_meta());\n    version->reset_writing_segment_meta(meta);\n  }\n\n  version->set_id_map_path_suffix(manifest.id_map_path_suffix());\n  version->set_delete_snapshot_path_suffix(\n      manifest.delete_snapshot_path_suffix());\n\n  version->set_next_segment_id(manifest.next_segment_id());\n\n  return Status::OK();\n}\n\nStatus Version::Save(const std::string &path, const Version &version) {\n  std::ofstream ofs(path, std::ios::binary);\n  if (!ofs.is_open()) {\n    LOG_ERROR(\"Failed to open file: %s, err: %s\", path.c_str(),\n              strerror(errno));\n    return Status::InternalError(\"Failed to open file: %s\", path.c_str());\n  }\n\n  proto::Manifest manifest;\n\n  // set schema\n  auto schema = ProtoConverter::ToPb(version.schema());\n  manifest.mutable_schema()->Swap(&schema);\n\n  manifest.set_enable_mmap(version.enable_mmap());\n\n  // set segments meta\n  for (auto &meta : version.persisted_segment_metas()) {\n    auto meta_pb = ProtoConverter::ToPb(*meta);\n    manifest.add_persisted_segment_metas()->Swap(&meta_pb);\n  }\n\n  if (version.writing_segment_meta()) {\n    auto meta_pb = ProtoConverter::ToPb(*version.writing_segment_meta());\n    manifest.mutable_writing_segment_meta()->Swap(&meta_pb);\n  }\n\n  manifest.set_id_map_path_suffix(version.id_map_path_suffix());\n  manifest.set_delete_snapshot_path_suffix(\n      version.delete_snapshot_path_suffix());\n  manifest.set_next_segment_id(version.next_segment_id());\n\n  if (!manifest.SerializeToOstream(&ofs)) {\n    LOG_ERROR(\"Failed to serialize manifest to file: %s\", path.c_str());\n    return Status::InternalError(\"Failed to serialize manifest to file\");\n  }\n\n  return Status::OK();\n}\n\nstd::string Version::to_string() const {\n  
std::ostringstream oss;\n  oss << \"Version{\" << \"schema:\" << (schema_ ? schema_->to_string() : \"null\")\n      << \",persisted_segment_metas:[\";\n\n  size_t i = 0;\n  for (const auto &pair : persisted_segment_metas_map_) {\n    if (i > 0) oss << \",\";\n    oss << pair.second->to_string();\n    ++i;\n  }\n\n  oss << \"],writing_segment_meta:\";\n  if (writing_segment_meta_) {\n    oss << writing_segment_meta_->to_string();\n  } else {\n    oss << \"null\";\n  }\n\n  oss << \",id_map_path_suffix:\" << id_map_path_suffix_\n      << \",delete_snapshot_path_suffix:\" << delete_snapshot_path_suffix_\n      << \",next_segment_id:\" << next_segment_id_\n      << \",enable_mmap:\" << enable_mmap_ << \"}\";\n  return oss.str();\n}\n\nstd::string Version::to_string_formatted(int indent_level) const {\n  std::ostringstream oss;\n  oss << indent(indent_level) << \"Version{\\n\"\n      << indent(indent_level + 1) << \"schema: \";\n\n  if (schema_) {\n    oss << \"\\n\" << schema_->to_string_formatted(indent_level + 2) << \"\\n\";\n  } else {\n    oss << \"null\\n\";\n  }\n\n  oss << indent(indent_level + 1) << \"persisted_segment_metas: [\\n\";\n\n  size_t i = 0;\n  for (const auto &pair : persisted_segment_metas_map_) {\n    oss << pair.second->to_string_formatted(indent_level + 2);\n    if (i < persisted_segment_metas_map_.size() - 1) {\n      oss << \",\";\n    }\n    oss << \"\\n\";\n    ++i;\n  }\n\n  oss << \"\\n\"\n      << indent(indent_level + 1) << \"],\\n\"\n      << indent(indent_level + 1) << \"writing_segment_meta: \";\n\n  if (writing_segment_meta_) {\n    oss << \"\\n\"\n        << writing_segment_meta_->to_string_formatted(indent_level + 2) << \"\\n\";\n  } else {\n    oss << \"null\\n\";\n  }\n\n  oss << indent(indent_level + 1)\n      << \"id_map_path_suffix: \" << id_map_path_suffix_ << \",\\n\"\n      << indent(indent_level + 1)\n      << \"delete_snapshot_path_suffix: \" << delete_snapshot_path_suffix_\n      << \",\\n\"\n      << indent(indent_level 
+ 1) << \"next_segment_id: \" << next_segment_id_\n      << \"\\n\"\n      << indent(indent_level + 1) << \"enable_mmap: \" << enable_mmap_ << \"\\n\"\n      << indent(indent_level) << \"}\";\n  return oss.str();\n}\n\nResult<VersionManager::Ptr> VersionManager::Recovery(const std::string &path) {\n  namespace fs = std::filesystem;\n  if (!fs::exists(path)) {\n    LOG_ERROR(\"VersionManager::Recovery: path %s does not exist\", path.c_str());\n    return tl::make_unexpected(\n        Status::NotFound(\"path \", path, \" does not exist\"));\n  }\n  if (!fs::is_directory(path)) {\n    LOG_ERROR(\"VersionManager::Recovery: path %s is not a directory\",\n              path.c_str());\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"path\", path, \" is not a directory\"));\n  }\n\n  std::string prefix = GetFileName(FileID::MANIFEST_FILE);\n  std::string manifest_pattern = \"^\" + prefix + R\"(\\.(\\d+)$)\";\n  std::regex regex(manifest_pattern);\n  std::smatch match;\n\n  uint64_t max_id = UINT64_MAX;\n  std::string version_path;\n\n  for (const auto &entry : fs::directory_iterator(path)) {\n    if (entry.is_regular_file()) {\n      std::string filename = entry.path().filename().string();\n      if (std::regex_match(filename, match, regex)) {\n        uint64_t id = std::stoull(match[1].str());\n        if (id > max_id || max_id == UINT64_MAX) {\n          max_id = id;\n          version_path = entry.path().string();\n        }\n      }\n    }\n  }\n\n  if (max_id == UINT64_MAX) {\n    LOG_ERROR(\"Failed to find the version file in collction_path(%s)\",\n              path.c_str());\n    return tl::make_unexpected(\n        Status::NotFound(\"Failed to find the version file\"));\n  }\n\n  Version version;\n  auto s = Version::Load(version_path, &version);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  VersionManager::Ptr manager =\n      VersionManager::Ptr(new VersionManager(path, version, max_id + 1));\n\n  return manager;\n}\n\nResult<VersionManager::Ptr> 
VersionManager::Create(\n    const std::string &path, const Version &initial_version) {\n  VersionManager::Ptr manager =\n      VersionManager::Ptr(new VersionManager(path, initial_version));\n  return manager;\n}\n\nVersionManager::VersionManager(const std::string &path,\n                               const Version &initial_version,\n                               uint64_t version_id)\n    : path_(path), current_version_(initial_version), version_id_(version_id) {}\n\nVersion VersionManager::get_current_version() {\n  std::lock_guard lock(mtx_);\n  return current_version_;\n}\n\nStatus VersionManager::apply(const Version &version) {\n  std::lock_guard lock(mtx_);\n  current_version_ = version;\n  return Status::OK();\n}\n\nStatus VersionManager::reset_writing_segment_meta(SegmentMeta::Ptr meta) {\n  std::lock_guard lock(mtx_);\n  current_version_.reset_writing_segment_meta(meta);\n  return Status::OK();\n}\n\nStatus VersionManager::add_persisted_segment_meta(SegmentMeta::Ptr meta) {\n  std::lock_guard lock(mtx_);\n  return current_version_.add_persisted_segment_meta(meta);\n}\n\nStatus VersionManager::remove_persisted_segment_meta(SegmentID id) {\n  std::lock_guard lock(mtx_);\n  return current_version_.remove_persisted_segment_meta(id);\n}\n\nStatus VersionManager::flush() {\n  std::lock_guard lock(mtx_);\n\n  std::string current_path;\n  if (version_id_ != 0) {\n    current_path =\n        FileHelper::MakeFilePath(path_, FileID::MANIFEST_FILE, version_id_ - 1);\n  }\n\n  auto s = Version::Save(\n      FileHelper::MakeFilePath(path_, FileID::MANIFEST_FILE, version_id_++),\n      current_version_);\n  CHECK_RETURN_STATUS(s);\n\n  if (!current_path.empty()) {\n    FileHelper::RemoveFile(current_path);\n  }\n\n  return Status::OK();\n}\n\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/common/version_manager.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <memory>\n#include <mutex>\n#include <unordered_map>\n#include <vector>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include \"db/index/common/meta.h\"\n\nnamespace zvec {\n\nclass Version {\n public:\n  using Ptr = std::shared_ptr<Version>;\n\n  Version() = default;\n\n  static Status Load(const std::string &path, Version *version);\n\n  static Status Save(const std::string &path, const Version &version);\n\n public:\n  void set_schema(const CollectionSchema &schema) {\n    schema_ = std::make_shared<CollectionSchema>(schema);\n  }\n\n  const CollectionSchema &schema() const {\n    return *schema_;\n  }\n\n  void set_enable_mmap(bool enable_mmap) {\n    enable_mmap_ = enable_mmap;\n  }\n\n  bool enable_mmap() const {\n    return enable_mmap_;\n  }\n\n  Status add_persisted_segment_meta(const SegmentMeta::Ptr &meta) {\n    if (meta == nullptr) {\n      return Status::InvalidArgument(\"Segment meta is null\");\n    }\n    auto iter = persisted_segment_metas_map_.find(meta->id());\n    if (iter != persisted_segment_metas_map_.end()) {\n      return Status::InvalidArgument(\"Segment meta already exists\");\n    }\n    persisted_segment_metas_map_[meta->id()] = meta;\n    return Status::OK();\n  }\n\n  Status remove_persisted_segment_meta(SegmentID segment_id) {\n    
auto iter = persisted_segment_metas_map_.find(segment_id);\n    if (iter == persisted_segment_metas_map_.end()) {\n      return Status::NotFound(\"Segment meta not found\");\n    }\n    persisted_segment_metas_map_.erase(segment_id);\n    return Status::OK();\n  }\n\n  Status update_persisted_segment_meta(SegmentMeta::Ptr meta) {\n    if (meta == nullptr) {\n      return Status::InvalidArgument(\"Segment meta is null\");\n    }\n    auto iter = persisted_segment_metas_map_.find(meta->id());\n    if (iter == persisted_segment_metas_map_.end()) {\n      return Status::NotFound(\"Segment meta not found\");\n    }\n    persisted_segment_metas_map_[meta->id()] =\n        std::make_shared<SegmentMeta>(*meta);\n    return Status::OK();\n  }\n\n  void set_persisted_segment_metas(const std::vector<SegmentMeta::Ptr> &metas) {\n    for (auto &meta : metas) {\n      persisted_segment_metas_map_[meta->id()] = meta;\n    }\n  }\n\n  std::vector<SegmentMeta::Ptr> persisted_segment_metas() const {\n    std::vector<SegmentMeta::Ptr> segment_metas;\n    segment_metas.reserve(persisted_segment_metas_map_.size());\n    for (auto &segment_meta : persisted_segment_metas_map_) {\n      segment_metas.push_back(segment_meta.second);\n    }\n\n    std::sort(segment_metas.begin(), segment_metas.end(),\n              [](const SegmentMeta::Ptr &lhs, const SegmentMeta::Ptr &rhs) {\n                return lhs->min_doc_id() < rhs->min_doc_id();\n              });\n\n    return segment_metas;\n  }\n\n  void reset_writing_segment_meta(SegmentMeta::Ptr segment_meta) {\n    writing_segment_meta_ = segment_meta;\n  }\n\n  SegmentMeta::Ptr writing_segment_meta() const {\n    return writing_segment_meta_;\n  }\n\n  void set_id_map_path_suffix(uint32_t suffix) {\n    id_map_path_suffix_ = suffix;\n  }\n\n  uint32_t id_map_path_suffix() const {\n    return id_map_path_suffix_;\n  }\n\n  void set_delete_snapshot_path_suffix(uint32_t suffix) {\n    delete_snapshot_path_suffix_ = suffix;\n  }\n\n  uint32_t 
delete_snapshot_path_suffix() const {\n    return delete_snapshot_path_suffix_;\n  }\n\n  void set_next_segment_id(SegmentID id) {\n    next_segment_id_ = id;\n  }\n\n  SegmentID next_segment_id() const {\n    return next_segment_id_;\n  }\n\n public:\n  bool operator==(const Version &other) const {\n    if (*schema_ != *other.schema_ ||\n        persisted_segment_metas_map_.size() !=\n            other.persisted_segment_metas_map_.size()) {\n      return false;\n    }\n\n    for (const auto &item : persisted_segment_metas_map_) {\n      auto it = other.persisted_segment_metas_map_.find(item.first);\n      if (it == other.persisted_segment_metas_map_.end() ||\n          *item.second != *it->second) {\n        return false;\n      }\n    }\n\n    return true;\n  }\n\n  std::string to_string() const;\n\n  std::string to_string_formatted(int indent_level = 0) const;\n\n private:\n  CollectionSchema::Ptr schema_;\n  bool enable_mmap_;\n\n  std::unordered_map<SegmentID, SegmentMeta::Ptr> persisted_segment_metas_map_;\n\n  SegmentMeta::Ptr writing_segment_meta_;\n\n  uint32_t id_map_path_suffix_{0};\n  uint32_t delete_snapshot_path_suffix_{0};\n\n  SegmentID next_segment_id_{0};\n};\n\n// Wrapper of Current Version\nclass VersionManager {\n public:\n  using Ptr = std::shared_ptr<VersionManager>;\n\n  static Result<VersionManager::Ptr> Recovery(const std::string &path);\n\n  static Result<VersionManager::Ptr> Create(const std::string &path,\n                                            const Version &initial_version);\n\n private:\n  VersionManager(const std::string &path, const Version &initial_version,\n                 uint64_t version_id = 0);\n\n public:\n  Version get_current_version();\n\n  // overwrite the current version\n  Status apply(const Version &version);\n\n  Status reset_writing_segment_meta(SegmentMeta::Ptr meta);\n\n  Status add_persisted_segment_meta(SegmentMeta::Ptr meta);\n\n  Status remove_persisted_segment_meta(SegmentID id);\n\n  Status 
flush();\n\n  void set_id_map_path_suffix(uint32_t suffix) {\n    std::lock_guard lock(mtx_);\n    current_version_.set_id_map_path_suffix(suffix);\n  }\n\n  void set_delete_snapshot_path_suffix(uint32_t suffix) {\n    std::lock_guard lock(mtx_);\n    current_version_.set_delete_snapshot_path_suffix(suffix);\n  }\n\n  uint32_t delete_snapshot_path_suffix() const {\n    std::lock_guard lock(mtx_);\n    return current_version_.delete_snapshot_path_suffix();\n  }\n\n  void set_next_segment_id(SegmentID id) {\n    std::lock_guard lock(mtx_);\n    current_version_.set_next_segment_id(id);\n  }\n\n  void set_enable_mmap(bool enable_mmap) {\n    std::lock_guard lock(mtx_);\n    current_version_.set_enable_mmap(enable_mmap);\n  }\n\n private:\n  const std::string path_;\n  Version current_version_;\n  mutable std::mutex mtx_;\n\n  uint64_t version_id_ = 0;\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/segment/column_merging_reader.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"column_merging_reader.h\"\n#include <iostream>\n#include <arrow/array.h>\n#include <arrow/result.h>\n#include <arrow/status.h>\n#include <arrow/table.h>\n#include \"db/index/storage/store_helper.h\"\n\nnamespace zvec {\n\nstd::shared_ptr<ColumnMergingReader> ColumnMergingReader::Make(\n    const std::shared_ptr<arrow::Schema> &target_schema,\n    std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>\n        &&input_readers) {\n  return std::make_shared<ColumnMergingReader>(target_schema,\n                                               std::move(input_readers));\n}\n\nColumnMergingReader::ColumnMergingReader(\n    const std::shared_ptr<arrow::Schema> &target_schema,\n    std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> &&input_readers)\n    : target_schema_(target_schema), input_readers_(std::move(input_readers)) {\n  current_batches_.resize(input_readers_.size());\n  std::fill(current_batches_.begin(), current_batches_.end(), nullptr);\n}\n\nstd::shared_ptr<arrow::Schema> ColumnMergingReader::schema() const {\n  return target_schema_;\n}\n\narrow::Status ColumnMergingReader::ReadNext(\n    std::shared_ptr<arrow::RecordBatch> *out) {\n  *out = nullptr;\n\n  if (!has_more_) {\n    return arrow::Status::OK();\n  }\n\n  // Read next batch from each input reader\n  for (size_t i = 0; i < input_readers_.size(); ++i) 
{\n    arrow::Status status = input_readers_[i]->ReadNext(&current_batches_[i]);\n    if (!status.ok()) {\n      return status;\n    }\n  }\n\n  // Check if all readers have reached EOF\n  bool all_null = true;\n  for (const auto &batch : current_batches_) {\n    if (batch != nullptr) {\n      all_null = false;\n      break;\n    }\n  }\n\n  // All readers reached EOF\n  if (all_null) {\n    has_more_ = false;\n    return arrow::Status::OK();\n  }\n\n  // Verify that all non-null batches have consistent row counts\n  int64_t expected_rows = -1;\n  for (const auto &batch : current_batches_) {\n    if (batch) {\n      if (expected_rows == -1) {\n        expected_rows = batch->num_rows();\n      } else if (expected_rows != batch->num_rows()) {\n        return arrow::Status::Invalid(\n            \"Input readers have inconsistent row counts\");\n      }\n    }\n  }\n\n  if (expected_rows <= 0) {\n    has_more_ = false;\n    return arrow::Status::OK();\n  }\n\n  // Build each column\n  std::vector<std::shared_ptr<arrow::Array>> columns;\n  columns.reserve(target_schema_->num_fields());\n\n  for (int i = 0; i < target_schema_->num_fields(); ++i) {\n    auto field = target_schema_->field(i);\n    std::shared_ptr<arrow::Array> col_array = nullptr;\n\n    // Try to find this column from any batch\n    for (const auto &batch : current_batches_) {\n      if (!batch) continue;\n      int col_idx = batch->schema()->GetFieldIndex(field->name());\n      if (col_idx != -1) {\n        col_array = batch->column(col_idx);\n        break;\n      }\n    }\n\n    if (!col_array) {\n      return arrow::Status::Invalid(\n          \"Failed to find column in any input reader: \", field->name());\n    }\n\n    columns.push_back(std::move(col_array));\n  }\n\n  // Construct final batch\n  *out = arrow::RecordBatch::Make(target_schema_, expected_rows,\n                                  std::move(columns));\n  if (!*out) {\n    return arrow::Status::Invalid(\"Failed to create merged record 
batch\");\n  }\n\n  // Clear current batches, prepare for next read\n  std::fill(current_batches_.begin(), current_batches_.end(), nullptr);\n\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/column_merging_reader.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <arrow/api.h>\n#include <arrow/ipc/reader.h>\n\nnamespace zvec {\n\nclass ColumnMergingReader : public arrow::RecordBatchReader {\n public:\n  static std::shared_ptr<ColumnMergingReader> Make(\n      const std::shared_ptr<arrow::Schema> &target_schema,\n      std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>\n          &&input_readers);\n\n  explicit ColumnMergingReader(\n      const std::shared_ptr<arrow::Schema> &target_schema,\n      std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>\n          &&input_readers);\n\n  ~ColumnMergingReader() override = default;  // LCOV_EXCL_LINE\n\n  std::shared_ptr<arrow::Schema> schema() const override;\n\n  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override;\n\n private:\n  std::shared_ptr<arrow::Schema> target_schema_;\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> input_readers_;\n\n  std::vector<std::shared_ptr<arrow::RecordBatch>> current_batches_;\n  bool has_more_ = true;\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/segment/segment.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"segment.h\"\n#include <algorithm>\n#include <cstddef>\n#include <cstdint>\n#include <filesystem>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <unordered_map>\n#include <ailego/parallel/multi_thread_list.h>\n#include <ailego/pattern/defer.h>\n#include <arrow/dataset/dataset.h>\n#include <arrow/dataset/scanner.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/table.h>\n#include <arrow/util/iterator.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/parallel/thread_queue.h>\n#include <zvec/db/config.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/index_params.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#if RABITQ_SUPPORTED\n#include \"core/algorithm/hnsw_rabitq/rabitq_params.h\"\n#endif\n#include \"db/common/constants.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/common/global_resource.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n#include \"db/index/column/vector_column/engine_helper.hpp\"\n#include \"db/index/column/vector_column/vector_column_indexer.h\"\n#include \"db/index/column/vector_column/vector_column_params.h\"\n#include \"db/index/common/index_filter.h\"\n#include \"db/index/common/meta.h\"\n#include 
\"db/index/segment/segment_helper.h\"\n#include \"db/index/storage/base_forward_store.h\"\n#include \"db/index/storage/bufferpool_forward_store.h\"\n#include \"db/index/storage/memory_forward_store.h\"\n#include \"db/index/storage/mmap_forward_store.h\"\n#include \"db/index/storage/store_helper.h\"\n#include \"db/index/storage/wal/wal_file.h\"\n#include \"zvec/ailego/container/params.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"column_merging_reader.h\"\n#include \"sql_expr_parser.h\"\n\nnamespace zvec {\n\nvoid global_init() {\n  static std::once_flag once;\n  // run once\n  std::call_once(once, []() {\n    auto status = arrow::compute::Initialize();\n    if (!status.ok()) {\n      LOG_ERROR(\"arrow compute init failed: [%s]\", status.ToString().c_str());\n      abort();\n    }\n  });\n}\n\nclass SegmentImpl : public Segment,\n                    public std::enable_shared_from_this<SegmentImpl> {\n public:\n  using Ptr = std::shared_ptr<SegmentImpl>;\n\n  class SegmentIndexFilter : public IndexFilter {\n   public:\n    SegmentIndexFilter(const DeleteStore::Ptr &delete_store,\n                       SegmentImpl::Ptr impl)\n        : delete_store_(delete_store), impl_(impl) {}\n\n    bool is_filtered(uint64_t id) const override;\n\n   private:\n    DeleteStore::Ptr delete_store_;\n    std::weak_ptr<SegmentImpl> impl_;\n  };\n\n  SegmentImpl(const std::string &path, const CollectionSchema &schema,\n              const SegmentMeta &segment_meta, const IDMap::Ptr &id_map,\n              const DeleteStore::Ptr &delete_store,\n              const VersionManager::Ptr &version_manager)\n      : path_(path),\n        collection_schema_(std::make_shared<CollectionSchema>(schema)),\n        segment_meta_(std::make_shared<SegmentMeta>(segment_meta)),\n        version_manager_(version_manager),\n        
id_map_(id_map),\n        delete_store_(delete_store) {\n    seg_path_ = FileHelper::MakeSegmentPath(path_, segment_meta.id());\n  }\n\n  virtual ~SegmentImpl() {\n    close();\n    if (need_destroyed_) {\n      cleanup();\n    }\n  }\n\n  SegmentID id() const override;\n\n  SegmentMeta::Ptr meta() const override;\n\n  uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) override;\n\n  Status Insert(Doc &doc) override;\n\n  Status Update(Doc &doc) override;\n\n  Status Upsert(Doc &doc) override;\n\n  Status Delete(const std::string &pk) override;\n\n  Status Delete(uint64_t g_doc_id) override;\n\n  Doc::Ptr Fetch(uint64_t g_doc_id) override;\n\n  CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(\n      const std::string &field_name) const override;\n\n  CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(\n      const std::string &field_name) const override;\n\n  VectorColumnIndexer::Ptr get_memory_vector_indexer(\n      const std::string &field_name);\n\n  VectorColumnIndexer::Ptr get_memory_quant_vector_indexer(\n      const std::string &field_name);\n\n  std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(\n      const std::string &field_name) const override;\n\n  virtual std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(\n      const std::string &field_name) const override;\n\n  InvertedColumnIndexer::Ptr get_scalar_indexer(\n      const std::string &field_name) const override;\n\n  const IndexFilter::Ptr get_filter() override;\n\n  Status create_all_vector_index(\n      int concurrency, SegmentMeta::Ptr *new_segment_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) override;\n\n  Status create_vector_index(\n      const std::string &column, const IndexParams::Ptr &index_params,\n      int concurrency, SegmentMeta::Ptr *new_segment_meta,\n      
std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) override;\n\n  Status drop_vector_index(\n      const std::string &column, SegmentMeta::Ptr *new_segment_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers) override;\n\n  Status reload_vector_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &new_segment_meta,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &vector_indexers,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &quant_vector_indexers) override;\n\n  bool vector_index_ready(const std::string &column,\n                          const IndexParams::Ptr &index_params) const override;\n\n  bool all_vector_index_ready() const override;\n\n  Status create_scalar_index(const std::vector<std::string> &columns,\n                             const IndexParams::Ptr &index_params,\n                             SegmentMeta::Ptr *new_segment_meta,\n                             InvertedIndexer::Ptr *new_scalar_indexer) override;\n\n  Status drop_scalar_index(const std::vector<std::string> &columns,\n                           SegmentMeta::Ptr *new_segment_meta,\n                           InvertedIndexer::Ptr *new_scalar_indexer) override;\n\n  Status reload_scalar_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n      const InvertedIndexer::Ptr &scalar_indexer) override;\n\n  Status dump() override;\n\n  Status flush() override;\n\n  Status destroy() override;\n\n  TablePtr fetch(const std::vector<std::string> &columns,\n                 const std::vector<int> &indices) const override;\n\n  ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                     int indice) const override;\n\n  RecordBatchReaderPtr scan(\n      const std::vector<std::string> 
&columns) const override;\n\n  Status add_column(FieldSchema::Ptr column_schema,\n                    const std::string &expression,\n                    const AddColumnOptions &options) override;\n\n  Status alter_column(const std::string &column_name,\n                      const FieldSchema::Ptr &new_column_schema,\n                      const AlterColumnOptions &options) override;\n\n  Status drop_column(const std::string &column_name) override;\n\n public:\n  Status Open(const SegmentOptions &options);\n  Status Create(const SegmentOptions &options, uint64_t min_doc_id);\n\n private:\n  Status close();\n  Status cleanup();\n  bool ready_for_dump_block();\n\n  // Helper functions for Open()\n  Status load_persist_scalar_blocks();\n  Status load_scalar_index_blocks(bool create = false);\n  Status load_vector_index_blocks();\n  Status init_memory_components();\n  Status finish_memory_components();\n\n  void fresh_persist_block_offset();\n  void calculate_block_offsets();\n  int find_persist_block_id(BlockType type, int segment_doc_id,\n                            const std::string &col_name = \"\",\n                            int *out_offset_idx = nullptr) const;\n  const std::vector<int> &get_persist_block_offsets(\n      BlockType type, const std::string &col_name = \"\") const;\n  const std::vector<BlockMeta> &get_persist_block_metas(\n      BlockType type, const std::string &col_name = \"\") const;\n\n  VectorColumnIndexer::Ptr create_vector_indexer(const std::string &field_name,\n                                                 const FieldSchema &field,\n                                                 BlockID block_id,\n                                                 bool is_quantized = false);\n\n  Result<VectorColumnIndexer::Ptr> merge_vector_indexer(\n      const std::string &index_file_path, const std::string &column,\n      const FieldSchema &field, int concurrency);\n\n  // Helper functions for Insert/Update/Upsert/Delete\n  template <typename 
ValueType>\n  Status InsertScalar(InvertedColumnIndexer::Ptr &indexer, const Doc &doc,\n                      const FieldSchema::Ptr &field);\n  template <typename ValueType>\n  Status InsertVector(VectorColumnIndexer::Ptr &indexer, const Doc &doc,\n                      const FieldSchema::Ptr &field);\n  Status ConvertVectorDataBufferToDocField(\n      const FieldSchema::Ptr &field,\n      const vector_column_params::VectorDataBuffer &buf, Doc *doc);\n\n  Status insert_scalar_indexer(Doc &doc);\n  Status insert_vector_indexer(Doc &doc);\n  Status internal_insert(Doc &doc);\n  Status internal_update(Doc &doc);\n  Status internal_upsert(Doc &doc);\n  Status internal_delete(const Doc &doc);\n\n  Status recover();\n  Status open_wal_file();\n  Status append_wal(const Doc &doc);\n  Status update_version(uint32_t delete_snapshot_path_suffix);\n\n  Result<uint64_t> get_global_doc_id(uint32_t local_id) const;\n\n  BlockID allocate_block_id();\n\n  bool validate(const std::vector<std::string> &columns) const;\n\n  Status reopen_invert_indexer(bool read_only = false);\n\n  Status insert_array_to_invert_indexer(\n      const FieldSchema::Ptr &schema,\n      const std::shared_ptr<arrow::ChunkedArray> &data,\n      InvertedColumnIndexer::Ptr *column_indexer);\n\n  TablePtr fetch_normal(const std::vector<std::string> &columns,\n                        const std::shared_ptr<arrow::Schema> &result_schema,\n                        const std::vector<int> &indices) const;\n\n  // For performance tuning\n  TablePtr fetch_perf(const std::vector<std::string> &columns,\n                      const std::shared_ptr<arrow::Schema> &result_schema,\n                      const std::vector<int> &indices) const;\n\n  void fresh_persist_chunked_array();\n\n private:\n  // scalar forward (uses segment-local doc ID)\n  MemForwardStore::Ptr memory_store_;\n  std::vector<BaseForwardStore::Ptr> persist_stores_;\n\n  // scalar index (uses segment-local doc ID)\n  InvertedIndexer::Ptr 
invert_indexers_;\n\n  // vector index (uses block-local doc ID, each indexer starts from 0)\n  std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n      memory_vector_indexers_;\n\n  std::unordered_map<std::string, BlockID> memory_vector_block_ids_;\n\n  std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n      quant_memory_vector_indexers_;\n\n  std::unordered_map<std::string, BlockID> quant_memory_vector_block_ids_;\n\n  std::unordered_map<std::string, std::vector<VectorColumnIndexer::Ptr>>\n      vector_indexers_;\n\n  std::unordered_map<std::string, std::vector<VectorColumnIndexer::Ptr>>\n      quant_vector_indexers_;\n\n  // index filter\n  IndexFilter::Ptr filter_;\n\n  std::string path_;\n  std::string seg_path_;\n  CollectionSchema::Ptr collection_schema_;\n  SegmentMeta::Ptr segment_meta_;\n  VersionManager::Ptr version_manager_;\n  SegmentOptions options_;\n\n  IDMap::Ptr id_map_;\n  DeleteStore::Ptr delete_store_;\n\n  // Maps segment-local doc ID (array index) to global doc ID (stored value)\n  std::vector<uint64_t> doc_ids_;\n\n  std::array<std::variant<std::vector<int>,\n                          std::unordered_map<std::string, std::vector<int>>>,\n             static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE) + 1>\n      persist_block_offsets_;\n  std::array<\n      std::variant<std::vector<BlockMeta>,\n                   std::unordered_map<std::string, std::vector<BlockMeta>>>,\n      static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE) + 1>\n      persist_block_metas_;\n\n  std::atomic<uint64_t> doc_id_allocator_{0};\n  std::atomic<BlockID> block_id_allocator_{0};\n\n  // wal\n  WalFilePtr wal_file_{nullptr};\n\n  bool sealed_{false};\n\n  mutable std::mutex seg_mtx_;\n\n  // segment column lock\n  mutable std::shared_mutex seg_col_mtx_;\n\n  bool need_destroyed_{false};\n\n  // For performance tuning\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> persist_chunk_arrays_;\n  std::vector<uint64_t> chunk_offsets_;\n  
std::unordered_map<std::string, int> col_idx_map_;\n  bool use_fetch_perf_{false};\n\n  // Inner classes\n  class CombinedRecordBatchReader;\n};\n\nclass SegmentImpl::CombinedRecordBatchReader : public arrow::RecordBatchReader {\n public:\n  CombinedRecordBatchReader(\n      std::shared_ptr<const SegmentImpl> segment,\n      std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers,\n      const std::vector<std::string> &columns);\n\n  ~CombinedRecordBatchReader();\n\n  std::shared_ptr<arrow::Schema> schema() const override;\n\n  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override;\n\n private:\n  std::shared_ptr<const SegmentImpl> segment_;\n  std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers_;\n  std::vector<uint64_t> offsets_;\n  std::shared_ptr<arrow::Schema> projected_schema_;\n  bool need_local_doc_id_ = false;\n  size_t current_reader_index_;\n  size_t local_doc_id_;\n  int local_doc_id_col_index_ = -1;\n};\n\n////////////////////////////////////////////////////////////////////////////////////\n// SegmentImpl implementation\n////////////////////////////////////////////////////////////////////////////////////\n\nbool SegmentImpl::SegmentIndexFilter::is_filtered(uint64_t id) const {\n  auto impl = impl_.lock();\n  if (!impl) return false;\n  auto result = impl->get_global_doc_id(id);\n  if (!result.has_value()) {\n    return false;\n  }\n  uint64_t doc_id = result.value();\n  if (delete_store_ && delete_store_->is_deleted(doc_id)) {\n    return true;\n  }\n  return false;\n}\n\nStatus SegmentImpl::Open(const SegmentOptions &options) {\n  options_ = options;\n  options_.enable_mmap_ = version_manager_->get_current_version().enable_mmap();\n\n  filter_ =\n      std::make_shared<SegmentIndexFilter>(delete_store_, shared_from_this());\n\n  // load persist forward blocks\n  auto s = load_persist_scalar_blocks();\n  CHECK_RETURN_STATUS(s);\n\n  // load scalar indexes\n  s = load_scalar_index_blocks();\n  
CHECK_RETURN_STATUS(s);\n\n  // load vector indexes\n  s = load_vector_index_blocks();\n  CHECK_RETURN_STATUS(s);\n\n  auto writing_block = segment_meta_->writing_forward_block();\n  if (!writing_block.has_value() && !options_.read_only_) {\n    return Status::InternalError(\n        \"No writing block found when in writing mode.\");\n  }\n\n  if (writing_block.has_value()) {\n    // init doc_id_allocator and block_id_allocator\n    doc_id_allocator_ = writing_block.value().min_doc_id();\n    BlockID max_block_id{writing_block.value().id()};\n    for (auto &block : segment_meta_->persisted_blocks()) {\n      max_block_id = std::max(max_block_id, block.id());\n    }\n    block_id_allocator_ = max_block_id + 1;\n\n    // recover writing block\n    s = recover();\n    CHECK_RETURN_STATUS(s);\n  } else {\n    // Update block_id_allocator_\n    BlockID max_block_id{0};\n    auto &persist_blocks = segment_meta_->persisted_blocks();\n    for (const auto &block : persist_blocks) {\n      max_block_id = std::max(max_block_id, block.id());\n    }\n    block_id_allocator_.store(max_block_id + 1);\n  }\n\n  fresh_persist_block_offset();\n\n  fresh_persist_chunked_array();\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::Create(const SegmentOptions &options, uint64_t min_doc_id) {\n  options_ = options;\n  filter_ =\n      std::make_shared<SegmentIndexFilter>(delete_store_, shared_from_this());\n\n  // init memory forward block\n  auto block_id = allocate_block_id();\n  std::vector<std::string> columns{GLOBAL_DOC_ID, USER_ID};\n  std::vector<std::string> schema_forward_fields =\n      collection_schema_->forward_field_names();\n  columns.insert(columns.end(), schema_forward_fields.begin(),\n                 schema_forward_fields.end());\n\n  segment_meta_->set_writing_forward_block(\n      {block_id, BlockType::SCALAR, min_doc_id, min_doc_id, 0, columns});\n  auto vector_fields = collection_schema_->vector_fields();\n  for (auto &field : vector_fields) {\n    if 
(field->index_params()->type() == IndexType::FLAT) {\n      segment_meta_->add_indexed_vector_field(field->name());\n    }\n  }\n  auto s = load_scalar_index_blocks(true);\n  CHECK_RETURN_STATUS(s);\n\n  doc_id_allocator_.store(min_doc_id);\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::close() {\n  flush();\n  if (invert_indexers_) {\n    invert_indexers_.reset();\n  }\n  for (const auto &[name, indexers] : vector_indexers_) {\n    for (auto indexer : indexers) {\n      indexer->Close();\n    }\n  }\n  vector_indexers_.clear();\n  for (auto [name, indexer] : memory_vector_indexers_) {\n    indexer->Close();\n  }\n  memory_vector_indexers_.clear();\n\n  return Status::OK();\n}\n\nSegmentID SegmentImpl::id() const {\n  return segment_meta_->id();\n}\n\nSegmentMeta::Ptr SegmentImpl::meta() const {\n  return segment_meta_;\n}\n\nuint64_t SegmentImpl::doc_count(const IndexFilter::Ptr filter) {\n  uint64_t doc_count = doc_ids_.size();\n  if (filter) {\n    for (const auto &doc_id : doc_ids_) {\n      if (filter->is_filtered(doc_id)) {\n        doc_count--;\n      }\n    }\n  }\n\n  return doc_count;\n}\n\ntemplate <typename T>\nstruct is_vector : std::false_type {};\n\ntemplate <typename T, typename A>\nstruct is_vector<std::vector<T, A>> : std::true_type {};\n\ntemplate <typename ValueType>\nStatus SegmentImpl::InsertScalar(InvertedColumnIndexer::Ptr &indexer,\n                                 const Doc &doc,\n                                 const FieldSchema::Ptr &field) {\n  auto value = doc.get<ValueType>(field->name());\n  auto segment_doc_id = doc_ids_.size();\n  if (value.has_value()) {\n    if constexpr (std::is_same_v<ValueType, std::vector<bool>>) {\n      return indexer->insert(segment_doc_id, value.value());\n    } else if constexpr (std::is_same_v<ValueType, std::vector<std::string>>) {\n      return indexer->insert(segment_doc_id, value.value());\n    } else if constexpr (is_vector<ValueType>::value) {\n      const auto &vec = value.value();\n      
std::string value_str(\n          reinterpret_cast<const char *>(vec.data()),\n          vec.size() * sizeof(typename ValueType::value_type));\n      return indexer->insert(segment_doc_id, value_str);\n    } else if constexpr (std::is_same_v<ValueType, std::string>) {\n      const ValueType &val = value.value();\n      return indexer->insert(segment_doc_id, val);\n    } else if constexpr (std::is_same_v<ValueType, bool>) {\n      const ValueType &val = value.value();\n      return indexer->insert(segment_doc_id, val);\n    } else {\n      const ValueType &val = value.value();\n      std::string value_str(reinterpret_cast<const char *>(&val),\n                            sizeof(ValueType));\n      return indexer->insert(segment_doc_id, value_str);\n    }\n  } else {\n    return indexer->insert_null(segment_doc_id);\n  }\n  return Status::OK();\n}\n\ntemplate <typename ValueType>\nStatus SegmentImpl::InsertVector(VectorColumnIndexer::Ptr &indexer,\n                                 const Doc &doc,\n                                 const FieldSchema::Ptr &field) {\n  auto value = doc.get<ValueType>(field->name());\n  if (value.has_value()) {\n    vector_column_params::VectorData vector_data;\n    if constexpr (std::is_same_v<ValueType,\n                                 std::pair<std::vector<uint32_t>,\n                                           std::vector<float16_t>>>) {\n      const std::vector<uint32_t> &sparse_indices = value.value().first;\n      const std::vector<float16_t> &sparse_value = value.value().second;\n      vector_data.vector = vector_column_params::SparseVector{\n          (uint32_t)sparse_indices.size(), (void *)sparse_indices.data(),\n          (void *)sparse_value.data()};\n    } else if constexpr (std::is_same_v<ValueType,\n                                        std::pair<std::vector<uint32_t>,\n                                                  std::vector<float>>>) {\n      const std::vector<uint32_t> &sparse_indices = value.value().first;\n     
 const std::vector<float> &sparse_value = value.value().second;\n      vector_data.vector = vector_column_params::SparseVector{\n          (uint32_t)sparse_indices.size(), (void *)sparse_indices.data(),\n          (void *)sparse_value.data()};\n    } else {\n      vector_data.vector =\n          vector_column_params::DenseVector{.data = value.value().data()};\n    }\n\n    auto &mem_block_meta = segment_meta_->writing_forward_block().value();\n    auto &block_doc_id = mem_block_meta.doc_count_;\n\n    return indexer->Insert(vector_data, block_doc_id);\n  } else {\n    LOG_WARN(\"Field %s not found or is null for doc: %s\", field->name().c_str(),\n             doc.to_detail_string().c_str());\n  }\n  return Status::OK();\n}\n\nStatus SegmentImpl::insert_scalar_indexer(Doc &doc) {\n  for (const auto &field : collection_schema_->forward_fields()) {\n    auto index_type = field->index_type();\n    if (index_type != IndexType::INVERT) {\n      continue;\n    }\n    auto indexer = get_scalar_indexer(field->name());\n    if (!indexer) {\n      return Status::InternalError(\"Field \", field->name(), \" indexer is null\");\n    }\n    Status status;\n    auto data_type = field->data_type();\n    switch (field->data_type()) {\n      case DataType::BINARY: {\n        status = InsertScalar<std::string>(indexer, doc, field);\n        break;\n      }\n      case DataType::STRING: {\n        status = InsertScalar<std::string>(indexer, doc, field);\n        break;\n      }\n      case DataType::BOOL:\n        status = InsertScalar<bool>(indexer, doc, field);\n        break;\n      case DataType::INT32:\n        status = InsertScalar<int32_t>(indexer, doc, field);\n        break;\n      case DataType::INT64:\n        status = InsertScalar<int64_t>(indexer, doc, field);\n        break;\n      case DataType::UINT32:\n        status = InsertScalar<uint32_t>(indexer, doc, field);\n        break;\n      case DataType::UINT64:\n        status = InsertScalar<uint64_t>(indexer, doc, 
field);\n        break;\n      case DataType::FLOAT:\n        status = InsertScalar<float>(indexer, doc, field);\n        break;\n      case DataType::DOUBLE:\n        status = InsertScalar<double>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_BINARY:\n        status = InsertScalar<std::vector<std::string>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_STRING:\n        status = InsertScalar<std::vector<std::string>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_BOOL:\n        status = InsertScalar<std::vector<bool>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_INT32:\n        status = InsertScalar<std::vector<int32_t>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_INT64:\n        status = InsertScalar<std::vector<int64_t>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_UINT32:\n        status = InsertScalar<std::vector<uint32_t>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_UINT64:\n        status = InsertScalar<std::vector<uint64_t>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_FLOAT:\n        status = InsertScalar<std::vector<float>>(indexer, doc, field);\n        break;\n      case DataType::ARRAY_DOUBLE:\n        status = InsertScalar<std::vector<double>>(indexer, doc, field);\n        break;\n      default:\n        status = Status::InternalError(\"unsupport data type \",\n                                       DataTypeCodeBook::AsString(data_type));\n    }\n    if (!status.ok()) {\n      LOG_ERROR(\"insert scalar failed[%s]\", status.message().c_str());\n      return status;\n    }\n  }\n  return Status::OK();\n}\n\nStatus SegmentImpl::insert_vector_indexer(Doc &doc) {\n  for (const auto &field : collection_schema_->vector_fields()) {\n    std::vector<VectorColumnIndexer::Ptr> indexers;\n    auto m_indexer = get_memory_vector_indexer(field->name());\n    if (!m_indexer) {\n      LOG_ERROR(\"vector 
indexer not found for field %s\", field->name().c_str());\n      return Status::InternalError(\"vector indexer not found for field: \",\n                                   field->name());\n    }\n    indexers.push_back(m_indexer);\n    auto vector_index_params =\n        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n      m_indexer = get_memory_quant_vector_indexer(field->name());\n      if (!m_indexer) {\n        LOG_ERROR(\"quant vector indexer not found for field %s\",\n                  field->name().c_str());\n        return Status::InternalError(\n            \"quant vector indexer not found for field: \", field->name());\n      }\n      indexers.push_back(m_indexer);\n    }\n\n    for (auto indexer : indexers) {\n      Status status;\n      auto data_type = field->data_type();\n      switch (data_type) {\n        case DataType::VECTOR_BINARY32:\n          status = InsertVector<std::vector<uint32_t>>(indexer, doc, field);\n          break;\n        case DataType::VECTOR_BINARY64:\n          status = InsertVector<std::vector<uint64_t>>(indexer, doc, field);\n          break;\n        case DataType::VECTOR_FP16:\n          status = InsertVector<std::vector<float16_t>>(indexer, doc, field);\n          break;\n        case DataType::VECTOR_FP32:\n          status = InsertVector<std::vector<float>>(indexer, doc, field);\n          break;\n        case DataType::VECTOR_FP64:\n          status = InsertVector<std::vector<double>>(indexer, doc, field);\n          break;\n        // case DataType::VECTOR_INT4:\n        //   status = InsertVector<std::vector<int8_t>>(indexer, doc, field);\n        //   break;\n        case DataType::VECTOR_INT8:\n          status = InsertVector<std::vector<int8_t>>(indexer, doc, field);\n          break;\n        case DataType::VECTOR_INT16:\n          status = InsertVector<std::vector<int16_t>>(indexer, doc, field);\n          break;\n   
     case DataType::SPARSE_VECTOR_FP16:\n          status = InsertVector<\n              std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(\n              indexer, doc, field);\n          break;\n        case DataType::SPARSE_VECTOR_FP32:\n          status = InsertVector<\n              std::pair<std::vector<uint32_t>, std::vector<float>>>(indexer,\n                                                                    doc, field);\n          break;\n        default:\n          status = Status::InvalidArgument(\n              \"unsupport data type\", DataTypeCodeBook::AsString(data_type));\n      }\n      if (!status.ok()) {\n        LOG_ERROR(\"insert vector failed[%s]\", status.message().c_str());\n        return status;\n      }\n    }\n  }\n  return Status::OK();\n}\n\nStatus SegmentImpl::internal_insert(Doc &doc) {\n  uint64_t g_doc_id = doc_id_allocator_.fetch_add(1);\n  doc.set_doc_id(g_doc_id);\n\n  if (ready_for_dump_block()) {\n    auto s = flush();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // init writing memory components\n  if (!memory_store_) {\n    auto s = init_memory_components();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // write idmap\n  auto s = id_map_->upsert(doc.pk(), g_doc_id);\n  CHECK_RETURN_STATUS(s);\n\n  // write forward\n  s = memory_store_->insert(doc);\n  CHECK_RETURN_STATUS(s);\n\n  // write scalar index\n  s = insert_scalar_indexer(doc);\n  if (!s.ok() && s.code() != StatusCode::ALREADY_EXISTS) {\n    return s;\n  }\n  // write vector index\n  s = insert_vector_indexer(doc);\n  if (!s.ok() && s != Status::AlreadyExists()) {\n    return s;\n  }\n\n  auto &mem_block = segment_meta_->writing_forward_block().value();\n  mem_block.max_doc_id_ = g_doc_id;\n  mem_block.doc_count_ = mem_block.doc_count_ + 1;\n\n  doc_ids_.push_back(g_doc_id);\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::internal_update(Doc &doc) {\n  delete_store_->mark_deleted(doc.doc_id());\n  return internal_insert(doc);\n}\n\nStatus 
SegmentImpl::internal_upsert(Doc &doc) {\n  uint64_t g_doc_id;\n  bool exist = id_map_->has(doc.pk(), &g_doc_id);\n  if (exist) {\n    delete_store_->mark_deleted(g_doc_id);\n  }\n  return internal_insert(doc);\n}\n\nStatus SegmentImpl::internal_delete(const Doc &doc) {\n  delete_store_->mark_deleted(doc.doc_id());\n  id_map_->remove(doc.pk());\n  return Status::OK();\n}\n\nStatus SegmentImpl::Insert(Doc &doc) {\n  std::lock_guard lock(seg_mtx_);\n\n  if (id_map_ && id_map_->has(doc.pk())) {\n    return Status::AlreadyExists(\"insert failed: doc_id[\", doc.pk(),\n                                 \"] already exists in collection\");\n  }\n\n  doc.set_operator(Operator::INSERT);\n\n  // append wal\n  auto s = append_wal(doc);\n  CHECK_RETURN_STATUS(s);\n\n  return internal_insert(doc);\n}\n\nStatus SegmentImpl::Update(Doc &doc) {\n  std::lock_guard lock(seg_mtx_);\n  uint64_t g_doc_id;\n  if (!id_map_->has(doc.pk(), &g_doc_id)) {\n    return Status::NotFound(\"update failed: doc_id[\", doc.pk(),\n                            \"] not found in collection\");\n  }\n\n  doc.set_doc_id(g_doc_id);\n  doc.set_operator(Operator::UPDATE);\n\n  // append wal\n  auto s = append_wal(doc);\n  CHECK_RETURN_STATUS(s);\n\n  return internal_update(doc);\n}\n\nStatus SegmentImpl::Upsert(Doc &doc) {\n  std::lock_guard lock(seg_mtx_);\n\n  doc.set_operator(Operator::UPSERT);\n\n  // append wal\n  auto s = append_wal(doc);\n  CHECK_RETURN_STATUS(s);\n\n  return internal_upsert(doc);\n}\n\nStatus SegmentImpl::Delete(const std::string &pk) {\n  std::lock_guard lock(seg_mtx_);\n\n  uint64_t g_doc_id;\n  if (!id_map_->has(pk, &g_doc_id)) {\n    return Status::NotFound(\"primary key: \", pk, \" not found\");\n  }\n  if (delete_store_->is_deleted(g_doc_id)) {\n    return Status::NotFound(\"primary key: \", pk, \" g_doc_id: \", g_doc_id,\n                            \" already deleted\");\n  }\n\n  Doc mutable_doc;\n  mutable_doc.set_pk(pk);\n  mutable_doc.set_doc_id(g_doc_id);\n  
mutable_doc.set_operator(Operator::DELETE);\n\n  // append wal\n  auto s = append_wal(mutable_doc);\n  CHECK_RETURN_STATUS(s);\n\n  return internal_delete(mutable_doc);\n}\n\n// Note: Here we have no way to determine if g_doc_id is valid\nStatus SegmentImpl::Delete(uint64_t g_doc_id) {\n  std::lock_guard lock(seg_mtx_);\n  if (delete_store_->is_deleted(g_doc_id)) {\n    return Status::NotFound(\"g_doc_id:\", g_doc_id, \" already deleted\");\n  }\n\n  Doc mutable_doc;\n  mutable_doc.set_doc_id(g_doc_id);\n  mutable_doc.set_operator(Operator::DELETE);\n\n  // append wal\n  auto s = append_wal(mutable_doc);\n  CHECK_RETURN_STATUS(s);\n  return internal_delete(mutable_doc);\n}\n\ntemplate <typename T>\nStatus DenseVectorDataConverter(\n    const FieldSchema::Ptr &field,\n    const vector_column_params::DenseVectorBuffer &buffer, Doc *doc) {\n  const T *data_ptr = reinterpret_cast<const T *>(buffer.data.data());\n  size_t data_size = buffer.data.size() / sizeof(T);\n  std::vector<T> vector_data(data_ptr, data_ptr + data_size);\n  doc->set(field->name(), vector_data);\n  return Status::OK();\n}\n\ntemplate <typename IndexType, typename ValueType>\nStatus SparseVectorDataConverter(\n    const FieldSchema::Ptr &field,\n    const vector_column_params::SparseVectorBuffer &buffer, Doc *doc) {\n  const IndexType *indices_ptr =\n      reinterpret_cast<const IndexType *>(buffer.indices.data());\n  size_t indices_size = buffer.indices.size() / sizeof(IndexType);\n  std::vector<IndexType> indices_vector(indices_ptr,\n                                        indices_ptr + indices_size);\n\n  const ValueType *values_ptr =\n      reinterpret_cast<const ValueType *>(buffer.values.data());\n  size_t values_size = buffer.values.size() / sizeof(ValueType);\n  std::vector<ValueType> values_vector(values_ptr, values_ptr + values_size);\n\n  std::pair<std::vector<IndexType>, std::vector<ValueType>> sparse_vector_pair(\n      std::move(indices_vector), std::move(values_vector));\n  
doc->set(field->name(), sparse_vector_pair);\n  return Status::OK();\n}\n\n\nStatus SegmentImpl::ConvertVectorDataBufferToDocField(\n    const FieldSchema::Ptr &field,\n    const vector_column_params::VectorDataBuffer &buf, Doc *doc) {\n  Status status;\n  if (std::holds_alternative<vector_column_params::DenseVectorBuffer>(\n          buf.vector_buffer)) {\n    const auto &dense_buffer =\n        std::get<vector_column_params::DenseVectorBuffer>(buf.vector_buffer);\n    switch (field->data_type()) {\n      case DataType::VECTOR_BINARY32: {\n        status = DenseVectorDataConverter<uint32_t>(field, dense_buffer, doc);\n        break;\n      }\n      case DataType::VECTOR_BINARY64: {\n        status = DenseVectorDataConverter<uint64_t>(field, dense_buffer, doc);\n        break;\n      }\n      case DataType::VECTOR_FP16: {\n        status = DenseVectorDataConverter<float16_t>(field, dense_buffer, doc);\n        break;\n      }\n      case DataType::VECTOR_FP32: {\n        status = DenseVectorDataConverter<float>(field, dense_buffer, doc);\n        break;\n      }\n      case DataType::VECTOR_FP64: {\n        status = DenseVectorDataConverter<double>(field, dense_buffer, doc);\n        break;\n      }\n      // case DataType::VECTOR_INT4: {\n      //   status = DenseVectorDataConverter<int8_t>(field, dense_buffer, doc);\n      //   break;\n      // }\n      case DataType::VECTOR_INT8: {\n        status = DenseVectorDataConverter<int8_t>(field, dense_buffer, doc);\n        break;\n      }\n      case DataType::VECTOR_INT16: {\n        status = DenseVectorDataConverter<int16_t>(field, dense_buffer, doc);\n        break;\n      }\n      default:\n        return Status::InvalidArgument(\n            \"Unsupported dense vector element type: \", field->data_type());\n    }\n  } else if (std::holds_alternative<vector_column_params::SparseVectorBuffer>(\n                 buf.vector_buffer)) {\n    const auto &sparse_buffer =\n        
std::get<vector_column_params::SparseVectorBuffer>(buf.vector_buffer);\n    switch (field->data_type()) {\n      case DataType::SPARSE_VECTOR_FP16: {\n        status = SparseVectorDataConverter<uint32_t, float16_t>(\n            field, sparse_buffer, doc);\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP32: {\n        status = SparseVectorDataConverter<uint32_t, float>(field,\n                                                            sparse_buffer, doc);\n        break;\n      }\n      default:\n        return Status::InvalidArgument(\n            \"Unsupported sparse vector element type: \", field->data_type());\n    }\n  } else {\n    return Status::InvalidArgument(\"Unsupported vector buffer type\");\n  }\n\n  return status;\n}\n\n\nDoc::Ptr SegmentImpl::Fetch(uint64_t g_doc_id) {\n  std::lock_guard lock(seg_mtx_);\n\n  if (g_doc_id > segment_meta_->max_doc_id()) {\n    LOG_ERROR(\"g_doc_id[%zu] not exist in segment[%d] \", (size_t)g_doc_id,\n              id());\n    return nullptr;\n  }\n\n  int segment_doc_id = 0;\n  auto it = std::lower_bound(doc_ids_.begin(), doc_ids_.end(), g_doc_id);\n  if (it != doc_ids_.end() && *it == g_doc_id) {\n    segment_doc_id = static_cast<int>(std::distance(doc_ids_.begin(), it));\n  } else {\n    LOG_ERROR(\n        \"g_doc_id[%zu] not found in doc_ids_[%zu], min_doc_id[%zu] \"\n        \"max_doc_id[%zu], meta[%s]\",\n        (size_t)g_doc_id, doc_ids_.size(), (size_t)doc_ids_.front(),\n        (size_t)doc_ids_.back(), segment_meta_->to_string_formatted().c_str());\n    return nullptr;\n  }\n\n  std::vector<std::string> forward_columns;\n  forward_columns.push_back(GLOBAL_DOC_ID);\n  forward_columns.push_back(USER_ID);\n  for (const auto &field : collection_schema_->forward_fields()) {\n    forward_columns.push_back(field->name());\n  }\n\n  // Build result schema\n  std::vector<std::shared_ptr<arrow::Field>> fields;\n  for (size_t i = 0; i < forward_columns.size(); ++i) {\n    const auto &col = 
forward_columns[i];\n    if (col == GLOBAL_DOC_ID) {\n      fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64()));\n    } else if (col == USER_ID) {\n      fields.push_back(arrow::field(USER_ID, arrow::utf8()));\n    } else {\n      auto *field = collection_schema_->get_field(col);\n      std::shared_ptr<arrow::Field> arrow_field;\n      auto status = ConvertFieldSchemaToArrowField(field, &arrow_field);\n      if (!status.ok()) {\n        LOG_ERROR(\"Convert field schema failed: %s\",\n                  field->to_string().c_str());\n        return nullptr;\n      }\n      fields.push_back(std::move(arrow_field));\n    }\n  }\n  auto result_schema = std::make_shared<arrow::Schema>(fields);\n\n  // fetch forward columns\n  auto exec_batch = fetch(forward_columns, segment_doc_id);\n  if (!exec_batch) {\n    LOG_ERROR(\"Fetch failed, doc_id: %zu\", (size_t)g_doc_id);\n    return nullptr;\n  }\n  if (exec_batch->length != 1) {\n    LOG_ERROR(\"Fetch failed, doc_id: %zu, num_rows: %zu != 1\", (size_t)g_doc_id,\n              (size_t)exec_batch->length);\n    return nullptr;\n  }\n\n  if (exec_batch->num_values() != (int)forward_columns.size()) {\n    LOG_ERROR(\"table column size error, expect %zu, actual %d\",\n              forward_columns.size(), exec_batch->num_values());\n    return nullptr;\n  }\n\n  auto doc = std::make_shared<Doc>();\n\n  // column 0 is the global doc_id\n  if (auto doc_id_scalar = std::static_pointer_cast<arrow::Int64Scalar>(\n          (*exec_batch)[0].scalar())) {\n    doc->set_doc_id(doc_id_scalar->value);\n  } else {\n    LOG_ERROR(\"Global doc id scalar is not of int64 type\");\n    return nullptr;\n  }\n\n  // column 1 is the uid(pk)\n  if (auto str_scalar = std::dynamic_pointer_cast<arrow::StringScalar>(\n          (*exec_batch)[1].scalar())) {\n    doc->set_pk(std::string(str_scalar->view()));\n  } else {\n    LOG_ERROR(\"Primary key scalar is not of string type\");\n    return nullptr;\n  }\n\n  // other forward columns\n  for 
(int col_idx = 2; col_idx < exec_batch->num_values(); ++col_idx) {\n    auto column_name = forward_columns[col_idx];\n    auto column = result_schema->GetFieldByName(column_name);\n    auto &column_scalar = (*exec_batch)[col_idx].scalar();\n    if (column_scalar == nullptr || column_scalar->is_valid == false) {\n      continue;\n    }\n    switch (column->type()->id()) {\n      case arrow::Type::STRING: {\n        auto str_scalar =\n            std::dynamic_pointer_cast<arrow::StringScalar>(column_scalar);\n        doc->set(column_name, std::string(str_scalar->view()));\n        break;\n      }\n      case arrow::Type::INT32: {\n        auto int32_scalar =\n            std::dynamic_pointer_cast<arrow::Int32Scalar>(column_scalar);\n        doc->set(column_name, int32_scalar->value);\n        break;\n      }\n      case arrow::Type::INT64: {\n        auto int64_scalar =\n            std::dynamic_pointer_cast<arrow::Int64Scalar>(column_scalar);\n        doc->set(column_name, int64_scalar->value);\n        break;\n      }\n      case arrow::Type::UINT32: {\n        auto uint32_scalar =\n            std::dynamic_pointer_cast<arrow::UInt32Scalar>(column_scalar);\n        doc->set(column_name, uint32_scalar->value);\n        break;\n      }\n      case arrow::Type::UINT64: {\n        auto uint64_scalar =\n            std::dynamic_pointer_cast<arrow::UInt64Scalar>(column_scalar);\n        doc->set(column_name, uint64_scalar->value);\n        break;\n      }\n      case arrow::Type::DOUBLE: {\n        auto double_scalar =\n            std::dynamic_pointer_cast<arrow::DoubleScalar>(column_scalar);\n        doc->set(column_name, double_scalar->value);\n        break;\n      }\n      case arrow::Type::FLOAT: {\n        auto float_scalar =\n            std::dynamic_pointer_cast<arrow::FloatScalar>(column_scalar);\n        doc->set(column_name, float_scalar->value);\n        break;\n      }\n      case arrow::Type::BOOL: {\n        auto bool_scalar =\n            
std::dynamic_pointer_cast<arrow::BooleanScalar>(column_scalar);\n        doc->set(column_name, bool_scalar->value);\n        break;\n      }\n      case arrow::Type::BINARY: {\n        auto binary_scalar =\n            std::dynamic_pointer_cast<arrow::BinaryScalar>(column_scalar);\n        doc->set(column_name, std::string(binary_scalar->view()));\n        break;\n      }\n      case arrow::Type::LIST: {\n        auto list_scalar =\n            std::dynamic_pointer_cast<arrow::ListScalar>(column_scalar);\n        if (list_scalar && list_scalar->value) {\n          auto list_type =\n              std::dynamic_pointer_cast<arrow::ListType>(column->type());\n          if (list_type) {\n            auto value_type = list_type->value_type();\n            switch (value_type->id()) {\n              case arrow::Type::BOOL: {\n                std::vector<bool> values;\n                auto array = std::dynamic_pointer_cast<arrow::BooleanArray>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    } else {\n                      LOG_ERROR(\n                          \"Invalid arrow::boolean array value at index %zu\",\n                          (size_t)i);\n                      continue;\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::INT32: {\n                std::vector<int32_t> values;\n                auto array = std::dynamic_pointer_cast<arrow::Int32Array>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if 
(array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::INT64: {\n                std::vector<int64_t> values;\n                auto array = std::dynamic_pointer_cast<arrow::Int64Array>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::UINT32: {\n                std::vector<uint32_t> values;\n                auto array = std::dynamic_pointer_cast<arrow::UInt32Array>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::UINT64: {\n                std::vector<uint64_t> values;\n                auto array = std::dynamic_pointer_cast<arrow::UInt64Array>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  
doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::FLOAT: {\n                std::vector<float> values;\n                auto array = std::dynamic_pointer_cast<arrow::FloatArray>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::DOUBLE: {\n                std::vector<double> values;\n                auto array = std::dynamic_pointer_cast<arrow::DoubleArray>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->Value(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::STRING: {\n                std::vector<std::string> values;\n                auto array = std::dynamic_pointer_cast<arrow::StringArray>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->GetString(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              case arrow::Type::BINARY: {\n                
std::vector<std::string> values;\n                auto array = std::dynamic_pointer_cast<arrow::BinaryArray>(\n                    list_scalar->value);\n                if (array) {\n                  values.reserve(array->length());\n                  for (int64_t i = 0; i < array->length(); ++i) {\n                    if (array->IsValid(i)) {\n                      values.push_back(array->GetString(i));\n                    }\n                  }\n                  doc->set(column_name, values);\n                }\n                break;\n              }\n              default:\n                LOG_WARN(\"Unsupported list element type: %s\",\n                         value_type->ToString().c_str());\n                break;\n            }\n          }\n        }\n        break;\n      }\n      default:\n        LOG_ERROR(\"Unsupported type: %s\", column_name.c_str());\n        break;\n    }\n  }\n\n  // fetch vector\n  for (const auto &field : collection_schema_->vector_fields()) {\n    int block_idx = find_persist_block_id(BlockType::VECTOR_INDEX,\n                                          segment_doc_id, field->name());\n    if (block_idx != -1) {\n      const auto &block_offsets =\n          get_persist_block_offsets(BlockType::VECTOR_INDEX, field->name());\n      auto block_offset = block_offsets[block_idx];\n      auto local_row = segment_doc_id - block_offset;\n\n      auto column_name = field->name();\n      auto iter = vector_indexers_.find(column_name);\n      if (iter != vector_indexers_.end()) {\n        const auto &vector_indexers = iter->second;\n        if (block_idx >= (int)vector_indexers.size()) {\n          LOG_ERROR(\"block_idx[%d] out of range[%lu]\", block_idx,\n                    vector_indexers.size());\n          continue;\n        }\n        auto vector_indexer = vector_indexers[block_idx];\n        auto fetch_result = vector_indexer->Fetch(local_row);\n        if (!fetch_result) {\n          LOG_ERROR(\n              \"vector indexer 
fetch failed, local_row: %d, block_idx: %d, \"\n              \"segment_doc_id: %d\",\n              local_row, block_idx, segment_doc_id);\n          return nullptr;\n        }\n        const auto &vector_buffer = fetch_result.value();\n        auto status =\n            ConvertVectorDataBufferToDocField(field, vector_buffer, doc.get());\n        if (!status.ok()) {\n          LOG_ERROR(\"convert vector data buffer to doc field failed %s\",\n                    status.message().c_str());\n        }\n      }\n\n    } else {\n      if (segment_meta_->has_writing_forward_block()) {\n        const auto &p_block_offsets =\n            get_persist_block_offsets(BlockType::VECTOR_INDEX, field->name());\n        const auto &p_block_metas =\n            get_persist_block_metas(BlockType::VECTOR_INDEX, field->name());\n        auto mem_block_offset =\n            p_block_offsets.empty()\n                ? 0\n                : p_block_offsets.back() + p_block_metas.back().doc_count_;\n        int local_row = segment_doc_id - mem_block_offset;\n        auto column_name = field->name();\n        auto iter = memory_vector_indexers_.find(column_name);\n        if (iter != memory_vector_indexers_.end()) {\n          auto vector_indexer = iter->second;\n          auto fetch_result = vector_indexer->Fetch(local_row);\n          if (!fetch_result.has_value()) {\n            LOG_ERROR(\n                \"vector indexer fetch failed, column: %s, doc_count: %lu, \"\n                \"mem_block_offset: %d, local_row: %d\",\n                field->name().c_str(), vector_indexer->doc_count(),\n                mem_block_offset, local_row);\n            continue;\n          }\n          const auto &vector_buffer = fetch_result.value();\n          auto status = ConvertVectorDataBufferToDocField(field, vector_buffer,\n                                                          doc.get());\n          if (!status.ok()) {\n            LOG_ERROR(\"convert vector data buffer to doc field failed 
%s\",\n                      status.message().c_str());\n          }\n        }\n      } else {\n        LOG_ERROR(\"Can't find vector block for g_doc_id: %zu\",\n                  (size_t)g_doc_id);\n      }\n    }\n  }\n\n  return doc;\n}\n\nCombinedVectorColumnIndexer::Ptr SegmentImpl::get_combined_vector_indexer(\n    const std::string &field_name) const {\n  std::vector<VectorColumnIndexer::Ptr> indexers;\n  auto iter = vector_indexers_.find(field_name);\n  if (iter != vector_indexers_.end()) {\n    indexers = iter->second;\n  }\n  auto m_iter = memory_vector_indexers_.find(field_name);\n  if (m_iter != memory_vector_indexers_.end()) {\n    indexers.push_back(m_iter->second);\n  }\n\n  auto field = collection_schema_->get_field(field_name);\n  auto vector_index_params =\n      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n  MetricType metric_type = vector_index_params->metric_type();\n  auto blocks = get_persist_block_metas(BlockType::VECTOR_INDEX, field_name);\n\n  auto normal_indexers = indexers;\n  return std::make_shared<CombinedVectorColumnIndexer>(\n      indexers, normal_indexers, *field, *segment_meta_, std::move(blocks),\n      metric_type);\n}\n\nCombinedVectorColumnIndexer::Ptr SegmentImpl::get_quant_combined_vector_indexer(\n    const std::string &field_name) const {\n  std::vector<VectorColumnIndexer::Ptr> indexers;\n  auto iter = quant_vector_indexers_.find(field_name);\n  if (iter != quant_vector_indexers_.end()) {\n    indexers = iter->second;\n  }\n  auto m_iter = quant_memory_vector_indexers_.find(field_name);\n  if (m_iter != quant_memory_vector_indexers_.end()) {\n    indexers.push_back(m_iter->second);\n  }\n\n  std::vector<VectorColumnIndexer::Ptr> normal_indexers;\n  iter = vector_indexers_.find(field_name);\n  if (iter != vector_indexers_.end()) {\n    normal_indexers = iter->second;\n  }\n  m_iter = memory_vector_indexers_.find(field_name);\n  if (m_iter != memory_vector_indexers_.end()) {\n    
normal_indexers.push_back(m_iter->second);\n  }\n\n  auto field = collection_schema_->get_field(field_name);\n  auto vector_index_params =\n      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n  MetricType metric_type = vector_index_params->metric_type();\n  auto blocks =\n      get_persist_block_metas(BlockType::VECTOR_INDEX_QUANTIZE, field_name);\n\n  return std::make_shared<CombinedVectorColumnIndexer>(\n      indexers, normal_indexers, *field, *segment_meta_, std::move(blocks),\n      metric_type, true);\n}\n\nVectorColumnIndexer::Ptr SegmentImpl::get_memory_vector_indexer(\n    const std::string &field_name) {\n  auto iter = memory_vector_indexers_.find(field_name);\n  if (iter != memory_vector_indexers_.end()) {\n    return iter->second;\n  }\n  return nullptr;\n}\n\nVectorColumnIndexer::Ptr SegmentImpl::get_memory_quant_vector_indexer(\n    const std::string &field_name) {\n  auto iter = quant_memory_vector_indexers_.find(field_name);\n  if (iter != quant_memory_vector_indexers_.end()) {\n    return iter->second;\n  }\n  return nullptr;\n}\n\nstd::vector<VectorColumnIndexer::Ptr> SegmentImpl::get_vector_indexer(\n    const std::string &field_name) const {\n  auto iter = vector_indexers_.find(field_name);\n  if (iter != vector_indexers_.end()) {\n    return iter->second;\n  }\n  return std::vector<VectorColumnIndexer::Ptr>();\n}\n\nstd::vector<VectorColumnIndexer::Ptr> SegmentImpl::get_quant_vector_indexer(\n    const std::string &field_name) const {\n  std::vector<VectorColumnIndexer::Ptr> col_indexers;\n  auto iter = quant_vector_indexers_.find(field_name);\n  if (iter != quant_vector_indexers_.end()) {\n    return iter->second;\n  }\n  return std::vector<VectorColumnIndexer::Ptr>();\n}\n\nInvertedColumnIndexer::Ptr SegmentImpl::get_scalar_indexer(\n    const std::string &field_name) const {\n  if (invert_indexers_) {\n    return (*invert_indexers_)[field_name];\n  }\n  return nullptr;\n}\n\nconst IndexFilter::Ptr 
SegmentImpl::get_filter() {\n  return delete_store_->empty() ? nullptr : filter_;\n}\n\nStatus SegmentImpl::create_all_vector_index(\n    int concurrency, SegmentMeta::Ptr *segment_meta,\n    std::unordered_map<std::string, VectorColumnIndexer::Ptr> *vector_indexers,\n    std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n        *quant_vector_indexers) {\n  const auto &vector_fields = collection_schema_->vector_fields();\n\n  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);\n  new_segment_meta->remove_writing_forward_block();\n\n  std::set<std::string> vector_field_names;\n  for (const auto &field : vector_fields) {\n    auto s = create_vector_index(field->name(), field->index_params(),\n                                 concurrency, &new_segment_meta,\n                                 vector_indexers, quant_vector_indexers);\n    CHECK_RETURN_STATUS(s);\n    vector_field_names.insert(field->name());\n  }\n\n  new_segment_meta->set_indexed_vector_fields(vector_field_names);\n  *segment_meta = new_segment_meta;\n\n  return Status::OK();\n}\n\nResult<VectorColumnIndexer::Ptr> SegmentImpl::merge_vector_indexer(\n    const std::string &index_file_path, const std::string &column,\n    const FieldSchema &field, int concurrency) {\n  VectorColumnIndexer::Ptr vector_indexer =\n      std::make_shared<VectorColumnIndexer>(index_file_path, field);\n\n  vector_column_params::ReadOptions options{options_.enable_mmap_, true};\n\n  auto s = vector_indexer->Open(options);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n  std::vector<VectorColumnIndexer::Ptr> to_merge_indexers =\n      vector_indexers_[column];\n  vector_column_params::MergeOptions merge_options;\n  if (concurrency == 0) {\n    merge_options.pool = GlobalResource::Instance().optimize_thread_pool();\n    merge_options.write_concurrency =\n        GlobalConfig::Instance().optimize_thread_count();\n  } else {\n    merge_options.write_concurrency = concurrency;\n  }\n  s = 
vector_indexer->Merge(to_merge_indexers, filter_, merge_options);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n  s = vector_indexer->Flush();\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  return vector_indexer;\n}\n\nStatus SegmentImpl::create_vector_index(\n    const std::string &column, const IndexParams::Ptr &index_params,\n    int concurrency, SegmentMeta::Ptr *segment_meta,\n    std::unordered_map<std::string, VectorColumnIndexer::Ptr> *vector_indexers,\n    std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n        *quant_vector_indexers) {\n  auto field = collection_schema_->get_vector_field(column);\n  SegmentMeta::Ptr new_segment_meta;\n  if (*segment_meta == nullptr) {\n    new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);\n    new_segment_meta->remove_writing_forward_block();\n  } else {\n    new_segment_meta = *segment_meta;\n  }\n\n  if (segment_meta_->vector_indexed(column) &&\n      *field->index_params() == *index_params) {\n    // if segment is already indexed and index params are same, skip create\n    *segment_meta = new_segment_meta;\n    return Status::OK();\n  }\n  new_segment_meta->add_indexed_vector_field(column);\n\n  auto vector_index_params =\n      std::dynamic_pointer_cast<VectorIndexParams>(index_params);\n\n  if (vector_index_params->quantize_type() == QuantizeType::UNDEFINED) {\n    auto block_id = allocate_block_id();\n\n    auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);\n    field_with_new_index_params->set_index_params(index_params);\n\n    std::string index_file_path = FileHelper::MakeVectorIndexPath(\n        path_, column, segment_meta_->id(), block_id);\n    auto vector_indexer = merge_vector_indexer(\n        index_file_path, column, *field_with_new_index_params, concurrency);\n    if (!vector_indexer.has_value()) {\n      return vector_indexer.error();\n    }\n\n    vector_indexers->insert({column, vector_indexer.value()});\n\n    
new_segment_meta->remove_vector_persisted_block(column);\n    BlockMeta block;\n    block.set_id(block_id);\n    block.set_type(BlockType::VECTOR_INDEX);\n    block.set_columns({column});\n    block.set_min_doc_id(doc_ids_.front());\n    block.set_max_doc_id(doc_ids_.back());\n    block.set_doc_count(doc_ids_.size());\n    new_segment_meta->add_persisted_block(block);\n\n    *segment_meta = new_segment_meta;\n\n  } else {\n    auto original_index_params =\n        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n\n    core::IndexProvider::Pointer raw_vector_provider;\n\n    if (!(vector_index_params->metric_type() ==\n              original_index_params->metric_type() &&\n          vector_indexers_[column].size() == 1)) {\n      BlockID block_id = allocate_block_id();\n\n      auto field_with_flat = std::make_shared<FieldSchema>(*field);\n      field_with_flat->set_index_params(\n          MakeDefaultVectorIndexParams(vector_index_params->metric_type()));\n\n      std::string index_file_path = FileHelper::MakeVectorIndexPath(\n          path_, column, segment_meta_->id(), block_id);\n      auto vector_indexer = merge_vector_indexer(index_file_path, column,\n                                                 *field_with_flat, concurrency);\n      if (!vector_indexer.has_value()) {\n        return vector_indexer.error();\n      }\n\n      vector_indexers->insert({column, vector_indexer.value()});\n\n      new_segment_meta->remove_vector_persisted_block(column, false);\n      BlockMeta block;\n      block.set_id(block_id);\n      block.set_type(BlockType::VECTOR_INDEX);\n      block.set_columns({column});\n      block.set_min_doc_id(meta()->min_doc_id());\n      block.set_max_doc_id(meta()->max_doc_id());\n      block.set_doc_count(meta()->doc_count());\n      new_segment_meta->add_persisted_block(block);\n      if (vector_index_params->quantize_type() == QuantizeType::RABITQ) {\n        raw_vector_provider = 
vector_indexer.value()->create_index_provider();\n      }\n    } else {\n      raw_vector_provider =\n          vector_indexers_[column][0]->create_index_provider();\n    }\n\n    if (vector_index_params->quantize_type() != QuantizeType::RABITQ) {\n      auto quant_block_id = allocate_block_id();\n      auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);\n      field_with_new_index_params->set_index_params(index_params);\n\n      std::string index_file_path = FileHelper::MakeQuantizeVectorIndexPath(\n          path_, column, segment_meta_->id(), quant_block_id);\n      auto vector_indexer = merge_vector_indexer(\n          index_file_path, column, *field_with_new_index_params, concurrency);\n      if (!vector_indexer.has_value()) {\n        return vector_indexer.error();\n      }\n\n      quant_vector_indexers->insert({column, vector_indexer.value()});\n\n      new_segment_meta->remove_vector_persisted_block(column, true);\n      BlockMeta block;\n      block.set_id(quant_block_id);\n      block.set_type(BlockType::VECTOR_INDEX_QUANTIZE);\n      block.set_columns({column});\n      block.set_min_doc_id(meta()->min_doc_id());\n      block.set_max_doc_id(meta()->max_doc_id());\n      block.set_doc_count(meta()->doc_count());\n      new_segment_meta->add_persisted_block(block);\n    } else {\n#if !RABITQ_SUPPORTED\n      return Status::NotSupported(\n          \"RabitQ is not supported on this platform (Linux x86_64 only)\");\n#else\n      // rabitq\n      auto rabitq_params = std::dynamic_pointer_cast<HnswRabitqIndexParams>(\n          vector_index_params->clone());\n      if (!rabitq_params) {\n        return Status::InternalError(\"Expect HnswRabitqIndexParams\");\n      }\n      // train rabitq converter\n      auto converter = core::IndexFactory::CreateConverter(\"RabitqConverter\");\n      if (!converter) {\n        return Status::NotSupported(\"RabitqConverter not found\");\n      }\n      core::IndexMeta index_meta;\n      
index_meta.set_meta(\n          ProximaEngineHelper::convert_to_engine_data_type(field->data_type())\n              .value(),\n          // use field dimension\n          field->dimension());\n      index_meta.set_metric(\n          core_interface::Index::get_metric_name(\n              ProximaEngineHelper::convert_to_engine_metric_type(\n                  vector_index_params->metric_type())\n                  .value(),\n              false),\n          0, ailego::Params{});\n      ailego::Params converter_params;\n      converter_params.set(core::PARAM_RABITQ_TOTAL_BITS,\n                           rabitq_params->total_bits());\n      converter_params.set(core::PARAM_RABITQ_NUM_CLUSTERS,\n                           rabitq_params->num_clusters());\n      converter_params.set(core::PARAM_RABITQ_SAMPLE_COUNT,\n                           rabitq_params->sample_count());\n      if (int ret = converter->init(index_meta, converter_params); ret != 0) {\n        return Status::InternalError(\"Failed to init rabitq converter:\", ret);\n      }\n      if (int ret = converter->train(raw_vector_provider); ret != 0) {\n        return Status::InternalError(\"Failed to train rabitq converter:\", ret);\n      }\n      core::IndexReformer::Pointer reformer;\n      if (int ret = converter->to_reformer(&reformer); ret != 0) {\n        return Status::InternalError(\"Failed to to get rabitq reformer:\", ret);\n      }\n      rabitq_params->set_rabitq_reformer(reformer);\n      rabitq_params->set_raw_vector_provider(raw_vector_provider);\n\n      auto quant_block_id = allocate_block_id();\n      auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);\n      field_with_new_index_params->set_index_params(rabitq_params);\n\n      std::string index_file_path = FileHelper::MakeQuantizeVectorIndexPath(\n          path_, column, segment_meta_->id(), quant_block_id);\n      auto vector_indexer = merge_vector_indexer(\n          index_file_path, column, 
*field_with_new_index_params, concurrency);\n      if (!vector_indexer.has_value()) {\n        return vector_indexer.error();\n      }\n\n      quant_vector_indexers->insert({column, vector_indexer.value()});\n\n      new_segment_meta->remove_vector_persisted_block(column, true);\n      BlockMeta block;\n      block.set_id(quant_block_id);\n      block.set_type(BlockType::VECTOR_INDEX_QUANTIZE);\n      block.set_columns({column});\n      block.set_min_doc_id(meta()->min_doc_id());\n      block.set_max_doc_id(meta()->max_doc_id());\n      block.set_doc_count(meta()->doc_count());\n      new_segment_meta->add_persisted_block(block);\n#endif\n    }\n\n    *segment_meta = new_segment_meta;\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::drop_vector_index(\n    const std::string &column, SegmentMeta::Ptr *segment_meta,\n    std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n        *vector_indexers) {\n  auto field = collection_schema_->get_vector_field(column);\n  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);\n  new_segment_meta->remove_writing_forward_block();\n  new_segment_meta->add_indexed_vector_field(column);\n\n  if (*field->index_params() == DefaultVectorIndexParams) {\n    *segment_meta = new_segment_meta;\n    return Status::OK();\n  }\n\n  auto vector_index_params =\n      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n\n  auto block_id = allocate_block_id();\n\n  auto field_with_default_index = std::make_shared<FieldSchema>(*field);\n  field_with_default_index->set_index_params(DefaultVectorIndexParams);\n\n  std::string index_file_path = FileHelper::MakeVectorIndexPath(\n      path_, column, segment_meta_->id(), block_id);\n\n  auto new_vector_indexer = std::make_shared<VectorColumnIndexer>(\n      index_file_path, *field_with_default_index);\n  vector_column_params::ReadOptions options{options_.enable_mmap_, true};\n\n  auto s = new_vector_indexer->Open(options);\n  CHECK_RETURN_STATUS(s);\n 
 s = new_vector_indexer->Merge(vector_indexers_[column], nullptr);\n  CHECK_RETURN_STATUS(s);\n  s = new_vector_indexer->Flush();\n  CHECK_RETURN_STATUS(s);\n\n  (*vector_indexers)[column] = new_vector_indexer;\n  new_segment_meta->remove_vector_persisted_block(\n      column, vector_index_params->quantize_type() != QuantizeType::UNDEFINED);\n\n  BlockMeta block;\n  block.set_id(block_id);\n  block.set_type(BlockType::VECTOR_INDEX);\n  block.set_columns({column});\n  block.set_min_doc_id(meta()->min_doc_id());\n  block.set_max_doc_id(meta()->max_doc_id());\n  block.set_doc_count(meta()->doc_count());\n  new_segment_meta->add_persisted_block(block);\n\n  *segment_meta = new_segment_meta;\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::reload_vector_index(\n    const CollectionSchema &schema, const SegmentMeta::Ptr &new_segment_meta,\n    const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n        &vector_indexers,\n    const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n        &quant_vector_indexers) {\n  collection_schema_ = std::make_shared<CollectionSchema>(schema);\n  segment_meta_ = new_segment_meta;\n  fresh_persist_block_offset();\n\n  auto vector_fields = schema.vector_fields();\n\n  for (auto field : vector_fields) {\n    auto vector_index_params =\n        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n\n    if (vector_index_params->quantize_type() == QuantizeType::UNDEFINED) {\n      auto iter = vector_indexers.find(field->name());\n      if (iter != vector_indexers.end()) {\n        auto indexers = vector_indexers_[field->name()];\n        for (auto indexer : indexers) {\n          auto s = indexer->Destroy();\n          CHECK_RETURN_STATUS(s);\n        }\n        vector_indexers_[field->name()] = {iter->second};\n      }\n      auto q_iter = quant_vector_indexers_.find(field->name());\n      if (q_iter != quant_vector_indexers_.end()) {\n        auto q_indexers = q_iter->second;\n        for (auto 
q_indexer : q_indexers) {\n          auto s = q_indexer->Destroy();\n          CHECK_RETURN_STATUS(s);\n        }\n        quant_vector_indexers_.erase(q_iter);\n      }\n    } else {\n      auto iter = vector_indexers.find(field->name());\n      if (iter != vector_indexers.end()) {\n        auto indexers = vector_indexers_[field->name()];\n        for (auto indexer : indexers) {\n          auto s = indexer->Destroy();\n          CHECK_RETURN_STATUS(s);\n        }\n        vector_indexers_[field->name()] = {iter->second};\n      }\n      auto q_iter = quant_vector_indexers.find(field->name());\n      if (q_iter != quant_vector_indexers.end()) {\n        auto q_indexers = quant_vector_indexers_[field->name()];\n        for (auto q_indexer : q_indexers) {\n          auto s = q_indexer->Destroy();\n          CHECK_RETURN_STATUS(s);\n        }\n        quant_vector_indexers_[field->name()] = {q_iter->second};\n      }\n    }\n  }\n\n  return Status::OK();\n}\n\nbool SegmentImpl::vector_index_ready(\n    const std::string &column, const IndexParams::Ptr &index_params) const {\n  auto field = collection_schema_->get_vector_field(column);\n  return segment_meta_->vector_indexed(column) &&\n         *field->index_params() == *index_params;\n}\n\nbool SegmentImpl::all_vector_index_ready() const {\n  for (const auto &field : collection_schema_->vector_fields()) {\n    if (!segment_meta_->vector_indexed(field->name())) {\n      return false;\n    }\n  }\n  return true;\n}\n\nStatus SegmentImpl::create_scalar_index(const std::vector<std::string> &columns,\n                                        const IndexParams::Ptr &index_params,\n                                        SegmentMeta::Ptr *segment_meta,\n                                        InvertedIndexer::Ptr *scalar_indexer) {\n  // validate\n  std::vector<FieldSchema> fields;\n  std::vector<std::string> field_names;\n\n  for (const auto &column : columns) {\n    auto field = collection_schema_->get_field(column);\n    
if (!field || field->is_vector_field()) {\n      return Status::InvalidArgument(\"Invalid column name\");\n    }\n\n    if (field->index_params() != nullptr &&\n        *field->index_params() == *index_params) {\n      // if already indexed, just skip it\n      continue;\n    }\n\n    auto new_field = std::make_shared<FieldSchema>(*field);\n    new_field->set_index_params(index_params);\n\n    fields.push_back(*new_field);\n    field_names.push_back(new_field->name());\n  }\n\n  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);\n  if (fields.empty()) {\n    *segment_meta = new_segment_meta;\n    return Status::OK();\n  }\n\n  new_segment_meta->remove_scalar_index_block();\n\n  // create scalar indexer\n  // clone original indexer\n  auto block_id = allocate_block_id();\n  std::string new_invert_index_path =\n      FileHelper::MakeInvertIndexPath(path_, id(), block_id);\n\n  Status s;\n  InvertedIndexer::Ptr new_scalar_indexer{nullptr};\n  if (invert_indexers_) {\n    s = invert_indexers_->create_snapshot(new_invert_index_path);\n    CHECK_RETURN_STATUS(s);\n\n    auto inverted_fields_ptr = collection_schema_->forward_fields_with_index();\n    std::vector<FieldSchema> inverted_fields;\n    std::vector<std::string> inverted_field_names;\n    for (auto field : inverted_fields_ptr) {\n      inverted_fields.push_back(*field);\n      inverted_field_names.push_back(field->name());\n    }\n\n    new_scalar_indexer = InvertedIndexer::CreateAndOpen(\n        collection_schema_->name(), new_invert_index_path, false,\n        inverted_fields, false);\n    if (!new_scalar_indexer) {\n      LOG_ERROR(\"Failed to create scalar indexer\");\n      return Status::InternalError(\"Failed to create scalar indexer\");\n    }\n    for (const auto &field : fields) {\n      if (std::find(inverted_field_names.begin(), inverted_field_names.end(),\n                    field.name()) != inverted_field_names.end()) {\n        s = 
new_scalar_indexer->remove_column_indexer(field.name());\n        CHECK_RETURN_STATUS(s);\n      }\n      s = new_scalar_indexer->create_column_indexer(field);\n      CHECK_RETURN_STATUS(s);\n    }\n  } else {\n    new_scalar_indexer = InvertedIndexer::CreateAndOpen(\n        collection_schema_->name(), new_invert_index_path, true, fields, false);\n    if (!new_scalar_indexer) {\n      LOG_ERROR(\"Failed to create scalar indexer\");\n      return Status::InternalError(\"Failed to create scalar indexer\");\n    }\n  }\n\n  // insert scalar indexer\n  auto reader = scan(columns);\n  if (reader == nullptr) {\n    return Status::InternalError(\"Failed to create reader\");\n  }\n\n  int accu_doc_count = 0;\n  while (true) {\n    auto batch = reader->Next();\n    if (!batch.ok()) {\n      return Status::InternalError(\"reader next failed: \",\n                                   batch.status().message());\n    }\n\n    auto batch_value = batch.ValueOrDie();\n\n    if (!batch_value) {\n      break;\n    }\n\n    s = SegmentHelper::ReduceScalarIndex(new_scalar_indexer, batch_value,\n                                         accu_doc_count);\n    if (!s.ok()) {\n      LOG_ERROR(\"Reduce Scalar Index faield, err: %s\", s.message().c_str());\n    }\n    CHECK_RETURN_STATUS(s);\n\n    accu_doc_count += batch_value->num_rows();\n  }\n\n  s = new_scalar_indexer->seal();\n  CHECK_RETURN_STATUS(s);\n\n  BlockMeta block;\n  block.set_id(block_id);\n  block.set_type(BlockType::SCALAR_INDEX);\n  block.set_columns(field_names);\n  new_segment_meta->add_persisted_block(block);\n\n  *segment_meta = new_segment_meta;\n  *scalar_indexer = new_scalar_indexer;\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::drop_scalar_index(const std::vector<std::string> &columns,\n                                      SegmentMeta::Ptr *segment_meta,\n                                      InvertedIndexer::Ptr *scalar_indexer) {\n  // validate\n  for (const auto &column : columns) {\n    auto field = 
collection_schema_->get_field(column);\n    if (!field || field->is_vector_field()) {\n      return Status::InvalidArgument(\n          \"Invalid column name to drop scalar index\");\n    }\n  }\n\n  std::vector<FieldSchema> fields;\n  std::vector<FieldSchema> drop_fields;\n  std::vector<FieldSchema> invert_fields;\n  std::vector<std::string> field_names;\n  for (const auto &field : collection_schema_->forward_fields()) {\n    if (field->index_type() == IndexType::INVERT) {\n      invert_fields.push_back(*field);\n      if (std::find(columns.begin(), columns.end(), field->name()) !=\n          columns.end()) {\n        drop_fields.push_back(*field);\n        continue;\n      }\n      fields.push_back(*field);\n      field_names.push_back(field->name());\n    }\n  }\n\n  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);\n  new_segment_meta->remove_scalar_index_block();\n\n  if (fields.empty()) {\n    *segment_meta = new_segment_meta;\n    *scalar_indexer = nullptr;\n    return Status::OK();\n  }\n\n  // clone original indexer\n  auto block_id = allocate_block_id();\n  std::string new_invert_index_path =\n      FileHelper::MakeInvertIndexPath(path_, id(), block_id);\n  auto s = invert_indexers_->create_snapshot(new_invert_index_path);\n  CHECK_RETURN_STATUS(s);\n\n  auto new_scalar_indexer = InvertedIndexer::CreateAndOpen(\n      collection_schema_->name(), new_invert_index_path, false, invert_fields,\n      options_.read_only_);\n  if (!new_scalar_indexer) {\n    LOG_ERROR(\"Failed to create scalar indexer\");\n    return Status::InternalError(\"Failed to create scalar indexer\");\n  }\n  for (const auto &field : drop_fields) {\n    s = new_scalar_indexer->remove_column_indexer(field.name());\n    CHECK_RETURN_STATUS(s);\n  }\n\n  s = new_scalar_indexer->seal();\n  CHECK_RETURN_STATUS(s);\n\n  BlockMeta block;\n  block.set_id(block_id);\n  block.set_type(BlockType::SCALAR_INDEX);\n  block.set_columns(field_names);\n\n  
new_segment_meta->add_persisted_block(block);\n\n  *segment_meta = new_segment_meta;\n  *scalar_indexer = new_scalar_indexer;\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::reload_scalar_index(\n    const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n    const InvertedIndexer::Ptr &scalar_indexer) {\n  collection_schema_ = std::make_shared<CollectionSchema>(schema);\n  segment_meta_ = segment_meta;\n\n  if (!scalar_indexer) {\n    // no need to reload inverted indexer\n    return Status::OK();\n  }\n\n  fresh_persist_block_offset();\n\n  if (invert_indexers_) {\n    auto old_dir = invert_indexers_->working_dir();\n    invert_indexers_ = scalar_indexer;\n\n    FileHelper::RemoveDirectory(old_dir);\n  } else {\n    invert_indexers_ = scalar_indexer;\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::dump() {\n  if (sealed_) {\n    return Status::NotSupported(\"Segment has been dumped.\");\n  }\n  auto s = flush();\n  CHECK_RETURN_STATUS(s);\n\n  if (invert_indexers_) {\n    s = invert_indexers_->seal();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  sealed_ = true;\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::flush() {\n  CHECK_SEGMENT_READONLY_RETURN_STATUS;\n\n  if (wal_file_ == nullptr || !wal_file_->has_record()) {\n    return Status::OK();\n  }\n\n  if (wal_file_) {\n    if (wal_file_->flush() != 0) {\n      return Status::InternalError(\"Failed to flush wal\");\n    }\n  }\n\n  Status s;\n\n  if (memory_store_) {\n    s = memory_store_->flush();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // flush scalar indexer\n  if (invert_indexers_) {\n    s = invert_indexers_->flush();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  // flush vector indexer\n  for (const auto &indexer : memory_vector_indexers_) {\n    if (indexer.second) {\n      s = indexer.second->Flush();\n      CHECK_RETURN_STATUS(s);\n    }\n  }\n\n  // flush quant vector indexer\n  for (const auto &indexer : quant_memory_vector_indexers_) {\n    if (indexer.second) {\n      s = 
indexer.second->Flush();\n      CHECK_RETURN_STATUS(s);\n    }\n  }\n\n  if (id_map_) {\n    s = id_map_->flush();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  auto block = segment_meta_->writing_forward_block().value();\n\n  uint32_t delete_snapshot_path_suffix = UINT32_MAX;\n  uint32_t delete_snapshot_path_suffix_current = UINT32_MAX;\n  if (delete_store_) {\n    if (delete_store_->modified_since_last_flush()) {\n      delete_snapshot_path_suffix_current =\n          version_manager_->delete_snapshot_path_suffix();\n      delete_snapshot_path_suffix =\n          version_manager_->delete_snapshot_path_suffix() + 1;\n      std::string delete_store_path = FileHelper::MakeFilePath(\n          path_, FileID::DELETE_FILE, delete_snapshot_path_suffix);\n      s = delete_store_->flush(delete_store_path);\n      CHECK_RETURN_STATUS(s);\n    }\n  }\n\n  if (memory_store_) {\n    // update segment meta with memory components\n    s = finish_memory_components();\n    CHECK_RETURN_STATUS(s);\n\n    // set a new mem block\n    auto block_id = allocate_block_id();\n    segment_meta_->set_writing_forward_block({block_id, BlockType::SCALAR,\n                                              block.max_doc_id_ + 1, 0, 0,\n                                              block.columns_});\n  }\n\n  // update version and flush\n  s = update_version(delete_snapshot_path_suffix);\n  CHECK_RETURN_STATUS(s);\n\n  // clear wal file\n  if (wal_file_) {\n    auto ret = wal_file_->remove();\n    if (ret != 0) {\n      LOG_ERROR(\"Remove wal file failed.\");\n      return Status::InternalError(\"Remove wal file failed\");\n    }\n    wal_file_.reset();\n  }\n\n  if (delete_snapshot_path_suffix_current != UINT32_MAX) {\n    std::string delete_store_path = FileHelper::MakeFilePath(\n        path_, FileID::DELETE_FILE, delete_snapshot_path_suffix_current);\n    FileHelper::RemoveFile(delete_store_path);\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::destroy() {\n  if (need_destroyed_) {\n    return 
Status::InvalidArgument(\"Segment has been marked need destroyed\");\n  }\n  need_destroyed_ = true;\n  return Status::OK();\n}\n\nStatus SegmentImpl::cleanup() {\n  auto seg_path = FileHelper::MakeSegmentPath(path_, segment_meta_->id());\n  FileHelper::RemoveDirectory(seg_path);\n  return Status::OK();\n}\n\nbool SegmentImpl::validate(const std::vector<std::string> &columns) const {\n  if (columns.empty()) {\n    LOG_ERROR(\"Empty columns\");\n    return false;\n  }\n  for (const auto &column : columns) {\n    if (column == LOCAL_ROW_ID || column == GLOBAL_DOC_ID ||\n        column == USER_ID) {\n      continue;\n    }\n    if (collection_schema_->get_forward_field(column) == nullptr) {\n      LOG_ERROR(\"Validate failed. unknown column: %s\", column.c_str());\n      return false;\n    }\n  }\n  return true;\n}\n\nTablePtr SegmentImpl::fetch_perf(\n    const std::vector<std::string> &columns,\n    const std::shared_ptr<arrow::Schema> &result_schema,\n    const std::vector<int> &indices) const {\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> chunk_arrays;\n  chunk_arrays.resize(columns.size());\n\n  bool need_local_doc_id = false;\n  size_t local_doc_id_col_index = 0;\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      need_local_doc_id = true;\n      local_doc_id_col_index = i;\n      chunk_arrays[i] = nullptr;\n      continue;\n    }\n    chunk_arrays[i] = persist_chunk_arrays_[col_idx_map_.at(columns[i])];\n  }\n\n  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());\n\n  std::vector<std::pair<int64_t, int64_t>> indices_in_table;\n  for (const auto &target_index : indices) {\n    auto it = std::upper_bound(chunk_offsets_.begin(), chunk_offsets_.end(),\n                               target_index);\n    if (it == chunk_offsets_.begin()) {\n      LOG_ERROR(\"Target index %d is out of bounds\", target_index);\n      return nullptr;\n    }\n    int chunk_index =\n        
static_cast<int>(std::distance(chunk_offsets_.begin(), it) - 1);\n    int64_t index_in_chunk = target_index - chunk_offsets_[chunk_index];\n    indices_in_table.emplace_back(chunk_index, index_in_chunk);\n  }\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      continue;\n    }\n    const auto &source_column = chunk_arrays[i];\n    std::shared_ptr<arrow::Array> array;\n    auto status =\n        BuildArrayFromIndicesWithType(source_column, indices_in_table, &array);\n    if (!status.ok()) {\n      LOG_ERROR(\"BuildArrayFromIndices failed: %s\", status.ToString().c_str());\n      return nullptr;\n    }\n    result_arrays[i] = array;\n  }\n\n  if (need_local_doc_id) {\n    std::vector<uint64_t> values;\n    values.reserve(indices.size());\n    for (const auto idx : indices) {\n      values.push_back(idx);\n    }\n\n    arrow::UInt64Builder builder;\n    auto s = builder.AppendValues(values);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to append values to builder: %s\", s.message().c_str());\n      return nullptr;\n    }\n    std::shared_ptr<arrow::Array> array;\n    s = builder.Finish(&array);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to finish builder: %s\", s.message().c_str());\n      return nullptr;\n    }\n    result_arrays[local_doc_id_col_index] = array;\n  }\n\n  return arrow::Table::Make(result_schema, result_arrays,\n                            static_cast<int64_t>(indices.size()));\n}\n\nTablePtr SegmentImpl::fetch_normal(\n    const std::vector<std::string> &columns,\n    const std::shared_ptr<arrow::Schema> &result_schema,\n    const std::vector<int> &indices) const {\n  // Store scalars per column: column_index -> (output_row, scalar)\n  std::vector<std::vector<std::pair<int, std::shared_ptr<arrow::Scalar>>>>\n      column_results(columns.size());\n\n  // Collect local_doc_id values if needed\n  std::vector<std::pair<int, uint64_t>> local_doc_id_values;\n\n  // Group fetch requests by block: block_index 
-> {column -> [(output_row,\n  // local_row)]}\n  //   block_index >= 0: persisted store\n  //   block_index == -1: memory store\n  std::map<int, std::map<std::string, std::vector<std::pair<int, int>>>>\n      block_request_map;\n\n  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);\n\n  const auto &block_offsets = get_persist_block_offsets(BlockType::SCALAR);\n  const auto &block_metas = get_persist_block_metas(BlockType::SCALAR);\n\n  // Phase 1: Map each (doc_id, column) to its block and local row\n  for (int output_row = 0; output_row < static_cast<int>(indices.size());\n       ++output_row) {\n    int doc_id = indices[output_row];\n\n    for (size_t col_index = 0; col_index < columns.size(); ++col_index) {\n      const std::string &col = columns[col_index];\n      if (col == LOCAL_ROW_ID) {\n        local_doc_id_values.emplace_back(output_row, doc_id);\n        continue;\n      }\n      int offset_idx = -1;\n      int block_index =\n          find_persist_block_id(BlockType::SCALAR, doc_id, col, &offset_idx);\n\n      int local_row = -1;\n      if (block_index != -1 && offset_idx > -1 &&\n          offset_idx < static_cast<int>(block_offsets.size())) {\n        local_row = doc_id - block_offsets[offset_idx];\n        block_request_map[block_index][col].emplace_back(output_row, local_row);\n        continue;\n      }\n\n      // Check memory store\n      if (segment_meta_->has_writing_forward_block()) {\n        int mem_offset =\n            block_offsets.empty()\n                ? 
0\n                : block_offsets.back() + block_metas.back().doc_count_;\n        const auto &mem_block = segment_meta_->writing_forward_block().value();\n\n        if (mem_offset <= doc_id &&\n            doc_id < mem_offset + static_cast<int>(mem_block.doc_count_)) {\n          local_row = doc_id - mem_offset;\n          block_request_map[-1][col].emplace_back(output_row, local_row);\n          continue;\n        }\n      }\n\n      LOG_ERROR(\"Document ID %d not found in segment %d\", doc_id, meta()->id());\n      return nullptr;\n    }\n  }\n\n  // Phase 2: Execute batched fetch per block\n  for (const auto &[block_index, col_to_rows] : block_request_map) {\n    std::vector<std::string> fetch_columns;\n    std::vector<int> fetch_local_rows;\n    std::vector<std::pair<int, int>>\n        output_to_result_index;  // (output_row, result_pos)\n\n    fetch_columns.reserve(col_to_rows.size());\n    for (const auto &kv : col_to_rows) {\n      fetch_columns.push_back(kv.first);\n    }\n\n    // all column has same output size, here just take first column\n    for (const auto &[output_row, local_row] :\n         col_to_rows.at(fetch_columns[0])) {\n      fetch_local_rows.push_back(local_row);\n      output_to_result_index.emplace_back(\n          output_row, static_cast<int>(fetch_local_rows.size() - 1));\n    }\n\n    std::shared_ptr<arrow::Table> block_table;\n    if (block_index >= 0 &&\n        block_index < static_cast<int>(persist_stores_.size())) {\n      block_table =\n          persist_stores_[block_index]->fetch(fetch_columns, fetch_local_rows);\n    } else if (block_index == -1 && memory_store_) {\n      block_table = memory_store_->fetch(fetch_columns, fetch_local_rows);\n    }\n\n    if (!block_table || block_table->num_rows() == 0) {\n      continue;\n    }\n\n    // Fill results\n    for (size_t i = 0; i < fetch_columns.size(); ++i) {\n      const std::string &col = fetch_columns[i];\n      auto col_it = std::find(columns.begin(), columns.end(), col);\n 
     if (col_it == columns.end()) continue;\n      size_t col_index = std::distance(columns.begin(), col_it);\n\n      auto chunked_array = block_table->column(i)->chunks();\n      auto flat_array_res =\n          arrow::Concatenate(chunked_array, arrow::default_memory_pool());\n      if (!flat_array_res.ok()) {\n        LOG_ERROR(\"Concatenate failed: %s\",\n                  flat_array_res.status().message().c_str());\n        return nullptr;\n      }\n      auto flat_array = flat_array_res.ValueOrDie();\n\n      for (size_t j = 0; j < fetch_local_rows.size(); ++j) {\n        auto scalar_result = flat_array->GetScalar(j);\n        if (!scalar_result.ok()) continue;\n        int output_row = output_to_result_index[j].first;\n        column_results[col_index].emplace_back(\n            output_row, std::move(scalar_result.ValueOrDie()));\n      }\n    }\n  }\n\n  // Phase 3: Construct result arrays\n  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());\n\n  bool need_local_doc_id = false;\n  size_t local_doc_id_col_index = -1;\n\n  for (size_t col_index = 0; col_index < columns.size(); ++col_index) {\n    const std::string &col = columns[col_index];\n    if (col == LOCAL_ROW_ID) {\n      need_local_doc_id = true;\n      local_doc_id_col_index = col_index;\n      continue;\n    }\n\n    auto &result_vec = column_results[col_index];\n    std::sort(result_vec.begin(), result_vec.end());\n\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars;\n    for (int i = 0; i < static_cast<int>(indices.size()); ++i) {\n      auto it = std::find_if(\n          result_vec.begin(), result_vec.end(),\n          [i](const std::pair<int, std::shared_ptr<arrow::Scalar>> &p) {\n            return p.first == i;\n          });\n      if (it != result_vec.end()) {\n        ordered_scalars.push_back(it->second);\n      } else {\n        auto field = result_schema->GetFieldByName(col);\n        ordered_scalars.push_back(\n            
arrow::MakeNullScalar(field ? field->type() : arrow::null()));\n      }\n    }\n\n    auto status = ConvertScalarVectorToArrayByType(ordered_scalars,\n                                                   &result_arrays[col_index]);\n    if (!status.ok()) {\n      LOG_ERROR(\"Failed to convert scalars to array for column '%s': %s\",\n                col.c_str(), status.message().c_str());\n      return nullptr;\n    }\n  }\n\n  // Add LOCAL_ROW_ID array if requested\n  if (need_local_doc_id) {\n    std::sort(local_doc_id_values.begin(), local_doc_id_values.end());\n    std::vector<uint64_t> values;\n    values.reserve(local_doc_id_values.size());\n    for (const auto &[row, id] : local_doc_id_values) {\n      values.push_back(id);\n    }\n\n    arrow::UInt64Builder builder;\n    auto s = builder.AppendValues(values);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to append values to builder: %s\", s.message().c_str());\n      return nullptr;\n    }\n    std::shared_ptr<arrow::Array> array;\n    s = builder.Finish(&array);\n    if (!s.ok()) {\n      LOG_ERROR(\"Failed to finish builder: %s\", s.message().c_str());\n      return nullptr;\n    }\n    result_arrays[local_doc_id_col_index] = std::move(array);\n  }\n\n  // Wrap arrays into ChunkedArray and build final table\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns;\n  result_columns.reserve(result_arrays.size());\n  for (const auto &arr : result_arrays) {\n    result_columns.push_back(std::make_shared<arrow::ChunkedArray>(arr));\n  }\n\n  return arrow::Table::Make(result_schema, result_columns,\n                            static_cast<int64_t>(indices.size()));\n}\n\nTablePtr SegmentImpl::fetch(const std::vector<std::string> &columns,\n                            const std::vector<int> &indices) const {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  // Build result schema\n  std::vector<std::shared_ptr<arrow::Field>> fields;\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    const 
auto &col = columns[i];\n    if (col == LOCAL_ROW_ID) {\n      fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));\n    } else if (col == GLOBAL_DOC_ID) {\n      fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64()));\n    } else if (col == USER_ID) {\n      fields.push_back(arrow::field(USER_ID, arrow::utf8()));\n    } else {\n      auto *field = collection_schema_->get_field(col);\n      std::shared_ptr<arrow::Field> arrow_field;\n      auto status = ConvertFieldSchemaToArrowField(field, &arrow_field);\n      if (!status.ok()) {\n        LOG_ERROR(\"Convert field schema failed: %s\",\n                  field->to_string().c_str());\n        return nullptr;\n      }\n      fields.push_back(std::move(arrow_field));\n    }\n  }\n\n  auto result_schema = std::make_shared<arrow::Schema>(fields);\n\n  // Early return for empty indices\n  if (indices.empty()) {\n    arrow::ArrayVector empty_arrays;\n    for (const auto &field : fields) {\n      empty_arrays.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());\n    }\n    return arrow::Table::Make(result_schema, empty_arrays, 0);\n  }\n\n  if (segment_meta_->doc_count() == 0) {\n    LOG_ERROR(\"Segment has no rows\");\n    return nullptr;\n  }\n\n  if (use_fetch_perf_) {\n    return fetch_perf(columns, result_schema, indices);\n  }\n  return fetch_normal(columns, result_schema, indices);\n}\n\nExecBatchPtr SegmentImpl::fetch(const std::vector<std::string> &columns,\n                                int doc_id) const {\n  if (columns.empty()) {\n    LOG_ERROR(\"Empty columns\");\n    return nullptr;\n  }\n\n  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);\n\n  const auto &block_offsets = get_persist_block_offsets(BlockType::SCALAR);\n  const auto &block_metas = get_persist_block_metas(BlockType::SCALAR);\n\n  bool is_in_single_persist_store = false;\n  for (auto &block : block_metas) {\n    std::vector<bool> is_column_in_block;\n    is_column_in_block.reserve(columns.size());\n    for 
(const auto &column : columns) {\n      is_column_in_block.push_back(block.contain_column(column));\n    }\n\n    // Count how many columns are in this block\n    int count =\n        std::count(is_column_in_block.begin(), is_column_in_block.end(), true);\n\n    if (count == 0) {\n      // None of the query columns are in this block; continue to the next block\n      continue;\n    } else if (count == static_cast<int>(columns.size())) {\n      // All query columns are present in this block; stop searching\n      is_in_single_persist_store = true;\n      break;\n    } else {\n      // Some but not all query columns are in this block (spanning multiple\n      // blocks); stop searching\n      break;\n    }\n  }\n\n  if (is_in_single_persist_store) {\n    int offset_idx = -1;\n    int block_index = find_persist_block_id(BlockType::SCALAR, doc_id,\n                                            columns[0], &offset_idx);\n    if (block_index != -1 && offset_idx > -1 &&\n        offset_idx < static_cast<int>(block_offsets.size())) {\n      int local_row = doc_id - block_offsets[offset_idx];\n      return persist_stores_[block_index]->fetch(columns, local_row);\n    }\n\n    // Check memory store\n    if (segment_meta_->has_writing_forward_block()) {\n      int mem_offset =\n          block_offsets.empty()\n              ? 
0\n              : block_offsets.back() + block_metas.back().doc_count_;\n      const auto &mem_block = segment_meta_->writing_forward_block().value();\n\n      if (mem_offset <= doc_id &&\n          doc_id < mem_offset + static_cast<int>(mem_block.doc_count_)) {\n        int local_row = doc_id - mem_offset;\n        return memory_store_->fetch(columns, local_row);\n      }\n    }\n  } else {\n    auto table = fetch(columns, std::vector<int>{doc_id});\n    if (table) {\n      std::vector<arrow::Datum> datums;\n      for (const auto &col : table->columns()) {\n        datums.emplace_back(col->chunk(0)->GetScalar(0).ValueOrDie());\n      }\n\n      arrow::Result<arrow::compute::ExecBatch> exec_batch_result =\n          arrow::compute::ExecBatch::Make(datums, table->num_rows());\n\n      if (exec_batch_result.ok()) {\n        arrow::compute::ExecBatch exec_batch = exec_batch_result.ValueOrDie();\n        return std::make_shared<arrow::compute::ExecBatch>(exec_batch);\n      }\n    }\n  }\n\n  LOG_ERROR(\"Document ID %d not found in persist segment\", doc_id);\n  return nullptr;\n}\n\nRecordBatchReaderPtr SegmentImpl::scan(\n    const std::vector<std::string> &columns) const {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);\n\n  const std::vector<BlockMeta> &scalar_blocks =\n      get_persist_block_metas(BlockType::SCALAR);\n\n  std::map<std::pair<int64_t, int64_t>,\n           std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>>\n      block_groups;\n\n  for (size_t i = 0; i < scalar_blocks.size() && i < persist_stores_.size();\n       ++i) {\n    const auto &block = scalar_blocks[i];\n    const auto &store = persist_stores_[i];\n\n    std::vector<std::string> interested_cols;\n    for (const auto &col : columns) {\n      if (block.contain_column(col)) {\n        interested_cols.push_back(col);\n      }\n    }\n\n    if (interested_cols.empty()) {\n      continue;\n    }\n\n    auto reader = 
store->scan(interested_cols);\n    if (!reader) {\n      continue;\n    }\n\n    auto key = std::make_pair(block.min_doc_id(), block.max_doc_id());\n    block_groups[key].push_back(std::move(reader));\n  }\n\n  if (memory_store_ && memory_store_->num_rows() > 0) {\n    auto reader = memory_store_->scan(columns);\n    if (reader) {\n      auto &mem_block = segment_meta_->writing_forward_block().value();\n      auto key = std::make_pair(mem_block.min_doc_id(), mem_block.max_doc_id());\n      block_groups[key].push_back(std::move(reader));\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::Field>> fields;\n  for (const auto &col : columns) {\n    if (col == LOCAL_ROW_ID) {\n      continue;\n    } else if (col == GLOBAL_DOC_ID) {\n      fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64(), false));\n    } else if (col == USER_ID) {\n      fields.push_back(arrow::field(USER_ID, arrow::utf8(), false));\n    } else {\n      auto *field = collection_schema_->get_field(col);\n      std::shared_ptr<arrow::Field> arrow_field;\n      auto s = ConvertFieldSchemaToArrowField(field, &arrow_field);\n      if (!s.ok()) {\n        LOG_ERROR(\"convert field schema: %s to arrow field failed\",\n                  field->to_string().c_str());\n        return nullptr;\n      }\n      fields.push_back(arrow_field);\n    }\n  }\n  auto target_schema = std::make_shared<arrow::Schema>(fields);\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> merged_readers;\n  for (auto &kv : block_groups) {\n    auto &&readers = std::move(kv.second);\n    auto merging_reader =\n        ColumnMergingReader::Make(target_schema, std::move(readers));\n    if (merging_reader) {\n      merged_readers.push_back(std::move(merging_reader));\n    }\n  }\n\n  return std::make_shared<CombinedRecordBatchReader>(\n      shared_from_this(), std::move(merged_readers), columns);\n}\n\n\n////////////////////////////////////////////////////////////////////////////////////\n// CombinedRecordBatchReader 
implementation\n////////////////////////////////////////////////////////////////////////////////////\n\nSegmentImpl::CombinedRecordBatchReader::CombinedRecordBatchReader(\n    std::shared_ptr<const SegmentImpl> segment,\n    std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers,\n    const std::vector<std::string> &columns)\n    : segment_(segment),\n      readers_(std::move(readers)),\n      current_reader_index_(0),\n      local_doc_id_(0) {\n  if (!readers_.empty()) {\n    auto schema = readers_[0]->schema();\n    std::vector<std::shared_ptr<arrow::Field>> selected_fields;\n    for (size_t i = 0; i < columns.size(); ++i) {\n      auto &col_name = columns[i];\n      if (col_name == LOCAL_ROW_ID) {\n        selected_fields.push_back(\n            arrow::field(LOCAL_ROW_ID, arrow::uint64(), false));\n        need_local_doc_id_ = true;\n        local_doc_id_col_index_ = static_cast<int>(i);\n      } else {\n        if (auto field = schema->GetFieldByName(col_name); field) {\n          selected_fields.push_back(field);\n        }\n      }\n    }\n\n    projected_schema_ = arrow::schema(selected_fields);\n\n    auto segment_meta = segment_->meta();\n    const auto &blocks = segment_meta->persisted_blocks();\n    for (const auto &block : blocks) {\n      if (block.type() != BlockType::SCALAR) continue;\n      offsets_.push_back(block.min_doc_id_);\n    }\n    if (segment_meta->has_writing_forward_block()) {\n      const auto &mem_block = segment_meta->writing_forward_block().value();\n      offsets_.push_back(mem_block.min_doc_id_);\n    }\n  }\n}\n\nSegmentImpl::CombinedRecordBatchReader::~CombinedRecordBatchReader() {}\n\nstd::shared_ptr<arrow::Schema> SegmentImpl::CombinedRecordBatchReader::schema()\n    const {\n  return projected_schema_;\n}\n\narrow::Status SegmentImpl::CombinedRecordBatchReader::ReadNext(\n    std::shared_ptr<arrow::RecordBatch> *batch) {\n  *batch = nullptr;\n  while (current_reader_index_ < readers_.size()) {\n    auto status = 
readers_[current_reader_index_]->ReadNext(batch);\n    if (!status.ok()) {\n      return status;\n    }\n\n    if (need_local_doc_id_ && *batch) {\n      auto num_rows = (*batch)->num_rows();\n      arrow::UInt64Builder builder;\n      ARROW_RETURN_NOT_OK(builder.Reserve(num_rows));\n\n      for (int64_t i = 0; i < num_rows; ++i) {\n        builder.UnsafeAppend(local_doc_id_++);\n      }\n      std::shared_ptr<arrow::Array> local_id_array;\n      ARROW_RETURN_NOT_OK(builder.Finish(&local_id_array));\n\n      auto result =\n          (*batch)->AddColumn(local_doc_id_col_index_,\n                              projected_schema_->GetFieldByName(LOCAL_ROW_ID),\n                              std::move(local_id_array));\n      if (result.ok()) {\n        *batch = std::move(result.ValueOrDie());\n      }\n    }\n\n    if (*batch) {\n      return arrow::Status::OK();\n    }\n\n    current_reader_index_++;\n    if (current_reader_index_ < readers_.size()) {\n      local_doc_id_ = offsets_[current_reader_index_];\n    }\n  }\n\n  *batch = nullptr;\n  return arrow::Status::OK();\n}\n\nbool SegmentImpl::ready_for_dump_block() {\n  if (memory_store_) return memory_store_->is_full();\n  return false;\n}\n\ntemplate <typename ArrayType, typename ValueType>\nStatus ProcessChunkData(InvertedColumnIndexer::Ptr *column_indexer,\n                        const std::shared_ptr<arrow::Array> &chunk,\n                        int64_t &doc_count) {\n  auto typed_array = std::dynamic_pointer_cast<ArrayType>(chunk);\n  if (typed_array) {\n    for (int64_t i = 0; i < typed_array->length(); ++i, ++doc_count) {\n      if (typed_array->IsNull(i)) {\n        auto status = (*column_indexer)->insert_null(doc_count);\n        if (!status.ok()) {\n          LOG_ERROR(\"Failed to insert null value to indexer for doc %zu: %s\",\n                    (size_t)doc_count, status.message().c_str());\n          return status;\n        }\n      } else {\n        ValueType value = typed_array->Value(i);\n        
std::string value_str(reinterpret_cast<const char *>(&value),\n                              sizeof(ValueType));\n        auto status = (*column_indexer)->insert(doc_count, value_str);\n        if (!status.ok()) {\n          LOG_ERROR(\"Failed to insert numeric value to indexer for doc %zu: %s\",\n                    (size_t)doc_count, status.message().c_str());\n          return status;\n        }\n      }\n    }\n  }\n  return Status::OK();\n}\n\nStatus SegmentImpl::insert_array_to_invert_indexer(\n    const FieldSchema::Ptr &column_schema,\n    const std::shared_ptr<arrow::ChunkedArray> &new_column,\n    InvertedColumnIndexer::Ptr *column_indexer) {\n  // Iterate through the new column data and insert into the indexer\n  int64_t doc_count = 0;\n  for (int chunk_index = 0; chunk_index < new_column->num_chunks();\n       ++chunk_index) {\n    auto chunk = new_column->chunk(chunk_index);\n\n    // Handle different data types based on the column schema\n    switch (column_schema->data_type()) {\n      case DataType::INT32: {\n        auto status = ProcessChunkData<arrow::Int32Array, int32_t>(\n            column_indexer, chunk, doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      case DataType::INT64: {\n        auto status = ProcessChunkData<arrow::Int64Array, int64_t>(\n            column_indexer, chunk, doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      case DataType::UINT32: {\n        auto status = ProcessChunkData<arrow::UInt32Array, uint32_t>(\n            column_indexer, chunk, doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      case DataType::UINT64: {\n        auto status = ProcessChunkData<arrow::UInt64Array, uint64_t>(\n            column_indexer, chunk, doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      case DataType::FLOAT: {\n        auto status = ProcessChunkData<arrow::FloatArray, float>(\n            column_indexer, chunk, 
doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      case DataType::DOUBLE: {\n        auto status = ProcessChunkData<arrow::DoubleArray, double>(\n            column_indexer, chunk, doc_count);\n        CHECK_RETURN_STATUS(status);\n        break;\n      }\n      default:\n        LOG_WARN(\n            \"Unsupported data type for indexing: %s\",\n            DataTypeCodeBook::AsString(column_schema->data_type()).c_str());\n        break;\n    }\n  }\n\n  return Status::OK();\n}\n\n\nStatus SegmentImpl::reopen_invert_indexer(bool read_only) {\n  // build invert index path\n  uint32_t block_id = 0;\n  auto &persist_blocks = segment_meta_->persisted_blocks();\n  for (auto &block : persist_blocks) {\n    if (block.type() == BlockType::SCALAR_INDEX) {\n      block_id = block.id();\n      break;\n    }\n  }\n  std::string invert_index_path =\n      FileHelper::MakeInvertIndexPath(path_, id(), block_id);\n\n  // build invert index fields\n  std::vector<std::string> inverted_field_names;\n  auto inverted_fields_ptr = collection_schema_->forward_fields_with_index();\n  std::vector<FieldSchema> inverted_fields;\n  for (auto field : inverted_fields_ptr) {\n    inverted_fields.push_back(*field);\n    inverted_field_names.push_back(field->name());\n  }\n\n  // reopen invert indexer with read_only false\n  invert_indexers_.reset();\n  invert_indexers_ = InvertedIndexer::CreateAndOpen(collection_schema_->name(),\n                                                    invert_index_path, false,\n                                                    inverted_fields, read_only);\n  if (!invert_indexers_) {\n    LOG_ERROR(\"Failed to create scalar indexer\");\n    return Status::InternalError(\"Failed to create scalar indexer\");\n  }\n  return Status::OK();\n}\n\nStatus SegmentImpl::add_column(FieldSchema::Ptr column_schema,\n                               const std::string &expression,\n                               const AddColumnOptions & /*options*/) 
{\n  if (memory_store_) {\n    return Status::NotSupported(\n        \"Add column is not supported for segment with memory store\");\n  }\n\n  global_init();\n\n  std::vector<std::shared_ptr<arrow::Field>> fields;\n  arrow::Status status =\n      ConvertCollectionSchemaToArrowFields(collection_schema_, &fields);\n  if (!status.ok()) {\n    return Status::InvalidArgument(\n        \"ConvertCollectionSchemaToArrowFields failed:\", status.message());\n  }\n  auto physic_schema = std::make_shared<arrow::Schema>(fields);\n\n  auto &scalar_blocks = get_persist_block_metas(BlockType::SCALAR);\n  if (scalar_blocks.empty()) {\n    return Status::NotSupported(\n        \"Add column is not supported for empty scalar segment\");\n  }\n\n  std::shared_ptr<arrow::Field> arrow_field;\n  status = ConvertFieldSchemaToArrowField(column_schema.get(), &arrow_field);\n  if (!status.ok()) {\n    return Status::InvalidArgument(\"ConvertFieldSchemaToArrowField failed:\",\n                                   status.message());\n  }\n\n  std::shared_ptr<arrow::ChunkedArray> new_column;\n  auto expected_type = arrow_field->type();\n  if (expression.empty()) {\n    if (!column_schema->nullable()) {\n      return Status::InvalidArgument(\n          \"Add column is not supported for non-nullable column\");\n    }\n    arrow::Result<std::shared_ptr<arrow::Array>> result =\n        arrow::MakeArrayOfNull(expected_type, scalar_blocks[0].doc_count_);\n    if (!result.ok()) {\n      return Status::InternalError(\"MakeArrayOfNull failed\");\n    }\n    auto array = result.ValueOrDie();\n    new_column = std::make_shared<arrow::ChunkedArray>(\n        std::vector<std::shared_ptr<arrow::Array>>{array});\n\n  } else {\n    // Parse Simple sql expression\n    auto p_result = ParseToExpression(expression, physic_schema);\n    if (!p_result.ok()) {\n      return Status::InvalidArgument(\"parse expression failed:\",\n                                     p_result.status().message());\n    }\n    auto expr = 
p_result.ValueOrDie();\n\n    auto result = ReadBlocksAsDataset(scalar_blocks, path_, segment_meta_->id(),\n                                      !options_.enable_mmap_);\n    if (!result.ok()) {\n      return Status::InternalError(result.status().message());\n    }\n    auto dataset = std::move(result).ValueOrDie();\n    auto eval_result = EvaluateExpressionWithDataset(\n        dataset, column_schema->name(), expr, expected_type);\n    if (!eval_result.ok()) {\n      return Status::InternalError(\"evaluate expression failed:\",\n                                   eval_result.status().message());\n    }\n    auto result_table = eval_result.ValueOrDie();\n    if (result_table->num_columns() != 1) {\n      return Status::InvalidArgument(\n          \"Expression result must have exactly one column\");\n    }\n    new_column = result_table->column(0);\n  }\n\n  // write new column\n  const std::string &filter_column = scalar_blocks[0].columns()[0];\n  std::vector<BlockMeta> filter_column_blocks;\n  std::copy_if(scalar_blocks.begin(), scalar_blocks.end(),\n               std::back_inserter(filter_column_blocks),\n               [&filter_column](const BlockMeta &block) {\n                 return block.contain_column(filter_column);\n               });\n\n  std::vector<BlockMeta> new_blocks;\n  status = WriteColumnInBlocks(\n      column_schema->name(), new_column, filter_column_blocks, path_,\n      segment_meta_->id(), [this]() { return allocate_block_id(); },\n      !options_.enable_mmap_, &new_blocks);\n  if (!status.ok()) {\n    return Status::InternalError(status.message());\n  }\n\n  // create persist scalar indexer\n  if (column_schema->has_invert_index()) {\n    if (invert_indexers_) {\n      auto s = reopen_invert_indexer();\n      CHECK_RETURN_STATUS(s);\n\n      s = invert_indexers_->create_column_indexer(*column_schema);\n      CHECK_RETURN_STATUS(s);\n\n      // update segment meta\n      auto &persist_blocks = segment_meta_->persisted_blocks();\n      for 
(auto &block : persist_blocks) {\n        if (block.type() == BlockType::SCALAR_INDEX) {\n          block.add_column(column_schema->name());\n          break;\n        }\n      }\n    } else {\n      auto new_block_id = allocate_block_id();\n      std::string new_invert_index_path =\n          FileHelper::MakeInvertIndexPath(path_, id(), new_block_id);\n\n      invert_indexers_ = InvertedIndexer::CreateAndOpen(\n          collection_schema_->name(), new_invert_index_path, true,\n          {*column_schema}, false);\n      if (!invert_indexers_) {\n        LOG_ERROR(\"Failed to create scalar indexer\");\n        return Status::InternalError(\"Failed to create scalar indexer\");\n      }\n\n      // update segment meta\n      BlockMeta block;\n      block.set_id(new_block_id);\n      block.set_type(BlockType::SCALAR_INDEX);\n      block.set_doc_count(new_column->length());\n      block.set_min_doc_id(doc_ids_.front());\n      block.set_max_doc_id(doc_ids_.back());\n      block.set_columns({column_schema->name()});\n\n      segment_meta_->add_persisted_block(block);\n    }\n\n    auto column_indexer = (*invert_indexers_)[column_schema->name()];\n    auto s = insert_array_to_invert_indexer(column_schema, new_column,\n                                            &column_indexer);\n    CHECK_RETURN_STATUS(s);\n    column_indexer->seal();\n    invert_indexers_->flush();\n  }\n\n  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);\n  // create and append persist scalar indexer\n  for (auto &block : new_blocks) {\n    auto forward_path = FileHelper::MakeForwardBlockPath(\n        path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);\n\n    BaseForwardStore::Ptr forward_store;\n    if (options_.enable_mmap_) {\n      forward_store = std::make_shared<MmapForwardStore>(forward_path);\n    } else {\n      forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);\n    }\n    auto s = forward_store->Open();\n    CHECK_RETURN_STATUS(s);\n    
persist_stores_.push_back(forward_store);\n    segment_meta_->add_persisted_block(block);\n  }\n\n  // collection_schema append new field\n  auto s = collection_schema_->add_field(column_schema);\n  CHECK_RETURN_STATUS(s);\n\n  fresh_persist_block_offset();\n\n  fresh_persist_chunked_array();\n\n  return Status::OK();\n}\n\n\nStatus SegmentImpl::alter_column(const std::string &column_name,\n                                 const FieldSchema::Ptr &new_column_schema,\n                                 const AlterColumnOptions & /*options*/) {\n  if (memory_store_) {\n    return Status::NotSupported(\n        \"Add column is not supported for segment with memory store\");\n  }\n\n  global_init();\n\n  auto old_field_schema = collection_schema_->get_forward_field(column_name);\n  if (!old_field_schema) {\n    return Status::NotFound(\"Column not found: \" + column_name);\n  }\n\n  std::string new_column_name = new_column_schema->name();\n  std::shared_ptr<arrow::Field> new_arrow_field;\n  auto as =\n      ConvertFieldSchemaToArrowField(new_column_schema.get(), &new_arrow_field);\n  if (!as.ok()) {\n    return Status::InternalError(\"ConvertFieldSchemaToArrowField failed: \" +\n                                 as.ToString());\n  }\n\n  auto &scalar_blocks = get_persist_block_metas(BlockType::SCALAR);\n  if (scalar_blocks.empty()) {\n    return Status::NotSupported(\n        \"Add column is not supported for empty scalar segment\");\n  }\n\n  std::vector<BlockMeta> filter_column_blocks;\n  for (const auto &block : scalar_blocks) {\n    if (block.contain_column(column_name)) {\n      filter_column_blocks.push_back(block);\n    }\n  }\n\n  auto result = ReadBlocksAsDataset(\n      filter_column_blocks, path_, segment_meta_->id(), !options_.enable_mmap_);\n  if (!result.ok()) {\n    return Status::InternalError(result.status().message());\n  }\n  auto dataset = std::move(result).ValueOrDie();\n\n  arrow::Expression expr = 
arrow::compute::field_ref(old_field_schema->name());\n  auto eval_result = EvaluateExpressionWithDataset(\n      dataset, new_column_name, expr, new_arrow_field->type());\n  if (!eval_result.ok()) {\n    return Status::InternalError(\"evaluate expression failed:\",\n                                 eval_result.status().message());\n  }\n  auto result_table = eval_result.ValueOrDie();\n  if (result_table->num_columns() != 1) {\n    return Status::InvalidArgument(\n        \"Expression result must have exactly one column\");\n  }\n  auto new_column = result_table->column(0);\n\n  std::vector<BlockMeta> new_blocks;\n  auto status = WriteColumnInBlocks(\n      new_column_name, new_column, filter_column_blocks, path_,\n      segment_meta_->id(), [this]() { return allocate_block_id(); },\n      !options_.enable_mmap_, &new_blocks);\n  if (!status.ok()) {\n    return Status::InternalError(status.message());\n  }\n\n  if (new_column_schema->has_invert_index()) {\n    if (invert_indexers_) {\n      auto s = reopen_invert_indexer();\n      CHECK_RETURN_STATUS(s);\n\n      s = invert_indexers_->remove_column_indexer(column_name);\n      CHECK_RETURN_STATUS(s);\n\n      s = invert_indexers_->create_column_indexer(*new_column_schema);\n      CHECK_RETURN_STATUS(s);\n\n      // update segment meta\n      auto &persist_blocks = segment_meta_->persisted_blocks();\n      for (auto &block : persist_blocks) {\n        if (block.type() == BlockType::SCALAR_INDEX) {\n          block.del_column(old_field_schema->name());\n          block.add_column(new_column_schema->name());\n          break;\n        }\n      }\n    } else {\n      auto new_block_id = allocate_block_id();\n      std::string new_invert_index_path =\n          FileHelper::MakeInvertIndexPath(path_, id(), new_block_id);\n\n      invert_indexers_ = InvertedIndexer::CreateAndOpen(\n          collection_schema_->name(), new_invert_index_path, true,\n          {*new_column_schema}, false);\n      if (!invert_indexers_) {\n   
     LOG_ERROR(\"Failed to create scalar indexer\");\n        return Status::InternalError(\"Failed to create scalar indexer\");\n      }\n\n      // update segment meta\n      BlockMeta block;\n      block.set_id(new_block_id);\n      block.set_type(BlockType::SCALAR_INDEX);\n      block.set_doc_count(new_column->length());\n      block.set_min_doc_id(doc_ids_.front());\n      block.set_max_doc_id(doc_ids_.back());\n      block.set_columns({new_column_schema->name()});\n\n      segment_meta_->add_persisted_block(block);\n    }\n\n    // insert data into new invert indexer\n    auto column_indexer = (*invert_indexers_)[new_column_schema->name()];\n    auto s = insert_array_to_invert_indexer(new_column_schema, new_column,\n                                            &column_indexer);\n    CHECK_RETURN_STATUS(s);\n    column_indexer->seal();\n    invert_indexers_->flush();\n  } else if (old_field_schema->has_invert_index()) {\n    // drop old invert indexer\n    auto s = reopen_invert_indexer();\n    CHECK_RETURN_STATUS(s);\n\n    s = invert_indexers_->remove_column_indexer(column_name);\n    CHECK_RETURN_STATUS(s);\n\n    auto &persist_blocks = segment_meta_->persisted_blocks();\n    for (auto &block : persist_blocks) {\n      if (block.type() == BlockType::SCALAR_INDEX) {\n        block.del_column(old_field_schema->name());\n        if (block.columns().empty()) {\n          segment_meta_->remove_block(block.id());\n        }\n        break;\n      }\n    }\n  }\n\n  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);\n  // update old block, remove column\n  std::vector<BlockMeta> &persisted_blocks = segment_meta_->persisted_blocks();\n  std::vector<int> will_del_block_idx;\n  for (size_t idx = 0; idx < persisted_blocks.size(); idx++) {\n    auto &block = persisted_blocks[idx];\n    if (block.type() == BlockType::SCALAR) {\n      if (block.contain_column(column_name)) {\n        if (block.columns_.size() > 1) {\n          block.del_column(column_name);\n        
} else {\n          will_del_block_idx.push_back(idx);\n        }\n      }\n    }\n  }\n\n  // delete single block\n  std::vector<int> will_del_block_ids;\n  for (int i = static_cast<int>(will_del_block_idx.size()) - 1; i >= 0; i--) {\n    int idx = will_del_block_idx[i];\n    auto &block = persisted_blocks[idx];\n    will_del_block_ids.push_back(block.id_);\n    persisted_blocks.erase(persisted_blocks.begin() + idx);\n  }\n\n  std::vector<int> will_del_local_block_idx;\n  auto &local_blocks = get_persist_block_metas(BlockType::SCALAR);\n  for (size_t idx = 0; idx < local_blocks.size(); idx++) {\n    auto &block = local_blocks[idx];\n    if (block.contain_column(column_name)) {\n      if (block.columns_.size() == 1) {\n        will_del_local_block_idx.push_back(idx);\n      }\n    }\n  }\n\n  for (int idx = static_cast<int>(will_del_local_block_idx.size()) - 1;\n       idx >= 0; idx--) {\n    int local_idx = will_del_local_block_idx[idx];\n    persist_stores_.erase(persist_stores_.begin() + local_idx);\n  }\n\n  if (!options_.enable_mmap_) {\n    ailego::BufferManager::Instance().init(\n        GlobalConfig::Instance().memory_limit_bytes(), 1);\n  }\n\n  // delete single column store file\n  for (auto block_id : will_del_block_ids) {\n    // delete forward store file\n    std::string filepath = FileHelper::MakeForwardBlockPath(\n        path_, meta()->id(), block_id, !options_.enable_mmap_);\n    if (!FileHelper::RemoveFile(filepath)) {\n      return Status::InternalError(\"remove \", filepath, \" failed\");\n    } else {\n      LOG_INFO(\"remove scalar store file: %s success\", filepath.c_str());\n    }\n  }\n\n  // create and append persist scalar indexer\n  for (auto &block : new_blocks) {\n    auto forward_path = FileHelper::MakeForwardBlockPath(\n        path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);\n\n    BaseForwardStore::Ptr forward_store;\n    if (options_.enable_mmap_) {\n      forward_store = 
std::make_shared<MmapForwardStore>(forward_path);\n    } else {\n      forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);\n    }\n    auto s = forward_store->Open();\n    CHECK_RETURN_STATUS(s);\n    persist_stores_.push_back(forward_store);\n    segment_meta_->add_persisted_block(block);\n  }\n\n  // collection_schema append new field\n  auto alter_status =\n      collection_schema_->alter_field(column_name, new_column_schema);\n  CHECK_RETURN_STATUS(alter_status);\n\n  fresh_persist_block_offset();\n\n  fresh_persist_chunked_array();\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::drop_column(const std::string &column_name) {\n  if (memory_store_) {\n    return Status::NotSupported(\n        \"Add column is not supported for segment with memory store\");\n  }\n\n  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);\n  // update old block, remove column\n  std::vector<BlockMeta> &persisted_blocks = segment_meta_->persisted_blocks();\n  std::vector<int> will_del_block_idx;\n  for (size_t idx = 0; idx < persisted_blocks.size(); idx++) {\n    auto &block = persisted_blocks[idx];\n    if (block.type() == BlockType::SCALAR) {\n      if (block.contain_column(column_name)) {\n        if (block.columns_.size() > 1) {\n          block.del_column(column_name);\n        } else {\n          will_del_block_idx.push_back(idx);\n        }\n      }\n    }\n  }\n\n  // delete single block\n  std::vector<int> will_del_block_ids;\n  for (int i = static_cast<int>(will_del_block_idx.size()) - 1; i >= 0; i--) {\n    int idx = will_del_block_idx[i];\n    auto &block = persisted_blocks[idx];\n    will_del_block_ids.push_back(block.id_);\n    persisted_blocks.erase(persisted_blocks.begin() + idx);\n  }\n\n  std::vector<int> will_del_local_block_idx;\n  auto &local_blocks = get_persist_block_metas(BlockType::SCALAR);\n  for (size_t idx = 0; idx < local_blocks.size(); idx++) {\n    auto &block = local_blocks[idx];\n    if (block.contain_column(column_name)) {\n  
    if (block.columns_.size() == 1) {\n        will_del_local_block_idx.push_back(idx);\n      }\n    }\n  }\n\n  for (int idx = static_cast<int>(will_del_local_block_idx.size()) - 1;\n       idx >= 0; idx--) {\n    int local_idx = will_del_local_block_idx[idx];\n    persist_stores_.erase(persist_stores_.begin() + local_idx);\n  }\n\n  if (!options_.enable_mmap_) {\n    ailego::BufferManager::Instance().init(\n        GlobalConfig::Instance().memory_limit_bytes(), 1);\n  }\n\n  // delete single column store file\n  for (auto block_id : will_del_block_ids) {\n    // delete forward store file\n    std::string filepath = FileHelper::MakeForwardBlockPath(\n        path_, meta()->id(), block_id, !options_.enable_mmap_);\n    if (!FileHelper::RemoveFile(filepath)) {\n      return Status::InternalError(\"remove \", filepath, \" failed\");\n    } else {\n      LOG_INFO(\"remove scalar store file: %s success\", filepath.c_str());\n    }\n  }\n\n  auto old_field_schema = collection_schema_->get_forward_field(column_name);\n  if (old_field_schema->has_invert_index()) {\n    auto s = reopen_invert_indexer();\n    CHECK_RETURN_STATUS(s);\n\n    s = invert_indexers_->remove_column_indexer(old_field_schema->name());\n    CHECK_RETURN_STATUS(s);\n    invert_indexers_->flush();\n\n    auto &persist_blocks = segment_meta_->persisted_blocks();\n    for (auto &block : persist_blocks) {\n      if (block.type() == BlockType::SCALAR_INDEX) {\n        block.del_column(old_field_schema->name());\n        if (block.columns_.empty()) {\n          // remove block meta from segment meta\n          segment_meta_->remove_block(block.id_);\n        }\n        break;\n      }\n    }\n  }\n\n  // collection_schema append new field\n  auto alter_status = collection_schema_->drop_field(column_name);\n  CHECK_RETURN_STATUS(alter_status);\n\n  fresh_persist_block_offset();\n\n  fresh_persist_chunked_array();\n\n  return 
Status::OK();\n}\n\n////////////////////////////////////////////////////////////////////////////////////\n// Private methods implementation\n////////////////////////////////////////////////////////////////////////////////////\n\n\nvoid SegmentImpl::fresh_persist_block_offset() {\n  // Clear\n  for (size_t i = 0; i <= static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE);\n       ++i) {\n    if (std::holds_alternative<std::vector<int>>(persist_block_offsets_[i])) {\n      std::get<std::vector<int>>(persist_block_offsets_[i]).clear();\n    } else if (std::holds_alternative<\n                   std::unordered_map<std::string, std::vector<int>>>(\n                   persist_block_offsets_[i])) {\n      std::get<std::unordered_map<std::string, std::vector<int>>>(\n          persist_block_offsets_[i])\n          .clear();\n    }\n    std::visit(\n        [](auto &&arg) {\n          using T = std::decay_t<decltype(arg)>;\n          if constexpr (std::is_same_v<T, std::vector<BlockMeta>> ||\n                        std::is_same_v<\n                            T, std::unordered_map<std::string,\n                                                  std::vector<BlockMeta>>>) {\n            arg.clear();\n          }\n        },\n        persist_block_metas_[i]);\n  }\n\n  for (const auto &block : segment_meta_->persisted_blocks()) {\n    size_t type_index = static_cast<size_t>(block.type());\n    if (block.type() == BlockType::SCALAR) {\n      if (std::holds_alternative<std::vector<BlockMeta>>(\n              persist_block_metas_[type_index])) {\n        std::get<std::vector<BlockMeta>>(persist_block_metas_[type_index])\n            .push_back(block);\n      } else {\n        persist_block_metas_[type_index] = std::vector<BlockMeta>{block};\n        persist_block_offsets_[type_index] = std::vector<int>();\n      }\n    } else if (block.type() == BlockType::VECTOR_INDEX ||\n               block.type() == BlockType::VECTOR_INDEX_QUANTIZE) {\n      if (block.columns().size() == 1) 
{\n        auto column_name = block.columns()[0];\n        if (std::holds_alternative<\n                std::unordered_map<std::string, std::vector<BlockMeta>>>(\n                persist_block_metas_[type_index])) {\n          auto block_map =\n              std::get<std::unordered_map<std::string, std::vector<BlockMeta>>>(\n                  persist_block_metas_[type_index]);\n          auto iter = block_map.find(column_name);\n          if (iter != block_map.end()) {\n            auto &block_metas = iter->second;\n            block_metas.push_back(block);\n          } else {\n            block_map.insert(\n                std::make_pair(column_name, std::vector<BlockMeta>{block}));\n            auto block_offsets_map =\n                std::get<std::unordered_map<std::string, std::vector<int>>>(\n                    persist_block_offsets_[type_index]);\n            block_offsets_map.insert(\n                std::make_pair(column_name, std::vector<int>()));\n          }\n\n          std::get<std::unordered_map<std::string, std::vector<BlockMeta>>>(\n              persist_block_metas_[type_index])[column_name]\n              .push_back(block);\n        } else {\n          std::unordered_map<std::string, std::vector<BlockMeta>> new_map;\n          new_map[column_name].push_back(block);\n          persist_block_metas_[type_index] = std::move(new_map);\n          persist_block_offsets_[type_index] =\n              std::unordered_map<std::string, std::vector<int>>();\n        }\n      } else {\n        LOG_ERROR(\"Add block meta: %s failed, block.columns.size != 1\",\n                  block.to_string().c_str());\n      }\n    }\n  }\n\n  calculate_block_offsets();\n}\n\nvoid SegmentImpl::fresh_persist_chunked_array() {\n  if (options_.enable_mmap_ && options_.read_only_) {\n    persist_chunk_arrays_.clear();\n    chunk_offsets_.clear();\n    col_idx_map_.clear();\n    use_fetch_perf_ = false;\n\n    std::vector<std::vector<std::shared_ptr<arrow::ChunkedArray>>> 
chunk_arrays;\n    auto fields = collection_schema_->forward_field_names();\n    fields.insert(fields.begin(), USER_ID);\n    fields.insert(fields.begin(), GLOBAL_DOC_ID);\n    chunk_arrays.resize(fields.size());\n    persist_chunk_arrays_.resize(fields.size());\n\n    for (size_t i = 0; i < fields.size(); ++i) {\n      col_idx_map_[fields[i]] = i;\n    }\n\n    auto &block_metas = get_persist_block_metas(BlockType::SCALAR);\n    if (block_metas.empty()) {\n      return;\n    }\n\n    for (size_t i = 0; i < block_metas.size(); ++i) {\n      auto &block_meta = block_metas[i];\n      const auto table = persist_stores_[i]->get_table();\n      for (size_t j = 0; j < fields.size(); ++j) {\n        if (block_meta.contain_column(fields[j])) {\n          auto chunked_array = table->GetColumnByName(fields[j]);\n          if (chunked_array) {\n            chunk_arrays[j].push_back(chunked_array);\n          }\n        }\n      }\n    }\n\n    for (size_t i = 0; i < fields.size(); ++i) {\n      std::vector<std::shared_ptr<arrow::Array>> all_chunks;\n      for (const auto &arr : chunk_arrays[i]) {\n        for (int j = 0; j < arr->num_chunks(); ++j) {\n          all_chunks.push_back(arr->chunk(j));\n        }\n      }\n      persist_chunk_arrays_[i] =\n          std::make_shared<arrow::ChunkedArray>(all_chunks);\n    }\n\n    auto &first_chunked_array = persist_chunk_arrays_[0];\n    chunk_offsets_.reserve(first_chunked_array->num_chunks() + 1);\n    chunk_offsets_.push_back(0);\n\n    for (int chunk_idx = 0; chunk_idx < first_chunked_array->num_chunks();\n         ++chunk_idx) {\n      chunk_offsets_.push_back(chunk_offsets_.back() +\n                               first_chunked_array->chunk(chunk_idx)->length());\n    }\n\n    if (persist_chunk_arrays_.size() > 0 && chunk_offsets_.size() > 0) {\n      use_fetch_perf_ = true;\n    }\n\n    LOG_INFO(\n        \"fresh_persist_chunked_array persist_chunk_arrays[%zu] \"\n        \"chunk_offset[%zu]\",\n        
persist_chunk_arrays_.size(), chunk_offsets_.size());\n  }\n}\n\nvoid SegmentImpl::calculate_block_offsets() {\n  for (size_t type_index = 0;\n       type_index <= static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE);\n       ++type_index) {\n    auto &block_offsets = persist_block_offsets_[type_index];\n    int current_offset = 0;\n\n    // Visit the appropriate container based on the variant type\n    std::visit(\n        [&current_offset, &block_offsets](auto &&blocks) {\n          using T = std::decay_t<decltype(blocks)>;\n\n          if constexpr (std::is_same_v<T, std::vector<BlockMeta>>) {\n            // For SCALAR type - simple vector\n            auto &offset_vector = std::get<std::vector<int>>(block_offsets);\n            offset_vector.clear();\n            offset_vector.reserve(blocks.size());\n            if (!blocks.empty()) {\n              auto &filter_col_name = blocks[0].columns()[0];\n              for (const auto &block : blocks) {\n                if (!block.contain_column(filter_col_name)) continue;\n                offset_vector.push_back(current_offset);\n                current_offset += static_cast<int>(block.doc_count_);\n              }\n            }\n          } else if constexpr (std::is_same_v<T, std::unordered_map<\n                                                     std::string,\n                                                     std::vector<BlockMeta>>>) {\n            // For other types - map with column names\n            auto &offset_map =\n                std::get<std::unordered_map<std::string, std::vector<int>>>(\n                    block_offsets);\n            offset_map.clear();\n\n            for (const auto &[column_name, block_list] : blocks) {\n              auto &column_offsets = offset_map[column_name];\n              column_offsets.reserve(block_list.size());\n              int column_offset = 0;\n\n              for (const auto &block : block_list) {\n                column_offsets.push_back(column_offset);\n  
              column_offset += static_cast<int>(block.doc_count_);\n              }\n            }\n          }\n        },\n        persist_block_metas_[type_index]);\n  }\n}\n\nint SegmentImpl::find_persist_block_id(BlockType type, int segment_doc_id,\n                                       const std::string &col_name,\n                                       int *out_offset_idx) const {\n  size_t type_index = static_cast<size_t>(type);\n\n  auto visitor = [segment_doc_id, col_name,\n                  out_offset_idx](const auto &blocks) -> int {\n    using T = std::decay_t<decltype(blocks)>;\n    int current_offset = 0;\n\n    if constexpr (std::is_same_v<T, std::vector<BlockMeta>>) {\n      if (!blocks.empty()) {\n        std::string filter_column = col_name;\n        if (col_name.empty() || col_name == GLOBAL_DOC_ID ||\n            col_name == USER_ID) {\n          filter_column = blocks[0].columns()[0];\n        }\n        int offset_idx = -1;\n        for (size_t block_idx = 0; block_idx < blocks.size(); block_idx++) {\n          const auto &block = blocks[block_idx];\n          if (!block.contain_column(filter_column)) {\n            continue;\n          }\n          offset_idx++;\n          if (segment_doc_id >= current_offset &&\n              segment_doc_id <\n                  current_offset + static_cast<int>(block.doc_count_)) {\n            if (out_offset_idx) {\n              *out_offset_idx = offset_idx;\n            }\n            return static_cast<int>(block_idx);\n          }\n          current_offset += static_cast<int>(block.doc_count_);\n        }\n      }\n    } else if constexpr (std::is_same_v<\n                             T, std::unordered_map<std::string,\n                                                   std::vector<BlockMeta>>>) {\n      for (const auto &[column_name, block_list] : blocks) {\n        if (!column_name.empty() && column_name != col_name) {\n          continue;\n        }\n\n        current_offset = 0;\n        for 
(size_t block_idx = 0; block_idx < block_list.size(); block_idx++) {\n          const auto &block = block_list[block_idx];\n          if (segment_doc_id >= current_offset &&\n              segment_doc_id <\n                  current_offset + static_cast<int>(block.doc_count_)) {\n            return static_cast<int>(block_idx);\n          }\n          current_offset += static_cast<int>(block.doc_count_);\n        }\n      }\n    }\n\n    return -1;\n  };\n\n  return std::visit(visitor, persist_block_metas_[type_index]);\n}\n\nconst std::vector<int> &SegmentImpl::get_persist_block_offsets(\n    BlockType type, const std::string &col_name) const {\n  size_t type_index = static_cast<size_t>(type);\n\n  auto visitor = [&col_name](const auto &offsets) -> const std::vector<int> & {\n    using T = std::decay_t<decltype(offsets)>;\n\n    static const std::vector<int> empty_offsets;\n\n    if constexpr (std::is_same_v<T, std::vector<int>>) {\n      return offsets;\n    } else if constexpr (std::is_same_v<T,\n                                        std::unordered_map<std::string,\n                                                           std::vector<int>>>) {\n      auto it = offsets.find(col_name);\n      if (it != offsets.end()) {\n        return it->second;\n      }\n    }\n\n    return empty_offsets;\n  };\n\n  return std::visit(visitor, persist_block_offsets_[type_index]);\n}\n\nconst std::vector<BlockMeta> &SegmentImpl::get_persist_block_metas(\n    BlockType type, const std::string &col_name) const {\n  size_t type_index = static_cast<size_t>(type);\n\n  auto visitor =\n      [&col_name](const auto &metas) -> const std::vector<BlockMeta> & {\n    using T = std::decay_t<decltype(metas)>;\n\n    static const std::vector<BlockMeta> empty_metas;\n\n    if constexpr (std::is_same_v<T, std::vector<BlockMeta>>) {\n      return metas;\n    } else if constexpr (std::is_same_v<\n                             T, std::unordered_map<std::string,\n                                    
               std::vector<BlockMeta>>>) {\n      auto it = metas.find(col_name);\n      if (it != metas.end()) {\n        return it->second;\n      }\n    }\n\n    return empty_metas;\n  };\n\n  return std::visit(visitor, persist_block_metas_[type_index]);\n}\n\nStatus SegmentImpl::load_persist_scalar_blocks() {\n  doc_ids_.reserve(segment_meta_->doc_count());\n  for (const auto &block : segment_meta_->persisted_blocks()) {\n    if (block.type() == BlockType::SCALAR) {\n      auto forward_path = FileHelper::MakeForwardBlockPath(\n          path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);\n\n      BaseForwardStore::Ptr forward_store;\n      if (options_.enable_mmap_) {\n        forward_store = std::make_shared<MmapForwardStore>(forward_path);\n      } else {\n        forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);\n      }\n      auto s = forward_store->Open();\n      CHECK_RETURN_STATUS(s);\n      persist_stores_.push_back(forward_store);\n\n      if (!block.contain_column(GLOBAL_DOC_ID)) {\n        continue;\n      }\n      auto rb_reader = forward_store->scan({GLOBAL_DOC_ID});\n      while (true) {\n        std::shared_ptr<arrow::RecordBatch> batch;\n        auto status = rb_reader->ReadNext(&batch);\n        if (!status.ok()) {\n          LOG_ERROR(\"Read batch failed: %s\", status.message().c_str());\n          return Status::InternalError(status.message());\n        }\n\n        if (batch == nullptr) {\n          break;\n        }\n\n        auto uint64_array =\n            std::dynamic_pointer_cast<arrow::UInt64Array>(batch->column(0));\n        if (!uint64_array) {\n          LOG_ERROR(\"Failed to cast column to UInt64Array\");\n          return Status::InternalError(\"Array type mismatch\");\n        }\n        auto *values = uint64_array->raw_values();\n        doc_ids_.insert(doc_ids_.end(), values,\n                        values + uint64_array->length());\n      }\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus 
SegmentImpl::load_scalar_index_blocks(bool create) {\n  std::vector<FieldSchema> fields;\n  std::vector<std::string> field_names;\n  for (const auto &field : collection_schema_->forward_fields()) {\n    if (field->index_type() == IndexType::INVERT) {\n      fields.push_back(*field);\n      field_names.push_back(field->name());\n    }\n  }\n\n  if (fields.empty()) {\n    LOG_INFO(\"No scalar index found\");\n    return Status::OK();\n  }\n\n  if (create) {\n    auto block_id = allocate_block_id();\n    auto invert_path = FileHelper::MakeInvertIndexPath(path_, id(), block_id);\n    auto collection_name = collection_schema_->name();\n    invert_indexers_ = InvertedIndexer::CreateAndOpen(\n        collection_name, invert_path, true, fields, options_.read_only_);\n    if (!invert_indexers_) {\n      LOG_ERROR(\"Failed to open scalar indexer\");\n      return Status::InternalError(\"Failed to open scalar indexer\");\n    }\n\n    // scalar index block\n    segment_meta_->add_persisted_block(\n        BlockMeta{block_id, BlockType::SCALAR_INDEX, 0, 0, 0, field_names});\n\n    return Status::OK();\n  } else {\n    for (const auto &block : segment_meta_->persisted_blocks()) {\n      if (block.type() == BlockType::SCALAR_INDEX) {\n        auto block_id = block.id();\n        auto invert_path =\n            FileHelper::MakeInvertIndexPath(path_, id(), block_id);\n        auto collection_name = collection_schema_->name();\n        invert_indexers_ = InvertedIndexer::CreateAndOpen(\n            collection_name, invert_path, false, fields, options_.read_only_);\n        if (!invert_indexers_) {\n          LOG_ERROR(\"Failed to open scalar indexer\");\n          return Status::InternalError(\"Failed to open scalar indexer\");\n        }\n        return Status::OK();\n      }\n    }\n\n    if (invert_indexers_ == nullptr) {\n      LOG_ERROR(\"No scalar index found\");\n      return Status::NotFound(\"No scalar index found\");\n    }\n  }\n  return Status::OK();\n}\n\nStatus 
SegmentImpl::load_vector_index_blocks() {\n  for (const auto &block : segment_meta_->persisted_blocks()) {\n    if (block.type() == BlockType::VECTOR_INDEX ||\n        block.type() == BlockType::VECTOR_INDEX_QUANTIZE) {\n      // vector block only contained 1 column\n      auto column = block.columns()[0];\n\n      FieldSchema new_field_params =\n          *collection_schema_->get_vector_field(column);\n\n      auto vector_index_params = std::dynamic_pointer_cast<VectorIndexParams>(\n          new_field_params.index_params());\n      if (block.type_ == BlockType::VECTOR_INDEX) {\n        if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED ||\n            !segment_meta_->vector_indexed(column)) {\n          new_field_params.set_index_params(\n              MakeDefaultVectorIndexParams(vector_index_params->metric_type()));\n        }\n      } else {\n        if (!segment_meta_->vector_indexed(column)) {\n          new_field_params.set_index_params(MakeDefaultQuantVectorIndexParams(\n              vector_index_params->metric_type(),\n              vector_index_params->quantize_type()));\n        }\n      }\n\n      std::string index_path;\n      if (block.type_ == BlockType::VECTOR_INDEX) {\n        index_path = FileHelper::MakeVectorIndexPath(\n            path_, column, segment_meta_->id(), block.id_);\n\n      } else {\n        index_path = FileHelper::MakeQuantizeVectorIndexPath(\n            path_, column, segment_meta_->id(), block.id_);\n      }\n\n      auto vector_indexer =\n          std::make_shared<VectorColumnIndexer>(index_path, new_field_params);\n      auto s = vector_indexer->Open(vector_column_params::ReadOptions{\n          options_.enable_mmap_, false, true});\n      CHECK_RETURN_STATUS(s);\n\n      if (block.type_ == BlockType::VECTOR_INDEX) {\n        auto it = vector_indexers_.find(column);\n        if (it == vector_indexers_.end()) {\n          std::vector<VectorColumnIndexer::Ptr> vector_indexers;\n          
vector_indexers.push_back(vector_indexer);\n          vector_indexers_.emplace(column, std::move(vector_indexers));\n        } else {\n          it->second.push_back(vector_indexer);\n        }\n      } else {\n        auto it = quant_vector_indexers_.find(column);\n        if (it == quant_vector_indexers_.end()) {\n          std::vector<VectorColumnIndexer::Ptr> vector_indexers;\n          vector_indexers.push_back(vector_indexer);\n          quant_vector_indexers_.emplace(column, std::move(vector_indexers));\n        } else {\n          it->second.push_back(vector_indexer);\n        }\n      }\n    }\n  }\n  return Status::OK();\n}\n\nVectorColumnIndexer::Ptr SegmentImpl::create_vector_indexer(\n    const std::string &field_name, const FieldSchema &field, BlockID block_id,\n    bool is_quantized) {\n  std::string index_file_path;\n  if (is_quantized) {\n    index_file_path = FileHelper::MakeQuantizeVectorIndexPath(\n        path_, field_name, segment_meta_->id(), block_id);\n    quant_memory_vector_block_ids_[field_name] = block_id;\n  } else {\n    index_file_path = FileHelper::MakeVectorIndexPath(\n        path_, field_name, segment_meta_->id(), block_id);\n    memory_vector_block_ids_[field_name] = block_id;\n  }\n\n  if (FileHelper::FileExists(index_file_path)) {\n    LOG_WARN(\n        \"Index file[%s] already exists (possible crash residue); cleaning and \"\n        \"overwriting.\",\n        index_file_path.c_str());\n    FileHelper::RemoveFile(index_file_path);\n  }\n\n  auto vector_indexer =\n      std::make_shared<VectorColumnIndexer>(index_file_path, field);\n  vector_column_params::ReadOptions options{true, true};\n  auto status = vector_indexer->Open(options);\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to open vector indexer for field: %s, err: %s\",\n              field.to_string().c_str(), status.message().c_str());\n    return nullptr;\n  }\n  return vector_indexer;\n}\n\nStatus SegmentImpl::init_memory_components() {\n  // init memory block 
id\n  auto &mem_block = segment_meta_->writing_forward_block().value();\n\n  // create and open memory forward block\n  auto mem_path = FileHelper::MakeForwardBlockPath(seg_path_, mem_block.id_,\n                                                   !options_.enable_mmap_);\n  if (FileHelper::FileExists(mem_path)) {\n    LOG_WARN(\n        \"ForwardBlock file[%s] already exists (possible crash residue); \"\n        \"cleaning and overwriting.\",\n        mem_path.c_str());\n    FileHelper::RemoveFile(mem_path);\n  }\n  memory_store_ = std::make_shared<MemForwardStore>(\n      collection_schema_, mem_path,\n      options_.enable_mmap_ ? FileFormat::IPC : FileFormat::PARQUET,\n      options_.max_buffer_size_);\n  auto s = memory_store_->Open();\n  CHECK_RETURN_STATUS(s);\n\n  // create and open memory vector indexer\n  for (const auto &field : collection_schema_->vector_fields()) {\n    auto index_params =\n        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n\n    if (index_params->quantize_type() == QuantizeType::UNDEFINED) {\n      // create normal vector indexer\n      FieldSchema normal_field(*field);\n      normal_field.set_index_params(\n          MakeDefaultVectorIndexParams(index_params->metric_type()));\n      auto block_id = allocate_block_id();\n      auto vector_indexer =\n          create_vector_indexer(field->name(), normal_field, block_id);\n      if (!vector_indexer) {\n        return Status::InternalError(\"Create vector column indexer failed: \",\n                                     field->name());\n      }\n      memory_vector_indexers_.insert({field->name(), vector_indexer});\n    } else {\n      // first create normal vector indexer\n      FieldSchema normal_field(*field);\n      normal_field.set_index_params(\n          MakeDefaultVectorIndexParams(index_params->metric_type()));\n      auto block_id = allocate_block_id();\n      auto vector_indexer =\n          create_vector_indexer(field->name(), normal_field, 
block_id);\n      if (!vector_indexer) {\n        return Status::InternalError(\"Create vector column indexer failed: \",\n                                     field->name());\n      }\n      memory_vector_indexers_.insert({field->name(), vector_indexer});\n\n      // second create quantize vector indexer\n      block_id = allocate_block_id();\n      FieldSchema normal_quant_field(*field);\n      normal_quant_field.set_index_params(MakeDefaultQuantVectorIndexParams(\n          index_params->metric_type(), index_params->quantize_type()));\n      auto quant_vector_indexer = create_vector_indexer(\n          field->name(), normal_quant_field, block_id, true);\n\n      if (!quant_vector_indexer) {\n        return Status::InternalError(\"Create vector column indexer failed: \",\n                                     field->name());\n      }\n      quant_memory_vector_indexers_.insert(\n          {field->name(), quant_vector_indexer});\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::recover() {\n  // recover mem block meta\n  auto &mem_block = segment_meta_->writing_forward_block().value();\n  doc_id_allocator_.store(mem_block.min_doc_id());\n\n  std::string wal_file_path =\n      FileHelper::MakeWalPath(path_, segment_meta_->id(), mem_block.id_);\n  if (!std::filesystem::exists(wal_file_path)) {\n    LOG_INFO(\"Recover wal file not exists just return. path: %s\",\n             wal_file_path.c_str());\n    return Status::OK();\n  }\n\n  WalFilePtr recover_wal_file;\n  WalOptions wal_option;\n  wal_option.create_new = false;\n  if (WalFile::CreateAndOpen(wal_file_path, wal_option, &recover_wal_file) !=\n      0) {\n    LOG_WARN(\"Recover wal file failed. 
path: %s\", wal_file_path.c_str());\n    return Status::OK();\n  }\n  AILEGO_DEFER([&]() { recover_wal_file->close(); });\n\n  std::array<uint64_t, static_cast<size_t>(Operator::DELETE) + 1>\n      recovered_doc_count{};\n  uint64_t total_recovered_doc_count{0};\n\n  int ret = recover_wal_file->prepare_for_read();\n  if (ret != 0) {\n    LOG_ERROR(\"Recover wal file failed. path: %s\", wal_file_path.c_str());\n    return Status::InternalError(\"Failed to prepare wal file: \", wal_file_path,\n                                 \" for read\");\n  }\n\n  LOG_INFO(\"Recover start read wal [%s]\", wal_file_path.c_str());\n\n  std::lock_guard<std::mutex> lock(seg_mtx_);\n\n  while (true) {\n    std::string buf = recover_wal_file->next();\n    if (buf.empty()) {\n      LOG_INFO(\"Recover read wal finished\");\n      break;\n    }\n    total_recovered_doc_count++;\n    auto doc = Doc::deserialize(reinterpret_cast<const uint8_t *>(buf.data()),\n                                buf.size());\n    if (doc == nullptr) {\n      LOG_ERROR(\"Recover wal failed. doc deserialize failed at %zu\",\n                (size_t)total_recovered_doc_count);\n      continue;\n    }\n\n    Status status;\n    switch (doc->get_operator()) {\n      case Operator::INSERT: {\n        internal_insert(*doc);\n        break;\n      }\n      case Operator::UPDATE: {\n        internal_update(*doc);\n        break;\n      }\n      case Operator::UPSERT: {\n        internal_upsert(*doc);\n        break;\n      }\n      case Operator::DELETE: {\n        internal_delete(*doc);\n        break;\n      }\n      default:\n        LOG_ERROR(\"Unknown operator type: %d\", (int)doc->get_operator());\n        break;\n    }\n\n    if (!status.ok()) {\n      LOG_ERROR(\"Recover wal failed. 
Operation %d failed at %zu: %s\",\n                static_cast<int>(doc->get_operator()),\n                (size_t)total_recovered_doc_count, status.message().c_str());\n      continue;\n    }\n\n    recovered_doc_count[static_cast<size_t>(doc->get_operator())]++;\n  }\n\n  const auto added_docs = recovered_doc_count[0] +  // INSERT\n                          recovered_doc_count[1] +  // UPSERT\n                          recovered_doc_count[2];   // UPDATE\n  mem_block.max_doc_id_ += added_docs;\n\n  LOG_INFO(\n      \"Recover from wal finished. total_recovered_doc_count[%zu] insert[%zu] \"\n      \"upsert[%zu] update[%zu] delete[%zu] path[%s]\",\n      (size_t)total_recovered_doc_count,\n      (size_t)recovered_doc_count[0],  // INSERT\n      (size_t)recovered_doc_count[1],  // UPSERT\n      (size_t)recovered_doc_count[2],  // UPDATE\n      (size_t)recovered_doc_count[3],  // DELETE\n      wal_file_path.c_str());\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::open_wal_file() {\n  auto mem_block = segment_meta_->writing_forward_block().value();\n  std::string wal_file_path =\n      FileHelper::MakeWalPath(path_, segment_meta_->id(), mem_block.id_);\n  WalOptions wal_option;\n  if (std::filesystem::exists(wal_file_path)) {\n    wal_option.create_new = false;\n  } else {\n    wal_option.create_new = true;\n  }\n\n  if (WalFile::CreateAndOpen(wal_file_path, wal_option, &wal_file_) != 0) {\n    LOG_ERROR(\"Recover wal file failed. path: %s\", wal_file_path.c_str());\n\n    return Status::OK();\n  }\n\n  LOG_INFO(\"Open wal file succ. path: %s\", wal_file_path.c_str());\n  return Status::OK();\n}\n\nStatus SegmentImpl::append_wal(const Doc &doc) {\n  std::vector<uint8_t> buf = doc.serialize();\n\n  if (!wal_file_) {\n    auto s = open_wal_file();\n    CHECK_RETURN_STATUS(s);\n  }\n\n  auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));\n  if (ret != 0) {\n    LOG_ERROR(\"Append wal failed. 
ret: %d\", ret);\n    return Status::InternalError(\"Failed to append wal\");\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentImpl::finish_memory_components() {\n  auto block = segment_meta_->writing_forward_block().value();\n\n  // close for loading persist block\n  auto s = memory_store_->close();\n  CHECK_RETURN_STATUS(s);\n  memory_store_.reset();\n\n  // load forward store\n  auto persist_forward_store_path = FileHelper::MakeForwardBlockPath(\n      path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);\n\n  BaseForwardStore::Ptr persist_store;\n  if (options_.enable_mmap_) {\n    persist_store =\n        std::make_shared<MmapForwardStore>(persist_forward_store_path);\n  } else {\n    persist_store =\n        std::make_shared<BufferPoolForwardStore>(persist_forward_store_path);\n  }\n  s = persist_store->Open();\n  CHECK_RETURN_STATUS(s);\n  persist_stores_.push_back(persist_store);\n\n  BlockMeta b{block.id_,         block.type_,      block.min_doc_id_,\n              block.max_doc_id_, block.doc_count_, block.columns_};\n  segment_meta_->add_persisted_block(b);\n\n  // remove indexer from memory to persist\n  for (auto &[column_name, indexer] : memory_vector_indexers_) {\n    auto block_id = memory_vector_block_ids_[column_name];\n    BlockMeta vb =\n        BlockMeta{block_id,          BlockType::VECTOR_INDEX, block.min_doc_id_,\n                  block.max_doc_id_, block.doc_count_,        {column_name}};\n    auto it = vector_indexers_.find(column_name);\n    if (it == vector_indexers_.end()) {\n      std::vector<VectorColumnIndexer::Ptr> vector_indexers{indexer};\n      vector_indexers_.emplace(column_name, std::move(vector_indexers));\n    } else {\n      it->second.push_back(indexer);\n    }\n    segment_meta_->add_persisted_block(vb);\n  }\n\n  // remove quant indexer from memory to persist\n  for (auto &[column_name, indexer] : quant_memory_vector_indexers_) {\n    auto block_id = quant_memory_vector_block_ids_[column_name];\n    
BlockMeta block_meta(block_id, BlockType::VECTOR_INDEX_QUANTIZE,\n                         block.min_doc_id_, block.max_doc_id_, block.doc_count_,\n                         {column_name});\n\n    auto it = quant_vector_indexers_.find(column_name);\n    if (it == quant_vector_indexers_.end()) {\n      std::vector<VectorColumnIndexer::Ptr> vector_indexers;\n      vector_indexers.push_back(indexer);\n      quant_vector_indexers_.emplace(column_name, std::move(vector_indexers));\n    } else {\n      it->second.push_back(indexer);\n    }\n    segment_meta_->add_persisted_block(block_meta);\n  }\n\n  // clear memory vector indexers\n  memory_vector_indexers_.clear();\n  quant_memory_vector_indexers_.clear();\n  memory_vector_block_ids_.clear();\n  quant_memory_vector_block_ids_.clear();\n\n  fresh_persist_block_offset();\n  return Status::OK();\n}\n\nStatus SegmentImpl::update_version(uint32_t delete_snapshot_path_suffix) {\n  if (version_manager_) {\n    if (delete_snapshot_path_suffix != UINT32_MAX) {\n      version_manager_->set_delete_snapshot_path_suffix(\n          delete_snapshot_path_suffix);\n    }\n    auto s = version_manager_->reset_writing_segment_meta(segment_meta_);\n    CHECK_RETURN_STATUS(s);\n    s = version_manager_->flush();\n    CHECK_RETURN_STATUS(s);\n  }\n  return Status::OK();\n}\n\nBlockID SegmentImpl::allocate_block_id() {\n  return block_id_allocator_.fetch_add(1);\n}\n\nResult<uint64_t> SegmentImpl::get_global_doc_id(uint32_t local_id) const {\n  std::lock_guard lock(seg_mtx_);\n  if (local_id >= doc_ids_.size()) {\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"local_id out of range\"));\n  }\n  // global doc_id\n  return doc_ids_[local_id];\n}\n\n\n////////////////////////////////////////////////////////////////////////////////////\n// Segment factory methods implementation\n////////////////////////////////////////////////////////////////////////////////////\n\nResult<Segment::Ptr> Segment::CreateAndOpen(\n    const 
std::string &path, const CollectionSchema &schema,\n    SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,\n    const DeleteStore::Ptr &delete_store,\n    const VersionManager::Ptr &version_manager, const SegmentOptions &options) {\n  auto segment = std::shared_ptr<SegmentImpl>(\n      new SegmentImpl(path, schema, SegmentMeta(segment_id), id_map,\n                      delete_store, version_manager));\n\n  auto segment_path = FileHelper::MakeSegmentPath(path, segment_id);\n  // check or create path\n  if (FileHelper::DirectoryExists(segment_path)) {\n    return tl::make_unexpected(Status::InternalError(\n        \"Segment path is already exists: \", segment_path));\n  } else {\n    if (!FileHelper::CreateDirectory(segment_path)) {\n      return tl::make_unexpected(Status::InternalError(\n          \"Create segment directory failed: \", segment_path));\n    }\n  }\n\n  auto s = segment->Create(options, min_doc_id);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  return segment;\n}\n\nResult<Segment::Ptr> Segment::Open(const std::string &path,\n                                   const CollectionSchema &schema,\n                                   const SegmentMeta &segment_meta,\n                                   const IDMap::Ptr &id_map,\n                                   const DeleteStore::Ptr &delete_store,\n                                   const VersionManager::Ptr &version_manager,\n                                   const SegmentOptions &options) {\n  auto segment = std::shared_ptr<SegmentImpl>(new SegmentImpl(\n      path, schema, segment_meta, id_map, delete_store, version_manager));\n\n  auto segment_path = FileHelper::MakeSegmentPath(path, segment_meta.id());\n  // check path\n  if (!FileHelper::DirectoryExists(segment_path)) {\n    return tl::make_unexpected(\n        Status::InternalError(\"Segment path is not exist: \", segment_path));\n  }\n\n  auto s = segment->Open(options);\n  CHECK_RETURN_STATUS_EXPECTED(s);\n\n  return 
segment;\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/segment.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <vector>\n#include <arrow/record_batch.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/db/doc.h>\n#include <zvec/db/index_params.h>\n#include <zvec/db/options.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include \"db/index/column/inverted_column/inverted_column_indexer.h\"\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n#include \"db/index/column/vector_column/combined_vector_column_indexer.h\"\n#include \"db/index/column/vector_column/vector_column_indexer.h\"\n#include \"db/index/common/delete_store.h\"\n#include \"db/index/common/id_map.h\"\n#include \"db/index/common/meta.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/storage/base_forward_store.h\"\n\nnamespace zvec {\n\nclass CombinedRecordBatchReader;\n\n// Abstract interface of a segment. Instances are obtained through the\n// CreateAndOpen() / Open() factories below and are shared via Segment::Ptr.\nclass Segment {\n public:\n  using Ptr = std::shared_ptr<Segment>;\n\n  // Virtual so that an implementation is destroyed correctly when it is\n  // deleted through a pointer to this interface.\n  virtual ~Segment() = default;\n\n  // Creates the directory of segment `segment_id` under `path` and opens a\n  // new segment in it. Fails if that directory already exists or cannot be\n  // created.\n  static Result<Ptr> CreateAndOpen(const std::string &path,\n                                   const CollectionSchema &schema,\n                                   SegmentID segment_id, uint64_t min_doc_id,\n                                   const IDMap::Ptr &id_map,\n                                   const DeleteStore::Ptr &delete_store,\n                                   const VersionManager::Ptr &version_manager,\n                                   const SegmentOptions &options);\n\n  // Opens the existing segment described by `segment_meta` under `path`.\n  // Fails if the segment directory does not exist.\n  static Result<Ptr> Open(const std::string &path,\n                          const CollectionSchema &schema,\n                          const SegmentMeta &segment_meta,\n                          const IDMap::Ptr &id_map,\n                          const DeleteStore::Ptr &delete_store,\n                          const VersionManager::Ptr &version_manager,\n                          const SegmentOptions &options);\n\n  virtual SegmentID id() const = 0;\n\n  virtual SegmentMeta::Ptr meta() const = 0;\n\n  virtual uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) = 0;\n\n  // for collection\n  virtual Status add_column(FieldSchema::Ptr column_schema,\n                            const std::string &expression,\n                            const AddColumnOptions &options) = 0;\n\n  virtual Status alter_column(const std::string &column_name,\n                              const FieldSchema::Ptr &new_column_schema,\n                              const AlterColumnOptions &options) = 0;\n\n  virtual Status drop_column(const std::string &column_name) = 0;\n\n  virtual Status create_all_vector_index(\n      int concurrency, SegmentMeta::Ptr *new_segment_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) = 0;\n\n  // Declared on Segment because it needs access to the segment's block id\n  // generator.\n  virtual Status create_vector_index(\n      const std::string &column, const IndexParams::Ptr &index_params,\n      int concurrency, SegmentMeta::Ptr *new_segment_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) = 0;\n\n  virtual Status drop_vector_index(\n      const std::string &column, SegmentMeta::Ptr *new_segment_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers) = 0;\n\n  virtual Status reload_vector_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &vector_indexers,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &quant_vector_indexers = {}) = 0;\n\n  virtual bool vector_index_ready(\n      const std::string &column,\n      const IndexParams::Ptr &index_params) const = 0;\n\n  virtual bool all_vector_index_ready() const = 0;\n\n  // Declared on Segment because it needs access to the segment's block id\n  // generator.\n  virtual Status create_scalar_index(\n      const std::vector<std::string> &columns,\n      const IndexParams::Ptr &index_params, SegmentMeta::Ptr *new_segment_meta,\n      InvertedIndexer::Ptr *new_scalar_indexer) = 0;\n\n  // Declared on Segment because it needs access to the segment's block id\n  // generator.\n  virtual Status drop_scalar_index(\n      const std::vector<std::string> &columns,\n      SegmentMeta::Ptr *new_segment_meta,\n      InvertedIndexer::Ptr *new_scalar_indexer) = 0;\n\n  virtual Status reload_scalar_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n      const InvertedIndexer::Ptr &scalar_indexer) = 0;\n\n  virtual Status Insert(Doc &doc) = 0;\n\n  virtual Status Upsert(Doc &doc) = 0;\n\n  virtual Status Update(Doc &doc) = 0;\n\n  virtual Status Delete(const std::string &pk) = 0;\n\n  virtual Status Delete(uint64_t g_doc_id) = 0;\n\n  virtual Doc::Ptr Fetch(uint64_t g_doc_id) = 0;\n\n  // for sqlengine\n  virtual TablePtr fetch(const std::vector<std::string> &columns,\n                         const std::vector<int> &indices) const = 0;\n\n  virtual ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                             int index) const = 0;\n\n  // The caller must keep the segment shared_ptr alive while using the result;\n  // the segment handles the indexer's lifetime.\n  virtual RecordBatchReaderPtr scan(\n      const std::vector<std::string> &columns) const = 0;\n\n  // The caller must keep the segment shared_ptr alive while using the result;\n  // the segment handles the indexer's lifetime.\n  virtual CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(\n      const std::string &field_name) const = 0;\n\n  // The caller must keep the segment shared_ptr alive while using the result;\n  // the segment handles the indexer's lifetime.\n  virtual CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(\n      const std::string &field_name) const = 0;\n\n  // The caller must keep the segment shared_ptr alive while using the result;\n  // the segment handles the indexer's lifetime.\n  virtual std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(\n      const std::string &field_name) const = 0;\n\n  virtual std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(\n      const std::string &field_name) const = 0;\n\n  // The caller must keep the segment shared_ptr alive while using the result;\n  // the segment handles the indexer's lifetime.\n  virtual InvertedColumnIndexer::Ptr get_scalar_indexer(\n      const std::string &field_name) const = 0;\n\n  virtual const IndexFilter::Ptr get_filter() = 0;\n\n  // for others\n  virtual Status flush() = 0;\n  virtual Status dump() = 0;\n\n  // only marks need_destroyed\n  virtual Status destroy() = 0;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/segment_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"segment_helper.h\"\n#include <cstdint>\n#include <functional>\n#include <memory>\n#include <arrow/compute/api_vector.h>\n#include <arrow/type_fwd.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/common/global_resource.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n#include \"db/index/column/vector_column/vector_column_indexer.h\"\n#include \"db/index/common/index_filter.h\"\n#include \"db/index/common/meta.h\"\n#include \"db/index/storage/forward_writer.h\"\n#include \"roaring.hh\"\n\nnamespace zvec {\n\nStatus SegmentHelper::Execute(SegmentTask::Ptr &task) {\n  auto &task_info = task->task_info();\n  Status s;\n  if (std::holds_alternative<CompactTask>(task_info)) {\n    auto &compact_task = std::get<CompactTask>(task_info);\n    s = ExecuteCompactTask(compact_task);\n  } else if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {\n    auto &create_index_task = std::get<CreateVectorIndexTask>(task_info);\n    s = ExecuteCreateVectorIndexTask(create_index_task);\n  } else if (std::holds_alternative<CreateScalarIndexTask>(task_info)) {\n    auto &create_index_task = std::get<CreateScalarIndexTask>(task_info);\n    s = 
ExecuteCreateScalarIndexTask(create_index_task);\n  } else if (std::holds_alternative<DropVectorIndexTask>(task_info)) {\n    auto &drop_index_task = std::get<DropVectorIndexTask>(task_info);\n    s = ExecuteDropVectorIndexTask(drop_index_task);\n  } else if (std::holds_alternative<DropScalarIndexTask>(task_info)) {\n    auto &drop_index_task = std::get<DropScalarIndexTask>(task_info);\n    s = ExecuteDropScalarIndexTask(drop_index_task);\n  } else {\n    return Status::InvalidArgument(\"Unknown task type\");\n  }\n  return s;\n}\n\nclass RowIdFilter : public IndexFilter {\n public:\n  explicit RowIdFilter(roaring::Roaring &&delete_row_id_bitmap)\n      : delete_row_id_bitmap_(delete_row_id_bitmap) {}\n\n  bool is_filtered(uint64_t id) const override {\n    return delete_row_id_bitmap_.contains(id);\n  }\n\n private:\n  roaring::Roaring delete_row_id_bitmap_;\n};\n\nStatus SegmentHelper::ExecuteCompactTask(CompactTask &task) {\n  // input\n  auto collection_path = task.collection_path_;\n  auto schema = task.schema_;\n  auto input_segments = task.input_segments_;\n  auto filter = task.filter_;\n  auto output_segment_id = task.output_segment_id_;\n\n  auto columns = schema->forward_field_names();\n\n  // make segment path\n  auto output_segment_path =\n      FileHelper::MakeTempSegmentPath(collection_path, output_segment_id);\n  if (!FileHelper::CreateDirectory(output_segment_path)) {\n    LOG_ERROR(\"Create directory failed: %s\", output_segment_path.c_str());\n    return Status::InternalError(\"Create directory failed: %s\",\n                                 output_segment_path.c_str());\n  }\n\n  std::function<BlockID()> block_id_generator =\n      [block_id = BlockID{0}]() mutable { return block_id++; };\n\n  // iterate every doc, build forward and invert indexer\n  roaring::Roaring delete_row_id_bitmap;\n  uint64_t min_doc_id{std::numeric_limits<uint64_t>::max()};\n  uint64_t max_doc_id{0};\n  uint32_t doc_count{0};\n  std::vector<BlockMeta> block_metas;\n  
Status s = ReduceScalar(schema, input_segments, output_segment_path, columns,\n                          filter, task.forward_use_parquet_, block_id_generator,\n                          &delete_row_id_bitmap, &block_metas, &min_doc_id,\n                          &max_doc_id, &doc_count);\n  CHECK_RETURN_STATUS(s);\n\n  if (doc_count == 0) {\n    FileHelper::RemoveDirectory(output_segment_path);\n    return Status::OK();\n  }\n\n  std::shared_ptr<RowIdFilter> row_id_filter =\n      std::make_shared<RowIdFilter>(std::move(delete_row_id_bitmap));\n\n  s = ReduceVectorIndex(schema, input_segments, output_segment_path,\n                        row_id_filter, block_id_generator, min_doc_id,\n                        max_doc_id, doc_count, task.concurrency_, &block_metas);\n  CHECK_RETURN_STATUS(s);\n\n  LOG_INFO(\"Compacted vector index\");\n\n  auto new_segment_meta = std::make_shared<SegmentMeta>();\n  new_segment_meta->set_id(task.output_segment_id_);\n  new_segment_meta->set_persisted_blocks(block_metas);\n  std::set<std::string> indexed_vector_fields;\n  for (auto &field : schema->vector_fields()) {\n    indexed_vector_fields.emplace(field->name());\n  }\n  new_segment_meta->set_indexed_vector_fields(indexed_vector_fields);\n  task.output_segment_meta_ = new_segment_meta;\n\n  return Status::OK();\n}\n\nStatus SegmentHelper::ReduceScalar(\n    const CollectionSchema::Ptr schema,\n    const std::vector<Segment::Ptr> &input_segments,\n    const std::string &output_segment_path,\n    const std::vector<std::string> &columns, const IndexFilter::Ptr &filter,\n    bool forward_use_parquet, std::function<BlockID()> &block_id_generator,\n    roaring::Roaring *delete_row_id_bitmap,\n    std::vector<BlockMeta> *output_block_metas, uint64_t *min_doc_id,\n    uint64_t *max_doc_id, uint32_t *doc_count) {\n  // forward\n  auto forward_block_id = block_id_generator();\n  auto forward_path = FileHelper::MakeForwardBlockPath(\n      output_segment_path, forward_block_id, 
forward_use_parquet);\n\n  std::shared_ptr<ForwardWriter> forward_writer;\n  if (forward_use_parquet) {\n    forward_writer = ForwardWriter::CreateParquetWriter(forward_path);\n  } else {\n    forward_writer = ForwardWriter::CreateArrowIPCWriter(forward_path);\n  }\n\n  // invert index\n  auto all_fields = schema->fields();\n  std::vector<FieldSchema> invert_fields;\n  std::vector<std::string> invert_field_names;\n  for (auto &field : all_fields) {\n    if (!field->is_vector_field()) {\n      if (field->index_params() &&\n          field->index_params()->type() == IndexType::INVERT) {\n        invert_fields.push_back(*field);\n        invert_field_names.push_back(field->name());\n      }\n    }\n  }\n  InvertedIndexer::Ptr invert_indexer;\n  BlockID invert_block_id{0};\n  if (invert_fields.size() > 0) {\n    invert_block_id = block_id_generator();\n    auto invert_path =\n        FileHelper::MakeInvertIndexPath(output_segment_path, invert_block_id);\n    invert_indexer = InvertedIndexer::CreateAndOpen(schema->name(), invert_path,\n                                                    true, invert_fields, false);\n    if (invert_indexer == nullptr) {\n      return Status::InternalError(\"Open invert indexer failed\");\n    }\n  }\n\n  uint32_t row_id_offset{0U};\n  *doc_count = 0;\n\n  std::vector<std::string> all_reduce_columns{GLOBAL_DOC_ID, USER_ID};\n  for (auto &column : columns) {\n    all_reduce_columns.push_back(column);\n  }\n\n  for (auto &segment : input_segments) {\n    auto reader = segment->scan(all_reduce_columns);\n    if (reader == nullptr) {\n      return Status::InternalError(\"scan segment failed\");\n    }\n\n    while (true) {\n      auto batch = reader->Next();\n      if (!batch.ok()) {\n        return Status::InternalError(\"reader next failed: \",\n                                     batch.status().message());\n      }\n\n      auto batch_value = batch.ValueOrDie();\n\n      if (!batch_value) {\n        break;\n      }\n\n      if 
(batch_value->num_rows() == 0) continue;\n\n      std::shared_ptr<arrow::RecordBatch> filtered_batch;\n      auto as =\n          FilterRecordBatch(batch_value, filter, row_id_offset, &filtered_batch,\n                            delete_row_id_bitmap, min_doc_id, max_doc_id);\n      if (!as.ok()) {\n        return Status::InternalError(\"filter record batch failed: \",\n                                     as.message());\n      }\n\n      row_id_offset += batch_value->num_rows();\n\n      if (!filtered_batch || filtered_batch->num_rows() == 0) {\n        continue;\n      }\n\n      // forward\n      as = forward_writer->insert_batch(filtered_batch);\n      if (!as.ok()) {\n        return Status::InternalError(\"writer insert failed: \", as.message());\n      }\n\n      // invert index\n      if (invert_indexer) {\n        auto s = ReduceScalarIndex(invert_indexer, filtered_batch, *doc_count);\n        CHECK_RETURN_STATUS(s);\n      }\n\n      *doc_count += filtered_batch->num_rows();\n    }\n  }\n\n  if (*doc_count == 0) {\n    // no docs\n    return Status::OK();\n  }\n\n  // flush forward\n  auto as = forward_writer->finalize();\n  if (!as.ok()) {\n    return Status::InternalError(\"writer finalize failed: \", as.message());\n  }\n\n  BlockMeta forward_meta;\n  forward_meta.set_id(forward_block_id);\n  forward_meta.set_type(BlockType::SCALAR);\n  forward_meta.set_min_doc_id(*min_doc_id);\n  forward_meta.set_max_doc_id(*max_doc_id);\n  forward_meta.set_doc_count(*doc_count);\n  forward_meta.set_columns(all_reduce_columns);\n\n  output_block_metas->push_back(forward_meta);\n\n  if (invert_indexer) {\n    auto s = invert_indexer->flush();\n    CHECK_RETURN_STATUS(s);\n\n    s = invert_indexer->seal();\n    CHECK_RETURN_STATUS(s);\n\n    BlockMeta meta;\n    meta.set_id(invert_block_id);\n    meta.set_type(BlockType::SCALAR_INDEX);\n\n    output_block_metas->push_back(meta);\n  }\n\n  LOG_INFO(\"Compacted scalar and scalar index\");\n\n  return 
Status::OK();\n}\n\nStatus SegmentHelper::ReduceScalarIndex(\n    InvertedIndexer::Ptr invert_indexer,\n    const std::shared_ptr<arrow::RecordBatch> &batch, uint32_t doc_id_offset) {\n  auto a_schema = batch->schema();\n  int num_columns = batch->num_columns();\n\n  for (int i = 0; i < num_columns; ++i) {\n    auto field = a_schema->field(i);\n    auto column_name = field->name();\n\n    auto indexer = (*invert_indexer)[column_name];\n    if (!indexer) {\n      continue;\n    }\n\n    auto array = batch->column(i);\n    auto type_id = field->type()->id();\n\n    Status s;\n\n    switch (type_id) {\n      case arrow::Type::BOOL: {\n        auto typed_array = std::static_pointer_cast<arrow::BooleanArray>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            bool value = typed_array->Value(j);\n            s = indexer->insert(j + doc_id_offset, value);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::INT32: {\n        auto typed_array = std::static_pointer_cast<arrow::Int32Array>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            int32_t value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::INT64: {\n        auto typed_array = std::static_pointer_cast<arrow::Int64Array>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if 
(!typed_array->IsNull(j)) {\n            int64_t value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::UINT32: {\n        auto typed_array = std::static_pointer_cast<arrow::UInt32Array>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            uint32_t value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::UINT64: {\n        auto typed_array = std::static_pointer_cast<arrow::UInt64Array>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            uint64_t value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::FLOAT: {\n        auto typed_array = std::static_pointer_cast<arrow::FloatArray>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if 
(!typed_array->IsNull(j)) {\n            float value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::DOUBLE: {\n        auto typed_array = std::static_pointer_cast<arrow::DoubleArray>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            double value = typed_array->Value(j);\n            std::string value_str(reinterpret_cast<const char *>(&value),\n                                  sizeof(value));\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::STRING: {\n        auto typed_array = std::static_pointer_cast<arrow::StringArray>(array);\n        for (int64_t j = 0; j < typed_array->length(); ++j) {\n          if (!typed_array->IsNull(j)) {\n            std::string value_str = typed_array->GetString(j);\n            s = indexer->insert(j + doc_id_offset, value_str);\n            CHECK_RETURN_STATUS(s);\n          } else {\n            s = indexer->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n          }\n        }\n        break;\n      }\n      case arrow::Type::LIST: {\n        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);\n        auto value_array = list_array->values();\n        auto value_type_id = value_array->type()->id();\n\n        auto offset_array = list_array->offsets();\n        auto typed_offsets =\n            
std::static_pointer_cast<arrow::Int32Array>(offset_array);\n\n        for (int64_t j = 0; j < list_array->length(); ++j) {\n          if (list_array->IsNull(j)) {\n            s = (*invert_indexer)[column_name]->insert_null(j + doc_id_offset);\n            CHECK_RETURN_STATUS(s);\n            continue;\n          }\n\n          int32_t start_offset = typed_offsets->Value(j);\n          int32_t end_offset = typed_offsets->Value(j + 1);\n\n          switch (value_type_id) {\n            case arrow::Type::BOOL: {\n              std::vector<bool> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::BooleanArray>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  values.push_back(typed->Value(k));\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::INT32: {\n              std::vector<std::string> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::Int32Array>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  int32_t value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                  values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::INT64: {\n              std::vector<std::string> values;\n              auto typed 
=\n                  std::static_pointer_cast<arrow::Int64Array>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  int64_t value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                  values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::UINT32: {\n              std::vector<std::string> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::UInt32Array>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  uint32_t value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                  values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::UINT64: {\n              std::vector<std::string> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::UInt64Array>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  uint64_t value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                 
 values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::FLOAT: {\n              std::vector<std::string> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::FloatArray>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  float value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                  values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::DOUBLE: {\n              std::vector<std::string> values;\n              auto typed =\n                  std::static_pointer_cast<arrow::DoubleArray>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  double value = typed->Value(k);\n                  std::string value_str(reinterpret_cast<const char *>(&value),\n                                        sizeof(value));\n                  values.push_back(value_str);\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            case arrow::Type::STRING: {\n              std::vector<std::string> values;\n              auto typed =\n 
                 std::static_pointer_cast<arrow::StringArray>(value_array);\n              for (int32_t k = start_offset; k < end_offset; ++k) {\n                if (typed->IsValid(k)) {\n                  values.push_back(typed->GetString(k));\n                }\n              }\n              s = (*invert_indexer)[column_name]->insert(j + doc_id_offset,\n                                                         values);\n              CHECK_RETURN_STATUS(s);\n              break;\n            }\n            default:\n              LOG_WARN(\n                  \"Warning: Unsupported nested type '%s' in List column '%s'\",\n                  value_array->type()->ToString().c_str(), column_name.c_str());\n              continue;\n          }\n        }\n        break;\n      }\n      default:\n        LOG_WARN(\"Warning: Unsupported column type '%s' for column '%s'\",\n                 field->type()->ToString().c_str(), column_name.c_str());\n        continue;\n    }\n  }\n\n  return Status::OK();\n}\n\nStatus SegmentHelper::ReduceVectorIndex(\n    const CollectionSchema::Ptr schema,\n    const std::vector<Segment::Ptr> &input_segments,\n    const std::string &output_segment_path, const IndexFilter::Ptr &filter,\n    std::function<BlockID()> &block_id_generator, uint64_t min_doc_id,\n    uint64_t max_doc_id, uint32_t doc_count, int concurrency,\n    std::vector<BlockMeta> *output_block_metas) {\n  Status s;\n\n  // vector\n  auto vector_fields = schema->vector_fields();\n  for (auto &field : vector_fields) {\n    auto vector_index_params =\n        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());\n\n    auto vector_block_id = block_id_generator();\n    if (vector_index_params->quantize_type() == QuantizeType::UNDEFINED) {\n      auto vector_index_path = FileHelper::MakeVectorIndexPath(\n          output_segment_path, field->name(), vector_block_id);\n\n      // only create original vector indexer\n      auto vector_indexer =\n          
std::make_shared<VectorColumnIndexer>(vector_index_path, *field);\n      s = vector_indexer->Open({true, true});\n      CHECK_RETURN_STATUS(s);\n\n      std::vector<VectorColumnIndexer::Ptr> merge_indexers;\n      for (auto &input_segment : input_segments) {\n        // merge_indexers should be ordered put\n        auto to_merge_indexers =\n            input_segment->get_vector_indexer(field->name());\n        merge_indexers.insert(merge_indexers.end(), to_merge_indexers.begin(),\n                              to_merge_indexers.end());\n      }\n\n      vector_column_params::MergeOptions merge_options;\n      if (concurrency == 0) {\n        merge_options.pool = GlobalResource::Instance().optimize_thread_pool();\n      } else {\n        merge_options.write_concurrency = concurrency;\n      }\n\n      s = vector_indexer->Merge(merge_indexers, filter, merge_options);\n      CHECK_RETURN_STATUS(s);\n\n      s = vector_indexer->Flush();\n      CHECK_RETURN_STATUS(s);\n\n      BlockMeta new_block_meta;\n      new_block_meta.set_id(vector_block_id);\n      new_block_meta.set_type(BlockType::VECTOR_INDEX);\n      new_block_meta.set_columns({field->name()});\n      new_block_meta.set_min_doc_id(min_doc_id);\n      new_block_meta.set_max_doc_id(max_doc_id);\n      new_block_meta.set_doc_count(doc_count);\n\n      output_block_metas->push_back(new_block_meta);\n    } else {\n      auto vector_index_path = FileHelper::MakeQuantizeVectorIndexPath(\n          output_segment_path, field->name(), vector_block_id);\n\n      auto field_without_quantize = std::make_shared<FieldSchema>(*field);\n      field_without_quantize->set_index_params(\n          MakeDefaultVectorIndexParams(vector_index_params->metric_type()));\n\n      // create flat index\n      auto vector_indexer = std::make_shared<VectorColumnIndexer>(\n          vector_index_path, *field_without_quantize);\n      s = vector_indexer->Open({true, true});\n      CHECK_RETURN_STATUS(s);\n\n      
std::vector<VectorColumnIndexer::Ptr> merge_indexers;\n      for (auto &input_segment : input_segments) {\n        // merge_indexers should be ordered put\n        auto to_merge_indexers =\n            input_segment->get_vector_indexer(field->name());\n        merge_indexers.insert(merge_indexers.end(), to_merge_indexers.begin(),\n                              to_merge_indexers.end());\n      }\n\n      vector_column_params::MergeOptions merge_options;\n      if (concurrency == 0) {\n        merge_options.pool = GlobalResource::Instance().optimize_thread_pool();\n      } else {\n        merge_options.write_concurrency = concurrency;\n      }\n\n      s = vector_indexer->Merge(merge_indexers, filter, merge_options);\n      CHECK_RETURN_STATUS(s);\n\n      s = vector_indexer->Flush();\n      CHECK_RETURN_STATUS(s);\n\n      BlockMeta new_block_meta;\n      new_block_meta.set_id(vector_block_id);\n      new_block_meta.set_type(BlockType::VECTOR_INDEX);\n      new_block_meta.set_columns({field->name()});\n      output_block_metas->push_back(new_block_meta);\n\n      // create quantize index\n      auto vector_quan_block_id = block_id_generator();\n\n      auto vector_quan_index_path = FileHelper::MakeQuantizeVectorIndexPath(\n          output_segment_path, field->name(), vector_quan_block_id);\n\n      auto vector_indexer_quantize =\n          std::make_shared<VectorColumnIndexer>(vector_quan_index_path, *field);\n      s = vector_indexer_quantize->Open({true, true});\n      CHECK_RETURN_STATUS(s);\n\n      merge_indexers.clear();\n      for (auto &input_segment : input_segments) {\n        // merge_indexers should be ordered put\n        auto to_merge_indexers =\n            input_segment->get_quant_vector_indexer(field->name());\n        merge_indexers.insert(merge_indexers.end(), to_merge_indexers.begin(),\n                              to_merge_indexers.end());\n      }\n\n      s = vector_indexer_quantize->Merge(merge_indexers, filter, merge_options);\n      
CHECK_RETURN_STATUS(s);\n\n      s = vector_indexer_quantize->Flush();\n      CHECK_RETURN_STATUS(s);\n\n      new_block_meta.set_id(vector_quan_block_id);\n      new_block_meta.set_type(BlockType::VECTOR_INDEX_QUANTIZE);\n      new_block_meta.set_columns({field->name()});\n      output_block_metas->push_back(new_block_meta);\n    }\n  }\n\n  return Status::OK();\n}\n\narrow::Status SegmentHelper::FilterRecordBatch(\n    const std::shared_ptr<arrow::RecordBatch> &batch,\n    const IndexFilter::Ptr filter, uint32_t row_id_offset,\n    std::shared_ptr<arrow::RecordBatch> *filterd,\n    roaring::Roaring *delete_row_id_bitmap, uint64_t *min_doc_id,\n    uint64_t *max_doc_id) {\n  if (!filter) {\n    *filterd = batch;\n    for (int64_t i = 0; i < batch->num_rows(); ++i) {\n      // column 0 is doc_id\n      auto result = batch->column(0)->GetScalar(i);\n      if (!result.ok()) {\n        return result.status();\n      }\n      uint64_t doc_id =\n          std::dynamic_pointer_cast<arrow::UInt64Scalar>(*result)->value;\n      *min_doc_id = std::min(*min_doc_id, doc_id);\n      *max_doc_id = std::max(*max_doc_id, doc_id);\n    }\n    return arrow::Status::OK();\n  }\n\n  std::vector<uint64_t> selected_indices;\n  for (int64_t i = 0; i < batch->num_rows(); ++i) {\n    auto result = batch->column(0)->GetScalar(i);\n    if (!result.ok()) {\n      return result.status();\n    }\n    uint64_t doc_id =\n        std::dynamic_pointer_cast<arrow::UInt64Scalar>(*result)->value;\n    if (!filter->is_filtered(doc_id)) {\n      selected_indices.push_back(i);\n      *min_doc_id = std::min(*min_doc_id, doc_id);\n      *max_doc_id = std::max(*max_doc_id, doc_id);\n    } else {\n      delete_row_id_bitmap->add(i + row_id_offset);\n    }\n  }\n\n  if (selected_indices.empty()) {\n    return arrow::Status::OK();\n  }\n\n  arrow::UInt64Builder builder;\n  ARROW_RETURN_NOT_OK(builder.AppendValues(selected_indices));\n  std::shared_ptr<arrow::Array> selection_array;\n  
ARROW_RETURN_NOT_OK(builder.Finish(&selection_array));\n\n  std::vector<std::shared_ptr<arrow::Array>> filtered_columns;\n  for (int i = 0; i < batch->num_columns(); ++i) {\n    arrow::Datum out;\n    ARROW_ASSIGN_OR_RAISE(\n        out, arrow::compute::Take(batch->column(i), selection_array));\n    filtered_columns.push_back(out.make_array());\n  }\n\n  auto filtered_batch = arrow::RecordBatch::Make(\n      batch->schema(), static_cast<int64_t>(selected_indices.size()),\n      filtered_columns);\n\n  *filterd = filtered_batch;\n\n  return arrow::Status::OK();\n}\n\nStatus SegmentHelper::ExecuteCreateVectorIndexTask(\n    CreateVectorIndexTask &task) {\n  if (task.column_to_build_vector_index_ == \"\") {\n    return task.input_segment_->create_all_vector_index(\n        task.concurrency_, &task.output_segment_meta_,\n        &task.output_vector_indexers_, &task.output_quant_vector_indexers_);\n  } else {\n    return task.input_segment_->create_vector_index(\n        task.column_to_build_vector_index_, task.index_params_,\n        task.concurrency_, &task.output_segment_meta_,\n        &task.output_vector_indexers_, &task.output_quant_vector_indexers_);\n  }\n}\n\nStatus SegmentHelper::ExecuteCreateScalarIndexTask(\n    CreateScalarIndexTask &task) {\n  return task.input_segment_->create_scalar_index(\n      task.columns_to_build_scalar_index_, task.index_params_,\n      &task.output_segment_meta_, &task.output_scalar_indexer_);\n}\n\nStatus SegmentHelper::ExecuteDropVectorIndexTask(DropVectorIndexTask &task) {\n  return task.input_segment_->drop_vector_index(\n      task.column_to_drop_vector_index_, &task.output_segment_meta_,\n      &task.output_vector_indexers_);\n}\n\nStatus SegmentHelper::ExecuteDropScalarIndexTask(DropScalarIndexTask &task) {\n  return task.input_segment_->drop_scalar_index(\n      task.columns_to_drop_scalar_index_, &task.output_segment_meta_,\n      &task.output_scalar_indexer_);\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/segment_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <utility>\n#include <variant>\n#include <vector>\n#include <arrow/record_batch.h>\n#include <arrow/status.h>\n#include <zvec/db/index_params.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n#include \"db/index/common/index_filter.h\"\n#include \"db/index/common/meta.h\"\n#include \"segment.h\"\n\nnamespace zvec {\n\n// Inputs and output slot of a compaction over a set of segments.\nstruct CompactTask {\n  // `filter` is taken by non-const value so that std::move below really moves\n  // instead of silently copying.\n  CompactTask(const std::string &collection_path,\n              const CollectionSchema::Ptr &schema,\n              const std::vector<Segment::Ptr> &input_segments,\n              SegmentID output_segment_id, IndexFilter::Ptr filter,\n              bool forward_use_parquet, int concurrency)\n      : collection_path_(collection_path),\n        schema_(schema),\n        input_segments_(input_segments),\n        output_segment_id_(output_segment_id),\n        filter_(std::move(filter)),\n        forward_use_parquet_(forward_use_parquet),\n        concurrency_(concurrency) {}\n\n  const std::string collection_path_;\n  const CollectionSchema::Ptr schema_;\n  // Must hold more than one segment when filter_ is nullptr; a single segment\n  // is allowed when filter_ is set.\n  const std::vector<Segment::Ptr> input_segments_;\n  SegmentID output_segment_id_;\n  const IndexFilter::Ptr filter_;\n  bool forward_use_parquet_;\n  int concurrency_;\n\n  // output\n  SegmentMeta::Ptr output_segment_meta_;\n};\n\n// Inputs and output slots for building a vector index on one segment.\nstruct CreateVectorIndexTask {\n  CreateVectorIndexTask(const Segment::Ptr &input_segment,\n                        const std::string &column_to_build_vector_index,\n                        const IndexParams::Ptr &index_params, int concurrency)\n      : input_segment_(input_segment),\n        column_to_build_vector_index_(column_to_build_vector_index),\n        index_params_(index_params),\n        concurrency_(concurrency) {}\n\n  Segment::Ptr input_segment_;\n  // An empty name means: create the index for all vector columns.\n  std::string column_to_build_vector_index_;\n  IndexParams::Ptr index_params_;\n  int concurrency_;\n\n  // output\n  SegmentMeta::Ptr output_segment_meta_;\n  std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n      output_vector_indexers_;\n  std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n      output_quant_vector_indexers_;\n};\n\n// Inputs and output slots for dropping the vector index of one column.\nstruct DropVectorIndexTask {\n  DropVectorIndexTask(const Segment::Ptr &input_segment,\n                      const std::string &column_to_drop_vector_index)\n      : input_segment_(input_segment),\n        column_to_drop_vector_index_(column_to_drop_vector_index) {}\n\n  Segment::Ptr input_segment_;\n  std::string column_to_drop_vector_index_;\n\n  // output\n  SegmentMeta::Ptr output_segment_meta_;\n  std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n      output_vector_indexers_;\n};\n\n// Inputs and output slots for building a scalar index over some columns.\nstruct CreateScalarIndexTask {\n  CreateScalarIndexTask(\n      const Segment::Ptr &input_segment,\n      const std::vector<std::string> &columns_to_build_scalar_index,\n      const IndexParams::Ptr &index_params, int concurrency)\n      : input_segment_(input_segment),\n        columns_to_build_scalar_index_(columns_to_build_scalar_index),\n        index_params_(index_params),\n        concurrency_(concurrency) {}\n\n  Segment::Ptr input_segment_;\n  std::vector<std::string> columns_to_build_scalar_index_;\n  IndexParams::Ptr index_params_;\n  int concurrency_;\n\n  // output\n  SegmentMeta::Ptr output_segment_meta_;\n  InvertedIndexer::Ptr output_scalar_indexer_;\n};\n\n// Inputs and output slots for dropping the scalar index of some columns.\nstruct DropScalarIndexTask {\n  // Both arguments are taken by value and moved into the members.\n  DropScalarIndexTask(Segment::Ptr input_segment,\n                      std::vector<std::string> columns_to_drop_scalar_index)\n      : input_segment_(std::move(input_segment)),\n        columns_to_drop_scalar_index_(\n            std::move(columns_to_drop_scalar_index)) {}\n\n  Segment::Ptr input_segment_;\n  std::vector<std::string> columns_to_drop_scalar_index_;\n\n  // output\n  SegmentMeta::Ptr output_segment_meta_;\n  InvertedIndexer::Ptr output_scalar_indexer_;  // nullptr means no scalar index\n};\n\n// Type-erased holder for exactly one of the task structs above.\nclass SegmentTask {\n public:\n  using Ptr = std::shared_ptr<SegmentTask>;\n\n  using TaskInfo =\n      std::variant<CompactTask, CreateVectorIndexTask, DropVectorIndexTask,\n                   CreateScalarIndexTask, DropScalarIndexTask>;\n\n  static Ptr CreateCompactTask(const CompactTask &task) {\n    return std::make_shared<SegmentTask>(task);\n  }\n\n  // Misspelled legacy name, kept so that existing callers keep compiling.\n  // Prefer CreateCompactTask.\n  static Ptr CreateComapctTask(const CompactTask &task) {\n    return CreateCompactTask(task);\n  }\n\n  static Ptr CreateCreateVectorIndexTask(const CreateVectorIndexTask &task) {\n    return std::make_shared<SegmentTask>(task);\n  }\n\n  static Ptr CreateDropVectorIndexTask(const DropVectorIndexTask &task) {\n    return std::make_shared<SegmentTask>(task);\n  }\n\n  static Ptr CreateCreateScalarIndexTask(const CreateScalarIndexTask &task) {\n    return std::make_shared<SegmentTask>(task);\n  }\n\n  static Ptr CreateDropScalarIndexTask(const DropScalarIndexTask &task) {\n    return std::make_shared<SegmentTask>(task);\n  }\n\n public:\n  SegmentTask(const CompactTask &task) : task_info_(task) {}\n\n  SegmentTask(const CreateVectorIndexTask &task) : task_info_(task) {}\n\n  SegmentTask(const CreateScalarIndexTask &task) : task_info_(task) {}\n\n  SegmentTask(const DropVectorIndexTask &task) : task_info_(task) {}\n\n  SegmentTask(const DropScalarIndexTask &task) : task_info_(task) {}\n\n  TaskInfo &task_info() {\n    return task_info_;\n  }\n\n private:\n  TaskInfo task_info_;\n};\n\n// Static entry points that execute a SegmentTask and the reduce/filter steps\n// used while doing so.\nclass SegmentHelper {\n public:\n  static Status Execute(SegmentTask::Ptr &task);\n\n private:\n  static Status ExecuteCompactTask(CompactTask &task);\n\n  static Status ExecuteCreateVectorIndexTask(CreateVectorIndexTask &task);\n\n  static Status ExecuteCreateScalarIndexTask(CreateScalarIndexTask &task);\n\n  static Status ExecuteDropVectorIndexTask(DropVectorIndexTask &task);\n\n  static Status ExecuteDropScalarIndexTask(DropScalarIndexTask &task);\n\n public:\n  static Status ReduceScalar(const CollectionSchema::Ptr schema,\n                             const std::vector<Segment::Ptr> &input_segments,\n                             const std::string &output_segment_path,\n                             const std::vector<std::string> &columns,\n                             const IndexFilter::Ptr &filter,\n                             bool forward_use_parquet,\n                             std::function<BlockID()> &block_id_generator,\n                             roaring::Roaring *delete_row_id_bitmap,\n                             std::vector<BlockMeta> *output_block_metas,\n                             uint64_t *min_doc_id, uint64_t *max_doc_id,\n                             uint32_t *doc_count);\n\n  static Status ReduceScalarIndex(\n      InvertedIndexer::Ptr indexer,\n      const std::shared_ptr<arrow::RecordBatch> &batch, uint32_t doc_id_offset);\n\n  static Status ReduceVectorIndex(\n      const CollectionSchema::Ptr schema,\n      const std::vector<Segment::Ptr> &input_segments,\n      const std::string &output_segment_path, const IndexFilter::Ptr &filter,\n      std::function<BlockID()> &block_id_generator, uint64_t min_doc_id,\n      uint64_t max_doc_id, uint32_t doc_count, int concurrency,\n      std::vector<BlockMeta> *output_block_metas);\n\n  static arrow::Status FilterRecordBatch(\n      const std::shared_ptr<arrow::RecordBatch> &batch,\n      const IndexFilter::Ptr filter, uint32_t row_id_offset,\n      std::shared_ptr<arrow::RecordBatch> *filtered,\n      roaring::Roaring *delete_row_id_bitmap, uint64_t *min_doc_id,\n      uint64_t *max_doc_id);\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/segment_manager.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// segment_manager.cc\n#include \"db/index/segment/segment_manager.h\"\n#include <algorithm>\n#include <future>\n#include <thread>\n#include <vector>\n#include <zvec/db/status.h>\n#include \"db/common/typedef.h\"\n\nnamespace zvec {\n\n// Registers `segment` under its id; an existing entry with the same id is\n// overwritten. Returns InvalidArgument when `segment` is null.\nStatus SegmentManager::add_segment(Segment::Ptr segment) {\n  if (!segment) {\n    return Status::InvalidArgument(\"Segment is null\");\n  }\n\n  segments_map_[segment->id()] = segment;\n  return Status::OK();\n}\n\n// Removes the segment from the manager without calling destroy() on it.\n// Returns NotFound when no segment is registered under `segment_id`.\nStatus SegmentManager::remove_segment(SegmentID segment_id) {\n  auto iter = segments_map_.find(segment_id);\n  if (iter == segments_map_.end()) {\n    return Status::NotFound(\"Segment not found\");\n  }\n\n  segments_map_.erase(iter);\n  return Status::OK();\n}\n\n// Calls destroy() on the segment and then removes it from the manager\n// (CHECK_RETURN_STATUS is presumed to return early when destroy() fails).\n// Returns NotFound when no segment is registered under `segment_id`.\nStatus SegmentManager::destroy_segment(SegmentID segment_id) {\n  auto iter = segments_map_.find(segment_id);\n  if (iter == segments_map_.end()) {\n    return Status::NotFound(\"Segment not found\");\n  }\n\n  auto s = iter->second->destroy();\n  CHECK_RETURN_STATUS(s);\n\n  // Erase by key rather than by iterator: destroy() is an external call.\n  segments_map_.erase(segment_id);\n  return Status::OK();\n}\n\n// Returns all managed segments ordered by ascending min_doc_id.\nstd::vector<Segment::Ptr> SegmentManager::get_segments() const {\n  std::vector<Segment::Ptr> segments;\n  segments.reserve(segments_map_.size());\n  for (const auto &pair : segments_map_) {\n    segments.push_back(pair.second);\n  }\n  // Compare through const references so sorting does not copy shared_ptrs.\n  std::sort(segments.begin(), segments.end(),\n            [](const Segment::Ptr &a, const Segment::Ptr &b) {\n              return a->meta()->min_doc_id() < b->meta()->min_doc_id();\n            });\n  return segments;\n}\n\n// Returns the meta of every managed segment ordered by ascending min_doc_id.\nstd::vector<SegmentMeta::Ptr> SegmentManager::get_segments_meta() const {\n  std::vector<SegmentMeta::Ptr> segments_meta;\n  segments_meta.reserve(segments_map_.size());\n  for (const auto &pair : segments_map_) {\n    segments_meta.push_back(pair.second->meta());\n  }\n\n  std::sort(segments_meta.begin(), segments_meta.end(),\n            [](const SegmentMeta::Ptr &a, const SegmentMeta::Ptr &b) {\n              return a->min_doc_id() < b->min_doc_id();\n            });\n\n  return segments_meta;\n}\n\n// Runs Segment::add_column on every managed segment, at most `concurrency`\n// segments at a time. A non-positive `concurrency` selects the hardware\n// thread count. Returns the first non-OK status of a batch once every task of\n// that batch has finished; later batches are not started.\nStatus SegmentManager::add_column(const FieldSchema::Ptr &column_schema,\n                                  const std::string &expression,\n                                  int concurrency) {\n  if (concurrency <= 0) {\n    // hardware_concurrency() may return 0 when the value is not computable;\n    // clamp to 1 so the batching loop below always advances.\n    concurrency =\n        std::max(1, static_cast<int>(std::thread::hardware_concurrency()));\n  }\n  const size_t batch_size = static_cast<size_t>(concurrency);\n\n  std::vector<std::pair<SegmentID, Segment::Ptr>> segments(\n      segments_map_.begin(), segments_map_.end());\n  std::vector<std::future<Status>> futures;\n\n  for (size_t i = 0; i < segments.size(); i += batch_size) {\n    size_t end = std::min(i + batch_size, segments.size());\n    for (size_t j = i; j < end; ++j) {\n      // Copy the shared_ptr into the task so it never refers to a local.\n      Segment::Ptr segment = segments[j].second;\n      futures.emplace_back(std::async(\n          std::launch::async,\n          [segment, &column_schema, &expression, concurrency]() -> Status {\n            return segment->add_column(column_schema, expression,\n                                       AddColumnOptions{concurrency});\n          }));\n    }\n\n    // Join the whole batch before reporting, so that no task is still\n    // running when this function returns.\n    Status batch_status = Status::OK();\n    for (auto &future : futures) {\n      Status status = future.get();\n      if (batch_status.ok() && !status.ok()) {\n        batch_status = status;\n      }\n    }\n    futures.clear();\n    if (!batch_status.ok()) {\n      return batch_status;\n    }\n  }\n\n  return Status::OK();\n}\n\n// Runs Segment::alter_column on every managed segment, at most `concurrency`\n// segments at a time. A non-positive `concurrency` selects the hardware\n// thread count. Returns the first non-OK status of a batch once every task of\n// that batch has finished; later batches are not started.\nStatus SegmentManager::alter_column(const std::string &column_name,\n                                    const FieldSchema::Ptr &new_column_schema,\n                                    int concurrency) {\n  if (concurrency <= 0) {\n    // hardware_concurrency() may return 0 when the value is not computable;\n    // clamp to 1 so the batching loop below always advances.\n    concurrency =\n        std::max(1, static_cast<int>(std::thread::hardware_concurrency()));\n  }\n  const size_t batch_size = static_cast<size_t>(concurrency);\n\n  std::vector<std::pair<SegmentID, Segment::Ptr>> segments(\n      segments_map_.begin(), segments_map_.end());\n  std::vector<std::future<Status>> futures;\n\n  for (size_t i = 0; i < segments.size(); i += batch_size) {\n    size_t end = std::min(i + batch_size, segments.size());\n    for (size_t j = i; j < end; ++j) {\n      // Copy the shared_ptr into the task so it never refers to a local.\n      Segment::Ptr segment = segments[j].second;\n      futures.emplace_back(std::async(\n          std::launch::async,\n          [segment, &column_name, &new_column_schema, concurrency]() -> Status {\n            return segment->alter_column(column_name, new_column_schema,\n                                         AlterColumnOptions{concurrency});\n          }));\n    }\n\n    // Join the whole batch before reporting, so that no task is still\n    // running when this function returns.\n    Status batch_status = Status::OK();\n    for (auto &future : futures) {\n      Status status = future.get();\n      if (batch_status.ok() && !status.ok()) {\n        batch_status = status;\n      }\n    }\n    futures.clear();\n    if (!batch_status.ok()) {\n      return batch_status;\n    }\n  }\n\n  return Status::OK();\n}\n\n// Drops `column_name` from every managed segment, one segment after another.\nStatus SegmentManager::drop_column(const std::string &column_name) {\n  for (auto &[segment_id, segment] : segments_map_) {\n    auto s = segment->drop_column(column_name);\n    CHECK_RETURN_STATUS(s);\n  }\n\n  return Status::OK();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/segment_manager.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <vector>\n#include \"segment.h\"\n\nnamespace zvec {\n// Keeps the segments of a collection in a map keyed by SegmentID and fans\n// column operations out to them. The class holds no lock of its own.\nclass SegmentManager {\n public:\n  using Ptr = std::shared_ptr<SegmentManager>;\n\n  SegmentManager() = default;\n  ~SegmentManager() = default;\n\n public:\n  // Number of managed segments; the container size is narrowed explicitly to\n  // the 32-bit return type.\n  uint32_t segment_count() const {\n    return static_cast<uint32_t>(segments_map_.size());\n  }\n\n  Status add_segment(Segment::Ptr segment);\n\n  Status remove_segment(SegmentID segment_id);\n\n  Status destroy_segment(SegmentID segment_id);\n\n  std::vector<Segment::Ptr> get_segments() const;\n\n  std::vector<SegmentMeta::Ptr> get_segments_meta() const;\n\n  Status add_column(const FieldSchema::Ptr &column_schema,\n                    const std::string &expression, int concurrency);\n\n  Status alter_column(const std::string &column_name,\n                      const FieldSchema::Ptr &new_column_schema,\n                      int concurrency);\n\n  Status drop_column(const std::string &column_name);\n\n private:\n  std::unordered_map<SegmentID, Segment::Ptr> segments_map_;\n};\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/sql_expr_parser.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"sql_expr_parser.h\"\n#include <cctype>\n#include <string>\n#include <arrow/result.h>\n#include <arrow/status.h>\n#include <arrow/type.h>\n\nnamespace zvec {\n\nbool IsNumericType(const std::shared_ptr<arrow::DataType> &type) {\n  return arrow::is_integer(type->id()) || arrow::is_floating(type->id());\n}\n\nusing arrow::compute::call;\nusing arrow::compute::Expression;\nusing arrow::compute::field_ref;\nusing arrow::compute::literal;\n\nclass Parser {\n public:\n  Parser(const std::string &expr, const std::shared_ptr<arrow::Schema> &schema)\n      : expr_(expr), pos_(0), schema_(schema) {}\n\n  arrow::Result<Expression> Parse() {\n    SkipWhitespace();\n    ARROW_ASSIGN_OR_RAISE(auto e, ParseExpression());\n    SkipWhitespace();\n    if ((size_t)pos_ < expr_.size()) {\n      return arrow::Status::Invalid(\"Unexpected character at position \", pos_,\n                                    \": \", expr_[pos_]);\n    }\n    return e;\n  }\n\n private:\n  std::string expr_;\n  int pos_;\n  std::shared_ptr<arrow::Schema> schema_;\n\n  void SkipWhitespace() {\n    while ((size_t)pos_ < expr_.size() && std::isspace(expr_[pos_])) {\n      ++pos_;\n    }\n  }\n\n  arrow::Result<Expression> ParseExpression() {\n    SkipWhitespace();\n    ARROW_ASSIGN_OR_RAISE(auto left, ParseTerm());\n    SkipWhitespace();\n    while ((size_t)pos_ < 
expr_.size() &&\n           (expr_[pos_] == '+' || expr_[pos_] == '-')) {\n      char op = expr_[pos_++];\n      SkipWhitespace();\n      ARROW_ASSIGN_OR_RAISE(auto right, ParseTerm());\n      SkipWhitespace();\n      auto func = (op == '+') ? \"add\" : \"subtract\";\n      left = call(std::string(func), {left, right});\n    }\n\n    return left;\n  }\n\n  arrow::Result<Expression> ParseTerm() {\n    SkipWhitespace();\n    ARROW_ASSIGN_OR_RAISE(auto left, ParseFactor());\n    SkipWhitespace();\n\n    while ((size_t)pos_ < expr_.size() &&\n           (expr_[pos_] == '*' || expr_[pos_] == '/')) {\n      char op = expr_[pos_++];\n      SkipWhitespace();\n      ARROW_ASSIGN_OR_RAISE(auto right, ParseFactor());\n      SkipWhitespace();\n      auto func = (op == '*') ? \"multiply\" : \"divide\";\n      left = call(std::string(func), {left, right});\n    }\n\n    return left;\n  }\n\n  arrow::Result<Expression> ParseFactor() {\n    SkipWhitespace();\n\n    if ((size_t)pos_ >= expr_.size()) {\n      return arrow::Status::Invalid(\"Unexpected end of expression.\");\n    }\n\n    char c = expr_[pos_];\n\n    // Parenthetical expression\n    if (c == '(') {\n      ++pos_;\n      SkipWhitespace();\n      ARROW_ASSIGN_OR_RAISE(auto inner, ParseExpression());\n      SkipWhitespace();\n      if ((size_t)pos_ >= expr_.size() || expr_[pos_] != ')') {\n        return arrow::Status::Invalid(\"Mismatched parentheses.\");\n      }\n      ++pos_;\n      SkipWhitespace();\n      return inner;\n    }\n\n    // Unary minus operator\n    if (c == '-') {\n      ++pos_;  // Skip the minus sign\n      SkipWhitespace();\n      ARROW_ASSIGN_OR_RAISE(auto operand, ParseFactor());\n      return call(\"negate\", {operand});\n    }\n\n    // Unary plus operator (optional support)\n    if (c == '+') {\n      ++pos_;  // Skip the plus sign\n      SkipWhitespace();\n      return ParseFactor();\n    }\n\n    // Numeric literal (integer or floating point)\n    if (std::isdigit(c)) {\n      return 
ParseNumber();\n    }\n\n    // Column name (starts with letter or _)\n    if (std::isalpha(c) || c == '_') {\n      return ParseColumnName();\n    }\n\n    return arrow::Status::Invalid(\"Unexpected character: '\", std::string(1, c),\n                                  \"'\");\n  }\n\n  arrow::Result<Expression> ParseNumber() {\n    int start = pos_;\n    bool has_dot = false;\n    bool has_exponent = false;\n\n    while ((size_t)pos_ < expr_.size()) {\n      char c = expr_[pos_];\n      if (std::isdigit(c)) {\n        ++pos_;\n      } else if (c == '.' && !has_dot) {\n        has_dot = true;\n        ++pos_;\n      } else if ((c == 'e' || c == 'E') && !has_exponent) {\n        has_exponent = true;\n        ++pos_;\n        if ((size_t)pos_ < expr_.size() &&\n            (expr_[pos_] == '+' || expr_[pos_] == '-')) {\n          ++pos_;\n        }\n      } else {\n        break;\n      }\n    }\n\n    std::string num_str = expr_.substr(start, pos_ - start);\n\n    if (!has_dot && !has_exponent) {\n      try {\n        int64_t value = std::stoll(num_str);\n        return literal(value);\n      } catch (...) {\n        // fallback to double\n        try {\n          double value = std::stod(num_str);\n          return literal(value);\n        } catch (...) {\n          return arrow::Status::Invalid(\"Invalid integer: \", num_str);\n        }\n      }\n    } else {\n      try {\n        double value = std::stod(num_str);\n        return literal(value);\n      } catch (...) 
{\n        return arrow::Status::Invalid(\"Invalid float: \", num_str);\n      }\n    }\n    return arrow::Status::Invalid(\"Failed to parse number: \", num_str);\n  }\n\n  arrow::Result<Expression> ParseColumnName() {\n    int start = pos_;\n    while ((size_t)pos_ < expr_.size()) {\n      char c = expr_[pos_];\n      if (std::isalnum(c) || c == '_') {\n        ++pos_;\n      } else {\n        break;\n      }\n    }\n    std::string name = expr_.substr(start, pos_ - start);\n\n    auto field = schema_->GetFieldByName(name);\n    if (!field) {\n      return arrow::Status::Invalid(\"Column not found in schema: \", name);\n    } else if (!IsNumericType(field->type())) {\n      return arrow::Status::Invalid(\"Column is not numeric: \", name);\n    }\n\n    return field_ref(name);\n  }\n};\n\narrow::Result<Expression> CheckSupportedArithmeticExpression(\n    const Expression &expr, const arrow::Schema &schema) {\n  // Case 0: Literal, must be numeric type\n  if (auto literal = expr.literal()) {\n    auto type = literal->type();\n    if (IsNumericType(type)) {\n      return expr;\n    } else {\n      return arrow::Status::Invalid(\"Only numeric literals are allowed, got: \",\n                                    literal->ToString());\n    }\n  }\n\n  // Case 1: Single column reference (e.g., col)\n  if (auto field_ref = expr.field_ref()) {\n    auto field = schema.GetFieldByName(*field_ref->name());\n    if (!field) {\n      return arrow::Status::Invalid(\"Field not found: \", *field_ref->name());\n    }\n    if (!IsNumericType(field->type())) {\n      return arrow::Status::Invalid(\n          \"Only numeric columns are allowed, but got: \", field->ToString());\n    }\n    return expr;  // Valid, return directly\n  }\n\n  // Step 2: Handle function calls (unary, binary, etc.)\n  if (auto call = expr.call()) {\n    const auto &func_name = call->function_name;\n\n    // Case 2: Binary arithmetic operations (e.g., col + 1)\n    if (func_name == \"add\" || func_name == 
\"subtract\" ||\n        func_name == \"multiply\" || func_name == \"divide\") {\n      if (call->arguments.size() != 2) {\n        return arrow::Status::Invalid(\"Expected two arguments for '\", func_name,\n                                      \"'\");\n      }\n\n      const auto &left = call->arguments[0];\n      const auto &right = call->arguments[1];\n\n      // One must be field_ref, the other must be literal\n      bool left_is_field = left.field_ref() != nullptr;\n      bool right_is_literal = right.literal() != nullptr;\n\n      if (left_is_field && right_is_literal) {\n        auto field = schema.GetFieldByName(*left.field_ref()->name());\n        if (!field) {\n          return arrow::Status::Invalid(\"Field not found: \",\n                                        *left.field_ref()->name());\n        }\n        if (!IsNumericType(field->type())) {\n          return arrow::Status::Invalid(\"Column is not numeric: \",\n                                        field->ToString());\n        }\n        return expr;\n      }\n\n      bool right_is_field = right.field_ref() != nullptr;\n      bool left_is_literal = left.literal() != nullptr;\n\n      if (right_is_field && left_is_literal) {\n        auto field = schema.GetFieldByName(*right.field_ref()->name());\n        if (!field) {\n          return arrow::Status::Invalid(\"Field not found: \",\n                                        *right.field_ref()->name());\n        }\n        if (!IsNumericType(field->type())) {\n          return arrow::Status::Invalid(\"Column is not numeric: \",\n                                        field->ToString());\n        }\n        return expr;\n      }\n\n      return arrow::Status::Invalid(\n          \"Only support binary operation between a column and a literal, got: \",\n          expr.ToString());\n    }\n\n    // Case 3: Unary operators (e.g., -col)\n    if (func_name == \"negate\") {\n      if (call->arguments.size() != 1) {\n        return 
arrow::Status::Invalid(\"negate expects one argument\");\n      }\n      const auto &arg = call->arguments[0];\n\n      // Check if argument is field_ref or literal\n      if (auto field_ref = arg.field_ref()) {\n        auto field = schema.GetFieldByName(*field_ref->name());\n        if (!field) {\n          return arrow::Status::Invalid(\"Field not found: \",\n                                        *field_ref->name());\n        }\n        if (!IsNumericType(field->type())) {\n          return arrow::Status::Invalid(\"Cannot negate non-numeric column: \",\n                                        field->ToString());\n        }\n        return expr;\n      } else if (auto literal = arg.literal()) {\n        // Allow negation of literals\n        if (IsNumericType(literal->type())) {\n          return expr;\n        } else {\n          return arrow::Status::Invalid(\"Cannot negate non-numeric literal: \",\n                                        literal->ToString());\n        }\n      } else {\n        return arrow::Status::Invalid(\n            \"Only support negation of a column or numeric literal, got: \",\n            arg.ToString());\n      }\n    }\n\n    // Unsupported functions\n    return arrow::Status::Invalid(\"Unsupported function in expression: \",\n                                  func_name);\n  }\n\n  // Fallback error: unsupported expression form\n  return arrow::Status::Invalid(\n      \"Only support: (1) single numeric column or literal, (2) column \"\n      \"+, -, *, / literal, (3) -column or -literal. Got: \",\n      expr.ToString());\n}\n\n// Public interface function\narrow::Result<Expression> ParseToExpression(\n    const std::string &sql_expr, const std::shared_ptr<arrow::Schema> &schema) {\n  Parser parser(sql_expr, schema);\n  return parser.Parse();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/segment/sql_expr_parser.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/compute/api.h>\n#include <arrow/status.h>\n\nnamespace zvec {\n\narrow::Result<arrow::compute::Expression> ParseToExpression(\n    const std::string &sql_expr, const std::shared_ptr<arrow::Schema> &schema);\n\narrow::Result<arrow::compute::Expression> CheckSupportedArithmeticExpression(\n    const arrow::compute::Expression &expr, const arrow::Schema &schema);\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/arrow_ipc_writer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"arrow_ipc_writer.h\"\n#include <cstdint>\n#include <iostream>\n#include <arrow/compute/api_vector.h>\n\nnamespace zvec {\n\nArrowIpcWriter::ArrowIpcWriter(const std::string &filepath,\n                               int64_t max_rows_per_batch)\n    : filepath_(filepath),\n      max_rows_per_batch_(max_rows_per_batch),\n      finalized_(false) {}\n\nArrowIpcWriter::~ArrowIpcWriter() {\n  if (!finalized_ && writer_) {\n    auto status = finalize();\n    if (!status.ok()) {\n      std::cerr << \"Auto-finalize failed: \" << status.ToString() << std::endl;\n    }\n  }\n}\n\narrow::Status ArrowIpcWriter::insert(\n    std::shared_ptr<arrow::RecordBatchReader> reader,\n    const IndexFilter::Ptr &filter) {\n  if (!reader) {\n    return arrow::Status::Invalid(\"RecordBatchReader is null\");\n  }\n\n  auto incoming_schema = reader->schema();\n  if (!incoming_schema) {\n    return arrow::Status::Invalid(\"Reader schema is null\");\n  }\n\n  if (!writer_) {\n    schema_ = incoming_schema;\n\n    ARROW_ASSIGN_OR_RAISE(sink_, arrow::io::FileOutputStream::Open(filepath_));\n\n    auto writer = arrow::ipc::MakeFileWriter(sink_.get(), schema_);\n    if (!writer.ok()) {\n      return writer.status();\n    }\n\n    writer_ = std::move(writer.ValueOrDie());\n\n  } else {\n    if (!schema_->Equals(incoming_schema)) {\n      return 
arrow::Status::Invalid(\"Schema mismatch in Insert()\");\n    }\n  }\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  while (true) {\n    ARROW_ASSIGN_OR_RAISE(batch, reader->Next());\n    if (!batch) break;\n    if (batch->num_rows() == 0) continue;\n\n    if (max_rows_per_batch_ > 0 && batch->num_rows() > max_rows_per_batch_) {\n      int64_t offset = 0;\n      while (offset < batch->num_rows()) {\n        int64_t length =\n            std::min(max_rows_per_batch_, batch->num_rows() - offset);\n        auto slice = batch->Slice(offset, length);\n        ARROW_RETURN_NOT_OK(write_batch(*slice, filter));\n        offset += length;\n      }\n    } else {\n      ARROW_RETURN_NOT_OK(write_batch(*batch, filter));\n    }\n\n    batch.reset();\n  }\n\n  return arrow::Status::OK();\n}\n\narrow::Status ArrowIpcWriter::insert_batch(\n    std::shared_ptr<arrow::RecordBatch> batch, const IndexFilter::Ptr &filter) {\n  if (!batch) {\n    return arrow::Status::Invalid(\"RecordBatch is null\");\n  }\n\n  if (batch->num_rows() == 0) {\n    return arrow::Status::OK();\n  }\n\n  auto incoming_schema = batch->schema();\n  if (!incoming_schema) {\n    return arrow::Status::Invalid(\"Reader schema is null\");\n  }\n\n  if (!writer_) {\n    schema_ = incoming_schema;\n\n    ARROW_ASSIGN_OR_RAISE(sink_, arrow::io::FileOutputStream::Open(filepath_));\n\n    auto writer = arrow::ipc::MakeFileWriter(sink_.get(), schema_);\n    if (!writer.ok()) {\n      return writer.status();\n    }\n\n    writer_ = std::move(writer.ValueOrDie());\n\n  } else {\n    if (!schema_->Equals(incoming_schema)) {\n      return arrow::Status::Invalid(\"Schema mismatch in Insert()\");\n    }\n  }\n\n  if (max_rows_per_batch_ > 0 && batch->num_rows() > max_rows_per_batch_) {\n    int64_t offset = 0;\n    while (offset < batch->num_rows()) {\n      int64_t length =\n          std::min(max_rows_per_batch_, batch->num_rows() - offset);\n      auto slice = batch->Slice(offset, length);\n\n      
ARROW_RETURN_NOT_OK(write_batch(*slice, filter));\n\n      offset += length;\n    }\n  } else {\n    ARROW_RETURN_NOT_OK(write_batch(*batch, filter));\n  }\n\n  return arrow::Status::OK();\n}\n\narrow::Status ArrowIpcWriter::write_batch(const arrow::RecordBatch &batch,\n                                          const IndexFilter::Ptr &filter) {\n  if (!filter) {\n    return writer_->WriteRecordBatch(batch);\n  }\n\n  std::vector<int64_t> selected_indices;\n  for (int64_t i = 0; i < batch.num_rows(); ++i) {\n    if (filter->is_filtered(i)) {\n      selected_indices.push_back(i);\n    }\n  }\n\n  if (selected_indices.empty()) {\n    return arrow::Status::OK();\n  }\n\n  arrow::Int64Builder builder;\n  ARROW_RETURN_NOT_OK(builder.AppendValues(selected_indices));\n  std::shared_ptr<arrow::Array> selection_array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&selection_array));\n\n  std::vector<std::shared_ptr<arrow::Array>> filtered_columns;\n  for (int i = 0; i < batch.num_columns(); ++i) {\n    arrow::Datum out;\n    ARROW_ASSIGN_OR_RAISE(\n        out, arrow::compute::Take(batch.column(i), selection_array));\n    filtered_columns.push_back(out.make_array());\n  }\n\n  auto filtered_batch = arrow::RecordBatch::Make(\n      batch.schema(), static_cast<int64_t>(selected_indices.size()),\n      filtered_columns);\n\n  return writer_->WriteRecordBatch(*filtered_batch);\n}\n\narrow::Status ArrowIpcWriter::finalize() {\n  if (finalized_) return arrow::Status::OK();\n  if (!writer_) {\n    return arrow::Status::Invalid(\"No data written, cannot finalize\");\n  }\n\n  ARROW_RETURN_NOT_OK(writer_->Close());\n  writer_.reset();\n\n  ARROW_RETURN_NOT_OK(sink_->Close());\n  sink_.reset();\n\n  finalized_ = true;\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/arrow_ipc_writer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n// arrow_ipc_writer.h\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/writer.h>\n#include \"db/index/common/index_filter.h\"\n#include \"forward_writer.h\"\n\nnamespace zvec {\n\nclass ArrowIpcWriter : public ForwardWriter {\n public:\n  explicit ArrowIpcWriter(const std::string &filepath,\n                          int64_t max_rows_per_batch = 0);\n  ~ArrowIpcWriter();\n\n  arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,\n                       const IndexFilter::Ptr &filter = nullptr) override;\n\n  arrow::Status insert_batch(std::shared_ptr<arrow::RecordBatch> batch,\n                             const IndexFilter::Ptr &filter = nullptr) override;\n\n  arrow::Status finalize() override;\n\n private:\n  arrow::Status write_batch(const arrow::RecordBatch &batch,\n                            const IndexFilter::Ptr &filter);\n\n private:\n  std::string filepath_;\n  int64_t max_rows_per_batch_;\n\n  std::shared_ptr<arrow::io::FileOutputStream> sink_;\n  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer_;\n  std::shared_ptr<arrow::Schema> schema_;\n  bool finalized_;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/base_forward_store.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <arrow/compute/api.h>\n#include <arrow/datum.h>\n#include <arrow/table_builder.h>\n#include <arrow/util/async_generator.h>\n#include <zvec/db/status.h>\n\nnamespace cp = arrow::compute;\n\nusing Table = arrow::Table;\nusing RecordBatch = arrow::RecordBatch;\nusing RecordBatchReader = arrow::RecordBatchReader;\nusing RecordBatchBuilder = arrow::RecordBatchBuilder;\nusing TablePtr = std::shared_ptr<Table>;\nusing ExecBatchPtr = std::shared_ptr<arrow::compute::ExecBatch>;\nusing RecordBatchPtr = std::shared_ptr<RecordBatch>;\nusing RecordBatchReaderPtr = std::shared_ptr<RecordBatchReader>;\nusing RecordBatchBuilderPtr = std::shared_ptr<RecordBatchBuilder>;\n\nnamespace zvec {\n\nclass BaseForwardStore {\n public:\n  using Ptr = std::shared_ptr<BaseForwardStore>;\n\n  virtual Status Open() = 0;\n\n  virtual TablePtr fetch(const std::vector<std::string> &columns,\n                         const std::vector<int> &indices) = 0;\n\n  virtual ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                             int index) = 0;\n\n  virtual RecordBatchReaderPtr scan(\n      const std::vector<std::string> &columns) = 0;\n\n  virtual const std::shared_ptr<arrow::Schema> physic_schema() const = 0;\n\n  virtual TablePtr get_table() = 0;\n};\n\n}  // 
namespace zvec"
  },
  {
    "path": "src/db/index/storage/bufferpool_forward_store.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"bufferpool_forward_store.h\"\n#include <arrow/acero/exec_plan.h>\n#include <arrow/compute/api.h>\n#include <arrow/filesystem/api.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/ipc/writer.h>\n#include <arrow/result.h>\n#include <arrow/status.h>\n#include <parquet/arrow/reader.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/index/storage/store_helper.h\"\n#include \"lazy_record_batch_reader.h\"\n\n\nnamespace zvec {\n\nBufferPoolForwardStore::BufferPoolForwardStore(const std::string &uri)\n    : file_path_(uri) {}\n\nStatus BufferPoolForwardStore::Open() {\n  std::string uri = file_path_;\n  auto status = CreateRandomAccessFileByUri(uri, &file_, &file_path_);\n  if (!status.ok()) {\n    return Status::InternalError(\"Failed to create random access uri: \", uri,\n                                 \" : \", status.ToString());\n  }\n  auto format = InferFileFormat(file_path_);\n  if (format == FileFormat::PARQUET) {\n    status = OpenParquet(file_);\n    if (!status.ok()) {\n      return Status::InternalError(\"Failed to open parquet file: \", file_path_,\n                                   \" : \", status.ToString());\n    }\n  } else {\n    return Status::InternalError(\"Unsupported format, file: \", file_path_);\n  }\n  return Status::OK();\n}\narrow::Status 
BufferPoolForwardStore::OpenParquet(\n    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {\n  auto parquet_file_reader = parquet::ParquetFileReader::Open(file);\n  ARROW_RETURN_NOT_OK(parquet::arrow::FileReader::Make(\n      arrow::default_memory_pool(), std::move(parquet_file_reader),\n      &parquet_reader_));\n\n  auto parquet_metadata = parquet_reader_->parquet_reader()->metadata();\n  num_rows_ = parquet_metadata->num_rows();\n  num_row_groups_ = parquet_metadata->num_row_groups();\n\n  // Initialize row group offsets and row counts\n  int64_t offset = 0;\n  for (int64_t rg = 0; rg < num_row_groups_; ++rg) {\n    auto row_group_metadata = parquet_metadata->RowGroup(rg);\n    int64_t num_rows_in_group = row_group_metadata->num_rows();\n    row_group_row_nums_.push_back(num_rows_in_group);\n    row_group_offsets_.push_back(offset);\n    offset += num_rows_in_group;\n  }\n\n  ARROW_RETURN_NOT_OK(parquet_reader_->GetSchema(&physic_schema_));\n\n  LOG_INFO(\"Opened Parquet with %lld rows, %d cols, %d row groups\",\n           static_cast<long long>(num_rows_), physic_schema_->num_fields(),\n           parquet_metadata->num_row_groups());\n\n  return arrow::Status::OK();\n}\n\n\nbool BufferPoolForwardStore::validate(\n    const std::vector<std::string> &columns) const {\n  if (columns.empty()) {\n    LOG_ERROR(\"Empty columns\");\n    return false;\n  }\n  // TODO : for persist segment, after add new column, this check is not\n  // correct.\n  for (auto &column : columns) {\n    if (column == LOCAL_ROW_ID) {\n      continue;\n    }\n    if (physic_schema_->GetFieldIndex(column) == -1) {\n      LOG_ERROR(\"Validate failed. 
unknown column: %s\", column.c_str());\n      return false;\n    }\n  }\n  return true;\n}\n\nint BufferPoolForwardStore::FindRowGroupForRow(int64_t row) {\n  auto it = std::upper_bound(row_group_offsets_.begin(),\n                             row_group_offsets_.end(), row);\n  if (it == row_group_offsets_.begin()) {\n    return 0;\n  }\n  return static_cast<int>(std::distance(row_group_offsets_.begin(), it) - 1);\n}\n\nint64_t BufferPoolForwardStore::GetRowGroupOffset(int rg_id) {\n  if (rg_id < 0 || rg_id >= static_cast<int>(row_group_offsets_.size())) {\n    LOG_ERROR(\"Invalid row group id: %d, max: %zu\", rg_id,\n              row_group_offsets_.size());\n    return -1;\n  }\n  return row_group_offsets_[rg_id];\n}\n\n\nTablePtr BufferPoolForwardStore::fetch(const std::vector<std::string> &columns,\n                                       const std::vector<int> &indices) {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  if (indices.empty()) {\n    arrow::ArrayVector empty_arrays;\n    auto fields = SelectFields(physic_schema_, columns);\n    for (const auto &field : fields) {\n      empty_arrays.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());\n    }\n    return arrow::Table::Make(std::make_shared<arrow::Schema>(fields),\n                              empty_arrays, 0);\n  }\n\n  bool need_local_doc_id = false;\n  std::vector<int> col_indices;\n  std::vector<int> data_column_positions;\n  std::vector<std::shared_ptr<arrow::Field>> all_fields(columns.size());\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      need_local_doc_id = true;\n      all_fields[i] = arrow::field(LOCAL_ROW_ID, arrow::uint64());\n    } else {\n      int idx = physic_schema_->GetFieldIndex(columns[i]);\n      if (idx == -1) {\n        LOG_ERROR(\"Unknown column: %s\", columns[i].c_str());\n        return nullptr;\n      }\n      col_indices.push_back(idx);\n      data_column_positions.push_back(static_cast<int>(i));\n      
all_fields[i] = physic_schema_->GetFieldByName(columns[i]);\n    }\n  }\n\n  std::unordered_map<int, std::vector<std::pair<int, uint64_t>>> rg_to_local;\n  std::vector<std::pair<int, int64_t>>\n      local_doc_id_pairs;  // (output_row, global_row)\n\n  int output_row = 0;\n  for (int global_row : indices) {\n    if (global_row < 0 || global_row >= num_rows_) {\n      LOG_ERROR(\"Invalid row index: %d, max: %lld\", global_row,\n                static_cast<long long>(num_rows_));\n      return nullptr;\n    }\n    int rg_id = FindRowGroupForRow(global_row);\n    int64_t offset = GetRowGroupOffset(rg_id);\n    if (offset == -1) {\n      LOG_ERROR(\"Failed to get row group offset for row: %d\", global_row);\n      return nullptr;\n    }\n    uint64_t local_in_rg = global_row - offset;\n    rg_to_local[rg_id].emplace_back(output_row, local_in_rg);\n\n    if (need_local_doc_id) {\n      local_doc_id_pairs.emplace_back(output_row,\n                                      static_cast<int64_t>(global_row));\n    }\n    ++output_row;\n  }\n\n  std::vector<std::vector<std::pair<int, std::shared_ptr<arrow::Scalar>>>>\n      sorted_scalars(col_indices.size());\n\n  auto &buf_mgr = ailego::BufferManager::Instance();\n  for (const auto &[rg_id, pairs] : rg_to_local) {\n    for (size_t i = 0; i < col_indices.size(); ++i) {\n      int col_idx = col_indices[i];\n      auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id);\n      auto buffer_handle = buf_mgr.acquire(buffer_id);\n      auto col_chunked_array = buffer_handle.pin_parquet_data();\n\n      if (!col_chunked_array) {\n        LOG_ERROR(\n            \"Failed to pin parquet data for file: %s, column: %d, row_group: \"\n            \"%d\",\n            file_path_.c_str(), col_idx, rg_id);\n        return nullptr;\n      }\n\n      if (col_chunked_array->num_chunks() == 0) {\n        LOG_WARN(\n            \"No chunks in chunked array for file: %s, column: %d, row_group: \"\n            \"%d\",\n            
file_path_.c_str(), col_idx, rg_id);\n        continue;\n      }\n\n      auto &dst = sorted_scalars[i];\n      for (const auto &[tmp_output_row, local_idx] : pairs) {\n        if ((size_t)local_idx >= (size_t)col_chunked_array->length()) {\n          LOG_ERROR(\"Local index %ld out of bounds for array length %zu\",\n                    static_cast<long>(local_idx),\n                    (size_t)col_chunked_array->length());\n          return nullptr;\n        }\n        auto scalar_result = col_chunked_array->GetScalar(local_idx);\n        if (!scalar_result.ok()) {\n          LOG_ERROR(\"Failed to get scalar for row %zu status: %s\",\n                    (size_t)local_idx,\n                    scalar_result.status().ToString().c_str());\n        }\n        dst.emplace_back(tmp_output_row, std::move(scalar_result.ValueOrDie()));\n      }\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());\n  for (size_t i = 0; i < sorted_scalars.size(); ++i) {\n    auto &vec = sorted_scalars[i];\n    std::sort(vec.begin(), vec.end());\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars;\n    ordered_scalars.reserve(vec.size());\n    for (auto &p : vec) {\n      ordered_scalars.push_back(std::move(p.second));\n    }\n\n    std::shared_ptr<arrow::Array> arr;\n    auto status = ConvertScalarVectorToArrayByType(ordered_scalars, &arr);\n    if (!status.ok()) {\n      LOG_ERROR(\"ConvertScalarVectorToArrayByType failed: %s\",\n                status.message().c_str());\n      return nullptr;\n    }\n\n    int position = data_column_positions[i];\n    result_arrays[position] = std::move(arr);\n  }\n\n  if (need_local_doc_id) {\n    std::sort(local_doc_id_pairs.begin(), local_doc_id_pairs.end());\n    std::vector<uint64_t> values;\n    values.reserve(local_doc_id_pairs.size());\n    for (const auto &p : local_doc_id_pairs) {\n      values.push_back(p.second);\n    }\n\n    // Create UInt64Array\n    auto buffer_result = 
arrow::AllocateBuffer(values.size() * sizeof(uint64_t),\n                                               arrow::default_memory_pool());\n    if (!buffer_result.ok()) return nullptr;\n    auto buffer = std::move(buffer_result.ValueOrDie());\n    std::memcpy(buffer->mutable_data(), values.data(),\n                values.size() * sizeof(uint64_t));\n\n    std::vector<std::shared_ptr<arrow::Buffer>> buffers;\n    buffers.push_back(nullptr);  // no null bitmap\n    buffers.push_back(std::shared_ptr<arrow::Buffer>(buffer.release()));\n\n    auto data = arrow::ArrayData::Make(arrow::uint64(),\n                                       static_cast<uint64_t>(values.size()),\n                                       std::move(buffers), /*null_count=*/0);\n\n    for (size_t i = 0; i < columns.size(); ++i) {\n      if (columns[i] == LOCAL_ROW_ID) {\n        result_arrays[i] = std::make_shared<arrow::UInt64Array>(data);\n      }\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns;\n  result_columns.reserve(result_arrays.size());\n  for (auto &arr : result_arrays) {\n    result_columns.emplace_back(std::make_shared<arrow::ChunkedArray>(arr));\n  }\n\n  auto out_schema = std::make_shared<arrow::Schema>(all_fields);\n  return arrow::Table::Make(out_schema, result_columns,\n                            static_cast<int64_t>(indices.size()));\n}\n\nExecBatchPtr BufferPoolForwardStore::fetch(\n    const std::vector<std::string> &columns, int index) {\n  if (!validate(columns) || index < 0 || index >= num_rows_) {\n    return nullptr;\n  }\n\n  std::vector<int> col_indices;\n  for (const auto &col : columns) {\n    int idx = physic_schema_->GetFieldIndex(col);\n    if (idx == -1) {\n      LOG_ERROR(\"Unknown column: %s\", col.c_str());\n      return nullptr;\n    }\n    col_indices.push_back(idx);\n  }\n\n  int rg_id = FindRowGroupForRow(index);\n  int64_t offset = GetRowGroupOffset(rg_id);\n\n  std::vector<arrow::Datum> scalars;\n  auto &buf_mgr = 
ailego::BufferManager::Instance();\n  for (size_t i = 0; i < col_indices.size(); ++i) {\n    int col_idx = col_indices[i];\n    auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id);\n    auto buffer_handle = buf_mgr.acquire(buffer_id);\n    auto col_chunked_array = buffer_handle.pin_parquet_data();\n\n    if (!col_chunked_array) {\n      LOG_ERROR(\n          \"Failed to pin parquet data for file: %s, column: %d, row_group: \"\n          \"%d\",\n          file_path_.c_str(), col_idx, rg_id);\n      return nullptr;\n    }\n\n    if (col_chunked_array->num_chunks() == 0) {\n      LOG_WARN(\n          \"No chunks in chunked array for file: %s, column: %d, row_group: \"\n          \"%d\",\n          file_path_.c_str(), col_idx, rg_id);\n      continue;\n    }\n    auto concat_result = arrow::Concatenate(col_chunked_array->chunks(),\n                                            arrow::default_memory_pool());\n    if (!concat_result.ok()) {\n      LOG_ERROR(\"Concatenate failed for file: %s, column: %d, row_group: %d\",\n                file_path_.c_str(), col_idx, rg_id);\n      return nullptr;\n    }\n    auto concat = concat_result.ValueOrDie();\n    auto scalar_result = concat->GetScalar(index - offset);\n    if (!scalar_result.ok()) {\n      LOG_ERROR(\"Failed to get scalar for row %zu status: %s\", (size_t)offset,\n                scalar_result.status().ToString().c_str());\n    }\n\n    scalars.emplace_back(std::move(scalar_result.ValueOrDie()));\n  }\n\n  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);\n}\n\nRecordBatchReaderPtr BufferPoolForwardStore::scan(\n    const std::vector<std::string> &columns) {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  // Create a new parquet reader for scanning\n  std::unique_ptr<parquet::arrow::FileReader> parquet_reader;\n  auto parquet_file_reader = parquet::ParquetFileReader::Open(file_);\n  auto status = parquet::arrow::FileReader::Make(arrow::default_memory_pool(),\n        
                                         std::move(parquet_file_reader),\n                                                 &parquet_reader);\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to create parquet reader: %s\", status.message().c_str());\n    return nullptr;\n  }\n\n  return std::make_shared<ParquetRecordBatchReader>(parquet_reader, columns,\n                                                    physic_schema_, file_path_);\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/bufferpool_forward_store.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <arrow/api.h>\n#include <arrow/compute/api.h>\n#include <arrow/filesystem/filesystem.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/util/async_generator.h>\n#include <parquet/arrow/reader.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/db/status.h>\n#include \"base_forward_store.h\"\n\nnamespace zvec {\n\n/// BufferPoolForwardStore implements a forward store that uses a buffer pool\n/// to efficiently manage data access from parquet files.\nclass BufferPoolForwardStore\n    : public BaseForwardStore,\n      public std::enable_shared_from_this<BufferPoolForwardStore> {\n public:\n  /// Pointer type for BufferPoolForwardStore instances\n  using Ptr = std::shared_ptr<BufferPoolForwardStore>;\n\n  /// Constructor that initializes the store with a file URI\n  /// \\param uri The URI of the file to be accessed\n  explicit BufferPoolForwardStore(const std::string &uri);\n\n  virtual ~BufferPoolForwardStore() = default;\n\n  Status Open() override;\n\n  /// Fetch specific columns and row indices from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param indices The list of row indices to fetch\n  /// \\return A table containing the requested data or nullptr on failure\n  TablePtr 
fetch(const std::vector<std::string> &columns,\n                 const std::vector<int> &indices) override;\n\n  /// Fetch specific columns and row indices from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param index The row index to fetch\n  /// \\return An ExecBatch containing the requested data or nullptr on failure\n  ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                     int index) override;\n\n  /// Scan specified columns from the data source\n  /// \\param columns The list of column names to scan\n  /// \\return A RecordBatchReader for streaming the data or nullptr on failure\n  RecordBatchReaderPtr scan(const std::vector<std::string> &columns) override;\n\n  /// Get the physical schema of the file\n  /// \\return A shared pointer to the arrow schema representing the physical\n  /// structure of the data\n  const std::shared_ptr<arrow::Schema> physic_schema() const override {\n    return physic_schema_;\n  }\n\n  TablePtr get_table() override {\n    return nullptr;\n  }\n\n private:\n  /// Validate that the requested columns exist in the schema\n  /// \\param columns The list of column names to validate\n  /// \\return true if all columns are valid, false otherwise\n  bool validate(const std::vector<std::string> &columns) const;\n\n  /// Open a parquet file and initialize metadata\n  /// \\param file The RandomAccessFile to read from\n  /// \\return arrow::Status indicating success or failure\n  arrow::Status OpenParquet(\n      const std::shared_ptr<arrow::io::RandomAccessFile> &file);\n\n  /// Find which row group contains a given row\n  /// \\param row The row index to locate\n  /// \\return The row group ID containing the row\n  int FindRowGroupForRow(int64_t row);\n\n  /// Get the row offset for a given row group\n  /// \\param rg_id The row group ID\n  /// \\return The row offset of the row group, or -1 on error\n  int64_t GetRowGroupOffset(int rg_id);\n\n private:\n  /// Physical schema 
of the file\n  std::shared_ptr<arrow::Schema> physic_schema_;\n\n  /// Total number of rows in the file\n  int64_t num_rows_ = 0;\n\n  /// Path to the file\n  std::string file_path_;\n\n  // Parquet-specific members\n  /// The RandomAccessFile for reading data\n  std::shared_ptr<arrow::io::RandomAccessFile> file_;\n\n  /// The parquet file reader\n  std::unique_ptr<parquet::arrow::FileReader> parquet_reader_;\n\n  /// Number of row groups in the file\n  int64_t num_row_groups_ = 0;\n\n  /// Offsets of each row group\n  std::vector<int64_t> row_group_offsets_;\n\n  /// Number of rows in each row group\n  std::vector<int64_t> row_group_row_nums_;\n\n  /// Buffer manager for caching data\n  std::shared_ptr<ailego::BufferManager> buffer_manager_;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/chunked_file_writer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"chunked_file_writer.h\"\n#include <fstream>\n#include <arrow/ipc/writer.h>\n#include <parquet/arrow/writer.h>\n#include <parquet/exception.h>\n#include <zvec/ailego/logger/logger.h>\n\n\nnamespace zvec {\n\nclass IpcChunkedWriter : public ChunkedFileWriter {\n public:\n  static arrow::Result<std::unique_ptr<IpcChunkedWriter>> Make(\n      const std::string &path, const std::shared_ptr<arrow::Schema> &schema) {\n    ARROW_ASSIGN_OR_RAISE(auto out_file,\n                          arrow::io::FileOutputStream::Open(path));\n\n    std::shared_ptr<arrow::ipc::RecordBatchWriter> writer;\n    ARROW_ASSIGN_OR_RAISE(writer, arrow::ipc::MakeFileWriter(out_file, schema));\n\n    return std::make_unique<IpcChunkedWriter>(schema, std::move(out_file),\n                                              std::move(writer));\n  }\n\n  IpcChunkedWriter(std::shared_ptr<arrow::Schema> schema,\n                   std::shared_ptr<arrow::io::FileOutputStream> out_file,\n                   std::shared_ptr<arrow::ipc::RecordBatchWriter> writer)\n      : ChunkedFileWriter(std::move(schema)),\n        out_file_(std::move(out_file)),\n        writer_(std::move(writer)) {}\n\n  arrow::Status Write(const arrow::RecordBatch &batch) override {\n    return writer_->WriteRecordBatch(batch);\n  }\n\n  arrow::Status Write(const arrow::Table &table) override {\n    return 
writer_->WriteTable(table);\n  }\n\n  arrow::Status Close() override {\n    ARROW_RETURN_NOT_OK(writer_->Close());\n    return out_file_->Close();\n  }\n\n private:\n  std::shared_ptr<arrow::io::FileOutputStream> out_file_;\n  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer_;\n};\n\n\nclass ParquetChunkedWriter : public ChunkedFileWriter {\n public:\n  static arrow::Result<std::unique_ptr<ParquetChunkedWriter>> Make(\n      const std::string &path, const std::shared_ptr<arrow::Schema> &schema) {\n    ARROW_ASSIGN_OR_RAISE(auto out_file,\n                          arrow::io::FileOutputStream::Open(path));\n\n    parquet::WriterProperties::Builder builder;\n    auto properties = builder.build();\n\n    std::shared_ptr<parquet::arrow::FileWriter> writer;\n    ARROW_ASSIGN_OR_RAISE(writer, parquet::arrow::FileWriter::Open(\n                                      *schema, arrow::default_memory_pool(),\n                                      out_file, properties));\n\n    return std::make_unique<ParquetChunkedWriter>(schema, std::move(out_file),\n                                                  std::move(writer));\n  }\n\n  ParquetChunkedWriter(std::shared_ptr<arrow::Schema> schema,\n                       std::shared_ptr<arrow::io::FileOutputStream> out_file,\n                       std::shared_ptr<parquet::arrow::FileWriter> writer)\n      : ChunkedFileWriter(std::move(schema)),\n        out_file_(std::move(out_file)),\n        writer_(std::move(writer)) {}\n\n  arrow::Status Write(const arrow::RecordBatch &batch) override {\n    return writer_->WriteRecordBatch(batch);\n  }\n\n  arrow::Status Write(const arrow::Table &table) override {\n    return writer_->WriteTable(table);\n  }\n\n  arrow::Status Close() override {\n    ARROW_RETURN_NOT_OK(writer_->Close());\n    return out_file_->Close();\n  }\n\n private:\n  std::shared_ptr<arrow::io::FileOutputStream> out_file_;\n  std::shared_ptr<parquet::arrow::FileWriter> 
writer_;\n};\n\n\nstd::unique_ptr<ChunkedFileWriter> ChunkedFileWriter::Open(\n    const std::string &file_path, const std::shared_ptr<arrow::Schema> &schema,\n    FileFormat format) {\n  switch (format) {\n    case FileFormat::IPC: {\n      auto result = IpcChunkedWriter::Make(file_path, schema);\n      if (!result.ok()) {\n        LOG_ERROR(\"Failed to open IPC writer: %s\",\n                  result.status().ToString().c_str());\n        return nullptr;\n      }\n      return std::move(result).ValueUnsafe();\n    }\n    case FileFormat::PARQUET: {\n      auto result = ParquetChunkedWriter::Make(file_path, schema);\n      if (!result.ok()) {\n        LOG_ERROR(\"Failed to open Parquet writer: %s\",\n                  result.status().ToString().c_str());\n        return nullptr;\n      }\n      return std::move(result).ValueUnsafe();\n    }\n    default:\n      LOG_ERROR(\"Unsupported format\");\n      return nullptr;\n  }\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/chunked_file_writer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <parquet/arrow/writer.h>\n#include <zvec/db/type.h>\n\nnamespace zvec {\n\nclass ChunkedFileWriter {\n public:\n  using Ptr = std::unique_ptr<ChunkedFileWriter>;\n\n  static std::unique_ptr<ChunkedFileWriter> Open(\n      const std::string &file_path,\n      const std::shared_ptr<arrow::Schema> &schema, FileFormat format);\n\n  virtual arrow::Status Write(const arrow::RecordBatch &batch) = 0;\n\n  virtual arrow::Status Write(const arrow::Table &table) = 0;\n\n  virtual arrow::Status Close() = 0;\n\n  virtual ~ChunkedFileWriter() = default;  // LCOV_EXCL_BR_LINE\n\n protected:\n  explicit ChunkedFileWriter(std::shared_ptr<arrow::Schema> schema)\n      : schema_(std::move(schema)) {}\n\n  std::shared_ptr<arrow::Schema> schema_;\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/forward_writer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// forward_writer.cc\n#include \"forward_writer.h\"\n#include \"arrow_ipc_writer.h\"\n#include \"parquet_writer.h\"\n\nnamespace zvec {\n\nstd::unique_ptr<ForwardWriter> ForwardWriter::CreateArrowIPCWriter(\n    const std::string &filepath, int64_t max_rows_per_batch) {\n  return std::make_unique<ArrowIpcWriter>(filepath, max_rows_per_batch);\n}\n\nstd::unique_ptr<ForwardWriter> ForwardWriter::CreateParquetWriter(\n    const std::string &filepath, int64_t max_rows_per_batch) {\n  return std::make_unique<ParquetWriter>(filepath, max_rows_per_batch);\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/forward_writer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include \"db/index/common/index_filter.h\"\n\nnamespace zvec {\n\nclass ForwardWriter {\n public:\n  virtual ~ForwardWriter() = default;\n\n  // Factory methods\n  static std::unique_ptr<ForwardWriter> CreateArrowIPCWriter(\n      const std::string &filepath, int64_t max_rows_per_batch = 0);\n\n  static std::unique_ptr<ForwardWriter> CreateParquetWriter(\n      const std::string &filepath, int64_t max_rows_per_batch = 0);\n\n  // Interface methods\n  virtual arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,\n                               const IndexFilter::Ptr &filter = nullptr) = 0;\n\n  virtual arrow::Status insert_batch(\n      std::shared_ptr<arrow::RecordBatch> batch,\n      const IndexFilter::Ptr &filter = nullptr) = 0;\n\n  virtual arrow::Status finalize() = 0;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/lazy_record_batch_reader.h",
    "content": "\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <arrow/ipc/reader.h>\n#include <parquet/arrow/reader.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include \"db/common/constants.h\"\n\n\nnamespace zvec {\n\nclass IPCRecordBatchReader : public arrow::RecordBatchReader {\n public:\n  IPCRecordBatchReader(\n      std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader,\n      const std::vector<std::string> &columns,\n      std::shared_ptr<arrow::Schema> schema)\n      : reader_(std::move(reader)),\n        schema_(std::move(schema)),\n        columns_(columns) {\n    std::vector<std::shared_ptr<arrow::Field>> fields;\n    for (const auto &col : columns) {\n      int index = schema_->GetFieldIndex(col);\n      if (index != -1) {\n        fields.push_back(schema_->field(index));\n        col_indices_.push_back(index);\n      }\n    }\n    projected_schema_ = arrow::schema(fields);\n    num_record_batches_ = reader_->num_record_batches();\n  }\n\n  std::shared_ptr<arrow::Schema> schema() const override {\n    return projected_schema_;\n  }\n\n  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override {\n    if (current_batch_ >= num_record_batches_) {\n      *batch = nullptr;\n      return arrow::Status::OK();\n    }\n\n    ARROW_ASSIGN_OR_RAISE(auto full_batch,\n                          reader_->ReadRecordBatch(current_batch_));\n    
current_batch_++;\n\n    std::vector<std::shared_ptr<arrow::Array>> projected_arrays;\n    for (int index : col_indices_) {\n      projected_arrays.push_back(full_batch->column(index));\n    }\n\n    *batch = arrow::RecordBatch::Make(projected_schema_, full_batch->num_rows(),\n                                      projected_arrays);\n    return arrow::Status::OK();\n  }\n\n private:\n  std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader_;\n  std::shared_ptr<arrow::Schema> schema_;\n  std::shared_ptr<arrow::Schema> projected_schema_;\n  std::vector<std::string> columns_;\n  std::vector<int> col_indices_;\n  int current_batch_ = 0;\n  int num_record_batches_ = 0;\n};\n\n\nclass ParquetRecordBatchReader : public arrow::RecordBatchReader {\n public:\n  ParquetRecordBatchReader(std::unique_ptr<parquet::arrow::FileReader> &reader,\n                           const std::vector<std::string> &columns,\n                           std::shared_ptr<arrow::Schema> schema,\n                           const std::string &file_path, bool with_cache = true)\n      : reader_(std::move(reader)),\n        schema_(std::move(schema)),\n        columns_(columns),\n        file_path_(file_path),\n        with_cache_(with_cache) {\n    std::vector<std::shared_ptr<arrow::Field>> fields;\n    for (const auto &col : columns) {\n      int index = schema_->GetFieldIndex(col);\n      if (index != -1) {\n        fields.push_back(schema_->field(index));\n        col_indices_.push_back(index);\n      }\n    }\n    projected_schema_ = arrow::schema(fields);\n\n    auto parquet_metadata = reader_->parquet_reader()->metadata();\n    total_rows_ = parquet_metadata->num_rows();\n    num_row_groups_ = parquet_metadata->num_row_groups();\n    int64_t offset = 0;\n    for (int64_t rg = 0; rg < num_row_groups_; ++rg) {\n      auto row_group_metadata = parquet_metadata->RowGroup(rg);\n      int64_t num_rows_in_group = row_group_metadata->num_rows();\n      
row_group_row_nums_.push_back(num_rows_in_group);\n      row_group_offsets_.push_back(offset);\n      offset += num_rows_in_group;\n    }\n  }\n\n  std::shared_ptr<arrow::Schema> schema() const override {\n    return projected_schema_;\n  }\n\n  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override {\n    if (current_row_group_ >= num_row_groups_) {\n      return arrow::Status::OK();\n    }\n\n    int64_t rg_id = current_row_group_;\n    int64_t num_rows_in_rg = row_group_row_nums_[rg_id];\n\n    std::vector<std::shared_ptr<arrow::Array>> chunks(col_indices_.size());\n    if (with_cache_) {\n      auto &buf_mgr = ailego::BufferManager::Instance();\n      for (size_t col_idx = 0; col_idx < col_indices_.size(); ++col_idx) {\n        auto buffer_id = ailego::BufferID::ParquetID(\n            file_path_, col_indices_[col_idx], rg_id);\n        auto buffer_handle = buf_mgr.acquire(buffer_id);\n        auto col_chunked_array = buffer_handle.pin_parquet_data();\n        if (col_chunked_array) {\n          std::shared_ptr<arrow::Array> concat;\n          auto concat_result = arrow::Concatenate(col_chunked_array->chunks(),\n                                                  arrow::default_memory_pool());\n          if (!concat_result.ok()) {\n            return concat_result.status();\n          }\n          concat = concat_result.ValueOrDie();\n          chunks[col_idx] = concat;\n        }\n      }\n    } else {\n      std::shared_ptr<arrow::Table> rg_table;\n      ARROW_RETURN_NOT_OK(\n          reader_->RowGroup(rg_id)->ReadTable(col_indices_, &rg_table));\n      for (size_t i = 0; i < col_indices_.size(); ++i) {\n        std::shared_ptr<arrow::Array> concat;\n        auto concat_result = arrow::Concatenate(rg_table->column(i)->chunks(),\n                                                arrow::default_memory_pool());\n        if (!concat_result.ok()) {\n          return concat_result.status();\n        }\n        concat = 
concat_result.ValueOrDie();\n        chunks[i] = concat;\n      }\n    }\n\n    *batch =\n        arrow::RecordBatch::Make(projected_schema_, num_rows_in_rg, chunks);\n    current_row_group_++;\n    return arrow::Status::OK();\n  }\n\n private:\n  std::unique_ptr<parquet::arrow::FileReader> reader_;\n  std::shared_ptr<arrow::Schema> schema_;\n  std::shared_ptr<arrow::Schema> projected_schema_;\n  std::vector<std::string> columns_;\n  std::vector<int> col_indices_;\n  std::string file_path_;\n\n  int64_t current_row_group_ = 0;\n  int64_t num_row_groups_ = 0;\n  int64_t total_rows_ = 0;\n  std::vector<int64_t> row_group_offsets_;\n  std::vector<int64_t> row_group_row_nums_;\n  bool with_cache_;\n};\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/memory_forward_store.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"memory_forward_store.h\"\n#include <memory>\n#include <string>\n#include <vector>\n#include <ailego/pattern/defer.h>\n#include <arrow/api.h>\n#include <arrow/builder.h>\n#include <arrow/compute/api.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/writer.h>\n#include <arrow/record_batch.h>\n#include <arrow/table.h>\n#include <arrow/util/async_generator.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/common/constants.h\"\n#include \"db/index/storage/base_forward_store.h\"\n\n\nnamespace zvec {\n\nMemForwardStore::MemForwardStore(\n    const std::shared_ptr<CollectionSchema> &collection_schema,\n    const std::string &path, const FileFormat format,\n    const uint32_t max_buffer_size)\n    : schema_(collection_schema),\n      path_(path),\n      format_(format),\n      max_cache_size_(max_buffer_size / 100),\n      max_buffer_size_(max_buffer_size) {\n  cache_.reserve(128);\n}\n\nStatus MemForwardStore::Open() {\n  arrow::FieldVector fields;\n  auto status = ConvertCollectionSchemaToArrowFields(schema_, &fields);\n  if (!status.ok()) {\n    return Status::InternalError(\"convert schema to arrow fields failed \",\n                                 status.ToString());\n  }\n  physic_schema_ = arrow::schema(fields);\n  // Initialize file writer\n  writer_ = ChunkedFileWriter::Open(path_, physic_schema_, format_);\n  return 
Status::OK();\n}\n\nRecordBatchBuilderPtr MemForwardStore::createBuilder() {\n  auto result = arrow::RecordBatchBuilder::Make(physic_schema_,\n                                                arrow::default_memory_pool());\n  if (!result.ok()) {\n    LOG_ERROR(\"failed to create RecordBatchBuilder: %s\",\n              result.status().ToString().c_str());\n    return nullptr;\n  }\n  return std::move(result.ValueOrDie());\n}\n\n\nbool MemForwardStore::validate(const std::vector<std::string> &columns) const {\n  if (columns.empty()) {\n    LOG_ERROR(\"empty columns\");\n    return false;\n  }\n  for (auto &column : columns) {\n    if (column == LOCAL_ROW_ID) {\n      continue;\n    }\n    if (physic_schema_->GetFieldIndex(column) == -1) {\n      LOG_ERROR(\"validate failed. unknown column: %s\", column.c_str());\n      return false;\n    }\n  }\n  return true;\n}\n\n\n// Note: This function only converts the docs to arrow::ArrayBuilder; it does\n// not clear cache_.\narrow::Status MemForwardStore::convertToBuilder(\n    RecordBatchBuilderPtr &rb_builder) {\n  for (const auto &doc : cache_) {\n    auto &fields = physic_schema_->fields();\n\n    // global doc_id\n    auto gid_builder =\n        dynamic_cast<arrow::UInt64Builder *>(rb_builder->GetField(0));\n    ARROW_RETURN_NOT_OK(gid_builder->Append(doc.doc_id()));\n\n    // user id(pk)\n    auto uid_builder =\n        dynamic_cast<arrow::StringBuilder *>(rb_builder->GetField(1));\n    ARROW_RETURN_NOT_OK(uid_builder->Append(doc.pk()));\n\n    // other fields\n    for (size_t idx = 2; idx < fields.size(); ++idx) {\n      auto field = fields[idx];\n      auto builder = rb_builder->GetField(idx);\n      ARROW_RETURN_NOT_OK(AppendFieldValueToBuilder(doc, field, builder));\n    }\n  }\n  return arrow::Status::OK();\n}\n\nStatus MemForwardStore::insert(const Doc &doc) {\n  std::lock_guard lock(cache_mtx_);\n  cache_.emplace_back(doc);\n  num_rows_++;\n  auto doc_bytes = doc.memory_usage();\n  total_cache_bytes_ = 
total_cache_bytes_ + (uint32_t)doc_bytes;\n  if (total_cache_bytes_ < max_cache_size_) {\n    return Status::OK();\n  }\n  // Flush cache when it reaches max size\n  auto rb_builder = createBuilder();\n  auto status = convertToBuilder(rb_builder);\n  if (!status.ok()) {\n    return Status::InternalError(\"convertToBuilder error: \", status.ToString());\n  }\n  auto result = rb_builder->Flush(false);\n  if (!result.ok()) {\n    return Status::InternalError(\"flush error: \", result.status().ToString());\n  }\n  auto batch = result.ValueOrDie();\n  int64_t rb_size = MemorySize(*batch);\n  batches_.push_back(batch);\n\n  total_rb_bytes_ = total_rb_bytes_ + (uint32_t)rb_size;\n  cache_.clear();\n  total_cache_bytes_ = 0;\n\n  return Status::OK();\n}\n\narrow::Result<RecordBatchPtr> MemForwardStore::convertToRecordBatch() {\n  auto rb_builder = createBuilder();\n  ARROW_RETURN_NOT_OK(convertToBuilder(rb_builder));\n  ARROW_ASSIGN_OR_RAISE(auto batch, rb_builder->Flush(false));\n  return batch;\n}\n\narrow::Result<TablePtr> MemForwardStore::convertToTable(\n    const std::vector<std::string> &columns, const std::vector<int> &indices) {\n  std::shared_ptr<arrow::RecordBatch> batch;\n  ARROW_ASSIGN_OR_RAISE(batch, convertToRecordBatch());\n  std::vector<std::shared_ptr<arrow::RecordBatch>> all_batches = batches_;\n  if (batch->num_rows() > 0) {\n    all_batches.push_back(batch);\n  }\n\n  if (all_batches.empty()) {\n    return arrow::Table::MakeEmpty(physic_schema_, nullptr);\n  }\n\n  // Combine all batches into a single table\n  std::shared_ptr<arrow::Table> combined_table;\n  ARROW_ASSIGN_OR_RAISE(combined_table,\n                        arrow::Table::FromRecordBatches(all_batches));\n\n  std::shared_ptr<arrow::Table> filtered_table = combined_table;\n  if (!indices.empty()) {\n    // Filter rows by indices if provided\n    std::shared_ptr<arrow::Array> index_array;\n    arrow::Int32Builder builder;\n    ARROW_RETURN_NOT_OK(builder.AppendValues(indices));\n    
ARROW_RETURN_NOT_OK(builder.Finish(&index_array));\n\n    arrow::Datum input_datum(combined_table);\n    arrow::Datum index_datum(index_array);\n\n    arrow::compute::ExecContext ctx;\n    arrow::Datum result_datum;\n    ARROW_ASSIGN_OR_RAISE(\n        result_datum,\n        arrow::compute::Take(input_datum, index_datum,\n                             arrow::compute::TakeOptions::Defaults(), &ctx));\n    filtered_table = result_datum.table();\n  }\n\n  std::shared_ptr<arrow::Table> selected_table = filtered_table;\n  if (!columns.empty()) {\n    // Select only specified columns\n    std::vector<int> column_indices;\n    for (const auto &column_name : columns) {\n      if (column_name == LOCAL_ROW_ID) continue;\n      int index = filtered_table->schema()->GetFieldIndex(column_name);\n      if (index != -1) {\n        column_indices.push_back(index);\n      }\n    }\n\n    if (!column_indices.empty()) {\n      ARROW_ASSIGN_OR_RAISE(selected_table,\n                            filtered_table->SelectColumns(column_indices));\n    }\n  }\n  return selected_table;\n}\n\nStatus MemForwardStore::flush() {\n  std::lock_guard lock(cache_mtx_);\n\n  if (cache_.empty() && batches_.empty()) {\n    return Status::OK();\n  }\n\n  auto result = convertToRecordBatch();\n  if (!result.ok()) {\n    return Status::InternalError(\"failed to convert cache to RecordBatch: \",\n                                 result.status().ToString());\n  }\n\n  auto cache_batch = result.ValueOrDie();\n  if (cache_batch->num_rows() > 0) {\n    batches_.push_back(cache_batch);\n    cache_.clear();\n  }\n\n  bool has_incr = false;\n  size_t start_index = flushed_batches_;\n\n  while (start_index < batches_.size()) {\n    std::vector<std::shared_ptr<arrow::RecordBatch>> batches_to_merge;\n    int64_t total_rows = 0;\n    size_t end_index = start_index;\n\n    while (end_index < batches_.size()) {\n      auto &current_batch = batches_[end_index];\n      int64_t current_rows = current_batch->num_rows();\n\n  
    if (current_rows >= kMaxRecordBatchNumRows) {\n        if (batches_to_merge.empty()) {\n          batches_to_merge.push_back(current_batch);\n          end_index++;\n        }\n        break;\n      }\n\n      if (!batches_to_merge.empty() &&\n          total_rows + current_rows > kMaxRecordBatchNumRows) {\n        break;\n      }\n\n      batches_to_merge.push_back(current_batch);\n      total_rows += current_rows;\n      end_index++;\n    }\n\n    if (!batches_to_merge.empty()) {\n      std::shared_ptr<arrow::RecordBatch> batch_to_write;\n\n      if (batches_to_merge.size() == 1) {\n        batch_to_write = batches_to_merge[0];\n      } else {\n        std::shared_ptr<arrow::Table> table;\n        auto status =\n            arrow::Table::FromRecordBatches(batches_to_merge).Value(&table);\n        if (!status.ok()) {\n          return Status::InternalError(\"failed to merge batches: \",\n                                       status.ToString());\n        }\n\n        result = table->CombineChunksToBatch();\n        if (!result.ok()) {\n          return Status::InternalError(\"failed to combine chunks: \",\n                                       result.status().ToString());\n        }\n        batch_to_write = result.ValueOrDie();\n      }\n\n      auto status = writer_->Write(*batch_to_write);\n      if (!status.ok()) {\n        return Status::InternalError(\"failed to write RecordBatch to file: \",\n                                     status.ToString());\n      }\n\n      flushed_batches_ = end_index;\n      has_incr = true;\n    } else {\n      break;\n    }\n\n    start_index = end_index;\n  }\n\n  if (has_incr) {\n    LOG_INFO(\"successfully flushed %u batches to %s\", flushed_batches_,\n             path_.c_str());\n  }\n  return Status::OK();\n}\n\nStatus MemForwardStore::close() {\n  if (!cache_.empty() || !batches_.empty()) {\n    flush();\n  }\n  if (writer_) {\n    auto status = writer_->Close();\n    if (!status.ok()) {\n      LOG_WARN(\"failed to 
close writer: %s\", status.ToString().c_str());\n    }\n    writer_.reset();\n  }\n  batches_.clear();\n  cache_.clear();\n  return Status::OK();\n}\n\nTablePtr MemForwardStore::get_table() {\n  std::lock_guard lock(cache_mtx_);\n  std::shared_ptr<arrow::RecordBatch> batch =\n      convertToRecordBatch().ValueOrDie();\n  std::vector<std::shared_ptr<arrow::RecordBatch>> all_batches = batches_;\n  if (batch->num_rows() > 0) {\n    all_batches.push_back(batch);\n  }\n\n  if (all_batches.empty()) {\n    return nullptr;\n  }\n\n  return arrow::Table::FromRecordBatches(all_batches).ValueOrDie();\n}\n\nTablePtr MemForwardStore::fetch(const std::vector<std::string> &columns,\n                                const std::vector<int> &indices) {\n  std::lock_guard lock(cache_mtx_);\n\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  if (indices.empty()) {\n    arrow::ArrayVector empty_arrays;\n    auto fields = SelectFields(physic_schema_, columns);\n    for (const auto &field : fields) {\n      empty_arrays.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());\n    }\n    return arrow::Table::Make(std::make_shared<arrow::Schema>(fields),\n                              empty_arrays, 0);\n  }\n\n  bool need_local_doc_id = false;\n  std::vector<std::string> data_columns;\n  std::vector<bool> is_local_row_id(columns.size(), false);\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      need_local_doc_id = true;\n      is_local_row_id[i] = true;\n    } else {\n      data_columns.push_back(columns[i]);\n    }\n  }\n\n  auto result = convertToTable(data_columns, indices);\n  if (!result.ok()) {\n    LOG_ERROR(\"failed to convert to table: %s\",\n              result.status().ToString().c_str());\n    return nullptr;\n  }\n\n  auto data_table = result.ValueOrDie();\n  if (!need_local_doc_id) {\n    return data_table;\n  }\n\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns(\n      columns.size());\n  
std::vector<std::shared_ptr<arrow::Field>> result_fields(columns.size());\n\n  size_t data_col_idx = 0;\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (is_local_row_id[i]) {\n      continue;\n    }\n\n    result_columns[i] = data_table->column(data_col_idx);\n    result_fields[i] = data_table->schema()->field(data_col_idx);\n    data_col_idx++;\n  }\n\n  if (need_local_doc_id) {\n    std::shared_ptr<arrow::Array> rowid_array;\n    arrow::UInt64Builder builder;\n\n    std::vector<uint64_t> indices_i64(indices.begin(), indices.end());\n    auto status = builder.AppendValues(indices_i64);\n\n    if (!status.ok()) {\n      LOG_ERROR(\"failed to append rowid values: %s\", status.ToString().c_str());\n      return nullptr;\n    }\n\n    status = builder.Finish(&rowid_array);\n    if (!status.ok()) {\n      LOG_ERROR(\"failed to finish rowid array: %s\", status.ToString().c_str());\n      return nullptr;\n    }\n    auto rowid_chunked = std::make_shared<arrow::ChunkedArray>(rowid_array);\n\n    for (size_t i = 0; i < columns.size(); ++i) {\n      if (is_local_row_id[i]) {\n        result_columns[i] = rowid_chunked;\n        result_fields[i] = arrow::field(LOCAL_ROW_ID, arrow::uint64());\n      }\n    }\n  }\n\n  auto new_schema = arrow::schema(result_fields);\n  return arrow::Table::Make(new_schema, result_columns, data_table->num_rows());\n}\n\nExecBatchPtr MemForwardStore::fetch(const std::vector<std::string> &columns,\n                                    int index) {\n  std::lock_guard lock(cache_mtx_);\n\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  auto result = convertToTable(columns, std::vector<int>{index});\n  if (!result.ok()) {\n    LOG_ERROR(\"failed to convert to table: %s\",\n              result.status().ToString().c_str());\n    return nullptr;\n  }\n\n  auto table = result.ValueOrDie();\n\n  // Extract scalars\n  std::vector<arrow::Datum> scalars;\n  scalars.reserve(columns.size());\n  for (const auto &column : columns) {\n    
const auto &array = table->GetColumnByName(column);\n    auto scalar_result = array->GetScalar(0);\n    if (!scalar_result.ok()) {\n      LOG_ERROR(\"failed to get column %s scalar from array: %s\", column.c_str(),\n                scalar_result.status().ToString().c_str());\n      return nullptr;\n    }\n    scalars.emplace_back(std::move(scalar_result.ValueOrDie()));\n  }\n\n  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);\n}\n\nRecordBatchReaderPtr MemForwardStore::scan(\n    const std::vector<std::string> &columns) {\n  std::lock_guard lock(cache_mtx_);\n\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  auto result = convertToTable(columns, {});\n  if (!result.ok()) {\n    LOG_ERROR(\"failed to convert to table: %s\",\n              result.status().ToString().c_str());\n    return nullptr;\n  }\n\n  return std::make_shared<arrow::TableBatchReader>(result.ValueOrDie());\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/memory_forward_store.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <iostream>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <vector>\n#include <arrow/compute/api.h>\n#include <arrow/table_builder.h>\n#include <arrow/util/async_generator.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/status.h>\n#include \"base_forward_store.h\"\n#include \"chunked_file_writer.h\"\n#include \"store_helper.h\"\n\nnamespace zvec {\n\n/// MemForwardStore implements a forward store that keeps data in memory\n/// and can flush data to disk when needed.\nclass MemForwardStore : public BaseForwardStore {\n public:\n  /// Pointer type for MemForwardStore instances\n  using Ptr = std::shared_ptr<MemForwardStore>;\n\n  /// Constructor that initializes the store with collection schema and settings\n  /// \\param collection_schema The schema for the collection\n  /// \\param path The path where data will be stored\n  /// \\param format The file format for persistence\n  /// \\param max_cache_rows Maximum number of rows to keep in cache\n  /// \\param max_rows Maximum number of rows allowed in the store\n  MemForwardStore(const std::shared_ptr<CollectionSchema> &collection_schema,\n                  const std::string &path, const FileFormat format,\n                  const uint32_t max_buffer_size = 100 * 1024 * 1024);\n\n  virtual ~MemForwardStore() {\n    close();\n  }\n\n  /// Check if the 
store is full\n  /// \\return true if the store has reached its maximum capacity\n  bool is_full() {\n    return total_bytes() >= max_buffer_size_;\n  }\n\n  /// Open the store\n  /// \\return 0 on success, non-zero on failure\n  Status Open() override;\n\n  /// Insert a document into the store\n  /// \\param doc The document to insert\n  /// \\return 0 on success, non-zero on failure\n  Status insert(const Doc &doc);\n\n  /// Flush cached data to disk\n  /// \\return 0 on success, non-zero on failure\n  Status flush();\n\n  /// Close the store and flush any remaining data\n  /// \\return 0 on success, non-zero on failure\n  Status close();\n\n public:\n  /// Get the path of the store\n  /// \\return The path where data is stored\n  const std::string path() const {\n    return path_;\n  }\n\n  /// Get the total bytes of the store\n  uint32_t total_bytes() const {\n    return total_cache_bytes_ + total_rb_bytes_;\n  }\n\n  /// Get the total number of rows in the store\n  uint32_t num_rows() const {\n    return num_rows_;\n  }\n\n public:\n  /// Fetch specific columns and row indices from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param indices The list of row indices to fetch\n  /// \\return A table containing the requested data or nullptr on failure\n  TablePtr fetch(const std::vector<std::string> &columns,\n                 const std::vector<int> &indices) override;\n\n  /// Fetch specific columns and row indices from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param index The row index to fetch\n  /// \\return An ExecBatch containing the requested data or nullptr on failure\n  ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                     int index) override;\n\n  /// Scan specified columns from the data source\n  /// \\param columns The list of column names to scan\n  /// \\return A RecordBatchReader for streaming the data or nullptr on failure\n  RecordBatchReaderPtr 
scan(const std::vector<std::string> &columns) override;\n\n  /// Get the physical schema of the file\n  /// \\return A shared pointer to the arrow schema representing the physical\n  /// structure of the data\n  const std::shared_ptr<arrow::Schema> physic_schema() const override {\n    return physic_schema_;\n  }\n\n  TablePtr get_table() override;\n\n private:\n  /// Create a RecordBatchBuilder for this store\n  /// \\return A new RecordBatchBuilder\n  RecordBatchBuilderPtr createBuilder();\n\n  /// Convert internal data to a RecordBatch\n  /// \\return A Result containing the RecordBatch or an error status\n  arrow::Result<RecordBatchPtr> convertToRecordBatch();\n\n  /// Convert internal data to a Table\n  /// \\param columns The list of column names to include\n  /// \\param indices The list of row indices to include\n  /// \\return A Result containing the Table or an error status\n  arrow::Result<TablePtr> convertToTable(\n      const std::vector<std::string> &columns, const std::vector<int> &indices);\n\n  /// Convert internal data to a RecordBatchBuilder\n  /// \\param builder The builder to populate\n  /// \\return arrow::Status indicating success or failure\n  arrow::Status convertToBuilder(RecordBatchBuilderPtr &builder);\n\n  /// Validate that the requested columns exist in the schema\n  /// \\param columns The list of column names to validate\n  /// \\return true if all columns are valid, false otherwise\n  bool validate(const std::vector<std::string> &columns) const;\n\n private:\n  /// Mutex to protect cache access\n  std::mutex cache_mtx_;\n\n  /// Cache of documents waiting to be flushed\n  std::vector<Doc> cache_;\n\n  /// Batches of data that have been flushed\n  std::vector<RecordBatchPtr> batches_;\n\n  /// Collection schema\n  std::shared_ptr<CollectionSchema> schema_;\n\n  /// Physical schema\n  std::shared_ptr<arrow::Schema> physic_schema_;\n\n  /// Total RecordBatch bytes in the store\n  uint32_t total_rb_bytes_{0};\n\n  /// Total cache doc 
bytes\n  uint32_t total_cache_bytes_{0};\n\n  /// Total number of rows in the store\n  uint32_t num_rows_{0};\n\n  /// Path where data is stored\n  std::string path_;\n\n  /// File format for persistence\n  FileFormat format_;\n\n  /// Number of batches that have been flushed\n  uint32_t flushed_batches_{0};\n\n  /// Writer for chunked files\n  ChunkedFileWriter::Ptr writer_;\n\n\n  /// Maximum size of cache, default 1MB\n  uint32_t max_cache_size_{1048576};\n\n  /// Maximum size of the buffer, default 100MB\n  uint32_t max_buffer_size_{104857600};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/mmap_forward_store.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"mmap_forward_store.h\"\n#include <memory>\n#include <arrow/acero/options.h>\n#include <arrow/compute/api.h>\n#include <arrow/datum.h>\n#include <arrow/filesystem/localfs.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/index/storage/base_forward_store.h\"\n#include \"lazy_record_batch_reader.h\"\n\n\nnamespace zvec {\n\nMmapForwardStore::MmapForwardStore(const std::string &uri) : file_path_(uri) {}\n\nStatus MmapForwardStore::Open() {\n  std::string uri = file_path_;\n  auto status = CreateRandomAccessFileByUri(uri, &file_, &file_path_);\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to create random access uri: %s : %s\", uri.c_str(),\n              status.ToString().c_str());\n    return Status::InvalidArgument(status.ToString());\n  }\n  format_ = InferFileFormat(file_path_);\n  switch (format_) {\n    case FileFormat::PARQUET: {\n      status = OpenParquet(file_);\n      if (!status.ok()) {\n        LOG_ERROR(\"Failed to open parquet file: %s : %s\", file_path_.c_str(),\n                  status.ToString().c_str());\n        return Status::InternalError(status.ToString());\n      }\n      break;\n    }\n    case FileFormat::IPC: {\n      status = OpenIPC(file_);\n      if (!status.ok()) {\n        LOG_ERROR(\"Failed to open ipc file: %s : %s\", file_path_.c_str(),\n                  status.ToString().c_str());\n       
 return Status::InternalError(status.ToString());\n      }\n      break;\n    }\n    default:\n      LOG_ERROR(\"Unknown file format: %s\", uri.c_str());\n      return Status::InvalidArgument(\"Unknown file format: \", uri);\n      break;\n  }\n  return Status::OK();\n}\n\narrow::Status MmapForwardStore::OpenParquet(\n    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {\n  auto parquet_file_reader = parquet::ParquetFileReader::Open(file);\n  ARROW_RETURN_NOT_OK(parquet::arrow::FileReader::Make(\n      arrow::default_memory_pool(), std::move(parquet_file_reader),\n      &parquet_reader_));\n\n  auto parquet_metadata = parquet_reader_->parquet_reader()->metadata();\n  num_rows_ = parquet_metadata->num_rows();\n  num_row_groups_ = parquet_metadata->num_row_groups();\n\n  // Initialize row group offsets and row counts\n  int64_t offset = 0;\n  for (int64_t rg = 0; rg < num_row_groups_; ++rg) {\n    auto row_group_metadata = parquet_metadata->RowGroup(rg);\n    int64_t num_rows_in_group = row_group_metadata->num_rows();\n    row_group_row_nums_.push_back(num_rows_in_group);\n    row_group_offsets_.push_back(offset);\n    offset += num_rows_in_group;\n  }\n\n  ARROW_RETURN_NOT_OK(parquet_reader_->GetSchema(&physic_schema_));\n\n  LOG_INFO(\"Opened Parquet with %lld rows, %d cols, %d row groups\",\n           static_cast<long long>(num_rows_), physic_schema_->num_fields(),\n           parquet_metadata->num_row_groups());\n\n  return arrow::Status::OK();\n}\n\narrow::Status MmapForwardStore::OpenIPC(\n    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {\n  std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader;\n  arrow::Result<std::shared_ptr<arrow::ipc::RecordBatchFileReader>> result =\n      arrow::ipc::RecordBatchFileReader::Open(file);\n  ARROW_RETURN_NOT_OK(result.status());\n  reader = std::move(result).ValueOrDie();\n  ipc_file_reader_ = std::move(reader);\n  PARQUET_ASSIGN_OR_THROW(table_, ipc_file_reader_->ToTable());\n\n  if 
(table_->num_columns() == 0) {\n    return arrow::Status::Invalid(\"IPC file has no columns\");\n  }\n\n  auto chunked_array = table_->column(0);\n  for (int i = 0; i < chunked_array->num_chunks(); ++i) {\n    auto chunk = chunked_array->chunk(i);\n\n    if (chunk->length() == 0) {\n      return arrow::Status::Invalid(\"Encountered empty chunk at index %d\", i);\n    }\n\n    chunk_index_map_.emplace_back(num_rows_, num_rows_ + chunk->length() - 1);\n    num_rows_ += chunk->length();\n\n    // Check if all chunks have the same size except possibly the last one\n    if (fixed_batch_size_ == -1) {\n      fixed_batch_size_ = chunk->length();\n    } else if (fixed_batch_size_ != chunk->length()) {\n      if (i != chunked_array->num_chunks() - 1) {\n        is_fixed_batch_size_ = false;\n      }\n    }\n  }\n\n  physic_schema_ = ipc_file_reader_->schema();\n  LOG_INFO(\n      \"Opened IPC with %lld rows, %d cols, %d chunks, is_fixed_batch_size[%d] \"\n      \"fixed_batch_size[%lld] physic_schema: %s\",\n      static_cast<long long>(num_rows_), physic_schema_->num_fields(),\n      chunked_array->num_chunks(), is_fixed_batch_size_,\n      static_cast<long long>(fixed_batch_size_),\n      physic_schema_->ToString().c_str());\n\n  return arrow::Status::OK();\n}\n\nbool MmapForwardStore::validate(const std::vector<std::string> &columns) const {\n  if (columns.empty()) {\n    LOG_ERROR(\"Empty columns\");\n    return false;\n  }\n  for (auto &column : columns) {\n    if (column == LOCAL_ROW_ID) {\n      continue;\n    }\n    if (physic_schema_->GetFieldIndex(column) == -1) {\n      LOG_ERROR(\"Validate failed. 
unknown column: %s\", column.c_str());\n      return false;\n    }\n  }\n  return true;\n}\n\nRecordBatchReaderPtr MmapForwardStore::ScanParquet(\n    const std::vector<std::string> &columns) {\n  // Create a new parquet reader for scanning\n  std::unique_ptr<parquet::arrow::FileReader> parquet_reader;\n  auto parquet_file_reader = parquet::ParquetFileReader::Open(file_);\n  auto status = parquet::arrow::FileReader::Make(arrow::default_memory_pool(),\n                                                 std::move(parquet_file_reader),\n                                                 &parquet_reader);\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to create parquet reader: %s\", status.message().c_str());\n    return nullptr;\n  }\n\n  auto rb_reader = std::make_shared<ParquetRecordBatchReader>(\n      parquet_reader, columns, physic_schema_, file_path_, false);\n  return rb_reader;\n}\n\nRecordBatchReaderPtr MmapForwardStore::ScanIPC(\n    const std::vector<std::string> &columns) {\n  std::vector<int> col_indices;\n  for (auto &column : columns) {\n    int idx = physic_schema_->GetFieldIndex(column);\n    if (idx == -1) continue;\n    col_indices.push_back(idx);\n  }\n\n  auto result = table_->SelectColumns(col_indices);\n  if (!result.ok()) {\n    LOG_ERROR(\"Failed to select columns: %s\",\n              result.status().message().c_str());\n    return nullptr;\n  }\n  auto sub_table = std::move(result).ValueOrDie();\n\n  return std::make_shared<arrow::TableBatchReader>(sub_table);\n}\n\nTablePtr MmapForwardStore::FetchParquet(const std::vector<std::string> &columns,\n                                        const std::vector<int> &indices) {\n  bool need_local_doc_id = false;\n  std::vector<int> col_indices;\n  std::vector<int> data_column_positions;\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      need_local_doc_id = true;\n    } else {\n      int idx = physic_schema_->GetFieldIndex(columns[i]);\n      if (idx == -1) 
return nullptr;\n      col_indices.push_back(idx);\n      data_column_positions.push_back(static_cast<int>(i));\n    }\n  }\n\n  std::vector<std::vector<std::pair<int, std::shared_ptr<arrow::Scalar>>>>\n      sorted_scalars(col_indices.size());\n  std::vector<std::pair<int, int64_t>> local_doc_id_pairs;\n\n  // Group by row group, but keep track of original output position\n  std::unordered_map<int, std::vector<std::pair<int, uint64_t>>> rg_to_local;\n  int output_row = 0;\n  for (int global_row : indices) {\n    if (global_row < 0 || global_row >= num_rows_) return nullptr;\n    int rg_id = FindRowGroupForRow(global_row);\n    int64_t offset = GetRowGroupOffset(rg_id);\n    uint64_t local_in_rg = global_row - offset;\n    rg_to_local[rg_id].emplace_back(output_row, local_in_rg);\n    if (need_local_doc_id) {\n      local_doc_id_pairs.emplace_back(output_row, global_row);\n    }\n    ++output_row;\n  }\n\n  // Read each row group and extract scalars at required positions\n  for (const auto &[rg_id, pairs] : rg_to_local) {\n    std::shared_ptr<arrow::Table> rg_table;\n    auto status =\n        parquet_reader_->RowGroup(rg_id)->ReadTable(col_indices, &rg_table);\n    if (!status.ok()) {\n      LOG_ERROR(\"Failed to read row group %d\", rg_id);\n      return nullptr;\n    }\n\n    // Concatenate chunks for faster random access\n    std::vector<std::shared_ptr<arrow::Array>> flat_columns;\n    for (const auto &col : rg_table->columns()) {\n      auto flat_result =\n          arrow::Concatenate(col->chunks(), arrow::default_memory_pool());\n      if (!flat_result.ok()) {\n        LOG_ERROR(\"Failed to concatenate chunks for rg {%d} status:%s\", rg_id,\n                  flat_result.status().message().c_str());\n        return nullptr;\n      }\n      flat_columns.push_back(flat_result.ValueOrDie());\n    }\n\n    // Extract scalars for this RG\n    for (size_t i = 0; i < col_indices.size(); ++i) {\n      auto &dst = sorted_scalars[i];\n      const auto &array = 
flat_columns[i];\n\n      for (const auto &[output_row_tmp, local_idx] : pairs) {\n        auto scalar_result = array->GetScalar(local_idx);\n        if (!scalar_result.ok()) {\n          LOG_ERROR(\"Failed to get scalar for row %zu status: %s\",\n                    (size_t)local_idx,\n                    scalar_result.status().ToString().c_str());\n        }\n        dst.emplace_back(output_row_tmp, scalar_result.ValueOrDie());\n      }\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());\n\n  for (size_t i = 0; i < sorted_scalars.size(); ++i) {\n    auto &vec = sorted_scalars[i];\n    std::sort(vec.begin(), vec.end());\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars;\n    ordered_scalars.reserve(vec.size());\n    for (auto &p : vec) {\n      ordered_scalars.push_back(std::move(p.second));\n    }\n\n    std::shared_ptr<arrow::Array> arr;\n    auto status = ConvertScalarVectorToArrayByType(ordered_scalars, &arr);\n    if (!status.ok()) {\n      LOG_ERROR(\"ConvertScalarVectorToArrayByType failed: %s\",\n                status.message().c_str());\n      return nullptr;\n    }\n\n    int position = data_column_positions[i];\n    result_arrays[position] = std::move(arr);\n  }\n\n  if (need_local_doc_id) {\n    std::sort(local_doc_id_pairs.begin(), local_doc_id_pairs.end());\n    std::vector<uint64_t> values;\n    values.reserve(local_doc_id_pairs.size());\n    for (const auto &p : local_doc_id_pairs) {\n      values.push_back(p.second);\n    }\n\n    // Create UInt64Array\n    auto buffer_result = arrow::AllocateBuffer(values.size() * sizeof(uint64_t),\n                                               arrow::default_memory_pool());\n    if (!buffer_result.ok()) return nullptr;\n    auto buffer = std::move(buffer_result.ValueOrDie());\n    std::memcpy(buffer->mutable_data(), values.data(),\n                values.size() * sizeof(uint64_t));\n\n    std::vector<std::shared_ptr<arrow::Buffer>> buffers;\n    
buffers.push_back(nullptr);  // no null bitmap\n    buffers.push_back(std::shared_ptr<arrow::Buffer>(buffer.release()));\n\n    auto data = arrow::ArrayData::Make(arrow::uint64(),\n                                       static_cast<uint64_t>(values.size()),\n                                       std::move(buffers), /*null_count=*/0);\n\n    for (size_t i = 0; i < columns.size(); ++i) {\n      if (columns[i] == LOCAL_ROW_ID) {\n        result_arrays[i] = std::make_shared<arrow::UInt64Array>(data);\n      }\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::Field>> selected_fields;\n  for (const auto &col : columns) {\n    if (col == LOCAL_ROW_ID) {\n      selected_fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));\n    } else {\n      selected_fields.push_back(physic_schema_->GetFieldByName(col));\n    }\n  }\n\n  auto out_schema = std::make_shared<arrow::Schema>(selected_fields);\n\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;\n  chunks.reserve(result_arrays.size());\n  for (auto &arr : result_arrays) {\n    chunks.emplace_back(std::make_shared<arrow::ChunkedArray>(arr));\n  }\n\n  return arrow::Table::Make(out_schema, chunks,\n                            static_cast<int64_t>(indices.size()));\n}\n\nExecBatchPtr MmapForwardStore::FetchParquet(\n    const std::vector<std::string> &columns, int index) {\n  std::vector<int> col_indices;\n  for (const auto &col : columns) {\n    int idx = physic_schema_->GetFieldIndex(col);\n    if (idx == -1) return nullptr;\n    col_indices.push_back(idx);\n  }\n\n  int rg_id = FindRowGroupForRow(index);\n  int64_t offset = GetRowGroupOffset(rg_id);\n  uint64_t local_in_rg = index - offset;\n\n  std::shared_ptr<arrow::Table> rg_table;\n  auto status =\n      parquet_reader_->RowGroup(rg_id)->ReadTable(col_indices, &rg_table);\n  if (!status.ok()) {\n    LOG_ERROR(\"Failed to read row group %d\", rg_id);\n    return nullptr;\n  }\n\n  // Extract scalars\n  std::vector<arrow::Datum> scalars;\n  
scalars.reserve(columns.size());\n  for (const auto &column : columns) {\n    const auto &array = rg_table->GetColumnByName(column);\n    auto scalar_result = array->GetScalar(local_in_rg);\n    scalars.emplace_back(std::move(scalar_result.ValueOrDie()));\n  }\n\n  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);\n}\n\nTablePtr MmapForwardStore::FetchIPC(const std::vector<std::string> &columns,\n                                    const std::vector<int> &indices) {\n  std::vector<std::pair<int64_t, int64_t>> indices_in_table;\n  auto chunked_array = table_->column(0);\n  for (const auto &target_index : indices) {\n    int target_chunk_index = -1;\n    int64_t offset_in_chunk = -1;\n    if (FindTargetChunk(target_index, chunked_array->num_chunks(),\n                        &target_chunk_index, &offset_in_chunk)) {\n      indices_in_table.emplace_back(target_chunk_index, offset_in_chunk);\n    } else {\n      LOG_ERROR(\"Failed to find target chunk for index %d\", target_index);\n      return nullptr;\n    }\n  }\n\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns;\n  std::vector<std::shared_ptr<arrow::Field>> result_fields;\n\n  for (size_t i = 0; i < columns.size(); ++i) {\n    if (columns[i] == LOCAL_ROW_ID) {\n      std::shared_ptr<arrow::Array> array;\n      arrow::UInt64Builder builder;\n      std::vector<uint64_t> u64_indices(indices.begin(), indices.end());\n      auto status = builder.AppendValues(u64_indices);\n      if (!status.ok()) {\n        LOG_ERROR(\"Failed to append values to UInt64Builder: %s\",\n                  status.ToString().c_str());\n        return nullptr;\n      }\n\n      status = builder.Finish(&array);\n      if (!status.ok()) {\n        LOG_ERROR(\"Failed to finish UInt64Builder: %s\",\n                  status.ToString().c_str());\n        return nullptr;\n      }\n\n      result_columns.push_back(std::make_shared<arrow::ChunkedArray>(array));\n      result_fields.push_back(\n          
arrow::field(LOCAL_ROW_ID, arrow::uint64(), false));\n    } else {\n      std::shared_ptr<arrow::Array> array;\n      auto col_array = table_->GetColumnByName(columns[i]);\n      auto status =\n          BuildArrayFromIndicesWithType(col_array, indices_in_table, &array);\n      if (!status.ok()) {\n        LOG_ERROR(\"BuildArrayFromIndices failed: %s\",\n                  status.ToString().c_str());\n        return nullptr;\n      }\n      result_columns.push_back(std::make_shared<arrow::ChunkedArray>(array));\n      result_fields.push_back(physic_schema_->GetFieldByName(columns[i]));\n    }\n  }\n\n  auto result_schema = std::make_shared<arrow::Schema>(result_fields);\n  return arrow::Table::Make(result_schema, result_columns, indices.size());\n}\n\nExecBatchPtr MmapForwardStore::FetchIPC(const std::vector<std::string> &columns,\n                                        int index) {\n  // Extract scalars\n  std::vector<arrow::Datum> scalars;\n  scalars.reserve(columns.size());\n  for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) {\n    //! 
NOTICE: no need to check LOCAL_ROW_ID here\n    int field_index = table_->schema()->GetFieldIndex(columns[col_idx]);\n    auto chunked_array = table_->column(field_index);\n    auto scalar_result = chunked_array->GetScalar(index);\n    if (scalar_result.ok()) {\n      scalars.push_back(scalar_result.ValueOrDie());\n    } else {\n      LOG_ERROR(\"Get scalar failed for column %zu, row %d: %s\", col_idx, index,\n                scalar_result.status().ToString().c_str());\n      return nullptr;\n    }\n  }\n\n  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);\n}\n\nint MmapForwardStore::FindRowGroupForRow(int64_t row) {\n  auto it = std::upper_bound(row_group_offsets_.begin(),\n                             row_group_offsets_.end(), row);\n  if (it == row_group_offsets_.begin()) {\n    return 0;\n  }\n  return static_cast<int>(std::distance(row_group_offsets_.begin(), it) - 1);\n}\n\nint64_t MmapForwardStore::GetRowGroupOffset(int rg_id) {\n  return row_group_offsets_[rg_id];\n}\n\nbool MmapForwardStore::FindTargetChunk(int target_index, int num_chunks,\n                                       int *target_chunk_index,\n                                       int64_t *offset_in_chunk) {\n  if (target_index < 0 || target_index >= num_rows_) {\n    return false;\n  }\n\n  if (is_fixed_batch_size_ && fixed_batch_size_ > 0) {\n    // direct calculation\n    int chunk_index = target_index / fixed_batch_size_;\n    if (chunk_index < 0 || chunk_index >= num_chunks) {\n      return false;\n    }\n    *target_chunk_index = chunk_index;\n    *offset_in_chunk = target_index % fixed_batch_size_;\n    return true;\n  } else {\n    // binary search\n    int left = 0;\n    int right = num_chunks - 1;\n\n    while (left <= right) {\n      int mid = left + (right - left) / 2;\n      const auto &range = chunk_index_map_[mid];\n\n      if (target_index >= range.first && target_index <= range.second) {\n        *target_chunk_index = mid;\n        *offset_in_chunk = 
target_index - range.first;\n        return true;\n      } else if (target_index < range.first) {\n        right = mid - 1;\n      } else {\n        left = mid + 1;\n      }\n    }\n  }\n\n  return false;\n}\n\nTablePtr MmapForwardStore::fetch(const std::vector<std::string> &columns,\n                                 const std::vector<int> &indices) {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  if (indices.empty()) {\n    arrow::ArrayVector empty_arrays;\n    auto fields = SelectFields(physic_schema_, columns);\n    for (const auto &field : fields) {\n      empty_arrays.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());\n    }\n    return arrow::Table::Make(std::make_shared<arrow::Schema>(fields),\n                              empty_arrays, 0);\n  }\n\n  if (format_ == FileFormat::PARQUET) {\n    return FetchParquet(columns, indices);\n  } else {\n    return FetchIPC(columns, indices);\n  }\n}\n\nExecBatchPtr MmapForwardStore::fetch(const std::vector<std::string> &columns,\n                                     int index) {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  if (index < 0 || index >= num_rows_) {\n    LOG_ERROR(\"Invalid global row: %d\", index);\n    return nullptr;\n  }\n\n  if (format_ == FileFormat::PARQUET) {\n    return FetchParquet(columns, index);\n  } else {\n    return FetchIPC(columns, index);\n  }\n}\n\nRecordBatchReaderPtr MmapForwardStore::scan(\n    const std::vector<std::string> &columns) {\n  if (!validate(columns)) {\n    return nullptr;\n  }\n\n  if (format_ == FileFormat::PARQUET) {\n    return ScanParquet(columns);\n  } else {\n    return ScanIPC(columns);\n  }\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/mmap_forward_store.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <iostream>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <unordered_set>\n#include <vector>\n#include <arrow/api.h>\n#include <arrow/chunked_array.h>\n#include <arrow/compute/api.h>\n#include <arrow/dataset/api.h>\n#include <arrow/filesystem/api.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/result.h>\n#include <arrow/status.h>\n#include <arrow/table.h>\n#include <arrow/util/async_generator.h>\n#include <parquet/arrow/reader.h>\n#include <parquet/column_reader.h>\n#include <parquet/exception.h>\n#include <zvec/db/status.h>\n#include \"base_forward_store.h\"\n#include \"store_helper.h\"\n\nnamespace zvec {\n\n/// MmapForwardStore implements a forward store that uses memory mapping\n/// to efficiently access data from parquet or IPC files.\nclass MmapForwardStore : public BaseForwardStore {\n public:\n  /// Pointer type for MmapForwardStore instances\n  using Ptr = std::shared_ptr<MmapForwardStore>;\n\n  /// Constructor that initializes the store with a file URI\n  /// \\param uri The URI of the file to be accessed\n  MmapForwardStore(const std::string &uri);\n  virtual ~MmapForwardStore() {}\n\n  Status Open() override;\n\n  /// Fetch specific columns and row indices from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param 
indices The list of row indices to fetch\n  /// \\return A table containing the requested data or nullptr on failure\n  TablePtr fetch(const std::vector<std::string> &columns,\n                 const std::vector<int> &indices) override;\n\n  /// Fetch specific columns and a single row index from the data source\n  /// \\param columns The list of column names to fetch\n  /// \\param index The row index to fetch\n  /// \\return An ExecBatch containing the requested data or nullptr on failure\n  ExecBatchPtr fetch(const std::vector<std::string> &columns,\n                     int index) override;\n\n  /// Scan specified columns from the data source\n  /// \\param columns The list of column names to scan\n  /// \\return A RecordBatchReader for streaming the data or nullptr on failure\n  RecordBatchReaderPtr scan(const std::vector<std::string> &columns) override;\n\n  /// Get the physical schema of the file\n  /// \\return A shared pointer to the arrow schema representing the physical\n  /// structure of the data\n  const std::shared_ptr<arrow::Schema> physic_schema() const override {\n    return physic_schema_;\n  }\n\n  TablePtr get_table() override {\n    return table_;\n  }\n\n private:\n  /// Validate that the requested columns exist in the schema\n  /// \\param columns The list of column names to validate\n  /// \\return true if all columns are valid, false otherwise\n  bool validate(const std::vector<std::string> &columns) const;\n\n private:\n  /// Open a parquet file and initialize metadata\n  /// \\param file The RandomAccessFile to read from\n  /// \\return arrow::Status indicating success or failure\n  arrow::Status OpenParquet(\n      const std::shared_ptr<arrow::io::RandomAccessFile> &file);\n\n  /// Open an IPC file and initialize metadata\n  /// \\param file The RandomAccessFile to read from\n  /// \\return arrow::Status indicating success or failure\n  arrow::Status OpenIPC(\n      const std::shared_ptr<arrow::io::RandomAccessFile> &file);\n\n  /// 
Fetch data from a parquet file\n  /// \\param columns The list of column names to fetch\n  /// \\param indices The list of row indices to fetch\n  /// \\return A table containing the requested data or nullptr on failure\n  TablePtr FetchParquet(const std::vector<std::string> &columns,\n                        const std::vector<int> &indices);\n\n  /// Fetch specific columns and a single row index from parquet file\n  /// \\param columns The list of column names to fetch\n  /// \\param index The row index to fetch\n  /// \\return An ExecBatch containing the requested data or nullptr on failure\n  ExecBatchPtr FetchParquet(const std::vector<std::string> &columns, int index);\n\n  /// Fetch data from an IPC file\n  /// \\param columns The list of column names to fetch\n  /// \\param indices The list of row indices to fetch\n  /// \\return A table containing the requested data or nullptr on failure\n  TablePtr FetchIPC(const std::vector<std::string> &columns,\n                    const std::vector<int> &indices);\n\n  /// Fetch specific columns and a single row index from IPC file\n  /// \\param columns The list of column names to fetch\n  /// \\param index The row index to fetch\n  /// \\return An ExecBatch containing the requested data or nullptr on failure\n  ExecBatchPtr FetchIPC(const std::vector<std::string> &columns, int index);\n\n  /// Scan data from a parquet file\n  /// \\param columns The list of column names to scan\n  /// \\return A RecordBatchReader for streaming the data or nullptr on failure\n  RecordBatchReaderPtr ScanParquet(const std::vector<std::string> &columns);\n\n  /// Scan data from an IPC file\n  /// \\param columns The list of column names to scan\n  /// \\return A RecordBatchReader for streaming the data or nullptr on failure\n  RecordBatchReaderPtr ScanIPC(const std::vector<std::string> &columns);\n\n  /// Find which row group contains a given row\n  /// \\param row The row index to locate\n  /// \\return The row group ID containing the 
row\n  int FindRowGroupForRow(int64_t row);\n\n  /// Get the row offset for a given row group\n  /// \\param rg_id The row group ID\n  /// \\return The row offset of the row group, or -1 on error\n  int64_t GetRowGroupOffset(int rg_id);\n\n  /// Find the chunk that contains a target row index using binary search\n  /// \\param target_index The row index to locate\n  /// \\param num_chunks The total number of chunks in the array\n  /// \\param target_chunk_index Output parameter for the index of the chunk\n  /// containing the target\n  /// \\param offset_in_chunk Output parameter for the offset within the found\n  /// chunk\n  /// \\return true if the target chunk was found, false otherwise\n  bool FindTargetChunk(int target_index, int num_chunks,\n                       int *target_chunk_index, int64_t *offset_in_chunk);\n\n private:\n  /// Format of the file being accessed\n  FileFormat format_;\n\n  /// Physical schema of the file\n  std::shared_ptr<arrow::Schema> physic_schema_;\n\n  /// Total number of rows in the file\n  int64_t num_rows_{0};\n\n  /// Path to the file\n  std::string file_path_;\n\n  // Parquet-specific members\n  /// The RandomAccessFile for reading data\n  std::shared_ptr<arrow::io::RandomAccessFile> file_;\n\n  /// The parquet file reader\n  std::unique_ptr<parquet::arrow::FileReader> parquet_reader_;\n\n  /// Number of row groups in the file\n  int64_t num_row_groups_{0};\n\n  /// Offsets of each row group\n  std::vector<int64_t> row_group_offsets_;\n\n  /// Number of rows in each row group\n  std::vector<int64_t> row_group_row_nums_;\n\n  // IPC-specific members\n  /// The IPC file reader\n  std::shared_ptr<arrow::ipc::RecordBatchFileReader> ipc_file_reader_;\n\n  std::shared_ptr<arrow::Table> table_;\n\n  std::vector<std::pair<int64_t, int64_t>> chunk_index_map_;\n\n  // For performance tuning\n  bool is_fixed_batch_size_{true};\n  int64_t fixed_batch_size_{-1};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/parquet_writer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"parquet_writer.h\"\n#include <cstdint>\n#include <iostream>\n#include <arrow/compute/api_vector.h>\n\nnamespace zvec {\n\nParquetWriter::ParquetWriter(const std::string &filepath,\n                             int64_t max_rows_per_group)\n    : filepath_(filepath), max_rows_per_group_(max_rows_per_group) {}\n\nParquetWriter::~ParquetWriter() {\n  if (!finalized_ && writer_) {\n    auto status = finalize();\n    if (!status.ok()) {\n      std::cerr << \"Auto-finalize failed: \" << status.ToString() << std::endl;\n    }\n  }\n}\n\narrow::Status ParquetWriter::insert(\n    std::shared_ptr<arrow::RecordBatchReader> reader,\n    const IndexFilter::Ptr &filter) {\n  if (!reader) {\n    return arrow::Status::Invalid(\"RecordBatchReader is null\");\n  }\n\n  if (!writer_) {\n    auto schema = reader->schema();\n    if (!schema) {\n      return arrow::Status::Invalid(\"Reader schema is null\");\n    }\n\n    ARROW_ASSIGN_OR_RAISE(outfile_,\n                          arrow::io::FileOutputStream::Open(filepath_));\n\n    parquet::WriterProperties::Builder builder;\n    std::shared_ptr<parquet::WriterProperties> props = builder.build();\n\n    auto writer = parquet::arrow::FileWriter::Open(\n        *schema, arrow::default_memory_pool(), outfile_, props);\n    ARROW_RETURN_NOT_OK(writer);\n    writer_ = std::move(writer.ValueOrDie());\n  
}\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  while (true) {\n    ARROW_ASSIGN_OR_RAISE(batch, reader->Next());\n    if (!batch) break;\n\n    if (batch->num_rows() == 0) continue;\n\n    if (max_rows_per_group_ > 0 && batch->num_rows() > max_rows_per_group_) {\n      int64_t offset = 0;\n      while (offset < batch->num_rows()) {\n        int64_t length =\n            std::min(max_rows_per_group_, batch->num_rows() - offset);\n        auto slice = batch->Slice(offset, length);\n        ARROW_RETURN_NOT_OK(write_batch(*slice, filter));\n        offset += length;\n      }\n    } else {\n      ARROW_RETURN_NOT_OK(write_batch(*batch, filter));\n    }\n\n    batch.reset();\n  }\n\n  return arrow::Status::OK();\n}\n\narrow::Status ParquetWriter::insert_batch(\n    std::shared_ptr<arrow::RecordBatch> batch, const IndexFilter::Ptr &filter) {\n  if (!batch) {\n    return arrow::Status::Invalid(\"RecordBatch is null\");\n  }\n\n  if (batch->num_rows() == 0) {\n    return arrow::Status::OK();\n  }\n\n  if (!writer_) {\n    auto schema = batch->schema();\n\n    ARROW_ASSIGN_OR_RAISE(outfile_,\n                          arrow::io::FileOutputStream::Open(filepath_));\n\n    parquet::WriterProperties::Builder builder;\n    std::shared_ptr<parquet::WriterProperties> props = builder.build();\n\n    auto writer = parquet::arrow::FileWriter::Open(\n        *schema, arrow::default_memory_pool(), outfile_, props);\n    ARROW_RETURN_NOT_OK(writer);\n    writer_ = std::move(writer.ValueOrDie());\n  }\n\n  if (max_rows_per_group_ > 0 && batch->num_rows() > max_rows_per_group_) {\n    int64_t offset = 0;\n    while (offset < batch->num_rows()) {\n      int64_t length =\n          std::min(max_rows_per_group_, batch->num_rows() - offset);\n      auto slice = batch->Slice(offset, length);\n\n      ARROW_RETURN_NOT_OK(write_batch(*slice, filter));\n\n      offset += length;\n    }\n  } else {\n    ARROW_RETURN_NOT_OK(write_batch(*batch, filter));\n  }\n\n  return 
arrow::Status::OK();\n}\n\narrow::Status ParquetWriter::write_batch(const arrow::RecordBatch &batch,\n                                         const IndexFilter::Ptr &filter) {\n  if (!filter) {\n    return writer_->WriteRecordBatch(batch);\n  }\n\n  std::vector<int64_t> selected_indices;\n  for (int64_t i = 0; i < batch.num_rows(); ++i) {\n    if (filter->is_filtered(i)) {\n      selected_indices.push_back(i);\n    }\n  }\n\n  if (selected_indices.empty()) {\n    return arrow::Status::OK();\n  }\n\n  arrow::Int64Builder builder;\n  ARROW_RETURN_NOT_OK(builder.AppendValues(selected_indices));\n  std::shared_ptr<arrow::Array> selection_array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&selection_array));\n\n  std::vector<std::shared_ptr<arrow::Array>> filtered_columns;\n  for (int i = 0; i < batch.num_columns(); ++i) {\n    arrow::Datum out;\n    ARROW_ASSIGN_OR_RAISE(\n        out, arrow::compute::Take(batch.column(i), selection_array));\n    filtered_columns.push_back(out.make_array());\n  }\n\n  auto filtered_batch = arrow::RecordBatch::Make(\n      batch.schema(), static_cast<int64_t>(selected_indices.size()),\n      filtered_columns);\n\n  return writer_->WriteRecordBatch(*filtered_batch);\n}\n\narrow::Status ParquetWriter::finalize() {\n  if (finalized_) {\n    return arrow::Status::OK();\n  }\n  if (!writer_) {\n    return arrow::Status::Invalid(\n        \"No data written, cannot finalize empty file\");\n  }\n\n  ARROW_RETURN_NOT_OK(writer_->Close());\n  writer_.reset();\n\n  ARROW_RETURN_NOT_OK(outfile_->Close());\n  outfile_.reset();\n\n  finalized_ = true;\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/parquet_writer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/status.h>\n#include <parquet/arrow/writer.h>\n#include \"db/index/common/index_filter.h\"\n#include \"forward_writer.h\"\n\nnamespace zvec {\n\nclass ParquetWriter : public ForwardWriter {\n public:\n  explicit ParquetWriter(const std::string &filepath,\n                         int64_t max_rows_per_group = 0);\n\n  ~ParquetWriter();\n\n  arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,\n                       const IndexFilter::Ptr &filter = nullptr) override;\n\n  arrow::Status insert_batch(std::shared_ptr<arrow::RecordBatch> batch,\n                             const IndexFilter::Ptr &filter = nullptr) override;\n\n  arrow::Status finalize() override;\n\n private:\n  arrow::Status write_batch(const arrow::RecordBatch &batch,\n                            const IndexFilter::Ptr &filter);\n\n private:\n  std::string filepath_;\n  int64_t max_rows_per_group_ = 0;\n\n  std::shared_ptr<arrow::io::FileOutputStream> outfile_;\n  std::unique_ptr<parquet::arrow::FileWriter> writer_;\n  bool finalized_ = false;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/store_helper.h",
    "content": "\n// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <filesystem>\n#include <iostream>\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include <arrow/compute/api.h>\n#include <arrow/dataset/api.h>\n#include <arrow/filesystem/api.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/result.h>\n#include <arrow/status.h>\n#include <parquet/arrow/reader.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/schema.h>\n#include \"db/common/constants.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/meta.h\"\n#include \"chunked_file_writer.h\"\n\n\nnamespace zvec {\n\ninline FileFormat InferFileFormat(const std::string &file_path) {\n  std::string ext = std::filesystem::path(file_path).extension();\n  std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);\n  if (ext == \".parquet\") {\n    return FileFormat::PARQUET;\n  } else if (ext == \".feather\" || ext == \".ipc\" || ext == \".arrow\") {\n    return FileFormat::IPC;\n  } else {\n    return FileFormat::UNKNOWN;\n  }\n}\n\ninline arrow::Status ConvertFieldSchemaToArrowField(\n    const FieldSchema *field, std::shared_ptr<arrow::Field> *out) {\n  switch (field->data_type()) {\n    case DataType::BINARY:\n      *out = arrow::field(field->name(), arrow::binary(), field->nullable());\n      break;\n    case DataType::STRING:\n      *out = 
arrow::field(field->name(), arrow::utf8(), field->nullable());\n      break;\n    case DataType::BOOL:\n      *out = arrow::field(field->name(), arrow::boolean(), field->nullable());\n      break;\n    case DataType::INT32:\n      *out = arrow::field(field->name(), arrow::int32(), field->nullable());\n      break;\n    case DataType::INT64:\n      *out = arrow::field(field->name(), arrow::int64(), field->nullable());\n      break;\n    case DataType::UINT32:\n      *out = arrow::field(field->name(), arrow::uint32(), field->nullable());\n      break;\n    case DataType::UINT64:\n      *out = arrow::field(field->name(), arrow::uint64(), field->nullable());\n      break;\n    case DataType::FLOAT:\n      *out = arrow::field(field->name(), arrow::float32(), field->nullable());\n      break;\n    case DataType::DOUBLE:\n      *out = arrow::field(field->name(), arrow::float64(), field->nullable());\n      break;\n    case DataType::ARRAY_BINARY:\n      *out = arrow::field(field->name(), arrow::list(arrow::binary()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_STRING:\n      *out = arrow::field(field->name(), arrow::list(arrow::utf8()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_BOOL:\n      *out = arrow::field(field->name(), arrow::list(arrow::boolean()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_INT32:\n      *out = arrow::field(field->name(), arrow::list(arrow::int32()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_INT64:\n      *out = arrow::field(field->name(), arrow::list(arrow::int64()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_UINT32:\n      *out = arrow::field(field->name(), arrow::list(arrow::uint32()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_UINT64:\n      *out = arrow::field(field->name(), 
arrow::list(arrow::uint64()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_FLOAT:\n      *out = arrow::field(field->name(), arrow::list(arrow::float32()),\n                          field->nullable());\n      break;\n    case DataType::ARRAY_DOUBLE:\n      *out = arrow::field(field->name(), arrow::list(arrow::float64()),\n                          field->nullable());\n      break;\n    default:\n      return arrow::Status::Invalid(\n          \"Unsupported data type \",\n          DataTypeCodeBook::AsString(field->data_type()));\n  }\n\n  return arrow::Status::OK();\n}\n\ninline arrow::Status ConvertCollectionSchemaToArrowFields(\n    const CollectionSchema::Ptr &schema, arrow::FieldVector *out) {\n  arrow::FieldVector fields;\n  fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64(), false));\n  fields.push_back(arrow::field(USER_ID, arrow::utf8(), false));\n  for (auto &field : schema->forward_fields()) {\n    std::shared_ptr<arrow::Field> arrow_field;\n    ARROW_RETURN_NOT_OK(\n        ConvertFieldSchemaToArrowField(field.get(), &arrow_field));\n    fields.push_back(arrow_field);\n  }\n  *out = std::move(fields);\n  return arrow::Status::OK();\n}\n\ntemplate <typename BuilderType, typename ScalarType>\ninline arrow::Status ConvertScalarVectorToArray(\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,\n    std::shared_ptr<arrow::Array> *out) {\n  std::shared_ptr<arrow::Array> arr;\n  BuilderType builder;\n  for (const auto &scalar : ordered_scalars) {\n    if (scalar == nullptr || scalar->is_valid == false) {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n      continue;\n    }\n    auto status =\n        builder.Append(dynamic_cast<const ScalarType &>(*scalar).value);\n  }\n  ARROW_RETURN_NOT_OK(builder.Finish(&arr));\n  *out = arr;\n  return arrow::Status::OK();\n}\n\ntemplate <>\ninline arrow::Status\nConvertScalarVectorToArray<arrow::StringBuilder, arrow::StringScalar>(\n    
std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,\n    std::shared_ptr<arrow::Array> *out) {\n  std::shared_ptr<arrow::Array> arr;\n  arrow::StringBuilder builder;\n  for (const auto &scalar : ordered_scalars) {\n    if (scalar == nullptr || scalar->is_valid == false) {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n      continue;\n    }\n    const auto &str = dynamic_cast<const arrow::StringScalar &>(*scalar).value;\n    ARROW_RETURN_NOT_OK(\n        builder.Append(str->data(), static_cast<int>(str->size())));\n  }\n  ARROW_RETURN_NOT_OK(builder.Finish(&arr));\n  *out = arr;\n  return arrow::Status::OK();\n}\n\ntemplate <>\ninline arrow::Status\nConvertScalarVectorToArray<arrow::BinaryBuilder, arrow::BinaryScalar>(\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,\n    std::shared_ptr<arrow::Array> *out) {\n  std::shared_ptr<arrow::Array> arr;\n  arrow::BinaryBuilder builder;\n  for (const auto &scalar : ordered_scalars) {\n    if (scalar == nullptr || scalar->is_valid == false) {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n      continue;\n    }\n    const auto &binary_scalar =\n        dynamic_cast<const arrow::BinaryScalar &>(*scalar);\n    if (binary_scalar.value) {\n      ARROW_RETURN_NOT_OK(\n          builder.Append(binary_scalar.value->data(),\n                         static_cast<int>(binary_scalar.value->size())));\n    } else {\n      ARROW_RETURN_NOT_OK(builder.AppendEmptyValue());\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder.Finish(&arr));\n  *out = arr;\n  return arrow::Status::OK();\n}\n\ninline arrow::Status ConvertScalarVectorToArrayByType(\n    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,\n    std::shared_ptr<arrow::Array> *out) {\n  auto type = ordered_scalars.empty() ? 
nullptr : ordered_scalars[0]->type;\n  if (type == nullptr)\n    return arrow::Status::Invalid(\"Cannot convert empty vector to array\");\n  arrow::Status status;\n  switch (type->id()) {\n    case arrow::Type::BINARY:\n      status =\n          ConvertScalarVectorToArray<arrow::BinaryBuilder, arrow::BinaryScalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::BOOL:\n      status = ConvertScalarVectorToArray<arrow::BooleanBuilder,\n                                          arrow::BooleanScalar>(ordered_scalars,\n                                                                out);\n      break;\n    case arrow::Type::INT32:\n      status =\n          ConvertScalarVectorToArray<arrow::Int32Builder, arrow::Int32Scalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::UINT32:\n      status =\n          ConvertScalarVectorToArray<arrow::UInt32Builder, arrow::UInt32Scalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::INT64:\n      status =\n          ConvertScalarVectorToArray<arrow::Int64Builder, arrow::Int64Scalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::UINT64:\n      status =\n          ConvertScalarVectorToArray<arrow::UInt64Builder, arrow::UInt64Scalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::FLOAT:\n      status =\n          ConvertScalarVectorToArray<arrow::FloatBuilder, arrow::FloatScalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::DOUBLE:\n      status =\n          ConvertScalarVectorToArray<arrow::DoubleBuilder, arrow::DoubleScalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::STRING:\n      status =\n          ConvertScalarVectorToArray<arrow::StringBuilder, arrow::StringScalar>(\n              ordered_scalars, out);\n      break;\n    case arrow::Type::LIST: {\n      if (ordered_scalars.empty()) {\n        return arrow::Status::Invalid(\n 
           \"Cannot convert empty vector to list array\");\n      }\n\n      auto list_type = std::dynamic_pointer_cast<arrow::ListType>(type);\n      if (!list_type) {\n        return arrow::Status::TypeError(\"Expected ListType for LIST scalar\");\n      }\n\n      std::unique_ptr<arrow::ArrayBuilder> value_builder;\n      ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(),\n                                             list_type->value_type(),\n                                             &value_builder));\n\n      arrow::ListBuilder builder(arrow::default_memory_pool(),\n                                 std::move(value_builder), list_type);\n\n      for (const auto &scalar : ordered_scalars) {\n        if (scalar == nullptr || scalar->is_valid == false) {\n          ARROW_RETURN_NOT_OK(builder.AppendNull());\n          continue;\n        }\n\n        auto list_scalar = std::dynamic_pointer_cast<arrow::ListScalar>(scalar);\n        if (!list_scalar) {\n          return arrow::Status::TypeError(\"Expected ListScalar for LIST type\");\n        }\n\n        ARROW_RETURN_NOT_OK(builder.Append());\n        auto value_builder_ptr = builder.value_builder();\n        ARROW_RETURN_NOT_OK(value_builder_ptr->AppendArraySlice(\n            *list_scalar->value->data(), 0, list_scalar->value->length()));\n      }\n\n      std::shared_ptr<arrow::Array> arr;\n      ARROW_RETURN_NOT_OK(builder.Finish(&arr));\n      *out = arr;\n      return arrow::Status::OK();\n    }\n    default:\n      // TODO other type\n      return arrow::Status::NotImplemented(\"Unsupported type\");\n  }\n\n  return status;\n}\n\ntemplate <typename ArrowBuilderType, typename ValueType>\ninline arrow::Status AppendValue(ArrowBuilderType *builder, const Doc &doc,\n                                 std::shared_ptr<arrow::Field> field) {\n  auto value = doc.get<ValueType>(field->name());\n  if (!value.has_value()) {\n    return builder->AppendNull();\n  }\n  return 
builder->Append(value.value());\n}\n\ninline arrow::Status AppendFieldValueToBuilder(\n    const Doc &doc, const std::shared_ptr<arrow::Field> &field,\n    arrow::ArrayBuilder *builder) {\n  auto type = field->type()->id();\n  switch (type) {\n    case arrow::Type::STRING: {\n      auto string_builder = dynamic_cast<arrow::StringBuilder *>(builder);\n      return AppendValue<arrow::StringBuilder, std::string>(string_builder, doc,\n                                                            field);\n    }\n    case arrow::Type::INT32: {\n      auto int32_builder = dynamic_cast<arrow::Int32Builder *>(builder);\n      return AppendValue<arrow::Int32Builder, int32_t>(int32_builder, doc,\n                                                       field);\n    }\n    case arrow::Type::INT64: {\n      auto int64_builder = dynamic_cast<arrow::Int64Builder *>(builder);\n      return AppendValue<arrow::Int64Builder, int64_t>(int64_builder, doc,\n                                                       field);\n    }\n    case arrow::Type::UINT32: {\n      auto uint32_builder = dynamic_cast<arrow::UInt32Builder *>(builder);\n      return AppendValue<arrow::UInt32Builder, uint32_t>(uint32_builder, doc,\n                                                         field);\n    }\n    case arrow::Type::UINT64: {\n      auto uint64_builder = dynamic_cast<arrow::UInt64Builder *>(builder);\n      return AppendValue<arrow::UInt64Builder, uint64_t>(uint64_builder, doc,\n                                                         field);\n    }\n    case arrow::Type::DOUBLE: {\n      auto double_builder = dynamic_cast<arrow::DoubleBuilder *>(builder);\n      return AppendValue<arrow::DoubleBuilder, double>(double_builder, doc,\n                                                       field);\n    }\n    case arrow::Type::FLOAT: {\n      auto float_builder = dynamic_cast<arrow::FloatBuilder *>(builder);\n      return AppendValue<arrow::FloatBuilder, float>(float_builder, doc, field);\n    }\n    case 
arrow::Type::BOOL: {\n      auto bool_builder = dynamic_cast<arrow::BooleanBuilder *>(builder);\n      return AppendValue<arrow::BooleanBuilder, bool>(bool_builder, doc, field);\n    }\n    case arrow::Type::BINARY: {\n      auto binary_builder = dynamic_cast<arrow::BinaryBuilder *>(builder);\n      return AppendValue<arrow::BinaryBuilder, std::string>(binary_builder, doc,\n                                                            field);\n    }\n    case arrow::Type::LIST: {\n      auto list_builder = dynamic_cast<arrow::ListBuilder *>(builder);\n      auto list_type =\n          std::dynamic_pointer_cast<arrow::ListType>(field->type());\n\n      if (!list_type) {\n        return arrow::Status::TypeError(\"Field type is not ListType\");\n      }\n\n      auto value_type = list_type->value_type()->id();\n\n      switch (value_type) {\n        case arrow::Type::BINARY: {\n          auto value = doc.get<std::vector<std::string>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto binary_builder = dynamic_cast<arrow::BinaryBuilder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(binary_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::BOOL: {\n          auto value = doc.get<std::vector<bool>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto bool_builder = dynamic_cast<arrow::BooleanBuilder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(bool_builder->Append(item));\n          }\n    
      break;\n        }\n\n        case arrow::Type::INT32: {\n          auto value = doc.get<std::vector<int32_t>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto int32_builder = dynamic_cast<arrow::Int32Builder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(int32_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::INT64: {\n          auto value = doc.get<std::vector<int64_t>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto int64_builder = dynamic_cast<arrow::Int64Builder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(int64_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::UINT32: {\n          auto value = doc.get<std::vector<uint32_t>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto uint32_builder = dynamic_cast<arrow::UInt32Builder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(uint32_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::UINT64: {\n          auto value = doc.get<std::vector<uint64_t>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          
}\n\n          const auto &list_value = value.value();\n          auto uint64_builder = dynamic_cast<arrow::UInt64Builder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(uint64_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::FLOAT: {\n          auto value = doc.get<std::vector<float>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto float_builder = dynamic_cast<arrow::FloatBuilder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(float_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::DOUBLE: {\n          auto value = doc.get<std::vector<double>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto double_builder = dynamic_cast<arrow::DoubleBuilder *>(\n              list_builder->value_builder());\n\n          ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(double_builder->Append(item));\n          }\n          break;\n        }\n\n        case arrow::Type::STRING: {\n          auto value = doc.get<std::vector<std::string>>(field->name());\n          if (!value.has_value()) {\n            return list_builder->AppendNull();\n          }\n\n          const auto &list_value = value.value();\n          auto string_builder = dynamic_cast<arrow::StringBuilder *>(\n              list_builder->value_builder());\n\n          
ARROW_RETURN_NOT_OK(list_builder->Append());\n          for (const auto &item : list_value) {\n            ARROW_RETURN_NOT_OK(string_builder->Append(item));\n          }\n          break;\n        }\n\n        default:\n          return arrow::Status::NotImplemented(\n              \"unsupported list element type: \", value_type);\n      }\n\n      return arrow::Status::OK();\n    }\n    default:\n      return arrow::Status::NotImplemented(\"unsupported type: \", type,\n                                           \", field: \", field->name());\n  }\n}\n\ntemplate <typename ArrowArrayType, typename BuilderType>\ninline arrow::Status BuildArrayFromIndices(\n    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,\n    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,\n    std::shared_ptr<arrow::Array> *out_array) {\n  BuilderType builder;\n  ARROW_RETURN_NOT_OK(\n      builder.Reserve(static_cast<int64_t>(indices_in_chunked_array.size())));\n\n  int64_t last_chunk_index = -1;\n  const ArrowArrayType *cached_chunk{nullptr};\n\n  bool no_null = chunked_array->null_count() == 0;\n  for (const auto &pair : indices_in_chunked_array) {\n    if (pair.first != last_chunk_index) {\n      const auto &chunk = chunked_array->chunk(pair.first);\n      cached_chunk = static_cast<const ArrowArrayType *>(chunk.get());\n      last_chunk_index = pair.first;\n    }\n\n    if (no_null || !cached_chunk->IsNull(pair.second)) {\n      ARROW_RETURN_NOT_OK(builder.Append(cached_chunk->Value(pair.second)));\n    } else {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n    }\n  }\n\n  return builder.Finish(out_array);\n}\n\ntemplate <>\ninline arrow::Status\nBuildArrayFromIndices<arrow::StringArray, arrow::StringBuilder>(\n    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,\n    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,\n    std::shared_ptr<arrow::Array> *out_array) {\n  arrow::StringBuilder builder;\n  
ARROW_RETURN_NOT_OK(\n      builder.Reserve(static_cast<int64_t>(indices_in_chunked_array.size())));\n\n  bool no_null = chunked_array->null_count() == 0;\n  const arrow::StringArray *cached_chunk{nullptr};\n\n  int64_t last_chunk_index = -1;\n  int64_t data_size = 0;\n  for (const auto &pair : indices_in_chunked_array) {\n    if (pair.first != last_chunk_index) {\n      const auto &chunk = chunked_array->chunk(pair.first);\n      cached_chunk = static_cast<const arrow::StringArray *>(chunk.get());\n      last_chunk_index = pair.first;\n    }\n\n    if (no_null || !cached_chunk->IsNull(pair.second)) {\n      data_size += cached_chunk->Value(pair.second).size();\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder.ReserveData(data_size));\n\n\n  last_chunk_index = -1;\n  for (const auto &pair : indices_in_chunked_array) {\n    if (pair.first != last_chunk_index) {\n      const auto &chunk = chunked_array->chunk(pair.first);\n      cached_chunk = static_cast<const arrow::StringArray *>(chunk.get());\n      last_chunk_index = pair.first;\n    }\n\n    if (no_null || !cached_chunk->IsNull(pair.second)) {\n      ARROW_RETURN_NOT_OK(builder.Append(cached_chunk->Value(pair.second)));\n    } else {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n    }\n  }\n\n  return builder.Finish(out_array);\n}\n\ninline arrow::Status BuildListArrayFromIndices(\n    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,\n    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,\n    const std::shared_ptr<arrow::ListType> &list_type,\n    std::shared_ptr<arrow::Array> *out_array) {\n  std::unique_ptr<arrow::ArrayBuilder> value_builder;\n  ARROW_RETURN_NOT_OK(arrow::MakeBuilder(\n      arrow::default_memory_pool(), list_type->value_type(), &value_builder));\n\n  arrow::ListBuilder builder(arrow::default_memory_pool(),\n                             std::move(value_builder), list_type);\n  ARROW_RETURN_NOT_OK(\n      
builder.Reserve(static_cast<int64_t>(indices_in_chunked_array.size())));\n\n  int64_t last_chunk_index = -1;\n  const arrow::ListArray *cached_chunk{nullptr};\n\n  for (const auto &pair : indices_in_chunked_array) {\n    if (pair.first != last_chunk_index) {\n      const auto &chunk = chunked_array->chunk(pair.first);\n      cached_chunk = std::static_pointer_cast<arrow::ListArray>(chunk).get();\n      last_chunk_index = pair.first;\n    }\n\n    if (cached_chunk->IsValid(pair.second)) {\n      auto offset = cached_chunk->value_offset(pair.second);\n      auto length = cached_chunk->value_length(pair.second);\n\n      ARROW_RETURN_NOT_OK(builder.Append());\n      auto value_builder_ptr = builder.value_builder();\n      auto values = cached_chunk->values();\n      ARROW_RETURN_NOT_OK(\n          value_builder_ptr->AppendArraySlice(*values->data(), offset, length));\n    } else {\n      ARROW_RETURN_NOT_OK(builder.AppendNull());\n    }\n  }\n\n  return builder.Finish(out_array);\n}\n\ninline arrow::Status BuildArrayFromIndicesWithType(\n    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,\n    const std::vector<std::pair<int64_t, int64_t>> &indices_in_table,\n    std::shared_ptr<arrow::Array> *out_array) {\n  auto col_data_type = chunked_array->type();\n  switch (col_data_type->id()) {\n    case arrow::Type::STRING:\n      return BuildArrayFromIndices<arrow::StringArray, arrow::StringBuilder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::INT32:\n      return BuildArrayFromIndices<arrow::Int32Array, arrow::Int32Builder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::INT64:\n      return BuildArrayFromIndices<arrow::Int64Array, arrow::Int64Builder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::UINT32:\n      return BuildArrayFromIndices<arrow::UInt32Array, arrow::UInt32Builder>(\n          chunked_array, indices_in_table, out_array);\n    case 
arrow::Type::UINT64:\n      return BuildArrayFromIndices<arrow::UInt64Array, arrow::UInt64Builder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::DOUBLE:\n      return BuildArrayFromIndices<arrow::DoubleArray, arrow::DoubleBuilder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::FLOAT:\n      return BuildArrayFromIndices<arrow::FloatArray, arrow::FloatBuilder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::BOOL:\n      return BuildArrayFromIndices<arrow::BooleanArray, arrow::BooleanBuilder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::BINARY:\n      return BuildArrayFromIndices<arrow::BinaryArray, arrow::BinaryBuilder>(\n          chunked_array, indices_in_table, out_array);\n    case arrow::Type::LIST: {\n      auto list_type =\n          std::dynamic_pointer_cast<arrow::ListType>(col_data_type);\n      return BuildListArrayFromIndices(chunked_array, indices_in_table,\n                                       list_type, out_array);\n    }\n    default:\n      return arrow::Status::NotImplemented(\"Unsupported element type: \",\n                                           col_data_type->name().c_str());\n  }\n}\n\ninline arrow::Status CreateRandomAccessFileByUri(\n    const std::string &uri,\n    std::shared_ptr<arrow::io::RandomAccessFile> *out_file,\n    std::string *out_file_path) {\n  std::string path_from_uri, file_path;\n  std::shared_ptr<arrow::fs::FileSystem> fs;\n  auto maybe_fs = arrow::fs::FileSystemFromUri(uri, &path_from_uri);\n\n  if (maybe_fs.ok()) {\n    fs = maybe_fs.ValueOrDie();\n    *out_file_path = path_from_uri;\n  } else {\n    arrow::fs::LocalFileSystemOptions options;\n    options.use_mmap = true;\n    fs = std::make_shared<arrow::fs::LocalFileSystem>(options);\n    if (uri.length() >= 2 && uri.substr(0, 2) == \"./\") {\n      *out_file_path = uri.substr(2);\n    } else {\n      *out_file_path = uri;\n    }\n  
}\n\n  auto result = fs->OpenInputFile(*out_file_path);\n  if (!result.ok()) {\n    return result.status();\n  }\n  *out_file = result.ValueOrDie();\n  return arrow::Status::OK();\n}\n\ninline std::vector<std::shared_ptr<arrow::Field>> SelectFields(\n    const std::shared_ptr<arrow::Schema> &schema,\n    const std::vector<std::string> &column_names) {\n  std::vector<std::shared_ptr<arrow::Field>> fields;\n  for (const auto &name : column_names) {\n    if (name == LOCAL_ROW_ID) {\n      fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));\n    } else {\n      fields.push_back(schema->field(schema->GetFieldIndex(name)));\n    }\n  }\n  return fields;\n}\n\ninline arrow::Result<std::shared_ptr<arrow::Array>> SelectArrayByIndices(\n    const std::shared_ptr<arrow::Array> &arr,\n    const std::vector<int32_t> &indices) {\n  arrow::Int32Builder builder;\n  ARROW_RETURN_NOT_OK(builder.AppendValues(indices));\n  std::shared_ptr<arrow::Array> indices_array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&indices_array));\n\n  return arrow::compute::Take(*arr, *indices_array);\n}\n\ninline arrow::Result<std::shared_ptr<arrow::dataset::Dataset>>\nReadBlocksAsDataset(const std::vector<BlockMeta> &scalar_blocks,\n                    const std::string &base_path, uint32_t collection_id,\n                    bool use_parquet) {\n  auto fs = std::make_shared<arrow::fs::LocalFileSystem>();\n  auto pool = arrow::default_memory_pool();\n\n  if (scalar_blocks.empty()) {\n    return arrow::Status::Invalid(\"No block metadata provided\");\n  }\n\n  using ColData = std::pair<std::shared_ptr<arrow::Field>,\n                            std::shared_ptr<arrow::ChunkedArray>>;\n  std::map<uint64_t, std::map<std::string, ColData>> segments;\n  std::map<uint64_t, uint32_t> segment_doc_count;\n  std::set<uint64_t> ordered_min_ids;\n\n  for (const auto &block : scalar_blocks) {\n    if (block.doc_count_ == 0 || block.columns_.empty()) continue;\n\n    uint64_t start_row = block.min_doc_id_;\n    
uint32_t expected_count = block.doc_count_;\n\n    std::string filepath = FileHelper::MakeForwardBlockPath(\n        base_path, collection_id, block.id_, use_parquet);\n\n    try {\n      auto file_info = fs->GetFileInfo(filepath).ValueOrDie();\n      auto file = fs->OpenInputFile(file_info.path()).ValueOrDie();\n\n      std::shared_ptr<arrow::Table> table;\n\n      if (use_parquet) {\n        std::unique_ptr<parquet::arrow::FileReader> reader;\n        reader = parquet::arrow::OpenFile(file, pool).ValueOrDie();\n        ARROW_RETURN_NOT_OK(reader->ReadTable(&table));\n      } else {\n        auto reader =\n            arrow::ipc::RecordBatchFileReader::Open(file).ValueOrDie();\n\n        std::vector<std::shared_ptr<arrow::RecordBatch>> batches;\n        for (int i = 0; i < reader->num_record_batches(); ++i) {\n          auto batch = reader->ReadRecordBatch(i).ValueOrDie();\n          batches.push_back(batch);\n        }\n\n        table = arrow::Table::FromRecordBatches(batches).ValueOrDie();\n      }\n\n      if (segments.find(start_row) == segments.end()) {\n        segments[start_row] = {};\n        segment_doc_count[start_row] = expected_count;\n        ordered_min_ids.insert(start_row);\n      }\n\n      for (int i = 0; i < table->num_columns(); ++i) {\n        const auto &field = table->schema()->field(i);\n        auto original_chunked_array = table->column(i);\n\n        segments[start_row][field->name()] = {field, original_chunked_array};\n      }\n    } catch (const std::exception &e) {\n      return arrow::Status::IOError(\"Failed to read block \",\n                                    std::to_string(block.id_), \": \", e.what());\n    }\n  }\n\n  if (segments.empty()) {\n    return arrow::Status::Invalid(\"No valid data blocks found\");\n  }\n\n  std::vector<uint64_t> sorted_starts(ordered_min_ids.begin(),\n                                      ordered_min_ids.end());\n  std::sort(sorted_starts.begin(), sorted_starts.end());\n\n  
std::vector<std::shared_ptr<arrow::Table>> segment_tables;\n  for (uint64_t start_row : sorted_starts) {\n    auto &col_map = segments[start_row];\n    uint32_t count = segment_doc_count[start_row];\n\n    std::vector<std::shared_ptr<arrow::Field>> fields;\n    std::vector<std::shared_ptr<arrow::ChunkedArray>> columns;\n\n    for (const auto &kv : col_map) {\n      fields.push_back(kv.second.first);\n      columns.push_back(kv.second.second);\n    }\n\n    auto schema = std::make_shared<arrow::Schema>(fields);\n    std::shared_ptr<arrow::Table> table =\n        arrow::Table::Make(schema, columns, count);\n    if (!table) {\n      return arrow::Status::Invalid(\n          \"Failed to create table from schema and columns\");\n    }\n    segment_tables.push_back(table);\n  }\n\n  ARROW_ASSIGN_OR_RAISE(auto final_table,\n                        arrow::ConcatenateTables(segment_tables));\n  auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(final_table);\n  return dataset;\n}\n\ninline arrow::Result<std::shared_ptr<arrow::Table>>\nEvaluateExpressionWithDataset(\n    const std::shared_ptr<arrow::dataset::Dataset> &dataset,\n    const std::string &new_column_name, const arrow::compute::Expression &expr,\n    const std::shared_ptr<arrow::DataType> &expected_type) {\n  auto new_scan_result = dataset->NewScan();\n  if (!new_scan_result.ok()) {\n    return arrow::Status::Invalid(\"Failed to create scanner builder\");\n  }\n  auto scanner_builder = std::move(new_scan_result.ValueOrDie());\n\n  arrow::compute::CastOptions cast_options;\n  cast_options.to_type = expected_type;\n  cast_options.allow_int_overflow = true;\n  cast_options.allow_float_truncate = true;\n  arrow::Expression cast_expr = call(\"cast\", {expr}, cast_options);\n\n  auto status = scanner_builder->Project({cast_expr}, {new_column_name});\n  if (!status.ok()) {\n    return arrow::Status::Invalid(\"Failed to project expression: \",\n                                  status.ToString());\n  }\n  
auto scanner_result = scanner_builder->Finish();\n  if (!scanner_result.ok()) {\n    return arrow::Status::Invalid(\"Failed to finish scanner builder: \",\n                                  scanner_result.status().ToString());\n  }\n  auto scanner = std::move(scanner_result.ValueOrDie());\n\n  auto to_table_result = scanner->ToTable();\n  if (!to_table_result.ok()) {\n    return arrow::Status::Invalid(\"Failed to convert scanner to table: \",\n                                  to_table_result.status().ToString());\n  }\n  auto result_table = std::move(to_table_result.ValueOrDie());\n  return result_table;\n}\n\ninline arrow::Status WriteColumnInBlocks(\n    const std::string &column_name,\n    const std::shared_ptr<arrow::ChunkedArray> &data,\n    const std::vector<BlockMeta> &blocks, const std::string &base_path,\n    uint32_t segment_id, std::function<BlockID()> allocate_block_id,\n    bool use_parquet, std::vector<BlockMeta> *out) {\n  int offset = 0;\n  for (const auto &block : blocks) {\n    auto slice = data->Slice(offset, block.doc_count_);\n    auto field = arrow::field(column_name, slice->type());\n    auto physic_schema = arrow::schema({field});\n    auto table = arrow::Table::Make(arrow::schema({field}), {slice});\n\n    BlockID block_id = allocate_block_id();\n    std::string path = FileHelper::MakeForwardBlockPath(base_path, segment_id,\n                                                        block_id, use_parquet);\n    auto writer = ChunkedFileWriter::Open(\n        path, physic_schema,\n        use_parquet ? 
FileFormat::PARQUET : FileFormat::IPC);\n    ARROW_RETURN_NOT_OK(writer->Write(*table));\n    ARROW_RETURN_NOT_OK(writer->Close());\n\n    BlockMeta new_block(block_id, BlockType::SCALAR, block.min_doc_id_,\n                        block.max_doc_id_, block.doc_count_, {column_name});\n\n    out->push_back(new_block);\n\n    offset += block.doc_count_;\n  }\n  return arrow::Status::OK();\n}\n\ninline int64_t MemorySize(const arrow::RecordBatch &batch) {\n  int64_t total = 0;\n  for (int i = 0; i < batch.num_columns(); ++i) {\n    const auto &array = batch.column(i);\n    const auto &data = array->data();\n    for (const auto &buffer : data->buffers) {\n      if (buffer) {\n        total += buffer->size();\n      }\n    }\n  }\n  return total;\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/wal/local_wal_file.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"local_wal_file.h\"\n#include <unistd.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/common/error_code.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/common/typedef.h\"\n\n#define MAX_RECORD_SIZE 4194304  // 4Mb\n\nnamespace zvec {\n\nint LocalWalFile::append(std::string &&data) {\n  WalRecord record;\n  record.length_ = data.size();\n  record.crc_ = ailego::Crc32c::Hash(\n      reinterpret_cast<const void *>(data.data()), record.length_, 0);\n  record.content_ = std::forward<std::string>(data);\n\n  if (write_record(record) < 0) {\n    WLOG_ERROR(\"Wal write record error. record.length_[%zu]\",\n               (size_t)record.length_);\n    return -1;\n  }\n  // if max_docs_wal_flush_ is 0, no need flush\n  if (max_docs_wal_flush_ != 0 && docs_count_ >= max_docs_wal_flush_) {\n    if (!file_.flush()) {\n      WLOG_ERROR(\"Wal flush error. 
docs_count_[%zu] max_docs_wal_flush_[%zu]\",\n                 (size_t)docs_count_, (size_t)max_docs_wal_flush_);\n    }\n    docs_count_ = 0;\n  }\n  return 0;\n}\n\nstd::string LocalWalFile::next() {\n  WalRecord record;\n  if (read_record(record) > 0) {\n    uint32_t tmp_crc = ailego::Crc32c::Hash(\n        reinterpret_cast<const void *>(record.content_.data()), record.length_,\n        0);\n    if (tmp_crc == record.crc_) {\n      return std::move(record.content_);\n    } else {\n      WLOG_ERROR(\n          \"Wal next error. record.length_[%zu] crc_[%zu] != tmp_crc[%zu]\",\n          (size_t)record.length_, (size_t)record.crc_, (size_t)tmp_crc);\n    }\n  }\n  // end of file or read error\n  return std::string();\n}\n\nint LocalWalFile::open(const WalOptions &wal_option) {\n  CHECK_STATUS(opened_, false);\n  if (wal_option.create_new) {\n    if (FileHelper::FileExists(wal_path_)) {\n      WLOG_ERROR(\"Wal open error. file already exist create_new[%d]\",\n                 wal_option.create_new);\n      return -1;\n    }\n\n    if (!file_.create(wal_path_, false)) {\n      WLOG_ERROR(\"Wal create error. create_new[%d]\", wal_option.create_new);\n      return -1;\n    }\n\n    // write wal header\n    int write_size = file_.write((const void *)&header_, sizeof(header_));\n    if (write_size != sizeof(header_)) {\n      WLOG_ERROR(\"Wal write header error. create_new[%d]\",\n                 wal_option.create_new);\n      return -1;\n    }\n\n  } else {\n    if (!FileHelper::FileExists(wal_path_)) {\n      WLOG_ERROR(\"Wal open error. file is not exist create_new[%d]\",\n                 wal_option.create_new);\n      return -1;\n    }\n\n    if (!file_.open(wal_path_.c_str(), false)) {\n      WLOG_ERROR(\"Wal open error. 
create_new[%d]\", wal_option.create_new);\n      return -1;\n    }\n\n    // open default for write\n    file_.seek(0, ailego::File::Origin::End);\n  }\n\n  max_docs_wal_flush_ = wal_option.max_docs_wal_flush;\n  opened_ = true;\n\n  WLOG_INFO(\"Wal open success. create_new[%d]\", wal_option.create_new);\n  return 0;\n}\n\nint LocalWalFile::close() {\n  CHECK_STATUS(opened_, true);\n  file_.close();\n  WLOG_INFO(\"Wal close success\");\n  opened_ = false;\n  return 0;\n}\n\nint LocalWalFile::remove() {\n  if (opened_) {\n    close();\n  }\n  if (FileHelper::FileExists(wal_path_)) {\n    FileHelper::RemoveFile(wal_path_);\n    WLOG_INFO(\"Wal remove success.\");\n  }\n  return 0;\n}\n\nint LocalWalFile::flush() {\n  CHECK_STATUS(opened_, true);\n  if (!file_.flush()) {\n    WLOG_ERROR(\"Wal flush error.\");\n    return -1;\n  }\n  return 0;\n}\n\nint LocalWalFile::prepare_for_read() {\n  CHECK_STATUS(opened_, true);\n  if (!file_.seek(0, ailego::File::Origin::Begin)) {\n    return -1;\n  }\n  int read_size = file_.read((void *)&header_, sizeof(header_));\n  if (read_size != sizeof(header_)) {\n    WLOG_ERROR(\"Wal read header error.\");\n    return -1;\n  }\n  if (header_.wal_version != 0UL) {\n    WLOG_ERROR(\"Wal version not support error.\");\n    return -1;\n  }\n  return 0;\n}\n\n//! Return 1 if success or -1 if write error\nint LocalWalFile::write_record(WalRecord &record) {\n  CHECK_STATUS(opened_, true);\n\n  int write_size = 0;\n  int ret = -1;\n\n  std::lock_guard<std::mutex> lock(file_mutex_);\n  do {\n    write_size = file_.write((const void *)&record.length_, LENGTH_SIZE);\n    if (write_size != LENGTH_SIZE) {\n      WLOG_ERROR(\"Wal write error. record.length_ error write_size[%d]\",\n                 write_size);\n      break;\n    }\n\n    write_size = file_.write((const void *)&record.crc_, CRC_SIZE);\n    if (write_size != CRC_SIZE) {\n      WLOG_ERROR(\"Wal write error. 
record.crc_ error write_size[%d]\",\n                 write_size);\n      break;\n    }\n\n    write_size =\n        file_.write((const void *)record.content_.data(), record.length_);\n    if (write_size != (int)record.length_) {\n      WLOG_ERROR(\"Wal write error. record.content_ error write_size[%d]\",\n                 write_size);\n      break;\n    }\n    ret = 1;  // write one record success\n    docs_count_++;\n  } while (false);\n\n  return ret;\n}\n\n//! Return 1 if success or 0 if eof or -1 if read error\nint LocalWalFile::read_record(WalRecord &record) {\n  CHECK_STATUS(opened_, true);\n\n  int read_size = 0;\n  std::string err_msg;\n  int ret = -1;\n\n  do {\n    read_size =\n        file_.read(reinterpret_cast<void *>(&record.length_), LENGTH_SIZE);\n    if (read_size == 0) {\n      ret = 0;\n      WLOG_INFO(\"Wal read finished. end of file\");\n      break;\n    }\n\n    if (read_size != LENGTH_SIZE) {\n      WLOG_ERROR(\"Wal read error. record.length_ error read_size[%d]\",\n                 read_size);\n      break;\n    }\n\n    read_size = file_.read(reinterpret_cast<void *>(&record.crc_), CRC_SIZE);\n    if (read_size != CRC_SIZE) {\n      WLOG_ERROR(\"Wal read error. record.crc_ error read_size[%d]\", read_size);\n      break;\n    }\n\n    // resize may crash if record.length_ very large\n    if (record.length_ <= 0 || record.length_ > MAX_RECORD_SIZE) {\n      WLOG_ERROR(\"Wal read error. record.length_ value error read_size[%d]\",\n                 read_size);\n      break;\n    }\n\n    record.content_.resize(record.length_);\n    read_size = file_.read((void *)const_cast<char *>(record.content_.data()),\n                           record.length_);\n    if (read_size != (int)record.length_) {\n      WLOG_ERROR(\"Wal read error. record.content_ error read_size[%d]\",\n                 read_size);\n      break;\n    }\n    ret = 1;  // read one record success\n  } while (false);\n\n  return ret;\n}\n\n};  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/wal/local_wal_file.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <deque>\n#include <fstream>\n#include <mutex>\n#include <thread>\n#include <unordered_map>\n#include <zvec/ailego/io/file.h>\n#include \"wal_file.h\"\n\nnamespace zvec {\n\n/*\n * Wal Header info\n */\nstruct WalHeader {\n  uint64_t wal_version{0U};\n  uint64_t reserved_[7];\n};\n\nstatic_assert(sizeof(WalHeader) % 64 == 0,\n              \"Wal Header must be aligned with 64 bytes\");\n\nclass WalRecord {\n public:\n  uint32_t length_;\n  uint32_t crc_;\n  std::string content_;\n};\n\nclass LocalWalFile : public WalFile {\n public:\n  LocalWalFile(const LocalWalFile &) = delete;\n  LocalWalFile &operator=(const LocalWalFile &) = delete;\n\n  //! Constructor\n  LocalWalFile(const std::string &wal_path) : wal_path_(wal_path) {}\n\n  //! 
Destructor\n  ~LocalWalFile() {\n    if (opened_) {\n      close();\n    }\n  }\n\n public:\n  int append(std::string &&data) override;\n  int prepare_for_read() override;\n  std::string next() override;\n\n public:\n  int open(const WalOptions &wal_option) override;\n\n  int close() override;\n\n  int flush() override;\n\n  int remove() override;\n\n  bool has_record() override {\n    return file_.size() > sizeof(header_);\n  }\n\n private:\n  int write_record(WalRecord &record);\n  int read_record(WalRecord &record);\n\n private:\n  ailego::File file_;\n  const static int32_t LENGTH_SIZE{4};\n  const static int32_t CRC_SIZE{4};\n\n private:\n  std::string wal_path_{};\n  std::mutex file_mutex_;\n  uint32_t max_docs_wal_flush_{0};\n  std::atomic<uint64_t> docs_count_{0UL};\n  WalHeader header_;\n\n  bool opened_{false};\n};\n\n\n};  // namespace zvec\n"
  },
  {
    "path": "src/db/index/storage/wal/wal_file.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"wal_file.h\"\n#include \"local_wal_file.h\"\n\nnamespace zvec {\n\nWalFilePtr WalFile::Create(const std::string &wal_path) {\n  return std::make_shared<LocalWalFile>(wal_path);\n}\n\nint WalFile::CreateAndOpen(const std::string &wal_path,\n                           const WalOptions &wal_options,\n                           WalFilePtr *wal_file) {\n  *wal_file = std::make_shared<LocalWalFile>(wal_path);\n\n  return (*wal_file)->open(wal_options);\n}\n\n\n};  // namespace zvec"
  },
  {
    "path": "src/db/index/storage/wal/wal_file.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n\n\nnamespace zvec {\n\nclass WalFile;\n\nusing WalFilePtr = std::shared_ptr<WalFile>;\n\nstruct WalOptions {\n  uint32_t max_docs_wal_flush{0};\n  bool create_new{false};\n};\n\nclass WalFile {\n public:\n  //! Constructor\n  WalFile() {}\n\n  //! Destructor\n  virtual ~WalFile() {}  // LCOV_EXCL_LINE\n\n  //! Create an instance\n  static WalFilePtr Create(const std::string &wal_path);\n\n  //! Create an instance and open\n  static int CreateAndOpen(const std::string &wal_path,\n                           const WalOptions &wal_options, WalFilePtr *wal_file);\n\n public:\n  virtual int append(std::string &&data) = 0;\n  virtual int prepare_for_read() = 0;\n  virtual std::string next() = 0;\n\n public:\n  //! Open and initialize WalFile\n  virtual int open(const WalOptions &wal_options) = 0;\n\n  //! Close WalFile\n  virtual int close() = 0;\n\n  //! Remove wal disk file\n  virtual int remove() = 0;\n\n  //! Flush WalFile's memory to disk file\n  virtual int flush() = 0;\n\n  virtual bool has_record() = 0;\n};\n\n};  // namespace zvec"
  },
  {
    "path": "src/db/proto/zvec.proto",
    "content": "syntax = \"proto3\";\n\npackage zvec.proto;\n\noption cc_enable_arenas = true;\n\n// The Go package name, refers to\n// https://developers.google.com/protocol-buffers/docs/reference/go-generated#package\noption go_package = \"proxima/zvec/proto\";\n\n/*! Types of Data\n */\nenum DataType {\n  DT_UNDEFINED = 0;\n\n  DT_BINARY = 1;\n  DT_STRING = 2;\n  DT_BOOL = 3;\n  DT_INT32 = 4;\n  DT_INT64 = 5;\n  DT_UINT32 = 6;\n  DT_UINT64 = 7;\n  DT_FLOAT = 8;\n  DT_DOUBLE = 9;\n\n  DT_VECTOR_BINARY32 = 20;\n  DT_VECTOR_BINARY64 = 21;\n  DT_VECTOR_FP16 = 22;\n  DT_VECTOR_FP32 = 23;\n  DT_VECTOR_FP64 = 24;\n  DT_VECTOR_INT4 = 25;\n  DT_VECTOR_INT8 = 26;\n  DT_VECTOR_INT16 = 27;\n\n  DT_SPARSE_VECTOR_FP16 = 30;\n  DT_SPARSE_VECTOR_FP32 = 31;\n\n  // ARRAY\n  DT_ARRAY_BINARY = 40;\n  DT_ARRAY_STRING = 41;\n  DT_ARRAY_BOOL = 42;\n  DT_ARRAY_INT32 = 43;\n  DT_ARRAY_INT64 = 44;\n  DT_ARRAY_UINT32 = 45;\n  DT_ARRAY_UINT64 = 46;\n  DT_ARRAY_FLOAT = 47;\n  DT_ARRAY_DOUBLE = 48;\n};\n\nenum IndexType {\n  // Undefined\n  IT_UNDEFINED = 0;\n  // Proxima HNSW Index\n  IT_HNSW = 1;\n  // Proxima IVF Index\n  IT_IVF = 2;\n  // Proxima FLAT Index\n  IT_FLAT = 3;\n  // Proxima HNSW RABITQ Index\n  IT_HNSW_RABITQ = 4;\n  // Invert Index\n  IT_INVERT = 10;\n};\n\nenum QuantizeType {\n  QT_UNDEFINED = 0;\n  QT_FP16 = 1;\n  QT_INT8 = 2;\n  QT_INT4 = 3;\n  QT_RABITQ = 4;\n};\n\nenum MetricType {\n  MT_UNDEFINED = 0;\n  MT_L2 = 1;\n  MT_IP = 2;\n  MT_COSINE = 3;\n};\n\nmessage InvertIndexParams {\n  bool enable_range_optimization = 1;\n};\n\nmessage BaseIndexParams {\n  MetricType metric_type = 1;\n  QuantizeType quantize_type = 2;\n};\n\nmessage HnswIndexParams {\n  BaseIndexParams base = 1;\n  int32 m = 2;\n  int32 ef_construction = 3;\n}\n\nmessage HnswRabitqIndexParams {\n  BaseIndexParams base = 1;\n  int32 m = 2;\n  int32 ef_construction = 3;\n  int32 total_bits = 4;\n  int32 num_clusters = 5;\n  int32 sample_count = 6;\n}\n\nmessage FlatIndexParams {\n  BaseIndexParams base = 
1;\n}\n\nmessage IVFIndexParams {\n  BaseIndexParams base = 1;\n  int32 n_list = 2;\n  int32 n_iters = 3;\n  bool use_soar = 4;\n}\n\nmessage IndexParams {\n  oneof params {\n    InvertIndexParams invert = 1;\n    HnswIndexParams hnsw = 2;\n    FlatIndexParams flat = 3;\n    IVFIndexParams ivf = 4;\n    HnswRabitqIndexParams hnsw_rabitq = 5;\n  };\n};\n\nmessage FieldSchema {\n  string name = 1;\n  DataType data_type = 2;\n  uint32 dimension = 3;\n  bool nullable = 4;\n  IndexParams index_params = 5;\n};\n\nmessage CollectionSchema {\n  string name = 1;\n  repeated FieldSchema fields = 2;\n  uint64 max_doc_count_per_segment = 3;\n};\n\nenum BlockType {\n  BT_UNDEFINED = 0;\n  BT_SCALAR = 1;\n  BT_SCALAR_INDEX = 2;\n  BT_VECTOR_INDEX = 3;\n  BT_VECTOR_INDEX_QUANTIZE = 4;\n};\n\nmessage BlockMeta {\n  uint32 block_id = 1;\n  BlockType block_type = 2;  // for getting filename prefix\n  uint64 min_doc_id = 3;\n  uint64 max_doc_id = 4;\n  uint64 doc_count = 5;\n  repeated string columns = 6;  // columns contained in this block\n};\n\n// message AlterColumnMeta {\n//   string old_column_name = 1;\n//   FieldSchema new_schema = 2;\n// };\n\nmessage SegmentMeta {\n  uint32 segment_id = 1;\n  // scalar data, vector data and vector index\n  repeated BlockMeta persisted_blocks = 2;\n\n  BlockMeta writing_forward_block = 3;\n\n  // if indexed, index_params can be retrieved from schema\n  // if not indexed, index_params is default index_params(flat)\n  repeated string indexed_vector_fields = 4;\n  // repeated AlterColumnMeta alter_columns = 10;\n};\n\nmessage Manifest {\n  uint32 version = 1;\n\n  CollectionSchema schema = 2;\n\n  bool enable_mmap = 3;\n\n  repeated SegmentMeta persisted_segment_metas = 4;\n\n  SegmentMeta writing_segment_meta = 5;\n\n  uint32 id_map_path_suffix = 6;\n  uint32 delete_snapshot_path_suffix = 7;\n\n  uint32 next_segment_id = 8;\n};"
  },
  {
    "path": "src/db/sqlengine/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_library(\n    NAME zvec_sqlengine STATIC STRICT\n    SRCS *.cc common/*.cc antlr/gen/*.cc parser/*.cc analyzer/*.cc planner/*.cc planner/ops/*.cc planner/physical_rules/*.cc\n    LIBS zvec_index\n         zvec_common\n         antlr4\n         Arrow::arrow_acero\n    INCS . ${PROJECT_ROOT_DIR}/src\n    VERSION \"${PROXIMA_ZVEC_VERSION}\"\n  )\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_analyzer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_analyzer.h\"\n#include <cstddef>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/db/config.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n#include \"db/common/error_code.h\"\n#include \"db/index/common/type_helper.h\"\n#include \"db/sqlengine/analyzer/query_node.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/parser/select_info.h\"\n#include \"query_info_helper.h\"\n#include \"simple_rewriter.h\"\n\nnamespace zvec::sqlengine {\n\nconst std::map<NodeOp, QueryNodeOp> QueryAnalyzer::opMap_ = {\n    {NodeOp::T_AND, QueryNodeOp::Q_AND},\n    {NodeOp::T_OR, QueryNodeOp::Q_OR},\n    {NodeOp::T_EQ, QueryNodeOp::Q_EQ},\n    {NodeOp::T_NE, QueryNodeOp::Q_NE},\n    {NodeOp::T_GT, QueryNodeOp::Q_GT},\n    {NodeOp::T_GE, QueryNodeOp::Q_GE},\n    {NodeOp::T_LT, QueryNodeOp::Q_LT},\n    {NodeOp::T_LE, QueryNodeOp::Q_LE},\n    {NodeOp::T_LIKE, QueryNodeOp::Q_LIKE},\n    {NodeOp::T_IN, QueryNodeOp::Q_IN},\n    {NodeOp::T_CONTAIN_ALL, QueryNodeOp::Q_CONTAIN_ALL},\n    {NodeOp::T_CONTAIN_ANY, QueryNodeOp::Q_CONTAIN_ANY},\n    {NodeOp::T_PLUS, QueryNodeOp::Q_PLUS},\n    {NodeOp::T_MINUS, QueryNodeOp::Q_MINUS},\n    {NodeOp::T_MUL, 
QueryNodeOp::Q_MUL},\n    {NodeOp::T_DIV, QueryNodeOp::Q_DIV},\n    {NodeOp::T_FUNCTION_CALL, QueryNodeOp::Q_FUNCTION_CALL},\n    {NodeOp::T_RANGE_VALUE, QueryNodeOp::Q_RANGE_VALUE},\n    {NodeOp::T_LIST_VALUE, QueryNodeOp::Q_LIST_VALUE},\n    {NodeOp::T_VECTOR_MATRIX_VALUE, QueryNodeOp::Q_VECTOR_MATRIX_VALUE},\n    {NodeOp::T_INT_VALUE, QueryNodeOp::Q_INT_VALUE},\n    {NodeOp::T_FLOAT_VALUE, QueryNodeOp::Q_FLOAT_VALUE},\n    {NodeOp::T_STRING_VALUE, QueryNodeOp::Q_STRING_VALUE},\n    {NodeOp::T_NULL_VALUE, QueryNodeOp::Q_NULL_VALUE},\n    {NodeOp::T_ID, QueryNodeOp::Q_ID},\n    {NodeOp::T_BOOL_VALUE, QueryNodeOp::Q_BOOL_VALUE},\n    {NodeOp::T_IS_NULL, QueryNodeOp::Q_IS_NULL},\n    {NodeOp::T_IS_NOT_NULL, QueryNodeOp::Q_IS_NOT_NULL},\n};\n\nResult<QueryInfo::Ptr> QueryAnalyzer::analyze(const CollectionSchema &schema,\n                                              SQLInfo::Ptr sql_info) {\n  // create query_info from sql_info. The purpose:\n  // 1. Keep module isolated\n  // 2. Everything in sql_info should be read-only, any potential changes\n  // should apply to query_info. Especially for changes about syntax\n  // optimization applied to query_info.\n  // 3. 
add more necessary information and more analyzing\n  // result to QueryInfo, so as to ease plan and execution.\n  auto query_info_ret = create_queryinfo_from_sqlinfo(schema, *sql_info);\n  if (!query_info_ret) {\n    return query_info_ret;\n  }\n  auto query_info = std::move(query_info_ret.value());\n\n  // select list check\n  for (auto &query_field_info : query_info->query_fields()) {\n    const std::string &field_name = query_field_info->field_name();\n    auto forward_field = schema.get_field(field_name);\n    if (!forward_field) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(field_name, \" not defined in schema\"));\n    }\n\n    // set forward field info as reference\n    query_field_info->set_field_schema_ptr(forward_field);\n\n    // add forward field info\n    query_info->add_select_item_schema_ptr(field_name, forward_field);\n  }\n\n  // condition check & decide index/filter condition\n  if (query_info->search_cond() != nullptr) {\n    // rewrite query by  rule\n    SimpleRewriter rewriter;\n    rewriter.rewrite(query_info.get());\n\n    SearchCondCheckWalker search_cond_check_walker(schema);\n    search_cond_check_walker.traverse_cond_node(query_info->search_cond());\n    if (!search_cond_check_walker.err_msg().empty()) {\n      return tl::make_unexpected(\n          Status::NotSupported(search_cond_check_walker.err_msg()));\n    }\n\n    size_t num_of_filters = search_cond_check_walker.filter_rels().size() +\n                            search_cond_check_walker.invert_rels().size();\n    if (num_of_filters > kMaxNumOfFilters) {\n      return tl::make_unexpected(\n          Status::NotSupported(\"max number of filters is \"\n                               \"limited to 4096\"));\n    }\n\n    auto st = decide_filter_index_cond(schema, search_cond_check_walker,\n                                       query_info.get());\n    if (!st.ok()) {\n      return tl::make_unexpected(\n          
Status::InternalError(\"decide_filter_index_cond failed\"));\n    }\n    // add forward filter meta according to final result\n    auto status = set_forward_filter_meta(schema, query_info.get(),\n                                          query_info->filter_cond().get());\n    if (!status.ok()) {\n      return tl::make_unexpected(status);\n    }\n\n    // for special feature: post filtering, move filters to post filters\n    if (query_info->vector_cond_info() &&\n        query_info->vector_cond_info()->post_filter_topk() > 0) {\n      query_info->set_post_invert_cond(query_info->invert_cond());\n      query_info->set_invert_cond(nullptr);\n      query_info->set_post_filter_cond(query_info->filter_cond());\n      query_info->set_filter_cond(nullptr);\n      LOG_DEBUG(\"post filter is applied. %u\",\n                query_info->vector_cond_info()->post_filter_topk());\n    }\n  }\n\n  // orderby list check\n  for (auto &query_orderby_info : query_info->query_orderbys()) {\n    const std::string &field_name = query_orderby_info->field_name();\n    auto forward_field = schema.get_forward_field(field_name);\n\n    if (forward_field == nullptr) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(field_name, \" not defined in schema\"));\n    }\n\n    if (forward_field->is_array_type()) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"order by fields should not be array data type\"));\n    }\n\n    // set forward field info as reference\n    query_orderby_info->set_field_schema_ptr(forward_field);\n\n    // add forward field info\n    query_info->add_orderby_item_schema_ptr(field_name, forward_field);\n  }\n\n  // group by check\n  if (const auto &group = query_info->group_by(); group != nullptr) {\n    if (!query_info->vector_cond_info()) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"group by should has vector query\"));\n    }\n    if (!query_info->query_orderbys().empty()) {\n      return 
tl::make_unexpected(\n          Status::InvalidArgument(\"group by not \"\n                                  \"support order by forward\"));\n    }\n    auto forward_field = schema.get_forward_field(group->group_by_field);\n    if (!forward_field) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          group->group_by_field, \"not defined in schema\"));\n    }\n    if (forward_field->is_array_type()) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"group by fields \"\n                                  \"should not be array data type\"));\n    }\n    if (forward_field->is_vector_field()) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"group by fields \"\n                                  \"should not be vector data type\"));\n    }\n    query_info->set_group_by_schema_ptr(forward_field);\n  }\n  return query_info;\n}\n\nStatus QueryAnalyzer::set_forward_filter_meta(const CollectionSchema &schema,\n                                              QueryInfo *query_info,\n                                              QueryNode *filter_cond) {\n  if (filter_cond == nullptr) {\n    return Status::OK();\n  }\n\n  if (filter_cond->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {\n    QueryNode *left_node = filter_cond->left().get();\n    QueryNode *right_node = filter_cond->right().get();\n    if (filter_cond->left() != nullptr) {\n      auto ret = set_forward_filter_meta(schema, query_info, left_node);\n      if (!ret.ok()) {\n        return ret;\n      }\n    }\n    if (filter_cond->right() != nullptr) {\n      return set_forward_filter_meta(schema, query_info, right_node);\n    }\n    return Status::OK();\n  }\n\n  QueryRelNode *query_rel_node = reinterpret_cast<QueryRelNode *>(filter_cond);\n  query_rel_node->set_forward();\n  std::string forward_field_name;\n  auto *left_node = query_rel_node->left_node();\n  if (left_node->op() == QueryNodeOp::Q_ID) {\n    forward_field_name = left_node->text();\n  } 
else if (left_node->op() == QueryNodeOp::Q_FUNCTION_CALL) {\n    const QueryFuncNode *func_node =\n        dynamic_cast<const QueryFuncNode *>(left_node);\n    const auto &arguments = func_node->arguments();\n    auto func_name = func_node->get_func_name();\n    if (func_name == kFuncArrayLength) {\n      forward_field_name = arguments[0]->text();\n    } else {\n      return Status::NotSupported(\"function \", func_name, \" is not supported\");\n    }\n  } else {\n    return Status::NotSupported(\"left node \", left_node->op(),\n                                \" is not supported\");\n  }\n  auto forward_field = schema.get_forward_field(forward_field_name);\n  if (forward_field == nullptr) {\n    return Status::InvalidArgument(forward_field_name, \" not found in schema\");\n  }\n  if (forward_field->has_invert_index()) {\n    // invert condition to forward condition\n    QueryNode *right_node =\n        std::dynamic_pointer_cast<QueryNode>(query_rel_node->right()).get();\n    // Revert numeric buf to numeric text\n    QueryInfoHelper::constant_node_data_buf_2_text(\n        forward_field->element_data_type(), forward_field->is_array_type(),\n        right_node);\n  }\n\n  // forward_field is nullptr for schema free field\n  query_info->add_forward_filter_schema_ptr(forward_field_name, forward_field);\n  return Status::OK();\n}\n\n// decide filter or index condition according to data collected from\n// search_cond_check_walker\nStatus QueryAnalyzer::decide_filter_index_cond(\n    const CollectionSchema &schema,\n    const SearchCondCheckWalker &search_cond_check_walker,\n    QueryInfo *query_info) {\n  const std::vector<QueryRelNode *> &filter_rels =\n      search_cond_check_walker.filter_rels();\n  const std::vector<QueryRelNode *> &invert_rels =\n      search_cond_check_walker.invert_rels();\n  QueryRelNode *vector_rel = search_cond_check_walker.vector_rel();\n  uint32_t vector_rel_size = (vector_rel != nullptr) ? 
1 : 0;\n  uint32_t invert_size = (uint32_t)invert_rels.size();\n  uint32_t filter_size = (uint32_t)filter_rels.size();\n\n  LOG_DEBUG(\"vector_rel_size[%u] invert[%u] filter[%u]\", vector_rel_size,\n            invert_size, filter_size);\n\n  // sanity check\n  // check if all invert conds exist in one sub-tree, if yes,\n  // move the sub-tree as final invert cond for query.\n  if (invert_size > 0) {\n    QueryNode *invert_subroot =\n        get_invert_subroot(query_info->search_cond().get());\n    if (invert_subroot != nullptr) {\n      LOG_DEBUG(\n          \"all invert conds are under one sub-root, invert query applied. \"\n          \"[%s]\",\n          invert_subroot->text().c_str());\n      query_info->set_invert_cond(\n          invert_subroot->detach_from_search_cond(query_info));\n    }\n  }\n\n  if (vector_rel_size > 0) {\n    if (vector_rel->or_ancestor()) {\n      return Status::InvalidArgument(\n          \"vector condition must NOT be OR ancestor.\");\n    }\n    std::shared_ptr<QueryInfo::QueryVectorCondInfo> vector_cond_info;\n    Status st = check_and_convert_vector(schema, vector_rel, &vector_cond_info);\n    if (!st.ok()) {\n      return st;\n    }\n    vector_rel->detach_from_search_cond(query_info);\n    query_info->set_vector_cond_info(std::move(vector_cond_info));\n  }\n\n  // after set invert and vector well, the left conds are filter cond if any\n  if (query_info->search_cond() != nullptr) {\n    if (filter_size != 0) {  // optimize\n      query_info->set_filter_cond(query_info->search_cond());\n    }\n    // after above steps, all conds are moved to vector/invert/forward,\n    // so clear search cond finally.\n    query_info->set_search_cond(nullptr);\n  }\n\n  return Status::OK();\n}\n\nQueryNode *QueryAnalyzer::get_invert_subroot(QueryNode *search_cond) {\n  SubRootResult subroot_result;\n  std::function<bool(QueryRelNode * node)> rule = [](QueryRelNode *rel_node) {\n    return rel_node->is_invert();\n  };\n  
QueryInfoHelper::find_subroot_by_rule(search_cond, rule, &subroot_result);\n  return subroot_result.subroot;\n}\n\nResult<QueryInfo::Ptr> QueryAnalyzer::create_queryinfo_from_sqlinfo(\n    const CollectionSchema &schema, const SQLInfo &sql_info) {\n  QueryInfo::Ptr query_info = std::make_shared<QueryInfo>();\n\n  if (sql_info.type() != SQLInfo::SQLType::SELECT) {\n    return tl::make_unexpected(\n        Status::NotSupported(\"only select is \"\n                             \"supported\"));\n  }\n\n  SelectInfo::Ptr select_info =\n      std::dynamic_pointer_cast<SelectInfo>(sql_info.base_info());\n  if (select_info == nullptr) {\n    return tl::make_unexpected(Status::InternalError(\"select_info is null\"));\n  }\n\n  // copy search and filter\n  std::string err;\n  query_info->set_search_cond(\n      create_querynode_from_node(select_info->search_cond(), 0, &err));\n  if (!err.empty()) {\n    return tl::make_unexpected(\n        Status::InternalError(\"create querynode from node failed: \", err));\n  }\n\n  // set select element info\n  for (const auto &select_elem_info : select_info->selected_elems()) {\n    if (select_elem_info->is_empty()) {\n      continue;  // leave query_field to be null\n    }\n\n    if (select_elem_info->is_asterisk()) {\n      query_info->set_asterisk(true);\n      for (auto &forward_field : schema.forward_fields()) {\n        if (!zvec::FieldSchema::is_vector_field(\n                forward_field->element_data_type())) {\n          query_info->add_query_field(std::make_shared<QueryFieldInfo>(\n              forward_field->name(), \"\", \"\", \"\", false));\n        }\n      }\n      continue;\n    }\n\n    query_info->add_query_field(std::make_shared<QueryFieldInfo>(\n        select_elem_info->field_name(), select_elem_info->alias(),\n        select_elem_info->func_name(), select_elem_info->func_param(),\n        select_elem_info->is_func_param_asterisk()));\n  }\n\n  if (select_info->include_vector()) {\n    
query_info->set_include_vector(true);\n    for (auto &index_field : schema.vector_fields()) {\n      if (!query_info->exists_in_query_fields(index_field->name())) {\n        query_info->add_query_field(std::make_shared<QueryFieldInfo>(\n            index_field->name(), \"\", \"\", \"\", false));\n      }\n    }\n  }\n  query_info->set_include_doc_id(select_info->is_include_doc_id());\n\n  // set order by element info\n  for (auto &orderby_elem_info : select_info->orderby_elems()) {\n    query_info->add_query_orderby(std::make_shared<QueryOrderbyInfo>(\n        orderby_elem_info->field_name(), orderby_elem_info->is_desc()));\n  }\n\n  // set topN\n  if (select_info->limit() > 0) {\n    query_info->set_query_topn(select_info->limit());\n  } else {\n    query_info->set_query_topn(DEFAULT_TOPN);\n  }\n\n  // set group by\n  query_info->set_group_by(select_info->group_by());\n\n  return query_info;\n}\n\nQueryNode::Ptr QueryAnalyzer::create_querynode_from_node(const Node::Ptr &node,\n                                                         uint32_t level,\n                                                         std::string *err) {\n  QueryNode::Ptr query_node = nullptr;\n\n  if (node == nullptr) {\n    return nullptr;\n  }\n\n  // copy subclass object according to node op\n  if (node->type() == Node::NodeType::REL_EXPR) {\n    // REL_EXPR include T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LIKE, T_IN,\n    // T_CONTAIN_ALL, T_CONTAIN_ANY, T_IS_NULL, T_IS_NOT_NULL\n    // use type == REL_EXPR to simplify */\n    query_node = std::make_shared<QueryRelNode>();\n  } else {\n    if (node->op() == NodeOp::T_INT_VALUE ||\n        node->op() == NodeOp::T_FLOAT_VALUE ||\n        node->op() == NodeOp::T_STRING_VALUE ||\n        node->op() == NodeOp::T_NULL_VALUE ||\n        node->op() == NodeOp::T_BOOL_VALUE) {\n      ConstantNode::Ptr constant_node =\n          std::dynamic_pointer_cast<ConstantNode>(node);\n      query_node = 
std::make_shared<QueryConstantNode>(constant_node->value());\n    } else if (node->op() == NodeOp::T_ID) {\n      IDNode::Ptr id_node = std::dynamic_pointer_cast<IDNode>(node);\n      query_node = std::make_shared<QueryIDNode>(id_node->value());\n    } else if (node->op() == NodeOp::T_VECTOR_MATRIX_VALUE) {\n      VectorMatrixNode::Ptr vector_node =\n          std::dynamic_pointer_cast<VectorMatrixNode>(node);\n      query_node =\n          std::make_shared<QueryVectorMatrixNode>(std::move(vector_node));\n    } else if (node->op() == NodeOp::T_FUNCTION_CALL) {\n      FuncNode::Ptr func_node = std::dynamic_pointer_cast<FuncNode>(node);\n      QueryFuncNode::Ptr query_func_node = std::make_shared<QueryFuncNode>();\n      query_func_node->set_func_name_node(create_querynode_from_node(\n          func_node->get_func_name_node(), level + 1, err));\n      for (auto argument : func_node->arguments()) {\n        query_func_node->add_argument(\n            create_querynode_from_node(argument, level + 1, err));\n      }\n      query_node = std::move(query_func_node);\n    } else if (node->op() == NodeOp::T_LIST_VALUE) {\n      InValueExprListNode::Ptr in_value_expr_list_node =\n          std::dynamic_pointer_cast<InValueExprListNode>(node);\n      QueryListNode::Ptr query_in_value_expr_node =\n          std::make_shared<QueryListNode>();\n\n      for (auto in_value_expr : in_value_expr_list_node->in_value_expr_list()) {\n        query_in_value_expr_node->add_value_expr(\n            create_querynode_from_node(in_value_expr, level, err));\n      }\n      query_in_value_expr_node->set_exclude(in_value_expr_list_node->exclude());\n      query_node = std::move(query_in_value_expr_node);\n    } else { /* others are normal Node */\n      query_node = std::make_shared<QueryNode>();\n    }\n  }\n\n  if (query_node == nullptr) {\n    *err = \"node op is not handled. 
\" + node->type_to_str(node->op());\n    return nullptr;\n  }\n\n  // copy nodeOp\n  QueryNodeOp query_node_op = nodeop_2_query_nodeop(node->op());\n  if (query_node_op == QueryNodeOp::Q_NONE) {\n    *err = \"cannot find query node op \" + Node::type_to_str(node->op());\n    return nullptr;\n  }\n  query_node->set_op(query_node_op);\n\n  // set & increment level\n  query_node->set_level(level++);\n\n  // copy left & right\n  if (node->left() != nullptr) {\n    query_node->set_left(create_querynode_from_node(node->left(), level, err));\n  }\n  if (node->right() != nullptr) {\n    query_node->set_right(\n        create_querynode_from_node(node->right(), level, err));\n  }\n\n  return query_node;\n}\n\nQueryNodeOp QueryAnalyzer::nodeop_2_query_nodeop(NodeOp op) {\n  auto iter = opMap_.find(op);\n  if (iter == opMap_.end()) {\n    return QueryNodeOp::Q_NONE;\n  }\n  return iter->second;\n}\n\nStatus QueryAnalyzer::check_and_convert_vector(\n    const CollectionSchema &schema, const QueryRelNode *query_rel_node,\n    std::shared_ptr<QueryInfo::QueryVectorCondInfo> *vector_cond) {\n  const QueryNode::Ptr &vector_field_node = query_rel_node->left();\n  const auto &vector_field_name = vector_field_node->text();\n\n  auto vector_meta = schema.get_vector_field(vector_field_name);\n  if (vector_meta == nullptr) {\n    return Status::InvalidArgument(\"vector field not found:\",\n                                   vector_field_name);\n  }\n\n  std::string vector_term;\n  uint32_t dimension = vector_meta->dimension();\n  std::string vector_sparse_indices;\n  std::string vector_sparse_values;\n  QueryParams::Ptr query_params;\n\n  const QueryNode::Ptr &vector_value_node = query_rel_node->right();\n\n  // for pb request\n  if (vector_value_node->op() == QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {\n    // for format vector = [,,,]\n    const QueryVectorMatrixNode::Ptr &vector_node =\n        std::dynamic_pointer_cast<QueryVectorMatrixNode>(vector_value_node);\n    // we only have vector 
matrix, other info is not available\n    vector_term = vector_node->matrix();\n    vector_sparse_indices = vector_node->sparse_indices();\n    vector_sparse_values = vector_node->sparse_values();\n    query_params = vector_node->query_params();\n  } else {\n    return Status::InvalidArgument(\"invalid vector value node. op[\",\n                                   vector_value_node->op_name(), \"], text[\",\n                                   vector_value_node->text(), \"]\");\n  }\n\n  auto core_data_type =\n      DataTypeCodeBook::to_data_type(vector_meta->data_type());\n  if (core_data_type == core::IndexMeta::DataType::DT_UNDEFINED) {\n    return Status::InvalidArgument(\"invalid data type:\",\n                                   (int)vector_meta->data_type());\n  }\n\n  *vector_cond = std::make_shared<QueryInfo::QueryVectorCondInfo>(\n      vector_meta, vector_term, core_data_type, dimension,\n      std::move(vector_sparse_indices), std::move(vector_sparse_values),\n      std::move(query_params));\n  return Status::OK();\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_analyzer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <map>\n#include <memory>\n#include <string>\n#include <zvec/db/status.h>\n#include \"db/sqlengine/parser/sql_info.h\"\n#include \"query_info.h\"\n#include \"query_node_walker.h\"\n\nnamespace zvec::sqlengine {\n\nclass QueryAnalyzer {\n public:\n  QueryAnalyzer() = default;\n\n  Result<QueryInfo::Ptr> analyze(const CollectionSchema &schema,\n                                 SQLInfo::Ptr sql_info);\n  const std::string &err_msg();\n  int err_code();\n\n private:\n  Result<QueryInfo::Ptr> create_queryinfo_from_sqlinfo(\n      const CollectionSchema &schema, const SQLInfo &sql_info);\n  QueryNode::Ptr create_querynode_from_node(const Node::Ptr &node,\n                                            uint32_t level, std::string *err);\n  QueryNodeOp nodeop_2_query_nodeop(NodeOp op);\n  Status decide_filter_index_cond(\n      const CollectionSchema &schema,\n      const SearchCondCheckWalker &search_cond_check_walker,\n      QueryInfo *query_info);\n  QueryNode *get_invert_subroot(QueryNode *node);\n  Status check_and_convert_vector(\n      const CollectionSchema &schema, const QueryRelNode *query_rel_node,\n      std::shared_ptr<QueryInfo::QueryVectorCondInfo> *vector_cond);\n\n  Status set_forward_filter_meta(const CollectionSchema &schema,\n                                 QueryInfo *query_info, QueryNode *filter_cond);\n\n 
private:\n  static const std::map<NodeOp, QueryNodeOp> opMap_;\n  static const int DEFAULT_TOPN = 20;\n  static const size_t kMaxNumOfFilters = 4096;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_field_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_field_info.h\"\n\nnamespace zvec::sqlengine {\n\nstd::string QueryFieldInfo::to_string() const {\n  std::string str = \"\";\n  if (is_func_call()) {\n    if (is_func_param_asterisk()) {\n      str += func_name_ + \"(*)\";\n    } else {\n      str += func_name_ + \"(\" + func_param_ + \")\";\n    }\n  } else {\n    str = field_name_;\n    if (!alias_.empty()) {\n      str += \" as \" + alias_;\n    }\n  }\n\n  return str;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_field_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <zvec/db/schema.h>\n\nnamespace zvec::sqlengine {\n\nclass QueryFieldInfo {\n public:\n  using Ptr = std::shared_ptr<QueryFieldInfo>;\n\n  QueryFieldInfo() {}\n\n  QueryFieldInfo(const std::string &m_field_name, const std::string &m_alias,\n                 const std::string &m_func_name,\n                 const std::string &m_func_param, bool m_func_param_asterisk)\n      : field_name_(m_field_name),\n        alias_(m_alias),\n        func_name_(m_func_name),\n        func_param_(m_func_param),\n        func_param_asterisk_(m_func_param_asterisk) {}\n\n  ~QueryFieldInfo() {}\n\n  void set_field_name(const std::string &value) {\n    field_name_ = value;\n  }\n\n  const std::string &field_name() const {\n    return field_name_;\n  }\n\n  void set_alias(const std::string &value) {\n    alias_ = value;\n  }\n  const std::string &alias() const {\n    return alias_;\n  }\n\n  const std::string &func_name() const {\n    return func_name_;\n  }\n\n  void set_func_name(const std::string &value) {\n    func_name_ = value;\n  }\n\n  const std::string &func_param() const {\n    return func_param_;\n  }\n\n  void set_func_param(const std::string &value) {\n    func_param_ = value;\n  }\n\n  bool is_func_call() const {\n    return (!func_name_.empty());\n  }\n\n  void 
set_func_param_asterisk(bool value) {\n    func_param_asterisk_ = value;\n  }\n  bool is_func_param_asterisk() const {\n    return func_param_asterisk_;\n  }\n\n  void set_field_schema_ptr(const zvec::FieldSchema *field_schema_ptr) {\n    field_schema_ptr_ = field_schema_ptr;\n  }\n  const zvec::FieldSchema *field_schema_ptr() {\n    return field_schema_ptr_;\n  }\n\n\n  std::string to_string() const;\n\n private:\n  std::string field_name_{\"\"};\n  std::string alias_{\"\"};\n\n  std::string func_name_{\"\"};\n  std::string func_param_{\"\"};\n  bool func_param_asterisk_{false};\n\n  const zvec::FieldSchema *field_schema_ptr_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_info.h\"\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"db/common/constants.h\"\n\nnamespace zvec::sqlengine {\n\nstd::string QueryInfo::to_string() const {\n  std::string str = \"Query Info: {\\n\";\n\n  if (!query_fields_.empty()) {\n    str += \"query_fields: \";\n    for (auto iter = query_fields_.begin(); iter != query_fields_.end();\n         iter++) {\n      if (iter != query_fields_.begin()) {\n        str += \", \";\n      }\n      QueryFieldInfo::Ptr query_field_info_ptr = *iter;\n      str += query_field_info_ptr->to_string();\n    }\n    str += \"\\n\";\n  }\n\n  if (!query_orderbys_.empty()) {\n    str += \"query_orderbys: \";\n    for (auto iter = query_orderbys_.begin(); iter != query_orderbys_.end();\n         iter++) {\n      if (iter != query_orderbys_.begin()) {\n        str += \", \";\n      }\n      QueryOrderbyInfo::Ptr query_orderby_info_ptr = *iter;\n      str += query_orderby_info_ptr->to_string();\n    }\n    str += \"\\n\";\n  }\n\n  if (!all_fetched_schema_schemas_.empty()) {\n    str += \"all_fetched_field_schemas: \";\n    for (auto iter = all_fetched_schema_schemas_.begin();\n         iter != all_fetched_schema_schemas_.end(); iter++) {\n      if (iter != all_fetched_schema_schemas_.begin()) {\n        str += \", \";\n      }\n      
str += iter->first;\n    }\n    str += \"\\n\";\n  }\n\n  if (group_by_ != nullptr) {\n    str += \"group_by: \" + group_by_->to_string() + \"\\n\";\n  }\n\n  str += \"query_topn: \" + std::to_string(query_topn_) + \" \";\n  str += \"\\n\";\n\n  str += \"search_cond:\\n\";\n  if (search_cond_ != nullptr) {\n    str += search_cond_->text();\n    str += \"\\n\";\n  }\n\n  str += \"vector_cond:\\n\";\n  if (vector_cond_info_ != nullptr) {\n    ailego::StringHelper::Append(\n        &str, vector_cond_info_->vector_field_name(), \"=\", \"feature(\",\n        vector_cond_info_->batch() > 1 ? \"matrix[[...],...]\" : \"vector[...]\",\n        \", \", vector_cond_info_->data_type(), \",\", vector_cond_info_->batch(),\n        \")(FEATURE\",\n        vector_cond_info_->vector_sparse_indices().empty() ? \"\"\n                                                           : \"_WITH_SPARSE\",\n        \")\\n\");\n  }\n\n  str += \"filter_cond:\\n\";\n  if (filter_cond_ != nullptr) {\n    str += filter_cond_->text();\n    str += \"\\n\";\n  }\n\n  str += \"invert_cond:\\n\";\n  if (invert_cond_ != nullptr) {\n    str += invert_cond_->text();\n    str += \"\\n\";\n  }\n\n  str += \"}\";\n  return str;\n}\n\nbool QueryInfo::is_filter_unsatisfiable() const {\n  if (invert_cond_ && invert_cond_->predictate_result().has_value() &&\n      !invert_cond_->predictate_result().value()) {\n    return true;\n  }\n  if (filter_cond_ && filter_cond_->predictate_result().has_value() &&\n      !filter_cond_->predictate_result().value()) {\n    return true;\n  }\n  return false;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <unordered_map>\n#include <unordered_set>\n#include <vector>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/db/schema.h>\n#include \"db/common/constants.h\"\n#include \"db/sqlengine/common/group_by.h\"\n#include \"query_field_info.h\"\n#include \"query_node.h\"\n#include \"query_orderby_info.h\"\n\nnamespace zvec::sqlengine {\n\nstruct FieldAndSchema {\n  FieldAndSchema(std::string field, const FieldSchema *schema)\n      : field_name(std::move(field)), field_schema_ptr(schema) {}\n\n  std::string field_name;\n  const FieldSchema *field_schema_ptr;\n};\n\nclass QueryInfo {\n public:\n  using Ptr = std::shared_ptr<QueryInfo>;\n\n  class QueryVectorCondInfo {\n   public:\n    using Ptr = std::shared_ptr<QueryVectorCondInfo>;\n\n    QueryVectorCondInfo(const FieldSchema *vector_schema,\n                        const std::string &vector_term,\n                        core::IndexMeta::DataType core_data_type, int dimension,\n                        std::string vector_sparse_indices,\n                        std::string vector_sparse_values,\n                        QueryParams::Ptr query_params)\n        : vector_schema_(vector_schema),\n          vector_term_(vector_term),\n          data_type_(core_data_type),\n          dimension_(dimension),\n    
      vector_sparse_indices_(std::move(vector_sparse_indices)),\n          vector_sparse_values_(std::move(vector_sparse_values)),\n          query_params_(std::move(query_params)) {\n      auto *vector_params = dynamic_cast<VectorIndexParams *>(\n          vector_schema_->index_params().get());\n      if (vector_params && vector_params->metric_type() == MetricType::IP) {\n        reverse_sort_ = true;\n      }\n    }\n\n   public:\n    std::string vector_field_name() const {\n      return vector_schema_->name();\n    }\n\n    const FieldSchema *vector_schema() const {\n      return vector_schema_;\n    }\n\n    const std::string &vector_term() const {\n      return vector_term_;\n    }\n\n    core::IndexMeta::DataType data_type() const {\n      return data_type_;\n    }\n\n    uint32_t dimension() const {\n      return dimension_;\n    }\n\n    uint32_t post_filter_topk() const {\n      return 0;\n    }\n\n    int batch() const {\n      return 1;\n    }\n\n    uint32_t sparse_count() const {\n      return vector_sparse_indices_.size() / sizeof(uint32_t);\n    }\n\n    const std::string &vector_sparse_indices() const {\n      return vector_sparse_indices_;\n    }\n\n    const std::string &vector_sparse_values() const {\n      return vector_sparse_values_;\n    }\n\n    bool is_reverse_sort() const {\n      return reverse_sort_;\n    }\n\n    const QueryParams::Ptr &query_params() const {\n      return query_params_;\n    }\n\n   private:\n    const FieldSchema *vector_schema_{nullptr};\n    std::string vector_term_{\"\"};\n    core::IndexMeta::DataType data_type_;\n    uint32_t dimension_{0};\n    std::string vector_sparse_indices_{\"\"};\n    std::string vector_sparse_values_{\"\"};\n    QueryParams::Ptr query_params_;\n    bool reverse_sort_{false};\n  };\n\n public:\n  QueryInfo() = default;\n  ~QueryInfo() = default;\n\n  void set_search_cond(QueryNode::Ptr value) {\n    search_cond_ = std::move(value);\n  }\n\n  QueryNode::Ptr search_cond() const {\n    return 
search_cond_;\n  }\n\n  void set_invert_cond(QueryNode::Ptr value) {\n    invert_cond_ = std::move(value);\n  }\n\n  QueryNode::Ptr invert_cond() const {\n    return invert_cond_;\n  }\n\n  void set_filter_cond(QueryNode::Ptr value) {\n    filter_cond_ = std::move(value);\n  }\n\n  QueryNode::Ptr filter_cond() const {\n    return filter_cond_;\n  }\n\n  void set_vector_cond_info(QueryVectorCondInfo::Ptr value) {\n    vector_cond_info_ = std::move(value);\n  }\n\n  const QueryVectorCondInfo::Ptr &vector_cond_info() const {\n    return vector_cond_info_;\n  }\n\n  void set_query_topn(uint32_t value) {\n    query_topn_ = value;\n  }\n\n  uint32_t query_topn() const {\n    return query_topn_;\n  }\n\n  const std::vector<QueryFieldInfo::Ptr> &query_fields() const {\n    return query_fields_;\n  }\n\n  void add_query_field(QueryFieldInfo::Ptr &&query_field_info) {\n    query_fields_.emplace_back(query_field_info);\n  }\n\n  const std::vector<QueryOrderbyInfo::Ptr> &query_orderbys() const {\n    return query_orderbys_;\n  }\n\n  void add_query_orderby(QueryOrderbyInfo::Ptr &&query_orderby_info) {\n    query_orderbys_.emplace_back(query_orderby_info);\n  }\n\n  void add_select_item_schema_ptr(\n      std::string field, const zvec::FieldSchema *select_item_schema_ptr) {\n    bool is_vector_field = false;\n    if (select_item_schema_ptr != nullptr &&\n        FieldSchema::is_vector_field(select_item_schema_ptr->data_type())) {\n      is_vector_field = true;\n    }\n    add_fetched_schema(field, select_item_schema_ptr);\n    if (is_vector_field) {\n      selected_vector_fields_.emplace_back(field, select_item_schema_ptr);\n    } else {\n      selectd_scalar_field_names_.emplace_back(field);\n    }\n    select_item_schema_ptrs_.emplace_back(std::move(field),\n                                          std::move(select_item_schema_ptr));\n  }\n\n  const std::vector<FieldAndSchema> &select_item_schema_ptrs() const {\n    return select_item_schema_ptrs_;\n  }\n\n  void 
add_forward_filter_schema_ptr(\n      std::string field, const zvec::FieldSchema *forward_filter_schema_ptr) {\n    add_fetched_schema(field, forward_filter_schema_ptr);\n    if (forward_filter_field_names_set_.emplace(field).second) {\n      forward_filter_field_names_.emplace_back(std::move(field));\n    }\n  }\n\n  void add_orderby_item_schema_ptr(\n      std::string field, const zvec::FieldSchema *orderby_item_schema_ptr) {\n    add_fetched_schema(field, orderby_item_schema_ptr);\n    orderby_item_schema_ptrs_.emplace_back(std::move(field),\n                                           orderby_item_schema_ptr);\n  }\n\n  const std::vector<FieldAndSchema> &orderby_item_schema_ptrs() const {\n    return orderby_item_schema_ptrs_;\n  }\n\n  void add_fetched_schema(std::string field,\n                          const zvec::FieldSchema *other_item_schema_ptr) {\n    auto res = all_fetched_schema_schemas_.emplace(std::move(field),\n                                                   other_item_schema_ptr);\n    if (res.second &&\n        !FieldSchema::is_vector_field(other_item_schema_ptr->data_type())) {\n      all_fetched_scalar_field_names_.emplace_back(\n          other_item_schema_ptr->name());\n    }\n  }\n\n  const std::unordered_map<std::string, const FieldSchema *> &\n  all_fetched_schemas() const {\n    return all_fetched_schema_schemas_;\n  }\n\n  bool is_field_fetched(const std::string &field) const {\n    return all_fetched_schema_schemas_.count(field) > 0;\n  }\n\n  const std::vector<std::string> &get_selected_scalar_field_names() {\n    return selectd_scalar_field_names_;\n  }\n\n  const std::vector<std::string> &get_all_fetched_scalar_field_names() {\n    return all_fetched_scalar_field_names_;\n  }\n\n  const std::vector<std::string> &get_forward_filter_field_names() {\n    return forward_filter_field_names_;\n  };\n\n\n  bool exists_in_query_fields(const std::string &field_name) {\n    for (auto query_field_info : query_fields_) {\n      if (field_name 
== query_field_info->field_name()) {\n        return true;\n      }\n    }\n    return false;\n  }\n\n  void set_post_invert_cond(const QueryNode::Ptr &value) {\n    post_invert_cond_ = value;\n  }\n\n  const QueryNode::Ptr &post_invert_cond() const {\n    return post_invert_cond_;\n  }\n\n  void set_post_filter_cond(const QueryNode::Ptr &value) {\n    post_filter_cond_ = value;\n  }\n\n  const QueryNode::Ptr &post_filter_cond() const {\n    return post_filter_cond_;\n  }\n\n  void set_asterisk(bool value) {\n    asterisk_ = value;\n  }\n\n  bool is_asterisk() const {\n    return asterisk_;\n  }\n\n  void set_include_vector(bool value) {\n    include_vector_ = value;\n  }\n\n  bool is_include_vector() const {\n    return include_vector_;\n  }\n\n  void set_include_doc_id(bool value) {\n    include_doc_id_ = value;\n    if (include_doc_id_) {\n      selectd_scalar_field_names_.emplace_back(GLOBAL_DOC_ID);\n      all_fetched_scalar_field_names_.emplace_back(GLOBAL_DOC_ID);\n    }\n  }\n\n  bool is_include_doc_id() const {\n    return include_doc_id_;\n  }\n\n  const std::vector<FieldAndSchema> &selected_vector_fields() const {\n    return selected_vector_fields_;\n  }\n\n  void set_group_by(GroupBy::Ptr group_by) {\n    group_by_ = std::move(group_by);\n  }\n  const GroupBy::Ptr &group_by() const {\n    return group_by_;\n  }\n\n  void set_group_by_schema_ptr(const FieldSchema *group_by_schema_ptr) {\n    group_by_schema_ptr_ = group_by_schema_ptr;\n  }\n  const FieldSchema *group_by_schema_ptr() const {\n    return group_by_schema_ptr_;\n  }\n\n  std::string to_string() const;\n\n  bool is_filter_unsatisfiable() const;\n\n private:\n  QueryNode::Ptr search_cond_{nullptr};\n\n  QueryNode::Ptr invert_cond_{nullptr};\n  QueryNode::Ptr filter_cond_{nullptr};\n\n  QueryVectorCondInfo::Ptr vector_cond_info_{nullptr};\n\n  // these two are for post filtering only\n  QueryNode::Ptr post_invert_cond_{nullptr};\n  QueryNode::Ptr post_filter_cond_{nullptr};\n\n  uint32_t 
query_topn_{0};\n  std::vector<QueryFieldInfo::Ptr> query_fields_{};\n  std::vector<QueryOrderbyInfo::Ptr> query_orderbys_{};\n\n  GroupBy::Ptr group_by_{};\n\n  // derived during analysis\n  std::unordered_set<std::string> forward_filter_field_names_set_{};\n  std::vector<std::string> forward_filter_field_names_{};\n  // USER_ID is a system-required field\n  std::vector<std::string> selectd_scalar_field_names_{USER_ID};\n  std::vector<FieldAndSchema> select_item_schema_ptrs_{};\n  std::vector<FieldAndSchema> orderby_item_schema_ptrs_{};\n  // all field schemas fetched from forward, including user-selected fields and\n  // system-required fields\n  std::unordered_map<std::string, const FieldSchema *>\n      all_fetched_schema_schemas_{};\n  std::vector<std::string> all_fetched_scalar_field_names_{USER_ID,\n                                                           LOCAL_ROW_ID};\n\n  bool asterisk_{false};\n  bool include_vector_{false};\n  bool include_doc_id_{false};\n  std::vector<FieldAndSchema> selected_vector_fields_{};\n  const FieldSchema *group_by_schema_ptr_{};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_info_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_info_helper.h\"\n#include <memory>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec::sqlengine {\n\n\nbool QueryInfoHelper::text_2_data_buf(const std::string &text,\n                                      zvec::DataType data_type,\n                                      std::string *data_buf) {\n  if (data_type == zvec::DataType::INT32) {\n    int32_t int32_val;\n    if (!ailego::StringHelper::ToInt32(text, &int32_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&int32_val, sizeof(int32_t));\n    return true;\n  }\n\n  if (data_type == zvec::DataType::UINT32) {\n    uint32_t uint32_val;\n    if (!ailego::StringHelper::ToUint32(text, &uint32_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&uint32_val, sizeof(uint32_t));\n    return true;\n  }\n\n  if (data_type == zvec::DataType::INT64) {\n    int64_t int64_val;\n    if (!ailego::StringHelper::ToInt64(text, &int64_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&int64_val, sizeof(int64_t));\n    return true;\n  }\n\n  if (data_type == zvec::DataType::UINT64) {\n    uint64_t uint64_val;\n    if (!ailego::StringHelper::ToUint64(text, &uint64_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&uint64_val, sizeof(uint64_t));\n    
return true;\n  }\n\n  if (data_type == zvec::DataType::FLOAT) {\n    float float_val;\n    if (!ailego::StringHelper::ToFloat(text, &float_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&float_val, sizeof(float));\n    return true;\n  }\n\n  if (data_type == zvec::DataType::DOUBLE) {\n    double double_val;\n    if (!ailego::StringHelper::ToDouble(text, &double_val)) {\n      return false;\n    }\n    data_buf->assign((const char *)&double_val, sizeof(double));\n    return true;\n  }\n\n  return false;\n}\n\nbool QueryInfoHelper::data_buf_2_text(const std::string &data_buf,\n                                      zvec::DataType data_type,\n                                      std::string *text) {\n  if (data_type == zvec::DataType::INT32) {\n    *text = ailego::StringHelper::ToString(*(int32_t *)data_buf.data());\n    return true;\n  }\n\n  if (data_type == zvec::DataType::UINT32) {\n    *text = ailego::StringHelper::ToString(*(uint32_t *)data_buf.data());\n    return true;\n  }\n\n  if (data_type == zvec::DataType::INT64) {\n    *text = ailego::StringHelper::ToString(*(int64_t *)data_buf.data());\n    return true;\n  }\n\n  if (data_type == zvec::DataType::UINT64) {\n    *text = ailego::StringHelper::ToString(*(uint64_t *)data_buf.data());\n    return true;\n  }\n\n  if (data_type == zvec::DataType::FLOAT) {\n    *text = ailego::StringHelper::ToString(*(float *)data_buf.data());\n    return true;\n  }\n\n  if (data_type == zvec::DataType::DOUBLE) {\n    *text = ailego::StringHelper::ToString(*(double *)data_buf.data());\n    return true;\n  }\n\n  return false;\n}\n\nvoid QueryInfoHelper::constant_node_data_buf_2_text(DataType data_type,\n                                                    bool is_array_type,\n                                                    QueryNode *node) {\n  if (is_array_type) {  // node->op() == QueryNodeOp::Q_LIST_VALUE\n    QueryListNode *list_node = reinterpret_cast<QueryListNode *>(node);\n    for (auto 
&child_node : list_node->value_expr_list()) {\n      if (std::string numeric_text{\"\"};\n          data_buf_2_text(child_node->text(), data_type, &numeric_text)) {\n        child_node->set_text(std::move(numeric_text));\n      }\n    }\n    return;\n  }\n\n  if (std::string numeric_text{\"\"};\n      data_buf_2_text(node->text(), data_type, &numeric_text)) {\n    node->set_text(std::move(numeric_text));\n  }\n}\n\n\n// rule in argument is for rel_expr in children.\n// rule !or_ancestor is for result.\n// !or_ancestor is shared and enough as fixed result rule for current rules\nbool QueryInfoHelper::traverse_node_by_rule(\n    QueryNode *node, const std::function<bool(QueryRelNode *node)> &rule,\n    SubRootResult *subroot_result, int32_t *num_of_child) {\n  if (node->type() == QueryNode::QueryNodeType::REL_EXPR) {\n    QueryRelNode *rel_node = dynamic_cast<QueryRelNode *>(node);\n    rel_node->set_rule_result(false);  // clear previous if any\n    *num_of_child = 1;\n    bool result = rule(rel_node);\n    if (result) {\n      if (!node->or_ancestor()) {\n        subroot_result->set_result(rel_node, *num_of_child);\n      }\n      rel_node->set_rule_result(true);\n    }\n    return result;\n  }\n\n  int32_t left_num_of_child = 0;\n  int32_t right_num_of_child = 0;\n  QueryNode *left_node = node->left().get();\n  QueryNode *right_node = node->right().get();\n\n  bool left_ok = traverse_node_by_rule(left_node, rule, subroot_result,\n                                       &left_num_of_child);\n  // if (!left_ok) {\n  //   return false;\n  // }\n\n  bool right_ok = traverse_node_by_rule(right_node, rule, subroot_result,\n                                        &right_num_of_child);\n\n  *num_of_child = left_num_of_child + right_num_of_child;\n\n  if (left_ok && right_ok) {\n    if (!node->or_ancestor()) {\n      subroot_result->set_result(node, *num_of_child);\n    }\n    return true;\n  }\n\n  return false;\n}\n\nvoid QueryInfoHelper::find_subroot_by_rule(\n    
QueryNode *root, const std::function<bool(QueryRelNode *node)> &rule,\n    SubRootResult *subroot_result) {\n  int32_t num_of_child = 0;\n  traverse_node_by_rule(root, rule, subroot_result, &num_of_child);\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_info_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include \"query_info.h\"\n\nnamespace zvec::sqlengine {\n\nstruct SubRootResult {\n  QueryNode *subroot;\n  int32_t num_of_child;\n\n  SubRootResult() : subroot(nullptr), num_of_child(0) {}\n\n  SubRootResult(QueryNode *node, int32_t num)\n      : subroot(node), num_of_child(num) {}\n\n  void set_result(QueryNode *node, int32_t num) {\n    if (subroot == nullptr || num_of_child < num) {\n      subroot = node;\n      num_of_child = num;\n    }\n  }\n};\n\nclass QueryInfoHelper {\n public:\n  static bool text_2_data_buf(const std::string &text, zvec::DataType data_type,\n                              std::string *data_buf);\n  static bool data_buf_2_text(const std::string &data_buf,\n                              zvec::DataType data_type, std::string *text);\n  static void constant_node_data_buf_2_text(DataType data_type,\n                                            bool is_array_type,\n                                            QueryNode *node);\n\n  static void find_subroot_by_rule(\n      QueryNode *root, const std::function<bool(QueryRelNode *node)> &rule,\n      SubRootResult *subroot_result);\n\n  static bool traverse_node_by_rule(\n      QueryNode *node, const std::function<bool(QueryRelNode *node)> &rule,\n      SubRootResult *subroot_result, int32_t *num_of_child);\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_node.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_node.h\"\n#include <assert.h>\n#include <sstream>\n#include <zvec/ailego/logger/logger.h>\n#include \"query_info.h\"\n\nnamespace zvec::sqlengine {\n\nvoid QueryNode::set_type_by_op() {\n  QueryNodeType node_type = QueryNodeType::NO_TYPE;\n  switch (op()) {\n    case QueryNodeOp::Q_AND:\n    case QueryNodeOp::Q_OR:\n      node_type = QueryNodeType::LOGIC_EXPR;\n      break;\n\n    case QueryNodeOp::Q_EQ:\n    case QueryNodeOp::Q_NE:\n    case QueryNodeOp::Q_GT:\n    case QueryNodeOp::Q_GE:\n    case QueryNodeOp::Q_LT:\n    case QueryNodeOp::Q_LE:\n    case QueryNodeOp::Q_LIKE:\n    case QueryNodeOp::Q_IN:\n    case QueryNodeOp::Q_CONTAIN_ANY:\n    case QueryNodeOp::Q_CONTAIN_ALL:\n    case QueryNodeOp::Q_IS_NULL:\n    case QueryNodeOp::Q_IS_NOT_NULL:\n      node_type = QueryNodeType::REL_EXPR;\n      break;\n\n    case QueryNodeOp::Q_PLUS:\n    case QueryNodeOp::Q_MINUS:\n    case QueryNodeOp::Q_MUL:\n    case QueryNodeOp::Q_DIV:\n      node_type = QueryNodeType::ARITH_EXPR;\n      break;\n\n    case QueryNodeOp::Q_FUNCTION_CALL:\n      node_type = QueryNodeType::FUNC;\n      break;\n\n    case QueryNodeOp::Q_RANGE_VALUE:\n    case QueryNodeOp::Q_LIST_VALUE:\n    case QueryNodeOp::Q_VECTOR_MATRIX_VALUE:\n    case QueryNodeOp::Q_INT_VALUE:\n    case QueryNodeOp::Q_FLOAT_VALUE:\n    case QueryNodeOp::Q_STRING_VALUE:\n    case 
QueryNodeOp::Q_BOOL_VALUE:\n    case QueryNodeOp::Q_NULL_VALUE:\n      node_type = QueryNodeType::CONST;\n      break;\n    case QueryNodeOp::Q_ID:\n      node_type = QueryNodeType::ID;\n      break;\n    default:\n      break;\n  }\n\n  type_ = node_type;\n}\n\nQueryNode::Ptr QueryNode::detach_from_parent() {\n  if (parent_->left().get() == this) {\n    QueryNode::Ptr tmp = parent_->left();\n    parent_->set_left(nullptr);\n    return tmp;\n  } else {  // if (parent_->right().get() == this)\n    QueryNode::Ptr tmp = parent_->right();\n    parent_->set_right(nullptr);\n    return tmp;\n  }\n}\n\nQueryNode::Ptr QueryNode::replace_from_parent(QueryNode::Ptr new_node_ptr) {\n  new_node_ptr->set_parent(parent_);\n  if (parent_->left().get() == this) {\n    QueryNode::Ptr tmp = parent_->left();\n    parent_->set_left(std::move(new_node_ptr));\n    tmp->set_parent(nullptr);\n    return tmp;\n  } else {  // if (parent_->right().get() == this)\n    QueryNode::Ptr tmp = parent_->right();\n    parent_->set_right(std::move(new_node_ptr));\n    tmp->set_parent(nullptr);\n    return tmp;\n  }\n}\n\nQueryNode::Ptr QueryNode::replace_from_search_cond(QueryNode::Ptr new_node_ptr,\n                                                   QueryInfo *query_info) {\n  if (parent_ == nullptr) {\n    new_node_ptr->set_parent(parent_);\n    QueryNode::Ptr tmp = query_info->search_cond();\n    query_info->set_search_cond(std::move(new_node_ptr));\n    return tmp;\n  }\n  return replace_from_parent(std::move(new_node_ptr));\n}\n\nQueryNode::Ptr QueryNode::detach_from_search_cond(QueryInfo *query_info) {\n  if (parent_ == nullptr) {\n    QueryNode::Ptr tmp = query_info->search_cond();\n    query_info->set_search_cond(nullptr);\n    return tmp;\n  }\n\n  return detach_from_parent();\n}\n\nQueryNode::Ptr QueryNode::detach_from_invert_cond(QueryInfo *query_info) {\n  if (parent_ == nullptr) {\n    QueryNode::Ptr tmp = query_info->invert_cond();\n    query_info->set_invert_cond(nullptr);\n    return 
tmp;\n  }\n\n  return detach_from_parent();\n}\n\nstd::string QueryNode::text() const {\n  std::stringstream stream;\n  switch (type_) {\n    case QueryNodeType::LOGIC_EXPR:\n      stream << \"(\" << left_text() << \") \" << op_name() << \" (\" << right_text()\n             << \")\";\n      break;\n    case QueryNodeType::REL_EXPR:\n      stream << left()->text() << op_name() << right()->text();\n      break;\n    default:\n      break;\n  }\n\n  return stream.str();\n}\n\nbool QueryNode::is_matched(const QueryNode &) const {\n  LOG_ERROR(\"Not implementated. op[%s]\", op_name().c_str());\n  return false;\n}\n\n//========================================================================\n\nstd::string QueryVectorMatrixNode::text() const {\n  return node_->text();\n}\n\n//========================================================================\n\nQueryConstantNode::QueryConstantNode(const std::string &m_value) {\n  value_ = m_value;\n}\n\nstd::string QueryConstantNode::value() {\n  return value_;\n}\n\nstd::string QueryConstantNode::text() const {\n  return value_;\n}\n\nvoid QueryConstantNode::set_text(std::string new_val) {\n  value_ = std::move(new_val);\n}\n\n//========================================================================\n\nQueryIDNode::QueryIDNode(const std::string &m_value) {\n  value_ = m_value;\n}\n\nvoid QueryIDNode::set_value(const std::string &m_value) {\n  value_ = m_value;\n}\n\nstd::string QueryIDNode::value() {\n  return value_;\n}\n\nstd::string QueryIDNode::text() const {\n  return value_;\n}\n\nbool QueryIDNode::is_matched(const QueryNode &other) const {\n  if (other.op() != op()) {\n    return false;\n  }\n  auto &other_id_node = dynamic_cast<const QueryIDNode &>(other);\n  return value_ == other_id_node.value_;\n}\n\n//========================================================================\n\nQueryFuncNode::QueryFuncNode() {\n  set_op(QueryNodeOp::Q_FUNCTION_CALL);\n}\n\nvoid QueryFuncNode::set_func_name_node(QueryNode::Ptr 
func_name_node) {\n  func_name_node_ = std::move(func_name_node);\n  if (func_name_node_->text() == \"feature\") {\n    func_type_ = QueryFuncType::FEATURE;\n  } else {\n    func_type_ = QueryFuncType::NON_FEATURE;\n  }\n}\n\nconst QueryNode::Ptr &QueryFuncNode::get_func_name_node() const {\n  return func_name_node_;\n}\n\nvoid QueryFuncNode::add_argument(QueryNode::Ptr argument_node) {\n  arguments_.emplace_back(std::move(argument_node));\n}\n\nconst std::vector<QueryNode::Ptr> &QueryFuncNode::arguments() const {\n  return arguments_;\n}\n\nstd::string QueryFuncNode::text() const {\n  std::stringstream stream;\n  stream << func_name_node_->text();\n  stream << \"(\";\n\n  int i = 0;\n  for (auto argument : arguments_) {\n    if (i > 0) {\n      stream << \", \";\n    }\n    stream << argument->text();\n    i++;\n  }\n  stream << \")\";\n  return stream.str();\n}\n\nbool QueryFuncNode::is_matched(const QueryNode &other) const {\n  if (other.op() != op()) {\n    return false;\n  }\n  auto &other_func_node = dynamic_cast<const QueryFuncNode &>(other);\n  if (!func_name_node_->is_matched(*other_func_node.func_name_node_)) {\n    return false;\n  }\n  // only id() function with zero arguments is considered matched\n  if (arguments_.empty() && other_func_node.arguments_.empty() &&\n      func_name_node_->text() == \"id\") {\n    return true;\n  }\n  return false;\n}\n\n\n//========================================================================\n\nQueryRelNode::QueryRelNode() {}\n\nvoid QueryRelNode::set_rel_type(RelType value) {\n  rel_type_ = value;\n}\n\nQueryRelNode::RelType QueryRelNode::rel_type() {\n  return rel_type_;\n}\n\nstd::string QueryRelNode::text() const {\n  std::stringstream stream;\n  stream << QueryNode::text();\n  if (rel_type_ == RelType::NO_TYPE) {\n    stream << \"(NO_REL_TYPE)\";\n  } else if (is_feature()) {\n    stream << \"(FEATURE)\";\n  } else if (is_invert()) {\n    stream << \"(INVERT)\";\n  } else if (is_forward()) {\n    stream << 
\"(FORWARD)\";\n  }\n  if (or_ancestor()) {\n    stream << \"(OR_A)\";\n  }\n\n  return stream.str();\n}\n\n//========================================================================\n\nstd::string QueryListNode::text() const {\n  std::stringstream stream;\n  if (exclude_) {\n    stream << \"NOT \";\n  }\n\n  stream << \"(\";\n\n  int i = 0;\n  for (auto value_expr : value_expr_list_) {\n    if (i > 0) {\n      stream << \", \";\n    }\n    stream << value_expr->text();\n    i++;\n  }\n  stream << \")\";\n  return stream.str();\n}\n\nstd::vector<std::string> QueryListNode::to_value_list() {\n  std::vector<std::string> value_list;\n  for (auto &value_expr : value_expr_list_) {\n    value_list.emplace_back(value_expr->text());\n  }\n\n  return value_list;\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <vector>\n#include <zvec/db/query_params.h>\n#include \"db/sqlengine/common/generic_node.h\"\n#include \"db/sqlengine/parser/node.h\"\n\nnamespace zvec::sqlengine {\n\nenum class QueryNodeOp {\n  Q_NONE,\n  Q_AND,\n  Q_OR,\n  Q_EQ,\n  Q_NE,\n  Q_GT,\n  Q_GE,\n  Q_LT,\n  Q_LE,\n  Q_LIKE,\n  Q_IN,\n  Q_CONTAIN_ALL,\n  Q_CONTAIN_ANY,\n  Q_PLUS,\n  Q_MINUS,\n  Q_MUL,\n  Q_DIV,\n  Q_FUNCTION_CALL,\n  Q_RANGE_VALUE,\n  Q_LIST_VALUE,\n  Q_VECTOR_MATRIX_VALUE,\n  Q_INT_VALUE,\n  Q_FLOAT_VALUE,\n  Q_STRING_VALUE,\n  Q_NULL_VALUE,\n  Q_ID,\n  Q_BOOL_VALUE,\n  Q_IS_NULL,\n  Q_IS_NOT_NULL,\n};\n\nclass QueryInfo;\nclass QueryNode : public Generic_Node<QueryNodeOp, QueryNode> {\n public:\n  using Ptr = std::shared_ptr<QueryNode>;\n\n  static inline std::string type_to_str(QueryNodeOp c) {\n    static std::string names[] = {\"NONE\",\n                                  \"and\",\n                                  \"or\",\n                                  \"=\",\n                                  \"!=\",\n                                  \">\",\n                                  \">=\",\n                                  \"<\",\n                                  \"<=\",\n                                  \" LIKE \",\n                                  \" in \",\n                                
  \" contain_all \",\n                                  \" contain_any \",\n                                  \"+\",\n                                  \"-\",\n                                  \"*\",\n                                  \"/\",\n                                  \"FUNCTION_CALL\",\n                                  \"RANGE_VALUE\",\n                                  \"LIST_VALUE\",\n                                  \"VECTOR_MATRIX_VALUE\",\n                                  \"INT_VALUE\",\n                                  \"FLOAT_VALUE\",\n                                  \"STRING_VALUE\",\n                                  \"NULL_VALUE\",\n                                  \"ID\",\n                                  \"BOOL_VALUE\",\n                                  \" IS_NULL \",\n                                  \" IS_NOT_NULL \"};\n\n    return names[static_cast<int>(c)];\n  }\n\n  enum class QueryNodeType {\n    NO_TYPE,\n    LOGIC_EXPR,\n    REL_EXPR,\n    ARITH_EXPR,\n    FUNC,\n    CONST,\n    ID\n  };\n\n public:\n  QueryNode() : Generic_Node(QueryNodeOp::Q_NONE) {}\n  QueryNode(QueryNodeOp m_op) : Generic_Node(m_op) {\n    set_op(m_op);\n  }\n  ~QueryNode() override = default;\n\n  std::string left_text() const {\n    if (left_ == nullptr) {\n      return \"nullptr\";\n    }\n    return left_->text();\n  }\n  std::string right_text() const {\n    if (right_ == nullptr) {\n      return \"nullptr\";\n    }\n    return right_->text();\n  }\n\n\n  virtual bool is_matched(const QueryNode &other) const;\n\n  void set_op(QueryNodeOp value) override {\n    Generic_Node<QueryNodeOp, QueryNode>::set_op(value);\n    set_type_by_op();\n  }\n\n  std::string op_name() const {\n    return type_to_str(op_);\n  }\n\n  QueryNode::QueryNodeType type() const {\n    return type_;\n  }\n\n  void set_level(uint32_t value) {\n    level_ = value;\n  }\n  uint32_t level() {\n    return level_;\n  }\n\n  void set_or_ancestor(bool val = true) {\n    or_ancestor_ = 
val;\n  }\n\n  bool or_ancestor() const {\n    return or_ancestor_;\n  }\n\n  QueryNode::Ptr detach_from_parent();\n\n  QueryNode::Ptr replace_from_parent(QueryNode::Ptr new_query_node);\n\n  QueryNode::Ptr replace_from_search_cond(QueryNode::Ptr new_query_node,\n                                          QueryInfo *query_info);\n\n  QueryNode::Ptr detach_from_search_cond(QueryInfo *query_info_ptr);\n\n  QueryNode::Ptr detach_from_invert_cond(QueryInfo *query_info_ptr);\n\n  virtual std::string text() const override;\n\n  virtual void set_text(std::string /*new_val*/) {\n    /* for QueryConstantNode only */\n    return;\n  }\n\n  std::optional<bool> predictate_result() const {\n    return predictate_result_;\n  }\n  void set_predictate_result(bool result) {\n    predictate_result_ = result;\n  }\n\n protected:\n  void set_type_by_op();\n\n protected:\n  QueryNodeType type_{QueryNodeType::NO_TYPE};\n\n private:\n  uint32_t level_{0};\n  bool or_ancestor_{false};\n  // evaluation result of predication, maybe true, false or unknown\n  std::optional<bool> predictate_result_{std::nullopt};\n};\n\nclass QueryVectorMatrixNode : public QueryNode {\n public:\n  using Ptr = std::shared_ptr<QueryVectorMatrixNode>;\n\n  QueryVectorMatrixNode(std::shared_ptr<VectorMatrixNode> node)\n      : node_(std::move(node)) {}\n\n  std::string text() const override;\n\n  const std::string &matrix() const {\n    return node_->matrix();\n  }\n\n  const std::string &sparse_indices() const {\n    return node_->sparse_indices();\n  }\n\n  const std::string &sparse_values() const {\n    return node_->sparse_values();\n  }\n\n  const QueryParams::Ptr &query_params() const {\n    return node_->query_params();\n  }\n\n private:\n  std::shared_ptr<const VectorMatrixNode> node_{nullptr};\n};\n\nclass QueryConstantNode : public QueryNode {\n public:\n  using Ptr = std::shared_ptr<QueryConstantNode>;\n\n  QueryConstantNode(const std::string &m_value);\n\n  std::string value();\n  std::string text() 
const override;\n\n  void set_text(std::string new_val) override;\n\n private:\n  std::string value_;\n};\n\nclass QueryIDNode : public QueryNode {\n public:\n  using Ptr = std::shared_ptr<QueryIDNode>;\n\n  QueryIDNode(const std::string &m_value);\n\n  void set_value(const std::string &m_value);\n\n  std::string value();\n  std::string text() const override;\n\n  bool is_matched(const QueryNode &other) const override;\n\n private:\n  std::string value_;\n};\n\nclass QueryFuncNode : public QueryNode {\n  enum class QueryFuncType { FEATURE = 0, NON_FEATURE = 1 };\n\n public:\n  using Ptr = std::shared_ptr<QueryFuncNode>;\n\n  QueryFuncNode();\n  virtual ~QueryFuncNode() = default;\n\n  void set_func_name_node(QueryNode::Ptr func_name_node);\n  const QueryNode::Ptr &get_func_name_node() const;\n\n  std::string get_func_name() const {\n    return func_name_node_->text();\n  }\n\n  void add_argument(QueryNode::Ptr argument_node);\n  const std::vector<QueryNode::Ptr> &arguments() const;\n\n  std::string text() const override;\n  bool is_feature_func() {\n    return func_type_ == QueryFuncType::FEATURE;\n  }\n\n  bool is_matched(const QueryNode &other) const override;\n\n private:\n  QueryNode::Ptr func_name_node_{nullptr};\n  std::vector<QueryNode::Ptr> arguments_{};\n  QueryFuncType func_type_{QueryFuncType::FEATURE};\n};\n\nclass QueryRelNode : public QueryNode {\n public:\n  using Ptr = std::shared_ptr<QueryRelNode>;\n\n  enum class RelType { NO_TYPE, FEATURE, INVERT, FORWARD };\n\n  QueryRelNode();\n\n  bool is_feature() const {\n    return rel_type_ == RelType::FEATURE;\n  }\n  bool is_invert() const {\n    return rel_type_ == RelType::INVERT;\n  }\n  bool is_forward() const {\n    return rel_type_ == RelType::FORWARD;\n  }\n\n  void set_vector() {\n    rel_type_ = RelType::FEATURE;\n  }\n  void set_invert() {\n    rel_type_ = RelType::INVERT;\n  }\n  void set_forward() {\n    rel_type_ = RelType::FORWARD;\n  }\n\n  void set_rel_type(RelType value);\n  RelType 
rel_type();\n\n  std::string text() const override;\n\n  bool rule_result() {\n    return rule_result_;\n  }\n\n  void set_rule_result(bool result) {\n    rule_result_ = result;\n  }\n\n private:\n  RelType rel_type_{RelType::NO_TYPE};\n  // rule result is intermediate result for evalute rules\n  bool rule_result_{false};\n};\n\nclass QueryListNode : public QueryNode {\n public:\n  using Ptr = std::shared_ptr<QueryListNode>;\n\n  QueryListNode() {\n    set_op(QueryNodeOp::Q_LIST_VALUE);\n  }\n\n  void add_value_expr(QueryNode::Ptr value_expr) {\n    value_expr_list_.emplace_back(std::move(value_expr));\n  }\n\n  const std::vector<QueryNode::Ptr> &value_expr_list() const {\n    return value_expr_list_;\n  }\n\n  bool exclude() const {\n    return exclude_;\n  }\n\n  void set_exclude(bool val) {\n    exclude_ = val;\n  }\n\n  std::string text() const override;\n\n  std::vector<std::string> to_value_list();\n\n private:\n  std::vector<QueryNode::Ptr> value_expr_list_{};\n  bool exclude_{false};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_node_walker.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_node_walker.h\"\n#include <cstddef>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/index_params.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n#include \"db/index/common/type_helper.h\"\n#include \"db/sqlengine/analyzer/query_node.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"query_info_helper.h\"\n\nnamespace zvec::sqlengine {\n\ninline bool is_numeric_type(zvec::DataType data_type) {\n  // include INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE\n  // use following code to reduce the runtime comparison cost\n  return (data_type >= zvec::DataType::INT32 &&\n          data_type <= zvec::DataType::DOUBLE);\n}\n\nSearchCondCheckWalker::SearchCondCheckWalker(const CollectionSchema &table_ptr)\n    : table_ptr_(table_ptr) {}\n\nControlOp SearchCondCheckWalker::traverse_cond_node(\n    const QueryNode::Ptr &query_node, bool or_ancestor) {\n  if (query_node == nullptr) {\n    return ControlOp::BREAK;\n  }\n\n  ControlOp ret = access(query_node, or_ancestor);\n  if (ret == ControlOp::BREAK) {\n    // finish traversing\n    return ControlOp::BREAK;\n  }\n\n  if (query_node->op() == QueryNodeOp::Q_OR) {\n    or_ancestor = true;\n  }\n\n  if 
(query_node->left() != nullptr) {\n    ControlOp ret2 = traverse_cond_node(query_node->left(), or_ancestor);\n    if (ret2 == ControlOp::BREAK) {\n      return ControlOp::BREAK;\n    }\n  }\n  if (query_node->right() != nullptr) {\n    ControlOp ret2 = traverse_cond_node(query_node->right(), or_ancestor);\n    if (ret2 == ControlOp::BREAK) {\n      return ControlOp::BREAK;\n    }\n  }\n\n  return ControlOp::CONTINUE;\n}\n\nControlOp SearchCondCheckWalker::access(const QueryNode::Ptr &query_node,\n                                        bool or_ancestor) {\n  // set all types of child node or ancestor if it does,\n  // besides query_rel_node, mainly for logic node invert_subroot_node_\n  if (or_ancestor) {\n    query_node->set_or_ancestor();\n  }\n\n  if (query_node->type() != QueryNode::QueryNodeType::REL_EXPR) {\n    return ControlOp::CONTINUE;\n  }\n\n  const QueryRelNode::Ptr &query_rel_node =\n      std::dynamic_pointer_cast<QueryRelNode>(query_node);\n\n  const QueryNode::Ptr &left = query_rel_node->left();\n  const QueryNode::Ptr &right = query_rel_node->right();\n\n  // left side must be single field name or function\n  if (left->op() != QueryNodeOp::Q_ID &&\n      left->op() != QueryNodeOp::Q_FUNCTION_CALL) {\n    err_msg_ =\n        \"left side in relation expr must be single field name or function \"\n        \"call. \" +\n        query_node->text();\n    return ControlOp::BREAK;\n  }\n\n  if (left->op() == QueryNodeOp::Q_FUNCTION_CALL) {\n    if (!left_op_func_check(query_rel_node)) {\n      return ControlOp::BREAK;\n    }\n    return ControlOp::CONTINUE;\n  }\n\n  // right side support constant value only\n  if (right->type() != QueryNode::QueryNodeType::CONST &&\n      right->type() != QueryNode::QueryNodeType::FUNC) {\n    err_msg_ =\n        \"right side in relation expr support constant value or function \"\n        \"only. 
\" +\n        query_node->text();\n    return ControlOp::BREAK;\n  }\n\n  // Function check\n  if (right->type() == QueryNode::QueryNodeType::FUNC) {\n    if (func_check(right) != 0) {\n      return ControlOp::BREAK;\n    }\n  }\n\n  // In phrase check, IN only work with list value\n  if (query_node->op() == QueryNodeOp::Q_IN) {\n    if (right->op() != QueryNodeOp::Q_LIST_VALUE) {\n      err_msg_ =\n          \"In rel expr only works with list value. \" + query_node->text();\n      return ControlOp::BREAK;\n    }\n    QueryListNode::Ptr list_node =\n        std::dynamic_pointer_cast<QueryListNode>(right);\n    if (list_node->value_expr_list().size() > 20000) {\n      err_msg_ = \"In rel expr only support list size no more than 20000 \" +\n                 query_node->text();\n      return ControlOp::BREAK;\n    }\n  }\n\n  std::string field_name = left->text();\n\n  const zvec::FieldSchema *vector_field =\n      table_ptr_.get_vector_field(field_name);\n\n  // check vector index cond\n  if (vector_field != nullptr) {\n    // vector supports eq only.\n    if (query_node->op() != QueryNodeOp::Q_EQ) {\n      err_msg_ = ailego::StringHelper::Concat(\"vector field only support EQ. \",\n                                              query_rel_node->text());\n      return ControlOp::BREAK;\n    }\n    // more than one vector query check.\n    if (vector_rel_ != NULL) {\n      err_msg_ = ailego::StringHelper::Concat(\n          \"more than one vector search is not supported. 
\", vector_rel_->text(),\n          \" \", query_rel_node->text());\n      return ControlOp::BREAK;\n    }\n    vector_rel_ = query_rel_node.get();\n    query_rel_node->set_vector();\n    // arrive here, it is a index condition.\n    return ControlOp::CONTINUE;\n  }\n\n  const zvec::FieldSchema *forward_field =\n      table_ptr_.get_forward_field(field_name);\n  // field must have schema\n  if (!forward_field) {\n    err_msg_ = ailego::StringHelper::Concat(\"field not found in table schema: \",\n                                            query_rel_node->text());\n    return ControlOp::BREAK;\n  }\n\n  // only string field or is null allow empty string value\n  if (right->text().empty() &&\n      (forward_field->element_data_type() != DataType::STRING &&\n       query_node->op() != QueryNodeOp::Q_IS_NULL &&\n       query_node->op() != QueryNodeOp::Q_IS_NOT_NULL)) {\n    err_msg_ = ailego::StringHelper::Concat(\n        \"right side in relation expr is empty: \", query_node->text());\n    return ControlOp::BREAK;\n  }\n\n  if (query_node->op() == QueryNodeOp::Q_IS_NULL ||\n      query_node->op() == QueryNodeOp::Q_IS_NOT_NULL) {\n    if (forward_field->index_params() != nullptr) {\n      add_invert_filter(query_rel_node.get());\n    } else {\n      add_forward_filter(query_rel_node.get(), field_name);\n    }\n    return ControlOp::CONTINUE;\n  }\n\n  // Like phrase check\n  if (query_node->op() == QueryNodeOp::Q_LIKE) {\n    if (!check_like(*forward_field, query_rel_node.get())) {\n      return ControlOp::BREAK;\n    }\n    return ControlOp::CONTINUE;\n  }\n\n\n  // invert index analysis, if field exists on both forward and index,\n  // as long as the cond conform to index cond criteria,\n  // it is regarded as index cond, not forward cond.\n  if (forward_field->index_params() != nullptr) {\n    if (const auto ret = check_array_and_contain_compatible(\n            query_rel_node, forward_field, true);\n        ret != std::nullopt) {\n      return ret.value();\n    
}\n    // data type of index only support string, numeric and vector, and:\n    // string supports all op,\n    const auto field_data_type = forward_field->element_data_type();\n    const bool is_string_field = field_data_type == zvec::DataType::STRING;\n    // numeric supports all op except like, ( as well as bool )\n    const bool is_numeric_field_without_like =\n        query_node->op() != QueryNodeOp::Q_LIKE &&\n        (is_numeric_type(field_data_type) ||\n         field_data_type == zvec::DataType::BOOL);\n\n    // if not satisfy, fall back to forward analysis\n    if (is_string_field || is_numeric_field_without_like) {\n      if (!check_and_convert_value_type(field_data_type, right)) {\n        err_msg_ = ailego::StringHelper::Concat(\n            \"field type and value type not match in relation expr. \",\n            query_rel_node->text());\n        return ControlOp::BREAK;\n      }\n\n      // bool op check\n      if (field_data_type == zvec::DataType::BOOL) {\n        if (query_node->op() != QueryNodeOp::Q_EQ &&\n            query_node->op() != QueryNodeOp::Q_NE) {\n          err_msg_ = \"bool type only support EQ and NQ\";\n          return ControlOp::BREAK;\n        }\n      }\n\n      add_invert_filter(query_rel_node.get());\n      // arrive here, it is a index condition.\n      return ControlOp::CONTINUE;\n    }\n  }\n\n  // compared with index_field's check, following check for forward_field:\n  // 1. support BINARY type\n  // 2. validate `like op only on str` in body instead of `if` condition block\n  // 3. 
use check_and_convert_value_type() instead of field_type_vs_value_type()\n  //        to convert numeric values to str & not support BINARY\n  if (forward_field != nullptr) {\n    if (const auto ret = check_array_and_contain_compatible(\n            query_rel_node, forward_field, false);\n        ret != std::nullopt) {\n      return ret.value();\n    }\n    // data type of forward only support binary, string, bool, int and float\n    if (forward_field->element_data_type() == zvec::DataType::BINARY ||\n        forward_field->element_data_type() == zvec::DataType::STRING ||\n        forward_field->element_data_type() == zvec::DataType::BOOL ||\n        is_numeric_type(forward_field->element_data_type())) {\n      if (!field_type_vs_value_type(forward_field->element_data_type(),\n                                    right)) {\n        err_msg_ = ailego::StringHelper::Concat(\n            \"forward field type and value type not match in relation expr. \",\n            query_rel_node->text());\n        return ControlOp::BREAK;\n      }\n\n      // bool op check\n      if (forward_field->element_data_type() == zvec::DataType::BOOL) {\n        if (query_node->op() != QueryNodeOp::Q_EQ &&\n            query_node->op() != QueryNodeOp::Q_NE) {\n          err_msg_ = \"bool type only support EQ and NQ\";\n          return ControlOp::BREAK;\n        }\n      }\n\n      // like only works on string\n      if (query_node->op() == QueryNodeOp::Q_LIKE &&\n          forward_field->element_data_type() != zvec::DataType::STRING) {\n        err_msg_ = \"operator LIKE only works on string\";\n        return ControlOp::BREAK;\n      }\n\n      add_forward_filter(query_rel_node.get(), field_name);\n      // arrive here, it is a forward.\n      return ControlOp::CONTINUE;\n    } else {\n      err_msg_ = ailego::StringHelper::Concat(\n          \"unsupported data type in relation expr: \", query_rel_node->text());\n      return ControlOp::BREAK;\n    }\n  } else {\n    if (right->op() == 
QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {\n      err_msg_ = ailego::StringHelper::Concat(\n          \"vector vector not supported for schema free field in relation \"\n          \"expr: \",\n          query_rel_node->text());\n      return ControlOp::BREAK;\n    }\n    if (right->type() != QueryNode::QueryNodeType::CONST) {\n      err_msg_ = ailego::StringHelper::Concat(\n          \"only support const for schema free field in relation expr: \",\n          query_rel_node->text());\n      return ControlOp::BREAK;\n    }\n    add_forward_filter(query_rel_node.get(), field_name);\n    // treat as schema free field forward\n    return ControlOp::CONTINUE;\n  }\n}\n\nint SearchCondCheckWalker::func_check(const QueryNode::Ptr &func_node) {\n  const QueryFuncNode::Ptr &func_node_ptr =\n      std::dynamic_pointer_cast<QueryFuncNode>(func_node);\n  const QueryNode::Ptr &func_name_node_ptr =\n      func_node_ptr->get_func_name_node();\n  /* function must be feature */\n  std::string func_name = func_name_node_ptr->text();\n  if (func_name != kFeature) {\n    err_msg_ = \"Function is not supported. \" + func_name;\n    return -1;\n  }\n  size_t size = func_node_ptr->arguments().size();\n  if (size < 1 || size > 4) {\n    err_msg_ = \"vector function has wrong number of arguments. \";\n    return -1;\n  }\n  // do not check arguments here, check during vector transforming\n  return 0;\n}\n\ntl::expected<void, std::string> SearchCondCheckWalker::array_length_func_check(\n    const QueryFuncNode::Ptr &func_node_ptr,\n    const QueryRelNode::Ptr &query_node) {\n  const auto &arguments = func_node_ptr->arguments();\n  if (arguments.size() != 1) {\n    return tl::make_unexpected(\n        \"array_length function should have only one argument. 
\");\n  }\n  auto &arg0 = arguments[0];\n  if (arg0->op() != QueryNodeOp::Q_ID) {\n    return tl::make_unexpected(\n        \"array_length function argument must be a field name, got \" +\n        arg0->op_name());\n  }\n  auto *arg0_schema = table_ptr_.get_field(arg0->text());\n  if (arg0_schema == nullptr) {\n    return tl::make_unexpected(\n        \"array_length argument not found in schema, with \" + arg0->text());\n  }\n  if (!arg0_schema->is_array_type()) {\n    return tl::make_unexpected(\n        \"array_length only support array, got \" +\n        DataTypeCodeBook::AsString(arg0_schema->data_type()));\n  }\n  if (!is_arithematic_compare_op(query_node->op())) {\n    return tl::make_unexpected(\n        \"array_length only support arithematic \"\n        \"compare op, got \" +\n        query_node->op_name());\n  }\n  // only allow integer\n  auto &right_node = query_node->right();\n  if (right_node->op() != QueryNodeOp::Q_INT_VALUE) {\n    return tl::make_unexpected(\n        \"array_length right side only support integer, got \" +\n        right_node->op_name());\n  }\n\n  if (arg0_schema->index_params() != nullptr) {\n    if (!check_and_convert_value_type(DataType::UINT32, right_node)) {\n      return tl::make_unexpected(\n          \"array_length right side only support integer, got \" +\n          right_node->op_name());\n    }\n    add_invert_filter(query_node.get());\n  } else {\n    add_forward_filter(query_node.get(), arg0->text());\n  }\n\n  return {};\n}\n\nbool SearchCondCheckWalker::is_arithematic_compare_op(QueryNodeOp op) {\n  return op == QueryNodeOp::Q_EQ || op == QueryNodeOp::Q_NE ||\n         op == QueryNodeOp::Q_GT || op == QueryNodeOp::Q_GE ||\n         op == QueryNodeOp::Q_LT || op == QueryNodeOp::Q_LE;\n}\n\nbool SearchCondCheckWalker::left_op_func_check(\n    const QueryRelNode::Ptr &query_node) {\n  const QueryFuncNode::Ptr &func_node_ptr =\n      std::dynamic_pointer_cast<QueryFuncNode>(query_node->left());\n  const QueryNode::Ptr 
&func_name_node_ptr =\n      func_node_ptr->get_func_name_node();\n  /* function must be feature */\n  std::string func_name = func_name_node_ptr->text();\n  tl::expected<void, std::string> res;\n  if (func_name == kFuncArrayLength) {\n    res = array_length_func_check(func_node_ptr, query_node);\n  } else {\n    err_msg_ = \"Function is not supported. \" + func_name;\n    return false;\n  }\n  if (!res.has_value()) {\n    err_msg_ = res.error();\n    return false;\n  }\n  return true;\n}\n\nbool SearchCondCheckWalker::check_like(const zvec::FieldSchema &field,\n                                       QueryRelNode *query_node) {\n  auto *like_value_node = query_node->right_node();\n  if (like_value_node->op() != QueryNodeOp::Q_STRING_VALUE) {\n    err_msg_ = \"like phrase only support string now.\";\n    return false;\n  }\n  std::string field_name = query_node->left_node()->text();\n  const InvertIndexParams *param =\n      dynamic_cast<InvertIndexParams *>(field.index_params().get());\n  if (param == nullptr) {\n    add_forward_filter(query_node, std::move(field_name));\n    return true;\n  }\n  int percent_count = 0;\n  int underscore_count = 0;\n  std::string text = like_value_node->text();\n  size_t percent_loc = std::string::npos;\n  for (size_t i = 0; i < text.size(); i++) {\n    char c = text[i];\n    if (c == '\\\\') {\n      // just ignore next character\n      i++;\n      continue;\n    }\n    if (c == '%') {\n      percent_count++;\n      percent_loc = i;\n    } else if (c == '_') {\n      underscore_count++;\n    }\n  }\n  // invert support at most one '%', not support '_'\n  if (percent_count > 1 || underscore_count > 0) {\n    add_forward_filter(query_node, std::move(field_name));\n    return true;\n  }\n  // invert only support % at the end if extended wildcard is not enabled\n  if (param->enable_extended_wildcard() || percent_loc == text.size() - 1) {\n    add_invert_filter(query_node);\n  } else {\n    add_forward_filter(query_node, 
std::move(field_name));\n  }\n  return true;\n}\n\nbool SearchCondCheckWalker::field_type_vs_value_type(\n    zvec::DataType data_type, const QueryNode::Ptr &node) {\n  QueryNodeOp value_type = node->op();\n  if (value_type == QueryNodeOp::Q_LIST_VALUE) {\n    return field_type_vs_list_value_type(data_type, node);\n  }\n\n  if ((data_type == zvec::DataType::BINARY ||\n       data_type == zvec::DataType::STRING) &&\n      value_type != QueryNodeOp::Q_STRING_VALUE) {\n    return false;\n  }\n  if (data_type == zvec::DataType::BOOL &&\n      value_type != QueryNodeOp::Q_BOOL_VALUE) {\n    return false;\n  }\n  if ((data_type == zvec::DataType::INT32 ||\n       data_type == zvec::DataType::INT64 ||\n       data_type == zvec::DataType::UINT32 ||\n       data_type == zvec::DataType::UINT64) &&\n      value_type != QueryNodeOp::Q_INT_VALUE) {\n    return false;\n  }\n  if ((data_type == zvec::DataType::FLOAT ||\n       data_type == zvec::DataType::DOUBLE) &&\n      (value_type != QueryNodeOp::Q_FLOAT_VALUE &&\n       value_type != QueryNodeOp::Q_INT_VALUE)) {\n    return false;\n  }\n\n  if (zvec::FieldSchema::is_vector_field(data_type)) {\n    if (value_type != QueryNodeOp::Q_VECTOR_MATRIX_VALUE &&\n        value_type != QueryNodeOp::Q_FUNCTION_CALL) {\n      return false;\n    }\n    if (value_type == QueryNodeOp::Q_FUNCTION_CALL) {\n      QueryFuncNode::Ptr func_node =\n          std::dynamic_pointer_cast<QueryFuncNode>(node);\n      if (!func_node->is_feature_func()) {\n        return false;\n      }\n    }\n  }\n\n  return true;\n}\n\nbool SearchCondCheckWalker::field_type_vs_list_value_type(\n    zvec::DataType data_type, const QueryNode::Ptr &node) {\n  /* list value only support field with data type string, numeric and bool */\n  if (!(data_type == zvec::DataType::STRING || is_numeric_type(data_type) ||\n        data_type == zvec::DataType::BOOL)) {\n    return false;\n  }\n\n  QueryListNode::Ptr list_node = std::dynamic_pointer_cast<QueryListNode>(node);\n  for 
(auto &value : list_node->value_expr_list()) {\n    // recursively call single value check and convert\n    if (bool ret = field_type_vs_value_type(data_type, value); !ret) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\n// use for invert index, compared with field_type_vs_value_type:\n// 1. not support DataType::BINARY, for the invert index doesn't support it\n// 2. convert numeric to str, for the invert index is based on text\nbool SearchCondCheckWalker::check_and_convert_value_type(\n    zvec::DataType data_type, const QueryNode::Ptr &node) {\n  QueryNodeOp value_type = node->op();\n\n  if (value_type == QueryNodeOp::Q_LIST_VALUE) {\n    return check_and_convert_list_value_type(data_type, node);\n  }\n\n  if (data_type == zvec::DataType::STRING &&\n      value_type != QueryNodeOp::Q_STRING_VALUE) {\n    return false;\n  }\n\n  if (data_type == zvec::DataType::BOOL &&\n      value_type != QueryNodeOp::Q_BOOL_VALUE) {\n    return false;\n  }\n\n  if ((data_type == zvec::DataType::INT32 ||\n       data_type == zvec::DataType::INT64 ||\n       data_type == zvec::DataType::UINT32 ||\n       data_type == zvec::DataType::UINT64) &&\n      value_type != QueryNodeOp::Q_INT_VALUE) {\n    return false;\n  }\n\n  if ((data_type == zvec::DataType::FLOAT ||\n       data_type == zvec::DataType::DOUBLE) &&\n      (value_type != QueryNodeOp::Q_FLOAT_VALUE &&\n       value_type != QueryNodeOp::Q_INT_VALUE)) {\n    return false;\n  }\n\n  if (zvec::FieldSchema::is_vector_field(data_type)) {\n    if (value_type != QueryNodeOp::Q_VECTOR_MATRIX_VALUE &&\n        value_type != QueryNodeOp::Q_FUNCTION_CALL) {\n      return false;\n    }\n    if (value_type == QueryNodeOp::Q_FUNCTION_CALL) {\n      QueryFuncNode::Ptr func_node =\n          std::dynamic_pointer_cast<QueryFuncNode>(node);\n      if (!func_node->is_feature_func()) {\n        return false;\n      }\n    }\n  }\n\n  if (is_numeric_type(data_type)) {\n    std::string numeric_buf;\n    if 
(!QueryInfoHelper::text_2_data_buf(node->text(), data_type,\n                                          &numeric_buf)) {\n      return false;\n    }\n    node->set_text(std::move(numeric_buf));\n  }\n\n  return true;\n}\n\nbool SearchCondCheckWalker::check_and_convert_list_value_type(\n    zvec::DataType data_type, const QueryNode::Ptr &node) {\n  /* list value only support field with data type string and numeric */\n  if (!(data_type == zvec::DataType::STRING || is_numeric_type(data_type) ||\n        data_type == DataType::BOOL)) {\n    return false;\n  }\n\n  QueryListNode::Ptr list_node = std::dynamic_pointer_cast<QueryListNode>(node);\n  for (auto &value : list_node->value_expr_list()) {\n    // recursively call single value check and convert\n    if (bool ret = check_and_convert_value_type(data_type, value); !ret) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\n// RULEs for contain_* operator & array_* data type\n// 1. **only** array__dt supports contain_* op\n//          && array__dt **only** supports contain_* op\n// 2. right hand value should be a list\n// 3. list size should be no more than MAX_ARRAY_FIELD_LEN\n// 4. list value type should be same as index field's sub type\n//    e.g., array_int32 containing a list of int64 is invalid\n// 5. following the restriction of `in`, only string & numeric list is allowed\n// 6. 
(same with other field) if field exists on both forward and index,\n//  the cond should be index one, aka invert index has higher priority\nstd::optional<ControlOp>\nSearchCondCheckWalker::check_array_and_contain_compatible(\n    const QueryRelNode::Ptr &query_rel_node, const FieldSchema *field,\n    bool is_invert_field) {\n  const QueryNode::Ptr &left = query_rel_node->left();\n  const QueryNode::Ptr &right = query_rel_node->right();\n\n  const bool is_contain_op =\n      query_rel_node->op() == QueryNodeOp::Q_CONTAIN_ALL ||\n      query_rel_node->op() == QueryNodeOp::Q_CONTAIN_ANY;\n\n  // not check here\n  if (!(field->is_array_type() || is_contain_op)) {\n    return {};\n  }\n\n  // rule 1, which can be expressed in an alternative way:\n  // is_array & is_contain_op must have same value\n  if (field->is_array_type() ^ is_contain_op) {\n    err_msg_ = ailego::StringHelper::Concat(\n        \"Contain_* rel expr only works with array data type and \"\n        \"array data type only works with contain_* op. filter: \",\n        query_rel_node->text());\n    return ControlOp::BREAK;\n  }\n  // rule 2\n  if (right->op() != QueryNodeOp::Q_LIST_VALUE) {\n    err_msg_ = ailego::StringHelper::Concat(\n        \"Contain_* rel expr only works with list value. 
filter: \",\n        query_rel_node->text());\n    return ControlOp::BREAK;\n  }\n  // rule 3\n  QueryListNode::Ptr list_node =\n      std::dynamic_pointer_cast<QueryListNode>(right);\n  if (list_node->value_expr_list().size() > MAX_ARRAY_FIELD_LEN) {\n    err_msg_ = ailego::StringHelper::Concat(\n        \"Contain_* rel expr only support list size no more than \",\n        ailego::StringHelper::ToString(MAX_ARRAY_FIELD_LEN), \": \",\n        query_rel_node->text());\n    return ControlOp::BREAK;\n  }\n\n  // rule 4, check if list value type matches field's sub type\n  // rule 5 is enforced by check_and_convert_value_type(), inside which\n  // will call check_and_convert_list_value_type() to constrain\n  // the list value type\n  // Similarly to field_type_vs_value_type() func for forward index\n  if (!(is_invert_field\n            ? check_and_convert_value_type(field->element_data_type(), right)\n            : field_type_vs_value_type(field->element_data_type(), right))) {\n    err_msg_ = ailego::StringHelper::Concat(\n        \"field type and value type not match in relation expr. \",\n        query_rel_node->text());\n    return ControlOp::BREAK;\n  }\n\n  // pass all these checks\n  if (is_invert_field) {\n    add_invert_filter(query_rel_node.get());\n  } else {\n    add_forward_filter(query_rel_node.get(), left->text());\n  }\n  return ControlOp::CONTINUE;\n}\n\nvoid SearchCondCheckWalker::add_forward_filter(QueryRelNode *query_rel_node,\n                                               std::string forward_field_name) {\n  forward_filter_field_names_.emplace_back(std::move(forward_field_name));\n  filter_rels_.push_back(query_rel_node);\n  query_rel_node->set_forward();\n}\n\nvoid SearchCondCheckWalker::add_invert_filter(QueryRelNode *query_rel_node) {\n  invert_rels_.push_back(query_rel_node);\n  query_rel_node->set_invert();\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_node_walker.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <optional>\n#include <string>\n#include <vector>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/db/type.h>\n#include \"db/sqlengine/analyzer/query_node.h\"\n#include \"query_info.h\"\n\nnamespace zvec::sqlengine {\n\nenum class ControlOp { CONTINUE, BREAK };\n\nclass SearchCondCheckWalker {\n public:\n  SearchCondCheckWalker(const zvec::CollectionSchema &table_ptr);\n  ControlOp traverse_cond_node(const QueryNode::Ptr &query_node,\n                               bool or_ancestor = false);\n\n\n  const std::vector<std::string> &forward_filter_field_names() {\n    return forward_filter_field_names_;\n  }\n\n  QueryRelNode *vector_rel() const {\n    return vector_rel_;\n  }\n\n  const std::vector<QueryRelNode *> &invert_rels() const {\n    return invert_rels_;\n  }\n\n  const std::vector<QueryRelNode *> &filter_rels() const {\n    return filter_rels_;\n  }\n\n  const std::string err_msg() {\n    return err_msg_;\n  }\n\n private:\n  ControlOp access(const QueryNode::Ptr &query_node, bool or_ancestor);\n\n  std::optional<ControlOp> check_array_and_contain_compatible(\n      const QueryRelNode::Ptr &query_rel_node, const FieldSchema *field,\n      bool is_invert_field);\n\n  int func_check(const QueryNode::Ptr &func_node);\n  bool left_op_func_check(const QueryRelNode::Ptr &query_node);\n  tl::expected<void, 
std::string> array_length_func_check(\n      const QueryFuncNode::Ptr &func_node, const QueryRelNode::Ptr &query_node);\n  bool is_arithematic_compare_op(QueryNodeOp op);\n  bool check_like(const zvec::FieldSchema &field, QueryRelNode *query_node);\n\n  bool field_type_vs_value_type(zvec::DataType data_type,\n                                const QueryNode::Ptr &node);\n\n  bool field_type_vs_list_value_type(zvec::DataType data_type,\n                                     const QueryNode::Ptr &node);\n\n  bool check_and_convert_value_type(zvec::DataType data_type,\n                                    const QueryNode::Ptr &node);\n\n  bool check_and_convert_list_value_type(zvec::DataType data_type,\n                                         const QueryNode::Ptr &node);\n  void add_forward_filter(QueryRelNode *query_rel_node,\n                          std::string forward_field_name);\n  void add_invert_filter(QueryRelNode *query_rel_node);\n\n private:\n  std::string err_msg_;\n  const CollectionSchema &table_ptr_;\n  std::vector<std::string> forward_filter_field_names_{};\n\n  QueryRelNode *vector_rel_{nullptr};\n  std::vector<QueryRelNode *> filter_rels_{};\n  std::vector<QueryRelNode *> invert_rels_{};\n\n  static inline const std::string kFeature = \"feature\";\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_orderby_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_orderby_info.h\"\n\nnamespace zvec::sqlengine {\n\nQueryOrderbyInfo::QueryOrderbyInfo() {}\n\nQueryOrderbyInfo::QueryOrderbyInfo(const std::string &m_field_name, bool m_desc)\n    : field_name_(m_field_name), desc_(m_desc) {}\n\n\nstd::string QueryOrderbyInfo::to_string() const {\n  std::string str = field_name_;\n  str = str + \" \" + (desc_ ? \"DESC\" : \"ASC\");\n  return str;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/query_orderby_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <zvec/db/schema.h>\n\nnamespace zvec::sqlengine {\n\nclass QueryOrderbyInfo {\n public:\n  using Ptr = std::shared_ptr<QueryOrderbyInfo>;\n\n  QueryOrderbyInfo();\n  QueryOrderbyInfo(const std::string &m_field_name, bool m_desc);\n  ~QueryOrderbyInfo() = default;\n\n  void set_field_name(const std::string &value) {\n    field_name_ = value;\n  }\n\n  const std::string &field_name() const {\n    return field_name_;\n  }\n\n  void set_desc() {\n    desc_ = true;\n  }\n  bool is_desc() const {\n    return desc_;\n  }\n\n  void set_field_schema_ptr(const zvec::FieldSchema *field_schema_ptr) {\n    field_schema_ptr_ = field_schema_ptr;\n  }\n  const zvec::FieldSchema *field_schema_ptr() {\n    return field_schema_ptr_;\n  }\n\n  std::string to_string() const;\n\n private:\n  std::string field_name_{\"\"};\n  bool desc_{false};\n\n  const zvec::FieldSchema *field_schema_ptr_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/simple_rewriter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"simple_rewriter.h\"\n#include <array>\n#include <memory>\n#include <vector>\n#include \"db/sqlengine/analyzer/query_node.h\"\n\nnamespace zvec::sqlengine {\n\nvoid SimpleRewriter::rewrite(QueryInfo *query_info) {\n  auto query_node = query_info->search_cond();\n  if (query_node == nullptr) {\n    return;\n  }\n  std::string before_rewrite = query_node->text();\n\n  EqualOrRewriteRule equal_or_rule;\n  ContainRewriteRule contain_rule;\n  std::array<RewriteRule *, 2> rewrite_rules{\n      &equal_or_rule,\n      &contain_rule,\n  };\n  bool rewrited = false;\n  for (auto &rule : rewrite_rules) {\n    rewrited = rule->rewrite(query_node) || rewrited;\n  }\n  if (rewrited) {\n    simplify_tree(query_node, query_info);\n    std::string after_rewrite = query_info->search_cond()->text();\n    LOG_INFO(\"Rewrite filter. 
before[%s] after[%s]\", before_rewrite.c_str(),\n             after_rewrite.c_str());\n  }\n}\n\nvoid SimpleRewriter::simplify_tree(QueryNode::Ptr query_node,\n                                   QueryInfo *query_info) {\n  if (query_node == nullptr ||\n      query_node->type() != QueryNode::QueryNodeType::LOGIC_EXPR) {\n    return;\n  }\n  simplify_tree(query_node->left(), query_info);\n  simplify_tree(query_node->right(), query_info);\n  if (query_node->left() == nullptr) {\n    if (query_node->right() == nullptr) {\n      query_node->detach_from_search_cond(query_info);\n    } else {\n      query_node->replace_from_search_cond(query_node->right(), query_info);\n    }\n  } else {\n    if (query_node->right() == nullptr) {\n      query_node->replace_from_search_cond(query_node->left(), query_info);\n    }\n  }\n}\n\nbool EqualOrRewriteRule::rewrite(QueryNode::Ptr query_node) {\n  rewrite_impl(false, std::move(query_node));\n  return rewrited_;\n}\n\nvoid EqualOrRewriteRule::rewrite_impl(bool is_or, QueryNode::Ptr query_node) {\n  if (query_node == nullptr) {\n    return;\n  }\n  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {\n    bool is_cur_or = query_node->op() == QueryNodeOp::Q_OR;\n    if (!is_cur_or) {\n      cur_ = nullptr;\n    }\n    rewrite_impl(is_cur_or, query_node->left());\n    rewrite_impl(is_cur_or, query_node->right());\n    return;\n  }\n  if (!is_or) {\n    return;\n  }\n  if (query_node->op() == QueryNodeOp::Q_EQ ||\n      query_node->op() == QueryNodeOp::Q_NE) {\n    bool is_ne = query_node->op() == QueryNodeOp::Q_NE;\n    if (cur_ == nullptr || !cur_->left()->is_matched(*query_node->left())) {\n      cur_ = query_node;\n    } else {\n      if (cur_->op() == QueryNodeOp::Q_IN) {\n        QueryListNode::Ptr list =\n            std::dynamic_pointer_cast<QueryListNode>(cur_->right());\n        if (is_ne == list->exclude()) {\n          list->add_value_expr(query_node->right());\n          // detach from parent\n          
query_node->detach_from_parent();\n        } else {\n          cur_ = query_node;\n        }\n      } else {  // EQ || NE\n        if (query_node->op() == cur_->op()) {\n          // create in node\n          QueryListNode::Ptr list = std::make_shared<QueryListNode>();\n          list->add_value_expr(cur_->right());\n          list->add_value_expr(query_node->right());\n          list->set_exclude(is_ne);\n          auto in_node = std::make_shared<QueryRelNode>();\n          in_node->set_left(cur_->left());\n          in_node->set_right(std::move(list));\n          in_node->set_op(QueryNodeOp::Q_IN);\n          // detach from parent\n          query_node->detach_from_parent();\n          cur_->replace_from_parent(in_node);\n          cur_ = std::move(in_node);\n          rewrited_ = true;\n        } else {\n          cur_ = query_node;\n        }\n      }\n    }\n  }\n}\n\nstd::optional<bool> get_predicate_result(const QueryNode *ptr) {\n  if (ptr == nullptr) {\n    return std::nullopt;\n  }\n  return ptr->predictate_result();\n}\n\nbool ContainRewriteRule::rewrite(QueryNode::Ptr query_node) {\n  if (query_node == nullptr) {\n    return false;\n  }\n  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {\n    bool rewrited = rewrite(query_node->left()) || rewrite(query_node->right());\n    auto left_result = get_predicate_result(query_node->left().get());\n    auto right_result = get_predicate_result(query_node->right().get());\n    // ContainRewrite can only generate false predict result value\n    if (left_result.has_value() || right_result.has_value()) {\n      if (query_node->op() == QueryNodeOp::Q_AND) {\n        query_node->set_predictate_result(false);\n      } else if (query_node->op() == QueryNodeOp::Q_OR) {\n        // if left is false\n        if (left_result.has_value()) {\n          // if right is null or false\n          if (right_result.has_value() || query_node->right() == nullptr) {\n            
query_node->set_predictate_result(false);\n          } else {  // if right is not null and not false\n            query_node->left()->detach_from_parent();\n          }\n        } else {\n          if (right_result.has_value()) {\n            if (query_node->left() == nullptr) {\n              // set predict result to false if left is null\n              query_node->set_predictate_result(false);\n            } else {\n              // detach right if left is not null and not false\n              query_node->right()->detach_from_parent();\n            }\n          }\n        }\n      }\n    }\n    return rewrited;\n  }\n  auto op = query_node->op();\n  if (op != QueryNodeOp::Q_CONTAIN_ALL && op != QueryNodeOp::Q_CONTAIN_ANY) {\n    return false;\n  }\n  auto list_node =\n      std::dynamic_pointer_cast<QueryListNode>(query_node->right());\n  if (!list_node->value_expr_list().empty()) {\n    return false;\n  }\n  if ((list_node->exclude() && op == QueryNodeOp::Q_CONTAIN_ALL) ||\n      (!list_node->exclude() && op == QueryNodeOp::Q_CONTAIN_ANY)) {\n    // `not contain_all ()` evaluates to false\n    // `contain_any ()` evaluates to false\n    query_node->set_predictate_result(false);\n    return true;\n  }\n  // `contain_all()` or `not contain_any()` rewrite to `is not null`\n  query_node->set_op(QueryNodeOp::Q_IS_NOT_NULL);\n  auto right = std::make_shared<QueryConstantNode>(\"\");\n  right->set_op(QueryNodeOp::Q_NULL_VALUE);\n  query_node->set_right(std::move(right));\n  return true;\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/analyzer/simple_rewriter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/analyzer/query_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass SimpleRewriter {\n public:\n  SimpleRewriter() = default;\n\n  //! Rewrite query_info->search_cond and simplify tree\n  void rewrite(QueryInfo *query_info);\n\n private:\n  void simplify_tree(QueryNode::Ptr query_node, QueryInfo *query_info);\n};\n\nclass RewriteRule {\n public:\n  RewriteRule() = default;\n  //! Rewrite filter, return whether successfully rewrited.\n  virtual bool rewrite(QueryNode::Ptr query_node) = 0;\n\n protected:\n  bool rewrited_{false};\n};\n\nclass EqualOrRewriteRule : public RewriteRule {\n public:\n  EqualOrRewriteRule() = default;\n\n  bool rewrite(QueryNode::Ptr query_node) override;\n\n private:\n  void rewrite_impl(bool is_or, QueryNode::Ptr query_node);\n\n private:\n  QueryNode::Ptr cur_;\n};\n\n// ContainRewriteRule rewrites contain_all/any ()\nclass ContainRewriteRule : public RewriteRule {\n public:\n  ContainRewriteRule() = default;\n\n  bool rewrite(QueryNode::Ptr query_node) override;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/antlr/SQLLexer.g4",
    "content": "lexer grammar SQLLexer;\n\nchannels{COMMENTS}\n\nOR:                           'OR';\nAND:                          'AND';\nNOT:                          'NOT';\nIN:                           'IN';\nCONTAIN_ALL:                  'CONTAIN_ALL';\nCONTAIN_ANY:                  'CONTAIN_ANY';\nBETWEEN:                      'BETWEEN';\nLIKE:                         'LIKE';\nWHERE:\t\t\t\t\t\t            'WHERE';\nSELECT:\t\t\t\t\t\t            'SELECT';\nFROM:                         'FROM';\nAS:\t\t\t\t\t\t\t              'AS';\nBY:\t\t\t\t\t\t\t              'BY';\nORDER:\t\t\t\t\t\t            'ORDER';\nASC:\t\t\t\t\t\t              'ASC';\nDESC:\t\t\t\t\t\t              'DESC';\nLIMIT:\t\t\t\t\t\t            'LIMIT';\nTRUE_V:                         'TRUE';\nFALSE_V:                        'FALSE';\nIS:                           'IS';\nNULL_V:                         'NULL';\n\nfragment\nUNSIGNED_INTEGER: UNSIGNED_INTEGER_FRAGMENT;\nINTEGER: MINUS_SIGN? UNSIGNED_INTEGER;\n\nfragment\nAPPROXIMATE_NUM_LIT: FLOAT_FRAGMENT ('E' ('+'|'-')? (FLOAT_FRAGMENT | UNSIGNED_INTEGER_FRAGMENT))? ('D' | 'F')?;\nFLOAT: MINUS_SIGN? APPROXIMATE_NUM_LIT;\n\nSQUOTA_STRING: '\\'' (~('\\'' | '\\\\') | '\\\\'. )* '\\'';\nDQUOTA_STRING: '\"' (~('\"' | '\\\\') | '\\\\'. )* '\"';\n\n\nDOT: '.';\nLP: '(';\nRP: ')';\nLMP: '[';\nRMP: ']';\nASTERISK: '*';\nPLUS_SIGN: '+';\nMINUS_SIGN: '-';\nCOMMA: ',';\nSOLIDUS: '/';\nMOD: '%';\nAT_SIGN: '@';\nASSIGN_OP: ':=';\nSHARP_SIGN: '#';\n\nCOLON: ':';\nSEMI: ';';\nLE_OP: '<=';\nGE_OP: '>=';\nNE_OP: '!=';\nCARET_OP: '^';\nTILDE_OP: '~';\nL_OP: '<';\nG_OP: '>';\nE_OP: '=';\nCONCAT_OP: '||';\nUNDERSCORE: '_';\n\nSPACES: [ \\t\\r\\n]+ -> skip;\n\nfragment\nSIMPLE_LETTER\n    : [A-Z]\n    ;\n\nfragment\nUNSIGNED_INTEGER_FRAGMENT: [0-9]+ ;\n\nfragment\nFLOAT_FRAGMENT\n    : UNSIGNED_INTEGER* '.'? 
UNSIGNED_INTEGER+\n    ;\n\n\nVECTOR\n    : LMP (MINUS_SIGN|UNSIGNED_INTEGER_FRAGMENT|FLOAT_FRAGMENT|','| SPACES)+ RMP\n    ;\n\nSINGLE_LINE_COMMENT: '--' ~('\\r' | '\\n')* (NEWLINE | EOF)   -> channel(COMMENTS);\nMULTI_LINE_COMMENT: '/*' .*? '*/'                           -> channel(COMMENTS);\n\nfragment\nNEWLINE: '\\r'? '\\n';\n\nREGULAR_ID: (SIMPLE_LETTER | '_' | '-' | [0-9])+;\n\n\n"
  },
  {
    "path": "src/db/sqlengine/antlr/SQLParser.g4",
    "content": "parser grammar SQLParser;\n\noptions {tokenVocab=SQLLexer;}\n\nswallow_to_semi\n    : ~SEMI+\n    ;\n\ncompilation_unit\n    : (unit_statement (SOLIDUS | SEMI)?)+ EOF\n    ;\n\nlogic_expr_unit\n    : logic_expr EOF\n    ;\n\nunit_statement\n    : dql_statement\n    ;\n\nwhere_clause\n    : WHERE logic_expr\n    ;\n\nlogic_expr\n    : relation_expr\n    | logic_expr AND logic_expr\n    | logic_expr OR logic_expr\n    | enclosed_expr\n    ;\n\nenclosed_expr\n    : LP logic_expr RP\n    ;\n\nrelation_expr\n    : identifier rel_oper value_expr\n    | identifier LIKE value_expr\n    | identifier NOT? IN LP in_value_expr_list RP\n     //LMP'[' RMP']' only used in vector representation\n    | identifier NOT? (CONTAIN_ALL | CONTAIN_ANY) LP in_value_expr_list? RP\n    | identifier IS NOT? NULL_V\n    | function_call rel_oper value_expr\n    ;\n\nrel_oper\n    : E_OP\n    | ne_op\n    | L_OP\n    | G_OP\n    | le_op\n    | ge_op\n    ;\n\nvalue_expr\n    : constant\n    | function_call\n    ;\n\nin_value_expr_list\n    : in_value_expr (COMMA in_value_expr)*\n    ;\n\nin_value_expr\n    : constant_num_and_str\n    | bool_value\n    ;\n\nconstant\n    : numeric\n    | quoted_string\n    | vector_expr\n    | bool_value\n    ;\n\nconstant_num_and_str\n    : numeric\n    | quoted_string\n    ;\n\nmatrix\n    : LMP VECTOR (COMMA VECTOR)* RMP\n    ;\n\nvector_expr\n    : VECTOR\n    | matrix\n    ;\n\nfunction_value_expr\n    : value_expr\n    | identifier\n    ;\n\nfunction_call\n    : identifier LP (function_value_expr (COMMA function_value_expr)*)? RP\n    ;\n\ndql_statement\n    : select_statement\n    ;\n\nselect_statement\n    : SELECT selected_elements from_clause where_clause? order_by_clause? limit_clause?\n    ;\n\nselected_elements\n    : selected_element (COMMA selected_element)*\n    ;\n\nselected_element\n    : ASTERISK\n    | field_name AS? 
field_alias?\n    ;\n\nfrom_clause\n    : FROM tableview_name\n    ;\n\norder_by_clause\n    : ORDER BY order_by_element (COMMA order_by_element)*\n    ;\n\norder_by_element\n    : field_name (ASC | DESC)?\n    ;\n\nlimit_clause\n    : LIMIT int_value\n    ;\n\n\n// $>\n\n/********* schema objects names *********/\n\n\ntableview_name\n    : identifier\n    ;\n\nfield_name\n    : identifier\n    ;\n\ntable_alias\n    : identifier\n    ;\n\nfield_alias\n    : AS? identifier\n    ;\n\nnumeric\n    : int_value\n    | float_value\n    ;\n\nint_value\n    : INTEGER\n    ;\n\nfloat_value\n    : FLOAT\n    ;\n\nquoted_string\n    : SQUOTA_STRING\n\t| DQUOTA_STRING\n    ;\nbool_value\n\t: TRUE_V\n\t| FALSE_V\n\t;\n\nidentifier\n    : regular_id\n    ;\n\nne_op\n    : NE_OP\n    ;\n\nge_op\n    : GE_OP\n    | G_OP E_OP\n    ;\n\nle_op\n    : LE_OP\n    | L_OP E_OP\n    ;\n\nregular_id\n    : REGULAR_ID\n\t| OR\n\t| AND\n\t| NOT\n\t| IN\n\t| BETWEEN\n\t| LIKE\n\t| WHERE\n\t| SELECT\n\t| AS\n\t| BY\n\t| ORDER\n\t| ASC\n\t| DESC\n\t| LIMIT\n\t;\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLLexer.cc",
    "content": "\n// Generated from SQLLexer.g4 by ANTLR 4.8\n\n\n#include \"SQLLexer.h\"\n\n\nusing namespace antlr4;\n\nusing namespace antlr4;\n\nSQLLexer::SQLLexer(CharStream *input) : Lexer(input) {\n  _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA,\n                                            _sharedContextCache);\n}\n\nSQLLexer::~SQLLexer() {\n  delete _interpreter;\n}\n\nstd::string SQLLexer::getGrammarFileName() const {\n  return \"SQLLexer.g4\";\n}\n\nconst std::vector<std::string> &SQLLexer::getRuleNames() const {\n  return _ruleNames;\n}\n\nconst std::vector<std::string> &SQLLexer::getChannelNames() const {\n  return _channelNames;\n}\n\nconst std::vector<std::string> &SQLLexer::getModeNames() const {\n  return _modeNames;\n}\n\nconst std::vector<std::string> &SQLLexer::getTokenNames() const {\n  return _tokenNames;\n}\n\ndfa::Vocabulary &SQLLexer::getVocabulary() const {\n  return _vocabulary;\n}\n\nconst std::vector<uint16_t> SQLLexer::getSerializedATN() const {\n  return _serializedATN;\n}\n\nconst atn::ATN &SQLLexer::getATN() const {\n  return _atn;\n}\n\n\n// Static vars and initialization.\nstd::vector<dfa::DFA> SQLLexer::_decisionToDFA;\natn::PredictionContextCache SQLLexer::_sharedContextCache;\n\n// We own the ATN which in turn owns the ATN states.\natn::ATN SQLLexer::_atn;\nstd::vector<uint16_t> SQLLexer::_serializedATN;\n\nstd::vector<std::string> SQLLexer::_ruleNames = {\"OR\",\n                                                 \"AND\",\n                                                 \"NOT\",\n                                                 \"IN\",\n                                                 \"CONTAIN_ALL\",\n                                                 \"CONTAIN_ANY\",\n                                                 \"BETWEEN\",\n                                                 \"LIKE\",\n                                                 \"WHERE\",\n                                                 
\"SELECT\",\n                                                 \"FROM\",\n                                                 \"AS\",\n                                                 \"BY\",\n                                                 \"ORDER\",\n                                                 \"ASC\",\n                                                 \"DESC\",\n                                                 \"LIMIT\",\n                                                 \"TRUE_V\",\n                                                 \"FALSE_V\",\n                                                 \"IS\",\n                                                 \"NULL_V\",\n                                                 \"UNSIGNED_INTEGER\",\n                                                 \"INTEGER\",\n                                                 \"APPROXIMATE_NUM_LIT\",\n                                                 \"FLOAT\",\n                                                 \"SQUOTA_STRING\",\n                                                 \"DQUOTA_STRING\",\n                                                 \"DOT\",\n                                                 \"LP\",\n                                                 \"RP\",\n                                                 \"LMP\",\n                                                 \"RMP\",\n                                                 \"ASTERISK\",\n                                                 \"PLUS_SIGN\",\n                                                 \"MINUS_SIGN\",\n                                                 \"COMMA\",\n                                                 \"SOLIDUS\",\n                                                 \"MOD\",\n                                                 \"AT_SIGN\",\n                                                 \"ASSIGN_OP\",\n                                                 \"SHARP_SIGN\",\n                                                 
\"COLON\",\n                                                 \"SEMI\",\n                                                 \"LE_OP\",\n                                                 \"GE_OP\",\n                                                 \"NE_OP\",\n                                                 \"CARET_OP\",\n                                                 \"TILDE_OP\",\n                                                 \"L_OP\",\n                                                 \"G_OP\",\n                                                 \"E_OP\",\n                                                 \"CONCAT_OP\",\n                                                 \"UNDERSCORE\",\n                                                 \"SPACES\",\n                                                 \"SIMPLE_LETTER\",\n                                                 \"UNSIGNED_INTEGER_FRAGMENT\",\n                                                 \"FLOAT_FRAGMENT\",\n                                                 \"VECTOR\",\n                                                 \"SINGLE_LINE_COMMENT\",\n                                                 \"MULTI_LINE_COMMENT\",\n                                                 \"NEWLINE\",\n                                                 \"REGULAR_ID\"};\n\nstd::vector<std::string> SQLLexer::_channelNames = {\"DEFAULT_TOKEN_CHANNEL\",\n                                                    \"HIDDEN\", \"COMMENTS\"};\n\nstd::vector<std::string> SQLLexer::_modeNames = {\"DEFAULT_MODE\"};\n\nstd::vector<std::string> SQLLexer::_literalNames = {\"\",\n                                                    \"'OR'\",\n                                                    \"'AND'\",\n                                                    \"'NOT'\",\n                                                    \"'IN'\",\n                                                    \"'CONTAIN_ALL'\",\n                                                    
\"'CONTAIN_ANY'\",\n                                                    \"'BETWEEN'\",\n                                                    \"'LIKE'\",\n                                                    \"'WHERE'\",\n                                                    \"'SELECT'\",\n                                                    \"'FROM'\",\n                                                    \"'AS'\",\n                                                    \"'BY'\",\n                                                    \"'ORDER'\",\n                                                    \"'ASC'\",\n                                                    \"'DESC'\",\n                                                    \"'LIMIT'\",\n                                                    \"'TRUE'\",\n                                                    \"'FALSE'\",\n                                                    \"'IS'\",\n                                                    \"'NULL'\",\n                                                    \"\",\n                                                    \"\",\n                                                    \"\",\n                                                    \"\",\n                                                    \"'.'\",\n                                                    \"'('\",\n                                                    \"')'\",\n                                                    \"'['\",\n                                                    \"']'\",\n                                                    \"'*'\",\n                                                    \"'+'\",\n                                                    \"'-'\",\n                                                    \"','\",\n                                                    \"'/'\",\n                                                    \"'%'\",\n                                                    \"'@'\",\n                       
                             \"':='\",\n                                                    \"'#'\",\n                                                    \"':'\",\n                                                    \"';'\",\n                                                    \"'<='\",\n                                                    \"'>='\",\n                                                    \"'!='\",\n                                                    \"'^'\",\n                                                    \"'~'\",\n                                                    \"'<'\",\n                                                    \"'>'\",\n                                                    \"'='\",\n                                                    \"'||'\",\n                                                    \"'_'\"};\n\nstd::vector<std::string> SQLLexer::_symbolicNames = {\"\",\n                                                     \"OR\",\n                                                     \"AND\",\n                                                     \"NOT\",\n                                                     \"IN\",\n                                                     \"CONTAIN_ALL\",\n                                                     \"CONTAIN_ANY\",\n                                                     \"BETWEEN\",\n                                                     \"LIKE\",\n                                                     \"WHERE\",\n                                                     \"SELECT\",\n                                                     \"FROM\",\n                                                     \"AS\",\n                                                     \"BY\",\n                                                     \"ORDER\",\n                                                     \"ASC\",\n                                                     \"DESC\",\n                                                     
\"LIMIT\",\n                                                     \"TRUE_V\",\n                                                     \"FALSE_V\",\n                                                     \"IS\",\n                                                     \"NULL_V\",\n                                                     \"INTEGER\",\n                                                     \"FLOAT\",\n                                                     \"SQUOTA_STRING\",\n                                                     \"DQUOTA_STRING\",\n                                                     \"DOT\",\n                                                     \"LP\",\n                                                     \"RP\",\n                                                     \"LMP\",\n                                                     \"RMP\",\n                                                     \"ASTERISK\",\n                                                     \"PLUS_SIGN\",\n                                                     \"MINUS_SIGN\",\n                                                     \"COMMA\",\n                                                     \"SOLIDUS\",\n                                                     \"MOD\",\n                                                     \"AT_SIGN\",\n                                                     \"ASSIGN_OP\",\n                                                     \"SHARP_SIGN\",\n                                                     \"COLON\",\n                                                     \"SEMI\",\n                                                     \"LE_OP\",\n                                                     \"GE_OP\",\n                                                     \"NE_OP\",\n                                                     \"CARET_OP\",\n                                                     \"TILDE_OP\",\n                                                     \"L_OP\",\n    
                                                 \"G_OP\",\n                                                     \"E_OP\",\n                                                     \"CONCAT_OP\",\n                                                     \"UNDERSCORE\",\n                                                     \"SPACES\",\n                                                     \"VECTOR\",\n                                                     \"SINGLE_LINE_COMMENT\",\n                                                     \"MULTI_LINE_COMMENT\",\n                                                     \"REGULAR_ID\"};\n\ndfa::Vocabulary SQLLexer::_vocabulary(_literalNames, _symbolicNames);\n\nstd::vector<std::string> SQLLexer::_tokenNames;\n\nSQLLexer::Initializer::Initializer() {\n  // This code could be in a static initializer lambda, but VS doesn't allow\n  // access to private class members from there.\n  for (size_t i = 0; i < _symbolicNames.size(); ++i) {\n    std::string name = _vocabulary.getLiteralName(i);\n    if (name.empty()) {\n      name = _vocabulary.getSymbolicName(i);\n    }\n\n    if (name.empty()) {\n      _tokenNames.push_back(\"<INVALID>\");\n    } else {\n      _tokenNames.push_back(name);\n    }\n  }\n\n  _serializedATN = {\n      0x3,   0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,\n      0x2,   0x3a,   0x1ab,  0x8,    0x1,    0x4,    0x2,    0x9,    0x2,\n      0x4,   0x3,    0x9,    0x3,    0x4,    0x4,    0x9,    0x4,    0x4,\n      0x5,   0x9,    0x5,    0x4,    0x6,    0x9,    0x6,    0x4,    0x7,\n      0x9,   0x7,    0x4,    0x8,    0x9,    0x8,    0x4,    0x9,    0x9,\n      0x9,   0x4,    0xa,    0x9,    0xa,    0x4,    0xb,    0x9,    0xb,\n      0x4,   0xc,    0x9,    0xc,    0x4,    0xd,    0x9,    0xd,    0x4,\n      0xe,   0x9,    0xe,    0x4,    0xf,    0x9,    0xf,    0x4,    0x10,\n      0x9,   0x10,   0x4,    0x11,   0x9,    0x11,   0x4,    0x12,   0x9,\n      0x12,  0x4,    0x13,   0x9,    0x13,   0x4,    
0x14,   0x9,    0x14,\n      0x4,   0x15,   0x9,    0x15,   0x4,    0x16,   0x9,    0x16,   0x4,\n      0x17,  0x9,    0x17,   0x4,    0x18,   0x9,    0x18,   0x4,    0x19,\n      0x9,   0x19,   0x4,    0x1a,   0x9,    0x1a,   0x4,    0x1b,   0x9,\n      0x1b,  0x4,    0x1c,   0x9,    0x1c,   0x4,    0x1d,   0x9,    0x1d,\n      0x4,   0x1e,   0x9,    0x1e,   0x4,    0x1f,   0x9,    0x1f,   0x4,\n      0x20,  0x9,    0x20,   0x4,    0x21,   0x9,    0x21,   0x4,    0x22,\n      0x9,   0x22,   0x4,    0x23,   0x9,    0x23,   0x4,    0x24,   0x9,\n      0x24,  0x4,    0x25,   0x9,    0x25,   0x4,    0x26,   0x9,    0x26,\n      0x4,   0x27,   0x9,    0x27,   0x4,    0x28,   0x9,    0x28,   0x4,\n      0x29,  0x9,    0x29,   0x4,    0x2a,   0x9,    0x2a,   0x4,    0x2b,\n      0x9,   0x2b,   0x4,    0x2c,   0x9,    0x2c,   0x4,    0x2d,   0x9,\n      0x2d,  0x4,    0x2e,   0x9,    0x2e,   0x4,    0x2f,   0x9,    0x2f,\n      0x4,   0x30,   0x9,    0x30,   0x4,    0x31,   0x9,    0x31,   0x4,\n      0x32,  0x9,    0x32,   0x4,    0x33,   0x9,    0x33,   0x4,    0x34,\n      0x9,   0x34,   0x4,    0x35,   0x9,    0x35,   0x4,    0x36,   0x9,\n      0x36,  0x4,    0x37,   0x9,    0x37,   0x4,    0x38,   0x9,    0x38,\n      0x4,   0x39,   0x9,    0x39,   0x4,    0x3a,   0x9,    0x3a,   0x4,\n      0x3b,  0x9,    0x3b,   0x4,    0x3c,   0x9,    0x3c,   0x4,    0x3d,\n      0x9,   0x3d,   0x4,    0x3e,   0x9,    0x3e,   0x4,    0x3f,   0x9,\n      0x3f,  0x3,    0x2,    0x3,    0x2,    0x3,    0x2,    0x3,    0x3,\n      0x3,   0x3,    0x3,    0x3,    0x3,    0x3,    0x3,    0x4,    0x3,\n      0x4,   0x3,    0x4,    0x3,    0x4,    0x3,    0x5,    0x3,    0x5,\n      0x3,   0x5,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,\n      0x6,   0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,\n      0x3,   0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,\n      0x7,   0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,    0x7,\n      0x3,   0x7, 
   0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,\n      0x7,   0x3,    0x7,    0x3,    0x7,    0x3,    0x8,    0x3,    0x8,\n      0x3,   0x8,    0x3,    0x8,    0x3,    0x8,    0x3,    0x8,    0x3,\n      0x8,   0x3,    0x8,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,\n      0x3,   0x9,    0x3,    0x9,    0x3,    0xa,    0x3,    0xa,    0x3,\n      0xa,   0x3,    0xa,    0x3,    0xa,    0x3,    0xa,    0x3,    0xb,\n      0x3,   0xb,    0x3,    0xb,    0x3,    0xb,    0x3,    0xb,    0x3,\n      0xb,   0x3,    0xb,    0x3,    0xc,    0x3,    0xc,    0x3,    0xc,\n      0x3,   0xc,    0x3,    0xc,    0x3,    0xd,    0x3,    0xd,    0x3,\n      0xd,   0x3,    0xe,    0x3,    0xe,    0x3,    0xe,    0x3,    0xf,\n      0x3,   0xf,    0x3,    0xf,    0x3,    0xf,    0x3,    0xf,    0x3,\n      0xf,   0x3,    0x10,   0x3,    0x10,   0x3,    0x10,   0x3,    0x10,\n      0x3,   0x11,   0x3,    0x11,   0x3,    0x11,   0x3,    0x11,   0x3,\n      0x11,  0x3,    0x12,   0x3,    0x12,   0x3,    0x12,   0x3,    0x12,\n      0x3,   0x12,   0x3,    0x12,   0x3,    0x13,   0x3,    0x13,   0x3,\n      0x13,  0x3,    0x13,   0x3,    0x13,   0x3,    0x14,   0x3,    0x14,\n      0x3,   0x14,   0x3,    0x14,   0x3,    0x14,   0x3,    0x14,   0x3,\n      0x15,  0x3,    0x15,   0x3,    0x15,   0x3,    0x16,   0x3,    0x16,\n      0x3,   0x16,   0x3,    0x16,   0x3,    0x16,   0x3,    0x17,   0x3,\n      0x17,  0x3,    0x18,   0x5,    0x18,   0xf6,   0xa,    0x18,   0x3,\n      0x18,  0x3,    0x18,   0x3,    0x19,   0x3,    0x19,   0x3,    0x19,\n      0x5,   0x19,   0xfd,   0xa,    0x19,   0x3,    0x19,   0x3,    0x19,\n      0x5,   0x19,   0x101,  0xa,    0x19,   0x5,    0x19,   0x103,  0xa,\n      0x19,  0x3,    0x19,   0x5,    0x19,   0x106,  0xa,    0x19,   0x3,\n      0x1a,  0x5,    0x1a,   0x109,  0xa,    0x1a,   0x3,    0x1a,   0x3,\n      0x1a,  0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1b,\n      0x7,   0x1b,   0x111,  0xa,    0x1b,   0xc,    0x1b,   
0xe,    0x1b,\n      0x114, 0xb,    0x1b,   0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1c,\n      0x3,   0x1c,   0x3,    0x1c,   0x3,    0x1c,   0x7,    0x1c,   0x11c,\n      0xa,   0x1c,   0xc,    0x1c,   0xe,    0x1c,   0x11f,  0xb,    0x1c,\n      0x3,   0x1c,   0x3,    0x1c,   0x3,    0x1d,   0x3,    0x1d,   0x3,\n      0x1e,  0x3,    0x1e,   0x3,    0x1f,   0x3,    0x1f,   0x3,    0x20,\n      0x3,   0x20,   0x3,    0x21,   0x3,    0x21,   0x3,    0x22,   0x3,\n      0x22,  0x3,    0x23,   0x3,    0x23,   0x3,    0x24,   0x3,    0x24,\n      0x3,   0x25,   0x3,    0x25,   0x3,    0x26,   0x3,    0x26,   0x3,\n      0x27,  0x3,    0x27,   0x3,    0x28,   0x3,    0x28,   0x3,    0x29,\n      0x3,   0x29,   0x3,    0x29,   0x3,    0x2a,   0x3,    0x2a,   0x3,\n      0x2b,  0x3,    0x2b,   0x3,    0x2c,   0x3,    0x2c,   0x3,    0x2d,\n      0x3,   0x2d,   0x3,    0x2d,   0x3,    0x2e,   0x3,    0x2e,   0x3,\n      0x2e,  0x3,    0x2f,   0x3,    0x2f,   0x3,    0x2f,   0x3,    0x30,\n      0x3,   0x30,   0x3,    0x31,   0x3,    0x31,   0x3,    0x32,   0x3,\n      0x32,  0x3,    0x33,   0x3,    0x33,   0x3,    0x34,   0x3,    0x34,\n      0x3,   0x35,   0x3,    0x35,   0x3,    0x35,   0x3,    0x36,   0x3,\n      0x36,  0x3,    0x37,   0x6,    0x37,   0x15d,  0xa,    0x37,   0xd,\n      0x37,  0xe,    0x37,   0x15e,  0x3,    0x37,   0x3,    0x37,   0x3,\n      0x38,  0x3,    0x38,   0x3,    0x39,   0x6,    0x39,   0x166,  0xa,\n      0x39,  0xd,    0x39,   0xe,    0x39,   0x167,  0x3,    0x3a,   0x7,\n      0x3a,  0x16b,  0xa,    0x3a,   0xc,    0x3a,   0xe,    0x3a,   0x16e,\n      0xb,   0x3a,   0x3,    0x3a,   0x5,    0x3a,   0x171,  0xa,    0x3a,\n      0x3,   0x3a,   0x6,    0x3a,   0x174,  0xa,    0x3a,   0xd,    0x3a,\n      0xe,   0x3a,   0x175,  0x3,    0x3b,   0x3,    0x3b,   0x3,    0x3b,\n      0x3,   0x3b,   0x3,    0x3b,   0x3,    0x3b,   0x6,    0x3b,   0x17e,\n      0xa,   0x3b,   0xd,    0x3b,   0xe,    0x3b,   0x17f,  0x3,    0x3b,\n      0x3,   
0x3b,   0x3,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x3,\n      0x3c,  0x7,    0x3c,   0x188,  0xa,    0x3c,   0xc,    0x3c,   0xe,\n      0x3c,  0x18b,  0xb,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x5,\n      0x3c,  0x18f,  0xa,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x3,\n      0x3d,  0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,   0x7,    0x3d,\n      0x197, 0xa,    0x3d,   0xc,    0x3d,   0xe,    0x3d,   0x19a,  0xb,\n      0x3d,  0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,\n      0x3,   0x3d,   0x3,    0x3e,   0x5,    0x3e,   0x1a2,  0xa,    0x3e,\n      0x3,   0x3e,   0x3,    0x3e,   0x3,    0x3f,   0x3,    0x3f,   0x6,\n      0x3f,  0x1a8,  0xa,    0x3f,   0xd,    0x3f,   0xe,    0x3f,   0x1a9,\n      0x3,   0x198,  0x2,    0x40,   0x3,    0x3,    0x5,    0x4,    0x7,\n      0x5,   0x9,    0x6,    0xb,    0x7,    0xd,    0x8,    0xf,    0x9,\n      0x11,  0xa,    0x13,   0xb,    0x15,   0xc,    0x17,   0xd,    0x19,\n      0xe,   0x1b,   0xf,    0x1d,   0x10,   0x1f,   0x11,   0x21,   0x12,\n      0x23,  0x13,   0x25,   0x14,   0x27,   0x15,   0x29,   0x16,   0x2b,\n      0x17,  0x2d,   0x2,    0x2f,   0x18,   0x31,   0x2,    0x33,   0x19,\n      0x35,  0x1a,   0x37,   0x1b,   0x39,   0x1c,   0x3b,   0x1d,   0x3d,\n      0x1e,  0x3f,   0x1f,   0x41,   0x20,   0x43,   0x21,   0x45,   0x22,\n      0x47,  0x23,   0x49,   0x24,   0x4b,   0x25,   0x4d,   0x26,   0x4f,\n      0x27,  0x51,   0x28,   0x53,   0x29,   0x55,   0x2a,   0x57,   0x2b,\n      0x59,  0x2c,   0x5b,   0x2d,   0x5d,   0x2e,   0x5f,   0x2f,   0x61,\n      0x30,  0x63,   0x31,   0x65,   0x32,   0x67,   0x33,   0x69,   0x34,\n      0x6b,  0x35,   0x6d,   0x36,   0x6f,   0x2,    0x71,   0x2,    0x73,\n      0x2,   0x75,   0x37,   0x77,   0x38,   0x79,   0x39,   0x7b,   0x2,\n      0x7d,  0x3a,   0x3,    0x2,    0xb,    0x4,    0x2,    0x2d,   0x2d,\n      0x2f,  0x2f,   0x4,    0x2,    0x46,   0x46,   0x48,   0x48,   0x4,\n      0x2,   0x29,   0x29,   0x5e,   0x5e,   
0x4,    0x2,    0x24,   0x24,\n      0x5e,  0x5e,   0x5,    0x2,    0xb,    0xc,    0xf,    0xf,    0x22,\n      0x22,  0x3,    0x2,    0x43,   0x5c,   0x3,    0x2,    0x32,   0x3b,\n      0x4,   0x2,    0xc,    0xc,    0xf,    0xf,    0x5,    0x2,    0x2f,\n      0x2f,  0x32,   0x3b,   0x61,   0x61,   0x2,    0x1be,  0x2,    0x3,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x5,    0x3,    0x2,    0x2,\n      0x2,   0x2,    0x7,    0x3,    0x2,    0x2,    0x2,    0x2,    0x9,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0xb,    0x3,    0x2,    0x2,\n      0x2,   0x2,    0xd,    0x3,    0x2,    0x2,    0x2,    0x2,    0xf,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x11,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x13,   0x3,    0x2,    0x2,    0x2,    0x2,    0x15,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x17,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x19,   0x3,    0x2,    0x2,    0x2,    0x2,    0x1b,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x1d,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x1f,   0x3,    0x2,    0x2,    0x2,    0x2,    0x21,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x23,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x25,   0x3,    0x2,    0x2,    0x2,    0x2,    0x27,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x29,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x2b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x2f,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x33,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x35,   0x3,    0x2,    0x2,    0x2,    0x2,    0x37,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x39,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x3b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x3d,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x3f,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x41,   0x3,    0x2,    0x2,    0x2,    0x2,    0x43,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x45,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x47,   0x3,    0x2,    0x2,    0x2,    0x2,    0x49,\n      
0x3,   0x2,    0x2,    0x2,    0x2,    0x4b,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x4d,   0x3,    0x2,    0x2,    0x2,    0x2,    0x4f,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x51,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x53,   0x3,    0x2,    0x2,    0x2,    0x2,    0x55,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x57,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x59,   0x3,    0x2,    0x2,    0x2,    0x2,    0x5b,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x5d,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x5f,   0x3,    0x2,    0x2,    0x2,    0x2,    0x61,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x63,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x65,   0x3,    0x2,    0x2,    0x2,    0x2,    0x67,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x69,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x6b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x6d,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x75,   0x3,    0x2,    0x2,\n      0x2,   0x2,    0x77,   0x3,    0x2,    0x2,    0x2,    0x2,    0x79,\n      0x3,   0x2,    0x2,    0x2,    0x2,    0x7d,   0x3,    0x2,    0x2,\n      0x2,   0x3,    0x7f,   0x3,    0x2,    0x2,    0x2,    0x5,    0x82,\n      0x3,   0x2,    0x2,    0x2,    0x7,    0x86,   0x3,    0x2,    0x2,\n      0x2,   0x9,    0x8a,   0x3,    0x2,    0x2,    0x2,    0xb,    0x8d,\n      0x3,   0x2,    0x2,    0x2,    0xd,    0x99,   0x3,    0x2,    0x2,\n      0x2,   0xf,    0xa5,   0x3,    0x2,    0x2,    0x2,    0x11,   0xad,\n      0x3,   0x2,    0x2,    0x2,    0x13,   0xb2,   0x3,    0x2,    0x2,\n      0x2,   0x15,   0xb8,   0x3,    0x2,    0x2,    0x2,    0x17,   0xbf,\n      0x3,   0x2,    0x2,    0x2,    0x19,   0xc4,   0x3,    0x2,    0x2,\n      0x2,   0x1b,   0xc7,   0x3,    0x2,    0x2,    0x2,    0x1d,   0xca,\n      0x3,   0x2,    0x2,    0x2,    0x1f,   0xd0,   0x3,    0x2,    0x2,\n      0x2,   0x21,   0xd4,   0x3,    0x2,    0x2,    0x2,    0x23,   0xd9,\n      0x3,   0x2,    0x2,    0x2,    0x25, 
  0xdf,   0x3,    0x2,    0x2,\n      0x2,   0x27,   0xe4,   0x3,    0x2,    0x2,    0x2,    0x29,   0xea,\n      0x3,   0x2,    0x2,    0x2,    0x2b,   0xed,   0x3,    0x2,    0x2,\n      0x2,   0x2d,   0xf2,   0x3,    0x2,    0x2,    0x2,    0x2f,   0xf5,\n      0x3,   0x2,    0x2,    0x2,    0x31,   0xf9,   0x3,    0x2,    0x2,\n      0x2,   0x33,   0x108,  0x3,    0x2,    0x2,    0x2,    0x35,   0x10c,\n      0x3,   0x2,    0x2,    0x2,    0x37,   0x117,  0x3,    0x2,    0x2,\n      0x2,   0x39,   0x122,  0x3,    0x2,    0x2,    0x2,    0x3b,   0x124,\n      0x3,   0x2,    0x2,    0x2,    0x3d,   0x126,  0x3,    0x2,    0x2,\n      0x2,   0x3f,   0x128,  0x3,    0x2,    0x2,    0x2,    0x41,   0x12a,\n      0x3,   0x2,    0x2,    0x2,    0x43,   0x12c,  0x3,    0x2,    0x2,\n      0x2,   0x45,   0x12e,  0x3,    0x2,    0x2,    0x2,    0x47,   0x130,\n      0x3,   0x2,    0x2,    0x2,    0x49,   0x132,  0x3,    0x2,    0x2,\n      0x2,   0x4b,   0x134,  0x3,    0x2,    0x2,    0x2,    0x4d,   0x136,\n      0x3,   0x2,    0x2,    0x2,    0x4f,   0x138,  0x3,    0x2,    0x2,\n      0x2,   0x51,   0x13a,  0x3,    0x2,    0x2,    0x2,    0x53,   0x13d,\n      0x3,   0x2,    0x2,    0x2,    0x55,   0x13f,  0x3,    0x2,    0x2,\n      0x2,   0x57,   0x141,  0x3,    0x2,    0x2,    0x2,    0x59,   0x143,\n      0x3,   0x2,    0x2,    0x2,    0x5b,   0x146,  0x3,    0x2,    0x2,\n      0x2,   0x5d,   0x149,  0x3,    0x2,    0x2,    0x2,    0x5f,   0x14c,\n      0x3,   0x2,    0x2,    0x2,    0x61,   0x14e,  0x3,    0x2,    0x2,\n      0x2,   0x63,   0x150,  0x3,    0x2,    0x2,    0x2,    0x65,   0x152,\n      0x3,   0x2,    0x2,    0x2,    0x67,   0x154,  0x3,    0x2,    0x2,\n      0x2,   0x69,   0x156,  0x3,    0x2,    0x2,    0x2,    0x6b,   0x159,\n      0x3,   0x2,    0x2,    0x2,    0x6d,   0x15c,  0x3,    0x2,    0x2,\n      0x2,   0x6f,   0x162,  0x3,    0x2,    0x2,    0x2,    0x71,   0x165,\n      0x3,   0x2,    0x2,    0x2,    0x73,   0x16c,  0x3,    0x2,    
0x2,\n      0x2,   0x75,   0x177,  0x3,    0x2,    0x2,    0x2,    0x77,   0x183,\n      0x3,   0x2,    0x2,    0x2,    0x79,   0x192,  0x3,    0x2,    0x2,\n      0x2,   0x7b,   0x1a1,  0x3,    0x2,    0x2,    0x2,    0x7d,   0x1a7,\n      0x3,   0x2,    0x2,    0x2,    0x7f,   0x80,   0x7,    0x51,   0x2,\n      0x2,   0x80,   0x81,   0x7,    0x54,   0x2,    0x2,    0x81,   0x4,\n      0x3,   0x2,    0x2,    0x2,    0x82,   0x83,   0x7,    0x43,   0x2,\n      0x2,   0x83,   0x84,   0x7,    0x50,   0x2,    0x2,    0x84,   0x85,\n      0x7,   0x46,   0x2,    0x2,    0x85,   0x6,    0x3,    0x2,    0x2,\n      0x2,   0x86,   0x87,   0x7,    0x50,   0x2,    0x2,    0x87,   0x88,\n      0x7,   0x51,   0x2,    0x2,    0x88,   0x89,   0x7,    0x56,   0x2,\n      0x2,   0x89,   0x8,    0x3,    0x2,    0x2,    0x2,    0x8a,   0x8b,\n      0x7,   0x4b,   0x2,    0x2,    0x8b,   0x8c,   0x7,    0x50,   0x2,\n      0x2,   0x8c,   0xa,    0x3,    0x2,    0x2,    0x2,    0x8d,   0x8e,\n      0x7,   0x45,   0x2,    0x2,    0x8e,   0x8f,   0x7,    0x51,   0x2,\n      0x2,   0x8f,   0x90,   0x7,    0x50,   0x2,    0x2,    0x90,   0x91,\n      0x7,   0x56,   0x2,    0x2,    0x91,   0x92,   0x7,    0x43,   0x2,\n      0x2,   0x92,   0x93,   0x7,    0x4b,   0x2,    0x2,    0x93,   0x94,\n      0x7,   0x50,   0x2,    0x2,    0x94,   0x95,   0x7,    0x61,   0x2,\n      0x2,   0x95,   0x96,   0x7,    0x43,   0x2,    0x2,    0x96,   0x97,\n      0x7,   0x4e,   0x2,    0x2,    0x97,   0x98,   0x7,    0x4e,   0x2,\n      0x2,   0x98,   0xc,    0x3,    0x2,    0x2,    0x2,    0x99,   0x9a,\n      0x7,   0x45,   0x2,    0x2,    0x9a,   0x9b,   0x7,    0x51,   0x2,\n      0x2,   0x9b,   0x9c,   0x7,    0x50,   0x2,    0x2,    0x9c,   0x9d,\n      0x7,   0x56,   0x2,    0x2,    0x9d,   0x9e,   0x7,    0x43,   0x2,\n      0x2,   0x9e,   0x9f,   0x7,    0x4b,   0x2,    0x2,    0x9f,   0xa0,\n      0x7,   0x50,   0x2,    0x2,    0xa0,   0xa1,   0x7,    0x61,   0x2,\n      0x2,   0xa1,   0xa2,   
0x7,    0x43,   0x2,    0x2,    0xa2,   0xa3,\n      0x7,   0x50,   0x2,    0x2,    0xa3,   0xa4,   0x7,    0x5b,   0x2,\n      0x2,   0xa4,   0xe,    0x3,    0x2,    0x2,    0x2,    0xa5,   0xa6,\n      0x7,   0x44,   0x2,    0x2,    0xa6,   0xa7,   0x7,    0x47,   0x2,\n      0x2,   0xa7,   0xa8,   0x7,    0x56,   0x2,    0x2,    0xa8,   0xa9,\n      0x7,   0x59,   0x2,    0x2,    0xa9,   0xaa,   0x7,    0x47,   0x2,\n      0x2,   0xaa,   0xab,   0x7,    0x47,   0x2,    0x2,    0xab,   0xac,\n      0x7,   0x50,   0x2,    0x2,    0xac,   0x10,   0x3,    0x2,    0x2,\n      0x2,   0xad,   0xae,   0x7,    0x4e,   0x2,    0x2,    0xae,   0xaf,\n      0x7,   0x4b,   0x2,    0x2,    0xaf,   0xb0,   0x7,    0x4d,   0x2,\n      0x2,   0xb0,   0xb1,   0x7,    0x47,   0x2,    0x2,    0xb1,   0x12,\n      0x3,   0x2,    0x2,    0x2,    0xb2,   0xb3,   0x7,    0x59,   0x2,\n      0x2,   0xb3,   0xb4,   0x7,    0x4a,   0x2,    0x2,    0xb4,   0xb5,\n      0x7,   0x47,   0x2,    0x2,    0xb5,   0xb6,   0x7,    0x54,   0x2,\n      0x2,   0xb6,   0xb7,   0x7,    0x47,   0x2,    0x2,    0xb7,   0x14,\n      0x3,   0x2,    0x2,    0x2,    0xb8,   0xb9,   0x7,    0x55,   0x2,\n      0x2,   0xb9,   0xba,   0x7,    0x47,   0x2,    0x2,    0xba,   0xbb,\n      0x7,   0x4e,   0x2,    0x2,    0xbb,   0xbc,   0x7,    0x47,   0x2,\n      0x2,   0xbc,   0xbd,   0x7,    0x45,   0x2,    0x2,    0xbd,   0xbe,\n      0x7,   0x56,   0x2,    0x2,    0xbe,   0x16,   0x3,    0x2,    0x2,\n      0x2,   0xbf,   0xc0,   0x7,    0x48,   0x2,    0x2,    0xc0,   0xc1,\n      0x7,   0x54,   0x2,    0x2,    0xc1,   0xc2,   0x7,    0x51,   0x2,\n      0x2,   0xc2,   0xc3,   0x7,    0x4f,   0x2,    0x2,    0xc3,   0x18,\n      0x3,   0x2,    0x2,    0x2,    0xc4,   0xc5,   0x7,    0x43,   0x2,\n      0x2,   0xc5,   0xc6,   0x7,    0x55,   0x2,    0x2,    0xc6,   0x1a,\n      0x3,   0x2,    0x2,    0x2,    0xc7,   0xc8,   0x7,    0x44,   0x2,\n      0x2,   0xc8,   0xc9,   0x7,    0x5b,   0x2,    0x2,    
0xc9,   0x1c,\n      0x3,   0x2,    0x2,    0x2,    0xca,   0xcb,   0x7,    0x51,   0x2,\n      0x2,   0xcb,   0xcc,   0x7,    0x54,   0x2,    0x2,    0xcc,   0xcd,\n      0x7,   0x46,   0x2,    0x2,    0xcd,   0xce,   0x7,    0x47,   0x2,\n      0x2,   0xce,   0xcf,   0x7,    0x54,   0x2,    0x2,    0xcf,   0x1e,\n      0x3,   0x2,    0x2,    0x2,    0xd0,   0xd1,   0x7,    0x43,   0x2,\n      0x2,   0xd1,   0xd2,   0x7,    0x55,   0x2,    0x2,    0xd2,   0xd3,\n      0x7,   0x45,   0x2,    0x2,    0xd3,   0x20,   0x3,    0x2,    0x2,\n      0x2,   0xd4,   0xd5,   0x7,    0x46,   0x2,    0x2,    0xd5,   0xd6,\n      0x7,   0x47,   0x2,    0x2,    0xd6,   0xd7,   0x7,    0x55,   0x2,\n      0x2,   0xd7,   0xd8,   0x7,    0x45,   0x2,    0x2,    0xd8,   0x22,\n      0x3,   0x2,    0x2,    0x2,    0xd9,   0xda,   0x7,    0x4e,   0x2,\n      0x2,   0xda,   0xdb,   0x7,    0x4b,   0x2,    0x2,    0xdb,   0xdc,\n      0x7,   0x4f,   0x2,    0x2,    0xdc,   0xdd,   0x7,    0x4b,   0x2,\n      0x2,   0xdd,   0xde,   0x7,    0x56,   0x2,    0x2,    0xde,   0x24,\n      0x3,   0x2,    0x2,    0x2,    0xdf,   0xe0,   0x7,    0x56,   0x2,\n      0x2,   0xe0,   0xe1,   0x7,    0x54,   0x2,    0x2,    0xe1,   0xe2,\n      0x7,   0x57,   0x2,    0x2,    0xe2,   0xe3,   0x7,    0x47,   0x2,\n      0x2,   0xe3,   0x26,   0x3,    0x2,    0x2,    0x2,    0xe4,   0xe5,\n      0x7,   0x48,   0x2,    0x2,    0xe5,   0xe6,   0x7,    0x43,   0x2,\n      0x2,   0xe6,   0xe7,   0x7,    0x4e,   0x2,    0x2,    0xe7,   0xe8,\n      0x7,   0x55,   0x2,    0x2,    0xe8,   0xe9,   0x7,    0x47,   0x2,\n      0x2,   0xe9,   0x28,   0x3,    0x2,    0x2,    0x2,    0xea,   0xeb,\n      0x7,   0x4b,   0x2,    0x2,    0xeb,   0xec,   0x7,    0x55,   0x2,\n      0x2,   0xec,   0x2a,   0x3,    0x2,    0x2,    0x2,    0xed,   0xee,\n      0x7,   0x50,   0x2,    0x2,    0xee,   0xef,   0x7,    0x57,   0x2,\n      0x2,   0xef,   0xf0,   0x7,    0x4e,   0x2,    0x2,    0xf0,   0xf1,\n      0x7,   0x4e,   
0x2,    0x2,    0xf1,   0x2c,   0x3,    0x2,    0x2,\n      0x2,   0xf2,   0xf3,   0x5,    0x71,   0x39,   0x2,    0xf3,   0x2e,\n      0x3,   0x2,    0x2,    0x2,    0xf4,   0xf6,   0x5,    0x47,   0x24,\n      0x2,   0xf5,   0xf4,   0x3,    0x2,    0x2,    0x2,    0xf5,   0xf6,\n      0x3,   0x2,    0x2,    0x2,    0xf6,   0xf7,   0x3,    0x2,    0x2,\n      0x2,   0xf7,   0xf8,   0x5,    0x2d,   0x17,   0x2,    0xf8,   0x30,\n      0x3,   0x2,    0x2,    0x2,    0xf9,   0x102,  0x5,    0x73,   0x3a,\n      0x2,   0xfa,   0xfc,   0x7,    0x47,   0x2,    0x2,    0xfb,   0xfd,\n      0x9,   0x2,    0x2,    0x2,    0xfc,   0xfb,   0x3,    0x2,    0x2,\n      0x2,   0xfc,   0xfd,   0x3,    0x2,    0x2,    0x2,    0xfd,   0x100,\n      0x3,   0x2,    0x2,    0x2,    0xfe,   0x101,  0x5,    0x73,   0x3a,\n      0x2,   0xff,   0x101,  0x5,    0x71,   0x39,   0x2,    0x100,  0xfe,\n      0x3,   0x2,    0x2,    0x2,    0x100,  0xff,   0x3,    0x2,    0x2,\n      0x2,   0x101,  0x103,  0x3,    0x2,    0x2,    0x2,    0x102,  0xfa,\n      0x3,   0x2,    0x2,    0x2,    0x102,  0x103,  0x3,    0x2,    0x2,\n      0x2,   0x103,  0x105,  0x3,    0x2,    0x2,    0x2,    0x104,  0x106,\n      0x9,   0x3,    0x2,    0x2,    0x105,  0x104,  0x3,    0x2,    0x2,\n      0x2,   0x105,  0x106,  0x3,    0x2,    0x2,    0x2,    0x106,  0x32,\n      0x3,   0x2,    0x2,    0x2,    0x107,  0x109,  0x5,    0x47,   0x24,\n      0x2,   0x108,  0x107,  0x3,    0x2,    0x2,    0x2,    0x108,  0x109,\n      0x3,   0x2,    0x2,    0x2,    0x109,  0x10a,  0x3,    0x2,    0x2,\n      0x2,   0x10a,  0x10b,  0x5,    0x31,   0x19,   0x2,    0x10b,  0x34,\n      0x3,   0x2,    0x2,    0x2,    0x10c,  0x112,  0x7,    0x29,   0x2,\n      0x2,   0x10d,  0x111,  0xa,    0x4,    0x2,    0x2,    0x10e,  0x10f,\n      0x7,   0x5e,   0x2,    0x2,    0x10f,  0x111,  0xb,    0x2,    0x2,\n      0x2,   0x110,  0x10d,  0x3,    0x2,    0x2,    0x2,    0x110,  0x10e,\n      0x3,   0x2,    0x2,    0x2,    0x111,  
0x114,  0x3,    0x2,    0x2,\n      0x2,   0x112,  0x110,  0x3,    0x2,    0x2,    0x2,    0x112,  0x113,\n      0x3,   0x2,    0x2,    0x2,    0x113,  0x115,  0x3,    0x2,    0x2,\n      0x2,   0x114,  0x112,  0x3,    0x2,    0x2,    0x2,    0x115,  0x116,\n      0x7,   0x29,   0x2,    0x2,    0x116,  0x36,   0x3,    0x2,    0x2,\n      0x2,   0x117,  0x11d,  0x7,    0x24,   0x2,    0x2,    0x118,  0x11c,\n      0xa,   0x5,    0x2,    0x2,    0x119,  0x11a,  0x7,    0x5e,   0x2,\n      0x2,   0x11a,  0x11c,  0xb,    0x2,    0x2,    0x2,    0x11b,  0x118,\n      0x3,   0x2,    0x2,    0x2,    0x11b,  0x119,  0x3,    0x2,    0x2,\n      0x2,   0x11c,  0x11f,  0x3,    0x2,    0x2,    0x2,    0x11d,  0x11b,\n      0x3,   0x2,    0x2,    0x2,    0x11d,  0x11e,  0x3,    0x2,    0x2,\n      0x2,   0x11e,  0x120,  0x3,    0x2,    0x2,    0x2,    0x11f,  0x11d,\n      0x3,   0x2,    0x2,    0x2,    0x120,  0x121,  0x7,    0x24,   0x2,\n      0x2,   0x121,  0x38,   0x3,    0x2,    0x2,    0x2,    0x122,  0x123,\n      0x7,   0x30,   0x2,    0x2,    0x123,  0x3a,   0x3,    0x2,    0x2,\n      0x2,   0x124,  0x125,  0x7,    0x2a,   0x2,    0x2,    0x125,  0x3c,\n      0x3,   0x2,    0x2,    0x2,    0x126,  0x127,  0x7,    0x2b,   0x2,\n      0x2,   0x127,  0x3e,   0x3,    0x2,    0x2,    0x2,    0x128,  0x129,\n      0x7,   0x5d,   0x2,    0x2,    0x129,  0x40,   0x3,    0x2,    0x2,\n      0x2,   0x12a,  0x12b,  0x7,    0x5f,   0x2,    0x2,    0x12b,  0x42,\n      0x3,   0x2,    0x2,    0x2,    0x12c,  0x12d,  0x7,    0x2c,   0x2,\n      0x2,   0x12d,  0x44,   0x3,    0x2,    0x2,    0x2,    0x12e,  0x12f,\n      0x7,   0x2d,   0x2,    0x2,    0x12f,  0x46,   0x3,    0x2,    0x2,\n      0x2,   0x130,  0x131,  0x7,    0x2f,   0x2,    0x2,    0x131,  0x48,\n      0x3,   0x2,    0x2,    0x2,    0x132,  0x133,  0x7,    0x2e,   0x2,\n      0x2,   0x133,  0x4a,   0x3,    0x2,    0x2,    0x2,    0x134,  0x135,\n      0x7,   0x31,   0x2,    0x2,    0x135,  0x4c,   0x3,    0x2,    
0x2,\n      0x2,   0x136,  0x137,  0x7,    0x27,   0x2,    0x2,    0x137,  0x4e,\n      0x3,   0x2,    0x2,    0x2,    0x138,  0x139,  0x7,    0x42,   0x2,\n      0x2,   0x139,  0x50,   0x3,    0x2,    0x2,    0x2,    0x13a,  0x13b,\n      0x7,   0x3c,   0x2,    0x2,    0x13b,  0x13c,  0x7,    0x3f,   0x2,\n      0x2,   0x13c,  0x52,   0x3,    0x2,    0x2,    0x2,    0x13d,  0x13e,\n      0x7,   0x25,   0x2,    0x2,    0x13e,  0x54,   0x3,    0x2,    0x2,\n      0x2,   0x13f,  0x140,  0x7,    0x3c,   0x2,    0x2,    0x140,  0x56,\n      0x3,   0x2,    0x2,    0x2,    0x141,  0x142,  0x7,    0x3d,   0x2,\n      0x2,   0x142,  0x58,   0x3,    0x2,    0x2,    0x2,    0x143,  0x144,\n      0x7,   0x3e,   0x2,    0x2,    0x144,  0x145,  0x7,    0x3f,   0x2,\n      0x2,   0x145,  0x5a,   0x3,    0x2,    0x2,    0x2,    0x146,  0x147,\n      0x7,   0x40,   0x2,    0x2,    0x147,  0x148,  0x7,    0x3f,   0x2,\n      0x2,   0x148,  0x5c,   0x3,    0x2,    0x2,    0x2,    0x149,  0x14a,\n      0x7,   0x23,   0x2,    0x2,    0x14a,  0x14b,  0x7,    0x3f,   0x2,\n      0x2,   0x14b,  0x5e,   0x3,    0x2,    0x2,    0x2,    0x14c,  0x14d,\n      0x7,   0x60,   0x2,    0x2,    0x14d,  0x60,   0x3,    0x2,    0x2,\n      0x2,   0x14e,  0x14f,  0x7,    0x80,   0x2,    0x2,    0x14f,  0x62,\n      0x3,   0x2,    0x2,    0x2,    0x150,  0x151,  0x7,    0x3e,   0x2,\n      0x2,   0x151,  0x64,   0x3,    0x2,    0x2,    0x2,    0x152,  0x153,\n      0x7,   0x40,   0x2,    0x2,    0x153,  0x66,   0x3,    0x2,    0x2,\n      0x2,   0x154,  0x155,  0x7,    0x3f,   0x2,    0x2,    0x155,  0x68,\n      0x3,   0x2,    0x2,    0x2,    0x156,  0x157,  0x7,    0x7e,   0x2,\n      0x2,   0x157,  0x158,  0x7,    0x7e,   0x2,    0x2,    0x158,  0x6a,\n      0x3,   0x2,    0x2,    0x2,    0x159,  0x15a,  0x7,    0x61,   0x2,\n      0x2,   0x15a,  0x6c,   0x3,    0x2,    0x2,    0x2,    0x15b,  0x15d,\n      0x9,   0x6,    0x2,    0x2,    0x15c,  0x15b,  0x3,    0x2,    0x2,\n      0x2,   0x15d,  
0x15e,  0x3,    0x2,    0x2,    0x2,    0x15e,  0x15c,\n      0x3,   0x2,    0x2,    0x2,    0x15e,  0x15f,  0x3,    0x2,    0x2,\n      0x2,   0x15f,  0x160,  0x3,    0x2,    0x2,    0x2,    0x160,  0x161,\n      0x8,   0x37,   0x2,    0x2,    0x161,  0x6e,   0x3,    0x2,    0x2,\n      0x2,   0x162,  0x163,  0x9,    0x7,    0x2,    0x2,    0x163,  0x70,\n      0x3,   0x2,    0x2,    0x2,    0x164,  0x166,  0x9,    0x8,    0x2,\n      0x2,   0x165,  0x164,  0x3,    0x2,    0x2,    0x2,    0x166,  0x167,\n      0x3,   0x2,    0x2,    0x2,    0x167,  0x165,  0x3,    0x2,    0x2,\n      0x2,   0x167,  0x168,  0x3,    0x2,    0x2,    0x2,    0x168,  0x72,\n      0x3,   0x2,    0x2,    0x2,    0x169,  0x16b,  0x5,    0x2d,   0x17,\n      0x2,   0x16a,  0x169,  0x3,    0x2,    0x2,    0x2,    0x16b,  0x16e,\n      0x3,   0x2,    0x2,    0x2,    0x16c,  0x16a,  0x3,    0x2,    0x2,\n      0x2,   0x16c,  0x16d,  0x3,    0x2,    0x2,    0x2,    0x16d,  0x170,\n      0x3,   0x2,    0x2,    0x2,    0x16e,  0x16c,  0x3,    0x2,    0x2,\n      0x2,   0x16f,  0x171,  0x7,    0x30,   0x2,    0x2,    0x170,  0x16f,\n      0x3,   0x2,    0x2,    0x2,    0x170,  0x171,  0x3,    0x2,    0x2,\n      0x2,   0x171,  0x173,  0x3,    0x2,    0x2,    0x2,    0x172,  0x174,\n      0x5,   0x2d,   0x17,   0x2,    0x173,  0x172,  0x3,    0x2,    0x2,\n      0x2,   0x174,  0x175,  0x3,    0x2,    0x2,    0x2,    0x175,  0x173,\n      0x3,   0x2,    0x2,    0x2,    0x175,  0x176,  0x3,    0x2,    0x2,\n      0x2,   0x176,  0x74,   0x3,    0x2,    0x2,    0x2,    0x177,  0x17d,\n      0x5,   0x3f,   0x20,   0x2,    0x178,  0x17e,  0x5,    0x47,   0x24,\n      0x2,   0x179,  0x17e,  0x5,    0x71,   0x39,   0x2,    0x17a,  0x17e,\n      0x5,   0x73,   0x3a,   0x2,    0x17b,  0x17e,  0x7,    0x2e,   0x2,\n      0x2,   0x17c,  0x17e,  0x5,    0x6d,   0x37,   0x2,    0x17d,  0x178,\n      0x3,   0x2,    0x2,    0x2,    0x17d,  0x179,  0x3,    0x2,    0x2,\n      0x2,   0x17d,  0x17a,  0x3,    0x2,    
0x2,    0x2,    0x17d,  0x17b,\n      0x3,   0x2,    0x2,    0x2,    0x17d,  0x17c,  0x3,    0x2,    0x2,\n      0x2,   0x17e,  0x17f,  0x3,    0x2,    0x2,    0x2,    0x17f,  0x17d,\n      0x3,   0x2,    0x2,    0x2,    0x17f,  0x180,  0x3,    0x2,    0x2,\n      0x2,   0x180,  0x181,  0x3,    0x2,    0x2,    0x2,    0x181,  0x182,\n      0x5,   0x41,   0x21,   0x2,    0x182,  0x76,   0x3,    0x2,    0x2,\n      0x2,   0x183,  0x184,  0x7,    0x2f,   0x2,    0x2,    0x184,  0x185,\n      0x7,   0x2f,   0x2,    0x2,    0x185,  0x189,  0x3,    0x2,    0x2,\n      0x2,   0x186,  0x188,  0xa,    0x9,    0x2,    0x2,    0x187,  0x186,\n      0x3,   0x2,    0x2,    0x2,    0x188,  0x18b,  0x3,    0x2,    0x2,\n      0x2,   0x189,  0x187,  0x3,    0x2,    0x2,    0x2,    0x189,  0x18a,\n      0x3,   0x2,    0x2,    0x2,    0x18a,  0x18e,  0x3,    0x2,    0x2,\n      0x2,   0x18b,  0x189,  0x3,    0x2,    0x2,    0x2,    0x18c,  0x18f,\n      0x5,   0x7b,   0x3e,   0x2,    0x18d,  0x18f,  0x7,    0x2,    0x2,\n      0x3,   0x18e,  0x18c,  0x3,    0x2,    0x2,    0x2,    0x18e,  0x18d,\n      0x3,   0x2,    0x2,    0x2,    0x18f,  0x190,  0x3,    0x2,    0x2,\n      0x2,   0x190,  0x191,  0x8,    0x3c,   0x3,    0x2,    0x191,  0x78,\n      0x3,   0x2,    0x2,    0x2,    0x192,  0x193,  0x7,    0x31,   0x2,\n      0x2,   0x193,  0x194,  0x7,    0x2c,   0x2,    0x2,    0x194,  0x198,\n      0x3,   0x2,    0x2,    0x2,    0x195,  0x197,  0xb,    0x2,    0x2,\n      0x2,   0x196,  0x195,  0x3,    0x2,    0x2,    0x2,    0x197,  0x19a,\n      0x3,   0x2,    0x2,    0x2,    0x198,  0x199,  0x3,    0x2,    0x2,\n      0x2,   0x198,  0x196,  0x3,    0x2,    0x2,    0x2,    0x199,  0x19b,\n      0x3,   0x2,    0x2,    0x2,    0x19a,  0x198,  0x3,    0x2,    0x2,\n      0x2,   0x19b,  0x19c,  0x7,    0x2c,   0x2,    0x2,    0x19c,  0x19d,\n      0x7,   0x31,   0x2,    0x2,    0x19d,  0x19e,  0x3,    0x2,    0x2,\n      0x2,   0x19e,  0x19f,  0x8,    0x3d,   0x3,    0x2,    0x19f,  
0x7a,\n      0x3,   0x2,    0x2,    0x2,    0x1a0,  0x1a2,  0x7,    0xf,    0x2,\n      0x2,   0x1a1,  0x1a0,  0x3,    0x2,    0x2,    0x2,    0x1a1,  0x1a2,\n      0x3,   0x2,    0x2,    0x2,    0x1a2,  0x1a3,  0x3,    0x2,    0x2,\n      0x2,   0x1a3,  0x1a4,  0x7,    0xc,    0x2,    0x2,    0x1a4,  0x7c,\n      0x3,   0x2,    0x2,    0x2,    0x1a5,  0x1a8,  0x5,    0x6f,   0x38,\n      0x2,   0x1a6,  0x1a8,  0x9,    0xa,    0x2,    0x2,    0x1a7,  0x1a5,\n      0x3,   0x2,    0x2,    0x2,    0x1a7,  0x1a6,  0x3,    0x2,    0x2,\n      0x2,   0x1a8,  0x1a9,  0x3,    0x2,    0x2,    0x2,    0x1a9,  0x1a7,\n      0x3,   0x2,    0x2,    0x2,    0x1a9,  0x1aa,  0x3,    0x2,    0x2,\n      0x2,   0x1aa,  0x7e,   0x3,    0x2,    0x2,    0x2,    0x1a,   0x2,\n      0xf5,  0xfc,   0x100,  0x102,  0x105,  0x108,  0x110,  0x112,  0x11b,\n      0x11d, 0x15e,  0x167,  0x16c,  0x170,  0x175,  0x17d,  0x17f,  0x189,\n      0x18e, 0x198,  0x1a1,  0x1a7,  0x1a9,  0x4,    0x8,    0x2,    0x2,\n      0x2,   0x4,    0x2,\n  };\n\n  atn::ATNDeserializer deserializer;\n  _atn = deserializer.deserialize(_serializedATN);\n\n  size_t count = _atn.getNumberOfDecisions();\n  _decisionToDFA.reserve(count);\n  for (size_t i = 0; i < count; i++) {\n    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);\n  }\n}\n\nSQLLexer::Initializer SQLLexer::_init;\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLLexer.h",
    "content": "\n// Generated from SQLLexer.g4 by ANTLR 4.8\n\n#pragma once\n\n\n#include \"antlr4-runtime.h\"\n\n\nnamespace antlr4 {\n\n\nclass SQLLexer : public antlr4::Lexer {\n public:\n  enum {\n    OR = 1,\n    AND = 2,\n    NOT = 3,\n    IN = 4,\n    CONTAIN_ALL = 5,\n    CONTAIN_ANY = 6,\n    BETWEEN = 7,\n    LIKE = 8,\n    WHERE = 9,\n    SELECT = 10,\n    FROM = 11,\n    AS = 12,\n    BY = 13,\n    ORDER = 14,\n    ASC = 15,\n    DESC = 16,\n    LIMIT = 17,\n    TRUE_V = 18,\n    FALSE_V = 19,\n    IS = 20,\n    NULL_V = 21,\n    INTEGER = 22,\n    FLOAT = 23,\n    SQUOTA_STRING = 24,\n    DQUOTA_STRING = 25,\n    DOT = 26,\n    LP = 27,\n    RP = 28,\n    LMP = 29,\n    RMP = 30,\n    ASTERISK = 31,\n    PLUS_SIGN = 32,\n    MINUS_SIGN = 33,\n    COMMA = 34,\n    SOLIDUS = 35,\n    MOD = 36,\n    AT_SIGN = 37,\n    ASSIGN_OP = 38,\n    SHARP_SIGN = 39,\n    COLON = 40,\n    SEMI = 41,\n    LE_OP = 42,\n    GE_OP = 43,\n    NE_OP = 44,\n    CARET_OP = 45,\n    TILDE_OP = 46,\n    L_OP = 47,\n    G_OP = 48,\n    E_OP = 49,\n    CONCAT_OP = 50,\n    UNDERSCORE = 51,\n    SPACES = 52,\n    VECTOR = 53,\n    SINGLE_LINE_COMMENT = 54,\n    MULTI_LINE_COMMENT = 55,\n    REGULAR_ID = 56\n  };\n\n  enum { COMMENTS = 2 };\n\n  SQLLexer(antlr4::CharStream *input);\n  ~SQLLexer();\n\n  virtual std::string getGrammarFileName() const override;\n  virtual const std::vector<std::string> &getRuleNames() const override;\n\n  virtual const std::vector<std::string> &getChannelNames() const override;\n  virtual const std::vector<std::string> &getModeNames() const override;\n  virtual const std::vector<std::string> &getTokenNames()\n      const override;  // deprecated, use vocabulary instead\n  virtual antlr4::dfa::Vocabulary &getVocabulary() const override;\n\n  virtual const std::vector<uint16_t> getSerializedATN() const override;\n  virtual const antlr4::atn::ATN &getATN() const override;\n\n private:\n  static std::vector<antlr4::dfa::DFA> _decisionToDFA;\n  static 
antlr4::atn::PredictionContextCache _sharedContextCache;\n  static std::vector<std::string> _ruleNames;\n  static std::vector<std::string> _tokenNames;\n  static std::vector<std::string> _channelNames;\n  static std::vector<std::string> _modeNames;\n\n  static std::vector<std::string> _literalNames;\n  static std::vector<std::string> _symbolicNames;\n  static antlr4::dfa::Vocabulary _vocabulary;\n  static antlr4::atn::ATN _atn;\n  static std::vector<uint16_t> _serializedATN;\n\n\n  // Individual action functions triggered by action() above.\n\n  // Individual semantic predicate functions triggered by sempred() above.\n\n  struct Initializer {\n    Initializer();\n  };\n  static Initializer _init;\n};\n\n}  // namespace antlr4\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLLexer.interp",
    "content": "token literal names:\nnull\n'OR'\n'AND'\n'NOT'\n'IN'\n'CONTAIN_ALL'\n'CONTAIN_ANY'\n'BETWEEN'\n'LIKE'\n'WHERE'\n'SELECT'\n'FROM'\n'AS'\n'BY'\n'ORDER'\n'ASC'\n'DESC'\n'LIMIT'\n'TRUE'\n'FALSE'\n'IS'\n'NULL'\nnull\nnull\nnull\nnull\n'.'\n'('\n')'\n'['\n']'\n'*'\n'+'\n'-'\n','\n'/'\n'%'\n'@'\n':='\n'#'\n':'\n';'\n'<='\n'>='\n'!='\n'^'\n'~'\n'<'\n'>'\n'='\n'||'\n'_'\nnull\nnull\nnull\nnull\nnull\n\ntoken symbolic names:\nnull\nOR\nAND\nNOT\nIN\nCONTAIN_ALL\nCONTAIN_ANY\nBETWEEN\nLIKE\nWHERE\nSELECT\nFROM\nAS\nBY\nORDER\nASC\nDESC\nLIMIT\nTRUE_V\nFALSE_V\nIS\nNULL_V\nINTEGER\nFLOAT\nSQUOTA_STRING\nDQUOTA_STRING\nDOT\nLP\nRP\nLMP\nRMP\nASTERISK\nPLUS_SIGN\nMINUS_SIGN\nCOMMA\nSOLIDUS\nMOD\nAT_SIGN\nASSIGN_OP\nSHARP_SIGN\nCOLON\nSEMI\nLE_OP\nGE_OP\nNE_OP\nCARET_OP\nTILDE_OP\nL_OP\nG_OP\nE_OP\nCONCAT_OP\nUNDERSCORE\nSPACES\nVECTOR\nSINGLE_LINE_COMMENT\nMULTI_LINE_COMMENT\nREGULAR_ID\n\nrule names:\nOR\nAND\nNOT\nIN\nCONTAIN_ALL\nCONTAIN_ANY\nBETWEEN\nLIKE\nWHERE\nSELECT\nFROM\nAS\nBY\nORDER\nASC\nDESC\nLIMIT\nTRUE_V\nFALSE_V\nIS\nNULL_V\nUNSIGNED_INTEGER\nINTEGER\nAPPROXIMATE_NUM_LIT\nFLOAT\nSQUOTA_STRING\nDQUOTA_STRING\nDOT\nLP\nRP\nLMP\nRMP\nASTERISK\nPLUS_SIGN\nMINUS_SIGN\nCOMMA\nSOLIDUS\nMOD\nAT_SIGN\nASSIGN_OP\nSHARP_SIGN\nCOLON\nSEMI\nLE_OP\nGE_OP\nNE_OP\nCARET_OP\nTILDE_OP\nL_OP\nG_OP\nE_OP\nCONCAT_OP\nUNDERSCORE\nSPACES\nSIMPLE_LETTER\nUNSIGNED_INTEGER_FRAGMENT\nFLOAT_FRAGMENT\nVECTOR\nSINGLE_LINE_COMMENT\nMULTI_LINE_COMMENT\nNEWLINE\nREGULAR_ID\n\nchannel names:\nDEFAULT_TOKEN_CHANNEL\nHIDDEN\nnull\nnull\nCOMMENTS\n\nmode names:\nDEFAULT_MODE\n\natn:\n[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 58, 427, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 
9, 26, 4, 27, 9, 27, 4, 28, 9, 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 4, 34, 9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 4, 42, 9, 42, 4, 43, 9, 43, 4, 44, 9, 44, 4, 45, 9, 45, 4, 46, 9, 46, 4, 47, 9, 47, 4, 48, 9, 48, 4, 49, 9, 49, 4, 50, 9, 50, 4, 51, 9, 51, 4, 52, 9, 52, 4, 53, 9, 53, 4, 54, 9, 54, 4, 55, 9, 55, 4, 56, 9, 56, 4, 57, 9, 57, 4, 58, 9, 58, 4, 59, 9, 59, 4, 60, 9, 60, 4, 61, 9, 61, 4, 62, 9, 62, 4, 63, 9, 63, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 12, 3, 12, 3, 12, 3, 12, 3, 12, 3, 13, 3, 13, 3, 13, 3, 14, 3, 14, 3, 14, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 16, 3, 16, 3, 16, 3, 16, 3, 17, 3, 17, 3, 17, 3, 17, 3, 17, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 23, 3, 23, 3, 24, 5, 24, 246, 10, 24, 3, 24, 3, 24, 3, 25, 3, 25, 3, 25, 5, 25, 253, 10, 25, 3, 25, 3, 25, 5, 25, 257, 10, 25, 5, 25, 259, 10, 25, 3, 25, 5, 25, 262, 10, 25, 3, 26, 5, 26, 265, 10, 26, 3, 26, 3, 26, 3, 27, 3, 27, 3, 27, 3, 27, 7, 27, 273, 10, 27, 12, 27, 14, 27, 276, 11, 27, 3, 27, 3, 27, 3, 28, 3, 28, 3, 28, 3, 28, 7, 28, 284, 10, 28, 12, 28, 14, 28, 287, 11, 28, 3, 28, 3, 28, 3, 29, 3, 29, 3, 30, 3, 30, 3, 31, 3, 31, 3, 32, 3, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 35, 3, 35, 3, 36, 3, 36, 3, 37, 3, 37, 3, 38, 3, 38, 3, 39, 3, 39, 3, 40, 3, 40, 3, 41, 3, 41, 3, 41, 3, 42, 3, 42, 3, 43, 3, 43, 3, 44, 3, 44, 3, 45, 3, 45, 3, 45, 3, 46, 3, 46, 3, 46, 3, 47, 3, 47, 3, 47, 3, 48, 3, 48, 3, 49, 3, 
49, 3, 50, 3, 50, 3, 51, 3, 51, 3, 52, 3, 52, 3, 53, 3, 53, 3, 53, 3, 54, 3, 54, 3, 55, 6, 55, 349, 10, 55, 13, 55, 14, 55, 350, 3, 55, 3, 55, 3, 56, 3, 56, 3, 57, 6, 57, 358, 10, 57, 13, 57, 14, 57, 359, 3, 58, 7, 58, 363, 10, 58, 12, 58, 14, 58, 366, 11, 58, 3, 58, 5, 58, 369, 10, 58, 3, 58, 6, 58, 372, 10, 58, 13, 58, 14, 58, 373, 3, 59, 3, 59, 3, 59, 3, 59, 3, 59, 3, 59, 6, 59, 382, 10, 59, 13, 59, 14, 59, 383, 3, 59, 3, 59, 3, 60, 3, 60, 3, 60, 3, 60, 7, 60, 392, 10, 60, 12, 60, 14, 60, 395, 11, 60, 3, 60, 3, 60, 5, 60, 399, 10, 60, 3, 60, 3, 60, 3, 61, 3, 61, 3, 61, 3, 61, 7, 61, 407, 10, 61, 12, 61, 14, 61, 410, 11, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 62, 5, 62, 418, 10, 62, 3, 62, 3, 62, 3, 63, 3, 63, 6, 63, 424, 10, 63, 13, 63, 14, 63, 425, 3, 408, 2, 64, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 17, 10, 19, 11, 21, 12, 23, 13, 25, 14, 27, 15, 29, 16, 31, 17, 33, 18, 35, 19, 37, 20, 39, 21, 41, 22, 43, 23, 45, 2, 47, 24, 49, 2, 51, 25, 53, 26, 55, 27, 57, 28, 59, 29, 61, 30, 63, 31, 65, 32, 67, 33, 69, 34, 71, 35, 73, 36, 75, 37, 77, 38, 79, 39, 81, 40, 83, 41, 85, 42, 87, 43, 89, 44, 91, 45, 93, 46, 95, 47, 97, 48, 99, 49, 101, 50, 103, 51, 105, 52, 107, 53, 109, 54, 111, 2, 113, 2, 115, 2, 117, 55, 119, 56, 121, 57, 123, 2, 125, 58, 3, 2, 11, 4, 2, 45, 45, 47, 47, 4, 2, 70, 70, 72, 72, 4, 2, 41, 41, 94, 94, 4, 2, 36, 36, 94, 94, 5, 2, 11, 12, 15, 15, 34, 34, 3, 2, 67, 92, 3, 2, 50, 59, 4, 2, 12, 12, 15, 15, 5, 2, 47, 47, 50, 59, 97, 97, 2, 446, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 2, 2, 23, 3, 2, 2, 2, 2, 25, 3, 2, 2, 2, 2, 27, 3, 2, 2, 2, 2, 29, 3, 2, 2, 2, 2, 31, 3, 2, 2, 2, 2, 33, 3, 2, 2, 2, 2, 35, 3, 2, 2, 2, 2, 37, 3, 2, 2, 2, 2, 39, 3, 2, 2, 2, 2, 41, 3, 2, 2, 2, 2, 43, 3, 2, 2, 2, 2, 47, 3, 2, 2, 2, 2, 51, 3, 2, 2, 2, 2, 53, 3, 2, 2, 2, 2, 55, 3, 2, 2, 2, 2, 57, 3, 2, 2, 2, 2, 59, 3, 2, 2, 2, 
2, 61, 3, 2, 2, 2, 2, 63, 3, 2, 2, 2, 2, 65, 3, 2, 2, 2, 2, 67, 3, 2, 2, 2, 2, 69, 3, 2, 2, 2, 2, 71, 3, 2, 2, 2, 2, 73, 3, 2, 2, 2, 2, 75, 3, 2, 2, 2, 2, 77, 3, 2, 2, 2, 2, 79, 3, 2, 2, 2, 2, 81, 3, 2, 2, 2, 2, 83, 3, 2, 2, 2, 2, 85, 3, 2, 2, 2, 2, 87, 3, 2, 2, 2, 2, 89, 3, 2, 2, 2, 2, 91, 3, 2, 2, 2, 2, 93, 3, 2, 2, 2, 2, 95, 3, 2, 2, 2, 2, 97, 3, 2, 2, 2, 2, 99, 3, 2, 2, 2, 2, 101, 3, 2, 2, 2, 2, 103, 3, 2, 2, 2, 2, 105, 3, 2, 2, 2, 2, 107, 3, 2, 2, 2, 2, 109, 3, 2, 2, 2, 2, 117, 3, 2, 2, 2, 2, 119, 3, 2, 2, 2, 2, 121, 3, 2, 2, 2, 2, 125, 3, 2, 2, 2, 3, 127, 3, 2, 2, 2, 5, 130, 3, 2, 2, 2, 7, 134, 3, 2, 2, 2, 9, 138, 3, 2, 2, 2, 11, 141, 3, 2, 2, 2, 13, 153, 3, 2, 2, 2, 15, 165, 3, 2, 2, 2, 17, 173, 3, 2, 2, 2, 19, 178, 3, 2, 2, 2, 21, 184, 3, 2, 2, 2, 23, 191, 3, 2, 2, 2, 25, 196, 3, 2, 2, 2, 27, 199, 3, 2, 2, 2, 29, 202, 3, 2, 2, 2, 31, 208, 3, 2, 2, 2, 33, 212, 3, 2, 2, 2, 35, 217, 3, 2, 2, 2, 37, 223, 3, 2, 2, 2, 39, 228, 3, 2, 2, 2, 41, 234, 3, 2, 2, 2, 43, 237, 3, 2, 2, 2, 45, 242, 3, 2, 2, 2, 47, 245, 3, 2, 2, 2, 49, 249, 3, 2, 2, 2, 51, 264, 3, 2, 2, 2, 53, 268, 3, 2, 2, 2, 55, 279, 3, 2, 2, 2, 57, 290, 3, 2, 2, 2, 59, 292, 3, 2, 2, 2, 61, 294, 3, 2, 2, 2, 63, 296, 3, 2, 2, 2, 65, 298, 3, 2, 2, 2, 67, 300, 3, 2, 2, 2, 69, 302, 3, 2, 2, 2, 71, 304, 3, 2, 2, 2, 73, 306, 3, 2, 2, 2, 75, 308, 3, 2, 2, 2, 77, 310, 3, 2, 2, 2, 79, 312, 3, 2, 2, 2, 81, 314, 3, 2, 2, 2, 83, 317, 3, 2, 2, 2, 85, 319, 3, 2, 2, 2, 87, 321, 3, 2, 2, 2, 89, 323, 3, 2, 2, 2, 91, 326, 3, 2, 2, 2, 93, 329, 3, 2, 2, 2, 95, 332, 3, 2, 2, 2, 97, 334, 3, 2, 2, 2, 99, 336, 3, 2, 2, 2, 101, 338, 3, 2, 2, 2, 103, 340, 3, 2, 2, 2, 105, 342, 3, 2, 2, 2, 107, 345, 3, 2, 2, 2, 109, 348, 3, 2, 2, 2, 111, 354, 3, 2, 2, 2, 113, 357, 3, 2, 2, 2, 115, 364, 3, 2, 2, 2, 117, 375, 3, 2, 2, 2, 119, 387, 3, 2, 2, 2, 121, 402, 3, 2, 2, 2, 123, 417, 3, 2, 2, 2, 125, 423, 3, 2, 2, 2, 127, 128, 7, 81, 2, 2, 128, 129, 7, 84, 2, 2, 129, 4, 3, 2, 2, 2, 130, 131, 7, 67, 2, 2, 131, 132, 7, 80, 2, 2, 132, 133, 7, 70, 
2, 2, 133, 6, 3, 2, 2, 2, 134, 135, 7, 80, 2, 2, 135, 136, 7, 81, 2, 2, 136, 137, 7, 86, 2, 2, 137, 8, 3, 2, 2, 2, 138, 139, 7, 75, 2, 2, 139, 140, 7, 80, 2, 2, 140, 10, 3, 2, 2, 2, 141, 142, 7, 69, 2, 2, 142, 143, 7, 81, 2, 2, 143, 144, 7, 80, 2, 2, 144, 145, 7, 86, 2, 2, 145, 146, 7, 67, 2, 2, 146, 147, 7, 75, 2, 2, 147, 148, 7, 80, 2, 2, 148, 149, 7, 97, 2, 2, 149, 150, 7, 67, 2, 2, 150, 151, 7, 78, 2, 2, 151, 152, 7, 78, 2, 2, 152, 12, 3, 2, 2, 2, 153, 154, 7, 69, 2, 2, 154, 155, 7, 81, 2, 2, 155, 156, 7, 80, 2, 2, 156, 157, 7, 86, 2, 2, 157, 158, 7, 67, 2, 2, 158, 159, 7, 75, 2, 2, 159, 160, 7, 80, 2, 2, 160, 161, 7, 97, 2, 2, 161, 162, 7, 67, 2, 2, 162, 163, 7, 80, 2, 2, 163, 164, 7, 91, 2, 2, 164, 14, 3, 2, 2, 2, 165, 166, 7, 68, 2, 2, 166, 167, 7, 71, 2, 2, 167, 168, 7, 86, 2, 2, 168, 169, 7, 89, 2, 2, 169, 170, 7, 71, 2, 2, 170, 171, 7, 71, 2, 2, 171, 172, 7, 80, 2, 2, 172, 16, 3, 2, 2, 2, 173, 174, 7, 78, 2, 2, 174, 175, 7, 75, 2, 2, 175, 176, 7, 77, 2, 2, 176, 177, 7, 71, 2, 2, 177, 18, 3, 2, 2, 2, 178, 179, 7, 89, 2, 2, 179, 180, 7, 74, 2, 2, 180, 181, 7, 71, 2, 2, 181, 182, 7, 84, 2, 2, 182, 183, 7, 71, 2, 2, 183, 20, 3, 2, 2, 2, 184, 185, 7, 85, 2, 2, 185, 186, 7, 71, 2, 2, 186, 187, 7, 78, 2, 2, 187, 188, 7, 71, 2, 2, 188, 189, 7, 69, 2, 2, 189, 190, 7, 86, 2, 2, 190, 22, 3, 2, 2, 2, 191, 192, 7, 72, 2, 2, 192, 193, 7, 84, 2, 2, 193, 194, 7, 81, 2, 2, 194, 195, 7, 79, 2, 2, 195, 24, 3, 2, 2, 2, 196, 197, 7, 67, 2, 2, 197, 198, 7, 85, 2, 2, 198, 26, 3, 2, 2, 2, 199, 200, 7, 68, 2, 2, 200, 201, 7, 91, 2, 2, 201, 28, 3, 2, 2, 2, 202, 203, 7, 81, 2, 2, 203, 204, 7, 84, 2, 2, 204, 205, 7, 70, 2, 2, 205, 206, 7, 71, 2, 2, 206, 207, 7, 84, 2, 2, 207, 30, 3, 2, 2, 2, 208, 209, 7, 67, 2, 2, 209, 210, 7, 85, 2, 2, 210, 211, 7, 69, 2, 2, 211, 32, 3, 2, 2, 2, 212, 213, 7, 70, 2, 2, 213, 214, 7, 71, 2, 2, 214, 215, 7, 85, 2, 2, 215, 216, 7, 69, 2, 2, 216, 34, 3, 2, 2, 2, 217, 218, 7, 78, 2, 2, 218, 219, 7, 75, 2, 2, 219, 220, 7, 79, 2, 2, 220, 221, 7, 75, 2, 2, 
221, 222, 7, 86, 2, 2, 222, 36, 3, 2, 2, 2, 223, 224, 7, 86, 2, 2, 224, 225, 7, 84, 2, 2, 225, 226, 7, 87, 2, 2, 226, 227, 7, 71, 2, 2, 227, 38, 3, 2, 2, 2, 228, 229, 7, 72, 2, 2, 229, 230, 7, 67, 2, 2, 230, 231, 7, 78, 2, 2, 231, 232, 7, 85, 2, 2, 232, 233, 7, 71, 2, 2, 233, 40, 3, 2, 2, 2, 234, 235, 7, 75, 2, 2, 235, 236, 7, 85, 2, 2, 236, 42, 3, 2, 2, 2, 237, 238, 7, 80, 2, 2, 238, 239, 7, 87, 2, 2, 239, 240, 7, 78, 2, 2, 240, 241, 7, 78, 2, 2, 241, 44, 3, 2, 2, 2, 242, 243, 5, 113, 57, 2, 243, 46, 3, 2, 2, 2, 244, 246, 5, 71, 36, 2, 245, 244, 3, 2, 2, 2, 245, 246, 3, 2, 2, 2, 246, 247, 3, 2, 2, 2, 247, 248, 5, 45, 23, 2, 248, 48, 3, 2, 2, 2, 249, 258, 5, 115, 58, 2, 250, 252, 7, 71, 2, 2, 251, 253, 9, 2, 2, 2, 252, 251, 3, 2, 2, 2, 252, 253, 3, 2, 2, 2, 253, 256, 3, 2, 2, 2, 254, 257, 5, 115, 58, 2, 255, 257, 5, 113, 57, 2, 256, 254, 3, 2, 2, 2, 256, 255, 3, 2, 2, 2, 257, 259, 3, 2, 2, 2, 258, 250, 3, 2, 2, 2, 258, 259, 3, 2, 2, 2, 259, 261, 3, 2, 2, 2, 260, 262, 9, 3, 2, 2, 261, 260, 3, 2, 2, 2, 261, 262, 3, 2, 2, 2, 262, 50, 3, 2, 2, 2, 263, 265, 5, 71, 36, 2, 264, 263, 3, 2, 2, 2, 264, 265, 3, 2, 2, 2, 265, 266, 3, 2, 2, 2, 266, 267, 5, 49, 25, 2, 267, 52, 3, 2, 2, 2, 268, 274, 7, 41, 2, 2, 269, 273, 10, 4, 2, 2, 270, 271, 7, 94, 2, 2, 271, 273, 11, 2, 2, 2, 272, 269, 3, 2, 2, 2, 272, 270, 3, 2, 2, 2, 273, 276, 3, 2, 2, 2, 274, 272, 3, 2, 2, 2, 274, 275, 3, 2, 2, 2, 275, 277, 3, 2, 2, 2, 276, 274, 3, 2, 2, 2, 277, 278, 7, 41, 2, 2, 278, 54, 3, 2, 2, 2, 279, 285, 7, 36, 2, 2, 280, 284, 10, 5, 2, 2, 281, 282, 7, 94, 2, 2, 282, 284, 11, 2, 2, 2, 283, 280, 3, 2, 2, 2, 283, 281, 3, 2, 2, 2, 284, 287, 3, 2, 2, 2, 285, 283, 3, 2, 2, 2, 285, 286, 3, 2, 2, 2, 286, 288, 3, 2, 2, 2, 287, 285, 3, 2, 2, 2, 288, 289, 7, 36, 2, 2, 289, 56, 3, 2, 2, 2, 290, 291, 7, 48, 2, 2, 291, 58, 3, 2, 2, 2, 292, 293, 7, 42, 2, 2, 293, 60, 3, 2, 2, 2, 294, 295, 7, 43, 2, 2, 295, 62, 3, 2, 2, 2, 296, 297, 7, 93, 2, 2, 297, 64, 3, 2, 2, 2, 298, 299, 7, 95, 2, 2, 299, 66, 3, 2, 2, 2, 300, 
301, 7, 44, 2, 2, 301, 68, 3, 2, 2, 2, 302, 303, 7, 45, 2, 2, 303, 70, 3, 2, 2, 2, 304, 305, 7, 47, 2, 2, 305, 72, 3, 2, 2, 2, 306, 307, 7, 46, 2, 2, 307, 74, 3, 2, 2, 2, 308, 309, 7, 49, 2, 2, 309, 76, 3, 2, 2, 2, 310, 311, 7, 39, 2, 2, 311, 78, 3, 2, 2, 2, 312, 313, 7, 66, 2, 2, 313, 80, 3, 2, 2, 2, 314, 315, 7, 60, 2, 2, 315, 316, 7, 63, 2, 2, 316, 82, 3, 2, 2, 2, 317, 318, 7, 37, 2, 2, 318, 84, 3, 2, 2, 2, 319, 320, 7, 60, 2, 2, 320, 86, 3, 2, 2, 2, 321, 322, 7, 61, 2, 2, 322, 88, 3, 2, 2, 2, 323, 324, 7, 62, 2, 2, 324, 325, 7, 63, 2, 2, 325, 90, 3, 2, 2, 2, 326, 327, 7, 64, 2, 2, 327, 328, 7, 63, 2, 2, 328, 92, 3, 2, 2, 2, 329, 330, 7, 35, 2, 2, 330, 331, 7, 63, 2, 2, 331, 94, 3, 2, 2, 2, 332, 333, 7, 96, 2, 2, 333, 96, 3, 2, 2, 2, 334, 335, 7, 128, 2, 2, 335, 98, 3, 2, 2, 2, 336, 337, 7, 62, 2, 2, 337, 100, 3, 2, 2, 2, 338, 339, 7, 64, 2, 2, 339, 102, 3, 2, 2, 2, 340, 341, 7, 63, 2, 2, 341, 104, 3, 2, 2, 2, 342, 343, 7, 126, 2, 2, 343, 344, 7, 126, 2, 2, 344, 106, 3, 2, 2, 2, 345, 346, 7, 97, 2, 2, 346, 108, 3, 2, 2, 2, 347, 349, 9, 6, 2, 2, 348, 347, 3, 2, 2, 2, 349, 350, 3, 2, 2, 2, 350, 348, 3, 2, 2, 2, 350, 351, 3, 2, 2, 2, 351, 352, 3, 2, 2, 2, 352, 353, 8, 55, 2, 2, 353, 110, 3, 2, 2, 2, 354, 355, 9, 7, 2, 2, 355, 112, 3, 2, 2, 2, 356, 358, 9, 8, 2, 2, 357, 356, 3, 2, 2, 2, 358, 359, 3, 2, 2, 2, 359, 357, 3, 2, 2, 2, 359, 360, 3, 2, 2, 2, 360, 114, 3, 2, 2, 2, 361, 363, 5, 45, 23, 2, 362, 361, 3, 2, 2, 2, 363, 366, 3, 2, 2, 2, 364, 362, 3, 2, 2, 2, 364, 365, 3, 2, 2, 2, 365, 368, 3, 2, 2, 2, 366, 364, 3, 2, 2, 2, 367, 369, 7, 48, 2, 2, 368, 367, 3, 2, 2, 2, 368, 369, 3, 2, 2, 2, 369, 371, 3, 2, 2, 2, 370, 372, 5, 45, 23, 2, 371, 370, 3, 2, 2, 2, 372, 373, 3, 2, 2, 2, 373, 371, 3, 2, 2, 2, 373, 374, 3, 2, 2, 2, 374, 116, 3, 2, 2, 2, 375, 381, 5, 63, 32, 2, 376, 382, 5, 71, 36, 2, 377, 382, 5, 113, 57, 2, 378, 382, 5, 115, 58, 2, 379, 382, 7, 46, 2, 2, 380, 382, 5, 109, 55, 2, 381, 376, 3, 2, 2, 2, 381, 377, 3, 2, 2, 2, 381, 378, 3, 2, 2, 2, 381, 379, 3, 
2, 2, 2, 381, 380, 3, 2, 2, 2, 382, 383, 3, 2, 2, 2, 383, 381, 3, 2, 2, 2, 383, 384, 3, 2, 2, 2, 384, 385, 3, 2, 2, 2, 385, 386, 5, 65, 33, 2, 386, 118, 3, 2, 2, 2, 387, 388, 7, 47, 2, 2, 388, 389, 7, 47, 2, 2, 389, 393, 3, 2, 2, 2, 390, 392, 10, 9, 2, 2, 391, 390, 3, 2, 2, 2, 392, 395, 3, 2, 2, 2, 393, 391, 3, 2, 2, 2, 393, 394, 3, 2, 2, 2, 394, 398, 3, 2, 2, 2, 395, 393, 3, 2, 2, 2, 396, 399, 5, 123, 62, 2, 397, 399, 7, 2, 2, 3, 398, 396, 3, 2, 2, 2, 398, 397, 3, 2, 2, 2, 399, 400, 3, 2, 2, 2, 400, 401, 8, 60, 3, 2, 401, 120, 3, 2, 2, 2, 402, 403, 7, 49, 2, 2, 403, 404, 7, 44, 2, 2, 404, 408, 3, 2, 2, 2, 405, 407, 11, 2, 2, 2, 406, 405, 3, 2, 2, 2, 407, 410, 3, 2, 2, 2, 408, 409, 3, 2, 2, 2, 408, 406, 3, 2, 2, 2, 409, 411, 3, 2, 2, 2, 410, 408, 3, 2, 2, 2, 411, 412, 7, 44, 2, 2, 412, 413, 7, 49, 2, 2, 413, 414, 3, 2, 2, 2, 414, 415, 8, 61, 3, 2, 415, 122, 3, 2, 2, 2, 416, 418, 7, 15, 2, 2, 417, 416, 3, 2, 2, 2, 417, 418, 3, 2, 2, 2, 418, 419, 3, 2, 2, 2, 419, 420, 7, 12, 2, 2, 420, 124, 3, 2, 2, 2, 421, 424, 5, 111, 56, 2, 422, 424, 9, 10, 2, 2, 423, 421, 3, 2, 2, 2, 423, 422, 3, 2, 2, 2, 424, 425, 3, 2, 2, 2, 425, 423, 3, 2, 2, 2, 425, 426, 3, 2, 2, 2, 426, 126, 3, 2, 2, 2, 26, 2, 245, 252, 256, 258, 261, 264, 272, 274, 283, 285, 350, 359, 364, 368, 373, 381, 383, 393, 398, 408, 417, 423, 425, 4, 8, 2, 2, 2, 4, 2]"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLLexer.tokens",
    "content": "OR=1\nAND=2\nNOT=3\nIN=4\nCONTAIN_ALL=5\nCONTAIN_ANY=6\nBETWEEN=7\nLIKE=8\nWHERE=9\nSELECT=10\nFROM=11\nAS=12\nBY=13\nORDER=14\nASC=15\nDESC=16\nLIMIT=17\nTRUE_V=18\nFALSE_V=19\nIS=20\nNULL_V=21\nINTEGER=22\nFLOAT=23\nSQUOTA_STRING=24\nDQUOTA_STRING=25\nDOT=26\nLP=27\nRP=28\nLMP=29\nRMP=30\nASTERISK=31\nPLUS_SIGN=32\nMINUS_SIGN=33\nCOMMA=34\nSOLIDUS=35\nMOD=36\nAT_SIGN=37\nASSIGN_OP=38\nSHARP_SIGN=39\nCOLON=40\nSEMI=41\nLE_OP=42\nGE_OP=43\nNE_OP=44\nCARET_OP=45\nTILDE_OP=46\nL_OP=47\nG_OP=48\nE_OP=49\nCONCAT_OP=50\nUNDERSCORE=51\nSPACES=52\nVECTOR=53\nSINGLE_LINE_COMMENT=54\nMULTI_LINE_COMMENT=55\nREGULAR_ID=56\n'OR'=1\n'AND'=2\n'NOT'=3\n'IN'=4\n'CONTAIN_ALL'=5\n'CONTAIN_ANY'=6\n'BETWEEN'=7\n'LIKE'=8\n'WHERE'=9\n'SELECT'=10\n'FROM'=11\n'AS'=12\n'BY'=13\n'ORDER'=14\n'ASC'=15\n'DESC'=16\n'LIMIT'=17\n'TRUE'=18\n'FALSE'=19\n'IS'=20\n'NULL'=21\n'.'=26\n'('=27\n')'=28\n'['=29\n']'=30\n'*'=31\n'+'=32\n'-'=33\n','=34\n'/'=35\n'%'=36\n'@'=37\n':='=38\n'#'=39\n':'=40\n';'=41\n'<='=42\n'>='=43\n'!='=44\n'^'=45\n'~'=46\n'<'=47\n'>'=48\n'='=49\n'||'=50\n'_'=51\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParser.cc",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n\n#include \"SQLParser.h\"\n#include \"SQLParserListener.h\"\n\n\nusing namespace antlrcpp;\nusing namespace antlr4;\nusing namespace antlr4;\n\nSQLParser::SQLParser(TokenStream *input) : Parser(input) {\n  _interpreter = new atn::ParserATNSimulator(this, _atn, _decisionToDFA,\n                                             _sharedContextCache);\n}\n\nSQLParser::~SQLParser() {\n  delete _interpreter;\n}\n\nstd::string SQLParser::getGrammarFileName() const {\n  return \"SQLParser.g4\";\n}\n\nconst std::vector<std::string> &SQLParser::getRuleNames() const {\n  return _ruleNames;\n}\n\ndfa::Vocabulary &SQLParser::getVocabulary() const {\n  return _vocabulary;\n}\n\n\n//----------------- Swallow_to_semiContext\n//------------------------------------------------------------------\n\nSQLParser::Swallow_to_semiContext::Swallow_to_semiContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nstd::vector<tree::TerminalNode *> SQLParser::Swallow_to_semiContext::SEMI() {\n  return getTokens(SQLParser::SEMI);\n}\n\ntree::TerminalNode *SQLParser::Swallow_to_semiContext::SEMI(size_t i) {\n  return getToken(SQLParser::SEMI, i);\n}\n\n\nsize_t SQLParser::Swallow_to_semiContext::getRuleIndex() const {\n  return SQLParser::RuleSwallow_to_semi;\n}\n\nvoid SQLParser::Swallow_to_semiContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterSwallow_to_semi(this);\n}\n\nvoid SQLParser::Swallow_to_semiContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitSwallow_to_semi(this);\n}\n\nSQLParser::Swallow_to_semiContext *SQLParser::swallow_to_semi() {\n  Swallow_to_semiContext *_localctx =\n      
_tracker.createInstance<Swallow_to_semiContext>(_ctx, getState());\n  enterRule(_localctx, 0, SQLParser::RuleSwallow_to_semi);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(81);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    do {\n      setState(80);\n      _la = _input->LA(1);\n      if (_la == 0 || _la == Token::EOF || (_la == SQLParser::SEMI)) {\n        _errHandler->recoverInline(this);\n      } else {\n        _errHandler->reportMatch(this);\n        consume();\n      }\n      setState(83);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    } while (\n        (((_la & ~0x3fULL) == 0) &&\n         ((1ULL << _la) &\n          ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |\n           (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |\n           (1ULL << SQLParser::CONTAIN_ALL) | (1ULL << SQLParser::CONTAIN_ANY) |\n           (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |\n           (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |\n           (1ULL << SQLParser::FROM) | (1ULL << SQLParser::AS) |\n           (1ULL << SQLParser::BY) | (1ULL << SQLParser::ORDER) |\n           (1ULL << SQLParser::ASC) | (1ULL << SQLParser::DESC) |\n           (1ULL << SQLParser::LIMIT) | (1ULL << SQLParser::TRUE_V) |\n           (1ULL << SQLParser::FALSE_V) | (1ULL << SQLParser::IS) |\n           (1ULL << SQLParser::NULL_V) | (1ULL << SQLParser::INTEGER) |\n           (1ULL << SQLParser::FLOAT) | (1ULL << SQLParser::SQUOTA_STRING) |\n           (1ULL << SQLParser::DQUOTA_STRING) | (1ULL << SQLParser::DOT) |\n           (1ULL << SQLParser::LP) | (1ULL << SQLParser::RP) |\n           (1ULL << SQLParser::LMP) | (1ULL << SQLParser::RMP) |\n           (1ULL << SQLParser::ASTERISK) | (1ULL << SQLParser::PLUS_SIGN) |\n           (1ULL << SQLParser::MINUS_SIGN) | (1ULL << SQLParser::COMMA) |\n           (1ULL << SQLParser::SOLIDUS) | (1ULL << 
SQLParser::MOD) |\n           (1ULL << SQLParser::AT_SIGN) | (1ULL << SQLParser::ASSIGN_OP) |\n           (1ULL << SQLParser::SHARP_SIGN) | (1ULL << SQLParser::COLON) |\n           (1ULL << SQLParser::LE_OP) | (1ULL << SQLParser::GE_OP) |\n           (1ULL << SQLParser::NE_OP) | (1ULL << SQLParser::CARET_OP) |\n           (1ULL << SQLParser::TILDE_OP) | (1ULL << SQLParser::L_OP) |\n           (1ULL << SQLParser::G_OP) | (1ULL << SQLParser::E_OP) |\n           (1ULL << SQLParser::CONCAT_OP) | (1ULL << SQLParser::UNDERSCORE) |\n           (1ULL << SQLParser::SPACES) | (1ULL << SQLParser::VECTOR) |\n           (1ULL << SQLParser::SINGLE_LINE_COMMENT) |\n           (1ULL << SQLParser::MULTI_LINE_COMMENT) |\n           (1ULL << SQLParser::REGULAR_ID))) != 0));\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Compilation_unitContext\n//------------------------------------------------------------------\n\nSQLParser::Compilation_unitContext::Compilation_unitContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Compilation_unitContext::EOF() {\n  return getToken(SQLParser::EOF, 0);\n}\n\nstd::vector<SQLParser::Unit_statementContext *>\nSQLParser::Compilation_unitContext::unit_statement() {\n  return getRuleContexts<SQLParser::Unit_statementContext>();\n}\n\nSQLParser::Unit_statementContext *\nSQLParser::Compilation_unitContext::unit_statement(size_t i) {\n  return getRuleContext<SQLParser::Unit_statementContext>(i);\n}\n\nstd::vector<tree::TerminalNode *>\nSQLParser::Compilation_unitContext::SOLIDUS() {\n  return getTokens(SQLParser::SOLIDUS);\n}\n\ntree::TerminalNode *SQLParser::Compilation_unitContext::SOLIDUS(size_t i) {\n  return getToken(SQLParser::SOLIDUS, 
i);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::Compilation_unitContext::SEMI() {\n  return getTokens(SQLParser::SEMI);\n}\n\ntree::TerminalNode *SQLParser::Compilation_unitContext::SEMI(size_t i) {\n  return getToken(SQLParser::SEMI, i);\n}\n\n\nsize_t SQLParser::Compilation_unitContext::getRuleIndex() const {\n  return SQLParser::RuleCompilation_unit;\n}\n\nvoid SQLParser::Compilation_unitContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterCompilation_unit(this);\n}\n\nvoid SQLParser::Compilation_unitContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitCompilation_unit(this);\n}\n\nSQLParser::Compilation_unitContext *SQLParser::compilation_unit() {\n  Compilation_unitContext *_localctx =\n      _tracker.createInstance<Compilation_unitContext>(_ctx, getState());\n  enterRule(_localctx, 2, SQLParser::RuleCompilation_unit);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(89);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    do {\n      setState(85);\n      unit_statement();\n      setState(87);\n      _errHandler->sync(this);\n\n      _la = _input->LA(1);\n      if (_la == SQLParser::SOLIDUS\n\n          || _la == SQLParser::SEMI) {\n        setState(86);\n        _la = _input->LA(1);\n        if (!(_la == SQLParser::SOLIDUS\n\n              || _la == SQLParser::SEMI)) {\n          _errHandler->recoverInline(this);\n        } else {\n          _errHandler->reportMatch(this);\n          consume();\n        }\n      }\n      setState(91);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    } while (_la == SQLParser::SELECT);\n    setState(93);\n    match(SQLParser::EOF);\n\n  } catch 
(RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Logic_expr_unitContext\n//------------------------------------------------------------------\n\nSQLParser::Logic_expr_unitContext::Logic_expr_unitContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Logic_exprContext *SQLParser::Logic_expr_unitContext::logic_expr() {\n  return getRuleContext<SQLParser::Logic_exprContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Logic_expr_unitContext::EOF() {\n  return getToken(SQLParser::EOF, 0);\n}\n\n\nsize_t SQLParser::Logic_expr_unitContext::getRuleIndex() const {\n  return SQLParser::RuleLogic_expr_unit;\n}\n\nvoid SQLParser::Logic_expr_unitContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterLogic_expr_unit(this);\n}\n\nvoid SQLParser::Logic_expr_unitContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitLogic_expr_unit(this);\n}\n\nSQLParser::Logic_expr_unitContext *SQLParser::logic_expr_unit() {\n  Logic_expr_unitContext *_localctx =\n      _tracker.createInstance<Logic_expr_unitContext>(_ctx, getState());\n  enterRule(_localctx, 4, SQLParser::RuleLogic_expr_unit);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(95);\n    logic_expr(0);\n    setState(96);\n    match(SQLParser::EOF);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return 
_localctx;\n}\n\n//----------------- Unit_statementContext\n//------------------------------------------------------------------\n\nSQLParser::Unit_statementContext::Unit_statementContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Dql_statementContext *\nSQLParser::Unit_statementContext::dql_statement() {\n  return getRuleContext<SQLParser::Dql_statementContext>(0);\n}\n\n\nsize_t SQLParser::Unit_statementContext::getRuleIndex() const {\n  return SQLParser::RuleUnit_statement;\n}\n\nvoid SQLParser::Unit_statementContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterUnit_statement(this);\n}\n\nvoid SQLParser::Unit_statementContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitUnit_statement(this);\n}\n\nSQLParser::Unit_statementContext *SQLParser::unit_statement() {\n  Unit_statementContext *_localctx =\n      _tracker.createInstance<Unit_statementContext>(_ctx, getState());\n  enterRule(_localctx, 6, SQLParser::RuleUnit_statement);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(98);\n    dql_statement();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Where_clauseContext\n//------------------------------------------------------------------\n\nSQLParser::Where_clauseContext::Where_clauseContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Where_clauseContext::WHERE() 
{\n  return getToken(SQLParser::WHERE, 0);\n}\n\nSQLParser::Logic_exprContext *SQLParser::Where_clauseContext::logic_expr() {\n  return getRuleContext<SQLParser::Logic_exprContext>(0);\n}\n\n\nsize_t SQLParser::Where_clauseContext::getRuleIndex() const {\n  return SQLParser::RuleWhere_clause;\n}\n\nvoid SQLParser::Where_clauseContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterWhere_clause(this);\n}\n\nvoid SQLParser::Where_clauseContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitWhere_clause(this);\n}\n\nSQLParser::Where_clauseContext *SQLParser::where_clause() {\n  Where_clauseContext *_localctx =\n      _tracker.createInstance<Where_clauseContext>(_ctx, getState());\n  enterRule(_localctx, 8, SQLParser::RuleWhere_clause);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(100);\n    match(SQLParser::WHERE);\n    setState(101);\n    logic_expr(0);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Logic_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Logic_exprContext::Logic_exprContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Relation_exprContext *SQLParser::Logic_exprContext::relation_expr() {\n  return getRuleContext<SQLParser::Relation_exprContext>(0);\n}\n\nSQLParser::Enclosed_exprContext *SQLParser::Logic_exprContext::enclosed_expr() {\n  return 
getRuleContext<SQLParser::Enclosed_exprContext>(0);\n}\n\nstd::vector<SQLParser::Logic_exprContext *>\nSQLParser::Logic_exprContext::logic_expr() {\n  return getRuleContexts<SQLParser::Logic_exprContext>();\n}\n\nSQLParser::Logic_exprContext *SQLParser::Logic_exprContext::logic_expr(\n    size_t i) {\n  return getRuleContext<SQLParser::Logic_exprContext>(i);\n}\n\ntree::TerminalNode *SQLParser::Logic_exprContext::AND() {\n  return getToken(SQLParser::AND, 0);\n}\n\ntree::TerminalNode *SQLParser::Logic_exprContext::OR() {\n  return getToken(SQLParser::OR, 0);\n}\n\n\nsize_t SQLParser::Logic_exprContext::getRuleIndex() const {\n  return SQLParser::RuleLogic_expr;\n}\n\nvoid SQLParser::Logic_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterLogic_expr(this);\n}\n\nvoid SQLParser::Logic_exprContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitLogic_expr(this);\n}\n\n\nSQLParser::Logic_exprContext *SQLParser::logic_expr() {\n  return logic_expr(0);\n}\n\nSQLParser::Logic_exprContext *SQLParser::logic_expr(int precedence) {\n  ParserRuleContext *parentContext = _ctx;\n  size_t parentState = getState();\n  SQLParser::Logic_exprContext *_localctx =\n      _tracker.createInstance<Logic_exprContext>(_ctx, parentState);\n  SQLParser::Logic_exprContext *previousContext = _localctx;\n  (void)previousContext;  // Silence compiler, in case the context is not used\n                          // by generated code.\n  size_t startState = 10;\n  enterRecursionRule(_localctx, 10, SQLParser::RuleLogic_expr, precedence);\n\n\n  auto onExit = finally([=] { unrollRecursionContexts(parentContext); });\n  try {\n    size_t alt;\n    enterOuterAlt(_localctx, 1);\n    setState(106);\n    _errHandler->sync(this);\n    switch 
(_input->LA(1)) {\n      case SQLParser::OR:\n      case SQLParser::AND:\n      case SQLParser::NOT:\n      case SQLParser::IN:\n      case SQLParser::BETWEEN:\n      case SQLParser::LIKE:\n      case SQLParser::WHERE:\n      case SQLParser::SELECT:\n      case SQLParser::AS:\n      case SQLParser::BY:\n      case SQLParser::ORDER:\n      case SQLParser::ASC:\n      case SQLParser::DESC:\n      case SQLParser::LIMIT:\n      case SQLParser::REGULAR_ID: {\n        setState(104);\n        relation_expr();\n        break;\n      }\n\n      case SQLParser::LP: {\n        setState(105);\n        enclosed_expr();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n    _ctx->stop = _input->LT(-1);\n    setState(116);\n    _errHandler->sync(this);\n    alt = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, 5,\n                                                                     _ctx);\n    while (alt != 2 && alt != atn::ATN::INVALID_ALT_NUMBER) {\n      if (alt == 1) {\n        if (!_parseListeners.empty()) triggerExitRuleEvent();\n        previousContext = _localctx;\n        setState(114);\n        _errHandler->sync(this);\n        switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n            _input, 4, _ctx)) {\n          case 1: {\n            _localctx = _tracker.createInstance<Logic_exprContext>(\n                parentContext, parentState);\n            pushNewRecursionContext(_localctx, startState, RuleLogic_expr);\n            setState(108);\n\n            if (!(precpred(_ctx, 3)))\n              throw FailedPredicateException(this, \"precpred(_ctx, 3)\");\n            setState(109);\n            match(SQLParser::AND);\n            setState(110);\n            logic_expr(4);\n            break;\n          }\n\n          case 2: {\n            _localctx = _tracker.createInstance<Logic_exprContext>(\n                parentContext, parentState);\n            
pushNewRecursionContext(_localctx, startState, RuleLogic_expr);\n            setState(111);\n\n            if (!(precpred(_ctx, 2)))\n              throw FailedPredicateException(this, \"precpred(_ctx, 2)\");\n            setState(112);\n            match(SQLParser::OR);\n            setState(113);\n            logic_expr(3);\n            break;\n          }\n        }\n      }\n      setState(118);\n      _errHandler->sync(this);\n      alt = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input,\n                                                                       5, _ctx);\n    }\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n  return _localctx;\n}\n\n//----------------- Enclosed_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Enclosed_exprContext::Enclosed_exprContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Enclosed_exprContext::LP() {\n  return getToken(SQLParser::LP, 0);\n}\n\nSQLParser::Logic_exprContext *SQLParser::Enclosed_exprContext::logic_expr() {\n  return getRuleContext<SQLParser::Logic_exprContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Enclosed_exprContext::RP() {\n  return getToken(SQLParser::RP, 0);\n}\n\n\nsize_t SQLParser::Enclosed_exprContext::getRuleIndex() const {\n  return SQLParser::RuleEnclosed_expr;\n}\n\nvoid SQLParser::Enclosed_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterEnclosed_expr(this);\n}\n\nvoid SQLParser::Enclosed_exprContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener 
!= nullptr) parserListener->exitEnclosed_expr(this);\n}\n\nSQLParser::Enclosed_exprContext *SQLParser::enclosed_expr() {\n  Enclosed_exprContext *_localctx =\n      _tracker.createInstance<Enclosed_exprContext>(_ctx, getState());\n  enterRule(_localctx, 12, SQLParser::RuleEnclosed_expr);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(119);\n    match(SQLParser::LP);\n    setState(120);\n    logic_expr(0);\n    setState(121);\n    match(SQLParser::RP);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Relation_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Relation_exprContext::Relation_exprContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Relation_exprContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\nSQLParser::Rel_operContext *SQLParser::Relation_exprContext::rel_oper() {\n  return getRuleContext<SQLParser::Rel_operContext>(0);\n}\n\nSQLParser::Value_exprContext *SQLParser::Relation_exprContext::value_expr() {\n  return getRuleContext<SQLParser::Value_exprContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::LIKE() {\n  return getToken(SQLParser::LIKE, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::IN() {\n  return getToken(SQLParser::IN, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::LP() {\n  return getToken(SQLParser::LP, 0);\n}\n\nSQLParser::In_value_expr_listContext *\nSQLParser::Relation_exprContext::in_value_expr_list() {\n  return getRuleContext<SQLParser::In_value_expr_listContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::RP() {\n  
return getToken(SQLParser::RP, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::NOT() {\n  return getToken(SQLParser::NOT, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::CONTAIN_ALL() {\n  return getToken(SQLParser::CONTAIN_ALL, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::CONTAIN_ANY() {\n  return getToken(SQLParser::CONTAIN_ANY, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::IS() {\n  return getToken(SQLParser::IS, 0);\n}\n\ntree::TerminalNode *SQLParser::Relation_exprContext::NULL_V() {\n  return getToken(SQLParser::NULL_V, 0);\n}\n\nSQLParser::Function_callContext *\nSQLParser::Relation_exprContext::function_call() {\n  return getRuleContext<SQLParser::Function_callContext>(0);\n}\n\n\nsize_t SQLParser::Relation_exprContext::getRuleIndex() const {\n  return SQLParser::RuleRelation_expr;\n}\n\nvoid SQLParser::Relation_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterRelation_expr(this);\n}\n\nvoid SQLParser::Relation_exprContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitRelation_expr(this);\n}\n\nSQLParser::Relation_exprContext *SQLParser::relation_expr() {\n  Relation_exprContext *_localctx =\n      _tracker.createInstance<Relation_exprContext>(_ctx, getState());\n  enterRule(_localctx, 14, SQLParser::RuleRelation_expr);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(162);\n    _errHandler->sync(this);\n    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n        _input, 10, _ctx)) {\n      case 1: {\n        enterOuterAlt(_localctx, 1);\n        setState(123);\n        identifier();\n        setState(124);\n        rel_oper();\n        setState(125);\n        
value_expr();\n        break;\n      }\n\n      case 2: {\n        enterOuterAlt(_localctx, 2);\n        setState(127);\n        identifier();\n        setState(128);\n        match(SQLParser::LIKE);\n        setState(129);\n        value_expr();\n        break;\n      }\n\n      case 3: {\n        enterOuterAlt(_localctx, 3);\n        setState(131);\n        identifier();\n        setState(133);\n        _errHandler->sync(this);\n\n        _la = _input->LA(1);\n        if (_la == SQLParser::NOT) {\n          setState(132);\n          match(SQLParser::NOT);\n        }\n        setState(135);\n        match(SQLParser::IN);\n        setState(136);\n        match(SQLParser::LP);\n        setState(137);\n        in_value_expr_list();\n        setState(138);\n        match(SQLParser::RP);\n        break;\n      }\n\n      case 4: {\n        enterOuterAlt(_localctx, 4);\n        setState(140);\n        identifier();\n        setState(142);\n        _errHandler->sync(this);\n\n        _la = _input->LA(1);\n        if (_la == SQLParser::NOT) {\n          setState(141);\n          match(SQLParser::NOT);\n        }\n        setState(144);\n        _la = _input->LA(1);\n        if (!(_la == SQLParser::CONTAIN_ALL\n\n              || _la == SQLParser::CONTAIN_ANY)) {\n          _errHandler->recoverInline(this);\n        } else {\n          _errHandler->reportMatch(this);\n          consume();\n        }\n        setState(145);\n        match(SQLParser::LP);\n        setState(147);\n        _errHandler->sync(this);\n\n        _la = _input->LA(1);\n        if ((((_la & ~0x3fULL) == 0) &&\n             ((1ULL << _la) &\n              ((1ULL << SQLParser::TRUE_V) | (1ULL << SQLParser::FALSE_V) |\n               (1ULL << SQLParser::INTEGER) | (1ULL << SQLParser::FLOAT) |\n               (1ULL << SQLParser::SQUOTA_STRING) |\n               (1ULL << SQLParser::DQUOTA_STRING))) != 0)) {\n          setState(146);\n          in_value_expr_list();\n        }\n        setState(149);\n     
   match(SQLParser::RP);\n        break;\n      }\n\n      case 5: {\n        enterOuterAlt(_localctx, 5);\n        setState(151);\n        identifier();\n        setState(152);\n        match(SQLParser::IS);\n        setState(154);\n        _errHandler->sync(this);\n\n        _la = _input->LA(1);\n        if (_la == SQLParser::NOT) {\n          setState(153);\n          match(SQLParser::NOT);\n        }\n        setState(156);\n        match(SQLParser::NULL_V);\n        break;\n      }\n\n      case 6: {\n        enterOuterAlt(_localctx, 6);\n        setState(158);\n        function_call();\n        setState(159);\n        rel_oper();\n        setState(160);\n        value_expr();\n        break;\n      }\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Rel_operContext\n//------------------------------------------------------------------\n\nSQLParser::Rel_operContext::Rel_operContext(ParserRuleContext *parent_ctx,\n                                            size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Rel_operContext::E_OP() {\n  return getToken(SQLParser::E_OP, 0);\n}\n\nSQLParser::Ne_opContext *SQLParser::Rel_operContext::ne_op() {\n  return getRuleContext<SQLParser::Ne_opContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Rel_operContext::L_OP() {\n  return getToken(SQLParser::L_OP, 0);\n}\n\ntree::TerminalNode *SQLParser::Rel_operContext::G_OP() {\n  return getToken(SQLParser::G_OP, 0);\n}\n\nSQLParser::Le_opContext *SQLParser::Rel_operContext::le_op() {\n  return getRuleContext<SQLParser::Le_opContext>(0);\n}\n\nSQLParser::Ge_opContext *SQLParser::Rel_operContext::ge_op() {\n  return getRuleContext<SQLParser::Ge_opContext>(0);\n}\n\n\nsize_t SQLParser::Rel_operContext::getRuleIndex() const {\n  
return SQLParser::RuleRel_oper;\n}\n\nvoid SQLParser::Rel_operContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterRel_oper(this);\n}\n\nvoid SQLParser::Rel_operContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitRel_oper(this);\n}\n\nSQLParser::Rel_operContext *SQLParser::rel_oper() {\n  Rel_operContext *_localctx =\n      _tracker.createInstance<Rel_operContext>(_ctx, getState());\n  enterRule(_localctx, 16, SQLParser::RuleRel_oper);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(170);\n    _errHandler->sync(this);\n    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n        _input, 11, _ctx)) {\n      case 1: {\n        enterOuterAlt(_localctx, 1);\n        setState(164);\n        match(SQLParser::E_OP);\n        break;\n      }\n\n      case 2: {\n        enterOuterAlt(_localctx, 2);\n        setState(165);\n        ne_op();\n        break;\n      }\n\n      case 3: {\n        enterOuterAlt(_localctx, 3);\n        setState(166);\n        match(SQLParser::L_OP);\n        break;\n      }\n\n      case 4: {\n        enterOuterAlt(_localctx, 4);\n        setState(167);\n        match(SQLParser::G_OP);\n        break;\n      }\n\n      case 5: {\n        enterOuterAlt(_localctx, 5);\n        setState(168);\n        le_op();\n        break;\n      }\n\n      case 6: {\n        enterOuterAlt(_localctx, 6);\n        setState(169);\n        ge_op();\n        break;\n      }\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- 
Value_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Value_exprContext::Value_exprContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::ConstantContext *SQLParser::Value_exprContext::constant() {\n  return getRuleContext<SQLParser::ConstantContext>(0);\n}\n\nSQLParser::Function_callContext *SQLParser::Value_exprContext::function_call() {\n  return getRuleContext<SQLParser::Function_callContext>(0);\n}\n\n\nsize_t SQLParser::Value_exprContext::getRuleIndex() const {\n  return SQLParser::RuleValue_expr;\n}\n\nvoid SQLParser::Value_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterValue_expr(this);\n}\n\nvoid SQLParser::Value_exprContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitValue_expr(this);\n}\n\nSQLParser::Value_exprContext *SQLParser::value_expr() {\n  Value_exprContext *_localctx =\n      _tracker.createInstance<Value_exprContext>(_ctx, getState());\n  enterRule(_localctx, 18, SQLParser::RuleValue_expr);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(174);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::TRUE_V:\n      case SQLParser::FALSE_V:\n      case SQLParser::INTEGER:\n      case SQLParser::FLOAT:\n      case SQLParser::SQUOTA_STRING:\n      case SQLParser::DQUOTA_STRING:\n      case SQLParser::LMP:\n      case SQLParser::VECTOR: {\n        enterOuterAlt(_localctx, 1);\n        setState(172);\n        constant();\n        break;\n      }\n\n      case SQLParser::OR:\n      case SQLParser::AND:\n      case SQLParser::NOT:\n      case SQLParser::IN:\n      
case SQLParser::BETWEEN:\n      case SQLParser::LIKE:\n      case SQLParser::WHERE:\n      case SQLParser::SELECT:\n      case SQLParser::AS:\n      case SQLParser::BY:\n      case SQLParser::ORDER:\n      case SQLParser::ASC:\n      case SQLParser::DESC:\n      case SQLParser::LIMIT:\n      case SQLParser::REGULAR_ID: {\n        enterOuterAlt(_localctx, 2);\n        setState(173);\n        function_call();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- In_value_expr_listContext\n//------------------------------------------------------------------\n\nSQLParser::In_value_expr_listContext::In_value_expr_listContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nstd::vector<SQLParser::In_value_exprContext *>\nSQLParser::In_value_expr_listContext::in_value_expr() {\n  return getRuleContexts<SQLParser::In_value_exprContext>();\n}\n\nSQLParser::In_value_exprContext *\nSQLParser::In_value_expr_listContext::in_value_expr(size_t i) {\n  return getRuleContext<SQLParser::In_value_exprContext>(i);\n}\n\nstd::vector<tree::TerminalNode *>\nSQLParser::In_value_expr_listContext::COMMA() {\n  return getTokens(SQLParser::COMMA);\n}\n\ntree::TerminalNode *SQLParser::In_value_expr_listContext::COMMA(size_t i) {\n  return getToken(SQLParser::COMMA, i);\n}\n\n\nsize_t SQLParser::In_value_expr_listContext::getRuleIndex() const {\n  return SQLParser::RuleIn_value_expr_list;\n}\n\nvoid SQLParser::In_value_expr_listContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterIn_value_expr_list(this);\n}\n\nvoid 
SQLParser::In_value_expr_listContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitIn_value_expr_list(this);\n}\n\nSQLParser::In_value_expr_listContext *SQLParser::in_value_expr_list() {\n  In_value_expr_listContext *_localctx =\n      _tracker.createInstance<In_value_expr_listContext>(_ctx, getState());\n  enterRule(_localctx, 20, SQLParser::RuleIn_value_expr_list);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(176);\n    in_value_expr();\n    setState(181);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    while (_la == SQLParser::COMMA) {\n      setState(177);\n      match(SQLParser::COMMA);\n      setState(178);\n      in_value_expr();\n      setState(183);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- In_value_exprContext\n//------------------------------------------------------------------\n\nSQLParser::In_value_exprContext::In_value_exprContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Constant_num_and_strContext *\nSQLParser::In_value_exprContext::constant_num_and_str() {\n  return getRuleContext<SQLParser::Constant_num_and_strContext>(0);\n}\n\nSQLParser::Bool_valueContext *SQLParser::In_value_exprContext::bool_value() {\n  return getRuleContext<SQLParser::Bool_valueContext>(0);\n}\n\n\nsize_t SQLParser::In_value_exprContext::getRuleIndex() const {\n  return SQLParser::RuleIn_value_expr;\n}\n\nvoid SQLParser::In_value_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto 
parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterIn_value_expr(this);\n}\n\nvoid SQLParser::In_value_exprContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitIn_value_expr(this);\n}\n\nSQLParser::In_value_exprContext *SQLParser::in_value_expr() {\n  In_value_exprContext *_localctx =\n      _tracker.createInstance<In_value_exprContext>(_ctx, getState());\n  enterRule(_localctx, 22, SQLParser::RuleIn_value_expr);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(186);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::INTEGER:\n      case SQLParser::FLOAT:\n      case SQLParser::SQUOTA_STRING:\n      case SQLParser::DQUOTA_STRING: {\n        enterOuterAlt(_localctx, 1);\n        setState(184);\n        constant_num_and_str();\n        break;\n      }\n\n      case SQLParser::TRUE_V:\n      case SQLParser::FALSE_V: {\n        enterOuterAlt(_localctx, 2);\n        setState(185);\n        bool_value();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- ConstantContext\n//------------------------------------------------------------------\n\nSQLParser::ConstantContext::ConstantContext(ParserRuleContext *parent_ctx,\n                                            size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::NumericContext *SQLParser::ConstantContext::numeric() {\n  return getRuleContext<SQLParser::NumericContext>(0);\n}\n\nSQLParser::Quoted_stringContext *SQLParser::ConstantContext::quoted_string() 
{\n  return getRuleContext<SQLParser::Quoted_stringContext>(0);\n}\n\nSQLParser::Vector_exprContext *SQLParser::ConstantContext::vector_expr() {\n  return getRuleContext<SQLParser::Vector_exprContext>(0);\n}\n\nSQLParser::Bool_valueContext *SQLParser::ConstantContext::bool_value() {\n  return getRuleContext<SQLParser::Bool_valueContext>(0);\n}\n\n\nsize_t SQLParser::ConstantContext::getRuleIndex() const {\n  return SQLParser::RuleConstant;\n}\n\nvoid SQLParser::ConstantContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterConstant(this);\n}\n\nvoid SQLParser::ConstantContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitConstant(this);\n}\n\nSQLParser::ConstantContext *SQLParser::constant() {\n  ConstantContext *_localctx =\n      _tracker.createInstance<ConstantContext>(_ctx, getState());\n  enterRule(_localctx, 24, SQLParser::RuleConstant);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(192);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::INTEGER:\n      case SQLParser::FLOAT: {\n        enterOuterAlt(_localctx, 1);\n        setState(188);\n        numeric();\n        break;\n      }\n\n      case SQLParser::SQUOTA_STRING:\n      case SQLParser::DQUOTA_STRING: {\n        enterOuterAlt(_localctx, 2);\n        setState(189);\n        quoted_string();\n        break;\n      }\n\n      case SQLParser::LMP:\n      case SQLParser::VECTOR: {\n        enterOuterAlt(_localctx, 3);\n        setState(190);\n        vector_expr();\n        break;\n      }\n\n      case SQLParser::TRUE_V:\n      case SQLParser::FALSE_V: {\n        enterOuterAlt(_localctx, 4);\n        setState(191);\n        bool_value();\n        break;\n      }\n\n      default:\n        throw 
NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Constant_num_and_strContext\n//------------------------------------------------------------------\n\nSQLParser::Constant_num_and_strContext::Constant_num_and_strContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::NumericContext *SQLParser::Constant_num_and_strContext::numeric() {\n  return getRuleContext<SQLParser::NumericContext>(0);\n}\n\nSQLParser::Quoted_stringContext *\nSQLParser::Constant_num_and_strContext::quoted_string() {\n  return getRuleContext<SQLParser::Quoted_stringContext>(0);\n}\n\n\nsize_t SQLParser::Constant_num_and_strContext::getRuleIndex() const {\n  return SQLParser::RuleConstant_num_and_str;\n}\n\nvoid SQLParser::Constant_num_and_strContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr)\n    parserListener->enterConstant_num_and_str(this);\n}\n\nvoid SQLParser::Constant_num_and_strContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitConstant_num_and_str(this);\n}\n\nSQLParser::Constant_num_and_strContext *SQLParser::constant_num_and_str() {\n  Constant_num_and_strContext *_localctx =\n      _tracker.createInstance<Constant_num_and_strContext>(_ctx, getState());\n  enterRule(_localctx, 26, SQLParser::RuleConstant_num_and_str);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(196);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::INTEGER:\n      case SQLParser::FLOAT: {\n        
enterOuterAlt(_localctx, 1);\n        setState(194);\n        numeric();\n        break;\n      }\n\n      case SQLParser::SQUOTA_STRING:\n      case SQLParser::DQUOTA_STRING: {\n        enterOuterAlt(_localctx, 2);\n        setState(195);\n        quoted_string();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- MatrixContext\n//------------------------------------------------------------------\n\nSQLParser::MatrixContext::MatrixContext(ParserRuleContext *parent_ctx,\n                                        size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::MatrixContext::LMP() {\n  return getToken(SQLParser::LMP, 0);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::MatrixContext::VECTOR() {\n  return getTokens(SQLParser::VECTOR);\n}\n\ntree::TerminalNode *SQLParser::MatrixContext::VECTOR(size_t i) {\n  return getToken(SQLParser::VECTOR, i);\n}\n\ntree::TerminalNode *SQLParser::MatrixContext::RMP() {\n  return getToken(SQLParser::RMP, 0);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::MatrixContext::COMMA() {\n  return getTokens(SQLParser::COMMA);\n}\n\ntree::TerminalNode *SQLParser::MatrixContext::COMMA(size_t i) {\n  return getToken(SQLParser::COMMA, i);\n}\n\n\nsize_t SQLParser::MatrixContext::getRuleIndex() const {\n  return SQLParser::RuleMatrix;\n}\n\nvoid SQLParser::MatrixContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterMatrix(this);\n}\n\nvoid SQLParser::MatrixContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener 
*>(listener);\n  if (parserListener != nullptr) parserListener->exitMatrix(this);\n}\n\nSQLParser::MatrixContext *SQLParser::matrix() {\n  MatrixContext *_localctx =\n      _tracker.createInstance<MatrixContext>(_ctx, getState());\n  enterRule(_localctx, 28, SQLParser::RuleMatrix);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(198);\n    match(SQLParser::LMP);\n    setState(199);\n    match(SQLParser::VECTOR);\n    setState(204);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    while (_la == SQLParser::COMMA) {\n      setState(200);\n      match(SQLParser::COMMA);\n      setState(201);\n      match(SQLParser::VECTOR);\n      setState(206);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    }\n    setState(207);\n    match(SQLParser::RMP);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Vector_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Vector_exprContext::Vector_exprContext(ParserRuleContext *parent_ctx,\n                                                  size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Vector_exprContext::VECTOR() {\n  return getToken(SQLParser::VECTOR, 0);\n}\n\nSQLParser::MatrixContext *SQLParser::Vector_exprContext::matrix() {\n  return getRuleContext<SQLParser::MatrixContext>(0);\n}\n\n\nsize_t SQLParser::Vector_exprContext::getRuleIndex() const {\n  return SQLParser::RuleVector_expr;\n}\n\nvoid SQLParser::Vector_exprContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterVector_expr(this);\n}\n\nvoid 
SQLParser::Vector_exprContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitVector_expr(this);\n}\n\nSQLParser::Vector_exprContext *SQLParser::vector_expr() {\n  Vector_exprContext *_localctx =\n      _tracker.createInstance<Vector_exprContext>(_ctx, getState());\n  enterRule(_localctx, 30, SQLParser::RuleVector_expr);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(211);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::VECTOR: {\n        enterOuterAlt(_localctx, 1);\n        setState(209);\n        match(SQLParser::VECTOR);\n        break;\n      }\n\n      case SQLParser::LMP: {\n        enterOuterAlt(_localctx, 2);\n        setState(210);\n        matrix();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Function_value_exprContext\n//------------------------------------------------------------------\n\nSQLParser::Function_value_exprContext::Function_value_exprContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Value_exprContext *\nSQLParser::Function_value_exprContext::value_expr() {\n  return getRuleContext<SQLParser::Value_exprContext>(0);\n}\n\nSQLParser::IdentifierContext *\nSQLParser::Function_value_exprContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\n\nsize_t SQLParser::Function_value_exprContext::getRuleIndex() const {\n  return SQLParser::RuleFunction_value_expr;\n}\n\nvoid SQLParser::Function_value_exprContext::enterRule(\n    tree::ParseTreeListener *listener) 
{\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterFunction_value_expr(this);\n}\n\nvoid SQLParser::Function_value_exprContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitFunction_value_expr(this);\n}\n\nSQLParser::Function_value_exprContext *SQLParser::function_value_expr() {\n  Function_value_exprContext *_localctx =\n      _tracker.createInstance<Function_value_exprContext>(_ctx, getState());\n  enterRule(_localctx, 32, SQLParser::RuleFunction_value_expr);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(215);\n    _errHandler->sync(this);\n    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n        _input, 19, _ctx)) {\n      case 1: {\n        enterOuterAlt(_localctx, 1);\n        setState(213);\n        value_expr();\n        break;\n      }\n\n      case 2: {\n        enterOuterAlt(_localctx, 2);\n        setState(214);\n        identifier();\n        break;\n      }\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Function_callContext\n//------------------------------------------------------------------\n\nSQLParser::Function_callContext::Function_callContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Function_callContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Function_callContext::LP() {\n  return getToken(SQLParser::LP, 0);\n}\n\ntree::TerminalNode *SQLParser::Function_callContext::RP() {\n  return getToken(SQLParser::RP, 
0);\n}\n\nstd::vector<SQLParser::Function_value_exprContext *>\nSQLParser::Function_callContext::function_value_expr() {\n  return getRuleContexts<SQLParser::Function_value_exprContext>();\n}\n\nSQLParser::Function_value_exprContext *\nSQLParser::Function_callContext::function_value_expr(size_t i) {\n  return getRuleContext<SQLParser::Function_value_exprContext>(i);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::Function_callContext::COMMA() {\n  return getTokens(SQLParser::COMMA);\n}\n\ntree::TerminalNode *SQLParser::Function_callContext::COMMA(size_t i) {\n  return getToken(SQLParser::COMMA, i);\n}\n\n\nsize_t SQLParser::Function_callContext::getRuleIndex() const {\n  return SQLParser::RuleFunction_call;\n}\n\nvoid SQLParser::Function_callContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterFunction_call(this);\n}\n\nvoid SQLParser::Function_callContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitFunction_call(this);\n}\n\nSQLParser::Function_callContext *SQLParser::function_call() {\n  Function_callContext *_localctx =\n      _tracker.createInstance<Function_callContext>(_ctx, getState());\n  enterRule(_localctx, 34, SQLParser::RuleFunction_call);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(217);\n    identifier();\n    setState(218);\n    match(SQLParser::LP);\n    setState(227);\n    _errHandler->sync(this);\n\n    _la = _input->LA(1);\n    if ((((_la & ~0x3fULL) == 0) &&\n         ((1ULL << _la) &\n          ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |\n           (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |\n           (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |\n           (1ULL 
<< SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |\n           (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |\n           (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |\n           (1ULL << SQLParser::DESC) | (1ULL << SQLParser::LIMIT) |\n           (1ULL << SQLParser::TRUE_V) | (1ULL << SQLParser::FALSE_V) |\n           (1ULL << SQLParser::INTEGER) | (1ULL << SQLParser::FLOAT) |\n           (1ULL << SQLParser::SQUOTA_STRING) |\n           (1ULL << SQLParser::DQUOTA_STRING) | (1ULL << SQLParser::LMP) |\n           (1ULL << SQLParser::VECTOR) | (1ULL << SQLParser::REGULAR_ID))) !=\n             0)) {\n      setState(219);\n      function_value_expr();\n      setState(224);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n      while (_la == SQLParser::COMMA) {\n        setState(220);\n        match(SQLParser::COMMA);\n        setState(221);\n        function_value_expr();\n        setState(226);\n        _errHandler->sync(this);\n        _la = _input->LA(1);\n      }\n    }\n    setState(229);\n    match(SQLParser::RP);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Dql_statementContext\n//------------------------------------------------------------------\n\nSQLParser::Dql_statementContext::Dql_statementContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Select_statementContext *\nSQLParser::Dql_statementContext::select_statement() {\n  return getRuleContext<SQLParser::Select_statementContext>(0);\n}\n\n\nsize_t SQLParser::Dql_statementContext::getRuleIndex() const {\n  return SQLParser::RuleDql_statement;\n}\n\nvoid SQLParser::Dql_statementContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener 
*>(listener);\n  if (parserListener != nullptr) parserListener->enterDql_statement(this);\n}\n\nvoid SQLParser::Dql_statementContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitDql_statement(this);\n}\n\nSQLParser::Dql_statementContext *SQLParser::dql_statement() {\n  Dql_statementContext *_localctx =\n      _tracker.createInstance<Dql_statementContext>(_ctx, getState());\n  enterRule(_localctx, 36, SQLParser::RuleDql_statement);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(231);\n    select_statement();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Select_statementContext\n//------------------------------------------------------------------\n\nSQLParser::Select_statementContext::Select_statementContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Select_statementContext::SELECT() {\n  return getToken(SQLParser::SELECT, 0);\n}\n\nSQLParser::Selected_elementsContext *\nSQLParser::Select_statementContext::selected_elements() {\n  return getRuleContext<SQLParser::Selected_elementsContext>(0);\n}\n\nSQLParser::From_clauseContext *\nSQLParser::Select_statementContext::from_clause() {\n  return getRuleContext<SQLParser::From_clauseContext>(0);\n}\n\nSQLParser::Where_clauseContext *\nSQLParser::Select_statementContext::where_clause() {\n  return getRuleContext<SQLParser::Where_clauseContext>(0);\n}\n\nSQLParser::Order_by_clauseContext *\nSQLParser::Select_statementContext::order_by_clause() {\n  return 
getRuleContext<SQLParser::Order_by_clauseContext>(0);\n}\n\nSQLParser::Limit_clauseContext *\nSQLParser::Select_statementContext::limit_clause() {\n  return getRuleContext<SQLParser::Limit_clauseContext>(0);\n}\n\n\nsize_t SQLParser::Select_statementContext::getRuleIndex() const {\n  return SQLParser::RuleSelect_statement;\n}\n\nvoid SQLParser::Select_statementContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterSelect_statement(this);\n}\n\nvoid SQLParser::Select_statementContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitSelect_statement(this);\n}\n\nSQLParser::Select_statementContext *SQLParser::select_statement() {\n  Select_statementContext *_localctx =\n      _tracker.createInstance<Select_statementContext>(_ctx, getState());\n  enterRule(_localctx, 38, SQLParser::RuleSelect_statement);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(233);\n    match(SQLParser::SELECT);\n    setState(234);\n    selected_elements();\n    setState(235);\n    from_clause();\n    setState(237);\n    _errHandler->sync(this);\n\n    _la = _input->LA(1);\n    if (_la == SQLParser::WHERE) {\n      setState(236);\n      where_clause();\n    }\n    setState(240);\n    _errHandler->sync(this);\n\n    _la = _input->LA(1);\n    if (_la == SQLParser::ORDER) {\n      setState(239);\n      order_by_clause();\n    }\n    setState(243);\n    _errHandler->sync(this);\n\n    _la = _input->LA(1);\n    if (_la == SQLParser::LIMIT) {\n      setState(242);\n      limit_clause();\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, 
_localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Selected_elementsContext\n//------------------------------------------------------------------\n\nSQLParser::Selected_elementsContext::Selected_elementsContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nstd::vector<SQLParser::Selected_elementContext *>\nSQLParser::Selected_elementsContext::selected_element() {\n  return getRuleContexts<SQLParser::Selected_elementContext>();\n}\n\nSQLParser::Selected_elementContext *\nSQLParser::Selected_elementsContext::selected_element(size_t i) {\n  return getRuleContext<SQLParser::Selected_elementContext>(i);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::Selected_elementsContext::COMMA() {\n  return getTokens(SQLParser::COMMA);\n}\n\ntree::TerminalNode *SQLParser::Selected_elementsContext::COMMA(size_t i) {\n  return getToken(SQLParser::COMMA, i);\n}\n\n\nsize_t SQLParser::Selected_elementsContext::getRuleIndex() const {\n  return SQLParser::RuleSelected_elements;\n}\n\nvoid SQLParser::Selected_elementsContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterSelected_elements(this);\n}\n\nvoid SQLParser::Selected_elementsContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitSelected_elements(this);\n}\n\nSQLParser::Selected_elementsContext *SQLParser::selected_elements() {\n  Selected_elementsContext *_localctx =\n      _tracker.createInstance<Selected_elementsContext>(_ctx, getState());\n  enterRule(_localctx, 40, SQLParser::RuleSelected_elements);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(245);\n    selected_element();\n    
setState(250);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    while (_la == SQLParser::COMMA) {\n      setState(246);\n      match(SQLParser::COMMA);\n      setState(247);\n      selected_element();\n      setState(252);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Selected_elementContext\n//------------------------------------------------------------------\n\nSQLParser::Selected_elementContext::Selected_elementContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Selected_elementContext::ASTERISK() {\n  return getToken(SQLParser::ASTERISK, 0);\n}\n\nSQLParser::Field_nameContext *SQLParser::Selected_elementContext::field_name() {\n  return getRuleContext<SQLParser::Field_nameContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Selected_elementContext::AS() {\n  return getToken(SQLParser::AS, 0);\n}\n\nSQLParser::Field_aliasContext *\nSQLParser::Selected_elementContext::field_alias() {\n  return getRuleContext<SQLParser::Field_aliasContext>(0);\n}\n\n\nsize_t SQLParser::Selected_elementContext::getRuleIndex() const {\n  return SQLParser::RuleSelected_element;\n}\n\nvoid SQLParser::Selected_elementContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterSelected_element(this);\n}\n\nvoid SQLParser::Selected_elementContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitSelected_element(this);\n}\n\nSQLParser::Selected_elementContext 
*SQLParser::selected_element() {\n  Selected_elementContext *_localctx =\n      _tracker.createInstance<Selected_elementContext>(_ctx, getState());\n  enterRule(_localctx, 42, SQLParser::RuleSelected_element);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(261);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::ASTERISK: {\n        enterOuterAlt(_localctx, 1);\n        setState(253);\n        match(SQLParser::ASTERISK);\n        break;\n      }\n\n      case SQLParser::OR:\n      case SQLParser::AND:\n      case SQLParser::NOT:\n      case SQLParser::IN:\n      case SQLParser::BETWEEN:\n      case SQLParser::LIKE:\n      case SQLParser::WHERE:\n      case SQLParser::SELECT:\n      case SQLParser::AS:\n      case SQLParser::BY:\n      case SQLParser::ORDER:\n      case SQLParser::ASC:\n      case SQLParser::DESC:\n      case SQLParser::LIMIT:\n      case SQLParser::REGULAR_ID: {\n        enterOuterAlt(_localctx, 2);\n        setState(254);\n        field_name();\n        setState(256);\n        _errHandler->sync(this);\n\n        switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n            _input, 26, _ctx)) {\n          case 1: {\n            setState(255);\n            match(SQLParser::AS);\n            break;\n          }\n        }\n        setState(259);\n        _errHandler->sync(this);\n\n        _la = _input->LA(1);\n        if ((((_la & ~0x3fULL) == 0) &&\n             ((1ULL << _la) &\n              ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |\n               (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |\n               (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |\n               (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |\n               (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |\n               (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |\n               (1ULL << SQLParser::DESC) | (1ULL << 
SQLParser::LIMIT) |\n               (1ULL << SQLParser::REGULAR_ID))) != 0)) {\n          setState(258);\n          field_alias();\n        }\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- From_clauseContext\n//------------------------------------------------------------------\n\nSQLParser::From_clauseContext::From_clauseContext(ParserRuleContext *parent_ctx,\n                                                  size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::From_clauseContext::FROM() {\n  return getToken(SQLParser::FROM, 0);\n}\n\nSQLParser::Tableview_nameContext *\nSQLParser::From_clauseContext::tableview_name() {\n  return getRuleContext<SQLParser::Tableview_nameContext>(0);\n}\n\n\nsize_t SQLParser::From_clauseContext::getRuleIndex() const {\n  return SQLParser::RuleFrom_clause;\n}\n\nvoid SQLParser::From_clauseContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterFrom_clause(this);\n}\n\nvoid SQLParser::From_clauseContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitFrom_clause(this);\n}\n\nSQLParser::From_clauseContext *SQLParser::from_clause() {\n  From_clauseContext *_localctx =\n      _tracker.createInstance<From_clauseContext>(_ctx, getState());\n  enterRule(_localctx, 44, SQLParser::RuleFrom_clause);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(263);\n    match(SQLParser::FROM);\n    
setState(264);\n    tableview_name();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Order_by_clauseContext\n//------------------------------------------------------------------\n\nSQLParser::Order_by_clauseContext::Order_by_clauseContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Order_by_clauseContext::ORDER() {\n  return getToken(SQLParser::ORDER, 0);\n}\n\ntree::TerminalNode *SQLParser::Order_by_clauseContext::BY() {\n  return getToken(SQLParser::BY, 0);\n}\n\nstd::vector<SQLParser::Order_by_elementContext *>\nSQLParser::Order_by_clauseContext::order_by_element() {\n  return getRuleContexts<SQLParser::Order_by_elementContext>();\n}\n\nSQLParser::Order_by_elementContext *\nSQLParser::Order_by_clauseContext::order_by_element(size_t i) {\n  return getRuleContext<SQLParser::Order_by_elementContext>(i);\n}\n\nstd::vector<tree::TerminalNode *> SQLParser::Order_by_clauseContext::COMMA() {\n  return getTokens(SQLParser::COMMA);\n}\n\ntree::TerminalNode *SQLParser::Order_by_clauseContext::COMMA(size_t i) {\n  return getToken(SQLParser::COMMA, i);\n}\n\n\nsize_t SQLParser::Order_by_clauseContext::getRuleIndex() const {\n  return SQLParser::RuleOrder_by_clause;\n}\n\nvoid SQLParser::Order_by_clauseContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterOrder_by_clause(this);\n}\n\nvoid SQLParser::Order_by_clauseContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) 
parserListener->exitOrder_by_clause(this);\n}\n\nSQLParser::Order_by_clauseContext *SQLParser::order_by_clause() {\n  Order_by_clauseContext *_localctx =\n      _tracker.createInstance<Order_by_clauseContext>(_ctx, getState());\n  enterRule(_localctx, 46, SQLParser::RuleOrder_by_clause);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(266);\n    match(SQLParser::ORDER);\n    setState(267);\n    match(SQLParser::BY);\n    setState(268);\n    order_by_element();\n    setState(273);\n    _errHandler->sync(this);\n    _la = _input->LA(1);\n    while (_la == SQLParser::COMMA) {\n      setState(269);\n      match(SQLParser::COMMA);\n      setState(270);\n      order_by_element();\n      setState(275);\n      _errHandler->sync(this);\n      _la = _input->LA(1);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Order_by_elementContext\n//------------------------------------------------------------------\n\nSQLParser::Order_by_elementContext::Order_by_elementContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Field_nameContext *SQLParser::Order_by_elementContext::field_name() {\n  return getRuleContext<SQLParser::Field_nameContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Order_by_elementContext::ASC() {\n  return getToken(SQLParser::ASC, 0);\n}\n\ntree::TerminalNode *SQLParser::Order_by_elementContext::DESC() {\n  return getToken(SQLParser::DESC, 0);\n}\n\n\nsize_t SQLParser::Order_by_elementContext::getRuleIndex() const {\n  return SQLParser::RuleOrder_by_element;\n}\n\nvoid SQLParser::Order_by_elementContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = 
dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterOrder_by_element(this);\n}\n\nvoid SQLParser::Order_by_elementContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitOrder_by_element(this);\n}\n\nSQLParser::Order_by_elementContext *SQLParser::order_by_element() {\n  Order_by_elementContext *_localctx =\n      _tracker.createInstance<Order_by_elementContext>(_ctx, getState());\n  enterRule(_localctx, 48, SQLParser::RuleOrder_by_element);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(276);\n    field_name();\n    setState(278);\n    _errHandler->sync(this);\n\n    _la = _input->LA(1);\n    if (_la == SQLParser::ASC\n\n        || _la == SQLParser::DESC) {\n      setState(277);\n      _la = _input->LA(1);\n      if (!(_la == SQLParser::ASC\n\n            || _la == SQLParser::DESC)) {\n        _errHandler->recoverInline(this);\n      } else {\n        _errHandler->reportMatch(this);\n        consume();\n      }\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Limit_clauseContext\n//------------------------------------------------------------------\n\nSQLParser::Limit_clauseContext::Limit_clauseContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Limit_clauseContext::LIMIT() {\n  return getToken(SQLParser::LIMIT, 0);\n}\n\nSQLParser::Int_valueContext *SQLParser::Limit_clauseContext::int_value() {\n  return getRuleContext<SQLParser::Int_valueContext>(0);\n}\n\n\nsize_t SQLParser::Limit_clauseContext::getRuleIndex() 
const {\n  return SQLParser::RuleLimit_clause;\n}\n\nvoid SQLParser::Limit_clauseContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterLimit_clause(this);\n}\n\nvoid SQLParser::Limit_clauseContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitLimit_clause(this);\n}\n\nSQLParser::Limit_clauseContext *SQLParser::limit_clause() {\n  Limit_clauseContext *_localctx =\n      _tracker.createInstance<Limit_clauseContext>(_ctx, getState());\n  enterRule(_localctx, 50, SQLParser::RuleLimit_clause);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(280);\n    match(SQLParser::LIMIT);\n    setState(281);\n    int_value();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Tableview_nameContext\n//------------------------------------------------------------------\n\nSQLParser::Tableview_nameContext::Tableview_nameContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Tableview_nameContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\n\nsize_t SQLParser::Tableview_nameContext::getRuleIndex() const {\n  return SQLParser::RuleTableview_name;\n}\n\nvoid SQLParser::Tableview_nameContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterTableview_name(this);\n}\n\nvoid 
SQLParser::Tableview_nameContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitTableview_name(this);\n}\n\nSQLParser::Tableview_nameContext *SQLParser::tableview_name() {\n  Tableview_nameContext *_localctx =\n      _tracker.createInstance<Tableview_nameContext>(_ctx, getState());\n  enterRule(_localctx, 52, SQLParser::RuleTableview_name);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(283);\n    identifier();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Field_nameContext\n//------------------------------------------------------------------\n\nSQLParser::Field_nameContext::Field_nameContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Field_nameContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\n\nsize_t SQLParser::Field_nameContext::getRuleIndex() const {\n  return SQLParser::RuleField_name;\n}\n\nvoid SQLParser::Field_nameContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterField_name(this);\n}\n\nvoid SQLParser::Field_nameContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitField_name(this);\n}\n\nSQLParser::Field_nameContext *SQLParser::field_name() {\n  Field_nameContext *_localctx =\n      
_tracker.createInstance<Field_nameContext>(_ctx, getState());\n  enterRule(_localctx, 54, SQLParser::RuleField_name);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(285);\n    identifier();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Table_aliasContext\n//------------------------------------------------------------------\n\nSQLParser::Table_aliasContext::Table_aliasContext(ParserRuleContext *parent_ctx,\n                                                  size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Table_aliasContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\n\nsize_t SQLParser::Table_aliasContext::getRuleIndex() const {\n  return SQLParser::RuleTable_alias;\n}\n\nvoid SQLParser::Table_aliasContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterTable_alias(this);\n}\n\nvoid SQLParser::Table_aliasContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitTable_alias(this);\n}\n\nSQLParser::Table_aliasContext *SQLParser::table_alias() {\n  Table_aliasContext *_localctx =\n      _tracker.createInstance<Table_aliasContext>(_ctx, getState());\n  enterRule(_localctx, 56, SQLParser::RuleTable_alias);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(287);\n    identifier();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = 
std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Field_aliasContext\n//------------------------------------------------------------------\n\nSQLParser::Field_aliasContext::Field_aliasContext(ParserRuleContext *parent_ctx,\n                                                  size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::IdentifierContext *SQLParser::Field_aliasContext::identifier() {\n  return getRuleContext<SQLParser::IdentifierContext>(0);\n}\n\ntree::TerminalNode *SQLParser::Field_aliasContext::AS() {\n  return getToken(SQLParser::AS, 0);\n}\n\n\nsize_t SQLParser::Field_aliasContext::getRuleIndex() const {\n  return SQLParser::RuleField_alias;\n}\n\nvoid SQLParser::Field_aliasContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterField_alias(this);\n}\n\nvoid SQLParser::Field_aliasContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitField_alias(this);\n}\n\nSQLParser::Field_aliasContext *SQLParser::field_alias() {\n  Field_aliasContext *_localctx =\n      _tracker.createInstance<Field_aliasContext>(_ctx, getState());\n  enterRule(_localctx, 58, SQLParser::RuleField_alias);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(290);\n    _errHandler->sync(this);\n\n    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(\n        _input, 31, _ctx)) {\n      case 1: {\n        setState(289);\n        match(SQLParser::AS);\n        break;\n      }\n    }\n    setState(292);\n    identifier();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = 
std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- NumericContext\n//------------------------------------------------------------------\n\nSQLParser::NumericContext::NumericContext(ParserRuleContext *parent_ctx,\n                                          size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Int_valueContext *SQLParser::NumericContext::int_value() {\n  return getRuleContext<SQLParser::Int_valueContext>(0);\n}\n\nSQLParser::Float_valueContext *SQLParser::NumericContext::float_value() {\n  return getRuleContext<SQLParser::Float_valueContext>(0);\n}\n\n\nsize_t SQLParser::NumericContext::getRuleIndex() const {\n  return SQLParser::RuleNumeric;\n}\n\nvoid SQLParser::NumericContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterNumeric(this);\n}\n\nvoid SQLParser::NumericContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitNumeric(this);\n}\n\nSQLParser::NumericContext *SQLParser::numeric() {\n  NumericContext *_localctx =\n      _tracker.createInstance<NumericContext>(_ctx, getState());\n  enterRule(_localctx, 60, SQLParser::RuleNumeric);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(296);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::INTEGER: {\n        enterOuterAlt(_localctx, 1);\n        setState(294);\n        int_value();\n        break;\n      }\n\n      case SQLParser::FLOAT: {\n        enterOuterAlt(_localctx, 2);\n        setState(295);\n        float_value();\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    
_errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Int_valueContext\n//------------------------------------------------------------------\n\nSQLParser::Int_valueContext::Int_valueContext(ParserRuleContext *parent_ctx,\n                                              size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Int_valueContext::INTEGER() {\n  return getToken(SQLParser::INTEGER, 0);\n}\n\n\nsize_t SQLParser::Int_valueContext::getRuleIndex() const {\n  return SQLParser::RuleInt_value;\n}\n\nvoid SQLParser::Int_valueContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterInt_value(this);\n}\n\nvoid SQLParser::Int_valueContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitInt_value(this);\n}\n\nSQLParser::Int_valueContext *SQLParser::int_value() {\n  Int_valueContext *_localctx =\n      _tracker.createInstance<Int_valueContext>(_ctx, getState());\n  enterRule(_localctx, 62, SQLParser::RuleInt_value);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(298);\n    match(SQLParser::INTEGER);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Float_valueContext\n//------------------------------------------------------------------\n\nSQLParser::Float_valueContext::Float_valueContext(ParserRuleContext *parent_ctx,\n                                                  size_t 
invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Float_valueContext::FLOAT() {\n  return getToken(SQLParser::FLOAT, 0);\n}\n\n\nsize_t SQLParser::Float_valueContext::getRuleIndex() const {\n  return SQLParser::RuleFloat_value;\n}\n\nvoid SQLParser::Float_valueContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterFloat_value(this);\n}\n\nvoid SQLParser::Float_valueContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitFloat_value(this);\n}\n\nSQLParser::Float_valueContext *SQLParser::float_value() {\n  Float_valueContext *_localctx =\n      _tracker.createInstance<Float_valueContext>(_ctx, getState());\n  enterRule(_localctx, 64, SQLParser::RuleFloat_value);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(300);\n    match(SQLParser::FLOAT);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Quoted_stringContext\n//------------------------------------------------------------------\n\nSQLParser::Quoted_stringContext::Quoted_stringContext(\n    ParserRuleContext *parent_ctx, size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Quoted_stringContext::SQUOTA_STRING() {\n  return getToken(SQLParser::SQUOTA_STRING, 0);\n}\n\ntree::TerminalNode *SQLParser::Quoted_stringContext::DQUOTA_STRING() {\n  return getToken(SQLParser::DQUOTA_STRING, 0);\n}\n\n\nsize_t SQLParser::Quoted_stringContext::getRuleIndex() const {\n  return 
SQLParser::RuleQuoted_string;\n}\n\nvoid SQLParser::Quoted_stringContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterQuoted_string(this);\n}\n\nvoid SQLParser::Quoted_stringContext::exitRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitQuoted_string(this);\n}\n\nSQLParser::Quoted_stringContext *SQLParser::quoted_string() {\n  Quoted_stringContext *_localctx =\n      _tracker.createInstance<Quoted_stringContext>(_ctx, getState());\n  enterRule(_localctx, 66, SQLParser::RuleQuoted_string);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(302);\n    _la = _input->LA(1);\n    if (!(_la == SQLParser::SQUOTA_STRING\n\n          || _la == SQLParser::DQUOTA_STRING)) {\n      _errHandler->recoverInline(this);\n    } else {\n      _errHandler->reportMatch(this);\n      consume();\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Bool_valueContext\n//------------------------------------------------------------------\n\nSQLParser::Bool_valueContext::Bool_valueContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Bool_valueContext::TRUE_V() {\n  return getToken(SQLParser::TRUE_V, 0);\n}\n\ntree::TerminalNode *SQLParser::Bool_valueContext::FALSE_V() {\n  return getToken(SQLParser::FALSE_V, 0);\n}\n\n\nsize_t SQLParser::Bool_valueContext::getRuleIndex() const {\n  return SQLParser::RuleBool_value;\n}\n\nvoid 
SQLParser::Bool_valueContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterBool_value(this);\n}\n\nvoid SQLParser::Bool_valueContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitBool_value(this);\n}\n\nSQLParser::Bool_valueContext *SQLParser::bool_value() {\n  Bool_valueContext *_localctx =\n      _tracker.createInstance<Bool_valueContext>(_ctx, getState());\n  enterRule(_localctx, 68, SQLParser::RuleBool_value);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(304);\n    _la = _input->LA(1);\n    if (!(_la == SQLParser::TRUE_V\n\n          || _la == SQLParser::FALSE_V)) {\n      _errHandler->recoverInline(this);\n    } else {\n      _errHandler->reportMatch(this);\n      consume();\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- IdentifierContext\n//------------------------------------------------------------------\n\nSQLParser::IdentifierContext::IdentifierContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\nSQLParser::Regular_idContext *SQLParser::IdentifierContext::regular_id() {\n  return getRuleContext<SQLParser::Regular_idContext>(0);\n}\n\n\nsize_t SQLParser::IdentifierContext::getRuleIndex() const {\n  return SQLParser::RuleIdentifier;\n}\n\nvoid SQLParser::IdentifierContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if 
(parserListener != nullptr) parserListener->enterIdentifier(this);\n}\n\nvoid SQLParser::IdentifierContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitIdentifier(this);\n}\n\nSQLParser::IdentifierContext *SQLParser::identifier() {\n  IdentifierContext *_localctx =\n      _tracker.createInstance<IdentifierContext>(_ctx, getState());\n  enterRule(_localctx, 70, SQLParser::RuleIdentifier);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(306);\n    regular_id();\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Ne_opContext\n//------------------------------------------------------------------\n\nSQLParser::Ne_opContext::Ne_opContext(ParserRuleContext *parent_ctx,\n                                      size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Ne_opContext::NE_OP() {\n  return getToken(SQLParser::NE_OP, 0);\n}\n\n\nsize_t SQLParser::Ne_opContext::getRuleIndex() const {\n  return SQLParser::RuleNe_op;\n}\n\nvoid SQLParser::Ne_opContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterNe_op(this);\n}\n\nvoid SQLParser::Ne_opContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitNe_op(this);\n}\n\nSQLParser::Ne_opContext *SQLParser::ne_op() {\n  Ne_opContext *_localctx =\n      _tracker.createInstance<Ne_opContext>(_ctx, getState());\n  enterRule(_localctx, 72, SQLParser::RuleNe_op);\n\n  
auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(308);\n    match(SQLParser::NE_OP);\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Ge_opContext\n//------------------------------------------------------------------\n\nSQLParser::Ge_opContext::Ge_opContext(ParserRuleContext *parent_ctx,\n                                      size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Ge_opContext::GE_OP() {\n  return getToken(SQLParser::GE_OP, 0);\n}\n\ntree::TerminalNode *SQLParser::Ge_opContext::G_OP() {\n  return getToken(SQLParser::G_OP, 0);\n}\n\ntree::TerminalNode *SQLParser::Ge_opContext::E_OP() {\n  return getToken(SQLParser::E_OP, 0);\n}\n\n\nsize_t SQLParser::Ge_opContext::getRuleIndex() const {\n  return SQLParser::RuleGe_op;\n}\n\nvoid SQLParser::Ge_opContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterGe_op(this);\n}\n\nvoid SQLParser::Ge_opContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitGe_op(this);\n}\n\nSQLParser::Ge_opContext *SQLParser::ge_op() {\n  Ge_opContext *_localctx =\n      _tracker.createInstance<Ge_opContext>(_ctx, getState());\n  enterRule(_localctx, 74, SQLParser::RuleGe_op);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(313);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::GE_OP: {\n        enterOuterAlt(_localctx, 1);\n        setState(310);\n        match(SQLParser::GE_OP);\n        break;\n      }\n\n      case 
SQLParser::G_OP: {\n        enterOuterAlt(_localctx, 2);\n        setState(311);\n        match(SQLParser::G_OP);\n        setState(312);\n        match(SQLParser::E_OP);\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Le_opContext\n//------------------------------------------------------------------\n\nSQLParser::Le_opContext::Le_opContext(ParserRuleContext *parent_ctx,\n                                      size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Le_opContext::LE_OP() {\n  return getToken(SQLParser::LE_OP, 0);\n}\n\ntree::TerminalNode *SQLParser::Le_opContext::L_OP() {\n  return getToken(SQLParser::L_OP, 0);\n}\n\ntree::TerminalNode *SQLParser::Le_opContext::E_OP() {\n  return getToken(SQLParser::E_OP, 0);\n}\n\n\nsize_t SQLParser::Le_opContext::getRuleIndex() const {\n  return SQLParser::RuleLe_op;\n}\n\nvoid SQLParser::Le_opContext::enterRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterLe_op(this);\n}\n\nvoid SQLParser::Le_opContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitLe_op(this);\n}\n\nSQLParser::Le_opContext *SQLParser::le_op() {\n  Le_opContext *_localctx =\n      _tracker.createInstance<Le_opContext>(_ctx, getState());\n  enterRule(_localctx, 76, SQLParser::RuleLe_op);\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    setState(318);\n    _errHandler->sync(this);\n    switch (_input->LA(1)) {\n      case SQLParser::LE_OP: {\n        
enterOuterAlt(_localctx, 1);\n        setState(315);\n        match(SQLParser::LE_OP);\n        break;\n      }\n\n      case SQLParser::L_OP: {\n        enterOuterAlt(_localctx, 2);\n        setState(316);\n        match(SQLParser::L_OP);\n        setState(317);\n        match(SQLParser::E_OP);\n        break;\n      }\n\n      default:\n        throw NoViableAltException(this);\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\n//----------------- Regular_idContext\n//------------------------------------------------------------------\n\nSQLParser::Regular_idContext::Regular_idContext(ParserRuleContext *parent_ctx,\n                                                size_t invoking_state)\n    : ParserRuleContext(parent_ctx, invoking_state) {}\n\ntree::TerminalNode *SQLParser::Regular_idContext::REGULAR_ID() {\n  return getToken(SQLParser::REGULAR_ID, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::OR() {\n  return getToken(SQLParser::OR, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::AND() {\n  return getToken(SQLParser::AND, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::NOT() {\n  return getToken(SQLParser::NOT, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::IN() {\n  return getToken(SQLParser::IN, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::BETWEEN() {\n  return getToken(SQLParser::BETWEEN, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::LIKE() {\n  return getToken(SQLParser::LIKE, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::WHERE() {\n  return getToken(SQLParser::WHERE, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::SELECT() {\n  return getToken(SQLParser::SELECT, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::AS() {\n  return getToken(SQLParser::AS, 
0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::BY() {\n  return getToken(SQLParser::BY, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::ORDER() {\n  return getToken(SQLParser::ORDER, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::ASC() {\n  return getToken(SQLParser::ASC, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::DESC() {\n  return getToken(SQLParser::DESC, 0);\n}\n\ntree::TerminalNode *SQLParser::Regular_idContext::LIMIT() {\n  return getToken(SQLParser::LIMIT, 0);\n}\n\n\nsize_t SQLParser::Regular_idContext::getRuleIndex() const {\n  return SQLParser::RuleRegular_id;\n}\n\nvoid SQLParser::Regular_idContext::enterRule(\n    tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->enterRegular_id(this);\n}\n\nvoid SQLParser::Regular_idContext::exitRule(tree::ParseTreeListener *listener) {\n  auto parserListener = dynamic_cast<SQLParserListener *>(listener);\n  if (parserListener != nullptr) parserListener->exitRegular_id(this);\n}\n\nSQLParser::Regular_idContext *SQLParser::regular_id() {\n  Regular_idContext *_localctx =\n      _tracker.createInstance<Regular_idContext>(_ctx, getState());\n  enterRule(_localctx, 78, SQLParser::RuleRegular_id);\n  size_t _la = 0;\n\n  auto onExit = finally([=] { exitRule(); });\n  try {\n    enterOuterAlt(_localctx, 1);\n    setState(320);\n    _la = _input->LA(1);\n    if (!((((_la & ~0x3fULL) == 0) &&\n           ((1ULL << _la) &\n            ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |\n             (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |\n             (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |\n             (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |\n             (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |\n             (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |\n             (1ULL << SQLParser::DESC) 
| (1ULL << SQLParser::LIMIT) |\n             (1ULL << SQLParser::REGULAR_ID))) != 0))) {\n      _errHandler->recoverInline(this);\n    } else {\n      _errHandler->reportMatch(this);\n      consume();\n    }\n\n  } catch (RecognitionException &e) {\n    _errHandler->reportError(this, e);\n    _localctx->exception = std::current_exception();\n    _errHandler->recover(this, _localctx->exception);\n  }\n\n  return _localctx;\n}\n\nbool SQLParser::sempred(RuleContext *context, size_t ruleIndex,\n                        size_t predicateIndex) {\n  switch (ruleIndex) {\n    case 5:\n      return logic_exprSempred(dynamic_cast<Logic_exprContext *>(context),\n                               predicateIndex);\n\n    default:\n      break;\n  }\n  return true;\n}\n\nbool SQLParser::logic_exprSempred(Logic_exprContext * /*_localctx*/,\n                                  size_t predicateIndex) {\n  switch (predicateIndex) {\n    case 0:\n      return precpred(_ctx, 3);\n    case 1:\n      return precpred(_ctx, 2);\n\n    default:\n      break;\n  }\n  return true;\n}\n\n// Static vars and initialization.\nstd::vector<dfa::DFA> SQLParser::_decisionToDFA;\natn::PredictionContextCache SQLParser::_sharedContextCache;\n\n// We own the ATN which in turn owns the ATN states.\natn::ATN SQLParser::_atn;\nstd::vector<uint16_t> SQLParser::_serializedATN;\n\nstd::vector<std::string> SQLParser::_ruleNames = {\"swallow_to_semi\",\n                                                  \"compilation_unit\",\n                                                  \"logic_expr_unit\",\n                                                  \"unit_statement\",\n                                                  \"where_clause\",\n                                                  \"logic_expr\",\n                                                  \"enclosed_expr\",\n                                                  \"relation_expr\",\n                                                  \"rel_oper\",\n                 
                                 \"value_expr\",\n                                                  \"in_value_expr_list\",\n                                                  \"in_value_expr\",\n                                                  \"constant\",\n                                                  \"constant_num_and_str\",\n                                                  \"matrix\",\n                                                  \"vector_expr\",\n                                                  \"function_value_expr\",\n                                                  \"function_call\",\n                                                  \"dql_statement\",\n                                                  \"select_statement\",\n                                                  \"selected_elements\",\n                                                  \"selected_element\",\n                                                  \"from_clause\",\n                                                  \"order_by_clause\",\n                                                  \"order_by_element\",\n                                                  \"limit_clause\",\n                                                  \"tableview_name\",\n                                                  \"field_name\",\n                                                  \"table_alias\",\n                                                  \"field_alias\",\n                                                  \"numeric\",\n                                                  \"int_value\",\n                                                  \"float_value\",\n                                                  \"quoted_string\",\n                                                  \"bool_value\",\n                                                  \"identifier\",\n                                                  \"ne_op\",\n                                                  \"ge_op\",\n              
                                    \"le_op\",\n                                                  \"regular_id\"};\n\nstd::vector<std::string> SQLParser::_literalNames = {\"\",\n                                                     \"'OR'\",\n                                                     \"'AND'\",\n                                                     \"'NOT'\",\n                                                     \"'IN'\",\n                                                     \"'CONTAIN_ALL'\",\n                                                     \"'CONTAIN_ANY'\",\n                                                     \"'BETWEEN'\",\n                                                     \"'LIKE'\",\n                                                     \"'WHERE'\",\n                                                     \"'SELECT'\",\n                                                     \"'FROM'\",\n                                                     \"'AS'\",\n                                                     \"'BY'\",\n                                                     \"'ORDER'\",\n                                                     \"'ASC'\",\n                                                     \"'DESC'\",\n                                                     \"'LIMIT'\",\n                                                     \"'TRUE'\",\n                                                     \"'FALSE'\",\n                                                     \"'IS'\",\n                                                     \"'NULL'\",\n                                                     \"\",\n                                                     \"\",\n                                                     \"\",\n                                                     \"\",\n                                                     \"'.'\",\n                                                     \"'('\",\n                                                     
\"')'\",\n                                                     \"'['\",\n                                                     \"']'\",\n                                                     \"'*'\",\n                                                     \"'+'\",\n                                                     \"'-'\",\n                                                     \"','\",\n                                                     \"'/'\",\n                                                     \"'%'\",\n                                                     \"'@'\",\n                                                     \"':='\",\n                                                     \"'#'\",\n                                                     \"':'\",\n                                                     \"';'\",\n                                                     \"'<='\",\n                                                     \"'>='\",\n                                                     \"'!='\",\n                                                     \"'^'\",\n                                                     \"'~'\",\n                                                     \"'<'\",\n                                                     \"'>'\",\n                                                     \"'='\",\n                                                     \"'||'\",\n                                                     \"'_'\"};\n\nstd::vector<std::string> SQLParser::_symbolicNames = {\"\",\n                                                      \"OR\",\n                                                      \"AND\",\n                                                      \"NOT\",\n                                                      \"IN\",\n                                                      \"CONTAIN_ALL\",\n                                                      \"CONTAIN_ANY\",\n                                                      \"BETWEEN\",\n      
                                                \"LIKE\",\n                                                      \"WHERE\",\n                                                      \"SELECT\",\n                                                      \"FROM\",\n                                                      \"AS\",\n                                                      \"BY\",\n                                                      \"ORDER\",\n                                                      \"ASC\",\n                                                      \"DESC\",\n                                                      \"LIMIT\",\n                                                      \"TRUE_V\",\n                                                      \"FALSE_V\",\n                                                      \"IS\",\n                                                      \"NULL_V\",\n                                                      \"INTEGER\",\n                                                      \"FLOAT\",\n                                                      \"SQUOTA_STRING\",\n                                                      \"DQUOTA_STRING\",\n                                                      \"DOT\",\n                                                      \"LP\",\n                                                      \"RP\",\n                                                      \"LMP\",\n                                                      \"RMP\",\n                                                      \"ASTERISK\",\n                                                      \"PLUS_SIGN\",\n                                                      \"MINUS_SIGN\",\n                                                      \"COMMA\",\n                                                      \"SOLIDUS\",\n                                                      \"MOD\",\n                                                      \"AT_SIGN\",\n               
                                       \"ASSIGN_OP\",\n                                                      \"SHARP_SIGN\",\n                                                      \"COLON\",\n                                                      \"SEMI\",\n                                                      \"LE_OP\",\n                                                      \"GE_OP\",\n                                                      \"NE_OP\",\n                                                      \"CARET_OP\",\n                                                      \"TILDE_OP\",\n                                                      \"L_OP\",\n                                                      \"G_OP\",\n                                                      \"E_OP\",\n                                                      \"CONCAT_OP\",\n                                                      \"UNDERSCORE\",\n                                                      \"SPACES\",\n                                                      \"VECTOR\",\n                                                      \"SINGLE_LINE_COMMENT\",\n                                                      \"MULTI_LINE_COMMENT\",\n                                                      \"REGULAR_ID\"};\n\ndfa::Vocabulary SQLParser::_vocabulary(_literalNames, _symbolicNames);\n\nstd::vector<std::string> SQLParser::_tokenNames;\n\nSQLParser::Initializer::Initializer() {\n  for (size_t i = 0; i < _symbolicNames.size(); ++i) {\n    std::string name = _vocabulary.getLiteralName(i);\n    if (name.empty()) {\n      name = _vocabulary.getSymbolicName(i);\n    }\n\n    if (name.empty()) {\n      _tokenNames.push_back(\"<INVALID>\");\n    } else {\n      _tokenNames.push_back(name);\n    }\n  }\n\n  _serializedATN = {\n      0x3,   0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,\n      0x3,   0x3a,   0x145,  0x4,    0x2,    0x9,    0x2,    0x4,    0x3,\n      0x9,   0x3,    0x4,    0x4,    
0x9,    0x4,    0x4,    0x5,    0x9,\n      0x5,   0x4,    0x6,    0x9,    0x6,    0x4,    0x7,    0x9,    0x7,\n      0x4,   0x8,    0x9,    0x8,    0x4,    0x9,    0x9,    0x9,    0x4,\n      0xa,   0x9,    0xa,    0x4,    0xb,    0x9,    0xb,    0x4,    0xc,\n      0x9,   0xc,    0x4,    0xd,    0x9,    0xd,    0x4,    0xe,    0x9,\n      0xe,   0x4,    0xf,    0x9,    0xf,    0x4,    0x10,   0x9,    0x10,\n      0x4,   0x11,   0x9,    0x11,   0x4,    0x12,   0x9,    0x12,   0x4,\n      0x13,  0x9,    0x13,   0x4,    0x14,   0x9,    0x14,   0x4,    0x15,\n      0x9,   0x15,   0x4,    0x16,   0x9,    0x16,   0x4,    0x17,   0x9,\n      0x17,  0x4,    0x18,   0x9,    0x18,   0x4,    0x19,   0x9,    0x19,\n      0x4,   0x1a,   0x9,    0x1a,   0x4,    0x1b,   0x9,    0x1b,   0x4,\n      0x1c,  0x9,    0x1c,   0x4,    0x1d,   0x9,    0x1d,   0x4,    0x1e,\n      0x9,   0x1e,   0x4,    0x1f,   0x9,    0x1f,   0x4,    0x20,   0x9,\n      0x20,  0x4,    0x21,   0x9,    0x21,   0x4,    0x22,   0x9,    0x22,\n      0x4,   0x23,   0x9,    0x23,   0x4,    0x24,   0x9,    0x24,   0x4,\n      0x25,  0x9,    0x25,   0x4,    0x26,   0x9,    0x26,   0x4,    0x27,\n      0x9,   0x27,   0x4,    0x28,   0x9,    0x28,   0x4,    0x29,   0x9,\n      0x29,  0x3,    0x2,    0x6,    0x2,    0x54,   0xa,    0x2,    0xd,\n      0x2,   0xe,    0x2,    0x55,   0x3,    0x3,    0x3,    0x3,    0x5,\n      0x3,   0x5a,   0xa,    0x3,    0x6,    0x3,    0x5c,   0xa,    0x3,\n      0xd,   0x3,    0xe,    0x3,    0x5d,   0x3,    0x3,    0x3,    0x3,\n      0x3,   0x4,    0x3,    0x4,    0x3,    0x4,    0x3,    0x5,    0x3,\n      0x5,   0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x7,\n      0x3,   0x7,    0x3,    0x7,    0x5,    0x7,    0x6d,   0xa,    0x7,\n      0x3,   0x7,    0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,\n      0x7,   0x3,    0x7,    0x7,    0x7,    0x75,   0xa,    0x7,    0xc,\n      0x7,   0xe,    0x7,    0x78,   0xb,    0x7,    0x3,    0x8,    0x3,\n      
0x8,   0x3,    0x8,    0x3,    0x8,    0x3,    0x9,    0x3,    0x9,\n      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,\n      0x9,   0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,\n      0x88,  0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,\n      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,\n      0x9,   0x91,   0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,\n      0x9,   0x5,    0x9,    0x96,   0xa,    0x9,    0x3,    0x9,    0x3,\n      0x9,   0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,\n      0x9d,  0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,\n      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,    0xa5,\n      0xa,   0x9,    0x3,    0xa,    0x3,    0xa,    0x3,    0xa,    0x3,\n      0xa,   0x3,    0xa,    0x3,    0xa,    0x5,    0xa,    0xad,   0xa,\n      0xa,   0x3,    0xb,    0x3,    0xb,    0x5,    0xb,    0xb1,   0xa,\n      0xb,   0x3,    0xc,    0x3,    0xc,    0x3,    0xc,    0x7,    0xc,\n      0xb6,  0xa,    0xc,    0xc,    0xc,    0xe,    0xc,    0xb9,   0xb,\n      0xc,   0x3,    0xd,    0x3,    0xd,    0x5,    0xd,    0xbd,   0xa,\n      0xd,   0x3,    0xe,    0x3,    0xe,    0x3,    0xe,    0x3,    0xe,\n      0x5,   0xe,    0xc3,   0xa,    0xe,    0x3,    0xf,    0x3,    0xf,\n      0x5,   0xf,    0xc7,   0xa,    0xf,    0x3,    0x10,   0x3,    0x10,\n      0x3,   0x10,   0x3,    0x10,   0x7,    0x10,   0xcd,   0xa,    0x10,\n      0xc,   0x10,   0xe,    0x10,   0xd0,   0xb,    0x10,   0x3,    0x10,\n      0x3,   0x10,   0x3,    0x11,   0x3,    0x11,   0x5,    0x11,   0xd6,\n      0xa,   0x11,   0x3,    0x12,   0x3,    0x12,   0x5,    0x12,   0xda,\n      0xa,   0x12,   0x3,    0x13,   0x3,    0x13,   0x3,    0x13,   0x3,\n      0x13,  0x3,    0x13,   0x7,    0x13,   0xe1,   0xa,    0x13,   0xc,\n      0x13,  0xe,    0x13,   0xe4,   0xb,    0x13,   0x5,    0x13,   0xe6,\n      0xa,   0x13,   0x3,    0x13,   0x3,    
0x13,   0x3,    0x14,   0x3,\n      0x14,  0x3,    0x15,   0x3,    0x15,   0x3,    0x15,   0x3,    0x15,\n      0x5,   0x15,   0xf0,   0xa,    0x15,   0x3,    0x15,   0x5,    0x15,\n      0xf3,  0xa,    0x15,   0x3,    0x15,   0x5,    0x15,   0xf6,   0xa,\n      0x15,  0x3,    0x16,   0x3,    0x16,   0x3,    0x16,   0x7,    0x16,\n      0xfb,  0xa,    0x16,   0xc,    0x16,   0xe,    0x16,   0xfe,   0xb,\n      0x16,  0x3,    0x17,   0x3,    0x17,   0x3,    0x17,   0x5,    0x17,\n      0x103, 0xa,    0x17,   0x3,    0x17,   0x5,    0x17,   0x106,  0xa,\n      0x17,  0x5,    0x17,   0x108,  0xa,    0x17,   0x3,    0x18,   0x3,\n      0x18,  0x3,    0x18,   0x3,    0x19,   0x3,    0x19,   0x3,    0x19,\n      0x3,   0x19,   0x3,    0x19,   0x7,    0x19,   0x112,  0xa,    0x19,\n      0xc,   0x19,   0xe,    0x19,   0x115,  0xb,    0x19,   0x3,    0x1a,\n      0x3,   0x1a,   0x5,    0x1a,   0x119,  0xa,    0x1a,   0x3,    0x1b,\n      0x3,   0x1b,   0x3,    0x1b,   0x3,    0x1c,   0x3,    0x1c,   0x3,\n      0x1d,  0x3,    0x1d,   0x3,    0x1e,   0x3,    0x1e,   0x3,    0x1f,\n      0x5,   0x1f,   0x125,  0xa,    0x1f,   0x3,    0x1f,   0x3,    0x1f,\n      0x3,   0x20,   0x3,    0x20,   0x5,    0x20,   0x12b,  0xa,    0x20,\n      0x3,   0x21,   0x3,    0x21,   0x3,    0x22,   0x3,    0x22,   0x3,\n      0x23,  0x3,    0x23,   0x3,    0x24,   0x3,    0x24,   0x3,    0x25,\n      0x3,   0x25,   0x3,    0x26,   0x3,    0x26,   0x3,    0x27,   0x3,\n      0x27,  0x3,    0x27,   0x5,    0x27,   0x13c,  0xa,    0x27,   0x3,\n      0x28,  0x3,    0x28,   0x3,    0x28,   0x5,    0x28,   0x141,  0xa,\n      0x28,  0x3,    0x29,   0x3,    0x29,   0x3,    0x29,   0x2,    0x3,\n      0xc,   0x2a,   0x2,    0x4,    0x6,    0x8,    0xa,    0xc,    0xe,\n      0x10,  0x12,   0x14,   0x16,   0x18,   0x1a,   0x1c,   0x1e,   0x20,\n      0x22,  0x24,   0x26,   0x28,   0x2a,   0x2c,   0x2e,   0x30,   0x32,\n      0x34,  0x36,   0x38,   0x3a,   0x3c,   0x3e,   0x40,   0x42,   0x44,\n     
 0x46,  0x48,   0x4a,   0x4c,   0x4e,   0x50,   0x2,    0x9,    0x3,\n      0x2,   0x2b,   0x2b,   0x4,    0x2,    0x25,   0x25,   0x2b,   0x2b,\n      0x3,   0x2,    0x7,    0x8,    0x3,    0x2,    0x11,   0x12,   0x3,\n      0x2,   0x1a,   0x1b,   0x3,    0x2,    0x14,   0x15,   0x6,    0x2,\n      0x3,   0x6,    0x9,    0xc,    0xe,    0x13,   0x3a,   0x3a,   0x2,\n      0x149, 0x2,    0x53,   0x3,    0x2,    0x2,    0x2,    0x4,    0x5b,\n      0x3,   0x2,    0x2,    0x2,    0x6,    0x61,   0x3,    0x2,    0x2,\n      0x2,   0x8,    0x64,   0x3,    0x2,    0x2,    0x2,    0xa,    0x66,\n      0x3,   0x2,    0x2,    0x2,    0xc,    0x6c,   0x3,    0x2,    0x2,\n      0x2,   0xe,    0x79,   0x3,    0x2,    0x2,    0x2,    0x10,   0xa4,\n      0x3,   0x2,    0x2,    0x2,    0x12,   0xac,   0x3,    0x2,    0x2,\n      0x2,   0x14,   0xb0,   0x3,    0x2,    0x2,    0x2,    0x16,   0xb2,\n      0x3,   0x2,    0x2,    0x2,    0x18,   0xbc,   0x3,    0x2,    0x2,\n      0x2,   0x1a,   0xc2,   0x3,    0x2,    0x2,    0x2,    0x1c,   0xc6,\n      0x3,   0x2,    0x2,    0x2,    0x1e,   0xc8,   0x3,    0x2,    0x2,\n      0x2,   0x20,   0xd5,   0x3,    0x2,    0x2,    0x2,    0x22,   0xd9,\n      0x3,   0x2,    0x2,    0x2,    0x24,   0xdb,   0x3,    0x2,    0x2,\n      0x2,   0x26,   0xe9,   0x3,    0x2,    0x2,    0x2,    0x28,   0xeb,\n      0x3,   0x2,    0x2,    0x2,    0x2a,   0xf7,   0x3,    0x2,    0x2,\n      0x2,   0x2c,   0x107,  0x3,    0x2,    0x2,    0x2,    0x2e,   0x109,\n      0x3,   0x2,    0x2,    0x2,    0x30,   0x10c,  0x3,    0x2,    0x2,\n      0x2,   0x32,   0x116,  0x3,    0x2,    0x2,    0x2,    0x34,   0x11a,\n      0x3,   0x2,    0x2,    0x2,    0x36,   0x11d,  0x3,    0x2,    0x2,\n      0x2,   0x38,   0x11f,  0x3,    0x2,    0x2,    0x2,    0x3a,   0x121,\n      0x3,   0x2,    0x2,    0x2,    0x3c,   0x124,  0x3,    0x2,    0x2,\n      0x2,   0x3e,   0x12a,  0x3,    0x2,    0x2,    0x2,    0x40,   0x12c,\n      0x3,   0x2,    0x2,    0x2,    
0x42,   0x12e,  0x3,    0x2,    0x2,\n      0x2,   0x44,   0x130,  0x3,    0x2,    0x2,    0x2,    0x46,   0x132,\n      0x3,   0x2,    0x2,    0x2,    0x48,   0x134,  0x3,    0x2,    0x2,\n      0x2,   0x4a,   0x136,  0x3,    0x2,    0x2,    0x2,    0x4c,   0x13b,\n      0x3,   0x2,    0x2,    0x2,    0x4e,   0x140,  0x3,    0x2,    0x2,\n      0x2,   0x50,   0x142,  0x3,    0x2,    0x2,    0x2,    0x52,   0x54,\n      0xa,   0x2,    0x2,    0x2,    0x53,   0x52,   0x3,    0x2,    0x2,\n      0x2,   0x54,   0x55,   0x3,    0x2,    0x2,    0x2,    0x55,   0x53,\n      0x3,   0x2,    0x2,    0x2,    0x55,   0x56,   0x3,    0x2,    0x2,\n      0x2,   0x56,   0x3,    0x3,    0x2,    0x2,    0x2,    0x57,   0x59,\n      0x5,   0x8,    0x5,    0x2,    0x58,   0x5a,   0x9,    0x3,    0x2,\n      0x2,   0x59,   0x58,   0x3,    0x2,    0x2,    0x2,    0x59,   0x5a,\n      0x3,   0x2,    0x2,    0x2,    0x5a,   0x5c,   0x3,    0x2,    0x2,\n      0x2,   0x5b,   0x57,   0x3,    0x2,    0x2,    0x2,    0x5c,   0x5d,\n      0x3,   0x2,    0x2,    0x2,    0x5d,   0x5b,   0x3,    0x2,    0x2,\n      0x2,   0x5d,   0x5e,   0x3,    0x2,    0x2,    0x2,    0x5e,   0x5f,\n      0x3,   0x2,    0x2,    0x2,    0x5f,   0x60,   0x7,    0x2,    0x2,\n      0x3,   0x60,   0x5,    0x3,    0x2,    0x2,    0x2,    0x61,   0x62,\n      0x5,   0xc,    0x7,    0x2,    0x62,   0x63,   0x7,    0x2,    0x2,\n      0x3,   0x63,   0x7,    0x3,    0x2,    0x2,    0x2,    0x64,   0x65,\n      0x5,   0x26,   0x14,   0x2,    0x65,   0x9,    0x3,    0x2,    0x2,\n      0x2,   0x66,   0x67,   0x7,    0xb,    0x2,    0x2,    0x67,   0x68,\n      0x5,   0xc,    0x7,    0x2,    0x68,   0xb,    0x3,    0x2,    0x2,\n      0x2,   0x69,   0x6a,   0x8,    0x7,    0x1,    0x2,    0x6a,   0x6d,\n      0x5,   0x10,   0x9,    0x2,    0x6b,   0x6d,   0x5,    0xe,    0x8,\n      0x2,   0x6c,   0x69,   0x3,    0x2,    0x2,    0x2,    0x6c,   0x6b,\n      0x3,   0x2,    0x2,    0x2,    0x6d,   0x76,   0x3,    0x2,    
0x2,\n      0x2,   0x6e,   0x6f,   0xc,    0x5,    0x2,    0x2,    0x6f,   0x70,\n      0x7,   0x4,    0x2,    0x2,    0x70,   0x75,   0x5,    0xc,    0x7,\n      0x6,   0x71,   0x72,   0xc,    0x4,    0x2,    0x2,    0x72,   0x73,\n      0x7,   0x3,    0x2,    0x2,    0x73,   0x75,   0x5,    0xc,    0x7,\n      0x5,   0x74,   0x6e,   0x3,    0x2,    0x2,    0x2,    0x74,   0x71,\n      0x3,   0x2,    0x2,    0x2,    0x75,   0x78,   0x3,    0x2,    0x2,\n      0x2,   0x76,   0x74,   0x3,    0x2,    0x2,    0x2,    0x76,   0x77,\n      0x3,   0x2,    0x2,    0x2,    0x77,   0xd,    0x3,    0x2,    0x2,\n      0x2,   0x78,   0x76,   0x3,    0x2,    0x2,    0x2,    0x79,   0x7a,\n      0x7,   0x1d,   0x2,    0x2,    0x7a,   0x7b,   0x5,    0xc,    0x7,\n      0x2,   0x7b,   0x7c,   0x7,    0x1e,   0x2,    0x2,    0x7c,   0xf,\n      0x3,   0x2,    0x2,    0x2,    0x7d,   0x7e,   0x5,    0x48,   0x25,\n      0x2,   0x7e,   0x7f,   0x5,    0x12,   0xa,    0x2,    0x7f,   0x80,\n      0x5,   0x14,   0xb,    0x2,    0x80,   0xa5,   0x3,    0x2,    0x2,\n      0x2,   0x81,   0x82,   0x5,    0x48,   0x25,   0x2,    0x82,   0x83,\n      0x7,   0xa,    0x2,    0x2,    0x83,   0x84,   0x5,    0x14,   0xb,\n      0x2,   0x84,   0xa5,   0x3,    0x2,    0x2,    0x2,    0x85,   0x87,\n      0x5,   0x48,   0x25,   0x2,    0x86,   0x88,   0x7,    0x5,    0x2,\n      0x2,   0x87,   0x86,   0x3,    0x2,    0x2,    0x2,    0x87,   0x88,\n      0x3,   0x2,    0x2,    0x2,    0x88,   0x89,   0x3,    0x2,    0x2,\n      0x2,   0x89,   0x8a,   0x7,    0x6,    0x2,    0x2,    0x8a,   0x8b,\n      0x7,   0x1d,   0x2,    0x2,    0x8b,   0x8c,   0x5,    0x16,   0xc,\n      0x2,   0x8c,   0x8d,   0x7,    0x1e,   0x2,    0x2,    0x8d,   0xa5,\n      0x3,   0x2,    0x2,    0x2,    0x8e,   0x90,   0x5,    0x48,   0x25,\n      0x2,   0x8f,   0x91,   0x7,    0x5,    0x2,    0x2,    0x90,   0x8f,\n      0x3,   0x2,    0x2,    0x2,    0x90,   0x91,   0x3,    0x2,    0x2,\n      0x2,   0x91,   0x92,   
0x3,    0x2,    0x2,    0x2,    0x92,   0x93,\n      0x9,   0x4,    0x2,    0x2,    0x93,   0x95,   0x7,    0x1d,   0x2,\n      0x2,   0x94,   0x96,   0x5,    0x16,   0xc,    0x2,    0x95,   0x94,\n      0x3,   0x2,    0x2,    0x2,    0x95,   0x96,   0x3,    0x2,    0x2,\n      0x2,   0x96,   0x97,   0x3,    0x2,    0x2,    0x2,    0x97,   0x98,\n      0x7,   0x1e,   0x2,    0x2,    0x98,   0xa5,   0x3,    0x2,    0x2,\n      0x2,   0x99,   0x9a,   0x5,    0x48,   0x25,   0x2,    0x9a,   0x9c,\n      0x7,   0x16,   0x2,    0x2,    0x9b,   0x9d,   0x7,    0x5,    0x2,\n      0x2,   0x9c,   0x9b,   0x3,    0x2,    0x2,    0x2,    0x9c,   0x9d,\n      0x3,   0x2,    0x2,    0x2,    0x9d,   0x9e,   0x3,    0x2,    0x2,\n      0x2,   0x9e,   0x9f,   0x7,    0x17,   0x2,    0x2,    0x9f,   0xa5,\n      0x3,   0x2,    0x2,    0x2,    0xa0,   0xa1,   0x5,    0x24,   0x13,\n      0x2,   0xa1,   0xa2,   0x5,    0x12,   0xa,    0x2,    0xa2,   0xa3,\n      0x5,   0x14,   0xb,    0x2,    0xa3,   0xa5,   0x3,    0x2,    0x2,\n      0x2,   0xa4,   0x7d,   0x3,    0x2,    0x2,    0x2,    0xa4,   0x81,\n      0x3,   0x2,    0x2,    0x2,    0xa4,   0x85,   0x3,    0x2,    0x2,\n      0x2,   0xa4,   0x8e,   0x3,    0x2,    0x2,    0x2,    0xa4,   0x99,\n      0x3,   0x2,    0x2,    0x2,    0xa4,   0xa0,   0x3,    0x2,    0x2,\n      0x2,   0xa5,   0x11,   0x3,    0x2,    0x2,    0x2,    0xa6,   0xad,\n      0x7,   0x33,   0x2,    0x2,    0xa7,   0xad,   0x5,    0x4a,   0x26,\n      0x2,   0xa8,   0xad,   0x7,    0x31,   0x2,    0x2,    0xa9,   0xad,\n      0x7,   0x32,   0x2,    0x2,    0xaa,   0xad,   0x5,    0x4e,   0x28,\n      0x2,   0xab,   0xad,   0x5,    0x4c,   0x27,   0x2,    0xac,   0xa6,\n      0x3,   0x2,    0x2,    0x2,    0xac,   0xa7,   0x3,    0x2,    0x2,\n      0x2,   0xac,   0xa8,   0x3,    0x2,    0x2,    0x2,    0xac,   0xa9,\n      0x3,   0x2,    0x2,    0x2,    0xac,   0xaa,   0x3,    0x2,    0x2,\n      0x2,   0xac,   0xab,   0x3,    0x2,    0x2,    0x2,    
0xad,   0x13,\n      0x3,   0x2,    0x2,    0x2,    0xae,   0xb1,   0x5,    0x1a,   0xe,\n      0x2,   0xaf,   0xb1,   0x5,    0x24,   0x13,   0x2,    0xb0,   0xae,\n      0x3,   0x2,    0x2,    0x2,    0xb0,   0xaf,   0x3,    0x2,    0x2,\n      0x2,   0xb1,   0x15,   0x3,    0x2,    0x2,    0x2,    0xb2,   0xb7,\n      0x5,   0x18,   0xd,    0x2,    0xb3,   0xb4,   0x7,    0x24,   0x2,\n      0x2,   0xb4,   0xb6,   0x5,    0x18,   0xd,    0x2,    0xb5,   0xb3,\n      0x3,   0x2,    0x2,    0x2,    0xb6,   0xb9,   0x3,    0x2,    0x2,\n      0x2,   0xb7,   0xb5,   0x3,    0x2,    0x2,    0x2,    0xb7,   0xb8,\n      0x3,   0x2,    0x2,    0x2,    0xb8,   0x17,   0x3,    0x2,    0x2,\n      0x2,   0xb9,   0xb7,   0x3,    0x2,    0x2,    0x2,    0xba,   0xbd,\n      0x5,   0x1c,   0xf,    0x2,    0xbb,   0xbd,   0x5,    0x46,   0x24,\n      0x2,   0xbc,   0xba,   0x3,    0x2,    0x2,    0x2,    0xbc,   0xbb,\n      0x3,   0x2,    0x2,    0x2,    0xbd,   0x19,   0x3,    0x2,    0x2,\n      0x2,   0xbe,   0xc3,   0x5,    0x3e,   0x20,   0x2,    0xbf,   0xc3,\n      0x5,   0x44,   0x23,   0x2,    0xc0,   0xc3,   0x5,    0x20,   0x11,\n      0x2,   0xc1,   0xc3,   0x5,    0x46,   0x24,   0x2,    0xc2,   0xbe,\n      0x3,   0x2,    0x2,    0x2,    0xc2,   0xbf,   0x3,    0x2,    0x2,\n      0x2,   0xc2,   0xc0,   0x3,    0x2,    0x2,    0x2,    0xc2,   0xc1,\n      0x3,   0x2,    0x2,    0x2,    0xc3,   0x1b,   0x3,    0x2,    0x2,\n      0x2,   0xc4,   0xc7,   0x5,    0x3e,   0x20,   0x2,    0xc5,   0xc7,\n      0x5,   0x44,   0x23,   0x2,    0xc6,   0xc4,   0x3,    0x2,    0x2,\n      0x2,   0xc6,   0xc5,   0x3,    0x2,    0x2,    0x2,    0xc7,   0x1d,\n      0x3,   0x2,    0x2,    0x2,    0xc8,   0xc9,   0x7,    0x1f,   0x2,\n      0x2,   0xc9,   0xce,   0x7,    0x37,   0x2,    0x2,    0xca,   0xcb,\n      0x7,   0x24,   0x2,    0x2,    0xcb,   0xcd,   0x7,    0x37,   0x2,\n      0x2,   0xcc,   0xca,   0x3,    0x2,    0x2,    0x2,    0xcd,   0xd0,\n      0x3,   0x2,   
 0x2,    0x2,    0xce,   0xcc,   0x3,    0x2,    0x2,\n      0x2,   0xce,   0xcf,   0x3,    0x2,    0x2,    0x2,    0xcf,   0xd1,\n      0x3,   0x2,    0x2,    0x2,    0xd0,   0xce,   0x3,    0x2,    0x2,\n      0x2,   0xd1,   0xd2,   0x7,    0x20,   0x2,    0x2,    0xd2,   0x1f,\n      0x3,   0x2,    0x2,    0x2,    0xd3,   0xd6,   0x7,    0x37,   0x2,\n      0x2,   0xd4,   0xd6,   0x5,    0x1e,   0x10,   0x2,    0xd5,   0xd3,\n      0x3,   0x2,    0x2,    0x2,    0xd5,   0xd4,   0x3,    0x2,    0x2,\n      0x2,   0xd6,   0x21,   0x3,    0x2,    0x2,    0x2,    0xd7,   0xda,\n      0x5,   0x14,   0xb,    0x2,    0xd8,   0xda,   0x5,    0x48,   0x25,\n      0x2,   0xd9,   0xd7,   0x3,    0x2,    0x2,    0x2,    0xd9,   0xd8,\n      0x3,   0x2,    0x2,    0x2,    0xda,   0x23,   0x3,    0x2,    0x2,\n      0x2,   0xdb,   0xdc,   0x5,    0x48,   0x25,   0x2,    0xdc,   0xe5,\n      0x7,   0x1d,   0x2,    0x2,    0xdd,   0xe2,   0x5,    0x22,   0x12,\n      0x2,   0xde,   0xdf,   0x7,    0x24,   0x2,    0x2,    0xdf,   0xe1,\n      0x5,   0x22,   0x12,   0x2,    0xe0,   0xde,   0x3,    0x2,    0x2,\n      0x2,   0xe1,   0xe4,   0x3,    0x2,    0x2,    0x2,    0xe2,   0xe0,\n      0x3,   0x2,    0x2,    0x2,    0xe2,   0xe3,   0x3,    0x2,    0x2,\n      0x2,   0xe3,   0xe6,   0x3,    0x2,    0x2,    0x2,    0xe4,   0xe2,\n      0x3,   0x2,    0x2,    0x2,    0xe5,   0xdd,   0x3,    0x2,    0x2,\n      0x2,   0xe5,   0xe6,   0x3,    0x2,    0x2,    0x2,    0xe6,   0xe7,\n      0x3,   0x2,    0x2,    0x2,    0xe7,   0xe8,   0x7,    0x1e,   0x2,\n      0x2,   0xe8,   0x25,   0x3,    0x2,    0x2,    0x2,    0xe9,   0xea,\n      0x5,   0x28,   0x15,   0x2,    0xea,   0x27,   0x3,    0x2,    0x2,\n      0x2,   0xeb,   0xec,   0x7,    0xc,    0x2,    0x2,    0xec,   0xed,\n      0x5,   0x2a,   0x16,   0x2,    0xed,   0xef,   0x5,    0x2e,   0x18,\n      0x2,   0xee,   0xf0,   0x5,    0xa,    0x6,    0x2,    0xef,   0xee,\n      0x3,   0x2,    0x2,    0x2,    0xef,   0xf0,   
0x3,    0x2,    0x2,\n      0x2,   0xf0,   0xf2,   0x3,    0x2,    0x2,    0x2,    0xf1,   0xf3,\n      0x5,   0x30,   0x19,   0x2,    0xf2,   0xf1,   0x3,    0x2,    0x2,\n      0x2,   0xf2,   0xf3,   0x3,    0x2,    0x2,    0x2,    0xf3,   0xf5,\n      0x3,   0x2,    0x2,    0x2,    0xf4,   0xf6,   0x5,    0x34,   0x1b,\n      0x2,   0xf5,   0xf4,   0x3,    0x2,    0x2,    0x2,    0xf5,   0xf6,\n      0x3,   0x2,    0x2,    0x2,    0xf6,   0x29,   0x3,    0x2,    0x2,\n      0x2,   0xf7,   0xfc,   0x5,    0x2c,   0x17,   0x2,    0xf8,   0xf9,\n      0x7,   0x24,   0x2,    0x2,    0xf9,   0xfb,   0x5,    0x2c,   0x17,\n      0x2,   0xfa,   0xf8,   0x3,    0x2,    0x2,    0x2,    0xfb,   0xfe,\n      0x3,   0x2,    0x2,    0x2,    0xfc,   0xfa,   0x3,    0x2,    0x2,\n      0x2,   0xfc,   0xfd,   0x3,    0x2,    0x2,    0x2,    0xfd,   0x2b,\n      0x3,   0x2,    0x2,    0x2,    0xfe,   0xfc,   0x3,    0x2,    0x2,\n      0x2,   0xff,   0x108,  0x7,    0x21,   0x2,    0x2,    0x100,  0x102,\n      0x5,   0x38,   0x1d,   0x2,    0x101,  0x103,  0x7,    0xe,    0x2,\n      0x2,   0x102,  0x101,  0x3,    0x2,    0x2,    0x2,    0x102,  0x103,\n      0x3,   0x2,    0x2,    0x2,    0x103,  0x105,  0x3,    0x2,    0x2,\n      0x2,   0x104,  0x106,  0x5,    0x3c,   0x1f,   0x2,    0x105,  0x104,\n      0x3,   0x2,    0x2,    0x2,    0x105,  0x106,  0x3,    0x2,    0x2,\n      0x2,   0x106,  0x108,  0x3,    0x2,    0x2,    0x2,    0x107,  0xff,\n      0x3,   0x2,    0x2,    0x2,    0x107,  0x100,  0x3,    0x2,    0x2,\n      0x2,   0x108,  0x2d,   0x3,    0x2,    0x2,    0x2,    0x109,  0x10a,\n      0x7,   0xd,    0x2,    0x2,    0x10a,  0x10b,  0x5,    0x36,   0x1c,\n      0x2,   0x10b,  0x2f,   0x3,    0x2,    0x2,    0x2,    0x10c,  0x10d,\n      0x7,   0x10,   0x2,    0x2,    0x10d,  0x10e,  0x7,    0xf,    0x2,\n      0x2,   0x10e,  0x113,  0x5,    0x32,   0x1a,   0x2,    0x10f,  0x110,\n      0x7,   0x24,   0x2,    0x2,    0x110,  0x112,  0x5,    0x32,   0x1a,\n     
 0x2,   0x111,  0x10f,  0x3,    0x2,    0x2,    0x2,    0x112,  0x115,\n      0x3,   0x2,    0x2,    0x2,    0x113,  0x111,  0x3,    0x2,    0x2,\n      0x2,   0x113,  0x114,  0x3,    0x2,    0x2,    0x2,    0x114,  0x31,\n      0x3,   0x2,    0x2,    0x2,    0x115,  0x113,  0x3,    0x2,    0x2,\n      0x2,   0x116,  0x118,  0x5,    0x38,   0x1d,   0x2,    0x117,  0x119,\n      0x9,   0x5,    0x2,    0x2,    0x118,  0x117,  0x3,    0x2,    0x2,\n      0x2,   0x118,  0x119,  0x3,    0x2,    0x2,    0x2,    0x119,  0x33,\n      0x3,   0x2,    0x2,    0x2,    0x11a,  0x11b,  0x7,    0x13,   0x2,\n      0x2,   0x11b,  0x11c,  0x5,    0x40,   0x21,   0x2,    0x11c,  0x35,\n      0x3,   0x2,    0x2,    0x2,    0x11d,  0x11e,  0x5,    0x48,   0x25,\n      0x2,   0x11e,  0x37,   0x3,    0x2,    0x2,    0x2,    0x11f,  0x120,\n      0x5,   0x48,   0x25,   0x2,    0x120,  0x39,   0x3,    0x2,    0x2,\n      0x2,   0x121,  0x122,  0x5,    0x48,   0x25,   0x2,    0x122,  0x3b,\n      0x3,   0x2,    0x2,    0x2,    0x123,  0x125,  0x7,    0xe,    0x2,\n      0x2,   0x124,  0x123,  0x3,    0x2,    0x2,    0x2,    0x124,  0x125,\n      0x3,   0x2,    0x2,    0x2,    0x125,  0x126,  0x3,    0x2,    0x2,\n      0x2,   0x126,  0x127,  0x5,    0x48,   0x25,   0x2,    0x127,  0x3d,\n      0x3,   0x2,    0x2,    0x2,    0x128,  0x12b,  0x5,    0x40,   0x21,\n      0x2,   0x129,  0x12b,  0x5,    0x42,   0x22,   0x2,    0x12a,  0x128,\n      0x3,   0x2,    0x2,    0x2,    0x12a,  0x129,  0x3,    0x2,    0x2,\n      0x2,   0x12b,  0x3f,   0x3,    0x2,    0x2,    0x2,    0x12c,  0x12d,\n      0x7,   0x18,   0x2,    0x2,    0x12d,  0x41,   0x3,    0x2,    0x2,\n      0x2,   0x12e,  0x12f,  0x7,    0x19,   0x2,    0x2,    0x12f,  0x43,\n      0x3,   0x2,    0x2,    0x2,    0x130,  0x131,  0x9,    0x6,    0x2,\n      0x2,   0x131,  0x45,   0x3,    0x2,    0x2,    0x2,    0x132,  0x133,\n      0x9,   0x7,    0x2,    0x2,    0x133,  0x47,   0x3,    0x2,    0x2,\n      0x2,   0x134,  0x135,  
0x5,    0x50,   0x29,   0x2,    0x135,  0x49,\n      0x3,   0x2,    0x2,    0x2,    0x136,  0x137,  0x7,    0x2e,   0x2,\n      0x2,   0x137,  0x4b,   0x3,    0x2,    0x2,    0x2,    0x138,  0x13c,\n      0x7,   0x2d,   0x2,    0x2,    0x139,  0x13a,  0x7,    0x32,   0x2,\n      0x2,   0x13a,  0x13c,  0x7,    0x33,   0x2,    0x2,    0x13b,  0x138,\n      0x3,   0x2,    0x2,    0x2,    0x13b,  0x139,  0x3,    0x2,    0x2,\n      0x2,   0x13c,  0x4d,   0x3,    0x2,    0x2,    0x2,    0x13d,  0x141,\n      0x7,   0x2c,   0x2,    0x2,    0x13e,  0x13f,  0x7,    0x31,   0x2,\n      0x2,   0x13f,  0x141,  0x7,    0x33,   0x2,    0x2,    0x140,  0x13d,\n      0x3,   0x2,    0x2,    0x2,    0x140,  0x13e,  0x3,    0x2,    0x2,\n      0x2,   0x141,  0x4f,   0x3,    0x2,    0x2,    0x2,    0x142,  0x143,\n      0x9,   0x8,    0x2,    0x2,    0x143,  0x51,   0x3,    0x2,    0x2,\n      0x2,   0x25,   0x55,   0x59,   0x5d,   0x6c,   0x74,   0x76,   0x87,\n      0x90,  0x95,   0x9c,   0xa4,   0xac,   0xb0,   0xb7,   0xbc,   0xc2,\n      0xc6,  0xce,   0xd5,   0xd9,   0xe2,   0xe5,   0xef,   0xf2,   0xf5,\n      0xfc,  0x102,  0x105,  0x107,  0x113,  0x118,  0x124,  0x12a,  0x13b,\n      0x140,\n  };\n\n  atn::ATNDeserializer deserializer;\n  _atn = deserializer.deserialize(_serializedATN);\n\n  size_t count = _atn.getNumberOfDecisions();\n  _decisionToDFA.reserve(count);\n  for (size_t i = 0; i < count; i++) {\n    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);\n  }\n}\n\nSQLParser::Initializer SQLParser::_init;\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParser.h",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n#pragma once\n\n\n#include \"antlr4-runtime.h\"\n\n\nnamespace antlr4 {\n\n\nclass SQLParser : public antlr4::Parser {\n public:\n  enum {\n    OR = 1,\n    AND = 2,\n    NOT = 3,\n    IN = 4,\n    CONTAIN_ALL = 5,\n    CONTAIN_ANY = 6,\n    BETWEEN = 7,\n    LIKE = 8,\n    WHERE = 9,\n    SELECT = 10,\n    FROM = 11,\n    AS = 12,\n    BY = 13,\n    ORDER = 14,\n    ASC = 15,\n    DESC = 16,\n    LIMIT = 17,\n    TRUE_V = 18,\n    FALSE_V = 19,\n    IS = 20,\n    NULL_V = 21,\n    INTEGER = 22,\n    FLOAT = 23,\n    SQUOTA_STRING = 24,\n    DQUOTA_STRING = 25,\n    DOT = 26,\n    LP = 27,\n    RP = 28,\n    LMP = 29,\n    RMP = 30,\n    ASTERISK = 31,\n    PLUS_SIGN = 32,\n    MINUS_SIGN = 33,\n    COMMA = 34,\n    SOLIDUS = 35,\n    MOD = 36,\n    AT_SIGN = 37,\n    ASSIGN_OP = 38,\n    SHARP_SIGN = 39,\n    COLON = 40,\n    SEMI = 41,\n    LE_OP = 42,\n    GE_OP = 43,\n    NE_OP = 44,\n    CARET_OP = 45,\n    TILDE_OP = 46,\n    L_OP = 47,\n    G_OP = 48,\n    E_OP = 49,\n    CONCAT_OP = 50,\n    UNDERSCORE = 51,\n    SPACES = 52,\n    VECTOR = 53,\n    SINGLE_LINE_COMMENT = 54,\n    MULTI_LINE_COMMENT = 55,\n    REGULAR_ID = 56\n  };\n\n  enum {\n    RuleSwallow_to_semi = 0,\n    RuleCompilation_unit = 1,\n    RuleLogic_expr_unit = 2,\n    RuleUnit_statement = 3,\n    RuleWhere_clause = 4,\n    RuleLogic_expr = 5,\n    RuleEnclosed_expr = 6,\n    RuleRelation_expr = 7,\n    RuleRel_oper = 8,\n    RuleValue_expr = 9,\n    RuleIn_value_expr_list = 10,\n    RuleIn_value_expr = 11,\n    RuleConstant = 12,\n    RuleConstant_num_and_str = 13,\n    RuleMatrix = 14,\n    RuleVector_expr = 15,\n    RuleFunction_value_expr = 16,\n    RuleFunction_call = 17,\n    RuleDql_statement = 18,\n    RuleSelect_statement = 19,\n    RuleSelected_elements = 20,\n    RuleSelected_element = 21,\n    RuleFrom_clause = 22,\n    RuleOrder_by_clause = 23,\n    RuleOrder_by_element = 24,\n    RuleLimit_clause = 25,\n    
RuleTableview_name = 26,\n    RuleField_name = 27,\n    RuleTable_alias = 28,\n    RuleField_alias = 29,\n    RuleNumeric = 30,\n    RuleInt_value = 31,\n    RuleFloat_value = 32,\n    RuleQuoted_string = 33,\n    RuleBool_value = 34,\n    RuleIdentifier = 35,\n    RuleNe_op = 36,\n    RuleGe_op = 37,\n    RuleLe_op = 38,\n    RuleRegular_id = 39\n  };\n\n  SQLParser(antlr4::TokenStream *input);\n  ~SQLParser();\n\n  virtual std::string getGrammarFileName() const override;\n  virtual const antlr4::atn::ATN &getATN() const override {\n    return _atn;\n  };\n  virtual const std::vector<std::string> &getTokenNames() const override {\n    return _tokenNames;\n  };  // deprecated: use vocabulary instead.\n  virtual const std::vector<std::string> &getRuleNames() const override;\n  virtual antlr4::dfa::Vocabulary &getVocabulary() const override;\n\n\n  class Swallow_to_semiContext;\n  class Compilation_unitContext;\n  class Logic_expr_unitContext;\n  class Unit_statementContext;\n  class Where_clauseContext;\n  class Logic_exprContext;\n  class Enclosed_exprContext;\n  class Relation_exprContext;\n  class Rel_operContext;\n  class Value_exprContext;\n  class In_value_expr_listContext;\n  class In_value_exprContext;\n  class ConstantContext;\n  class Constant_num_and_strContext;\n  class MatrixContext;\n  class Vector_exprContext;\n  class Function_value_exprContext;\n  class Function_callContext;\n  class Dql_statementContext;\n  class Select_statementContext;\n  class Selected_elementsContext;\n  class Selected_elementContext;\n  class From_clauseContext;\n  class Order_by_clauseContext;\n  class Order_by_elementContext;\n  class Limit_clauseContext;\n  class Tableview_nameContext;\n  class Field_nameContext;\n  class Table_aliasContext;\n  class Field_aliasContext;\n  class NumericContext;\n  class Int_valueContext;\n  class Float_valueContext;\n  class Quoted_stringContext;\n  class Bool_valueContext;\n  class IdentifierContext;\n  class Ne_opContext;\n  class 
Ge_opContext;\n  class Le_opContext;\n  class Regular_idContext;\n\n  class Swallow_to_semiContext : public antlr4::ParserRuleContext {\n   public:\n    Swallow_to_semiContext(antlr4::ParserRuleContext *parent_ctx,\n                           size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    std::vector<antlr4::tree::TerminalNode *> SEMI();\n    antlr4::tree::TerminalNode *SEMI(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Swallow_to_semiContext *swallow_to_semi();\n\n  class Compilation_unitContext : public antlr4::ParserRuleContext {\n   public:\n    Compilation_unitContext(antlr4::ParserRuleContext *parent_ctx,\n                            size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *EOF();\n    std::vector<Unit_statementContext *> unit_statement();\n    Unit_statementContext *unit_statement(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> SOLIDUS();\n    antlr4::tree::TerminalNode *SOLIDUS(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> SEMI();\n    antlr4::tree::TerminalNode *SEMI(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Compilation_unitContext *compilation_unit();\n\n  class Logic_expr_unitContext : public antlr4::ParserRuleContext {\n   public:\n    Logic_expr_unitContext(antlr4::ParserRuleContext *parent_ctx,\n                           size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Logic_exprContext *logic_expr();\n    antlr4::tree::TerminalNode *EOF();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  
Logic_expr_unitContext *logic_expr_unit();\n\n  class Unit_statementContext : public antlr4::ParserRuleContext {\n   public:\n    Unit_statementContext(antlr4::ParserRuleContext *parent_ctx,\n                          size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Dql_statementContext *dql_statement();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Unit_statementContext *unit_statement();\n\n  class Where_clauseContext : public antlr4::ParserRuleContext {\n   public:\n    Where_clauseContext(antlr4::ParserRuleContext *parent_ctx,\n                        size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *WHERE();\n    Logic_exprContext *logic_expr();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Where_clauseContext *where_clause();\n\n  class Logic_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Logic_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Relation_exprContext *relation_expr();\n    Enclosed_exprContext *enclosed_expr();\n    std::vector<Logic_exprContext *> logic_expr();\n    Logic_exprContext *logic_expr(size_t i);\n    antlr4::tree::TerminalNode *AND();\n    antlr4::tree::TerminalNode *OR();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Logic_exprContext *logic_expr();\n  Logic_exprContext *logic_expr(int precedence);\n  class Enclosed_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Enclosed_exprContext(antlr4::ParserRuleContext *parent_ctx,\n        
                 size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *LP();\n    Logic_exprContext *logic_expr();\n    antlr4::tree::TerminalNode *RP();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Enclosed_exprContext *enclosed_expr();\n\n  class Relation_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Relation_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                         size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext *identifier();\n    Rel_operContext *rel_oper();\n    Value_exprContext *value_expr();\n    antlr4::tree::TerminalNode *LIKE();\n    antlr4::tree::TerminalNode *IN();\n    antlr4::tree::TerminalNode *LP();\n    In_value_expr_listContext *in_value_expr_list();\n    antlr4::tree::TerminalNode *RP();\n    antlr4::tree::TerminalNode *NOT();\n    antlr4::tree::TerminalNode *CONTAIN_ALL();\n    antlr4::tree::TerminalNode *CONTAIN_ANY();\n    antlr4::tree::TerminalNode *IS();\n    antlr4::tree::TerminalNode *NULL_V();\n    Function_callContext *function_call();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Relation_exprContext *relation_expr();\n\n  class Rel_operContext : public antlr4::ParserRuleContext {\n   public:\n    Rel_operContext(antlr4::ParserRuleContext *parent_ctx,\n                    size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *E_OP();\n    Ne_opContext *ne_op();\n    antlr4::tree::TerminalNode *L_OP();\n    antlr4::tree::TerminalNode *G_OP();\n    Le_opContext *le_op();\n    Ge_opContext *ge_op();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual 
void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Rel_operContext *rel_oper();\n\n  class Value_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Value_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    ConstantContext *constant();\n    Function_callContext *function_call();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Value_exprContext *value_expr();\n\n  class In_value_expr_listContext : public antlr4::ParserRuleContext {\n   public:\n    In_value_expr_listContext(antlr4::ParserRuleContext *parent_ctx,\n                              size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    std::vector<In_value_exprContext *> in_value_expr();\n    In_value_exprContext *in_value_expr(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> COMMA();\n    antlr4::tree::TerminalNode *COMMA(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  In_value_expr_listContext *in_value_expr_list();\n\n  class In_value_exprContext : public antlr4::ParserRuleContext {\n   public:\n    In_value_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                         size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Constant_num_and_strContext *constant_num_and_str();\n    Bool_valueContext *bool_value();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  In_value_exprContext *in_value_expr();\n\n  class ConstantContext : public antlr4::ParserRuleContext {\n   public:\n    
ConstantContext(antlr4::ParserRuleContext *parent_ctx,\n                    size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    NumericContext *numeric();\n    Quoted_stringContext *quoted_string();\n    Vector_exprContext *vector_expr();\n    Bool_valueContext *bool_value();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  ConstantContext *constant();\n\n  class Constant_num_and_strContext : public antlr4::ParserRuleContext {\n   public:\n    Constant_num_and_strContext(antlr4::ParserRuleContext *parent_ctx,\n                                size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    NumericContext *numeric();\n    Quoted_stringContext *quoted_string();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Constant_num_and_strContext *constant_num_and_str();\n\n  class MatrixContext : public antlr4::ParserRuleContext {\n   public:\n    MatrixContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *LMP();\n    std::vector<antlr4::tree::TerminalNode *> VECTOR();\n    antlr4::tree::TerminalNode *VECTOR(size_t i);\n    antlr4::tree::TerminalNode *RMP();\n    std::vector<antlr4::tree::TerminalNode *> COMMA();\n    antlr4::tree::TerminalNode *COMMA(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  MatrixContext *matrix();\n\n  class Vector_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Vector_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                       size_t invoking_state);\n    virtual size_t 
getRuleIndex() const override;\n    antlr4::tree::TerminalNode *VECTOR();\n    MatrixContext *matrix();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Vector_exprContext *vector_expr();\n\n  class Function_value_exprContext : public antlr4::ParserRuleContext {\n   public:\n    Function_value_exprContext(antlr4::ParserRuleContext *parent_ctx,\n                               size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Value_exprContext *value_expr();\n    IdentifierContext *identifier();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Function_value_exprContext *function_value_expr();\n\n  class Function_callContext : public antlr4::ParserRuleContext {\n   public:\n    Function_callContext(antlr4::ParserRuleContext *parent_ctx,\n                         size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext *identifier();\n    antlr4::tree::TerminalNode *LP();\n    antlr4::tree::TerminalNode *RP();\n    std::vector<Function_value_exprContext *> function_value_expr();\n    Function_value_exprContext *function_value_expr(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> COMMA();\n    antlr4::tree::TerminalNode *COMMA(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Function_callContext *function_call();\n\n  class Dql_statementContext : public antlr4::ParserRuleContext {\n   public:\n    Dql_statementContext(antlr4::ParserRuleContext *parent_ctx,\n                         size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Select_statementContext *select_statement();\n\n    
virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Dql_statementContext *dql_statement();\n\n  class Select_statementContext : public antlr4::ParserRuleContext {\n   public:\n    Select_statementContext(antlr4::ParserRuleContext *parent_ctx,\n                            size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *SELECT();\n    Selected_elementsContext *selected_elements();\n    From_clauseContext *from_clause();\n    Where_clauseContext *where_clause();\n    Order_by_clauseContext *order_by_clause();\n    Limit_clauseContext *limit_clause();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Select_statementContext *select_statement();\n\n  class Selected_elementsContext : public antlr4::ParserRuleContext {\n   public:\n    Selected_elementsContext(antlr4::ParserRuleContext *parent_ctx,\n                             size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    std::vector<Selected_elementContext *> selected_element();\n    Selected_elementContext *selected_element(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> COMMA();\n    antlr4::tree::TerminalNode *COMMA(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Selected_elementsContext *selected_elements();\n\n  class Selected_elementContext : public antlr4::ParserRuleContext {\n   public:\n    Selected_elementContext(antlr4::ParserRuleContext *parent_ctx,\n                            size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *ASTERISK();\n    Field_nameContext *field_name();\n    
antlr4::tree::TerminalNode *AS();\n    Field_aliasContext *field_alias();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Selected_elementContext *selected_element();\n\n  class From_clauseContext : public antlr4::ParserRuleContext {\n   public:\n    From_clauseContext(antlr4::ParserRuleContext *parent_ctx,\n                       size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *FROM();\n    Tableview_nameContext *tableview_name();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  From_clauseContext *from_clause();\n\n  class Order_by_clauseContext : public antlr4::ParserRuleContext {\n   public:\n    Order_by_clauseContext(antlr4::ParserRuleContext *parent_ctx,\n                           size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *ORDER();\n    antlr4::tree::TerminalNode *BY();\n    std::vector<Order_by_elementContext *> order_by_element();\n    Order_by_elementContext *order_by_element(size_t i);\n    std::vector<antlr4::tree::TerminalNode *> COMMA();\n    antlr4::tree::TerminalNode *COMMA(size_t i);\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Order_by_clauseContext *order_by_clause();\n\n  class Order_by_elementContext : public antlr4::ParserRuleContext {\n   public:\n    Order_by_elementContext(antlr4::ParserRuleContext *parent_ctx,\n                            size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Field_nameContext *field_name();\n    antlr4::tree::TerminalNode *ASC();\n    antlr4::tree::TerminalNode *DESC();\n\n    virtual 
void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Order_by_elementContext *order_by_element();\n\n  class Limit_clauseContext : public antlr4::ParserRuleContext {\n   public:\n    Limit_clauseContext(antlr4::ParserRuleContext *parent_ctx,\n                        size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *LIMIT();\n    Int_valueContext *int_value();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Limit_clauseContext *limit_clause();\n\n  class Tableview_nameContext : public antlr4::ParserRuleContext {\n   public:\n    Tableview_nameContext(antlr4::ParserRuleContext *parent_ctx,\n                          size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext *identifier();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Tableview_nameContext *tableview_name();\n\n  class Field_nameContext : public antlr4::ParserRuleContext {\n   public:\n    Field_nameContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext *identifier();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Field_nameContext *field_name();\n\n  class Table_aliasContext : public antlr4::ParserRuleContext {\n   public:\n    Table_aliasContext(antlr4::ParserRuleContext *parent_ctx,\n                       size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext 
*identifier();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Table_aliasContext *table_alias();\n\n  class Field_aliasContext : public antlr4::ParserRuleContext {\n   public:\n    Field_aliasContext(antlr4::ParserRuleContext *parent_ctx,\n                       size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    IdentifierContext *identifier();\n    antlr4::tree::TerminalNode *AS();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Field_aliasContext *field_alias();\n\n  class NumericContext : public antlr4::ParserRuleContext {\n   public:\n    NumericContext(antlr4::ParserRuleContext *parent_ctx,\n                   size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Int_valueContext *int_value();\n    Float_valueContext *float_value();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  NumericContext *numeric();\n\n  class Int_valueContext : public antlr4::ParserRuleContext {\n   public:\n    Int_valueContext(antlr4::ParserRuleContext *parent_ctx,\n                     size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *INTEGER();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Int_valueContext *int_value();\n\n  class Float_valueContext : public antlr4::ParserRuleContext {\n   public:\n    Float_valueContext(antlr4::ParserRuleContext *parent_ctx,\n                       size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    
antlr4::tree::TerminalNode *FLOAT();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Float_valueContext *float_value();\n\n  class Quoted_stringContext : public antlr4::ParserRuleContext {\n   public:\n    Quoted_stringContext(antlr4::ParserRuleContext *parent_ctx,\n                         size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *SQUOTA_STRING();\n    antlr4::tree::TerminalNode *DQUOTA_STRING();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Quoted_stringContext *quoted_string();\n\n  class Bool_valueContext : public antlr4::ParserRuleContext {\n   public:\n    Bool_valueContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *TRUE_V();\n    antlr4::tree::TerminalNode *FALSE_V();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Bool_valueContext *bool_value();\n\n  class IdentifierContext : public antlr4::ParserRuleContext {\n   public:\n    IdentifierContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    Regular_idContext *regular_id();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  IdentifierContext *identifier();\n\n  class Ne_opContext : public antlr4::ParserRuleContext {\n   public:\n    Ne_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);\n    virtual size_t 
getRuleIndex() const override;\n    antlr4::tree::TerminalNode *NE_OP();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Ne_opContext *ne_op();\n\n  class Ge_opContext : public antlr4::ParserRuleContext {\n   public:\n    Ge_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *GE_OP();\n    antlr4::tree::TerminalNode *G_OP();\n    antlr4::tree::TerminalNode *E_OP();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Ge_opContext *ge_op();\n\n  class Le_opContext : public antlr4::ParserRuleContext {\n   public:\n    Le_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *LE_OP();\n    antlr4::tree::TerminalNode *L_OP();\n    antlr4::tree::TerminalNode *E_OP();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Le_opContext *le_op();\n\n  class Regular_idContext : public antlr4::ParserRuleContext {\n   public:\n    Regular_idContext(antlr4::ParserRuleContext *parent_ctx,\n                      size_t invoking_state);\n    virtual size_t getRuleIndex() const override;\n    antlr4::tree::TerminalNode *REGULAR_ID();\n    antlr4::tree::TerminalNode *OR();\n    antlr4::tree::TerminalNode *AND();\n    antlr4::tree::TerminalNode *NOT();\n    antlr4::tree::TerminalNode *IN();\n    antlr4::tree::TerminalNode *BETWEEN();\n    antlr4::tree::TerminalNode *LIKE();\n    antlr4::tree::TerminalNode *WHERE();\n    antlr4::tree::TerminalNode *SELECT();\n    antlr4::tree::TerminalNode *AS();\n    
antlr4::tree::TerminalNode *BY();\n    antlr4::tree::TerminalNode *ORDER();\n    antlr4::tree::TerminalNode *ASC();\n    antlr4::tree::TerminalNode *DESC();\n    antlr4::tree::TerminalNode *LIMIT();\n\n    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;\n    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;\n  };\n\n  Regular_idContext *regular_id();\n\n\n  virtual bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex,\n                       size_t predicateIndex) override;\n  bool logic_exprSempred(Logic_exprContext * /*_localctx*/,\n                         size_t predicateIndex);\n\n private:\n  static std::vector<antlr4::dfa::DFA> _decisionToDFA;\n  static antlr4::atn::PredictionContextCache _sharedContextCache;\n  static std::vector<std::string> _ruleNames;\n  static std::vector<std::string> _tokenNames;\n\n  static std::vector<std::string> _literalNames;\n  static std::vector<std::string> _symbolicNames;\n  static antlr4::dfa::Vocabulary _vocabulary;\n  static antlr4::atn::ATN _atn;\n  static std::vector<uint16_t> _serializedATN;\n\n\n  struct Initializer {\n    Initializer();\n  };\n  static Initializer _init;\n};\n\n}  // namespace antlr4\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParser.interp",
    "content": "token literal names:\nnull\n'OR'\n'AND'\n'NOT'\n'IN'\n'CONTAIN_ALL'\n'CONTAIN_ANY'\n'BETWEEN'\n'LIKE'\n'WHERE'\n'SELECT'\n'FROM'\n'AS'\n'BY'\n'ORDER'\n'ASC'\n'DESC'\n'LIMIT'\n'TRUE'\n'FALSE'\n'IS'\n'NULL'\nnull\nnull\nnull\nnull\n'.'\n'('\n')'\n'['\n']'\n'*'\n'+'\n'-'\n','\n'/'\n'%'\n'@'\n':='\n'#'\n':'\n';'\n'<='\n'>='\n'!='\n'^'\n'~'\n'<'\n'>'\n'='\n'||'\n'_'\nnull\nnull\nnull\nnull\nnull\n\ntoken symbolic names:\nnull\nOR\nAND\nNOT\nIN\nCONTAIN_ALL\nCONTAIN_ANY\nBETWEEN\nLIKE\nWHERE\nSELECT\nFROM\nAS\nBY\nORDER\nASC\nDESC\nLIMIT\nTRUE_V\nFALSE_V\nIS\nNULL_V\nINTEGER\nFLOAT\nSQUOTA_STRING\nDQUOTA_STRING\nDOT\nLP\nRP\nLMP\nRMP\nASTERISK\nPLUS_SIGN\nMINUS_SIGN\nCOMMA\nSOLIDUS\nMOD\nAT_SIGN\nASSIGN_OP\nSHARP_SIGN\nCOLON\nSEMI\nLE_OP\nGE_OP\nNE_OP\nCARET_OP\nTILDE_OP\nL_OP\nG_OP\nE_OP\nCONCAT_OP\nUNDERSCORE\nSPACES\nVECTOR\nSINGLE_LINE_COMMENT\nMULTI_LINE_COMMENT\nREGULAR_ID\n\nrule names:\nswallow_to_semi\ncompilation_unit\nlogic_expr_unit\nunit_statement\nwhere_clause\nlogic_expr\nenclosed_expr\nrelation_expr\nrel_oper\nvalue_expr\nin_value_expr_list\nin_value_expr\nconstant\nconstant_num_and_str\nmatrix\nvector_expr\nfunction_value_expr\nfunction_call\ndql_statement\nselect_statement\nselected_elements\nselected_element\nfrom_clause\norder_by_clause\norder_by_element\nlimit_clause\ntableview_name\nfield_name\ntable_alias\nfield_alias\nnumeric\nint_value\nfloat_value\nquoted_string\nbool_value\nidentifier\nne_op\nge_op\nle_op\nregular_id\n\n\natn:\n[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 58, 325, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 9, 26, 4, 27, 9, 27, 4, 28, 9, 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 4, 34, 
9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 3, 2, 6, 2, 84, 10, 2, 13, 2, 14, 2, 85, 3, 3, 3, 3, 5, 3, 90, 10, 3, 6, 3, 92, 10, 3, 13, 3, 14, 3, 93, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 5, 7, 109, 10, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 117, 10, 7, 12, 7, 14, 7, 120, 11, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 136, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 145, 10, 9, 3, 9, 3, 9, 3, 9, 5, 9, 150, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 157, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 165, 10, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 5, 10, 173, 10, 10, 3, 11, 3, 11, 5, 11, 177, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 182, 10, 12, 12, 12, 14, 12, 185, 11, 12, 3, 13, 3, 13, 5, 13, 189, 10, 13, 3, 14, 3, 14, 3, 14, 3, 14, 5, 14, 195, 10, 14, 3, 15, 3, 15, 5, 15, 199, 10, 15, 3, 16, 3, 16, 3, 16, 3, 16, 7, 16, 205, 10, 16, 12, 16, 14, 16, 208, 11, 16, 3, 16, 3, 16, 3, 17, 3, 17, 5, 17, 214, 10, 17, 3, 18, 3, 18, 5, 18, 218, 10, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 7, 19, 225, 10, 19, 12, 19, 14, 19, 228, 11, 19, 5, 19, 230, 10, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 21, 5, 21, 240, 10, 21, 3, 21, 5, 21, 243, 10, 21, 3, 21, 5, 21, 246, 10, 21, 3, 22, 3, 22, 3, 22, 7, 22, 251, 10, 22, 12, 22, 14, 22, 254, 11, 22, 3, 23, 3, 23, 3, 23, 5, 23, 259, 10, 23, 3, 23, 5, 23, 262, 10, 23, 5, 23, 264, 10, 23, 3, 24, 3, 24, 3, 24, 3, 25, 3, 25, 3, 25, 3, 25, 3, 25, 7, 25, 274, 10, 25, 12, 25, 14, 25, 277, 11, 25, 3, 26, 3, 26, 5, 26, 281, 10, 26, 3, 27, 3, 27, 3, 27, 3, 28, 3, 28, 3, 29, 3, 29, 3, 30, 3, 30, 3, 31, 5, 31, 293, 10, 31, 3, 31, 3, 31, 3, 32, 3, 32, 5, 32, 299, 10, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 35, 3, 35, 3, 36, 3, 36, 3, 37, 3, 37, 3, 38, 3, 38, 3, 39, 3, 39, 3, 39, 5, 39, 316, 10, 39, 3, 40, 3, 40, 3, 40, 5, 40, 321, 10, 40, 3, 41, 3, 41, 3, 41, 2, 3, 12, 42, 2, 
4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 2, 9, 3, 2, 43, 43, 4, 2, 37, 37, 43, 43, 3, 2, 7, 8, 3, 2, 17, 18, 3, 2, 26, 27, 3, 2, 20, 21, 6, 2, 3, 6, 9, 12, 14, 19, 58, 58, 2, 329, 2, 83, 3, 2, 2, 2, 4, 91, 3, 2, 2, 2, 6, 97, 3, 2, 2, 2, 8, 100, 3, 2, 2, 2, 10, 102, 3, 2, 2, 2, 12, 108, 3, 2, 2, 2, 14, 121, 3, 2, 2, 2, 16, 164, 3, 2, 2, 2, 18, 172, 3, 2, 2, 2, 20, 176, 3, 2, 2, 2, 22, 178, 3, 2, 2, 2, 24, 188, 3, 2, 2, 2, 26, 194, 3, 2, 2, 2, 28, 198, 3, 2, 2, 2, 30, 200, 3, 2, 2, 2, 32, 213, 3, 2, 2, 2, 34, 217, 3, 2, 2, 2, 36, 219, 3, 2, 2, 2, 38, 233, 3, 2, 2, 2, 40, 235, 3, 2, 2, 2, 42, 247, 3, 2, 2, 2, 44, 263, 3, 2, 2, 2, 46, 265, 3, 2, 2, 2, 48, 268, 3, 2, 2, 2, 50, 278, 3, 2, 2, 2, 52, 282, 3, 2, 2, 2, 54, 285, 3, 2, 2, 2, 56, 287, 3, 2, 2, 2, 58, 289, 3, 2, 2, 2, 60, 292, 3, 2, 2, 2, 62, 298, 3, 2, 2, 2, 64, 300, 3, 2, 2, 2, 66, 302, 3, 2, 2, 2, 68, 304, 3, 2, 2, 2, 70, 306, 3, 2, 2, 2, 72, 308, 3, 2, 2, 2, 74, 310, 3, 2, 2, 2, 76, 315, 3, 2, 2, 2, 78, 320, 3, 2, 2, 2, 80, 322, 3, 2, 2, 2, 82, 84, 10, 2, 2, 2, 83, 82, 3, 2, 2, 2, 84, 85, 3, 2, 2, 2, 85, 83, 3, 2, 2, 2, 85, 86, 3, 2, 2, 2, 86, 3, 3, 2, 2, 2, 87, 89, 5, 8, 5, 2, 88, 90, 9, 3, 2, 2, 89, 88, 3, 2, 2, 2, 89, 90, 3, 2, 2, 2, 90, 92, 3, 2, 2, 2, 91, 87, 3, 2, 2, 2, 92, 93, 3, 2, 2, 2, 93, 91, 3, 2, 2, 2, 93, 94, 3, 2, 2, 2, 94, 95, 3, 2, 2, 2, 95, 96, 7, 2, 2, 3, 96, 5, 3, 2, 2, 2, 97, 98, 5, 12, 7, 2, 98, 99, 7, 2, 2, 3, 99, 7, 3, 2, 2, 2, 100, 101, 5, 38, 20, 2, 101, 9, 3, 2, 2, 2, 102, 103, 7, 11, 2, 2, 103, 104, 5, 12, 7, 2, 104, 11, 3, 2, 2, 2, 105, 106, 8, 7, 1, 2, 106, 109, 5, 16, 9, 2, 107, 109, 5, 14, 8, 2, 108, 105, 3, 2, 2, 2, 108, 107, 3, 2, 2, 2, 109, 118, 3, 2, 2, 2, 110, 111, 12, 5, 2, 2, 111, 112, 7, 4, 2, 2, 112, 117, 5, 12, 7, 6, 113, 114, 12, 4, 2, 2, 114, 115, 7, 3, 2, 2, 115, 117, 5, 12, 7, 5, 116, 110, 3, 2, 2, 2, 116, 113, 3, 2, 2, 2, 117, 120, 3, 2, 2, 2, 118, 
116, 3, 2, 2, 2, 118, 119, 3, 2, 2, 2, 119, 13, 3, 2, 2, 2, 120, 118, 3, 2, 2, 2, 121, 122, 7, 29, 2, 2, 122, 123, 5, 12, 7, 2, 123, 124, 7, 30, 2, 2, 124, 15, 3, 2, 2, 2, 125, 126, 5, 72, 37, 2, 126, 127, 5, 18, 10, 2, 127, 128, 5, 20, 11, 2, 128, 165, 3, 2, 2, 2, 129, 130, 5, 72, 37, 2, 130, 131, 7, 10, 2, 2, 131, 132, 5, 20, 11, 2, 132, 165, 3, 2, 2, 2, 133, 135, 5, 72, 37, 2, 134, 136, 7, 5, 2, 2, 135, 134, 3, 2, 2, 2, 135, 136, 3, 2, 2, 2, 136, 137, 3, 2, 2, 2, 137, 138, 7, 6, 2, 2, 138, 139, 7, 29, 2, 2, 139, 140, 5, 22, 12, 2, 140, 141, 7, 30, 2, 2, 141, 165, 3, 2, 2, 2, 142, 144, 5, 72, 37, 2, 143, 145, 7, 5, 2, 2, 144, 143, 3, 2, 2, 2, 144, 145, 3, 2, 2, 2, 145, 146, 3, 2, 2, 2, 146, 147, 9, 4, 2, 2, 147, 149, 7, 29, 2, 2, 148, 150, 5, 22, 12, 2, 149, 148, 3, 2, 2, 2, 149, 150, 3, 2, 2, 2, 150, 151, 3, 2, 2, 2, 151, 152, 7, 30, 2, 2, 152, 165, 3, 2, 2, 2, 153, 154, 5, 72, 37, 2, 154, 156, 7, 22, 2, 2, 155, 157, 7, 5, 2, 2, 156, 155, 3, 2, 2, 2, 156, 157, 3, 2, 2, 2, 157, 158, 3, 2, 2, 2, 158, 159, 7, 23, 2, 2, 159, 165, 3, 2, 2, 2, 160, 161, 5, 36, 19, 2, 161, 162, 5, 18, 10, 2, 162, 163, 5, 20, 11, 2, 163, 165, 3, 2, 2, 2, 164, 125, 3, 2, 2, 2, 164, 129, 3, 2, 2, 2, 164, 133, 3, 2, 2, 2, 164, 142, 3, 2, 2, 2, 164, 153, 3, 2, 2, 2, 164, 160, 3, 2, 2, 2, 165, 17, 3, 2, 2, 2, 166, 173, 7, 51, 2, 2, 167, 173, 5, 74, 38, 2, 168, 173, 7, 49, 2, 2, 169, 173, 7, 50, 2, 2, 170, 173, 5, 78, 40, 2, 171, 173, 5, 76, 39, 2, 172, 166, 3, 2, 2, 2, 172, 167, 3, 2, 2, 2, 172, 168, 3, 2, 2, 2, 172, 169, 3, 2, 2, 2, 172, 170, 3, 2, 2, 2, 172, 171, 3, 2, 2, 2, 173, 19, 3, 2, 2, 2, 174, 177, 5, 26, 14, 2, 175, 177, 5, 36, 19, 2, 176, 174, 3, 2, 2, 2, 176, 175, 3, 2, 2, 2, 177, 21, 3, 2, 2, 2, 178, 183, 5, 24, 13, 2, 179, 180, 7, 36, 2, 2, 180, 182, 5, 24, 13, 2, 181, 179, 3, 2, 2, 2, 182, 185, 3, 2, 2, 2, 183, 181, 3, 2, 2, 2, 183, 184, 3, 2, 2, 2, 184, 23, 3, 2, 2, 2, 185, 183, 3, 2, 2, 2, 186, 189, 5, 28, 15, 2, 187, 189, 5, 70, 36, 2, 188, 186, 3, 2, 2, 2, 188, 187, 3, 2, 
2, 2, 189, 25, 3, 2, 2, 2, 190, 195, 5, 62, 32, 2, 191, 195, 5, 68, 35, 2, 192, 195, 5, 32, 17, 2, 193, 195, 5, 70, 36, 2, 194, 190, 3, 2, 2, 2, 194, 191, 3, 2, 2, 2, 194, 192, 3, 2, 2, 2, 194, 193, 3, 2, 2, 2, 195, 27, 3, 2, 2, 2, 196, 199, 5, 62, 32, 2, 197, 199, 5, 68, 35, 2, 198, 196, 3, 2, 2, 2, 198, 197, 3, 2, 2, 2, 199, 29, 3, 2, 2, 2, 200, 201, 7, 31, 2, 2, 201, 206, 7, 55, 2, 2, 202, 203, 7, 36, 2, 2, 203, 205, 7, 55, 2, 2, 204, 202, 3, 2, 2, 2, 205, 208, 3, 2, 2, 2, 206, 204, 3, 2, 2, 2, 206, 207, 3, 2, 2, 2, 207, 209, 3, 2, 2, 2, 208, 206, 3, 2, 2, 2, 209, 210, 7, 32, 2, 2, 210, 31, 3, 2, 2, 2, 211, 214, 7, 55, 2, 2, 212, 214, 5, 30, 16, 2, 213, 211, 3, 2, 2, 2, 213, 212, 3, 2, 2, 2, 214, 33, 3, 2, 2, 2, 215, 218, 5, 20, 11, 2, 216, 218, 5, 72, 37, 2, 217, 215, 3, 2, 2, 2, 217, 216, 3, 2, 2, 2, 218, 35, 3, 2, 2, 2, 219, 220, 5, 72, 37, 2, 220, 229, 7, 29, 2, 2, 221, 226, 5, 34, 18, 2, 222, 223, 7, 36, 2, 2, 223, 225, 5, 34, 18, 2, 224, 222, 3, 2, 2, 2, 225, 228, 3, 2, 2, 2, 226, 224, 3, 2, 2, 2, 226, 227, 3, 2, 2, 2, 227, 230, 3, 2, 2, 2, 228, 226, 3, 2, 2, 2, 229, 221, 3, 2, 2, 2, 229, 230, 3, 2, 2, 2, 230, 231, 3, 2, 2, 2, 231, 232, 7, 30, 2, 2, 232, 37, 3, 2, 2, 2, 233, 234, 5, 40, 21, 2, 234, 39, 3, 2, 2, 2, 235, 236, 7, 12, 2, 2, 236, 237, 5, 42, 22, 2, 237, 239, 5, 46, 24, 2, 238, 240, 5, 10, 6, 2, 239, 238, 3, 2, 2, 2, 239, 240, 3, 2, 2, 2, 240, 242, 3, 2, 2, 2, 241, 243, 5, 48, 25, 2, 242, 241, 3, 2, 2, 2, 242, 243, 3, 2, 2, 2, 243, 245, 3, 2, 2, 2, 244, 246, 5, 52, 27, 2, 245, 244, 3, 2, 2, 2, 245, 246, 3, 2, 2, 2, 246, 41, 3, 2, 2, 2, 247, 252, 5, 44, 23, 2, 248, 249, 7, 36, 2, 2, 249, 251, 5, 44, 23, 2, 250, 248, 3, 2, 2, 2, 251, 254, 3, 2, 2, 2, 252, 250, 3, 2, 2, 2, 252, 253, 3, 2, 2, 2, 253, 43, 3, 2, 2, 2, 254, 252, 3, 2, 2, 2, 255, 264, 7, 33, 2, 2, 256, 258, 5, 56, 29, 2, 257, 259, 7, 14, 2, 2, 258, 257, 3, 2, 2, 2, 258, 259, 3, 2, 2, 2, 259, 261, 3, 2, 2, 2, 260, 262, 5, 60, 31, 2, 261, 260, 3, 2, 2, 2, 261, 262, 3, 2, 2, 2, 262, 264, 
3, 2, 2, 2, 263, 255, 3, 2, 2, 2, 263, 256, 3, 2, 2, 2, 264, 45, 3, 2, 2, 2, 265, 266, 7, 13, 2, 2, 266, 267, 5, 54, 28, 2, 267, 47, 3, 2, 2, 2, 268, 269, 7, 16, 2, 2, 269, 270, 7, 15, 2, 2, 270, 275, 5, 50, 26, 2, 271, 272, 7, 36, 2, 2, 272, 274, 5, 50, 26, 2, 273, 271, 3, 2, 2, 2, 274, 277, 3, 2, 2, 2, 275, 273, 3, 2, 2, 2, 275, 276, 3, 2, 2, 2, 276, 49, 3, 2, 2, 2, 277, 275, 3, 2, 2, 2, 278, 280, 5, 56, 29, 2, 279, 281, 9, 5, 2, 2, 280, 279, 3, 2, 2, 2, 280, 281, 3, 2, 2, 2, 281, 51, 3, 2, 2, 2, 282, 283, 7, 19, 2, 2, 283, 284, 5, 64, 33, 2, 284, 53, 3, 2, 2, 2, 285, 286, 5, 72, 37, 2, 286, 55, 3, 2, 2, 2, 287, 288, 5, 72, 37, 2, 288, 57, 3, 2, 2, 2, 289, 290, 5, 72, 37, 2, 290, 59, 3, 2, 2, 2, 291, 293, 7, 14, 2, 2, 292, 291, 3, 2, 2, 2, 292, 293, 3, 2, 2, 2, 293, 294, 3, 2, 2, 2, 294, 295, 5, 72, 37, 2, 295, 61, 3, 2, 2, 2, 296, 299, 5, 64, 33, 2, 297, 299, 5, 66, 34, 2, 298, 296, 3, 2, 2, 2, 298, 297, 3, 2, 2, 2, 299, 63, 3, 2, 2, 2, 300, 301, 7, 24, 2, 2, 301, 65, 3, 2, 2, 2, 302, 303, 7, 25, 2, 2, 303, 67, 3, 2, 2, 2, 304, 305, 9, 6, 2, 2, 305, 69, 3, 2, 2, 2, 306, 307, 9, 7, 2, 2, 307, 71, 3, 2, 2, 2, 308, 309, 5, 80, 41, 2, 309, 73, 3, 2, 2, 2, 310, 311, 7, 46, 2, 2, 311, 75, 3, 2, 2, 2, 312, 316, 7, 45, 2, 2, 313, 314, 7, 50, 2, 2, 314, 316, 7, 51, 2, 2, 315, 312, 3, 2, 2, 2, 315, 313, 3, 2, 2, 2, 316, 77, 3, 2, 2, 2, 317, 321, 7, 44, 2, 2, 318, 319, 7, 49, 2, 2, 319, 321, 7, 51, 2, 2, 320, 317, 3, 2, 2, 2, 320, 318, 3, 2, 2, 2, 321, 79, 3, 2, 2, 2, 322, 323, 9, 8, 2, 2, 323, 81, 3, 2, 2, 2, 37, 85, 89, 93, 108, 116, 118, 135, 144, 149, 156, 164, 172, 176, 183, 188, 194, 198, 206, 213, 217, 226, 229, 239, 242, 245, 252, 258, 261, 263, 275, 280, 292, 298, 315, 320]"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParser.tokens",
    "content": "OR=1\nAND=2\nNOT=3\nIN=4\nCONTAIN_ALL=5\nCONTAIN_ANY=6\nBETWEEN=7\nLIKE=8\nWHERE=9\nSELECT=10\nFROM=11\nAS=12\nBY=13\nORDER=14\nASC=15\nDESC=16\nLIMIT=17\nTRUE_V=18\nFALSE_V=19\nIS=20\nNULL_V=21\nINTEGER=22\nFLOAT=23\nSQUOTA_STRING=24\nDQUOTA_STRING=25\nDOT=26\nLP=27\nRP=28\nLMP=29\nRMP=30\nASTERISK=31\nPLUS_SIGN=32\nMINUS_SIGN=33\nCOMMA=34\nSOLIDUS=35\nMOD=36\nAT_SIGN=37\nASSIGN_OP=38\nSHARP_SIGN=39\nCOLON=40\nSEMI=41\nLE_OP=42\nGE_OP=43\nNE_OP=44\nCARET_OP=45\nTILDE_OP=46\nL_OP=47\nG_OP=48\nE_OP=49\nCONCAT_OP=50\nUNDERSCORE=51\nSPACES=52\nVECTOR=53\nSINGLE_LINE_COMMENT=54\nMULTI_LINE_COMMENT=55\nREGULAR_ID=56\n'OR'=1\n'AND'=2\n'NOT'=3\n'IN'=4\n'CONTAIN_ALL'=5\n'CONTAIN_ANY'=6\n'BETWEEN'=7\n'LIKE'=8\n'WHERE'=9\n'SELECT'=10\n'FROM'=11\n'AS'=12\n'BY'=13\n'ORDER'=14\n'ASC'=15\n'DESC'=16\n'LIMIT'=17\n'TRUE'=18\n'FALSE'=19\n'IS'=20\n'NULL'=21\n'.'=26\n'('=27\n')'=28\n'['=29\n']'=30\n'*'=31\n'+'=32\n'-'=33\n','=34\n'/'=35\n'%'=36\n'@'=37\n':='=38\n'#'=39\n':'=40\n';'=41\n'<='=42\n'>='=43\n'!='=44\n'^'=45\n'~'=46\n'<'=47\n'>'=48\n'='=49\n'||'=50\n'_'=51\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParserBaseListener.cc",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n\n#include \"SQLParserBaseListener.h\"\n\n\nusing namespace antlr4;\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParserBaseListener.h",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n#pragma once\n\n\n#include \"SQLParserListener.h\"\n#include \"antlr4-runtime.h\"\n\n\nnamespace antlr4 {\n\n/**\n * This class provides an empty implementation of SQLParserListener,\n * which can be extended to create a listener which only needs to handle a\n * subset of the available methods.\n */\nclass SQLParserBaseListener : public SQLParserListener {\n public:\n  virtual void enterSwallow_to_semi(\n      SQLParser::Swallow_to_semiContext * /*ctx*/) override {}\n  virtual void exitSwallow_to_semi(\n      SQLParser::Swallow_to_semiContext * /*ctx*/) override {}\n\n  virtual void enterCompilation_unit(\n      SQLParser::Compilation_unitContext * /*ctx*/) override {}\n  virtual void exitCompilation_unit(\n      SQLParser::Compilation_unitContext * /*ctx*/) override {}\n\n  virtual void enterLogic_expr_unit(\n      SQLParser::Logic_expr_unitContext * /*ctx*/) override {}\n  virtual void exitLogic_expr_unit(\n      SQLParser::Logic_expr_unitContext * /*ctx*/) override {}\n\n  virtual void enterUnit_statement(\n      SQLParser::Unit_statementContext * /*ctx*/) override {}\n  virtual void exitUnit_statement(\n      SQLParser::Unit_statementContext * /*ctx*/) override {}\n\n  virtual void enterWhere_clause(\n      SQLParser::Where_clauseContext * /*ctx*/) override {}\n  virtual void exitWhere_clause(\n      SQLParser::Where_clauseContext * /*ctx*/) override {}\n\n  virtual void enterLogic_expr(\n      SQLParser::Logic_exprContext * /*ctx*/) override {}\n  virtual void exitLogic_expr(SQLParser::Logic_exprContext * /*ctx*/) override {\n  }\n\n  virtual void enterEnclosed_expr(\n      SQLParser::Enclosed_exprContext * /*ctx*/) override {}\n  virtual void exitEnclosed_expr(\n      SQLParser::Enclosed_exprContext * /*ctx*/) override {}\n\n  virtual void enterRelation_expr(\n      SQLParser::Relation_exprContext * /*ctx*/) override {}\n  virtual void exitRelation_expr(\n      SQLParser::Relation_exprContext * 
/*ctx*/) override {}\n\n  virtual void enterRel_oper(SQLParser::Rel_operContext * /*ctx*/) override {}\n  virtual void exitRel_oper(SQLParser::Rel_operContext * /*ctx*/) override {}\n\n  virtual void enterValue_expr(\n      SQLParser::Value_exprContext * /*ctx*/) override {}\n  virtual void exitValue_expr(SQLParser::Value_exprContext * /*ctx*/) override {\n  }\n\n  virtual void enterIn_value_expr_list(\n      SQLParser::In_value_expr_listContext * /*ctx*/) override {}\n  virtual void exitIn_value_expr_list(\n      SQLParser::In_value_expr_listContext * /*ctx*/) override {}\n\n  virtual void enterIn_value_expr(\n      SQLParser::In_value_exprContext * /*ctx*/) override {}\n  virtual void exitIn_value_expr(\n      SQLParser::In_value_exprContext * /*ctx*/) override {}\n\n  virtual void enterConstant(SQLParser::ConstantContext * /*ctx*/) override {}\n  virtual void exitConstant(SQLParser::ConstantContext * /*ctx*/) override {}\n\n  virtual void enterConstant_num_and_str(\n      SQLParser::Constant_num_and_strContext * /*ctx*/) override {}\n  virtual void exitConstant_num_and_str(\n      SQLParser::Constant_num_and_strContext * /*ctx*/) override {}\n\n  virtual void enterMatrix(SQLParser::MatrixContext * /*ctx*/) override {}\n  virtual void exitMatrix(SQLParser::MatrixContext * /*ctx*/) override {}\n\n  virtual void enterVector_expr(\n      SQLParser::Vector_exprContext * /*ctx*/) override {}\n  virtual void exitVector_expr(\n      SQLParser::Vector_exprContext * /*ctx*/) override {}\n\n  virtual void enterFunction_value_expr(\n      SQLParser::Function_value_exprContext * /*ctx*/) override {}\n  virtual void exitFunction_value_expr(\n      SQLParser::Function_value_exprContext * /*ctx*/) override {}\n\n  virtual void enterFunction_call(\n      SQLParser::Function_callContext * /*ctx*/) override {}\n  virtual void exitFunction_call(\n      SQLParser::Function_callContext * /*ctx*/) override {}\n\n  virtual void enterDql_statement(\n      SQLParser::Dql_statementContext 
* /*ctx*/) override {}\n  virtual void exitDql_statement(\n      SQLParser::Dql_statementContext * /*ctx*/) override {}\n\n  virtual void enterSelect_statement(\n      SQLParser::Select_statementContext * /*ctx*/) override {}\n  virtual void exitSelect_statement(\n      SQLParser::Select_statementContext * /*ctx*/) override {}\n\n  virtual void enterSelected_elements(\n      SQLParser::Selected_elementsContext * /*ctx*/) override {}\n  virtual void exitSelected_elements(\n      SQLParser::Selected_elementsContext * /*ctx*/) override {}\n\n  virtual void enterSelected_element(\n      SQLParser::Selected_elementContext * /*ctx*/) override {}\n  virtual void exitSelected_element(\n      SQLParser::Selected_elementContext * /*ctx*/) override {}\n\n  virtual void enterFrom_clause(\n      SQLParser::From_clauseContext * /*ctx*/) override {}\n  virtual void exitFrom_clause(\n      SQLParser::From_clauseContext * /*ctx*/) override {}\n\n  virtual void enterOrder_by_clause(\n      SQLParser::Order_by_clauseContext * /*ctx*/) override {}\n  virtual void exitOrder_by_clause(\n      SQLParser::Order_by_clauseContext * /*ctx*/) override {}\n\n  virtual void enterOrder_by_element(\n      SQLParser::Order_by_elementContext * /*ctx*/) override {}\n  virtual void exitOrder_by_element(\n      SQLParser::Order_by_elementContext * /*ctx*/) override {}\n\n  virtual void enterLimit_clause(\n      SQLParser::Limit_clauseContext * /*ctx*/) override {}\n  virtual void exitLimit_clause(\n      SQLParser::Limit_clauseContext * /*ctx*/) override {}\n\n  virtual void enterTableview_name(\n      SQLParser::Tableview_nameContext * /*ctx*/) override {}\n  virtual void exitTableview_name(\n      SQLParser::Tableview_nameContext * /*ctx*/) override {}\n\n  virtual void enterField_name(\n      SQLParser::Field_nameContext * /*ctx*/) override {}\n  virtual void exitField_name(SQLParser::Field_nameContext * /*ctx*/) override {\n  }\n\n  virtual void enterTable_alias(\n      
SQLParser::Table_aliasContext * /*ctx*/) override {}\n  virtual void exitTable_alias(\n      SQLParser::Table_aliasContext * /*ctx*/) override {}\n\n  virtual void enterField_alias(\n      SQLParser::Field_aliasContext * /*ctx*/) override {}\n  virtual void exitField_alias(\n      SQLParser::Field_aliasContext * /*ctx*/) override {}\n\n  virtual void enterNumeric(SQLParser::NumericContext * /*ctx*/) override {}\n  virtual void exitNumeric(SQLParser::NumericContext * /*ctx*/) override {}\n\n  virtual void enterInt_value(SQLParser::Int_valueContext * /*ctx*/) override {}\n  virtual void exitInt_value(SQLParser::Int_valueContext * /*ctx*/) override {}\n\n  virtual void enterFloat_value(\n      SQLParser::Float_valueContext * /*ctx*/) override {}\n  virtual void exitFloat_value(\n      SQLParser::Float_valueContext * /*ctx*/) override {}\n\n  virtual void enterQuoted_string(\n      SQLParser::Quoted_stringContext * /*ctx*/) override {}\n  virtual void exitQuoted_string(\n      SQLParser::Quoted_stringContext * /*ctx*/) override {}\n\n  virtual void enterBool_value(\n      SQLParser::Bool_valueContext * /*ctx*/) override {}\n  virtual void exitBool_value(SQLParser::Bool_valueContext * /*ctx*/) override {\n  }\n\n  virtual void enterIdentifier(\n      SQLParser::IdentifierContext * /*ctx*/) override {}\n  virtual void exitIdentifier(SQLParser::IdentifierContext * /*ctx*/) override {\n  }\n\n  virtual void enterNe_op(SQLParser::Ne_opContext * /*ctx*/) override {}\n  virtual void exitNe_op(SQLParser::Ne_opContext * /*ctx*/) override {}\n\n  virtual void enterGe_op(SQLParser::Ge_opContext * /*ctx*/) override {}\n  virtual void exitGe_op(SQLParser::Ge_opContext * /*ctx*/) override {}\n\n  virtual void enterLe_op(SQLParser::Le_opContext * /*ctx*/) override {}\n  virtual void exitLe_op(SQLParser::Le_opContext * /*ctx*/) override {}\n\n  virtual void enterRegular_id(\n      SQLParser::Regular_idContext * /*ctx*/) override {}\n  virtual void 
exitRegular_id(SQLParser::Regular_idContext * /*ctx*/) override {\n  }\n\n\n  virtual void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}\n  virtual void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}\n  virtual void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override {}\n  virtual void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override {}\n};\n\n}  // namespace antlr4\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParserListener.cc",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n\n#include \"SQLParserListener.h\"\n\n\nusing namespace antlr4;\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen/SQLParserListener.h",
    "content": "\n// Generated from SQLParser.g4 by ANTLR 4.8\n\n#pragma once\n\n\n#include \"SQLParser.h\"\n#include \"antlr4-runtime.h\"\n\n\nnamespace antlr4 {\n\n/**\n * This interface defines an abstract listener for a parse tree produced by\n * SQLParser.\n */\nclass SQLParserListener : public antlr4::tree::ParseTreeListener {\n public:\n  virtual void enterSwallow_to_semi(SQLParser::Swallow_to_semiContext *ctx) = 0;\n  virtual void exitSwallow_to_semi(SQLParser::Swallow_to_semiContext *ctx) = 0;\n\n  virtual void enterCompilation_unit(\n      SQLParser::Compilation_unitContext *ctx) = 0;\n  virtual void exitCompilation_unit(\n      SQLParser::Compilation_unitContext *ctx) = 0;\n\n  virtual void enterLogic_expr_unit(SQLParser::Logic_expr_unitContext *ctx) = 0;\n  virtual void exitLogic_expr_unit(SQLParser::Logic_expr_unitContext *ctx) = 0;\n\n  virtual void enterUnit_statement(SQLParser::Unit_statementContext *ctx) = 0;\n  virtual void exitUnit_statement(SQLParser::Unit_statementContext *ctx) = 0;\n\n  virtual void enterWhere_clause(SQLParser::Where_clauseContext *ctx) = 0;\n  virtual void exitWhere_clause(SQLParser::Where_clauseContext *ctx) = 0;\n\n  virtual void enterLogic_expr(SQLParser::Logic_exprContext *ctx) = 0;\n  virtual void exitLogic_expr(SQLParser::Logic_exprContext *ctx) = 0;\n\n  virtual void enterEnclosed_expr(SQLParser::Enclosed_exprContext *ctx) = 0;\n  virtual void exitEnclosed_expr(SQLParser::Enclosed_exprContext *ctx) = 0;\n\n  virtual void enterRelation_expr(SQLParser::Relation_exprContext *ctx) = 0;\n  virtual void exitRelation_expr(SQLParser::Relation_exprContext *ctx) = 0;\n\n  virtual void enterRel_oper(SQLParser::Rel_operContext *ctx) = 0;\n  virtual void exitRel_oper(SQLParser::Rel_operContext *ctx) = 0;\n\n  virtual void enterValue_expr(SQLParser::Value_exprContext *ctx) = 0;\n  virtual void exitValue_expr(SQLParser::Value_exprContext *ctx) = 0;\n\n  virtual void enterIn_value_expr_list(\n      SQLParser::In_value_expr_listContext 
*ctx) = 0;\n  virtual void exitIn_value_expr_list(\n      SQLParser::In_value_expr_listContext *ctx) = 0;\n\n  virtual void enterIn_value_expr(SQLParser::In_value_exprContext *ctx) = 0;\n  virtual void exitIn_value_expr(SQLParser::In_value_exprContext *ctx) = 0;\n\n  virtual void enterConstant(SQLParser::ConstantContext *ctx) = 0;\n  virtual void exitConstant(SQLParser::ConstantContext *ctx) = 0;\n\n  virtual void enterConstant_num_and_str(\n      SQLParser::Constant_num_and_strContext *ctx) = 0;\n  virtual void exitConstant_num_and_str(\n      SQLParser::Constant_num_and_strContext *ctx) = 0;\n\n  virtual void enterMatrix(SQLParser::MatrixContext *ctx) = 0;\n  virtual void exitMatrix(SQLParser::MatrixContext *ctx) = 0;\n\n  virtual void enterVector_expr(SQLParser::Vector_exprContext *ctx) = 0;\n  virtual void exitVector_expr(SQLParser::Vector_exprContext *ctx) = 0;\n\n  virtual void enterFunction_value_expr(\n      SQLParser::Function_value_exprContext *ctx) = 0;\n  virtual void exitFunction_value_expr(\n      SQLParser::Function_value_exprContext *ctx) = 0;\n\n  virtual void enterFunction_call(SQLParser::Function_callContext *ctx) = 0;\n  virtual void exitFunction_call(SQLParser::Function_callContext *ctx) = 0;\n\n  virtual void enterDql_statement(SQLParser::Dql_statementContext *ctx) = 0;\n  virtual void exitDql_statement(SQLParser::Dql_statementContext *ctx) = 0;\n\n  virtual void enterSelect_statement(\n      SQLParser::Select_statementContext *ctx) = 0;\n  virtual void exitSelect_statement(\n      SQLParser::Select_statementContext *ctx) = 0;\n\n  virtual void enterSelected_elements(\n      SQLParser::Selected_elementsContext *ctx) = 0;\n  virtual void exitSelected_elements(\n      SQLParser::Selected_elementsContext *ctx) = 0;\n\n  virtual void enterSelected_element(\n      SQLParser::Selected_elementContext *ctx) = 0;\n  virtual void exitSelected_element(\n      SQLParser::Selected_elementContext *ctx) = 0;\n\n  virtual void 
enterFrom_clause(SQLParser::From_clauseContext *ctx) = 0;\n  virtual void exitFrom_clause(SQLParser::From_clauseContext *ctx) = 0;\n\n  virtual void enterOrder_by_clause(SQLParser::Order_by_clauseContext *ctx) = 0;\n  virtual void exitOrder_by_clause(SQLParser::Order_by_clauseContext *ctx) = 0;\n\n  virtual void enterOrder_by_element(\n      SQLParser::Order_by_elementContext *ctx) = 0;\n  virtual void exitOrder_by_element(\n      SQLParser::Order_by_elementContext *ctx) = 0;\n\n  virtual void enterLimit_clause(SQLParser::Limit_clauseContext *ctx) = 0;\n  virtual void exitLimit_clause(SQLParser::Limit_clauseContext *ctx) = 0;\n\n  virtual void enterTableview_name(SQLParser::Tableview_nameContext *ctx) = 0;\n  virtual void exitTableview_name(SQLParser::Tableview_nameContext *ctx) = 0;\n\n  virtual void enterField_name(SQLParser::Field_nameContext *ctx) = 0;\n  virtual void exitField_name(SQLParser::Field_nameContext *ctx) = 0;\n\n  virtual void enterTable_alias(SQLParser::Table_aliasContext *ctx) = 0;\n  virtual void exitTable_alias(SQLParser::Table_aliasContext *ctx) = 0;\n\n  virtual void enterField_alias(SQLParser::Field_aliasContext *ctx) = 0;\n  virtual void exitField_alias(SQLParser::Field_aliasContext *ctx) = 0;\n\n  virtual void enterNumeric(SQLParser::NumericContext *ctx) = 0;\n  virtual void exitNumeric(SQLParser::NumericContext *ctx) = 0;\n\n  virtual void enterInt_value(SQLParser::Int_valueContext *ctx) = 0;\n  virtual void exitInt_value(SQLParser::Int_valueContext *ctx) = 0;\n\n  virtual void enterFloat_value(SQLParser::Float_valueContext *ctx) = 0;\n  virtual void exitFloat_value(SQLParser::Float_valueContext *ctx) = 0;\n\n  virtual void enterQuoted_string(SQLParser::Quoted_stringContext *ctx) = 0;\n  virtual void exitQuoted_string(SQLParser::Quoted_stringContext *ctx) = 0;\n\n  virtual void enterBool_value(SQLParser::Bool_valueContext *ctx) = 0;\n  virtual void exitBool_value(SQLParser::Bool_valueContext *ctx) = 0;\n\n  virtual void 
enterIdentifier(SQLParser::IdentifierContext *ctx) = 0;\n  virtual void exitIdentifier(SQLParser::IdentifierContext *ctx) = 0;\n\n  virtual void enterNe_op(SQLParser::Ne_opContext *ctx) = 0;\n  virtual void exitNe_op(SQLParser::Ne_opContext *ctx) = 0;\n\n  virtual void enterGe_op(SQLParser::Ge_opContext *ctx) = 0;\n  virtual void exitGe_op(SQLParser::Ge_opContext *ctx) = 0;\n\n  virtual void enterLe_op(SQLParser::Le_opContext *ctx) = 0;\n  virtual void exitLe_op(SQLParser::Le_opContext *ctx) = 0;\n\n  virtual void enterRegular_id(SQLParser::Regular_idContext *ctx) = 0;\n  virtual void exitRegular_id(SQLParser::Regular_idContext *ctx) = 0;\n};\n\n}  // namespace antlr4\n"
  },
  {
    "path": "src/db/sqlengine/antlr/gen_parser.sh",
    "content": "#!/bin/sh\n#****************************************************************#\n# ScriptName: gen_parser.sh\n# Author: fancy.lf\n# Function: command to generate antlr sql parser code in se directory\n#***************************************************************#\n\njava -jar ../../../../../thirdparty/antlr/antlr-4.8-complete.jar -Dlanguage=Cpp -package antlr4 SQLLexer.g4 SQLParser.g4 -o gen\nsed -i 's/\\bu8\"/\"/g' gen/*.cc\n"
  },
  {
    "path": "src/db/sqlengine/common/generic_node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n\nnamespace zvec::sqlengine {\n\ntemplate <typename NodeOp, typename Node>\nclass Generic_Node {\n public:\n  using Ptr = std::shared_ptr<Node>;\n\n  Generic_Node(NodeOp m_op);\n  virtual ~Generic_Node() = default;\n\n  void set_left(Ptr m_left);\n  void set_right(Ptr m_right);\n  const Ptr &left() const {\n    return left_;\n  }\n  const Ptr &right() const {\n    return right_;\n  }\n  Node *left_node() const {\n    return left_.get();\n  }\n  Node *right_node() const {\n    return right_.get();\n  }\n  void set_parent(Generic_Node *m_parent);\n  Generic_Node *parent();\n\n  virtual NodeOp op() const {\n    return op_;\n  }\n  virtual void set_op(NodeOp value) {\n    op_ = value;\n  }\n  virtual std::string text() const = 0;\n\n protected:\n  NodeOp op_;\n  Ptr left_{nullptr};\n  Ptr right_{nullptr};\n  Generic_Node *parent_{nullptr};\n};\n\ntemplate <typename NodeOp, typename Node>\nGeneric_Node<NodeOp, Node>::Generic_Node(NodeOp m_op) {\n  op_ = m_op;\n}\n\ntemplate <typename NodeOp, typename Node>\nvoid Generic_Node<NodeOp, Node>::set_left(Ptr m_left) {\n  left_ = std::move(m_left);\n  if (left_ != nullptr) {\n    left_->set_parent(this);\n  }\n}\ntemplate <typename NodeOp, typename Node>\nvoid Generic_Node<NodeOp, Node>::set_right(Ptr m_right) {\n  right_ = std::move(m_right);\n  if (right_ != nullptr) 
{\n    right_->set_parent(this);\n  }\n}\ntemplate <typename NodeOp, typename Node>\nvoid Generic_Node<NodeOp, Node>::set_parent(Generic_Node<NodeOp, Node> *value) {\n  this->parent_ = value;\n}\ntemplate <typename NodeOp, typename Node>\nGeneric_Node<NodeOp, Node> *Generic_Node<NodeOp, Node>::parent() {\n  return parent_;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/common/group_by.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/doc.h>\n\nnamespace zvec::sqlengine {\n\nstruct GroupBy {\n  using Ptr = std::shared_ptr<GroupBy>;\n\n  GroupBy() = default;\n\n  GroupBy(std::string group_by_field, uint32_t group_topk, uint32_t group_count)\n      : group_by_field(std::move(group_by_field)),\n        group_topk(group_topk),\n        group_count(group_count) {}\n\n  std::string to_string() {\n    return ailego::StringHelper::Concat(\"field[\", group_by_field, \"] topk[\",\n                                        group_topk, \"] count[\", group_count,\n                                        \"]\");\n  }\n\n  std::string group_by_field;\n  uint32_t group_topk{0};\n  uint32_t group_count{0};\n};\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/common/util.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"util.h\"\n#include <sys/time.h>\n#include <algorithm>\n#include <cctype>\n#include <iostream>\n#include <string>\n#include <arrow/type.h>\n#include <arrow/type_fwd.h>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec::sqlengine {\n\n// std::string\n// Util::trim(const std::string str, char c) {\n//    std::string tmp = str.substr(str.find_first_not_of(c));\n//  return tmp.substr(0, tmp.find_last_not_of(c) + 1);\n//}\n\n// above implementation will trim more than one c at front or rear. This is not\n// expected.\n// below implementation remove both side only one matched char, strictly.\n// str is supposed to match on both side at same time. 
remove both side one byte\n// each as c = 0\nstd::string Util::trim_one_both_side(const std::string &str, unsigned char c) {\n  int len = str.length();\n  if (len < 2) {\n    return str;\n  }\n\n  if (str.at(0) == c && str.at(len - 1) == c) {\n    return str.substr(1, len - 2);\n  }\n\n  return str;\n}\n\nvoid Util::string_replace(const std::string &src, const std::string &dst,\n                          std::string *str) {\n  std::string::size_type pos = 0;\n  std::string::size_type srclen = src.size();\n  std::string::size_type dstlen = dst.size();\n\n  while ((pos = str->find(src, pos)) != std::string::npos) {\n    str->replace(pos, srclen, dst);\n    pos += dstlen;\n  }\n\n  return;\n}\n\n// normalize sql for parse result after parse\nstd::string Util::normalize(const std::string &sql) {\n  std::string new_sql = sql;\n  // rule 1. replace \\\" with \"\n  Util::string_replace(\"\\\\\\\"\", \"\\\"\", &new_sql);\n  // rule 2. replace \\' with ''\n  Util::string_replace(\"\\\\\\'\", \"\\'\", &new_sql);\n\n  return new_sql;\n}\n\nstd::shared_ptr<arrow::Schema> Util::append_field(\n    const arrow::Schema &schema, const std::string &name,\n    std::shared_ptr<arrow::DataType> type) {\n  auto res = schema.AddField(schema.num_fields(), arrow::field(name, type));\n  return res.MoveValueUnsafe();\n}\n\nstd::shared_ptr<arrow::DataType> Util::sparse_type() {\n  return arrow::struct_(arrow::FieldVector{\n      arrow::field(\"index\", arrow::binary()),\n      arrow::field(\"value\", arrow::binary()),\n  });\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/common/util.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <string>\n#include <arrow/api.h>\n#include <arrow/record_batch.h>\n\nnamespace zvec::sqlengine {\n\nstatic const constexpr char *kFieldScore = \"_zvec_score\";\nstatic const constexpr char *kFieldVector = \"_zvec_vector\";\nstatic const constexpr char *kFieldSparseIndices = \"_zvec_sindices\";\nstatic const constexpr char *kFieldSparseValues = \"_zvec_svalues\";\nstatic const constexpr char *kFieldIsValid = \"_zvec_is_valid\";\nstatic const constexpr char *kFieldGroupId = \"_zvec_group_id\";\n\nstatic const inline std::string kCheckNotFiltered = \"check_not_filtered\";\nstatic const inline std::string kFetchVector = \"fetch_vector\";\nstatic const inline std::string kFetchSparseVector = \"fetch_sparse_vector\";\nstatic const inline std::string kContainAll = \"contain_all\";\nstatic const inline std::string kContainAny = \"contain_any\";\n\nstatic const inline std::string kFuncArrayLength = \"array_length\";\n\n#define enum_to_string(x) #x\n\nclass Util {\n public:\n  static std::string trim_one_both_side(const std::string &str,\n                                        unsigned char c);\n  static void string_replace(const std::string &strsrc,\n                             const std::string &strdst, std::string *str);\n  static std::string normalize(const std::string &sql);\n\n  static std::shared_ptr<arrow::Schema> 
append_field(\n      const arrow::Schema &schema, const std::string &name,\n      std::shared_ptr<arrow::DataType> type);\n\n  static std::shared_ptr<arrow::DataType> sparse_type();\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/base_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <iostream>\n#include <memory>\n\nnamespace zvec::sqlengine {\n\nclass BaseInfo {\n public:\n  using Ptr = std::shared_ptr<BaseInfo>;\n\n  BaseInfo(const std::string &value) {\n    table_name_ = value;\n  }\n\n  virtual ~BaseInfo() {}\n\n  BaseInfo(const BaseInfo &info) {\n    table_name_ = info.table_name_;\n  }\n\n  BaseInfo &operator=(const BaseInfo &info) {\n    table_name_ = info.table_name_;\n    return *this;\n  }\n\n  std::string table_name() {\n    return table_name_;\n  }\n\n  bool validate() {\n    return true;\n  }\n\n  const std::string &err_msg() {\n    return err_msg_;\n  }\n\n  void set_err_msg(const std::string &value) {\n    err_msg_ = value;\n  }\n\n  virtual std::string to_string() = 0;\n\n private:\n  std::string table_name_{\"\"};\n  std::string err_msg_{\"\"};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/case_changing_charstream.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <iostream>\n#include \"CharStream.h\"\n\nnamespace zvec::sqlengine {\n\nusing namespace antlr4;\n\nclass CaseChangingCharStream : public CharStream {\n public:\n  // Constructs a new CaseChangingCharStream wrapping the given {@link\n  // CharStream} forcing all characters to upper_ case or lower case.\n  // @param stream_ The stream_ to wrap.\n  // @param upper_ If true force each symbol to upper_ case, otherwise force to\n  // lower.\n  CaseChangingCharStream(CharStream *m_stream, bool m_upper) {\n    stream_ = m_stream;\n    upper_ = m_upper;\n  }\n\n  std::string getText(const misc::Interval &interval) {\n    return stream_->getText(interval);\n  }\n\n  void consume() {\n    stream_->consume();\n  }\n\n  size_t LA(ssize_t i) {\n    size_t c = stream_->LA(i);\n    if (c <= 0) {\n      return c;\n    }\n    if (upper_) {\n      return toupper((int)c);\n    }\n    return tolower((int)c);\n  }\n\n  ssize_t mark() {\n    return stream_->mark();\n  }\n\n  void release(ssize_t marker) {\n    stream_->release(marker);\n  }\n\n  size_t index() {\n    return stream_->index();\n  }\n\n  void seek(size_t m_index) {\n    stream_->seek(m_index);\n  }\n\n  size_t size() {\n    return stream_->size();\n  }\n\n  std::string getSourceName() const {\n    return stream_->getSourceName();\n  }\n\n  std::string toString() const {\n    
return stream_->toString();\n  }\n\n private:\n  CharStream *stream_;\n  bool upper_ = true;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/error_verbose_listener.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include \"BaseErrorListener.h\"\n\nnamespace zvec::sqlengine {\n\nusing namespace antlr4;\n\n#define UNUSED(x) (void)x\n\nclass ErrorVerboseListener : BaseErrorListener {\n public:\n  ErrorVerboseListener() {}\n  ~ErrorVerboseListener() {}\n\n  void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line,\n                   size_t charPositionInLine, const std::string &msg,\n                   std::exception_ptr e) {\n    UNUSED(recognizer);\n    UNUSED(offendingSymbol);\n    UNUSED(e);\n\n    err_msg_ = std::to_string(line) + \" \" + std::to_string(charPositionInLine) +\n               \" \" + msg;\n    return;\n  }\n\n  const std::string &err_msg() {\n    return err_msg_;\n  }\n\n private:\n  std::string err_msg_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/node.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"node.h\"\n#include <assert.h>\n#include <sstream>\n#include \"db/sqlengine/common/util.h\"\n\nnamespace zvec::sqlengine {\n\nNode::Node() : Generic_Node(NodeOp::T_NONE) {}\n\nNode::Node(NodeOp m_op) : Generic_Node(m_op) {\n  set_op(m_op);\n}\n\nvoid Node::set_op(NodeOp value) {\n  Generic_Node<NodeOp, Node>::set_op(value);\n  set_type_by_op();\n}\n\nstd::string Node::op_name() const {\n  return type_to_str(op_);\n}\n\nNode::NodeType Node::type() {\n  return type_;\n}\n\nvoid Node::set_type_by_op() {\n  NodeType node_type = NodeType::NO_TYPE;\n  switch (op()) {\n    case NodeOp::T_AND:\n    case NodeOp::T_OR:\n      node_type = NodeType::LOGIC_EXPR;\n      break;\n\n    case NodeOp::T_EQ:\n    case NodeOp::T_NE:\n    case NodeOp::T_GT:\n    case NodeOp::T_GE:\n    case NodeOp::T_LT:\n    case NodeOp::T_LE:\n    case NodeOp::T_LIKE:\n    case NodeOp::T_IN:\n    case NodeOp::T_CONTAIN_ALL:\n    case NodeOp::T_CONTAIN_ANY:\n    case NodeOp::T_IS_NULL:\n    case NodeOp::T_IS_NOT_NULL:\n      node_type = NodeType::REL_EXPR;\n      break;\n\n    case NodeOp::T_PLUS:\n    case NodeOp::T_MINUS:\n    case NodeOp::T_MUL:\n    case NodeOp::T_DIV:\n      node_type = NodeType::ARITH_EXPR;\n      break;\n\n    case NodeOp::T_FUNCTION_CALL:\n      node_type = NodeType::FUNC;\n      break;\n\n    case NodeOp::T_RANGE_VALUE:\n    case 
NodeOp::T_LIST_VALUE:\n    case NodeOp::T_VECTOR_MATRIX_VALUE:\n    case NodeOp::T_INT_VALUE:\n    case NodeOp::T_FLOAT_VALUE:\n    case NodeOp::T_STRING_VALUE:\n    case NodeOp::T_BOOL_VALUE:\n    case NodeOp::T_NULL_VALUE:\n      node_type = NodeType::CONST;\n      break;\n    case NodeOp::T_ID:\n      node_type = NodeType::ID;\n      break;\n    default:\n      break;\n  }\n\n  type_ = node_type;\n}\n\nstd::string Node::text() const {\n  std::stringstream stream;\n  switch (type_) {\n    case NodeType::LOGIC_EXPR:\n      stream << \"(\" << left()->text() << \") \" << op_name() << \" (\"\n             << right()->text() << \")\";\n      break;\n    case NodeType::REL_EXPR:\n      stream << left()->text() << op_name() << right()->text();\n      break;\n    default:\n      break;\n  }\n\n  return stream.str();\n}\n\nstd::string Node::to_string() {\n  return text();\n}\n\n//========================================================================\n\nRangeNode::RangeNode() : Node(NodeOp::T_RANGE_VALUE) {}\n\nRangeNode::RangeNode(bool m_min_equal, bool m_max_equal) {\n  set_op(NodeOp::T_RANGE_VALUE);\n  min_equal_ = m_min_equal;\n  max_equal_ = m_max_equal;\n}\n\nvoid RangeNode::set_min_equal(bool value) {\n  min_equal_ = value;\n}\n\nvoid RangeNode::set_max_equal(bool value) {\n  max_equal_ = value;\n}\n\nbool RangeNode::min_equal() {\n  return min_equal_;\n}\nbool RangeNode::max_equal() {\n  return max_equal_;\n}\n\nstd::string RangeNode::text() const {\n  return (min_equal_ ? \"[\" : \"(\") + left()->text() + \"~\" + right()->text() +\n         (max_equal_ ? 
\"]\" : \")\");\n}\n\nvoid RangeNode::set_child_op(NodeOp value) {\n  child_op_ = value;\n}\n\nNodeOp RangeNode::child_op() {\n  return child_op_;\n}\n\n//========================================================================\n\nConstantNode::ConstantNode(const std::string &m_value) {\n  value_ = m_value;\n}\n\nvoid ConstantNode::set_value(const std::string &m_value) {\n  value_ = m_value;\n}\nconst std::string &ConstantNode::value() {\n  return value_;\n}\n\nstd::string ConstantNode::text() const {\n  return value_;\n}\n\n//========================================================================\n\nIDNode::IDNode(const std::string &m_value) {\n  value_ = m_value;\n  set_op(NodeOp::T_ID);\n}\n\nvoid IDNode::set_value(const std::string &m_value) {\n  value_ = m_value;\n}\nconst std::string &IDNode::value() {\n  return value_;\n}\n\nstd::string IDNode::text() const {\n  return value_;\n}\n\n//========================================================================\n\nFuncNode::FuncNode() : Node(NodeOp::T_FUNCTION_CALL) {}\n\nvoid FuncNode::set_func_name_node(Node::Ptr func_name_node) {\n  func_name_node_ = std::move(func_name_node);\n}\n\nconst Node::Ptr &FuncNode::get_func_name_node() {\n  return func_name_node_;\n}\n\nvoid FuncNode::add_argument(Node::Ptr argument_node) {\n  arguments_.emplace_back(std::move(argument_node));\n}\n\nconst std::vector<Node::Ptr> &FuncNode::arguments() {\n  return arguments_;\n}\n\nstd::string FuncNode::text() const {\n  std::stringstream stream;\n  stream << func_name_node_->text();\n  stream << \"(\";\n\n  int i = 0;\n  for (auto argument : arguments_) {\n    if (i > 0) {\n      stream << \", \";\n    }\n    stream << argument->text();\n    i++;\n  }\n  stream << \")\";\n  return stream.str();\n}\n\n\n//========================================================================\n\nstd::string InValueExprListNode::text() const {\n  std::stringstream stream;\n  if (exclude_) {\n    stream << \"NOT \";\n  }\n\n  stream << \"(\";\n\n  int 
i = 0;\n  for (auto in_value_expr : in_value_expr_list_) {\n    if (i > 0) {\n      stream << \", \";\n    }\n    stream << in_value_expr->text();\n    i++;\n  }\n  stream << \")\";\n  return stream.str();\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <zvec/db/query_params.h>\n#include \"db/sqlengine/common/generic_node.h\"\n\nnamespace zvec::sqlengine {\n\nenum class NodeOp {\n  T_NONE,\n  T_NOT,\n  T_AND,\n  T_OR,\n  T_EQ,\n  T_NE,\n  T_GT,\n  T_GE,\n  T_LT,\n  T_LE,\n  T_BETWEEN,\n  T_LIKE,\n  T_IN,\n  T_CONTAIN_ALL,\n  T_CONTAIN_ANY,\n  T_IS_NULL,\n  T_IS_NOT_NULL,\n  T_PLUS,\n  T_MINUS,\n  T_MUL,\n  T_DIV,\n  T_FUNCTION_CALL,\n  T_RANGE_VALUE,\n  T_LIST_VALUE,\n  T_VECTOR_MATRIX_VALUE,\n  T_INT_VALUE,\n  T_FLOAT_VALUE,\n  T_STRING_VALUE,\n  T_NULL_VALUE,\n  T_ID,\n  T_BOOL_VALUE\n};\n\nclass Node : public Generic_Node<NodeOp, Node> {\n public:\n  using Ptr = std::shared_ptr<Node>;\n\n  static inline std::string type_to_str(NodeOp c) {\n    static std::string names[] = {\"NONE\",\n                                  \"!\",\n                                  \"and\",\n                                  \"or\",\n                                  \"=\",\n                                  \"!=\",\n                                  \">\",\n                                  \">=\",\n                                  \"<\",\n                                  \"<=\",\n                                  \"BETWEEN\",\n                                  \" LIKE \",\n                                  \" IN \",\n          
                        \" CONTAIN_ALL \",\n                                  \" CONTAIN_ANY \",\n                                  \"IS_NULL\",\n                                  \"IS_NOT_NULL\",\n                                  \"+\",\n                                  \"-\",\n                                  \"*\",\n                                  \"/\",\n                                  \"FUNCTION_CALL\",\n                                  \"RANGE_VALUE\",\n                                  \"LIST_VALUE\",\n                                  \"VECTOR_MATRIX_VALUE\",\n                                  \"VECTOR_FEATURES_VALUE\",\n                                  \"INT_VALUE\",\n                                  \"FLOAT_VALUE\",\n                                  \"STRING_VALUE\",\n                                  \"NULL_VALUE\",\n                                  \"ID\",\n                                  \"BOOL_VALUE\"};\n\n    return names[static_cast<int>(c)];\n  }\n\n  enum class NodeType {\n    NO_TYPE,\n    LOGIC_EXPR,\n    REL_EXPR,\n    ENCLOSED_ARITH_EXPR,\n    ARITH_EXPR,\n    FUNC,\n    CONST,\n    ID\n  };\n\n public:\n  Node();\n  Node(NodeOp op);\n  ~Node() override = default;\n\n  void set_op(NodeOp op) override;\n  std::string op_name() const;\n\n  NodeType type();\n\n  virtual std::string text() const override;\n  std::string to_string();\n\n private:\n  void set_type_by_op();\n\n private:\n  static const std::string node_op_names[];\n\n private:\n  NodeType type_{NodeType::NO_TYPE};\n};\n\nclass RangeNode : public Node {\n public:\n  using Ptr = std::shared_ptr<RangeNode>;\n\n  RangeNode();\n  RangeNode(bool m_min_equal, bool m_max_equal);\n  virtual ~RangeNode() = default;\n\n  void set_min_equal(bool value);\n  void set_max_equal(bool value);\n\n  bool min_equal();\n  bool max_equal();\n\n  std::string text() const override;\n  void set_child_op(NodeOp value);\n  NodeOp child_op();\n\n private:\n  bool min_equal_{false}, 
max_equal_{false};\n  NodeOp child_op_{NodeOp::T_NONE};\n};\n\nclass VectorMatrixNode : public Node {\n public:\n  using Ptr = std::shared_ptr<VectorMatrixNode>;\n\n  VectorMatrixNode(std::string matrix, std::string sparse_indices,\n                   std::string sparse_values, QueryParams::Ptr query_params)\n      : matrix_(std::move(matrix)),\n        sparse_indices_(std::move(sparse_indices)),\n        sparse_values_(std::move(sparse_values)),\n        query_params_(std::move(query_params)) {\n    set_op(NodeOp::T_VECTOR_MATRIX_VALUE);\n  }\n\n  const std::string &matrix() const {\n    return matrix_;\n  }\n\n  const std::string &sparse_indices() const {\n    return sparse_indices_;\n  }\n\n  const std::string &sparse_values() const {\n    return sparse_values_;\n  }\n\n  const QueryParams::Ptr &query_params() const {\n    return query_params_;\n  }\n\n  std::string text() const override {\n    // do not distinguish between matrix and vector\n    static std::string txt = \"[...]\";\n    return txt;\n  }\n\n private:\n  std::string matrix_;\n  std::string sparse_indices_;\n  std::string sparse_values_;\n  QueryParams::Ptr query_params_;\n};\n\nclass ConstantNode : public Node {\n public:\n  using Ptr = std::shared_ptr<ConstantNode>;\n\n  ConstantNode(const std::string &m_value);\n\n  void set_value(const std::string &m_value);\n  const std::string &value();\n\n  std::string text() const override;\n\n private:\n  std::string value_{\"\"};\n};\n\nclass IDNode : public Node {\n public:\n  using Ptr = std::shared_ptr<IDNode>;\n\n  IDNode(const std::string &m_value);\n\n  void set_value(const std::string &m_value);\n  const std::string &value();\n\n  std::string text() const override;\n\n private:\n  std::string value_{\"\"};\n};\n\nclass FuncNode : public Node {\n public:\n  using Ptr = std::shared_ptr<FuncNode>;\n\n  FuncNode();\n  virtual ~FuncNode() = default;\n\n  void set_func_name_node(Node::Ptr func_name_node);\n  const Node::Ptr &get_func_name_node();\n\n  
void add_argument(Node::Ptr argument_node);\n  const std::vector<Node::Ptr> &arguments();\n\n  std::string text() const override;\n\n private:\n  Node::Ptr func_name_node_{nullptr};\n  std::vector<Node::Ptr> arguments_{};\n};\n\nclass InValueExprListNode : public Node {\n public:\n  using Ptr = std::shared_ptr<InValueExprListNode>;\n\n  InValueExprListNode() : Node(NodeOp::T_LIST_VALUE) {}\n\n  void add_in_value_expr(Node::Ptr in_value_expr) {\n    in_value_expr_list_.emplace_back(std::move(in_value_expr));\n  }\n\n  const std::vector<Node::Ptr> &in_value_expr_list() {\n    return in_value_expr_list_;\n  }\n\n  bool exclude() {\n    return exclude_;\n  }\n\n  void set_exclude(bool val) {\n    exclude_ = val;\n  }\n\n  std::string text() const override;\n\n private:\n  std::vector<Node::Ptr> in_value_expr_list_{};\n  bool exclude_{false};\n};\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/orderby_elem_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include \"node.h\"\n\nnamespace zvec::sqlengine {\n\nclass OrderByElemInfo {\n public:\n  using Ptr = std::shared_ptr<OrderByElemInfo>;\n\n  OrderByElemInfo() = default;\n\n  const std::string &field_name() {\n    return field_name_;\n  }\n\n  void set_field_name(const std::string &value) {\n    field_name_ = value;\n  }\n\n  void set_desc() {\n    desc_ = true;\n  }\n\n  bool is_desc() {\n    return desc_;\n  }\n\n  std::string to_string() {\n    std::string str = field_name_ + \" \" + (desc_ ? \"DESC\" : \"ASC\");\n    return str;\n  }\n\n private:\n  std::string field_name_{\"\"};\n  bool desc_{false};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/query_parser.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_parser.h\"\n\nnamespace zvec::sqlengine {\n\nSQLInfo::Ptr QueryParser::parse(const std::string &query) {\n  ZVecSQLParser se_sql_parser_;\n\n  SQLInfo::Ptr sql_info = se_sql_parser_.parse(query);\n  if (sql_info == NULL) {\n    err_msg_ = se_sql_parser_.err_msg();\n    return NULL;\n  }\n\n  return sql_info;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/query_parser.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"sql_info.h\"\n#include \"zvec_sql_parser.h\"\n\nnamespace zvec::sqlengine {\n\nclass QueryParser {\n public:\n  SQLInfo::Ptr parse(const std::string &query);\n\n  const std::string &err_msg() {\n    return err_msg_;\n  }\n\n private:\n  std::string err_msg_{\"\"};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/select_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"select_info.h\"\n\nnamespace zvec::sqlengine {\n\nSelectInfo::SelectInfo(const std::string &m_table_name)\n    : BaseInfo(m_table_name) {}\n\nSelectInfo::~SelectInfo() {}\n\nSelectInfo::SelectInfo(const SelectInfo &info) : BaseInfo(info) {\n  if (info.selected_elems_.empty() == false) {\n    for (auto iter = info.selected_elems_.begin();\n         iter != info.selected_elems_.end(); iter++) {\n      add_selected_elem(std::make_shared<SelectedElemInfo>(*(*iter)));\n    }\n  }\n\n  if (info.orderby_elems_.empty() == false) {\n    for (auto iter = info.orderby_elems_.begin();\n         iter != info.orderby_elems_.end(); iter++) {\n      add_order_by_elem(std::make_shared<OrderByElemInfo>(*(*iter)));\n    }\n  }\n\n  search_cond_ = copy_node(info.search_cond_);\n\n  limit_ = info.limit_;\n}\n\nSelectInfo &SelectInfo::operator=(const SelectInfo &info) {\n  if (info.selected_elems_.empty() == false) {\n    for (auto iter = info.selected_elems_.begin();\n         iter != info.selected_elems_.end(); iter++) {\n      add_selected_elem(std::make_shared<SelectedElemInfo>(*(*iter)));\n    }\n  }\n\n  if (info.orderby_elems_.empty() == false) {\n    for (auto iter = info.orderby_elems_.begin();\n         iter != info.orderby_elems_.end(); iter++) {\n      add_order_by_elem(std::make_shared<OrderByElemInfo>(*(*iter)));\n    }\n  }\n\n  
search_cond_ = copy_node(info.search_cond_);\n\n  limit_ = info.limit_;\n\n  return *this;\n}\n\nNode::Ptr SelectInfo::copy_node(const Node::Ptr &node) {\n  Node::Ptr new_node = nullptr;\n\n  if (node == nullptr) {\n    return nullptr;\n  }\n\n  if (node->op() == NodeOp::T_INT_VALUE ||\n      node->op() == NodeOp::T_FLOAT_VALUE ||\n      node->op() == NodeOp::T_STRING_VALUE ||\n      node->op() == NodeOp::T_NULL_VALUE ||\n      node->op() == NodeOp::T_BOOL_VALUE) {\n    ConstantNode::Ptr constant_node =\n        std::dynamic_pointer_cast<ConstantNode>(node);\n    new_node = std::make_shared<ConstantNode>(constant_node->value());\n  } else if (node->op() == NodeOp::T_ID) {\n    IDNode::Ptr id_node = std::dynamic_pointer_cast<IDNode>(node);\n    new_node = std::make_shared<IDNode>(id_node->value());\n  } else if (node->op() == NodeOp::T_VECTOR_MATRIX_VALUE) {\n    VectorMatrixNode::Ptr vector_node =\n        std::dynamic_pointer_cast<VectorMatrixNode>(node);\n    new_node = std::make_shared<VectorMatrixNode>(\n        vector_node->matrix(), vector_node->sparse_indices(),\n        vector_node->sparse_values(), vector_node->query_params());\n  } else if (node->op() == NodeOp::T_FUNCTION_CALL) {\n    FuncNode::Ptr func_node = std::dynamic_pointer_cast<FuncNode>(node);\n    FuncNode::Ptr new_func_node = std::make_shared<FuncNode>();\n    new_func_node->set_func_name_node(\n        copy_node(func_node->get_func_name_node()));\n    for (auto argument : func_node->arguments()) {\n      new_func_node->add_argument(copy_node(argument));\n    }\n    new_node = std::move(new_func_node);\n  } else { /* others are normal Node */\n    new_node = std::make_shared<Node>();\n  }\n\n\n  // copy nodeOp\n  new_node->set_op(node->op());\n\n  // copy left & right\n  if (node->left() != nullptr) {\n    new_node->set_left(copy_node(node->left()));\n  }\n  if (node->right() != nullptr) {\n    new_node->set_right(copy_node(node->right()));\n  }\n\n  return new_node;\n}\n\nvoid 
SelectInfo::add_selected_elem(SelectedElemInfo::Ptr selected_elem_info) {\n  selected_elems_.push_back(std::move(selected_elem_info));\n}\n\nvoid SelectInfo::add_order_by_elem(OrderByElemInfo::Ptr orderby_elem_info) {\n  orderby_elems_.push_back(std::move(orderby_elem_info));\n}\n\nvoid SelectInfo::set_limit(int value) {\n  limit_ = value;\n}\n\nvoid SelectInfo::set_search_cond(Node::Ptr cond) {\n  search_cond_ = std::move(cond);\n}\n\nconst std::vector<SelectedElemInfo::Ptr> &SelectInfo::selected_elems() {\n  return selected_elems_;\n}\nconst std::vector<OrderByElemInfo::Ptr> &SelectInfo::orderby_elems() {\n  return orderby_elems_;\n}\n\nint SelectInfo::limit() {\n  return limit_;\n}\n\nconst Node::Ptr &SelectInfo::search_cond() const {\n  return search_cond_;\n}\n\nNode::Ptr &SelectInfo::mutable_search_cond() {\n  return search_cond_;\n}\n\nstd::string SelectInfo::to_string() {\n  std::string str;\n\n  str += \"table: \" + table_name();\n  str += \"\\n\";\n\n  if (selected_elems_.empty() == false) {\n    str += \"SelectedElems: \";\n    for (auto iter = selected_elems_.begin(); iter != selected_elems_.end();\n         iter++) {\n      if (iter != selected_elems_.begin()) {\n        str += \", \";\n      }\n      str += (*iter)->to_string();\n    }\n    str += \"\\n\";\n  }\n\n  if (include_vector_) {\n    str += \"Include Vector: true\";\n    str += \"\\n\";\n  }\n\n  if (search_cond_ != nullptr) {\n    str += \"Search Condition: \";\n    str += search_cond_->text();\n    str += \"\\n\";\n  }\n\n  if (orderby_elems_.empty() == false) {\n    str += \"Orderby Elems: \";\n    for (auto iter = orderby_elems_.begin(); iter != orderby_elems_.end();\n         iter++) {\n      if (iter != orderby_elems_.begin()) {\n        str += \", \";\n      }\n      str += (*iter)->to_string();\n    }\n    str += \"\\n\";\n  }\n\n  if (limit_ != -1) {\n    str += \"limit: \" + std::to_string(limit_) + \" \";\n    str += \"\\n\";\n  }\n\n  return str;\n}\n\n}  // namespace 
zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/select_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <map>\n#include <memory>\n#include <vector>\n#include \"db/sqlengine/common/group_by.h\"\n#include \"base_info.h\"\n#include \"node.h\"\n#include \"orderby_elem_info.h\"\n#include \"selected_elem_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass SelectInfo : public BaseInfo {\n public:\n  using Ptr = std::shared_ptr<SelectInfo>;\n\n  SelectInfo(const std::string &m_table_name);\n  ~SelectInfo();\n\n  SelectInfo(const SelectInfo &info);\n  SelectInfo &operator=(const SelectInfo &info);\n\n  const std::vector<SelectedElemInfo::Ptr> &selected_elems();\n  const std::vector<OrderByElemInfo::Ptr> &orderby_elems();\n  int limit();\n  const Node::Ptr &search_cond() const;\n  Node::Ptr &mutable_search_cond();\n\n  void add_selected_elem(SelectedElemInfo::Ptr selected_elem_info);\n  void add_order_by_elem(OrderByElemInfo::Ptr orderby_elem_info);\n  void set_limit(int value);\n  void set_search_cond(Node::Ptr cond);\n\n  void set_include_vector(bool value) {\n    include_vector_ = value;\n  }\n\n  bool include_vector() {\n    return include_vector_;\n  }\n\n  void set_include_doc_id(bool value) {\n    include_doc_id_ = value;\n  }\n\n  bool is_include_doc_id() {\n    return include_doc_id_;\n  }\n\n  void set_group_by(GroupBy::Ptr group_by) {\n    group_by_ = std::move(group_by);\n  }\n  const GroupBy::Ptr &group_by() const {\n  
  return group_by_;\n  }\n\n  std::string to_string();\n\n private:\n  Node::Ptr copy_node(const Node::Ptr &node);\n\n private:\n  std::vector<SelectedElemInfo::Ptr> selected_elems_{};\n  std::vector<OrderByElemInfo::Ptr> orderby_elems_{};\n  Node::Ptr search_cond_{nullptr};\n  GroupBy::Ptr group_by_{};\n  int limit_{-1};\n  bool include_vector_{false};\n  bool include_doc_id_{false};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/selected_elem_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"selected_elem_info.h\"\n\nnamespace zvec::sqlengine {\n\nstd::string SelectedElemInfo::to_string() const {\n  std::string str = \"\";\n  if (is_asterisk()) {\n    str += \"*\";\n  } else if (is_func_call()) {\n    if (is_func_param_asterisk()) {\n      str += func_name_ + \"(*)\";\n    } else {\n      str += func_name_ + \"(\" + func_param_ + \")\";\n    }\n  } else {\n    str = field_name_;\n    if (!alias().empty()) {\n      str += \" as \" + alias();\n    }\n  }\n\n  return str;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/selected_elem_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include \"node.h\"\n\nnamespace zvec::sqlengine {\n\nclass SelectedElemInfo {\n public:\n  using Ptr = std::shared_ptr<SelectedElemInfo>;\n\n  void set_asterisk(const bool value) {\n    asterisk_ = value;\n  }\n\n  bool is_asterisk() const {\n    return asterisk_;\n  }\n\n  void set_empty(const bool value) {\n    empty_ = value;\n  }\n\n  bool is_empty() const {\n    return empty_;\n  }\n\n  void set_field_name(const std::string &value) {\n    field_name_ = value;\n  }\n\n  const std::string &field_name() const {\n    return field_name_;\n  }\n\n  void set_alias(const std::string &value) {\n    alias_ = value;\n  }\n\n  const std::string &alias() const {\n    return alias_;\n  }\n\n  const std::string &func_name() const {\n    return func_name_;\n  }\n\n  void set_func_name(const std::string &value) {\n    func_name_ = value;\n    if (!value.empty()) {\n      func_call_ = true;\n    }\n  }\n\n  const std::string &func_param() const {\n    return func_param_;\n  }\n\n  void set_func_param(const std::string &value) {\n    func_param_ = value;\n  }\n\n  bool is_func_call() const {\n    return func_call_;\n  }\n\n  void set_func_param_asterisk(bool value) {\n    func_param_asterisk_ = value;\n  }\n  bool is_func_param_asterisk() const {\n    return func_param_asterisk_;\n  }\n\n  std::string 
to_string() const;\n\n private:\n  bool asterisk_{false};\n  bool empty_{false};\n\n  std::string field_name_{\"\"};\n  std::string alias_{\"\"};\n\n  std::string func_name_{\"\"};\n  bool func_call_{false};\n  std::string func_param_{\"\"};\n  bool func_param_asterisk_{false};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/sql_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"sql_info.h\"\n#include <memory>\n#include \"db/sqlengine/common/util.h\"\n#include \"select_info.h\"\n\n\nnamespace zvec::sqlengine {\n\nSQLInfo::SQLInfo(SQLType m_type, BaseInfo::Ptr m_base_info) {\n  type_ = m_type;\n  base_info_ = m_base_info;\n}\n\nSQLInfo::~SQLInfo() {}\n\nSQLInfo::SQLInfo(const SQLInfo &info) {\n  type_ = info.type_;\n  if (type_ == SQLType::SELECT) {\n    SelectInfo::Ptr select_info = std::make_shared<SelectInfo>(\n        *(std::dynamic_pointer_cast<SelectInfo>(info.base_info_)));\n    base_info_ = select_info;\n  } else {\n    base_info_ = nullptr;\n  }\n}\n\nSQLInfo &SQLInfo::operator=(const SQLInfo &info) {\n  type_ = info.type_;\n  if (type_ == SQLType::SELECT) {\n    SelectInfo::Ptr select_info = std::make_shared<SelectInfo>(\n        *(std::dynamic_pointer_cast<SelectInfo>(info.base_info_)));\n    base_info_ = select_info;\n  } else {\n    base_info_ = nullptr;\n  }\n\n  return *this;\n}\n\nvoid SQLInfo::set_base_info(BaseInfo::Ptr value) {\n  base_info_ = std::move(value);\n}\n\nconst BaseInfo::Ptr &SQLInfo::base_info() const {\n  return base_info_;\n}\n\nvoid SQLInfo::set_type(SQLType value) {\n  type_ = value;\n}\n\nSQLInfo::SQLType SQLInfo::type() const {\n  return type_;\n}\n\nstd::string SQLInfo::type_name() const {\n  return type_to_str(type_);\n}\n\nstd::string SQLInfo::to_string() {\n  
std::string str = \"SQL Info: {\\n\";\n  str += \"Type: \" + type_name();\n  str += \"\\n\";\n  str += \"Info:\";\n  str += \"\\n\";\n  str += base_info_->to_string();\n  str += \"}\";\n  return str;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/sql_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include \"base_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass SQLInfo {\n public:\n  using Ptr = std::shared_ptr<SQLInfo>;\n\n  enum class SQLType {\n    NONE,\n    INSERT,\n    UPSERT,\n    UPDATE,\n    DELETE,\n    CREATE,\n    DROP,\n    SELECT,\n    SHOW_TABLES\n  };\n  static inline std::string type_to_str(SQLType c) {\n    static std::string names[] = {\"NONE\",   \"INSERT\", \"UPSERT\",\n                                  \"UPDATE\", \"DELETE\", \"CREATE\",\n                                  \"DROP\",   \"SELECT\", \"SHOW_TABLES\"};\n    return names[static_cast<int>(c)];\n  }\n\n public:\n  SQLInfo(SQLType type, BaseInfo::Ptr m_base_info);\n  ~SQLInfo();\n\n  SQLInfo(const SQLInfo &info);\n  SQLInfo &operator=(const SQLInfo &info);\n\n  void set_base_info(BaseInfo::Ptr value);\n  void set_type(SQLType value);\n\n  SQLType type() const;\n  std::string type_name() const;\n  const BaseInfo::Ptr &base_info() const;\n\n  std::string to_string();\n\n private:\n  SQLType type_{SQLType::NONE};\n  BaseInfo::Ptr base_info_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/sql_info_helper.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"sql_info_helper.h\"\n#include <stdint.h>\n#include <memory>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/db/doc.h>\n#include \"db/sqlengine/common/group_by.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/parser/node.h\"\n#include \"select_info.h\"\n\nnamespace zvec::sqlengine {\n\nusing namespace zvec;\n\nNode::Ptr handle_vector(const VectorQuery &request, std::string * /*err_msg*/) {\n  Node::Ptr rel_exp = std::make_shared<Node>(NodeOp::T_EQ);\n  rel_exp->set_left(std::make_shared<IDNode>(request.field_name_));\n  rel_exp->set_right(std::make_shared<VectorMatrixNode>(\n      request.query_vector_, request.query_sparse_indices_,\n      request.query_sparse_values_, request.query_params_));\n  return rel_exp;\n}\n\nvoid handle_query_field(const VectorQuery *query, SelectInfo *selected_info) {\n  if (!query->output_fields_.has_value()) {\n    SelectedElemInfo::Ptr selected_elem_info =\n        std::make_shared<SelectedElemInfo>();\n    selected_elem_info->set_asterisk(true);\n    selected_info->add_selected_elem(std::move(selected_elem_info));\n  } else if (query->output_fields_->empty()) {\n    // select no field if output_fields is specified with empty vector\n    SelectedElemInfo::Ptr selected_elem_info =\n        std::make_shared<SelectedElemInfo>();\n    
selected_elem_info->set_empty(true);\n    selected_info->add_selected_elem(std::move(selected_elem_info));\n  } else {\n    for (const auto &field : *query->output_fields_) {\n      SelectedElemInfo::Ptr selected_elem_info =\n          std::make_shared<SelectedElemInfo>();\n      if (field == \"*\") {\n        selected_elem_info->set_asterisk(true);\n      } else {\n        selected_elem_info->set_field_name(field);\n      }\n      selected_info->add_selected_elem(std::move(selected_elem_info));\n    }\n  }\n}\n\nbool SQLInfoHelper::MessageToSQLInfo(const VectorQuery *query,\n                                     Node::Ptr filter_node,\n                                     std::shared_ptr<GroupBy> group_by,\n                                     sqlengine::SQLInfo::Ptr *sql_info,\n                                     std::string *err_msg) {\n  Node::Ptr index_params_node_ptr = nullptr;\n  if (!query->query_vector_.empty() || !query->query_sparse_indices_.empty()) {\n    index_params_node_ptr = handle_vector(*query, err_msg);\n    if (index_params_node_ptr == nullptr) {\n      return false;\n    }\n  }\n\n  Node::Ptr cond_expr = nullptr;\n  if (index_params_node_ptr && filter_node) {\n    cond_expr = std::make_shared<Node>(NodeOp::T_AND);\n    cond_expr->set_left(index_params_node_ptr);\n    cond_expr->set_right(filter_node);\n  } else if (index_params_node_ptr) {\n    cond_expr = index_params_node_ptr;\n  } else if (filter_node) {\n    cond_expr = filter_node;\n  }\n\n  SelectInfo::Ptr select_info = std::make_shared<SelectInfo>(\"\");\n  handle_query_field(query, select_info.get());\n  select_info->set_search_cond(cond_expr);\n\n  uint32_t topk = query->topk_;\n  select_info->set_limit(topk);\n  select_info->set_include_vector(query->include_vector_);\n  select_info->set_include_doc_id(query->include_doc_id_);\n\n  select_info->set_group_by(std::move(group_by));\n  //\n  // for (int i = 0; i < query->order_by_fields_size(); ++i) {\n  //   auto orderby_elem_info = 
std::make_shared<OrderByElemInfo>();\n  //   orderby_elem_info->set_field_name(query->order_by_fields(i).field());\n  //   if (query->order_by_fields(i).desc()) {\n  //     orderby_elem_info->set_desc();\n  //   }\n  //   select_info->add_order_by_elem(std::move(orderby_elem_info));\n  // }\n\n  *sql_info = std::make_shared<SQLInfo>(SQLInfo::SQLType::SELECT, select_info);\n  return true;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/sql_info_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/db/doc.h>\n#include \"db/sqlengine/common/group_by.h\"\n#include \"db/sqlengine/parser/node.h\"\n#include \"db/sqlengine/parser/sql_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass SQLInfoHelper {\n public:\n  //! Perform QueryRequest to sql info conversion:\n  static bool MessageToSQLInfo(const VectorQuery *query, Node::Ptr filter_node,\n                               std::shared_ptr<GroupBy> group_by,\n                               sqlengine::SQLInfo::Ptr *sql_info,\n                               std::string *err_msg);\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_cached_sql_parser.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec_cached_sql_parser.h\"\n#include <exception>\n#include <typeinfo>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"atn/ParserATNSimulator.h\"\n#include \"db/sqlengine/antlr/gen/SQLLexer.h\"\n#include \"db/sqlengine/antlr/gen/SQLParser.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"case_changing_charstream.h\"\n#include \"error_verbose_listener.h\"\n#include \"node.h\"\n#include \"select_info.h\"\n#include \"selected_elem_info.h\"\n\nusing namespace antlr4;\nusing namespace tree;\nusing namespace atn;\n\nnamespace zvec::sqlengine {\n\nstd::unordered_map<std::string, SQLInfo::Ptr>\n    ZVecCachedSQLParser::sql_info_map_{};\nstd::unordered_map<std::string, Node::Ptr> ZVecCachedSQLParser::filter_map_;\nuint32_t ZVecCachedSQLParser::Hit{0};\nuint32_t ZVecCachedSQLParser::Miss{0};\n\nZVecCachedSQLParser::ZVecCachedSQLParser(uint32_t cache_count)\n    : cache_count_(cache_count) {}\n\nZVecCachedSQLParser::~ZVecCachedSQLParser() {}\n\nSQLInfo::Ptr ZVecCachedSQLParser::parse(const std::string &query,\n                                        bool need_formatted_tree) {\n  std::string query_cache_key{\"\"};\n  SQLInfo::Ptr cached_sql_info = get_from_cache(query, &query_cache_key);\n  if (cached_sql_info != nullptr) {\n    return cached_sql_info;\n  }\n\n  SQLInfo::Ptr 
new_sql_info = real_parser_.parse(query, need_formatted_tree);\n  if (new_sql_info == nullptr) {\n    // no need to cache parse failed sql. just return.\n    err_msg_ = real_parser_.err_msg();\n    return nullptr;\n  }\n\n  put_into_cache(query_cache_key, new_sql_info);\n\n  return new_sql_info;\n}\n\nvoid ZVecCachedSQLParser::put_into_cache(const std::string &query_cache_key,\n                                         const SQLInfo::Ptr &new_sql_info) {\n  {\n    std::unique_lock guard(shared_mutex_);\n    if (sql_info_map_.size() >= cache_count_) {\n      // if full, clear to refresh new sql\n      sql_info_map_.clear();\n      Hit = Miss = 0;\n    }\n    sql_info_map_.emplace(query_cache_key, new_sql_info);\n  }\n\n  LOG_DEBUG(\"cache emplaced. [%s] [%s] \", query_cache_key.c_str(),\n            new_sql_info->to_string().c_str());\n\n  return;\n}\n\nSQLInfo::Ptr ZVecCachedSQLParser::get_from_cache(const std::string &query,\n                                                 std::string *query_cache_key) {\n  // find [ and ], must only one occurrence.\n  std::string::size_type left_pos, right_pos;\n  left_pos = query.find(\"[\");\n  if (left_pos == query.npos) {\n    return nullptr;\n  }\n  // find from left_pos+1\n  right_pos = query.rfind(\"]\");\n  if (right_pos == query.npos) {\n    return nullptr;\n  }\n\n  // ok, let's find it.\n  *query_cache_key = query.substr(0, left_pos);\n  query_cache_key->append(query.begin() + right_pos + 1, query.end());\n  std::string vector_text = query.substr(left_pos, right_pos - left_pos + 1);\n\n  SQLInfo::Ptr cached_sql_info = nullptr;\n  SQLInfo::Ptr copied_sql_info = nullptr;\n  {  // lock only in this block. after sql_info is copied, just unlock.\n    std::shared_lock guard(shared_mutex_);\n    auto iter = sql_info_map_.find(*query_cache_key);\n    if (iter == sql_info_map_.end()) {\n      ++Miss;\n      LOG_DEBUG(\"cache miss. 
key: [%s]\", query_cache_key->c_str());\n      return nullptr;\n    }\n\n    cached_sql_info = iter->second;\n    // copy cached_sql_info\n    copied_sql_info = std::make_shared<SQLInfo>(*cached_sql_info);\n  }\n\n  // parse vector part\n  Node::Ptr vector_node = parse_vector_text(&vector_text);\n  if (vector_node == nullptr) {\n    LOG_DEBUG(\"wrong vector format: [%s]\", vector_text.c_str());\n    return nullptr;\n  }\n  // replace vector in copied_sql_info\n  if (replace_vector_node(copied_sql_info, vector_node) != 0) {\n    LOG_WARN(\"replace_vector_node failed. [%s][%s]\", query.c_str(),\n             vector_text.c_str());\n    return nullptr;\n  }\n\n  ++Hit;\n  LOG_DEBUG(\"cache hit. key: [%s] sql_info: [%s]\", query_cache_key->c_str(),\n            copied_sql_info->to_string().c_str());\n  return copied_sql_info;\n}\n\nint ZVecCachedSQLParser::replace_vector_node(SQLInfo::Ptr cached_sql_info,\n                                             Node::Ptr vector_node) {\n  SelectInfo::Ptr cached_select_info =\n      std::dynamic_pointer_cast<SelectInfo>(cached_sql_info->base_info());\n  if (cached_select_info == nullptr) {\n    LOG_WARN(\"wrong select_info in cache. [%s]\",\n             cached_sql_info->to_string().c_str());\n    return -1;\n  }\n\n  Node::Ptr search_cond = cached_select_info->mutable_search_cond();\n  if (search_cond == nullptr) {\n    LOG_WARN(\"wrong search_cond in cache. [%s]\",\n             cached_sql_info->to_string().c_str());\n    return -1;\n  }\n\n  replace_flag_ = false;\n  if (traverse_to_replace(search_cond, vector_node) != 0 ||\n      replace_flag_ == false) {\n    LOG_WARN(\"replace search_cond in cache failed. 
[%s]\",\n             cached_sql_info->to_string().c_str());\n    return -1;\n  }\n\n  return 0;\n}\n\nint ZVecCachedSQLParser::traverse_to_replace(Node::Ptr ptr,\n                                             Node::Ptr vector_node) {\n  if (ptr->op() == NodeOp::T_VECTOR_MATRIX_VALUE) {\n    Node *parent = dynamic_cast<Node *>(ptr->parent());\n    if (parent == nullptr) {\n      LOG_WARN(\"wrong parent node in cache. [%s]\", ptr->to_string().c_str());\n      return -1;\n    }\n    if (parent->left() == ptr) {\n      parent->set_left(vector_node);\n      replace_flag_ = true;\n    } else if (parent->right() == ptr) {\n      parent->set_right(vector_node);\n      replace_flag_ = true;\n    } else {\n      LOG_WARN(\"wrong node in cache. [%s]\", ptr->to_string().c_str());\n      return -1;\n    }\n    return 0;\n  }\n\n  if (ptr->left() != nullptr) {\n    if (traverse_to_replace(ptr->left(), vector_node) < 0) {\n      return -1;\n    }\n    if (replace_flag_) {\n      return 0;\n    }\n  }\n  if (ptr->right() != nullptr) {\n    if (traverse_to_replace(ptr->right(), vector_node) != 0) {\n      return -1;\n    }\n    if (replace_flag_) {\n      return 0;\n    }\n  }\n\n  return 0;\n}\n\nNode::Ptr ZVecCachedSQLParser::parse_filter(const std::string &filter,\n                                            bool need_formatted_tree) {\n  {\n    std::shared_lock guard(shared_mutex_);\n    auto iter = filter_map_.find(filter);\n    if (iter != filter_map_.end()) {\n      ++Hit;\n      return iter->second;\n    }\n    ++Miss;\n  }\n  auto node = real_parser_.parse_filter(filter, need_formatted_tree);\n  err_msg_ = real_parser_.err_msg();\n  formatted_tree_ = real_parser_.formatted_tree();\n  if (node != nullptr) {\n    std::unique_lock guard(shared_mutex_);\n    if (filter_map_.size() >= cache_count_) {\n      // clear cache if full\n      filter_map_.clear();\n      Hit = Miss = 0;\n    }\n    filter_map_.emplace(filter, node);\n  }\n  return node;\n}\n\n}  // namespace 
zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_cached_sql_parser.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <shared_mutex>\n#include <unordered_map>\n#include \"select_info.h\"\n#include \"zvec_sql_parser.h\"\n\nnamespace zvec::sqlengine {\n\nclass ZVecCachedSQLParser : public ZVecParser {\n public:\n  ZVecCachedSQLParser(uint32_t cache_count);\n  ~ZVecCachedSQLParser();\n\n  SQLInfo::Ptr parse(const std::string &query,\n                     bool need_formatted_tree = false) override;\n\n  Node::Ptr parse_filter(const std::string &filter,\n                         bool need_formatted_tree = false) override;\n\n private:\n  void put_into_cache(const std::string &query_cache_key,\n                      const SQLInfo::Ptr &sql_info);\n  SQLInfo::Ptr get_from_cache(const std::string &query,\n                              std::string *query_cache_key);\n\n  int replace_vector_node(SQLInfo::Ptr cached_sql_info, Node::Ptr vector_node);\n  int traverse_to_replace(Node::Ptr ptr, Node::Ptr vector_node);\n\n private:\n  static std::unordered_map<std::string, SQLInfo::Ptr> sql_info_map_;\n  static std::unordered_map<std::string, Node::Ptr> filter_map_;\n  static uint32_t Hit;\n  static uint32_t Miss;\n  inline static std::shared_mutex shared_mutex_;\n\n private:\n  bool replace_flag_{false};\n  ZVecSQLParser real_parser_;\n  uint32_t cache_count_{0};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_parser.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec_parser.h\"\n#include <float.h>\n#include <stdint.h>\n#include <exception>\n#include <fstream>\n#include <iostream>\n#include <limits>\n#include <typeinfo>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include \"db/sqlengine/common/util.h\"\n#include \"tree/ParseTree.h\"\n#include \"zvec_cached_sql_parser.h\"\n#include \"zvec_sql_parser.h\"\n\nusing namespace antlr4;\nusing namespace tree;\n\nnamespace zvec::sqlengine {\n\nconst std::string &ZVecParser::err_msg() {\n  return err_msg_;\n}\n\nstd::string ZVecParser::trim(std::string &value) {\n  value = Util::trim_one_both_side(value, '\\'');\n  value = Util::trim_one_both_side(value, '\\\"');\n  // Util::string_replace(value, \"\\\\\", \"\");\n  return value;\n}\n\nconst std::string &ZVecParser::formatted_tree() {\n  return formatted_tree_;\n}\n\nstd::string ZVecParser::to_formatted_string_tree(void *tree, void *parser) {\n  if (tree == NULL || parser == NULL) {\n    return \"\";\n  }\n\n  ParseTree *parse_tree = reinterpret_cast<ParseTree *>(tree);\n  ZVecParser *se_parser = reinterpret_cast<ZVecParser *>(parser);\n\n  std::string tree_text = parse_tree->toStringTree(se_parser);\n\n  int pos = 0, pos1 = 0, pos2 = 0, start = 0;\n  int i = 0, num = 0;\n  const std::string DELIMITER = \"  \";\n  
const std::string LINE = \"\\n\";\n  int lastPos1 = 0;\n\n  std::string out;\n\n  while (true) {\n    std::string formatted = \"\";\n\n    pos1 = (int)tree_text.find_first_of('(', start);\n    pos2 = (int)tree_text.find_first_of(')', start);\n\n    if (pos1 == 0) {\n      start = pos + 1;\n      continue;\n    }\n\n    if (pos1 < 0 && pos2 < 0) {\n      break;\n    }\n\n    if (pos1 >= 0 && pos1 < pos2) {\n      if (lastPos1 == 1) {\n        formatted += \"(\";\n      }\n      pos = pos1;\n      formatted += tree_text.substr(start, (size_t)pos1 - start);\n      num++;\n    } else {\n      if (lastPos1 == 1) {\n        formatted += \"(\";\n      }\n      pos = pos2;\n      formatted += tree_text.substr(start, (size_t)pos2 - start) + \")\";\n      num--;\n    }\n\n    formatted += LINE;\n    for (i = 0; i < num; i++) {\n      formatted += DELIMITER;\n    }\n\n    start = pos + 1;\n\n    if (pos == pos1) {\n      lastPos1 = 1;\n    } else {\n      lastPos1 = 0;\n    }\n\n    out += formatted;\n  }\n\n  return out;\n}\n\nvoid ZVecParser::save_to_file(const std::string &file_name,\n                              const std::string &formatted) {\n  std::ofstream outfile(file_name);\n  outfile << formatted;\n  outfile << std::endl;\n  outfile.close();\n}\n\nZVecParser::Ptr ZVecParser::create() {\n  // TODO: support config\n  // auto &config = zvec::Config::Instance();\n  // int32_t cache_count = config.get_sql_info_cache_count();\n  return create(100);\n}\n\nZVecParser::Ptr ZVecParser::create(int cache_count) {\n  // if not config, or if config between 0 and 100, upround to 100\n  if (cache_count >= 0 && cache_count < DEFAULT_CACHE_COUNT) {\n    cache_count = DEFAULT_CACHE_COUNT;\n  }\n\n  if (cache_count > 0) {\n    LOG_DEBUG(\"ZVecCachedSQLParser enabled. 
effective cache_count %d\",\n              cache_count);\n    return std::make_shared<ZVecCachedSQLParser>(cache_count);\n  } else {\n    LOG_DEBUG(\"ZVecCachedSQLParser disabled.\");\n    return std::make_shared<ZVecSQLParser>();\n  }\n}\n\nNode::Ptr ZVecParser::parse_vector_text(std::string *vector_text) {\n  return std::make_shared<VectorMatrixNode>(std::move(*vector_text), \"\", \"\",\n                                            nullptr);\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_parser.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"node.h\"\n#include \"sql_info.h\"\n\nnamespace zvec::sqlengine {\n\nusing VoidPtr = void *;\n\nclass ZVecParser {\n public:\n  using Ptr = std::shared_ptr<ZVecParser>;\n\n  ZVecParser() = default;\n  virtual ~ZVecParser() {};\n\n  virtual SQLInfo::Ptr parse(const std::string &query,\n                             bool formatted_tree = false) = 0;\n\n  virtual Node::Ptr parse_filter(const std::string &filter,\n                                 bool need_formatted_tree = false) = 0;\n\n\n protected:\n  std::string trim(std::string &value);\n  virtual std::string to_formatted_string_tree(void *tree, void *parser);\n  virtual void save_to_file(const std::string &file_name,\n                            const std::string &formatted);\n\n public:\n  virtual const std::string &err_msg();\n  virtual const std::string &formatted_tree();\n\n  Node::Ptr parse_vector_text(std::string *vector_text);\n\n public:\n  static ZVecParser::Ptr create();\n  static ZVecParser::Ptr create(int cache_count);\n  const static int32_t DEFAULT_CACHE_COUNT{100};\n\n protected:\n  std::string err_msg_{\"\"};\n  std::string formatted_tree_{\"\"};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_sql_parser.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec_sql_parser.h\"\n#include <exception>\n#include <memory>\n#include <zvec/ailego/logger/logger.h>\n#include \"atn/ParserATNSimulator.h\"\n#include \"db/sqlengine/antlr/gen/SQLLexer.h\"\n#include \"db/sqlengine/antlr/gen/SQLParser.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"case_changing_charstream.h\"\n#include \"error_verbose_listener.h\"\n#include \"node.h\"\n#include \"select_info.h\"\n#include \"selected_elem_info.h\"\n\nusing namespace antlr4;\nusing namespace tree;\nusing namespace atn;\n\nnamespace zvec::sqlengine {\n\nSQLInfo::Ptr ZVecSQLParser::parse(const std::string &query,\n                                  bool need_formatted_tree) {\n  try {\n    ANTLRInputStream input(query);\n    CaseChangingCharStream in(&input, true);\n\n    SQLLexer lexer(&in);\n\n    CommonTokenStream tokens(&lexer);\n\n    SQLParser parser(&tokens);\n\n    // remove and add new error listeners\n    ErrorVerboseListener lexer_error_listener;\n    lexer.removeErrorListeners();  // remove all error listeners\n    lexer.addErrorListener((ANTLRErrorListener *)&lexer_error_listener);  // add\n    ErrorVerboseListener parser_error_listener;\n    parser.removeErrorListeners();  // remove all error listeners\n    parser.addErrorListener(\n        (ANTLRErrorListener *)&parser_error_listener);  // add\n\n    // int64_t curtime = 
Util::cur_micro_second_time();\n    ParseTree *tree = parser.compilation_unit();\n\n    if (lexer.getNumberOfSyntaxErrors() > 0 ||\n        parser.getNumberOfSyntaxErrors() > 0) {\n      LOG_INFO(\"SLL failed. using LL\");\n      tokens.reset();\n      parser.reset();\n      parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(\n          PredictionMode::LL);\n      tree = parser.compilation_unit();\n    }\n\n    // int64_t duration = Util::cur_micro_second_time() - curtime;\n    // printf(\"parsing time %ld\\n\", duration);\n    // LOG_DEBUG(\"antlr parsing time: [%ld]\", duration);\n\n    if (lexer.getNumberOfSyntaxErrors() > 0) {\n      err_msg_ = \"lexer error [\" + lexer_error_listener.err_msg() + \"]\";\n      return nullptr;\n    }\n    if (parser.getNumberOfSyntaxErrors() > 0) {\n      err_msg_ = \"syntax error [\" + parser_error_listener.err_msg() + \"]\";\n      return nullptr;\n    }\n\n    if (need_formatted_tree) {\n      formatted_tree_ = to_formatted_string_tree(tree, &parser);\n    }\n\n    SQLInfo::Ptr sqlInfo = sql_info(tree);\n    return sqlInfo;\n  } catch (std::exception &e) {\n    err_msg_ = \"parse error [\" + std::string(e.what()) + \"]\";\n    return nullptr;\n  }\n}\n\nSQLInfo::Ptr ZVecSQLParser::sql_info(VoidPtr tree) {\n  ParseTree *parseTree = reinterpret_cast<ParseTree *>(tree);\n  SQLParser::Compilation_unitContext *compilation_unit_node =\n      (SQLParser::Compilation_unitContext *)parseTree;\n  SQLParser::Unit_statementContext *unit_statement_node =\n      (SQLParser::Unit_statementContext *)compilation_unit_node->children[0];\n\n  SQLInfo::SQLType sqlType = sql_type(unit_statement_node);\n  if (sqlType == SQLInfo::SQLType::NONE) {\n    return nullptr;\n  }\n\n  BaseInfo::Ptr baseInfo = nullptr;\n  switch (sqlType) {\n    case SQLInfo::SQLType::SELECT:\n      baseInfo =\n          select_info(unit_statement_node->dql_statement()->select_statement());\n      break;\n    default:\n      break;\n  }\n\n  if (baseInfo == 
nullptr) {\n    return nullptr;\n  }\n\n  if (baseInfo->validate() == false) {\n    err_msg_ = baseInfo->err_msg();\n    return nullptr;\n  }\n\n  SQLInfo::Ptr sqlInfo = std::make_shared<SQLInfo>(sqlType, baseInfo);\n  return sqlInfo;\n}\n\nSQLInfo::SQLType ZVecSQLParser::sql_type(VoidPtr node) {\n  SQLParser::Unit_statementContext *unit_statement_node =\n      reinterpret_cast<SQLParser::Unit_statementContext *>(node);\n\n  SQLParser::Dql_statementContext *dql_statement_node =\n      (SQLParser::Dql_statementContext *)unit_statement_node->dql_statement();\n\n  if (dql_statement_node != nullptr) {\n    if (dql_statement_node->select_statement() != nullptr) {\n      return SQLInfo::SQLType::SELECT;\n    }\n  }\n\n  return SQLInfo::SQLType::NONE;\n}\n\nSelectInfo::Ptr ZVecSQLParser::select_info(VoidPtr node) {\n  SQLParser::Select_statementContext *select_statement_node =\n      reinterpret_cast<SQLParser::Select_statementContext *>(node);\n\n  SQLParser::Selected_elementsContext *selected_elements_node =\n      select_statement_node->selected_elements();\n  SQLParser::From_clauseContext *from_clause_node =\n      select_statement_node->from_clause();\n  SQLParser::Where_clauseContext *where_node =\n      select_statement_node->where_clause();\n  SQLParser::Order_by_clauseContext *order_by_node =\n      select_statement_node->order_by_clause();\n  SQLParser::Limit_clauseContext *limit_node =\n      select_statement_node->limit_clause();\n\n  std::string table_name = \"\";\n\n  if (from_clause_node->tableview_name() != nullptr) {\n    table_name = from_clause_node->tableview_name()->getText();\n  }\n  SelectInfo::Ptr selectInfo = std::make_shared<SelectInfo>(table_name);\n\n  for (auto selected_element_node :\n       selected_elements_node->selected_element()) {\n    SelectedElemInfo::Ptr selected_elem_info =\n        std::make_shared<SelectedElemInfo>();\n\n    if (selected_element_node->field_name() != nullptr) {\n      selected_elem_info->set_field_name(\n          
selected_element_node->field_name()->getText());\n      if (selected_element_node->field_alias() != nullptr) {\n        selected_elem_info->set_alias(\n            selected_element_node->field_alias()->getText());\n      }\n    } else if (selected_element_node->ASTERISK()) {\n      selected_elem_info->set_asterisk(true);\n    }\n\n    selectInfo->add_selected_elem(std::move(selected_elem_info));\n  }\n\n  if (where_node) {\n    Node::Ptr cond = handle_logic_expr_node(where_node->logic_expr());\n    if (cond == nullptr) {\n      return nullptr;\n    }\n    selectInfo->set_search_cond(std::move(cond));\n  }\n\n  if (order_by_node != nullptr) {\n    for (auto order_by_element : order_by_node->order_by_element()) {\n      auto orderby_elem_info = std::make_shared<OrderByElemInfo>();\n      orderby_elem_info->set_field_name(\n          order_by_element->field_name()->getText());\n      if (order_by_element->DESC()) {\n        orderby_elem_info->set_desc();\n      }\n      selectInfo->add_order_by_elem(std::move(orderby_elem_info));\n    }\n  }\n\n  if (limit_node != nullptr) {\n    selectInfo->set_limit(std::stoi(limit_node->int_value()->getText()));\n  }\n\n  return selectInfo;\n}\n\nNode::Ptr ZVecSQLParser::handle_logic_expr_node(VoidPtr node) {\n  SQLParser::Logic_exprContext *logicExprNode =\n      reinterpret_cast<SQLParser::Logic_exprContext *>(node);\n  const std::vector<SQLParser::Logic_exprContext *> &logicExprChildNodes =\n      logicExprNode->logic_expr();\n\n  if (logicExprNode->OR() != nullptr) {\n    Node::Ptr orExpr = std::make_shared<Node>(NodeOp::T_OR);\n    orExpr->set_left(handle_logic_expr_node(logicExprChildNodes[0]));\n    orExpr->set_right(handle_logic_expr_node(logicExprChildNodes[1]));\n    return orExpr;\n  } else if (logicExprNode->AND() != nullptr) {\n    Node::Ptr andExpr = std::make_shared<Node>(NodeOp::T_AND);\n    andExpr->set_left(handle_logic_expr_node(logicExprChildNodes[0]));\n    
andExpr->set_right(handle_logic_expr_node(logicExprChildNodes[1]));\n    return andExpr;\n  } else if (logicExprNode->enclosed_expr() != nullptr) {\n    // enclosed_expr is represented by sub-tree structure\n    return handle_logic_expr_node(logicExprNode->enclosed_expr()->logic_expr());\n  } else if (logicExprNode->relation_expr() != nullptr) {\n    return handle_rel_expr_node(logicExprNode->relation_expr());\n  }\n\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_rel_expr_left_node(VoidPtr node) {\n  SQLParser::Relation_exprContext *relationExprNode =\n      reinterpret_cast<SQLParser::Relation_exprContext *>(node);\n  // either identifier or function call\n  if (relationExprNode->identifier() != nullptr) {\n    return handle_id_node(relationExprNode->identifier());\n  } else if (relationExprNode->function_call() != nullptr) {\n    return handle_function_call_node(relationExprNode->function_call());\n  }\n\n  err_msg_ = \"Parse failed. Unexpected rel expr left node.\" +\n             relationExprNode->getText();\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_rel_expr_node(VoidPtr node) {\n  SQLParser::Relation_exprContext *relationExprNode =\n      reinterpret_cast<SQLParser::Relation_exprContext *>(node);\n  if (relationExprNode->rel_oper() != nullptr) {\n    SQLParser::Rel_operContext *op = relationExprNode->rel_oper();\n    NodeOp nodeOp = NodeOp::T_NONE;\n    if (op->E_OP()) {\n      nodeOp = NodeOp::T_EQ;\n    } else if (op->ne_op()) {\n      nodeOp = NodeOp::T_NE;\n    } else if (op->L_OP()) {\n      nodeOp = NodeOp::T_LT;\n    } else if (op->G_OP()) {\n      nodeOp = NodeOp::T_GT;\n    } else if (op->le_op()) {\n      nodeOp = NodeOp::T_LE;\n    } else if (op->ge_op()) {\n      nodeOp = NodeOp::T_GE;\n    }\n    Node::Ptr relationalExpr = std::make_shared<Node>(nodeOp);\n    relationalExpr->set_left(handle_rel_expr_left_node(relationExprNode));\n    Node::Ptr value_node =\n        handle_value_expr_node(relationExprNode->value_expr());\n  
  if (value_node == nullptr) {\n      return nullptr;\n    }\n    relationalExpr->set_right(std::move(value_node));\n    return relationalExpr;\n  } else if (relationExprNode->LIKE() != nullptr) {\n    NodeOp nodeOp = NodeOp::T_LIKE;\n    Node::Ptr relationalExpr = std::make_shared<Node>(nodeOp);\n    relationalExpr->set_left(handle_rel_expr_left_node(relationExprNode));\n    Node::Ptr value_node =\n        handle_value_expr_node(relationExprNode->value_expr());\n    if (value_node == nullptr) {\n      return nullptr;\n    }\n    relationalExpr->set_right(std::move(value_node));\n    return relationalExpr;\n  } else if (relationExprNode->IN() != nullptr ||\n             relationExprNode->CONTAIN_ALL() != nullptr ||\n             relationExprNode->CONTAIN_ANY() != nullptr) {\n    NodeOp nodeOp = NodeOp::T_NONE;\n\n    if (relationExprNode->CONTAIN_ALL() != nullptr) {\n      nodeOp = NodeOp::T_CONTAIN_ALL;\n    } else if (relationExprNode->CONTAIN_ANY() != nullptr) {\n      nodeOp = NodeOp::T_CONTAIN_ANY;\n    } else {\n      //      relationExprNode->IN() != nullptr\n      nodeOp = NodeOp::T_IN;\n    }\n\n    Node::Ptr relationalExpr = std::make_shared<Node>(nodeOp);\n    relationalExpr->set_left(handle_rel_expr_left_node(relationExprNode));\n    Node::Ptr in_value_expr_list_node =\n        handle_in_value_expr_list_node(relationExprNode->in_value_expr_list(),\n                                       relationExprNode->NOT() != nullptr);\n    if (in_value_expr_list_node == nullptr) {\n      return nullptr;\n    }\n    relationalExpr->set_right(std::move(in_value_expr_list_node));\n    return relationalExpr;\n  } else if (relationExprNode->NULL_V() != nullptr) {\n    NodeOp nodeOp = NodeOp::T_IS_NULL;\n    if (relationExprNode->NOT() != nullptr) {\n      nodeOp = NodeOp::T_IS_NOT_NULL;\n    }\n    auto null_node = std::make_shared<Node>(nodeOp);\n    null_node->set_left(handle_rel_expr_left_node(relationExprNode));\n    auto right = 
std::make_shared<ConstantNode>(\"\");\n    right->set_op(NodeOp::T_NULL_VALUE);\n    null_node->set_right(std::move(right));\n    return null_node;\n  }\n\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_value_expr_node(VoidPtr node) {\n  SQLParser::Value_exprContext *valueExprNode =\n      reinterpret_cast<SQLParser::Value_exprContext *>(node);\n\n  if (valueExprNode->constant() != nullptr) {\n    return handle_const_node(valueExprNode->constant());\n  } else if (valueExprNode->function_call() != nullptr) {\n    return handle_function_call_node(valueExprNode->function_call());\n  }\n\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_function_value_expr_node(VoidPtr node) {\n  SQLParser::Function_value_exprContext *valueExprNode =\n      reinterpret_cast<SQLParser::Function_value_exprContext *>(node);\n\n  if (valueExprNode->value_expr() != nullptr) {\n    return handle_value_expr_node(valueExprNode->value_expr());\n  } else if (valueExprNode->identifier() != nullptr) {\n    return handle_id_node(valueExprNode->identifier());\n  }\n\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_in_value_expr_node(VoidPtr node) {\n  SQLParser::In_value_exprContext *inValueExprNode =\n      reinterpret_cast<SQLParser::In_value_exprContext *>(node);\n\n  if (inValueExprNode->constant_num_and_str() != nullptr) {\n    return handle_const_num_and_str_node(\n        inValueExprNode->constant_num_and_str());\n  } else if (inValueExprNode->bool_value() != nullptr) {\n    return handle_bool_value_node(inValueExprNode->bool_value());\n  }\n\n  return nullptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_bool_value_node(\n    antlr4::SQLParser::Bool_valueContext *node) {\n  // normalize bool value\n  auto value = node->TRUE_V() ? 
\"true\" : \"false\";\n  auto constExpr = std::make_shared<ConstantNode>(value);\n  constExpr->set_op(NodeOp::T_BOOL_VALUE);\n  return constExpr;\n}\n\nNode::Ptr ZVecSQLParser::handle_in_value_expr_list_node(VoidPtr node,\n                                                        bool exclude) {\n  SQLParser::In_value_expr_listContext *inValueExprListContext =\n      reinterpret_cast<SQLParser::In_value_expr_listContext *>(node);\n\n  InValueExprListNode::Ptr in_value_expr_list_node =\n      std::make_shared<InValueExprListNode>();\n  in_value_expr_list_node->set_exclude(exclude);\n  if (!inValueExprListContext) {\n    return in_value_expr_list_node;\n  }\n\n  auto in_value_expr_list = inValueExprListContext->in_value_expr();\n  for (auto in_value_expr : in_value_expr_list) {\n    Node::Ptr in_value_node = handle_in_value_expr_node(in_value_expr);\n    if (in_value_node == nullptr) {\n      return nullptr;\n    }\n    in_value_expr_list_node->add_in_value_expr(std::move(in_value_node));\n  }\n\n  return in_value_expr_list_node;\n}\n\nNode::Ptr ZVecSQLParser::handle_function_call_node(VoidPtr node) {\n  SQLParser::Function_callContext *function_call_node =\n      reinterpret_cast<SQLParser::Function_callContext *>(node);\n\n  FuncNode::Ptr func_node_ptr = std::make_shared<FuncNode>();\n\n  func_node_ptr->set_func_name_node(\n      handle_id_node(function_call_node->identifier()));\n  auto value_expr_list = function_call_node->function_value_expr();\n  for (auto value_expr : value_expr_list) {\n    Node::Ptr value_node = handle_function_value_expr_node(value_expr);\n    if (value_node == nullptr) {\n      return nullptr;\n    }\n    func_node_ptr->add_argument(std::move(value_node));\n  }\n\n  return func_node_ptr;\n}\n\nNode::Ptr ZVecSQLParser::handle_const_node(VoidPtr node) {\n  Node::Ptr constExpr = nullptr;\n  SQLParser::ConstantContext *constantNode =\n      reinterpret_cast<SQLParser::ConstantContext *>(node);\n  if (constantNode->numeric()) {\n    constExpr =\n 
       std::make_shared<ConstantNode>(constantNode->numeric()->getText());\n    if (constantNode->numeric()->int_value()) {\n      constExpr->set_op(NodeOp::T_INT_VALUE);\n    } else if (constantNode->numeric()->float_value()) {\n      constExpr->set_op(NodeOp::T_FLOAT_VALUE);\n    }\n  } else if (constantNode->quoted_string()) {\n    std::string value = constantNode->quoted_string()->getText();\n    value = trim(value);\n    value = Util::normalize(value);\n    constExpr = std::make_shared<ConstantNode>(value);\n    constExpr->set_op(NodeOp::T_STRING_VALUE);\n  } else if (constantNode->vector_expr()) {\n    constExpr = handle_vector_expr_node(constantNode->vector_expr());\n    if (constExpr == nullptr) {\n      err_msg_ = \"Parse failed. vector format error.\" +\n                 constantNode->vector_expr()->getText();\n      LOG_ERROR(\"Parse failed. vector format error. [%s]\",\n                constantNode->vector_expr()->getText().c_str());\n      return nullptr;\n    }\n  } else if (constantNode->bool_value()) {\n    constExpr = handle_bool_value_node(constantNode->bool_value());\n  }\n\n  return constExpr;\n}\n\nNode::Ptr ZVecSQLParser::handle_const_num_and_str_node(VoidPtr node) {\n  Node::Ptr constExpr = nullptr;\n  SQLParser::Constant_num_and_strContext *constant_num_and_str_Node =\n      reinterpret_cast<SQLParser::Constant_num_and_strContext *>(node);\n  if (constant_num_and_str_Node->numeric()) {\n    constExpr = std::make_shared<ConstantNode>(\n        constant_num_and_str_Node->numeric()->getText());\n    if (constant_num_and_str_Node->numeric()->int_value()) {\n      constExpr->set_op(NodeOp::T_INT_VALUE);\n    } else if (constant_num_and_str_Node->numeric()->float_value()) {\n      constExpr->set_op(NodeOp::T_FLOAT_VALUE);\n    }\n  } else if (constant_num_and_str_Node->quoted_string()) {\n    std::string value = constant_num_and_str_Node->quoted_string()->getText();\n    value = trim(value);\n    value = Util::normalize(value);\n    constExpr = 
std::make_shared<ConstantNode>(value);\n    constExpr->set_op(NodeOp::T_STRING_VALUE);\n  }\n\n  return constExpr;\n}\n\nNode::Ptr ZVecSQLParser::handle_vector_expr_node(VoidPtr node) {\n  SQLParser::Vector_exprContext *vector_ExprNode =\n      reinterpret_cast<SQLParser::Vector_exprContext *>(node);\n\n  std::string vector_text = vector_ExprNode->getText();\n  return parse_vector_text(&vector_text);\n}\n\nNode::Ptr ZVecSQLParser::handle_id_node(VoidPtr node) {\n  SQLParser::IdentifierContext *identifierNode =\n      reinterpret_cast<SQLParser::IdentifierContext *>(node);\n\n  Node::Ptr identifierExpr =\n      std::make_shared<IDNode>(identifierNode->getText());\n  identifierExpr->set_op(NodeOp::T_ID);\n  return identifierExpr;\n}\n\nNode::Ptr ZVecSQLParser::parse_filter(const std::string &filter,\n                                      bool need_formatted_tree) {\n  try {\n    ANTLRInputStream input(filter);\n    CaseChangingCharStream in(&input, true);\n\n    SQLLexer lexer(&in);\n\n    CommonTokenStream tokens(&lexer);\n\n    SQLParser parser(&tokens);\n\n    // remove and add new error listeners\n    ErrorVerboseListener lexer_error_listener;\n    lexer.removeErrorListeners();  // remove all error listeners\n    lexer.addErrorListener((ANTLRErrorListener *)&lexer_error_listener);  // add\n    ErrorVerboseListener parser_error_listener;\n    parser.removeErrorListeners();  // remove all error listeners\n    parser.addErrorListener(\n        (ANTLRErrorListener *)&parser_error_listener);  // add\n\n    // int64_t curtime = Util::cur_micro_second_time();\n    ParseTree *tree = parser.logic_expr_unit();\n\n    if (lexer.getNumberOfSyntaxErrors() > 0 ||\n        parser.getNumberOfSyntaxErrors() > 0) {\n      LOG_INFO(\"SLL failed. 
using LL\");\n      tokens.reset();\n      parser.reset();\n      parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(\n          PredictionMode::LL);\n      tree = parser.logic_expr_unit();\n    }\n\n    // int64_t duration = Util::cur_micro_second_time() - curtime;\n    // printf(\"parsing time %ld\\n\", duration);\n    // LOG_DEBUG(\"antlr parsing time: [%ld]\", duration);\n\n    if (lexer.getNumberOfSyntaxErrors() > 0) {\n      err_msg_ = \"lexer error [\" + lexer_error_listener.err_msg() + \"]\";\n      return nullptr;\n    }\n    if (parser.getNumberOfSyntaxErrors() > 0) {\n      err_msg_ = \"syntax error [\" + parser_error_listener.err_msg() + \"]\";\n      return nullptr;\n    }\n\n    if (need_formatted_tree) {\n      formatted_tree_ = to_formatted_string_tree(tree, &parser);\n    }\n    auto *logic_expr_tree =\n        dynamic_cast<SQLParser::Logic_expr_unitContext *>(tree);\n    if (logic_expr_tree == nullptr ||\n        logic_expr_tree->logic_expr() == nullptr) {\n      err_msg_ = \"parse error [null tree]\";\n      return nullptr;\n    }\n\n    return handle_logic_expr_node(logic_expr_tree->logic_expr());\n  } catch (const std::exception &e) {\n    err_msg_ = \"parse error [\" + std::string(e.what()) + \"]\";\n    return nullptr;\n  }\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/parser/zvec_sql_parser.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"db/sqlengine/antlr/gen/SQLParser.h\"\n#include \"select_info.h\"\n#include \"zvec_parser.h\"\n\nnamespace zvec::sqlengine {\n\nclass ZVecSQLParser : public ZVecParser {\n public:\n  ZVecSQLParser() = default;\n\n  SQLInfo::Ptr parse(const std::string &query,\n                     bool need_formatted_tree = false) override;\n\n  Node::Ptr parse_filter(const std::string &filter,\n                         bool need_formatted_tree = false) override;\n\n private:\n  SQLInfo::Ptr sql_info(VoidPtr tree);\n\n  SQLInfo::SQLType sql_type(VoidPtr node);\n  SelectInfo::Ptr select_info(VoidPtr node);\n\n  Node::Ptr handle_logic_expr_node(VoidPtr node);\n  Node::Ptr handle_rel_expr_node(VoidPtr node);\n  Node::Ptr handle_rel_expr_left_node(VoidPtr node);\n  Node::Ptr handle_value_expr_node(VoidPtr node);\n  Node::Ptr handle_function_value_expr_node(VoidPtr node);\n  Node::Ptr handle_in_value_expr_node(VoidPtr node);\n  Node::Ptr handle_in_value_expr_list_node(VoidPtr node, bool exclude);\n  Node::Ptr handle_id_node(VoidPtr node);\n  Node::Ptr handle_const_node(VoidPtr node);\n  Node::Ptr handle_const_num_and_str_node(VoidPtr node);\n  Node::Ptr handle_bool_value_node(antlr4::SQLParser::Bool_valueContext *node);\n  Node::Ptr handle_vector_expr_node(VoidPtr node);\n  Node::Ptr handle_function_call_node(VoidPtr node);\n};\n\n}  // 
namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/doc_filter.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/planner/doc_filter.h\"\n#include <optional>\n#include <arrow/acero/exec_plan.h>\n#include <arrow/table.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/config.h>\n#include \"db/sqlengine/planner/invert_search.h\"\n\nnamespace zvec::sqlengine {\n\nStatus DocFilter::compute_filter() {\n  if (invert_cond_) {\n    InvertSearch search(segment_.get());\n    auto invert_res = search.exec_invert_cond_tree(invert_cond_.get());\n    if (!invert_res) {\n      return Status::InternalError(\"Execute invert search failed: \",\n                                   invert_res.error().message());\n    }\n    invert_result_ = invert_res.value();\n    invert_filter_ = invert_result_->make_filter();\n  }\n\n  if (forward_plan_) {\n    auto forward_result = arrow::acero::DeclarationToTable(*forward_plan_);\n    if (!forward_result.ok()) {\n      return Status::InternalError(\"Execute filter bitmap failed: \",\n                                   forward_result.status().ToString());\n    }\n    // has only one column with boolean type\n    auto &forward_table = forward_result.ValueUnsafe();\n    if (forward_table->num_columns() != 1 ||\n        forward_table->column(0)->type() != arrow::boolean()) {\n      return Status::InternalError(\"Filter bitmap is not boolean type\");\n    }\n    forward_bitmap_ = 
forward_table->column(0);\n  }\n\n  if (forward_filter_expr_) {\n    // get schema to bind to Expression\n    auto table = segment_->fetch(query_info_->get_forward_filter_field_names(),\n                                 std::vector<int>{});\n    if (!table) {\n      return Status::InternalError(\"Fetch forward failed\");\n    }\n    auto bind_res = forward_filter_expr_->Bind(*table->schema());\n    if (!bind_res.ok()) {\n      return Status::InternalError(\"Bind forward filter expression failed: \",\n                                   bind_res.status().ToString());\n    }\n    *forward_filter_expr_ = bind_res.MoveValueUnsafe();\n  }\n  return Status::OK();\n}\n\nbool DocFilter::empty() const {\n  return !(delete_filter_ || invert_filter_ || forward_plan_ ||\n           forward_filter_expr_);\n}\n\nbool DocFilter::is_filtered(uint64_t id) const {\n  if (delete_filter_ && delete_filter_->is_filtered(id)) {\n    return true;\n  }\n  if (invert_filter_ && invert_filter_->is_filtered(id)) {\n    return true;\n  }\n  auto forward_bit = get_forward_bit(id);\n  if (!forward_bit) {\n    return false;\n  }\n  // invert the bit: a doc matched by the forward filter is not filtered\n  return !forward_bit.value();\n}\n\nstd::optional<bool> DocFilter::get_forward_bit(uint64_t id) const {\n  if (forward_filter_expr_) {\n    return is_matched_by_forward_filter(id);\n  }\n  if (!forward_bitmap_) {\n    return std::nullopt;\n  }\n  uint64_t rows_seen = 0;\n  for (int c = 0; c < forward_bitmap_->num_chunks(); c++) {\n    const auto &arr = forward_bitmap_->chunk(c);\n    if (id < rows_seen + arr->length()) {\n      auto *bool_array = static_cast<arrow::BooleanArray *>(arr.get());\n      return (*bool_array)[id - rows_seen];\n    }\n    rows_seen += arr->length();\n  }\n  LOG_ERROR(\"ID is out of range: id[%zu] count[%zu]\", (size_t)id,\n            (size_t)rows_seen);\n  return std::nullopt;\n}\n\nstd::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update() {\n  auto meta = 
segment_->meta();\n  if (!meta) {\n    return std::nullopt;\n  }\n  // TODO: support forward\n  if (!invert_result_) {\n    return std::nullopt;\n  }\n  size_t doc_count = meta->doc_count();\n  float brute_force_by_keys_ratio =\n      GlobalConfig::Instance().brute_force_by_keys_ratio();\n  uint64_t bf_by_keys_threshold = meta->doc_count() * brute_force_by_keys_ratio;\n\n  // decide to use brute force by keys or not\n  if (size_t match_count = invert_result_->count();\n      match_count <= bf_by_keys_threshold) {\n    std::vector<uint32_t> ids;\n    invert_result_->extract_ids(&ids);\n    invert_filter_.reset();\n    invert_result_.reset();\n    LOG_INFO(\"Use brute force by keys, doc_count[%zu] invert_result_count[%zu]\",\n             doc_count, match_count);\n    return std::vector<uint64_t>(ids.begin(), ids.end());\n  } else {\n    LOG_DEBUG(\n        \"Not use brute force by keys, doc_count[%zu] invert_result_count[%zu]\",\n        doc_count, match_count);\n  }\n  return std::nullopt;\n}\n\nstd::optional<bool> DocFilter::is_matched_by_forward_filter(uint64_t id) const {\n  auto exec_batch =\n      segment_->fetch(query_info_->get_forward_filter_field_names(), id);\n  if (!exec_batch) {\n    LOG_ERROR(\"Fetch forward failed, id[%zu]\", (size_t)id);\n    return std::nullopt;\n  }\n  auto maybe_result = arrow::compute::ExecuteScalarExpression(\n      *forward_filter_expr_, *exec_batch);\n  if (!maybe_result.ok()) {\n    LOG_ERROR(\"Execute scalar expression failed, id[%zu] err[%s]\", (size_t)id,\n              maybe_result.status().ToString().c_str());\n    return std::nullopt;\n  }\n  arrow::Datum datum = maybe_result.MoveValueUnsafe();\n  if (datum.is_scalar()) {\n    return datum.scalar_as<arrow::BooleanScalar>().value;\n  }\n  LOG_ERROR(\"Datum is not scalar, id[%zu] type[%s]\", (size_t)id,\n            datum.type()->ToString().c_str());\n  return std::nullopt;\n}\n\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/doc_filter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <arrow/acero/api.h>\n#include <arrow/chunked_array.h>\n#include <zvec/db/status.h>\n#include \"db/index/column/inverted_column/inverted_search_result.h\"\n#include \"db/index/common/index_filter.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n#include \"db/sqlengine/analyzer/query_node.h\"\n\nnamespace zvec::sqlengine {\n\nclass DocFilter : public IndexFilter {\n public:\n  using Ptr = std::shared_ptr<DocFilter>;\n\n  DocFilter(Segment::Ptr segment, QueryInfo::Ptr query_info,\n            std::unique_ptr<arrow::acero::Declaration> forward_plan,\n            std::unique_ptr<arrow::compute::Expression> forward_filter)\n      : segment_(std::move(segment)),\n        query_info_(std::move(query_info)),\n        delete_filter_(segment_->get_filter()),\n        invert_cond_(query_info_->invert_cond()),\n        forward_plan_(std::move(forward_plan)),\n        forward_filter_expr_(std::move(forward_filter)) {}\n\n  Status compute_filter();\n\n  bool is_filtered(uint64_t id) const override;\n\n  //! 
get brute force by keys and clear `invert_filter_` if suitable\n  std::optional<std::vector<uint64_t>> get_bf_by_keys_and_update();\n\n  bool empty() const;\n\n private:\n  std::optional<bool> get_forward_bit(uint64_t id) const;\n  std::optional<bool> is_matched_by_forward_filter(uint64_t id) const;\n\n private:\n  Segment::Ptr segment_;\n  QueryInfo::Ptr query_info_;\n  IndexFilter::Ptr delete_filter_;\n  QueryNode::Ptr invert_cond_;\n  // either forward_plan_ or forward_filter_expr_ is set\n  std::unique_ptr<arrow::acero::Declaration> forward_plan_;\n  std::unique_ptr<arrow::compute::Expression> forward_filter_expr_;\n\n  InvertedSearchResult::Ptr invert_result_;\n  IndexFilter::Ptr invert_filter_{nullptr};\n\n  std::shared_ptr<arrow::ChunkedArray> forward_bitmap_;\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/invert_recall_node.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/planner/invert_recall_node.h\"\n#include <arrow/api.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/sqlengine/planner/invert_search.h\"\n\nnamespace cp = arrow::compute;\n\nnamespace zvec::sqlengine {\n\narrow::AsyncGenerator<std::optional<cp::ExecBatch>> InvertRecallNode::gen() {\n  auto state_ptr = std::make_shared<State>();\n  return [self = shared_from_this(), state_ptr = std::move(state_ptr)]()\n             -> arrow::Future<std::optional<cp::ExecBatch>> {\n    auto &state = *state_ptr;\n\n    if (!state.iter_) {\n      auto invert_ret = self->prepare();\n      if (!invert_ret) {\n        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n            arrow::Status::ExecutionError(\"prepare invert failed:\",\n                                          invert_ret.error().c_str()));\n      }\n      state.invert_result_ = invert_ret.value();\n      state.iter_ = state.invert_result_->create_iterator();\n    }\n\n    if (!state.iter_->valid()) {\n      // return nullopt to indicate end\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          std::nullopt);\n    }\n\n    // collect a batch\n    std::vector<int> indices;\n    indices.reserve(self->batch_size_);\n    for (int i = 0; state.iter_->valid() && i < self->batch_size_;\n         state.iter_->next()) {\n      
if (self->seg_filter_ &&\n          self->seg_filter_->is_filtered(state.iter_->doc_id())) {\n        continue;\n      }\n      i++;\n      indices.push_back(state.iter_->doc_id());\n    }\n    if (indices.empty()) {\n      // return nullopt to indicate end\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          std::nullopt);\n    }\n\n    auto table = self->segment_->fetch(self->fetched_columns_, indices);\n    if (!table) {\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          arrow::Status::UnknownError(\"fetch table failed\"));\n    }\n    auto batch = table->CombineChunksToBatch();\n    if (!batch.ok()) {\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          arrow::Status::ExecutionError(\"combine chunks to batch failed:\",\n                                        batch.status().ToString()));\n    }\n    cp::ExecBatch exec_batch(*batch.ValueUnsafe());\n    return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n        std::move(exec_batch));\n  };\n}\n\nResult<InvertedSearchResult::Ptr> InvertRecallNode::prepare() {\n  InvertSearch search(segment_.get());\n  return search.exec_invert_cond_tree(query_info_->invert_cond().get());\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/invert_recall_node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <arrow/acero/api.h>\n#include <arrow/api.h>\n#include \"db/index/column/common/index_results.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass InvertRecallNode : public std::enable_shared_from_this<InvertRecallNode> {\n public:\n  InvertRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,\n                   int batch_size)\n      : segment_(std::move(segment)),\n        query_info_(std::move(query_info)),\n        // need fetch for forward filter, order by\n        fetched_columns_(query_info_->get_all_fetched_scalar_field_names()),\n        seg_filter_(segment_->get_filter()),\n        batch_size_(batch_size) {\n    auto table = segment_->fetch(fetched_columns_, std::vector<int>{});\n    schema_ = table->schema();\n  }\n\n  //! 
get schema\n  std::shared_ptr<arrow::Schema> schema() const {\n    return schema_;\n  }\n\n  arrow::AsyncGenerator<std::optional<cp::ExecBatch>> gen();\n\n private:\n  Result<InvertedSearchResult::Ptr> prepare();\n\n private:\n  struct State {\n    InvertedSearchResult::Ptr invert_result_;\n    IndexResults::IteratorUPtr iter_;\n  };\n\n  Segment::Ptr segment_;\n  QueryInfo::Ptr query_info_;\n  const std::vector<std::string> &fetched_columns_;\n  IndexFilter::Ptr seg_filter_;\n  int batch_size_;\n  std::shared_ptr<arrow::Schema> schema_;\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/invert_search.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"invert_search.h\"\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/type.h>\n#include \"db/sqlengine/analyzer/query_node.h\"\n#include \"db/sqlengine/common/util.h\"\n\nnamespace zvec::sqlengine {\n\nconst std::unordered_map<QueryNodeOp, CompareOp> kOpMap_ = {\n    {QueryNodeOp::Q_EQ, CompareOp::EQ},\n    {QueryNodeOp::Q_NE, CompareOp::NE},\n    {QueryNodeOp::Q_LT, CompareOp::LT},\n    {QueryNodeOp::Q_LE, CompareOp::LE},\n    {QueryNodeOp::Q_GT, CompareOp::GT},\n    {QueryNodeOp::Q_GE, CompareOp::GE},\n    {QueryNodeOp::Q_LIKE, CompareOp::LIKE},\n    {QueryNodeOp::Q_IN, CompareOp::CONTAIN_ANY},\n    {QueryNodeOp::Q_CONTAIN_ALL, CompareOp::CONTAIN_ALL},\n    {QueryNodeOp::Q_CONTAIN_ANY, CompareOp::CONTAIN_ANY},\n    {QueryNodeOp::Q_IS_NULL, CompareOp::IS_NULL},\n    {QueryNodeOp::Q_IS_NOT_NULL, CompareOp::IS_NOT_NULL},\n};\n\nResult<InvertedSearchResult::Ptr> InvertSearch::exec_invert_cond_tree(\n    const QueryNode *query_node) {\n  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {\n    if (query_node->left() != nullptr) {\n      auto left_res = exec_invert_cond_tree(query_node->left_node());\n      if (!left_res) {\n        return left_res;\n      }\n      if (query_node->right() == nullptr) {\n        return left_res;\n      } else {\n        auto right_res = 
exec_invert_cond_tree(query_node->right_node());\n        if (!right_res) {\n          return right_res;\n        }\n        query_node->op() == QueryNodeOp::Q_AND\n            ? left_res.value()->AND(*right_res.value())\n            : left_res.value()->OR(*right_res.value());\n        return left_res;\n      }\n    }\n    if (query_node->right() != nullptr) {\n      return exec_invert_cond_tree(query_node->right_node());\n    }\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"exec_invert_cond_tree, logic expr has no left or right node.\"));\n  }\n\n  if (query_node->type() == QueryNode::QueryNodeType::REL_EXPR) {\n    return exec_invert_cond_node(query_node);\n  }\n\n  return tl::make_unexpected(Status::InvalidArgument(\n      \"exec_invert_cond_tree unexpected type:\", query_node->op_name()));\n}\n\nCompareOp InvertSearch::query_nodeop2search_op(QueryNodeOp op) {\n  auto iter = kOpMap_.find(op);\n  if (iter == kOpMap_.end()) {\n    return CompareOp::NONE;\n  }\n  return iter->second;\n}\n\nResult<InvertedSearchResult::Ptr> InvertSearch::exec_invert_cond_node(\n    const QueryNode *invert_cond) {\n  auto term_node = invert_cond->right();\n\n  // get search oper\n  CompareOp oper = query_nodeop2search_op(invert_cond->op());\n  if (oper == CompareOp::NONE) {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"do_invert_scan, get search operator failed. 
op:\",\n        invert_cond->op_name()));\n  }\n\n  bool is_array_length = false;\n  auto *left_node = invert_cond->left_node();\n  std::string invert_field_name;\n  if (left_node->op() == QueryNodeOp::Q_ID) {\n    invert_field_name = left_node->text();\n  } else if (left_node->op() == QueryNodeOp::Q_FUNCTION_CALL) {\n    const QueryFuncNode *func_node =\n        dynamic_cast<const QueryFuncNode *>(left_node);\n    const auto &func_name = func_node->get_func_name();\n    const auto &arguments = func_node->arguments();\n    if (func_name == kFuncArrayLength) {\n      invert_field_name = arguments[0]->text();\n      is_array_length = true;\n    } else {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"do_invert_scan, unsupported function call. func:\",\n          func_name.c_str()));\n    }\n  } else {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"do_invert_scan, unsupported left node. op:\", left_node->op_name()));\n  }\n\n  // get field reader\n  auto invert_reader = segment_->get_scalar_indexer(invert_field_name);\n  if (invert_reader == nullptr) {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"do_invert_scan, get invert column reader failed. 
field:\",\n        invert_field_name.c_str()));\n  }\n\n  if (oper == CompareOp::IS_NULL) {\n    auto invert_res = invert_reader->search_null();\n    if (!invert_res) {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"invert column reader search null failed.\"));\n    }\n    return invert_res;\n  } else if (oper == CompareOp::IS_NOT_NULL) {\n    auto invert_res = invert_reader->search_non_null();\n    if (!invert_res) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"invert column reader search not null failed.\"));\n    }\n    return invert_res;\n  } else if (oper == CompareOp::CONTAIN_ALL || oper == CompareOp::CONTAIN_ANY) {\n    // NOTE: IN is handled as CONTAIN_ANY\n    QueryListNode::Ptr list_node =\n        std::dynamic_pointer_cast<QueryListNode>(term_node);\n    if (list_node->exclude()) {\n      oper = oper == CompareOp::CONTAIN_ALL ? CompareOp::NOT_CONTAIN_ALL\n                                            : CompareOp::NOT_CONTAIN_ANY;\n    }\n    auto invert_res =\n        invert_reader->multi_search(list_node->to_value_list(), oper);\n    if (!invert_res) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"invert column reader multi_search failed. op:\", int(oper)));\n    }\n    return invert_res;\n  } else if (!is_array_length) {\n    auto invert_term = term_node->text();\n    auto invert_res = invert_reader->search(invert_term, oper);\n    if (!invert_res) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"invert column reader search failed. 
term:\", invert_term.c_str(),\n          \" op:\", invert_cond->op_name().c_str()));\n    }\n    return invert_res;\n  } else {\n    auto invert_term = term_node->text();\n    uint32_t len = *(reinterpret_cast<const uint32_t *>(invert_term.data()));\n    auto invert_res = invert_reader->search_array_len(len, oper);\n    if (!invert_res) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"invert column reader search failed. term:\", invert_term.c_str(),\n          \" op:\", invert_cond->op_name().c_str()));\n    }\n    return invert_res;\n  }\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/invert_search.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/pattern/expected.hpp>\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_node.h\"\n\nnamespace zvec::sqlengine {\n\nclass InvertSearch {\n public:\n  using Ptr = std::shared_ptr<InvertSearch>;\n\n  InvertSearch(zvec::Segment *segment) : segment_(segment) {}\n\n  Result<InvertedSearchResult::Ptr> exec_invert_cond_tree(\n      const QueryNode *invert_cond);\n\n  static CompareOp query_nodeop2search_op(QueryNodeOp op);\n\n private:\n  Result<InvertedSearchResult::Ptr> exec_invert_cond_node(\n      const QueryNode *invert_cond);\n\n private:\n  zvec::Segment *segment_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/op_register.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"db/sqlengine/planner/op_register.h\"\n#include <arrow/status.h>\n#include \"db/sqlengine/planner/ops/check_not_filtered_op.h\"\n#include \"db/sqlengine/planner/ops/contain_op.h\"\n#include \"db/sqlengine/planner/ops/fetch_vector_op.h\"\n\nnamespace zvec::sqlengine {\n\narrow::Status OpRegister::register_ops() {\n  ARROW_RETURN_NOT_OK(CheckNotFilteredOp::register_op());\n  ARROW_RETURN_NOT_OK(FetchVectorOp::register_op());\n  ARROW_RETURN_NOT_OK(ContainOp::register_op());\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/op_register.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <arrow/status.h>\n\nnamespace zvec::sqlengine {\n\nclass OpRegister {\n public:\n  static arrow::Status register_ops();\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/check_not_filtered_op.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"db/sqlengine/planner/ops/check_not_filtered_op.h\"\n#include <arrow/type_fwd.h>\n#include \"db/sqlengine/common/util.h\"\n\nnamespace zvec::sqlengine {\n\narrow::Status CheckNotFilteredOp::CheckNotFilteredFunction(\n    cp::KernelContext *ctx, const cp::ExecSpan &batch, cp::ExecResult *out) {\n  CheckNotFilteredState *state =\n      static_cast<CheckNotFilteredState *>(ctx->state());\n  auto *filter = state->args.filter.get();\n  if (filter == nullptr) {\n    return arrow::Status::ExecutionError(\"filter is null\");\n  }\n\n  auto row_span = batch[0].array.GetSpan<uint64_t>(1, batch.length);\n  std::shared_ptr<arrow::BooleanBuilder> builder =\n      std::make_shared<arrow::BooleanBuilder>(ctx->memory_pool());\n  ARROW_RETURN_NOT_OK(builder->Reserve(batch.length));\n  for (int i = 0; i < batch.length; i++) {\n    builder->UnsafeAppend(!filter->is_filtered(row_span[i]));\n  }\n  std::shared_ptr<arrow::Array> result_array;\n  ARROW_RETURN_NOT_OK(builder->Finish(&result_array));\n\n  out->value = std::move(result_array->data());\n  return arrow::Status::OK();\n}\n\narrow::Result<std::unique_ptr<arrow::compute::KernelState>>\nCheckNotFilteredOp::InitExprValue(arrow::compute::KernelContext *,\n                                  const arrow::compute::KernelInitArgs &args) {\n  auto func_options =\n      static_cast<const 
CheckNotFilteredOp::Options *>(args.options);\n  return std::make_unique<CheckNotFilteredOp::CheckNotFilteredState>(\n      func_options ? func_options : nullptr);\n}\n\n\narrow::Status CheckNotFilteredOp::register_op() {\n  static Options options = Options::Defaults();\n  auto func = std::make_shared<cp::ScalarFunction>(\n      kCheckNotFiltered, cp::Arity::Unary(), func_doc, &options, false);\n  cp::ScalarKernel kernel({arrow::uint64()}, arrow::boolean(),\n                          CheckNotFilteredFunction, InitExprValue);\n\n  kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;\n  kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;\n\n  ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));\n\n  auto registry = cp::GetFunctionRegistry();\n  ARROW_RETURN_NOT_OK(registry->AddFunction(std::move(func)));\n\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/check_not_filtered_op.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <arrow/api.h>\n#include <arrow/compute/api.h>\n#include \"db/index/common/index_filter.h\"\n\n\nnamespace zvec::sqlengine {\n\nnamespace cp = arrow::compute;\n\nclass CheckNotFilteredOp {\n public:\n  class CheckNotFilteredOptionsType\n      : public arrow::compute::FunctionOptionsType {\n   public:\n    static const arrow::compute::FunctionOptionsType *GetInstance() {\n      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(\n          new CheckNotFilteredOptionsType());\n      return instance.get();\n    }\n\n    const char *type_name() const override {\n      return \"CheckNotFiltered\";\n    }\n\n    std::string Stringify(\n        const arrow::compute::FunctionOptions & /*options*/) const override {\n      return type_name();\n    }\n\n    bool Compare(const arrow::compute::FunctionOptions &options,\n                 const arrow::compute::FunctionOptions &other) const override {\n      const auto &lop = static_cast<const Options &>(options);\n      const auto &rop = static_cast<const Options &>(other);\n      return lop.args.filter == rop.args.filter;\n    }\n\n    std::unique_ptr<arrow::compute::FunctionOptions> Copy(\n        const arrow::compute::FunctionOptions &options) const override {\n      const auto &opts = static_cast<const Options &>(options);\n      return 
std::make_unique<Options>(opts.args.filter);\n    }\n  };\n\n  struct Args {\n    IndexFilter::Ptr filter;\n  };\n\n  class Options : public cp::FunctionOptions {\n   public:\n    Options() : Options(nullptr) {}\n\n    Options(IndexFilter::Ptr filter)\n        : cp::FunctionOptions(CheckNotFilteredOptionsType::GetInstance()),\n          args{std::move(filter)} {}\n\n    static inline constexpr char const kTypeName[] =\n        \"CheckNotFilteredFunctionOptions\";\n\n    static Options Defaults() {\n      return Options();\n    }\n\n    Args get_args() const {\n      return args;\n    }\n\n    Args args;\n  };\n\n  struct CheckNotFilteredState : public arrow::compute::KernelState {\n    Args args;\n\n    explicit CheckNotFilteredState(const Options *o) {\n      if (o) {\n        args = o->get_args();\n      }\n    }\n  };\n\n\n  static arrow::Status CheckNotFilteredFunction(cp::KernelContext *ctx,\n                                                const cp::ExecSpan &batch,\n                                                cp::ExecResult *out);\n\n  static inline const cp::FunctionDoc func_doc{\n      \"check if the segment row id is not filtered\",\n      \"returns not_filtered(x)\",\n      {\"segment_row_id\"},\n      \"Options\"};\n\n  static arrow::Status register_op();\n\n  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>\n  InitExprValue(arrow::compute::KernelContext *,\n                const arrow::compute::KernelInitArgs &args);\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/contain_op.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"db/sqlengine/planner/ops/contain_op.h\"\n#include <memory>\n#include <arrow/api.h>\n#include <zvec/db/type.h>\n#include \"db/sqlengine/common/util.h\"\n\nnamespace zvec::sqlengine {\n\nenum class ContainType { kContainAll, kContainAny };\ntemplate <typename ArrowArrayType, ContainType contain_type>\nbool match_value(const arrow::Array *value_array, int64_t offset,\n                 int64_t length, const arrow::Array *value_set_array) {\n  auto *value_typed_arr = static_cast<const ArrowArrayType *>(value_array);\n  auto *value_set_typed_arr =\n      static_cast<const ArrowArrayType *>(value_set_array);\n  if (contain_type == ContainType::kContainAll) {\n    for (int j = 0; j < value_set_typed_arr->length(); ++j) {\n      bool contain = false;\n      for (int i = 0; i < length; ++i) {\n        if constexpr (std::is_same_v<ArrowArrayType, arrow::StringArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::LargeStringArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::BinaryArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::LargeBinaryArray>) {\n          if (value_typed_arr->GetView(offset + i) ==\n              value_set_typed_arr->GetView(j)) {\n            contain = true;\n            break;\n          }\n        } else {\n          if (value_typed_arr->Value(offset + i) ==\n   
           value_set_typed_arr->Value(j)) {\n            contain = true;\n            break;\n          }\n        }\n      }\n      if (!contain) {\n        return false;\n      }\n    }\n    return true;\n  } else {  // contain_type == kContainAny\n    for (int j = 0; j < value_set_typed_arr->length(); ++j) {\n      for (int i = 0; i < length; ++i) {\n        if constexpr (std::is_same_v<ArrowArrayType, arrow::StringArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::LargeStringArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::BinaryArray> ||\n                      std::is_same_v<ArrowArrayType, arrow::LargeBinaryArray>) {\n          if (value_typed_arr->GetView(offset + i) ==\n              value_set_typed_arr->GetView(j)) {\n            return true;\n          }\n        } else {\n          if (value_typed_arr->Value(offset + i) ==\n              value_set_typed_arr->Value(j)) {\n            return true;\n          }\n        }\n      }\n    }\n    return false;\n  }\n}\n\ntemplate <ContainType contain_type>\narrow::Status ContainFunction(cp::KernelContext *ctx, const cp::ExecSpan &batch,\n                              cp::ExecResult *out) {\n  auto *state = static_cast<ContainOp::ContainState *>(ctx->state());\n  const auto &value_set = state->args.value_set;\n  if (value_set == nullptr) {\n    return arrow::Status::ExecutionError(\"value_set is null\");\n  }\n\n  const auto &input_array = batch[0].array;\n  if (batch[0].type()->id() != arrow::Type::LIST) {\n    return arrow::Status::ExecutionError(\"batch type is not list\");\n  }\n  if (!input_array.type->field(0)->type()->Equals(value_set->type())) {\n    return arrow::Status::ExecutionError(\n        \"value_set type is not equal to batch type\");\n  }\n  auto list_array =\n      std::dynamic_pointer_cast<arrow::ListArray>(input_array.ToArray());\n\n  std::shared_ptr<arrow::BooleanBuilder> builder =\n      
std::make_shared<arrow::BooleanBuilder>(ctx->memory_pool());\n  ARROW_RETURN_NOT_OK(builder->Reserve(batch.length));\n  const auto &list_value_array = list_array->values();\n  for (int i = 0; i < batch.length; i++) {\n    // a whole list may be null for a doc\n    if (list_array->IsNull(i)) {\n      ARROW_RETURN_NOT_OK(builder->AppendNull());\n      continue;\n    }\n    auto length = list_array->value_length(i);\n    auto offset = list_array->value_offset(i);\n    bool match = false;\n    switch (state->args.data_type) {\n      case DataType::ARRAY_INT32:\n        match = match_value<arrow::Int32Array, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_UINT32:\n        match = match_value<arrow::UInt32Array, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_INT64:\n        match = match_value<arrow::Int64Array, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_UINT64:\n        match = match_value<arrow::UInt64Array, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_FLOAT:\n        match = match_value<arrow::FloatArray, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_DOUBLE:\n        match = match_value<arrow::DoubleArray, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_STRING:\n        match = match_value<arrow::StringArray, contain_type>(\n            list_value_array.get(), offset, length, value_set.get());\n        break;\n\n      case DataType::ARRAY_BOOL:\n        match = match_value<arrow::BooleanArray, contain_type>(\n            list_value_array.get(), offset, length, 
value_set.get());\n        break;\n\n      default:\n        return arrow::Status::ExecutionError(\"unsupported data type\");\n    }\n    ARROW_RETURN_NOT_OK(builder->Append(match));\n  }\n\n  std::shared_ptr<arrow::Array> result_array;\n  ARROW_RETURN_NOT_OK(builder->Finish(&result_array));\n\n  out->value = std::move(result_array->data());\n  //   out->array_data()->type = batch[0].type()->GetShared::Ptr();\n  return arrow::Status::OK();\n}\n\narrow::Result<std::unique_ptr<arrow::compute::KernelState>>\nContainOp::InitExprValue(arrow::compute::KernelContext *,\n                         const arrow::compute::KernelInitArgs &args) {\n  auto func_options = static_cast<const ContainOp::Options *>(args.options);\n  return std::make_unique<ContainOp::ContainState>(func_options ? func_options\n                                                                : nullptr);\n}\n\n\narrow::Status ContainOp::register_op() {\n  static Options options = Options::Defaults();\n\n  {\n    auto func = std::make_shared<cp::ScalarFunction>(\n        kContainAll, cp::Arity::Unary(), func_doc, &options, false);\n    for (const auto &type :\n         {arrow::int32(), arrow::uint32(), arrow::int64(), arrow::uint64(),\n          arrow::float32(), arrow::float64(), arrow::utf8(),\n          arrow::boolean()}) {\n      cp::ScalarKernel kernel({arrow::list(type)}, arrow::boolean(),\n                              ContainFunction<ContainType::kContainAll>,\n                              InitExprValue);\n      kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;\n      kernel.null_handling = cp::NullHandling::INTERSECTION;\n      ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));\n    }\n\n    auto registry = cp::GetFunctionRegistry();\n    ARROW_RETURN_NOT_OK(registry->AddFunction(std::move(func)));\n  }\n\n  {\n    auto func = std::make_shared<cp::ScalarFunction>(\n        kContainAny, cp::Arity::Unary(), func_doc, &options, false);\n    for (const auto &type :\n         
{arrow::int32(), arrow::uint32(), arrow::int64(), arrow::uint64(),\n          arrow::float32(), arrow::float64(), arrow::utf8(),\n          arrow::boolean()}) {\n      cp::ScalarKernel kernel({arrow::list(type)}, arrow::boolean(),\n                              ContainFunction<ContainType::kContainAny>,\n                              InitExprValue);\n      kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;\n      kernel.null_handling = cp::NullHandling::INTERSECTION;\n      ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));\n    }\n\n    auto registry = cp::GetFunctionRegistry();\n    ARROW_RETURN_NOT_OK(registry->AddFunction(std::move(func)));\n  }\n\n  return arrow::Status::OK();\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/contain_op.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <memory>\n#include <arrow/api.h>\n#include <arrow/compute/api.h>\n#include <zvec/db/type.h>\n\n\nnamespace zvec::sqlengine {\n\nnamespace cp = arrow::compute;\n\nclass ContainOp {\n public:\n  class ContainOptionsType : public arrow::compute::FunctionOptionsType {\n   public:\n    static const arrow::compute::FunctionOptionsType *GetInstance() {\n      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(\n          new ContainOptionsType());\n      return instance.get();\n    }\n\n    const char *type_name() const override {\n      return \"Contain\";\n    }\n\n    std::string Stringify(\n        const arrow::compute::FunctionOptions & /*options*/) const override {\n      return type_name();\n    }\n\n    bool Compare(const arrow::compute::FunctionOptions &options,\n                 const arrow::compute::FunctionOptions &other) const override {\n      const auto &lop = static_cast<const Options &>(options);\n      const auto &rop = static_cast<const Options &>(other);\n      if (lop.args.data_type != rop.args.data_type) {\n        return false;\n      }\n      auto *left_value = lop.args.value_set.get();\n      auto *right_value = rop.args.value_set.get();\n      if (left_value && right_value) {\n        return left_value->Equals(*right_value);\n      } else if (!left_value && !right_value) {\n        
return true;\n      } else {\n        return false;\n      }\n    }\n\n    std::unique_ptr<arrow::compute::FunctionOptions> Copy(\n        const arrow::compute::FunctionOptions &options) const override {\n      const auto &opts = static_cast<const Options &>(options);\n      return std::make_unique<Options>(opts.args);\n    }\n  };\n\n  struct Args {\n    std::shared_ptr<arrow::Array> value_set;\n    DataType data_type;\n  };\n\n  class Options : public cp::FunctionOptions {\n   public:\n    Options() : Options(Args{}) {}\n\n    Options(Args args)\n        : cp::FunctionOptions(ContainOptionsType::GetInstance()),\n          args(std::move(args)) {}\n\n    static inline constexpr char const kTypeName[] = \"ContainFunctionOptions\";\n\n    static Options Defaults() {\n      return Options();\n    }\n\n    Args get_args() const {\n      return args;\n    }\n\n    Args args;\n  };\n\n  struct ContainState : public arrow::compute::KernelState {\n    Args args;\n\n    explicit ContainState(const Options *o) {\n      if (o) {\n        args = o->get_args();\n      }\n    }\n  };\n\n\n  static inline const cp::FunctionDoc func_doc{\n      \"check if contain_all/any\",\n      \"returns contain_all/any(x)\",\n      {\"value_set\"},\n      \"Options\",\n  };\n\n  static arrow::Status register_op();\n\n  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>\n  InitExprValue(arrow::compute::KernelContext *,\n                const arrow::compute::KernelInitArgs &args);\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/fetch_vector_op.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"db/sqlengine/planner/ops/fetch_vector_op.h\"\n#include <arrow/status.h>\n#include \"db/index/column/vector_column/combined_vector_column_indexer.h\"\n#include \"db/sqlengine/common/util.h\"\n\nnamespace zvec::sqlengine {\n\n#define CHECK_ARROW_STATUS(msg, status)                                        \\\n  if (!status.ok()) {                                                          \\\n    return tl::make_unexpected(Status::InternalError(msg, status.ToString())); \\\n  }\n\ntemplate <typename Iter>\nResult<std::shared_ptr<arrow::Array>> fetch_dense_vector_helper(\n    const CombinedVectorColumnIndexer *indexer, Iter begin, Iter end) {\n  size_t count = std::distance(begin, end);\n  arrow::BinaryBuilder vector_builder;\n  CHECK_ARROW_STATUS(\"Reserve vector builder failed:\",\n                     vector_builder.Reserve(count));\n  for (Iter i = begin; i != end; ++i) {\n    auto vector_res = indexer->Fetch(*i);\n    if (!vector_res) {\n      return tl::make_unexpected(vector_res.error());\n    }\n    const auto &data = std::get<vector_column_params::DenseVectorBuffer>(\n                           vector_res.value().vector_buffer)\n                           .data;\n    if (data.empty()) {\n      CHECK_ARROW_STATUS(\"Append null failed:\", vector_builder.AppendNull());\n    } else {\n      CHECK_ARROW_STATUS(\"Append vector 
failed:\", vector_builder.Append(data));\n    }\n  }\n  auto vector_array_ret = vector_builder.Finish();\n  if (!vector_array_ret.ok()) {\n    return tl::make_unexpected(Status::InternalError(\n        \"finish vector builder failed:\", vector_array_ret.status().ToString()));\n  }\n  return vector_array_ret.MoveValueUnsafe();\n}\n\nResult<std::shared_ptr<arrow::Array>> FetchVectorOp::fetch_dense_vector(\n    const Segment &segment, const std::string &vector_name,\n    const std::vector<int> &rows) {\n  auto indexer = segment.get_combined_vector_indexer(vector_name);\n  if (!indexer) {\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"vector index not found:\", vector_name));\n  }\n  return fetch_dense_vector_helper(indexer.get(), rows.begin(), rows.end());\n}\n\n\ntemplate <typename Iter>\nResult<std::shared_ptr<arrow::Array>> fetch_sparse_vector_helper(\n    const CombinedVectorColumnIndexer *indexer, Iter begin, Iter end) {\n  size_t count = std::distance(begin, end);\n  std::unique_ptr<arrow::StructBuilder> sparse_builder;\n  arrow::BinaryBuilder *sparse_index_builder = nullptr;\n  arrow::BinaryBuilder *sparse_value_builder = nullptr;\n  auto array_builder = arrow::MakeBuilder(Util::sparse_type());\n  if (!array_builder.ok()) {\n    return tl::make_unexpected(Status::InternalError(\n        \"make builder failed:\", array_builder.status().ToString()));\n  }\n  sparse_builder.reset(dynamic_cast<arrow::StructBuilder *>(\n      array_builder.ValueUnsafe().release()));\n  sparse_index_builder =\n      dynamic_cast<arrow::BinaryBuilder *>(sparse_builder->field_builder(0));\n  sparse_value_builder =\n      dynamic_cast<arrow::BinaryBuilder *>(sparse_builder->field_builder(1));\n\n  CHECK_ARROW_STATUS(\"Reserve failed:\", sparse_builder->Reserve(count));\n  CHECK_ARROW_STATUS(\"Reserve failed:\", sparse_index_builder->Reserve(count));\n  CHECK_ARROW_STATUS(\"Reserve failed:\", sparse_value_builder->Reserve(count));\n  for (auto i = begin; i != end; 
i++) {\n    auto vector_res = indexer->Fetch(*i);\n    if (!vector_res) {\n      return tl::make_unexpected(vector_res.error());\n    }\n    const auto &data = std::get<vector_column_params::SparseVectorBuffer>(\n        vector_res.value().vector_buffer);\n    if (data.indices.empty()) {\n      // will auto append to sub builder\n      CHECK_ARROW_STATUS(\"Append failed\", sparse_builder->AppendNull());\n    } else {\n      CHECK_ARROW_STATUS(\"Append failed\", sparse_builder->Append(true));\n      CHECK_ARROW_STATUS(\"Append failed\",\n                         sparse_index_builder->Append(data.indices));\n      CHECK_ARROW_STATUS(\"Append failed\",\n                         sparse_value_builder->Append(data.values));\n    }\n  }\n\n  auto vector_array_ret = sparse_builder->Finish();\n  if (!vector_array_ret.ok()) {\n    return tl::make_unexpected(Status::InternalError(\n        \"finish vector builder failed:\", vector_array_ret.status().ToString()));\n  }\n  return vector_array_ret.MoveValueUnsafe();\n}\n\nResult<std::shared_ptr<arrow::Array>> FetchVectorOp::fetch_sparse_vector(\n    const Segment &segment, const std::string &vector_name,\n    const std::vector<int> &rows) {\n  auto indexer = segment.get_combined_vector_indexer(vector_name);\n  if (!indexer) {\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"vector index not found:\", vector_name));\n  }\n  return fetch_sparse_vector_helper(indexer.get(), rows.begin(), rows.end());\n}\n\nstd::unique_ptr<cp::FunctionOptions> FetchVectorOp::FunctionOptionsType::Copy(\n    const cp::FunctionOptions &) const {\n  return std::make_unique<FetchVectorFunctionOptions>();\n}\n\narrow::Status FetchVectorOp::FetchVectorFunction(cp::KernelContext *ctx,\n                                                 const cp::ExecSpan &batch,\n                                                 cp::ExecResult *out) {\n  FetchVectorState *state = static_cast<FetchVectorState *>(ctx->state());\n  if (state->args.indexer == 
nullptr) {\n    return arrow::Status::ExecutionError(\"indexer is null\");\n  }\n\n  auto row_span = batch[0].array.GetSpan<uint64_t>(1, batch.length);\n  Result<std::shared_ptr<arrow::Array>> res;\n  if (state->args.is_dense) {\n    res = fetch_dense_vector_helper(state->args.indexer.get(), row_span.begin(),\n                                    row_span.end());\n  } else {\n    res = fetch_sparse_vector_helper(state->args.indexer.get(),\n                                     row_span.begin(), row_span.end());\n  }\n  if (!res) {\n    return arrow::Status::ExecutionError(\"fetch vector failed:\",\n                                         res.error().c_str());\n  }\n\n  out->value = std::move(res.value()->data());\n  return arrow::Status::OK();\n}\n\narrow::Result<std::unique_ptr<arrow::compute::KernelState>>\nFetchVectorOp::InitExprValue(arrow::compute::KernelContext *,\n                             const arrow::compute::KernelInitArgs &args) {\n  auto func_options = static_cast<const FetchVectorOp::Options *>(args.options);\n  return std::make_unique<FetchVectorOp::FetchVectorState>(\n      func_options ? 
func_options : nullptr);\n}\n\n\narrow::Status FetchVectorOp::register_op() {\n  static Options options = Options::Defaults();\n  {\n    const std::string name = \"fetch_vector\";\n    auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(),\n                                                     func_doc, &options, false);\n    cp::ScalarKernel kernel({arrow::uint64()}, arrow::binary(),\n                            FetchVectorFunction, InitExprValue);\n\n    kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;\n    kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;\n\n    ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));\n\n    auto registry = cp::GetFunctionRegistry();\n    ARROW_RETURN_NOT_OK(registry->AddFunction(std::move(func)));\n  }\n\n  {\n    const std::string name = \"fetch_sparse_vector\";\n    auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(),\n                                                     func_doc, &options, false);\n    cp::ScalarKernel kernel({arrow::uint64()}, Util::sparse_type(),\n                            FetchVectorFunction, InitExprValue);\n\n    kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;\n    kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;\n\n    ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));\n\n    auto registry = cp::GetFunctionRegistry();\n    ARROW_RETURN_NOT_OK(registry->AddFunction(std::move(func)));\n  }\n\n  return arrow::Status::OK();\n}\n\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/ops/fetch_vector_op.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <arrow/api.h>\n#include <zvec/db/status.h>\n#include \"db/index/column/vector_column/combined_vector_column_indexer.h\"\n#include \"db/index/segment/segment.h\"\n\nnamespace zvec::sqlengine {\n\nnamespace cp = arrow::compute;\n\ntemplate <typename OptionsType>\nstruct OptionsWrapper : public cp::KernelState {\n  explicit OptionsWrapper(OptionsType options) : options(std::move(options)) {}\n\n  static arrow::Result<std::unique_ptr<KernelState>> Init(\n      cp::KernelContext * /*ctx*/, const cp::KernelInitArgs &args) {\n    if (auto options = static_cast<const OptionsType *>(args.options)) {\n      return std::make_unique<OptionsWrapper>(*options);\n    }\n\n    return arrow::Status::Invalid(\n        \"Attempted to initialize KernelState from null FunctionOptions\");\n  }\n\n  static const OptionsType &Get(const KernelState &state) {\n    return ::arrow::internal::checked_cast<const OptionsWrapper &>(state)\n        .options;\n  }\n\n  static const OptionsType &Get(cp::KernelContext *ctx) {\n    return Get(*ctx->state());\n  }\n\n  OptionsType options;\n};\n\nclass FetchVectorOp {\n public:\n  static Result<std::shared_ptr<arrow::Array>> fetch_dense_vector(\n      const Segment &segment, const std::string &vector_name,\n      const std::vector<int> &rows);\n\n  static Result<std::shared_ptr<arrow::Array>> 
fetch_sparse_vector(\n      const Segment &segment, const std::string &vector_name,\n      const std::vector<int> &rows);\n\n  static arrow::Status register_op();\n\n  class FunctionOptionsType : public cp::FunctionOptionsType {\n    const char *type_name() const override {\n      return \"FetchVectorFunctionOptionsType\";\n    }\n\n    std::string Stringify(const cp::FunctionOptions &) const override {\n      return \"FetchVectorFunctionOptionsType\";\n    }\n\n    bool Compare(const cp::FunctionOptions &,\n                 const cp::FunctionOptions &) const override {\n      return false;\n    }\n\n    std::unique_ptr<cp::FunctionOptions> Copy(\n        const cp::FunctionOptions &) const override;\n    // optional: support for serialization\n    // Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const\n    // override; Result<std::unique_ptr<FunctionOptions>> Deserialize(const\n    // Buffer&) const override;\n  };\n\n  static cp::FunctionOptionsType *GetFetchVectorFunctionOptionsType() {\n    static FunctionOptionsType options_type;\n    return &options_type;\n  }\n\n  class FetchVectorFunctionOptions : public cp::FunctionOptions {\n   public:\n    FetchVectorFunctionOptions()\n        : cp::FunctionOptions(GetFetchVectorFunctionOptionsType()) {}\n  };\n\n  class FetchVectorOptionsType : public arrow::compute::FunctionOptionsType {\n   public:\n    static const arrow::compute::FunctionOptionsType *GetInstance() {\n      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(\n          new FetchVectorOptionsType());\n      return instance.get();\n    }\n\n    const char *type_name() const override {\n      return \"FetchVector\";\n    }\n\n    std::string Stringify(\n        const arrow::compute::FunctionOptions & /*options*/) const override {\n      return type_name();\n    }\n\n    bool Compare(const arrow::compute::FunctionOptions &options,\n                 const arrow::compute::FunctionOptions &other) const override {\n      
const auto &lop = static_cast<const Options &>(options);\n      const auto &rop = static_cast<const Options &>(other);\n      return lop.args.is_dense == rop.args.is_dense &&\n             lop.args.indexer == rop.args.indexer;\n    }\n\n    std::unique_ptr<arrow::compute::FunctionOptions> Copy(\n        const arrow::compute::FunctionOptions &options) const override {\n      const auto &opts = static_cast<const Options &>(options);\n      return std::make_unique<Options>(opts.args.indexer, opts.args.is_dense);\n    }\n  };\n\n  struct Args {\n    CombinedVectorColumnIndexer::Ptr indexer;\n    bool is_dense{true};\n  };\n\n  class Options : public cp::FunctionOptions {\n   public:\n    Options() : Options(nullptr, true) {}\n\n    Options(CombinedVectorColumnIndexer::Ptr indexer, bool is_dense)\n        : cp::FunctionOptions(FetchVectorOptionsType::GetInstance()),\n          args{indexer, is_dense} {}\n\n    static inline constexpr char const kTypeName[] =\n        \"FetchVectorFunctionOptions\";\n\n    static Options Defaults() {\n      return Options();\n    }\n\n    Args get_args() const {\n      return args;\n    }\n\n    Args args;\n  };\n\n  struct FetchVectorState : public arrow::compute::KernelState {\n    Args args;\n\n    explicit FetchVectorState(const Options *o) {\n      if (o) {\n        args = o->get_args();\n      }\n    }\n  };\n\n\n  static arrow::Status FetchVectorFunction(cp::KernelContext *ctx,\n                                           const cp::ExecSpan &batch,\n                                           cp::ExecResult *out);\n\n  static inline const cp::FunctionDoc func_doc{\n      \"fetch dense or sparse vector\",\n      \"returns fetch_vector(x)\",\n      {\"segment_row_id\"},\n      \"Options\",\n  };\n\n  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>\n  InitExprValue(arrow::compute::KernelContext *,\n                const arrow::compute::KernelInitArgs &args);\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/optimizer.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"optimizer.h\"\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/config.h>\n#include <zvec/db/type.h>\n#include \"db/sqlengine/analyzer/query_info_helper.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/planner/invert_search.h\"\n\nnamespace zvec::sqlengine {\n\n\nOptimizer::Ptr InvertCondOptimizer::CreateInvertCondOptimizer(\n    CollectionSchema *collection_schema) {\n  return std::make_shared<InvertCondOptimizer>(collection_schema);\n}\n\n// return true if invert cond should be convert to forward cond\nbool InvertCondOptimizer::ratio_rule(Segment *segment,\n                                     QueryRelNode *invert_cond) {\n  if (invert_cond == nullptr) {\n    return false;\n  }\n\n  if (invert_cond->op() == QueryNodeOp::Q_LIKE ||\n      invert_cond->op() == QueryNodeOp::Q_IN ||\n      invert_cond->op() == QueryNodeOp::Q_CONTAIN_ANY ||\n      invert_cond->op() == QueryNodeOp::Q_CONTAIN_ALL ||\n      invert_cond->op() == QueryNodeOp::Q_EQ ||\n      invert_cond->op() == QueryNodeOp::Q_NE) {\n    return false;\n  }\n\n  const QueryNode::Ptr &left = invert_cond->left();\n\n  const std::string column_name = left->text();\n  auto invert_column_reader = segment->get_scalar_indexer(column_name);\n  if (invert_column_reader == nullptr) {\n    LOG_ERROR(\"Get invert column reader failed. 
invert_cond [%s]\",\n              invert_cond->text().c_str());\n    return false;\n  }\n\n  CompareOp oper = InvertSearch::query_nodeop2search_op(invert_cond->op());\n  if (oper == CompareOp::NONE) {\n    LOG_ERROR(\"Optimizer get search operator failed. invert_cond [%s]\",\n              invert_cond->text().c_str());\n    return false;\n  }\n\n  std::string invert_term = invert_cond->right()->text();\n\n  float invert_to_forward_scan_ratio =\n      GlobalConfig::Instance().invert_to_forward_scan_ratio();\n\n  uint64_t total_size = 0;\n  uint64_t range_size = 0;\n  Status status = invert_column_reader->evaluate_ratio(\n      invert_term, oper, &total_size, &range_size);\n  if (!status.ok()) {\n    LOG_WARN(\"Optimizer evaluate failed. invert_cond [%s] err[%s]\",\n             invert_cond->text().c_str(), status.c_str());\n    return false;\n  }\n\n  float ratio = 0.0;\n  if (total_size > 0) {\n    ratio = (range_size * 1.0) / total_size;\n  }\n\n  if (ratio < invert_to_forward_scan_ratio) {\n    return false;\n  }\n\n  LOG_DEBUG(\n      \"Optimizer evaluate result reach threshold. 
\"\n      \"invert_cond [%s] total_size[%zu] range_size[%zu] ratio[%f]\",\n      invert_cond->text().c_str(), (size_t)total_size, (size_t)range_size,\n      ratio);\n\n  return true;\n}\n\n// return true if ratio rule return false, meaning invert cond no need to be\n// optimized by ratio rule and still keep as invert cond is\nbool InvertCondOptimizer::invert_rule(Segment *segment,\n                                      QueryRelNode *invert_cond) {\n  return !ratio_rule(segment, invert_cond);\n}\n\nvoid InvertCondOptimizer::convert_invert_cond_to_forward(\n    QueryInfo *query_info, QueryNode *invert_cond) {\n  if (invert_cond == nullptr) {\n    return;\n  }\n\n  if (invert_cond->type() == QueryNode::QueryNodeType::REL_EXPR) {\n    // convert invert cond to forward cond\n    QueryRelNode *query_rel_node =\n        reinterpret_cast<QueryRelNode *>(invert_cond);\n\n    const QueryNode::Ptr &left = query_rel_node->left();\n    const QueryNode::Ptr &right = query_rel_node->right();\n\n    const std::string column_name = left->text();\n\n    query_rel_node->set_forward();\n\n    // 1. add column to forward field\n    auto forward_field = collection_schema_->get_forward_field(column_name);\n    DataType data_type = forward_field->element_data_type();\n    // currently array invert field won't be converted to forward\n    // bool is_array_type = forward_field->is_array_type();\n    query_info->add_forward_filter_schema_ptr(column_name,\n                                              std::move(forward_field));\n\n    // 2. 
Revert numeric buf to numeric text\n    std::string numeric_text{\"\"};\n    if (QueryInfoHelper::data_buf_2_text(right->text(), data_type,\n                                         &numeric_text)) {\n      right->set_text(numeric_text);\n    }\n\n    return;\n  }\n\n  convert_invert_cond_to_forward(query_info, invert_cond->left().get());\n  convert_invert_cond_to_forward(query_info, invert_cond->right().get());\n}\n\n\nvoid InvertCondOptimizer::check_node_except_subroot(QueryNode *invert_cond,\n                                                    QueryNode *invert_subroot,\n                                                    bool *rest_has_invert) {\n  if (invert_cond == nullptr) {\n    return;\n  }\n\n  // skip subroot\n  if (invert_subroot != nullptr && invert_cond == invert_subroot) {\n    return;\n  }\n\n  if (invert_cond->type() == QueryNode::QueryNodeType::REL_EXPR) {\n    QueryRelNode *query_rel_node =\n        reinterpret_cast<QueryRelNode *>(invert_cond);\n    if (query_rel_node->rule_result()) {\n      *rest_has_invert = true;\n    }\n    return;\n  }\n\n  check_node_except_subroot(invert_cond->left().get(), invert_subroot,\n                            rest_has_invert);\n  if (*rest_has_invert) {\n    return;\n  }\n  check_node_except_subroot(invert_cond->right().get(), invert_subroot,\n                            rest_has_invert);\n}\n\nbool InvertCondOptimizer::apply_optimize_result(QueryInfo *query_info,\n                                                QueryNode *invert_subroot) {\n  // case 1. invert subroot same as invert cond, do nothing\n  if (invert_subroot == query_info->invert_cond().get()) {\n    LOG_DEBUG(\"No need to move to forward, invert conds are all eligable. \");\n    return false;\n  }\n\n  // case 2. invert subroot is not found\n  if (invert_subroot == nullptr) {\n    // That invert_subroot is nullptr may means different scenarios,\n    // 1. 
All invert conditions should be converted to forward condition\n    // according to optimize rule.\n    // 2. Some invert condition should be converted to forward, which result in\n    // left invert conditions are not able to be invert condition any more, eg:\n    // A or B B won't be invert cond after A converted to forward. We need only\n    // to optimize scenario 1, and leave scenario 2 untouched. Achieve the check\n    // also by check_node_except_subroot same as in case 3.\n\n    bool rest_has_invert = false;\n    check_node_except_subroot(query_info->invert_cond().get(), nullptr,\n                              &rest_has_invert);\n    if (rest_has_invert) {\n      LOG_DEBUG(\n          \"invert_subroot is not found, but failed in \"\n          \"check_node_except_subroot\");\n      return false;\n    }\n\n    QueryNode::Ptr subroot_ptr = query_info->invert_cond();\n\n    query_info->set_invert_cond(nullptr);\n\n    // convert invert cond to forward cond\n    convert_invert_cond_to_forward(query_info, subroot_ptr.get());\n\n    // move to forward cond\n    if (query_info->filter_cond() == nullptr) {\n      query_info->set_filter_cond(std::move(subroot_ptr));\n    } else {\n      QueryNode::Ptr filter_node = std::make_shared<QueryNode>();\n      filter_node->set_op(QueryNodeOp::Q_AND);\n      filter_node->set_left(query_info->filter_cond());\n      filter_node->set_right(std::move(subroot_ptr));\n      query_info->set_filter_cond(std::move(filter_node));\n    }\n\n    LOG_DEBUG(\"All invert conds moved to forward cond. forward conds [%s]\",\n              query_info->filter_cond()->text().c_str());\n\n    return true;\n  }\n\n  // case 3. subroot is found and be part of invert cond\n  LOG_DEBUG(\n      \"find invert_subroot in invert cond. \"\n      \"invert cond [%s] and invert_subroot [%s]. 
\",\n      query_info->invert_cond()->text().c_str(),\n      invert_subroot->text().c_str());\n\n  // If other nodes outside invert subroot still be invert cond,\n  // these nodes should not be convert to forward cond. Not to optimize.\n  bool rest_has_invert = false;\n  check_node_except_subroot(query_info->invert_cond().get(), invert_subroot,\n                            &rest_has_invert);\n  if (rest_has_invert) {\n    LOG_DEBUG(\"invert_subroot failed in check_node_except_subroot\");\n    return false;\n  }\n\n  QueryNode::Ptr invert_subroot_ptr =\n      invert_subroot->detach_from_invert_cond(query_info);\n\n  QueryNode::Ptr invert2forward = query_info->invert_cond();\n\n  // convert rest of invert cond to forward cond\n  convert_invert_cond_to_forward(query_info, invert2forward.get());\n\n  // move to forward cond\n  if (query_info->filter_cond() == nullptr) {\n    query_info->set_filter_cond(std::move(invert2forward));\n  } else {\n    QueryNode::Ptr filter_node = std::make_shared<QueryNode>();\n    filter_node->set_op(QueryNodeOp::Q_AND);\n    filter_node->set_left(query_info->filter_cond());\n    filter_node->set_right(std::move(invert2forward));\n    query_info->set_filter_cond(std::move(filter_node));\n  }\n\n  // set subroot as invert cond\n  query_info->set_invert_cond(std::move(invert_subroot_ptr));\n\n  LOG_DEBUG(\"Optimized. forward cond [%s], invert cond [%s]. 
\",\n            query_info->filter_cond()->text().c_str(),\n            query_info->invert_cond()->text().c_str());\n\n  return true;\n}\n\nbool InvertCondOptimizer::optimize(Segment *segment, QueryInfo *query_info) {\n  auto invert_cond = query_info->invert_cond();\n  // TODO: check if support optimize for mutable\n  if (invert_cond == nullptr) {\n    return false;\n  }\n\n  // find invert subroot after considering ratio rule,\n  // specifically, which invert cond subroot is still eligable.\n  SubRootResult invert_subroot;\n  std::function<bool(QueryRelNode * node)> rule = std::bind(\n      &InvertCondOptimizer::invert_rule, this, segment, std::placeholders::_1);\n  QueryInfoHelper::find_subroot_by_rule(invert_cond.get(), rule,\n                                        &invert_subroot);\n\n  return apply_optimize_result(query_info, invert_subroot.subroot);\n  ;\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/optimizer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass Optimizer {\n public:\n  using Ptr = std::shared_ptr<Optimizer>;\n\n  virtual bool optimize(Segment *segment, QueryInfo *query_info) = 0;\n};\n\nclass InvertCondOptimizer : public Optimizer {\n public:\n  explicit InvertCondOptimizer(CollectionSchema *collection_schema)\n      : collection_schema_(collection_schema) {}\n\n  virtual ~InvertCondOptimizer() = default;\n\n public:\n  static Optimizer::Ptr CreateInvertCondOptimizer(\n      CollectionSchema *collection_schema);\n\n public:\n  bool optimize(Segment *segment, QueryInfo *query_info) override;\n\n protected:\n  virtual bool invert_rule(Segment *segment, QueryRelNode *invert_cond);\n\n private:\n  bool ratio_rule(Segment *segment, QueryRelNode *invert_cond);\n\n  bool apply_optimize_result(QueryInfo *query_info, QueryNode *invert_subroot);\n\n  void convert_invert_cond_to_forward(QueryInfo *query_info,\n                                      QueryNode *invert_cond);\n\n  void check_node_except_subroot(QueryNode *invert_cond,\n                                 QueryNode *invert_subroot,\n                                 bool *rest_has_invert);\n\n private:\n  CollectionSchema *collection_schema_{nullptr};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/plan_info.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"plan_info.h\"\n#include <arrow/api.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include \"db/common/error_code.h\"\n\nnamespace zvec::sqlengine {\n\nResult<std::unique_ptr<arrow::RecordBatchReader>>\nPlanInfo::execute_to_reader() {\n  auto res = arrow::acero::DeclarationToReader(plan_);\n  if (!res.ok()) {\n    return tl::make_unexpected(Status::InternalError(\n        \"execute plan_info failed: \", res.status().ToString()));\n  }\n  return res.MoveValueUnsafe();\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/plan_info.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <list>\n#include <memory>\n#include <string>\n#include <arrow/acero/api.h>\n#include <arrow/acero/exec_plan.h>\n#include <zvec/db/status.h>\n\nnamespace zvec::sqlengine {\n\nclass PlanInfo {\n public:\n  using Ptr = std::shared_ptr<PlanInfo>;\n\n  PlanInfo(arrow::acero::Declaration plan,\n           std::shared_ptr<arrow::Schema> schema)\n      : plan_(std::move(plan)), schema_(std::move(schema)) {}\n\n  Result<std::unique_ptr<arrow::RecordBatchReader>> execute_to_reader();\n\n  std::shared_ptr<arrow::Schema> schema() const {\n    return schema_;\n  }\n\n private:\n  arrow::acero::Declaration plan_;\n  std::shared_ptr<arrow::Schema> schema_;\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/query_planner.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"query_planner.h\"\n#include <memory>\n#include <utility>\n#include <vector>\n#include <arrow/acero/api.h>\n#include <arrow/compute/api.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n#include \"db/common/global_resource.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n#include \"db/sqlengine/analyzer/query_node.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/planner/invert_recall_node.h\"\n#include \"db/sqlengine/planner/ops/check_not_filtered_op.h\"\n#include \"db/sqlengine/planner/ops/contain_op.h\"\n#include \"db/sqlengine/planner/ops/fetch_vector_op.h\"\n#include \"db/sqlengine/planner/plan_info.h\"\n#include \"db/sqlengine/planner/segment_node.h\"\n#include \"db/sqlengine/planner/vector_recall_node.h\"\n#include \"optimizer.h\"\n\nnamespace zvec::sqlengine {\n\nnamespace cp = ::arrow::compute;\nnamespace ac = ::arrow::acero;\n\nQueryPlanner::QueryPlanner(CollectionSchema *schema) : schema_(schema) {}\n\ntemplate <typename T>\nauto convert_node_to_value(const QueryNode::Ptr &node) {\n  const std::string &str = node->text();\n  T value;\n  if constexpr (std::is_same_v<T, int32_t>) {\n    ailego::StringHelper::ToInt32(str, &value);\n  } else 
if constexpr (std::is_same_v<T, int64_t>) {\n    ailego::StringHelper::ToInt64(str, &value);\n  } else if constexpr (std::is_same_v<T, uint32_t>) {\n    ailego::StringHelper::ToUint32(str, &value);\n  } else if constexpr (std::is_same_v<T, uint64_t>) {\n    ailego::StringHelper::ToUint64(str, &value);\n  } else if constexpr (std::is_same_v<T, float>) {\n    ailego::StringHelper::ToFloat(str, &value);\n  } else if constexpr (std::is_same_v<T, double>) {\n    ailego::StringHelper::ToDouble(str, &value);\n  } else {\n    static_assert(!std::is_same_v<T, T>, \"Unsupported type for conversion\");\n  }\n  return value;\n}\n\n\ntemplate <typename ArrowType>\narrow::Result<std::shared_ptr<arrow::Array>> to_arrow_array(\n    const std::vector<QueryNode::Ptr> &input) {\n  using CType = typename ArrowType::c_type;\n  typename arrow::TypeTraits<ArrowType>::BuilderType builder;\n  ARROW_RETURN_NOT_OK(builder.Reserve(input.size()));\n\n  for (auto &s : input) {\n    ARROW_RETURN_NOT_OK(builder.Append(convert_node_to_value<CType>(s)));\n  }\n\n  std::shared_ptr<arrow::Array> array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&array));\n  return array;\n}\n\narrow::Result<std::shared_ptr<arrow::Array>> to_arrow_string_array(\n    const std::vector<QueryNode::Ptr> &input) {\n  arrow::StringBuilder builder;\n  ARROW_RETURN_NOT_OK(builder.Reserve(input.size()));\n\n  for (auto &s : input) {\n    ARROW_RETURN_NOT_OK(builder.Append(s->text()));\n  }\n\n  std::shared_ptr<arrow::Array> array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&array));\n  return array;\n}\n\narrow::Result<std::shared_ptr<arrow::Array>> to_arrow_bool_array(\n    const std::vector<QueryNode::Ptr> &input) {\n  arrow::BooleanBuilder builder;\n  ARROW_RETURN_NOT_OK(builder.Reserve(input.size()));\n\n  for (auto &s : input) {\n    // input is normalized to \"true\" or \"false\"\n    ARROW_RETURN_NOT_OK(builder.Append(s->text() == \"true\"));\n  }\n\n  std::shared_ptr<arrow::Array> array;\n  
ARROW_RETURN_NOT_OK(builder.Finish(&array));\n  return array;\n}\n\narrow::Result<std::shared_ptr<arrow::Array>> create_array_from_list_node(\n    DataType data_type, const QueryListNode *list_node) {\n  auto const &value_expr_list = list_node->value_expr_list();\n  switch (data_type) {\n    case DataType::INT32:\n      return to_arrow_array<arrow::Int32Type>(value_expr_list);\n    case DataType::UINT32:\n      return to_arrow_array<arrow::UInt32Type>(value_expr_list);\n    case DataType::INT64:\n      return to_arrow_array<arrow::Int64Type>(value_expr_list);\n    case DataType::UINT64:\n      return to_arrow_array<arrow::UInt64Type>(value_expr_list);\n    case DataType::FLOAT:\n      return to_arrow_array<arrow::FloatType>(value_expr_list);\n    case DataType::DOUBLE:\n      return to_arrow_array<arrow::DoubleType>(value_expr_list);\n    case DataType::STRING:\n      return to_arrow_string_array(value_expr_list);\n    case DataType::BOOL:\n      return to_arrow_bool_array(value_expr_list);\n    default:\n      LOG_ERROR(\"Unsupported data type for list node. 
%d\", (int)data_type);\n      return arrow::Status::Invalid(\"Unsupported data type for list node.\");\n  }\n}\n\nResult<cp::Expression> QueryPlanner::create_filter_node(\n    const QueryNode *query_node) {\n  const QueryNode *left = query_node->left_node();\n  const QueryNode *right = query_node->right_node();\n\n  arrow::Expression left_exp;\n  DataType data_type;\n  if (left->op() == QueryNodeOp::Q_ID) {\n    left_exp = cp::field_ref(left->text());\n    auto field_schema = schema_->get_forward_field(left->text());\n    data_type = field_schema->data_type();\n  } else if (left->op() == QueryNodeOp::Q_FUNCTION_CALL) {\n    const QueryFuncNode *func_node = dynamic_cast<const QueryFuncNode *>(left);\n    const auto &func_name = func_node->get_func_name_node()->text();\n    const auto &arguments = func_node->arguments();\n    if (func_name == kFuncArrayLength) {\n      left_exp =\n          cp::call(\"list_value_length\", {cp::field_ref(arguments[0]->text())});\n      // assume array_length argument is uint32\n      data_type = DataType::UINT32;\n    } else {\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"unexpected function call\", func_name));\n    }\n  } else {\n    LOG_ERROR(\"Unexpected left op. 
expr[%s]\", query_node->text().c_str());\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"unexpected left op\", left->text()));\n  }\n\n  cp::Expression right_exp;\n  const std::string &filter_value = right->text();\n  auto op = query_node->op();\n  if (op == QueryNodeOp::Q_IS_NULL) {\n    return cp::is_null(std::move(left_exp));\n  } else if (op == QueryNodeOp::Q_IS_NOT_NULL) {\n    return cp::is_valid(std::move(left_exp));\n  }\n\n  // TODO: check invalid filter\n  if (op == QueryNodeOp::Q_IN || op == QueryNodeOp::Q_CONTAIN_ALL ||\n      op == QueryNodeOp::Q_CONTAIN_ANY) {\n    const QueryListNode *list_node = dynamic_cast<const QueryListNode *>(right);\n    auto array_res = create_array_from_list_node(\n        FieldSchema::get_element_data_type(data_type), list_node);\n    if (!array_res.ok()) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"create array failed\", array_res.status().ToString()));\n    }\n    if (op == QueryNodeOp::Q_IN) {\n      auto in_filter = cp::call(\n          \"is_in\", {std::move(left_exp)},\n          std::make_shared<cp::SetLookupOptions>(array_res.MoveValueUnsafe()));\n      if (list_node->exclude()) {\n        return cp::not_(std::move(in_filter));\n      }\n      return in_filter;\n    }\n    auto contain_filter =\n        cp::call(op == QueryNodeOp::Q_CONTAIN_ALL ? 
kContainAll : kContainAny,\n                 {std::move(left_exp)},\n                 std::make_shared<ContainOp::Options>(\n                     ContainOp::Args{array_res.MoveValueUnsafe(), data_type}));\n    if (list_node->exclude()) {\n      return cp::not_(std::move(contain_filter));\n    }\n    return contain_filter;\n  }\n\n  switch (data_type) {\n    case DataType::STRING: {\n      if (op == sqlengine::QueryNodeOp::Q_LIKE) {\n        return cp::call(\"match_like\", {std::move(left_exp)},\n                        cp::MatchSubstringOptions(filter_value));\n      } else {\n        right_exp = cp::literal(filter_value);\n      }\n      break;\n    }\n    case DataType::INT32: {\n      int32_t int32_value;\n      ailego::StringHelper::ToInt32(filter_value, &int32_value);\n      right_exp = cp::literal(int32_value);\n      break;\n    }\n    case DataType::UINT32: {\n      uint32_t uint32_value;\n      ailego::StringHelper::ToUint32(filter_value, &uint32_value);\n      right_exp = cp::literal(uint32_value);\n      break;\n    }\n    case DataType::INT64: {\n      int64_t int64_value;\n      ailego::StringHelper::ToInt64(filter_value, &int64_value);\n      right_exp = cp::literal(int64_value);\n      break;\n    }\n    case DataType::UINT64: {\n      uint64_t uint64_value;\n      ailego::StringHelper::ToUint64(filter_value, &uint64_value);\n      right_exp = cp::literal(uint64_value);\n      break;\n    }\n    case DataType::FLOAT: {\n      float float_value;\n      ailego::StringHelper::ToFloat(filter_value, &float_value);\n      right_exp = cp::literal(float_value);\n      break;\n    }\n    case DataType::DOUBLE: {\n      double double_value;\n      ailego::StringHelper::ToDouble(filter_value, &double_value);\n      right_exp = cp::literal(double_value);\n      break;\n    }\n    case DataType::BOOL: {\n      std::string lower_filter_value;\n      lower_filter_value.resize(filter_value.size());\n      bool bool_value;\n      std::transform(filter_value.begin(), 
filter_value.end(),\n                     lower_filter_value.begin(), ::tolower);\n      if (lower_filter_value == \"true\") {\n        bool_value = true;\n      } else if (lower_filter_value == \"false\") {\n        bool_value = false;\n      } else {\n        LOG_ERROR(\"Unrecognized bool value: %s\", filter_value.c_str());\n        return tl::make_unexpected(\n            Status::InvalidArgument(\"unexpected bool value\", filter_value));\n      }\n      right_exp = cp::literal(bool_value);\n      break;\n    }\n    default: {\n      LOG_ERROR(\"filter to data type is not supported.\");\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"filter to data type is not supported\", data_type));\n      break;\n    }\n  }\n\n  switch (op) {\n    case sqlengine::QueryNodeOp::Q_EQ:\n      return cp::equal(std::move(left_exp), std::move(right_exp));\n    case sqlengine::QueryNodeOp::Q_NE:\n      return cp::not_equal(std::move(left_exp), std::move(right_exp));\n    case sqlengine::QueryNodeOp::Q_GT:\n      return cp::greater(std::move(left_exp), std::move(right_exp));\n    case sqlengine::QueryNodeOp::Q_LT:\n      return cp::less(std::move(left_exp), std::move(right_exp));\n    case sqlengine::QueryNodeOp::Q_GE:\n      return cp::greater_equal(std::move(left_exp), std::move(right_exp));\n    case sqlengine::QueryNodeOp::Q_LE:\n      return cp::less_equal(std::move(left_exp), std::move(right_exp));\n      // NOTE: Q_LIKE already handled above\n\n    default:\n      return tl::make_unexpected(Status::InvalidArgument(\"unexpected op\", op));\n      break;\n  }\n  return tl::make_unexpected(Status::InvalidArgument(\"unexpected op\", op));\n}\n\nResult<cp::Expression> QueryPlanner::parse_filter(const QueryNode *query_node) {\n  if (!query_node) {\n    return cp::literal(true);\n  }\n  if (query_node->type() == QueryNode::QueryNodeType::REL_EXPR) {\n    return create_filter_node(query_node);\n  }\n  if (query_node->type() == 
QueryNode::QueryNodeType::LOGIC_EXPR) {\n    auto left = parse_filter(query_node->left_node());\n    if (!left) {\n      return left;\n    }\n    auto right = parse_filter(query_node->right_node());\n    if (!right) {\n      return right;\n    }\n    if (query_node->op() == QueryNodeOp::Q_AND) {\n      return cp::and_(std::move(left.value()), std::move(right.value()));\n    } else if (query_node->op() == QueryNodeOp::Q_OR) {\n      return cp::or_(std::move(left.value()), std::move(right.value()));\n    }\n  }\n  return tl::make_unexpected(\n      Status::InvalidArgument(\"unexpected \", query_node->text()));\n}\n\n\nResult<PlanInfo::Ptr> QueryPlanner::make_plan(\n    const std::vector<Segment::Ptr> &segments, const std::string &trace_id,\n    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {\n  // make logic plan from query_info\n  // PlanInfo::Ptr logical_plan = make_logical_plan(query_info);\n\n  // do logic optimization here\n\n  // as we don't have logic optimization in a period of time,\n  // simply make physical plan directly from query info\n  return make_physical_plan(segments, trace_id, query_infos);\n}\n\nResult<PlanInfo::Ptr> QueryPlanner::make_physical_plan(\n    const std::vector<Segment::Ptr> &segments, const std::string & /*trace_id*/,\n    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {\n  const std::string &table_name = schema_->name();\n  if (segments.empty()) {\n    LOG_ERROR(\"Segment not found [%s]\", table_name.c_str());\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"segment not found:\", table_name));\n  }\n\n  QueryInfo *query_info = (*query_infos)[0].get();\n  LOG_DEBUG(\"Making plan for collection[%s] query_info[%s]\", table_name.c_str(),\n            query_info->to_string().c_str());\n  int topn = query_info->query_topn();\n  auto vector_cond = query_info->vector_cond_info();\n  bool has_group_by = query_info->group_by() != nullptr;\n\n  // optimize plan by instrument query info condition, eg adjust invert 
cond\n  Optimizer::Ptr optimizer =\n      InvertCondOptimizer::CreateInvertCondOptimizer(schema_);\n  int num_segments = segments.size();\n  std::vector<PlanInfo::Ptr> segment_plans(segments.size());\n  for (int idx = 0; idx < num_segments; ++idx) {\n    auto &segment = segments[idx];\n    auto &segment_query_info = (*query_infos)[idx];\n    bool only_invert_before_opt =\n        segment_query_info->invert_cond() != nullptr &&\n        segment_query_info->filter_cond() == nullptr;\n    if (optimizer) {\n      // Optimize by change query info if needed.\n      if (!optimizer->optimize(segment.get(), segment_query_info.get())) {\n        LOG_DEBUG(\n            \"Not optimized. collection[%s] segment[%zu] \"\n            \"segment_query_info[%s]\",\n            table_name.c_str(), (size_t)segment->id(),\n            segment_query_info->to_string().c_str());\n      } else {\n        LOG_DEBUG(\n            \"Optimized. collection[%s] segment[%zu] segment_query_info[%s]\",\n            table_name.c_str(), (size_t)segment->id(),\n            segment_query_info->to_string().c_str());\n      }\n    }\n    bool only_forward_after_opt =\n        segment_query_info->invert_cond() == nullptr &&\n        segment_query_info->filter_cond() != nullptr;\n    // if only invert cond before opt and only forward cond after opt,\n    // single stage search should be performed as large ratio of docs match\n    // with filter\n    bool single_stage_search = only_invert_before_opt && only_forward_after_opt;\n    std::unique_ptr<arrow::compute::Expression> forward_filter;\n    if (query_info->filter_cond()) {\n      auto filter = parse_filter(query_info->filter_cond().get());\n      if (!filter) {\n        LOG_ERROR(\"Parse filter failed: %s\", filter.error().c_str());\n        return tl::make_unexpected(filter.error());\n      }\n      forward_filter =\n          std::make_unique<cp::Expression>(std::move(filter.value()));\n    }\n\n    Result<PlanInfo::Ptr> seg_plan;\n    if 
(query_info->vector_cond_info()) {\n      seg_plan = vector_scan(segment, std::move(segment_query_info),\n                             std::move(forward_filter), single_stage_search);\n    } else if (query_info->invert_cond()) {\n      seg_plan = invert_scan(segment, std::move(segment_query_info),\n                             std::move(forward_filter));\n    } else {\n      seg_plan = forward_scan(segment, std::move(segment_query_info),\n                              std::move(forward_filter));\n    }\n    if (!seg_plan) {\n      LOG_ERROR(\"Make plan failed: %s\", seg_plan.error().c_str());\n      return seg_plan;\n    }\n    if (segments.size() == 1) {\n      return seg_plan;\n    }\n    segment_plans[idx] = std::move(seg_plan.value());\n  }\n\n  // multi segment logic\n  ailego::ThreadPool *pool = GlobalResource::Instance().query_thread_pool();\n  auto recall_node =\n      std::make_shared<SegmentNode>(std::move(segment_plans), pool);\n  auto source_node_options =\n      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),\n                                      arrow::compute::Ordering::Implicit()};\n  ac::Declaration node{\"source\", source_node_options};\n\n  if (vector_cond) {\n    node = ac::Declaration{\"order_by\",\n                           {std::move(node)},\n                           ac::OrderByNodeOptions{cp::Ordering{{cp::SortKey{\n                               kFieldScore, vector_cond->is_reverse_sort()\n                                                ? 
cp::SortOrder::Descending\n                                                : cp::SortOrder::Ascending}}}}};\n  }\n\n  // group by need to collect all docs\n  if (!has_group_by) {\n    node = ac::Declaration{\n        \"fetch\", {std::move(node)}, ac::FetchNodeOptions{0, topn}};\n  }\n  return std::make_shared<PlanInfo>(std::move(node), recall_node->schema());\n}\n\nResult<PlanInfo::Ptr> QueryPlanner::forward_scan(\n    Segment::Ptr seg, QueryInfo::Ptr query_info,\n    std::unique_ptr<arrow::compute::Expression> forward_filter) {\n  auto reader = seg->scan(query_info->get_all_fetched_scalar_field_names());\n  auto schema = reader->schema();\n  ac::Declaration node{\n      \"record_batch_reader_source\",\n      ac::RecordBatchReaderSourceNodeOptions{std::move(reader)}};\n\n  auto seg_filter = seg->get_filter();\n  if (seg_filter) {\n    cp::Expression check_not_filtered =\n        cp::call(kCheckNotFiltered, {cp::field_ref(LOCAL_ROW_ID)},\n                 std::make_shared<CheckNotFilteredOp::Options>(seg_filter));\n    node =\n        ac::Declaration{\"filter\",\n                        {std::move(node)},\n                        ac::FilterNodeOptions(std::move(check_not_filtered))};\n  }\n\n  if (forward_filter) {\n    node = ac::Declaration{\"filter\",\n                           {std::move(node)},\n                           ac::FilterNodeOptions(std::move(*forward_filter))};\n  }\n\n  if (query_info->is_include_vector()) {\n    std::vector<cp::Expression> expressions;\n    std::vector<std::string> names =\n        query_info->get_all_fetched_scalar_field_names();\n    for (const auto &field_name : names) {\n      expressions.emplace_back(cp::field_ref(field_name));\n    }\n    for (const auto &vector_field : query_info->selected_vector_fields()) {\n      auto indexer = seg->get_combined_vector_indexer(vector_field.field_name);\n      if (!indexer) {\n        return tl::make_unexpected(Status::InvalidArgument(\n            \"vector indexer not found:\", 
vector_field.field_name));\n      }\n      if (vector_field.field_schema_ptr->is_dense_vector()) {\n        expressions.emplace_back(\n            cp::call(\"fetch_vector\", {cp::field_ref(LOCAL_ROW_ID)},\n                     std::make_shared<FetchVectorOp::Options>(indexer, true)));\n        schema = Util::append_field(*schema, vector_field.field_name,\n                                    arrow::binary());\n      } else {\n        expressions.emplace_back(\n            cp::call(\"fetch_sparse_vector\", {cp::field_ref(LOCAL_ROW_ID)},\n                     std::make_shared<FetchVectorOp::Options>(indexer, false)));\n        schema = Util::append_field(*schema, vector_field.field_name,\n                                    Util::sparse_type());\n      }\n      names.emplace_back(vector_field.field_name);\n    }\n    node = ac::Declaration{\n        \"project\",\n        {std::move(node)},\n        ac::ProjectNodeOptions{std::move(expressions), std::move(names)}};\n  }\n\n  node = ac::Declaration{\"fetch\",\n                         {std::move(node)},\n                         ac::FetchNodeOptions{0, query_info->query_topn()}};\n  return std::make_shared<PlanInfo>(std::move(node), std::move(schema));\n}\n\nResult<PlanInfo::Ptr> QueryPlanner::vector_scan(\n    Segment::Ptr seg, QueryInfo::Ptr query_info,\n    std::unique_ptr<arrow::compute::Expression> forward_filter,\n    bool single_stage_search) {\n  std::unique_ptr<ac::Declaration> forward_filter_plan;\n  // if single stage search is not enabled, first run acero plan to get\n  // forward bitmap, then filter during vector search. 
otherwise, filter\n  // forward during forward search.\n  if (forward_filter && !single_stage_search) {\n    ac::RecordBatchReaderSourceNodeOptions source_options{\n        seg->scan(query_info->get_forward_filter_field_names())};\n    forward_filter_plan.reset(new ac::Declaration{ac::Declaration::Sequence({\n        {\"record_batch_reader_source\", std::move(source_options)},\n        {\n            \"project\",\n            ac::ProjectNodeOptions{{std::move(*forward_filter)},\n                                   {kFieldIsValid}},\n        },\n    })});\n    forward_filter.reset();\n  }\n  auto doc_filter = std::make_shared<DocFilter>(seg, query_info,\n                                                std::move(forward_filter_plan),\n                                                std::move(forward_filter));\n\n  int topn = query_info->query_topn();\n  int batch_size = get_batch_size(*query_info, false);\n  auto recall_node = std::make_shared<VectorRecallNode>(\n      std::move(seg), std::move(query_info), std::move(doc_filter), batch_size,\n      single_stage_search);\n\n  auto source_node_options =\n      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),\n                                      arrow::compute::Ordering::Implicit()};\n  ac::Declaration node{\"source\", source_node_options};\n  // group by need to collect all docs\n  if (!recall_node->query_info()->group_by()) {\n    node = ac::Declaration{\n        \"fetch\", {std::move(node)}, ac::FetchNodeOptions{0, topn}};\n  }\n  return std::make_shared<PlanInfo>(std::move(node), recall_node->schema());\n}\n\nResult<PlanInfo::Ptr> QueryPlanner::invert_scan(\n    Segment::Ptr seg, QueryInfo::Ptr query_info,\n    std::unique_ptr<arrow::compute::Expression> forward_filter) {\n  auto topn = query_info->query_topn();\n  int batch_size = get_batch_size(*query_info, forward_filter != nullptr);\n  auto recall_node =\n      std::make_shared<InvertRecallNode>(seg, query_info, batch_size);\n\n  auto 
source_node_options =\n      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),\n                                      arrow::compute::Ordering::Implicit()};\n  ac::Declaration node{\"source\", source_node_options};\n  if (forward_filter) {\n    node = ac::Declaration{\"filter\",\n                           {std::move(node)},\n                           ac::FilterNodeOptions(std::move(*forward_filter))};\n  }\n\n  auto schema = recall_node->schema();\n  if (query_info->is_include_vector()) {\n    std::vector<cp::Expression> expressions;\n    std::vector<std::string> names =\n        query_info->get_all_fetched_scalar_field_names();\n    for (const auto &field_name : names) {\n      expressions.emplace_back(cp::field_ref(field_name));\n    }\n    for (const auto &vector_field : query_info->selected_vector_fields()) {\n      auto indexer = seg->get_combined_vector_indexer(vector_field.field_name);\n      if (!indexer) {\n        return tl::make_unexpected(Status::InvalidArgument(\n            \"vector indexer not found:\", vector_field.field_name));\n      }\n      if (vector_field.field_schema_ptr->is_dense_vector()) {\n        expressions.emplace_back(\n            cp::call(\"fetch_vector\", {cp::field_ref(LOCAL_ROW_ID)},\n                     std::make_shared<FetchVectorOp::Options>(indexer, true)));\n        schema = Util::append_field(*schema, vector_field.field_name,\n                                    arrow::binary());\n      } else {\n        expressions.emplace_back(\n            cp::call(\"fetch_sparse_vector\", {cp::field_ref(LOCAL_ROW_ID)},\n                     std::make_shared<FetchVectorOp::Options>(indexer, false)));\n        schema = Util::append_field(*schema, vector_field.field_name,\n                                    Util::sparse_type());\n      }\n      names.emplace_back(vector_field.field_name);\n    }\n    node = ac::Declaration{\n        \"project\",\n        {std::move(node)},\n        
ac::ProjectNodeOptions{std::move(expressions), std::move(names)}};\n  }\n\n  node = ac::Declaration{\n      \"fetch\", {std::move(node)}, ac::FetchNodeOptions{0, topn}};\n  return std::make_shared<PlanInfo>(std::move(node), std::move(schema));\n}\n\nint QueryPlanner::get_batch_size(const QueryInfo &info, bool has_later_filter) {\n  // ref https://arrow.apache.org/docs/developers/cpp/acero.html#batch-size\n  if (!info.query_orderbys().empty() || has_later_filter) {\n    return 32 * 1024;\n  }\n  return std::min(info.query_topn(), 32U * 1024);\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/query_planner.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <arrow/acero/exec_plan.h>\n#include <arrow/compute/expression.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/db/status.h>\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n#include \"plan_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass QueryPlanner {\n public:\n  QueryPlanner(CollectionSchema *schema);\n\n  Result<PlanInfo::Ptr> make_plan(\n      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,\n      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);\n\n\n private:\n  Result<PlanInfo::Ptr> make_physical_plan(\n      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,\n      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);\n\n  Result<PlanInfo::Ptr> make_group_by_physical_plan(\n      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,\n      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);\n\n private:\n  Result<cp::Expression> parse_filter(const QueryNode *node);\n\n  Result<cp::Expression> create_filter_node(const QueryNode *node);\n\n  Result<PlanInfo::Ptr> vector_scan(\n      Segment::Ptr seg, QueryInfo::Ptr query_info,\n      std::unique_ptr<arrow::compute::Expression> forward_filter,\n      bool single_stage_search);\n  
Result<PlanInfo::Ptr> invert_scan(\n      Segment::Ptr seg, QueryInfo::Ptr query_info,\n      std::unique_ptr<arrow::compute::Expression> forward_filter);\n  Result<PlanInfo::Ptr> forward_scan(\n      Segment::Ptr seg, QueryInfo::Ptr query_info,\n      std::unique_ptr<arrow::compute::Expression> forward_filter);\n\n  static int get_batch_size(const QueryInfo &info, bool has_later_filter);\n\n private:\n  CollectionSchema *schema_{nullptr};\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/planner/segment_node.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/planner/segment_node.h\"\n#include <memory>\n#include <optional>\n#include <arrow/record_batch.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n\nnamespace zvec::sqlengine {\n\nnamespace cp = arrow::compute;\n\narrow::AsyncGenerator<std::optional<arrow::compute::ExecBatch>>\nSegmentNode::gen() {\n  return [self = shared_from_this()]()\n             -> arrow::Future<std::optional<arrow::compute::ExecBatch>> {\n    if (!self->prepared_.exchange(true)) {\n      auto status = self->prepare();\n      if (!status.ok()) {\n        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n            arrow::Status::ExecutionError(\"prepare segment node failed:\",\n                                          status.c_str()));\n      }\n    }\n    // process backward\n    std::shared_ptr<arrow::RecordBatch> batch;\n    while (!self->readers_.empty()) {\n      auto &back = self->readers_.back();\n      auto status = back->ReadNext(&batch);\n      if (!status.ok()) {\n        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n            arrow::Status::ExecutionError(\"read next batch failed:\",\n                                          status.ToString()));\n      }\n      if (batch == nullptr) {\n        LOG_DEBUG(\"batch finished: %p\", back.get());\n        
self->readers_.pop_back();\n        continue;\n      }\n      LOG_INFO(\"Segment batch: %p %s\", back.get(), batch->ToString().c_str());\n      cp::ExecBatch exec_batch(*batch);\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          std::move(exec_batch));\n    };\n    // 返回空的optional表示结束\n    return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n        std::nullopt);\n  };\n}\n\nStatus SegmentNode::prepare() {\n  auto group = thread_pool_->make_group();\n\n  std::vector<Result<std::unique_ptr<arrow::RecordBatchReader>>> results_;\n  results_.resize(segment_plans_.size());\n  for (size_t i = 0; i < segment_plans_.size(); i++) {\n    auto &plan = segment_plans_[i];\n    group->execute([&, i]() { results_[i] = plan->execute_to_reader(); });\n  }\n  group->wait_finish();\n  for (size_t i = 0; i < segment_plans_.size(); i++) {\n    auto &result = results_[i];\n    if (!result) {\n      return result.error();\n    }\n    readers_[i] = std::move(result.value());\n  }\n  return Status::OK();\n}\n\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/segment_node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <memory>\n#include <optional>\n#include <arrow/acero/api.h>\n#include <arrow/api.h>\n#include <arrow/util/async_generator.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/db/status.h>\n#include \"db/sqlengine/planner/plan_info.h\"\n\nnamespace zvec::sqlengine {\n\nclass SegmentNode : public std::enable_shared_from_this<SegmentNode> {\n public:\n  SegmentNode(std::vector<PlanInfo::Ptr> segment_plans,\n              ailego::ThreadPool *thread_pool)\n      : segment_plans_(std::move(segment_plans)),\n        thread_pool_(thread_pool),\n        readers_(segment_plans_.size()) {}\n\n  //! get schema\n  std::shared_ptr<arrow::Schema> schema() const {\n    return segment_plans_[0]->schema();\n  }\n\n  arrow::AsyncGenerator<std::optional<arrow::compute::ExecBatch>> gen();\n\n private:\n  Status prepare();\n\n private:\n  std::vector<PlanInfo::Ptr> segment_plans_;\n  ailego::ThreadPool *thread_pool_;\n\n  std::vector<std::unique_ptr<arrow::RecordBatchReader>> readers_;\n  std::atomic_bool prepared_{false};\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/vector_recall_node.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/planner/vector_recall_node.h\"\n#include <cstdint>\n#include <memory>\n#include <string>\n#include <arrow/array/builder_binary.h>\n#include <arrow/result.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/db/index_params.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/type.h>\n#include \"db/index/column/vector_column/vector_column_params.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/planner/ops/fetch_vector_op.h\"\n\nnamespace zvec::sqlengine {\n\nVectorRecallNode::VectorRecallNode(Segment::Ptr segment,\n                                   QueryInfo::Ptr query_info,\n                                   DocFilter::Ptr doc_filter, int batch_size,\n                                   bool single_stage_search)\n    : segment_(std::move(segment)),\n      query_info_(std::move(query_info)),\n      doc_filter_(doc_filter),\n      batch_size_(batch_size),\n      // need fetch filter fields if single stage search, otherwise only fetch\n      // selectd scalar fields, as forward filter is already performed and order\n      // by only support vector score\n      fetched_columns_(single_stage_search\n                           ? 
query_info_->get_all_fetched_scalar_field_names()\n                           : query_info_->get_selected_scalar_field_names()) {\n  auto table = segment_->fetch(fetched_columns_, std::vector<int>{});\n  schema_ = table->schema();\n  schema_ = Util::append_field(*schema_, kFieldScore, arrow::float32());\n  if (query_info_->is_include_vector()) {\n    for (auto &field : query_info_->selected_vector_fields()) {\n      if (field.field_schema_ptr->is_dense_vector()) {\n        schema_ =\n            Util::append_field(*schema_, field.field_name, arrow::binary());\n      } else {\n        schema_ =\n            Util::append_field(*schema_, field.field_name, Util::sparse_type());\n      }\n    }\n  }\n  if (query_info_->group_by()) {\n    schema_ = Util::append_field(*schema_, kFieldGroupId, arrow::utf8());\n  }\n}\n\narrow::AsyncGenerator<std::optional<cp::ExecBatch>> VectorRecallNode::gen() {\n  auto state_ptr = std::make_shared<State>(shared_from_this());\n  return [state_ptr = std::move(state_ptr)]() mutable\n         -> arrow::Future<std::optional<cp::ExecBatch>> {\n    auto &state = *state_ptr;\n    if (!state.iter_) {\n      auto vector_ret = state.self_->prepare();\n      if (!vector_ret) {\n        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n            arrow::Status::ExecutionError(\"prepare vector failed:\",\n                                          vector_ret.error().c_str()));\n      }\n      state.vector_result_ = vector_ret.value();\n      state.iter_ = state.vector_result_->create_iterator();\n    }\n\n    // check if there is any data\n    if (!state.iter_->valid()) {\n      // return empty optional to indicate end\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          std::nullopt);\n    }\n\n    auto record_batch = state.collect_batch();\n    if (!record_batch.ok()) {\n      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n          arrow::Status::ExecutionError(\"collect batch 
failed:\",\n                                        record_batch.status().ToString()));\n    }\n    cp::ExecBatch exec_batch(*record_batch.ValueOrDie());\n    return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(\n        std::move(exec_batch));\n  };\n}\n\nstd::string decode_group_id_from_forward(const FieldSchema *schema,\n                                         const arrow::Array &array) {\n  if (array.IsNull(0)) {\n    return \"\";\n  }\n  switch (schema->data_type()) {\n    case DataType::INT32:\n      return std::to_string(\n          static_cast<const arrow::Int32Array &>(array).Value(0));\n    case DataType::UINT32:\n      return std::to_string(\n          static_cast<const arrow::UInt32Array &>(array).Value(0));\n    case DataType::INT64:\n      return std::to_string(\n          static_cast<const arrow::Int64Array &>(array).Value(0));\n    case DataType::UINT64:\n      return std::to_string(\n          static_cast<const arrow::UInt64Array &>(array).Value(0));\n    case DataType::STRING:\n      return static_cast<const arrow::StringArray &>(array).GetString(0);\n    case DataType::FLOAT:\n      return std::to_string(\n          static_cast<const arrow::FloatArray &>(array).Value(0));\n    case DataType::DOUBLE:\n      return std::to_string(\n          static_cast<const arrow::DoubleArray &>(array).Value(0));\n    case DataType::BOOL:\n      return static_cast<const arrow::BooleanArray &>(array).Value(0) ? 
\"true\"\n                                                                      : \"false\";\n    default:\n      LOG_ERROR(\"Unsupported data type: %d\", (int)schema->data_type());\n      return \"\";\n  }\n}\n\nResult<IndexResults::Ptr> VectorRecallNode::prepare() {\n  auto filter_status = doc_filter_->compute_filter();\n  if (!filter_status.ok()) {\n    return tl::make_unexpected(filter_status);\n  }\n  auto &vector_cond_ = query_info_->vector_cond_info();\n  CombinedVectorColumnIndexer::Ptr vector_indexer;\n  if (auto *vector_params = dynamic_cast<const VectorIndexParams *>(\n          vector_cond_->vector_schema()->index_params().get());\n      vector_params == nullptr ||\n      vector_params->quantize_type() == QuantizeType::UNDEFINED) {\n    vector_indexer = segment_->get_combined_vector_indexer(\n        vector_cond_->vector_field_name());\n  } else {\n    vector_indexer = segment_->get_quant_combined_vector_indexer(\n        vector_cond_->vector_field_name());\n  }\n  if (!vector_indexer) {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"vector index not found:\", vector_cond_->vector_field_name()));\n  }\n  vector_column_params::QueryParams query_params;\n  query_params.topk = query_info_->query_topn();\n  query_params.data_type = vector_cond_->vector_schema()->data_type();\n  query_params.dimension = vector_cond_->dimension();\n  query_params.query_params = vector_cond_->query_params();\n  auto brute_force_keys = doc_filter_->get_bf_by_keys_and_update();\n  if (brute_force_keys) {\n    query_params.bf_pks.emplace_back(std::move(brute_force_keys.value()));\n  }\n  // set filter after brute force check\n  query_params.filter = doc_filter_->empty() ? 
nullptr : doc_filter_.get();\n  if (const auto &group_by = query_info_->group_by(); group_by) {\n    auto group_fun = [this, &group_by](uint64_t row_id) -> std::string {\n      auto table = segment_->fetch({group_by->group_by_field},\n                                   std::vector<int>{(int)row_id});\n      static std::string kEmpty;\n      if (!table) {\n        LOG_ERROR(\"Fetch group by field failed: field[%s] row_id[%zu]\",\n                  group_by->group_by_field.c_str(), (size_t)row_id);\n        return kEmpty;\n      }\n      if (table->num_rows() != 1) {\n        LOG_ERROR(\n            \"Fetch group by field failed: field[%s] row_id[%zu] rows[%zu]\",\n            group_by->group_by_field.c_str(), (size_t)row_id,\n            (size_t)table->num_rows());\n        return kEmpty;\n      }\n      if (table->column(0)->chunk(0)->IsNull(0)) {\n        return kEmpty;\n      }\n      return decode_group_id_from_forward(query_info_->group_by_schema_ptr(),\n                                          *table->column(0)->chunk(0));\n    };\n    query_params.group_by =\n        std::make_unique<vector_column_params::GroupByParams>(\n            group_by->group_topk, group_by->group_count, std::move(group_fun));\n  }\n\n  vector_column_params::VectorData vector_data;\n  if (vector_cond_->vector_schema()->is_dense_vector()) {\n    vector_data.vector =\n        vector_column_params::DenseVector{vector_cond_->vector_term().data()};\n  } else {\n    vector_data.vector = vector_column_params::SparseVector{\n        vector_cond_->sparse_count(),\n        vector_cond_->vector_sparse_indices().data(),\n        vector_cond_->vector_sparse_values().data()};\n  }\n\n  auto vector_ret = vector_indexer->Search(vector_data, query_params);\n  if (!vector_ret) {\n    return tl::make_unexpected(vector_ret.error());\n  }\n  return vector_ret;\n}\n\narrow::Result<std::shared_ptr<arrow::RecordBatch>>\nVectorRecallNode::State::collect_batch() {\n  // collect a batch\n  std::vector<int> 
indices;\n  indices.reserve(self_->batch_size_);\n  arrow::FloatBuilder builder;\n  arrow::StringBuilder group_id_builder;\n  for (int i = 0; iter_->valid() && i < self_->batch_size_;\n       i++, iter_->next()) {\n    indices.push_back(iter_->doc_id());\n    ARROW_RETURN_NOT_OK(builder.Append(iter_->score()));\n    if (self_->query_info_->group_by()) {\n      ARROW_RETURN_NOT_OK(group_id_builder.Append(iter_->group_id()));\n    }\n  }\n  auto table = self_->segment_->fetch(self_->fetched_columns_, indices);\n  if (!table) {\n    return arrow::Status::ExecutionError(\"fetch table failed\");\n  }\n  auto batch = table->CombineChunksToBatch();\n  if (!batch.ok()) {\n    return arrow::Status::ExecutionError(\"combine chunks to batch failed:\",\n                                         batch.status().ToString());\n  }\n  auto score_array = builder.Finish();\n  if (!score_array.ok()) {\n    return arrow::Status::ExecutionError(\"finish builder failed:\",\n                                         score_array.status().ToString());\n  }\n  auto record_batch = std::move(batch.ValueUnsafe());\n  ARROW_ASSIGN_OR_RAISE(\n      record_batch,\n      record_batch->AddColumn(record_batch->num_columns(), kFieldScore,\n                              score_array.MoveValueUnsafe()));\n\n  if (self_->query_info_->is_include_vector()) {\n    for (auto &field : self_->query_info_->selected_vector_fields()) {\n      Result<std::shared_ptr<arrow::Array>> array_res;\n      if (field.field_schema_ptr->is_dense_vector()) {\n        array_res = FetchVectorOp::fetch_dense_vector(\n            *self_->segment_, field.field_name, indices);\n      } else {\n        array_res = FetchVectorOp::fetch_sparse_vector(\n            *self_->segment_, field.field_name, indices);\n      }\n      if (!array_res) {\n        return arrow::Status::ExecutionError(\"fetch vector failed:\",\n                                             array_res.error().c_str());\n      }\n      ARROW_ASSIGN_OR_RAISE(\n          
record_batch,\n          record_batch->AddColumn(record_batch->num_columns(), field.field_name,\n                                  std::move(array_res.value())));\n    }\n  }\n\n  if (self_->query_info_->group_by()) {\n    auto group_id_array = group_id_builder.Finish();\n    if (!group_id_array.ok()) {\n      return arrow::Status::ExecutionError(\"finish group id builder failed:\",\n                                           group_id_array.status().ToString());\n    }\n    ARROW_ASSIGN_OR_RAISE(\n        record_batch,\n        record_batch->AddColumn(record_batch->num_columns(), kFieldGroupId,\n                                group_id_array.MoveValueUnsafe()));\n  }\n\n  LOG_DEBUG(\"Record batch: %s\", record_batch->ToString().c_str());\n  return record_batch;\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/planner/vector_recall_node.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <arrow/acero/api.h>\n#include <arrow/api.h>\n#include <zvec/db/status.h>\n#include \"db/index/column/common/index_results.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/analyzer/query_info.h\"\n#include \"db/sqlengine/planner/doc_filter.h\"\n\nnamespace zvec::sqlengine {\n\nclass VectorRecallNode : public std::enable_shared_from_this<VectorRecallNode> {\n public:\n  using Ptr = std::shared_ptr<VectorRecallNode>;\n  VectorRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,\n                   DocFilter::Ptr doc_filter, int batch_size,\n                   bool single_stage_search);\n\n  //! 
get schema\n  std::shared_ptr<arrow::Schema> schema() const {\n    return schema_;\n  }\n\n  arrow::AsyncGenerator<std::optional<cp::ExecBatch>> gen();\n\n  const QueryInfo::Ptr &query_info() const {\n    return query_info_;\n  }\n\n private:\n  Result<IndexResults::Ptr> prepare();\n\n private:\n  struct State {\n    State(VectorRecallNode::Ptr self) : self_(std::move(self)) {}\n\n    arrow::Result<std::shared_ptr<arrow::RecordBatch>> collect_batch();\n\n    VectorRecallNode::Ptr self_;\n    IndexResults::Ptr vector_result_;\n    IndexResults::IteratorUPtr iter_;\n  };\n\n  Segment::Ptr segment_;\n  QueryInfo::Ptr query_info_;\n  DocFilter::Ptr doc_filter_;\n  int batch_size_;\n  const std::vector<std::string> &fetched_columns_;\n  std::shared_ptr<arrow::Schema> schema_;\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/sqlengine.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/sqlengine_impl.h\"\n\n\nnamespace zvec::sqlengine {\n\nSQLEngine::Ptr SQLEngine::create(zvec::Profiler::Ptr profiler) {\n  return std::make_shared<SQLEngineImpl>(std::move(profiler));\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/sqlengine.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/db/doc.h>\n#include <zvec/db/status.h>\n#include \"db/common/profiler.h\"\n#include \"db/index/segment/segment.h\"\n\nnamespace zvec::sqlengine {\n\nclass SQLEngine {\n public:\n  using Ptr = std::shared_ptr<SQLEngine>;\n  virtual ~SQLEngine();\n\n  virtual Result<DocPtrList> execute(\n      CollectionSchema::Ptr collection, const VectorQuery &query,\n      const std::vector<Segment::Ptr> &segments) = 0;\n\n  virtual Result<GroupResults> execute_group_by(\n      CollectionSchema::Ptr collection,\n      const GroupByVectorQuery &group_by_query,\n      const std::vector<Segment::Ptr> &segments) = 0;\n\n public:\n  static SQLEngine::Ptr create(zvec::Profiler::Ptr profiler);\n};\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "src/db/sqlengine/sqlengine_impl.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include \"db/sqlengine/sqlengine_impl.h\"\n#include <unordered_map>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/type.h>\n#include \"db/common/constants.h\"\n#include \"db/sqlengine/analyzer/query_analyzer.h\"\n#include \"db/sqlengine/parser/sql_info_helper.h\"\n#include \"db/sqlengine/parser/zvec_parser.h\"\n#include \"db/sqlengine/planner/op_register.h\"\n#include \"db/sqlengine/planner/query_planner.h\"\n\nnamespace zvec::sqlengine {\n\nvoid global_init() {\n  static std::once_flag once;\n  // run once\n  std::call_once(once, []() {\n    auto status = arrow::compute::Initialize();\n    if (!status.ok()) {\n      LOG_ERROR(\"arrow compute init failed: [%s]\", status.ToString().c_str());\n      abort();\n    }\n    status = OpRegister::register_ops();\n    if (!status.ok()) {\n      LOG_ERROR(\"arrow compute register op failed: [%s]\",\n                status.ToString().c_str());\n      abort();\n    }\n  });\n}\n\nSQLEngine::~SQLEngine() = default;\n\nSQLEngineImpl::SQLEngineImpl(zvec::Profiler::Ptr profiler)\n    : profiler_(std::move(profiler)) {}\n\nResult<DocPtrList> SQLEngineImpl::execute(\n    CollectionSchema::Ptr collection, const VectorQuery &query,\n    const std::vector<Segment::Ptr> &segments) {\n  if (segments.empty()) {\n    return DocPtrList{};\n  }\n\n  auto query_info = 
parse_request(collection, query, nullptr);\n  if (!query_info) {\n    return tl::make_unexpected(query_info.error());\n  }\n  if (query_info.value()->is_filter_unsatisfiable()) {\n    LOG_WARN(\"filter is unsatisfiable: %s\",\n             query_info.value()->to_string().c_str());\n    return {};\n  }\n  const auto &select_item_meta_ptrs =\n      query_info.value()->select_item_schema_ptrs();\n  std::vector<QueryInfo::Ptr> query_infos(segments.size(), query_info.value());\n  auto reader = search_by_query_info(collection, segments, &query_infos);\n  if (!reader) {\n    return tl::make_unexpected(\n        Status::InternalError(\"Execute plan failed: \", reader.error().c_str()));\n  }\n  return fill_result(select_item_meta_ptrs, reader.value().get());\n}\n\nVectorQuery from_group_by(const GroupByVectorQuery &gq) {\n  VectorQuery vq;\n  vq.field_name_ = gq.field_name_;\n  vq.query_vector_ = gq.query_vector_;\n  vq.query_sparse_indices_ = gq.query_sparse_indices_;\n  vq.query_sparse_values_ = gq.query_sparse_values_;\n  vq.filter_ = gq.filter_;\n  vq.include_vector_ = gq.include_vector_;\n  vq.query_params_ = gq.query_params_;\n  vq.output_fields_ = gq.output_fields_;\n  vq.topk_ = 0;\n  return vq;\n}\n\nResult<GroupResults> SQLEngineImpl::execute_group_by(\n    CollectionSchema::Ptr collection, const GroupByVectorQuery &group_by_query,\n    const std::vector<Segment::Ptr> &segments) {\n  if (segments.empty()) {\n    return GroupResults{};\n  }\n\n  VectorQuery query = from_group_by(group_by_query);\n  auto query_info = parse_request(\n      collection, query,\n      std::make_shared<GroupBy>(group_by_query.group_by_field_name_,\n                                group_by_query.group_topk_,\n                                group_by_query.group_count_));\n  if (!query_info) {\n    return tl::make_unexpected(query_info.error());\n  }\n  if (query_info.value()->is_filter_unsatisfiable()) {\n    LOG_WARN(\"filter is unsatisfiable: %s\",\n             
query_info.value()->to_string().c_str());\n    return {};\n  }\n  std::vector<QueryInfo::Ptr> query_infos(segments.size(), query_info.value());\n  auto reader = search_by_query_info(collection, segments, &query_infos);\n  if (!reader) {\n    return tl::make_unexpected(\n        Status::InternalError(\"Execute plan failed: \", reader.error().c_str()));\n  }\n  return fill_group_by_result(*query_info.value(), reader.value().get());\n}\n\nResult<QueryInfo::Ptr> SQLEngineImpl::parse_sql_info(\n    const CollectionSchema &schema, const SQLInfo::Ptr &sql_info) {\n  profiler_->open_stage(\"analyze stage\");\n  QueryAnalyzer analyzer;\n  auto query_info = analyzer.analyze(schema, sql_info);\n  if (!query_info) {\n    return tl::make_unexpected(Status::InvalidArgument(\n        \"Analyze sql info failed:\", query_info.error().c_str()));\n  }\n  profiler_->close_stage();\n  LOG_DEBUG(\"query_info: [%s]\", query_info.value()->to_string().c_str());\n  return query_info.value();\n}\n\nResult<QueryInfo::Ptr> SQLEngineImpl::parse_request(\n    CollectionSchema::Ptr collection, const VectorQuery &request,\n    std::shared_ptr<GroupBy> group_by) {\n  profiler_->open_stage(\"message_to_sqlinfo\");\n  sqlengine::SQLInfo::Ptr sql_info;\n  std::string err_msg;\n  Node::Ptr filter_node;\n  if (!request.filter_.empty()) {\n    ZVecParser::Ptr parser = ZVecParser::create();\n    filter_node = parser->parse_filter(request.filter_);\n    if (filter_node == nullptr) {\n      LOG_ERROR(\"parse filter failed. 
reason:[%s] filter:[%s]\",\n                parser->err_msg().c_str(), request.filter_.c_str());\n      return tl::make_unexpected(\n          Status::InvalidArgument(\"Invalid filter:\", parser->err_msg()));\n    }\n  }\n  if (group_by) {\n    auto &group = *group_by;\n    if (group.group_by_field.empty() || group.group_count == 0 ||\n        group.group_topk == 0) {\n      return tl::make_unexpected(Status::InvalidArgument(\n          \"Invalid group by request: group_by\", group.group_by_field,\n          \" group_count: \", group.group_count,\n          \" group_topk: \", group.group_topk));\n    }\n  }\n\n  sqlengine::SQLInfoHelper::MessageToSQLInfo(&request, std::move(filter_node),\n                                             std::move(group_by), &sql_info,\n                                             &err_msg);\n  profiler_->close_stage();\n  if (!err_msg.empty()) {\n    LOG_ERROR(\"QueryAgent, message to sql info failed, err_msg: %s\",\n              err_msg.c_str());\n    return tl::make_unexpected(\n        Status::InvalidArgument(\"To sql info failed:\", err_msg));\n  }\n  LOG_DEBUG(\"Sql info is %s\", sql_info->to_string().c_str());\n  return parse_sql_info(*collection, std::move(sql_info));\n}\n\nResult<std::unique_ptr<arrow::RecordBatchReader>>\nSQLEngineImpl::search_by_query_info(\n    CollectionSchema::Ptr collection, const std::vector<Segment::Ptr> &segments,\n    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {\n  global_init();\n\n  profiler_->open_stage(\"plan stage\");\n  QueryPlanner planner(collection.get());\n  auto plan_info =\n      planner.make_plan(segments, profiler_->trace_id(), query_infos);\n  if (!plan_info) {\n    LOG_ERROR(\"plan query_info failed: [%s]\", plan_info.error().c_str());\n    return tl::make_unexpected(plan_info.error());\n  }\n  profiler_->close_stage();\n  // LOG_DEBUG(\"plan_info: [%s]\", plan_info->to_string().c_str());\n  return plan_info.value()->execute_to_reader();\n}\n\n#define 
GET_FIELD_FROM_RECORD_BATCH(res, field_name)                         \\\n  auto res = record_batch.GetColumnByName(field_name);                       \\\n  if (!res) {                                                                \\\n    return Status::InternalError(\"Get column by name failed: \", field_name); \\\n  }\n\ntemplate <typename T>\nstd::vector<T> to_vector(const char *data, size_t size) {\n  std::vector<T> vec(size);\n  memcpy(vec.data(), data, size * sizeof(T));\n  return vec;\n}\n\ntemplate <typename VectorType>\nStatus fill_doc_sparse_vector(const arrow::StructArray *typed_arr,\n                              const std::string &field_name,\n                              DocPtrList::iterator doc_it) {\n  auto *indices = (const arrow::BinaryArray *)typed_arr->field(0).get();\n  auto *values = (const arrow::BinaryArray *)typed_arr->field(1).get();\n  bool has_null = typed_arr->null_count() > 0;\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    if (has_null && typed_arr->IsNull(i)) {\n      continue;\n    }\n    auto indice_data = indices->GetView(i);\n    auto value_data = values->GetView(i);\n    uint32_t count = indice_data.size() / sizeof(uint32_t);\n    if (count != value_data.size() / sizeof(VectorType)) {\n      return Status::InvalidArgument(\"Dimension not match:\", count, \" vs \",\n                                     value_data.size() / sizeof(VectorType));\n    }\n    (*doc_it)->set(\n        field_name,\n        std::make_pair(to_vector<uint32_t>(indice_data.data(), count),\n                       to_vector<VectorType>(value_data.data(), count)));\n  }\n  return Status::OK();\n}\n\ntemplate <typename VectorType>\nStatus fill_doc_vector(const arrow::BinaryArray *typed_arr,\n                       const std::string &field_name, int dimension,\n                       DocPtrList::iterator doc_it) {\n  bool no_null = typed_arr->null_count() == 0;\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    if 
(no_null || !typed_arr->IsNull(i)) {\n      auto data = typed_arr->GetView(i);\n      if ((size_t)dimension != data.size() / sizeof(VectorType)) {\n        return Status::InvalidArgument(\"Dimension not match:\", dimension,\n                                       \" vs \",\n                                       data.size() / sizeof(VectorType));\n      }\n      (*doc_it)->set(field_name, std::vector<VectorType>(\n                                     (const VectorType *)&data[0],\n                                     (const VectorType *)&data[0] + dimension));\n    }\n  }\n  return Status::OK();\n}\n\ntemplate <typename ArrowArrayType>\nStatus fill_doc_field(const arrow::Array *arr, const std::string &field_name,\n                      DocPtrList::iterator doc_it) {\n  auto *typed_arr = static_cast<const ArrowArrayType *>(arr);\n  bool no_null = typed_arr->null_count() == 0;\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    if (no_null || !typed_arr->IsNull(i)) {\n      if constexpr (std::is_same_v<ArrowArrayType, arrow::StringArray> ||\n                    std::is_same_v<ArrowArrayType, arrow::LargeStringArray> ||\n                    std::is_same_v<ArrowArrayType, arrow::BinaryArray> ||\n                    std::is_same_v<ArrowArrayType, arrow::LargeBinaryArray>) {\n        (*doc_it)->set(field_name, typed_arr->GetString(i));\n      } else {\n        (*doc_it)->set(field_name, typed_arr->Value(i));\n      }\n    }\n  }\n  return Status::OK();\n}\n\ntemplate <typename ArrowArrayType, typename ElementType>\nStatus fill_doc_array_field(const arrow::Array *arr,\n                            const std::string &field_name,\n                            DocPtrList::iterator doc_it) {\n  const auto *list_arr = static_cast<const arrow::ListArray *>(arr);\n  auto *typed_arr =\n      dynamic_cast<const ArrowArrayType *>(list_arr->values().get());\n  bool has_null = list_arr->null_count() > 0;\n  for (int64_t i = 0; i < list_arr->length(); ++i, ++doc_it) 
{\n    if (has_null && list_arr->IsNull(i)) {\n      continue;\n    }\n    int64_t offset = list_arr->value_offset(i);\n    int64_t length = list_arr->value_length(i);\n    std::vector<ElementType> vec(length);\n    for (int64_t j = 0; j < length; ++j) {\n      vec[j] = typed_arr->Value(offset + j);\n    }\n    (*doc_it)->set(field_name, std::move(vec));\n  }\n  return Status::OK();\n}\n\nStatus fill_doc_field(const std::shared_ptr<arrow::Array> &chunk,\n                      const FieldSchema &field_schema,\n                      DocPtrList::iterator doc_it) {\n  switch (field_schema.data_type()) {\n    case DataType::INT32:\n      return fill_doc_field<arrow::Int32Array>(chunk.get(), field_schema.name(),\n                                               doc_it);\n    case DataType::UINT32:\n      return fill_doc_field<arrow::UInt32Array>(chunk.get(),\n                                                field_schema.name(), doc_it);\n    case DataType::INT64:\n      return fill_doc_field<arrow::Int64Array>(chunk.get(), field_schema.name(),\n                                               doc_it);\n    case DataType::UINT64:\n      return fill_doc_field<arrow::UInt64Array>(chunk.get(),\n                                                field_schema.name(), doc_it);\n    case DataType::FLOAT:\n      return fill_doc_field<arrow::FloatArray>(chunk.get(), field_schema.name(),\n                                               doc_it);\n    case DataType::DOUBLE:\n      return fill_doc_field<arrow::DoubleArray>(chunk.get(),\n                                                field_schema.name(), doc_it);\n    case DataType::BOOL:\n      return fill_doc_field<arrow::BooleanArray>(chunk.get(),\n                                                 field_schema.name(), doc_it);\n    case DataType::BINARY:\n      return fill_doc_field<arrow::BinaryArray>(chunk.get(),\n                                                field_schema.name(), doc_it);\n\n    case DataType::STRING:\n      return 
fill_doc_field<arrow::StringArray>(chunk.get(),\n                                                field_schema.name(), doc_it);\n\n    case DataType::ARRAY_INT32:\n      return fill_doc_array_field<arrow::Int32Array, int32_t>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_INT64:\n      return fill_doc_array_field<arrow::Int64Array, int64_t>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_UINT32:\n      return fill_doc_array_field<arrow::UInt32Array, uint32_t>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_UINT64:\n      return fill_doc_array_field<arrow::UInt64Array, uint64_t>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_FLOAT:\n      return fill_doc_array_field<arrow::FloatArray, float>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_DOUBLE:\n      return fill_doc_array_field<arrow::DoubleArray, double>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_STRING:\n      return fill_doc_array_field<arrow::StringArray, std::string>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_BINARY:\n      return fill_doc_array_field<arrow::BinaryArray, std::string>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::ARRAY_BOOL:\n      return fill_doc_array_field<arrow::BooleanArray, bool>(\n          chunk.get(), field_schema.name(), doc_it);\n\n    case DataType::VECTOR_FP32:\n      return fill_doc_vector<float>((arrow::BinaryArray *)chunk.get(),\n                                    field_schema.name(),\n                                    field_schema.dimension(), doc_it);\n\n    case DataType::VECTOR_FP64:\n      return fill_doc_vector<double>((arrow::BinaryArray *)chunk.get(),\n                                     field_schema.name(),\n                                     field_schema.dimension(), doc_it);\n   
 case DataType::VECTOR_FP16:\n      return fill_doc_vector<float16_t>((arrow::BinaryArray *)chunk.get(),\n                                        field_schema.name(),\n                                        field_schema.dimension(), doc_it);\n\n    case DataType::VECTOR_INT16:\n      return fill_doc_vector<int16_t>((arrow::BinaryArray *)chunk.get(),\n                                      field_schema.name(),\n                                      field_schema.dimension(), doc_it);\n\n    case DataType::VECTOR_INT8:\n      return fill_doc_vector<int8_t>((arrow::BinaryArray *)chunk.get(),\n                                     field_schema.name(),\n                                     field_schema.dimension(), doc_it);\n\n    case DataType::VECTOR_BINARY32:\n      return fill_doc_vector<uint32_t>(\n          (arrow::BinaryArray *)chunk.get(), field_schema.name(),\n          field_schema.dimension() / sizeof(uint32_t), doc_it);\n\n    case DataType::VECTOR_BINARY64:\n      return fill_doc_vector<uint64_t>(\n          (arrow::BinaryArray *)chunk.get(), field_schema.name(),\n          field_schema.dimension() / sizeof(uint64_t), doc_it);\n\n    case DataType::SPARSE_VECTOR_FP32:\n      return fill_doc_sparse_vector<float>((arrow::StructArray *)chunk.get(),\n                                           field_schema.name(), doc_it);\n\n    case DataType::SPARSE_VECTOR_FP16:\n      return fill_doc_sparse_vector<float16_t>(\n          (arrow::StructArray *)chunk.get(), field_schema.name(), doc_it);\n\n    default:\n      return Status::InvalidArgument(\"Datatype not supported:\",\n                                     field_schema.data_type());\n  }\n  return Status::OK();\n}\n\nvoid fill_doc_id(const std::shared_ptr<arrow::Array> &doc_id_array,\n                 DocPtrList::iterator doc_it) {\n  arrow::UInt64Array *typed_arr =\n      static_cast<arrow::UInt64Array *>(doc_id_array.get());\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    // doc_id is 
non-null\n    (*doc_it)->set_doc_id(typed_arr->Value(i));\n  }\n}\n\nvoid fill_doc_score(const std::shared_ptr<arrow::Array> &doc_id_array,\n                    DocPtrList::iterator doc_it) {\n  arrow::FloatArray *typed_arr =\n      static_cast<arrow::FloatArray *>(doc_id_array.get());\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    // doc_score is non-null\n    (*doc_it)->set_score(typed_arr->Value(i));\n  }\n}\n\nvoid fill_user_id(const std::shared_ptr<arrow::Array> &user_id_array,\n                  DocPtrList::iterator doc_it) {\n  arrow::StringArray *typed_arr =\n      static_cast<arrow::StringArray *>(user_id_array.get());\n  for (int64_t i = 0; i < typed_arr->length(); ++i, ++doc_it) {\n    // user_id is non-null\n    (*doc_it)->set_pk(typed_arr->GetString(i));\n  }\n}\n\nStatus record_batch_to_doc_list(\n    const std::vector<FieldAndSchema> &output_fields,\n    const arrow::RecordBatch &record_batch, DocPtrList::iterator doc_it) {\n  GET_FIELD_FROM_RECORD_BATCH(user_id_array, USER_ID);\n  fill_user_id(user_id_array, doc_it);\n  if (auto doc_id_array = record_batch.GetColumnByName(GLOBAL_DOC_ID);\n      doc_id_array != nullptr) {\n    fill_doc_id(doc_id_array, doc_it);\n  }\n  if (auto score_array = record_batch.GetColumnByName(kFieldScore);\n      score_array != nullptr) {\n    fill_doc_score(score_array, doc_it);\n  }\n\n  for (auto &[field_name, field_schema] : output_fields) {\n    GET_FIELD_FROM_RECORD_BATCH(field_array, field_name);\n    if (auto status = fill_doc_field(field_array, *field_schema, doc_it);\n        !status.ok()) {\n      return status;\n    }\n  }\n  if (ailego::LoggerBroker::IsLevelEnabled(ailego::Logger::LEVEL_DEBUG)) {\n    for (int i = 0; i < record_batch.num_rows(); i++) {\n      LOG_DEBUG(\"Doc: %s\", (*(doc_it + i))->to_detail_string().c_str());\n    }\n  }\n  return Status::OK();\n}\n\nResult<DocPtrList> SQLEngineImpl::fill_result(\n    const std::vector<FieldAndSchema> &output_fields,\n    
arrow::RecordBatchReader *reader) {\n  DocPtrList docs;\n  std::shared_ptr<RecordBatch> record_batch;\n  while (true) {\n    auto read_res = reader->ReadNext(&record_batch);\n    if (!read_res.ok()) {\n      return tl::make_unexpected(Status::InternalError(\n          \"Read record batch failed: \", read_res.ToString()));\n    }\n    if (record_batch == nullptr) {\n      break;\n    }\n    size_t cur_size = docs.size();\n    docs.resize(docs.size() + record_batch->num_rows());\n    for (int i = 0; i < record_batch->num_rows(); i++) {\n      docs[cur_size + i] = std::make_shared<Doc>();\n    }\n    auto status = record_batch_to_doc_list(output_fields, *record_batch,\n                                           docs.begin() + cur_size);\n    if (!status.ok()) {\n      return tl::make_unexpected(status);\n    }\n  }\n  return docs;\n}\n\n\nResult<GroupResults> SQLEngineImpl::fill_group_by_result(\n    const QueryInfo &query_info, arrow::RecordBatchReader *reader) {\n  const std::vector<FieldAndSchema> &output_fields =\n      query_info.select_item_schema_ptrs();\n  uint32_t group_count = query_info.group_by()->group_count;\n  uint32_t group_topk = query_info.group_by()->group_topk;\n  std::shared_ptr<RecordBatch> record_batch;\n  std::unordered_map<std::string, std::vector<Doc>> group_to_docs;\n  while (true) {\n    auto read_res = reader->ReadNext(&record_batch);\n    if (!read_res.ok()) {\n      return tl::make_unexpected(Status::InternalError(\n          \"Read record batch failed: \", read_res.ToString()));\n    }\n    if (record_batch == nullptr) {\n      break;\n    }\n    DocPtrList docs(record_batch->num_rows());\n    for (int i = 0; i < record_batch->num_rows(); i++) {\n      docs[i] = std::make_shared<Doc>();\n    }\n    auto status =\n        record_batch_to_doc_list(output_fields, *record_batch, docs.begin());\n    if (!status.ok()) {\n      return tl::make_unexpected(status);\n    }\n    auto group_id_array = record_batch->GetColumnByName(kFieldGroupId);\n 
   if (!group_id_array) {\n      return tl::make_unexpected(\n          Status::InternalError(\"Get group_id_array failed\"));\n    }\n    arrow::StringArray *typed_arr =\n        static_cast<arrow::StringArray *>(group_id_array.get());\n    for (int i = 0; i < record_batch->num_rows(); i++) {\n      if (!typed_arr->IsNull(i)) {\n        // docs already order by score\n        auto &group_docs = group_to_docs[typed_arr->GetString(i)];\n        if (group_docs.size() < group_count) {\n          group_docs.push_back(std::move(*docs[i]));\n        }\n      }\n    }\n  }\n  GroupResults group_results;\n  for (auto &kv : group_to_docs) {\n    group_results.emplace_back(\n        GroupResult{std::move(kv.first), std::move(kv.second)});\n  }\n  std::sort(group_results.begin(), group_results.end(),\n            [&query_info](GroupResult &a, GroupResult &b) {\n              if (query_info.vector_cond_info()->is_reverse_sort()) {\n                return a.docs_[0].score() > b.docs_[0].score();\n              }\n              return a.docs_[0].score() < b.docs_[0].score();\n            });\n  if (group_results.size() > group_topk) {\n    group_results.resize(group_topk);\n  }\n  for (auto &group_result : group_results) {\n    LOG_DEBUG(\"Group: %s\", group_result.group_by_value_.c_str());\n    for (auto &doc : group_result.docs_) {\n      LOG_DEBUG(\"\\tDoc: %s\", doc.to_detail_string().c_str());\n    }\n  }\n  return group_results;\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/db/sqlengine/sqlengine_impl.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <memory.h>\n#include <memory>\n#include <vector>\n#include <arrow/api.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/schema.h>\n#include \"analyzer/query_info.h\"\n#include \"common/group_by.h\"\n#include \"db/sqlengine/common/util.h\"\n#include \"db/sqlengine/parser/sql_info.h\"\n#include \"db/sqlengine/sqlengine.h\"\n\nnamespace zvec::sqlengine {\n\nclass SQLEngineImpl : public SQLEngine {\n public:\n  SQLEngineImpl(zvec::Profiler::Ptr profiler);\n\n  //! Parse pb request\n  Result<QueryInfo::Ptr> parse_request(CollectionSchema::Ptr collection,\n                                       const VectorQuery &request,\n                                       std::shared_ptr<GroupBy> group_by);\n\n  //! 
Perform search with given query_info, segments and index filter\n  Result<std::unique_ptr<arrow::RecordBatchReader>> search_by_query_info(\n      CollectionSchema::Ptr collection,\n      const std::vector<Segment::Ptr> &segments,\n      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);\n\n  Result<DocPtrList> execute(\n      CollectionSchema::Ptr collection, const VectorQuery &query,\n      const std::vector<Segment::Ptr> &segments) override;\n\n  Result<GroupResults> execute_group_by(\n      CollectionSchema::Ptr collection,\n      const GroupByVectorQuery &group_by_query,\n      const std::vector<Segment::Ptr> &segments) override;\n\n  const std::string &execution_time_info() {\n    return execution_time_info_;\n  }\n\n private:\n  Result<DocPtrList> fill_result(\n      const std::vector<FieldAndSchema> &output_fields,\n      arrow::RecordBatchReader *reader);\n\n  Result<QueryInfo::Ptr> parse_sql_info(const CollectionSchema &schema,\n                                        const SQLInfo::Ptr &sql_info);\n\n  Result<GroupResults> fill_group_by_result(const QueryInfo &query_info,\n                                            arrow::RecordBatchReader *reader);\n\n private:\n  zvec::Profiler::Ptr profiler_;\n  std::string execution_time_info_{};\n};\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "src/include/zvec/ailego/buffer/buffer_manager.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <sys/stat.h>\n#include <chrono>\n#include <cstdint>\n#include <filesystem>\n#include <memory>\n#include <vector>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/pattern/singleton.h>\n\nnamespace arrow {\nclass ChunkedArray;\nclass Array;\nclass DataType;\nclass Scalar;\ntemplate <typename T>\nclass Result;\nclass Status;\nclass Buffer;\n}  // namespace arrow\n\nnamespace zvec {\n\n\nnamespace ailego {\n\n\nstruct BufferID;\nclass BufferManager;\nclass BufferHandle;\n\n\nstruct BufferID {\n  struct ParquetPos {\n    int column;\n    int row_group;\n  };\n  struct VectorPos {\n    uint32_t offset;\n    uint32_t length;\n  };\n  union Position {\n    explicit Position() = default;\n    ParquetPos forward;\n    VectorPos vector;\n  };\n  enum TYPE {\n    PARQUET = 1,\n    VECTOR = 2,\n    UNKNOWN = 0,\n  };\n\n\n  static std::uint64_t getLastModifiedNs(const std::filesystem::path &p) {\n    auto ftime = std::filesystem::last_write_time(p);\n    return static_cast<std::uint64_t>(ftime.time_since_epoch().count());\n  }\n\n  // Cross-platform helper to get nanosecond modification time\n  //   static long get_st_mtime_nsec(const struct stat &file_stat) {\n  // #ifdef __APPLE__\n  //     return file_stat.st_mtim.tv_nsec;\n  // #else\n  //     return file_stat.st_mtim.tv_nsec;\n  // #endif\n  //   }\n\n  static 
BufferID ParquetID(const std::string &file_name, int column,\n                            int row_group) {\n    BufferID buffer_id{};\n    buffer_id.type = TYPE::PARQUET;\n    buffer_id.file_name = file_name;\n    buffer_id.pos.forward.column = column;\n    buffer_id.pos.forward.row_group = row_group;\n    struct stat file_stat;\n    if (stat(file_name.c_str(), &file_stat) == 0) {\n      // file_stat.st_ino contains the inode number\n      // file_stat.st_dev contains the device ID\n      // Together they uniquely identify a file\n      buffer_id.file_id = file_stat.st_ino;\n      std::filesystem::path p(file_name);\n      buffer_id.mtime = getLastModifiedNs(p);\n    }\n    return buffer_id;\n  }\n\n  static BufferID VectorID(const std::string &file_name, uint32_t offset,\n                           uint32_t length) {\n    BufferID buffer_id{};\n    buffer_id.type = TYPE::VECTOR;\n    buffer_id.file_name = file_name;\n    struct stat file_stat;\n    if (stat(file_name.c_str(), &file_stat) == 0) {\n      buffer_id.file_id = file_stat.st_ino;\n      std::filesystem::path p(file_name);\n      buffer_id.mtime = getLastModifiedNs(p);\n    }\n    buffer_id.pos.vector.offset = offset;\n    buffer_id.pos.vector.length = length;\n    return buffer_id;\n  }\n\n  explicit BufferID() = default;\n\n  // Type of the file backing this buffer\n  TYPE type{UNKNOWN};\n\n  // Name of the file backing this buffer\n  std::string file_name{};\n\n  // Unique file id\n  uint64_t file_id{};\n\n  long mtime{};\n\n  // To identify which part of the backing file should be loaded into the buffer\n  Position pos{};\n\n\n  // Get the forward ID\n  const inline struct ParquetPos &parquet() const {\n    return pos.forward;\n  }\n\n\n  // Get the vector ID\n  const inline struct VectorPos &vector() const {\n    return pos.vector;\n  }\n\n\n  // Get debug string\n  const std::string to_string() const {\n    std::string msg{\"Buffer[\"};\n    if (type == TYPE::PARQUET) {\n      msg += \"parquet: \" + 
file_name + \"[\" + std::to_string(file_id) + \"]\" +\n             \", column: \" + std::to_string(parquet().column) +\n             \", row_group: \" + std::to_string(parquet().row_group);\n    } else if (type == TYPE::VECTOR) {\n      msg += \"vector: \" + file_name + \"[\" + std::to_string(file_id) + \"]\" +\n             \", offset: \" + std::to_string(vector().offset);\n    } else {\n      msg += \"unknown\";\n    }\n    msg += \", mtime: \" + std::to_string(mtime);\n    msg += \"]\";\n    return msg;\n  }\n};\n\n\n// Thread-safe LRU buffer implementation.\nclass BufferManager : public Singleton<BufferManager> {\n  friend BufferHandle;\n\n public:\n  void init(uint64_t limit, uint32_t num_shards = 1);\n\n  BufferHandle acquire(BufferID &buffer_id);\n\n  std::unique_ptr<BufferHandle> acquire_ptr(BufferID &buffer_id);\n\n  uint64_t total_size_in_bytes() const;\n\n  ~BufferManager();\n\n private:\n  struct BufferContext;\n\n  class BufferPool;\n\n  // Custom deleter for Arrow buffer that automatically notifies us when the\n  // buffer is no longer referenced by Arrow\n  struct ArrowBufferDeleter {\n    explicit ArrowBufferDeleter(BufferContext *c);\n    BufferContext *context;\n    // Only reduces the reference count but does not actually release the\n    // buffer, since the buffer memory is managed by the BufferManager.\n    void operator()(arrow::Buffer *);\n  };\n\n  std::vector<BufferPool *> pools_;\n};\n\n\nclass BufferHandle {\n public:\n  typedef std::unique_ptr<BufferHandle> Pointer;\n\n  explicit BufferHandle(BufferManager::BufferContext *context = nullptr);\n  BufferHandle(const BufferHandle &) = delete;\n  BufferHandle(BufferHandle &&) = default;\n  BufferHandle &operator=(const BufferHandle &) = delete;\n  BufferHandle &operator=(BufferHandle &&) = default;\n\n\n  ~BufferHandle();\n\n\n  // Pin parquet data in memory by allocating arrow buffers of appropriate size\n  // and reading data from the backing file.\n  // The lifecycle of the allocated 
memory is automatically managed through\n  // shared pointers. The buffers are guaranteed to be held until they are not\n  // referenced.\n  // Returns a pointer to the loaded ChunkedArray in Arrow format.\n  std::shared_ptr<arrow::ChunkedArray> pin_parquet_data();\n\n\n  // Pin vector data in memory by allocating a buffer of appropriate size and\n  // loading data from the backing file.\n  // The memory is guaranteed to be held until unpin() is called. The caller\n  // must call unpin() to release the memory when it is no longer needed.\n  // Returns a raw memory address.\n  void *pin_vector_data();\n\n\n  // Reduce the reference count for this vector buffer.\n  // Returns true if this was the last reference.\n  // When reference count is zero, the buffer is moved to the eviction list and\n  // becomes eligible for removal under memory pressure.\n  bool unpin_vector_data();\n\n\n  // Get the current reference count.\n  uint32_t references() const;\n\n\n  // Get the buffer size.\n  uint32_t size() const;\n\n\n private:\n  using BufferContext = BufferManager::BufferContext;\n  using BufferPool = BufferManager::BufferPool;\n\n  BufferContext *context_;\n  BufferPool *pool_;\n};\n\n\n}  // namespace ailego\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/buffer/buffer_pool.h",
    "content": "#pragma once\n\n#include <sys/stat.h>\n#include <fcntl.h>\n#include <unistd.h>\n#include <atomic>\n#include <cassert>\n#include <cstdio>\n#include <cstdlib>\n#include <cstring>\n#include <iostream>\n#include <limits>\n#include <map>\n#include <memory>\n#include <mutex>\n#include <queue>\n#include <stdexcept>\n#include <string>\n#include <unordered_map>\n#include <zvec/ailego/internal/platform.h>\n#include \"concurrentqueue.h\"\n\nnamespace zvec {\nnamespace ailego {\n\nusing block_id_t = size_t;\nusing version_t = size_t;\n\nclass LPMap;\n\nclass LRUCache {\n public:\n  typedef std::pair<block_id_t, version_t> BlockType;\n  typedef moodycamel::ConcurrentQueue<BlockType> ConcurrentQueue;\n\n  int init(size_t block_size);\n\n  bool evict_single_block(BlockType &item);\n\n  bool add_single_block(const LPMap *lp_map, const BlockType &block,\n                        int block_type);\n\n  void clear_dead_node(const LPMap *lp_map);\n\n private:\n  constexpr static size_t CATCH_QUEUE_NUM = 3;\n  size_t block_size_{0};\n  std::vector<ConcurrentQueue> queues_;\n  alignas(64) std::atomic<size_t> evict_queue_insertions_{0};\n};\n\nclass LPMap {\n  struct Entry {\n    alignas(64) std::atomic<int> ref_count;\n    alignas(64) std::atomic<version_t> load_count;\n    char *buffer;\n  };\n\n public:\n  LPMap() : entry_num_(0), entries_(nullptr) {}\n  ~LPMap() {\n    delete[] entries_;\n  }\n\n  void init(size_t entry_num);\n\n  char *acquire_block(block_id_t block_id, bool lru_mode);\n\n  void release_block(block_id_t block_id);\n\n  char *evict_block(block_id_t block_id);\n\n  char *set_block_acquired(block_id_t block_id, char *buffer);\n\n  void recycle(moodycamel::ConcurrentQueue<char *> &free_buffers);\n\n  size_t entry_num() const {\n    return entry_num_;\n  }\n\n  inline bool isDeadBlock(LRUCache::BlockType block) const {\n    Entry &entry = entries_[block.first];\n    return block.second != entry.load_count.load();\n  }\n\n private:\n  size_t entry_num_{0};\n 
 Entry *entries_{nullptr};\n  LRUCache cache_;\n};\n\nclass VecBufferPoolHandle;\n\nclass VecBufferPool {\n public:\n  typedef std::shared_ptr<VecBufferPool> Pointer;\n\n  VecBufferPool(const std::string &filename);\n  ~VecBufferPool() {\n    // Free all buffers in the free list\n    char *buf = nullptr;\n    while (free_buffers_.try_dequeue(buf)) {\n      ailego_free(buf);\n    }\n    // Free any buffers still pinned in the map\n    for (size_t i = 0; i < lp_map_.entry_num(); ++i) {\n      char *b = lp_map_.evict_block(i);\n      if (b) ailego_free(b);\n    }\n    close(fd_);\n  }\n\n  int init(size_t pool_capacity, size_t block_size, size_t segment_count);\n\n  VecBufferPoolHandle get_handle();\n\n  char *acquire_buffer(block_id_t block_id, size_t offset, size_t size,\n                       int retry = 0);\n\n  int get_meta(size_t offset, size_t length, char *buffer);\n\n  size_t file_size() const {\n    return file_size_;\n  }\n\n  bool no_lru_mode() {\n    return no_lru_mode_;\n  }\n\n private:\n  int fd_;\n  size_t file_size_;\n  size_t pool_capacity_;\n  bool no_lru_mode_;\n\n public:\n  LPMap lp_map_;\n\n private:\n  std::vector<std::unique_ptr<std::mutex>> mutex_vec_;\n  moodycamel::ConcurrentQueue<char *> free_buffers_;\n};\n\nclass VecBufferPoolHandle {\n public:\n  VecBufferPoolHandle(VecBufferPool &pool) : pool_(pool) {}\n  VecBufferPoolHandle(VecBufferPoolHandle &&other) : pool_(other.pool_) {}\n\n  ~VecBufferPoolHandle() = default;\n\n  typedef std::shared_ptr<VecBufferPoolHandle> Pointer;\n\n  char *get_block(size_t offset, size_t size, size_t block_id);\n\n  int get_meta(size_t offset, size_t length, char *buffer);\n\n  void release_one(block_id_t block_id);\n\n  void acquire_one(block_id_t block_id);\n\n private:\n  VecBufferPool &pool_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/buffer/concurrentqueue.h",
    "content": "// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free\n// queue. An overview, including benchmark results, is provided here:\n//     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++\n// The full design is also described in excruciating detail at:\n//    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue\n\n// Simplified BSD license:\n// Copyright (c) 2013-2020, Cameron Desrochers.\n// All rights reserved.\n//\n// Redistribution and use in source and binary forms, with or without\n// modification, are permitted provided that the following conditions are met:\n//\n// - Redistributions of source code must retain the above copyright notice, this\n// list of conditions and the following disclaimer.\n// - Redistributions in binary form must reproduce the above copyright notice,\n// this list of conditions and the following disclaimer in the documentation\n// and/or other materials provided with the distribution.\n//\n// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE\n// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n// POSSIBILITY OF SUCH DAMAGE.\n\n// Also dual-licensed under the Boost Software License (see LICENSE.md)\n\n#pragma once\n\n#if defined(__GNUC__) && !defined(__INTEL_COMPILER)\n// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and\n// Traits::index_t are set to < 32 bits, causing integer promotion, causing\n// warnings upon assigning any computed values)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wconversion\"\n\n#ifdef MCDBGQ_USE_RELACY\n#pragma GCC diagnostic ignored \"-Wint-to-pointer-cast\"\n#endif\n#endif\n\n#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)\n// VS2019 with /W4 warns about constant conditional expressions but unless\n// /std=c++17 or higher does not support `if constexpr`, so we have no choice\n// but to simply disable the warning\n#pragma warning(push)\n#pragma warning(disable : 4127)  // conditional expression is constant\n#endif\n\n#if defined(__APPLE__)\n#include \"TargetConditionals.h\"\n#endif\n\n#ifdef MCDBGQ_USE_RELACY\n#include \"relacy/relacy_std.hpp\"\n#include \"relacy_shims.h\"\n// We only use malloc/free anyway, and the delete macro messes up `= delete`\n// method declarations. We'll override the default trait malloc ourselves\n// without a macro.\n#undef new\n#undef delete\n#undef malloc\n#undef free\n#else\n#include <atomic>  // Requires C++11. 
Sorry VS2010.\n#include <cassert>\n#endif\n#include <algorithm>\n#include <array>\n#include <climits>  // for CHAR_BIT\n#include <cstddef>  // for max_align_t\n#include <cstdint>\n#include <cstdlib>\n#include <limits>\n#include <mutex>  // used for thread exit synchronization\n#include <thread>  // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading\n#include <type_traits>\n#include <utility>\n\n// Platform-specific definitions of a numeric thread ID type and an invalid\n// value\nnamespace moodycamel {\nnamespace details {\ntemplate <typename thread_id_t>\nstruct thread_id_converter {\n  typedef thread_id_t thread_id_numeric_size_t;\n  typedef thread_id_t thread_id_hash_t;\n  static thread_id_hash_t prehash(thread_id_t const &x) {\n    return x;\n  }\n};\n}  // namespace details\n}  // namespace moodycamel\n#if defined(MCDBGQ_USE_RELACY)\nnamespace moodycamel {\nnamespace details {\ntypedef std::uint32_t thread_id_t;\nstatic const thread_id_t invalid_thread_id = 0xFFFFFFFFU;\nstatic const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;\nstatic inline thread_id_t thread_id() {\n  return rl::thread_index();\n}\n}  // namespace details\n}  // namespace moodycamel\n#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)\n// No sense pulling in windows.h in a header, we'll manually declare the\n// function we use and rely on backwards-compatibility for this not to break\nextern \"C\"\n    __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);\nnamespace moodycamel {\nnamespace details {\nstatic_assert(sizeof(unsigned long) == sizeof(std::uint32_t),\n              \"Expected size of unsigned long to be 32 bits on Windows\");\ntypedef std::uint32_t thread_id_t;\nstatic const thread_id_t invalid_thread_id =\n    0;  // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx\nstatic const thread_id_t invalid_thread_id2 =\n    0xFFFFFFFFU;  // Not technically guaranteed to be invalid, but is never used\n               
   // in practice. Note that all Win32 thread IDs are presently\n                  // multiples of 4.\nstatic inline thread_id_t thread_id() {\n  return static_cast<thread_id_t>(::GetCurrentThreadId());\n}\n}  // namespace details\n}  // namespace moodycamel\n#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \\\n    (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) ||  \\\n    defined(MOODYCAMEL_NO_THREAD_LOCAL)\nnamespace moodycamel {\nnamespace details {\nstatic_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8,\n              \"std::thread::id is expected to be either 4 or 8 bytes\");\n\ntypedef std::thread::id thread_id_t;\nstatic const thread_id_t invalid_thread_id;  // Default ctor creates invalid ID\n\n// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have\n// one; it's only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined\n// anyway, which it won't be.\nstatic inline thread_id_t thread_id() {\n  return std::this_thread::get_id();\n}\n\ntemplate <std::size_t>\nstruct thread_id_size {};\ntemplate <>\nstruct thread_id_size<4> {\n  typedef std::uint32_t numeric_t;\n};\ntemplate <>\nstruct thread_id_size<8> {\n  typedef std::uint64_t numeric_t;\n};\n\ntemplate <>\nstruct thread_id_converter<thread_id_t> {\n  typedef thread_id_size<sizeof(thread_id_t)>::numeric_t\n      thread_id_numeric_size_t;\n#ifndef __APPLE__\n  typedef std::size_t thread_id_hash_t;\n#else\n  typedef thread_id_numeric_size_t thread_id_hash_t;\n#endif\n\n  static thread_id_hash_t prehash(thread_id_t const &x) {\n#ifndef __APPLE__\n    return std::hash<std::thread::id>()(x);\n#else\n    return *reinterpret_cast<thread_id_hash_t const *>(&x);\n#endif\n  }\n};\n}\n}\n#else\n// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475\n// In order to get a numeric thread ID in a platform-independent way, we use a\n// thread-local static variable's address as a thread identifier :-)\n#if 
defined(__GNUC__) || defined(__INTEL_COMPILER)\n#define MOODYCAMEL_THREADLOCAL __thread\n#elif defined(_MSC_VER)\n#define MOODYCAMEL_THREADLOCAL __declspec(thread)\n#else\n// Assume C++11 compliant compiler\n#define MOODYCAMEL_THREADLOCAL thread_local\n#endif\nnamespace moodycamel {\nnamespace details {\ntypedef std::uintptr_t thread_id_t;\nstatic const thread_id_t invalid_thread_id = 0;  // Address can't be nullptr\nstatic const thread_id_t invalid_thread_id2 =\n    1;  // Member accesses off a null pointer are also generally invalid. Plus\n        // it's not aligned.\ninline thread_id_t thread_id() {\n  static MOODYCAMEL_THREADLOCAL int x;\n  return reinterpret_cast<thread_id_t>(&x);\n}\n}\n}\n#endif\n\n// Constexpr if\n#ifndef MOODYCAMEL_CONSTEXPR_IF\n#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || \\\n    __cplusplus > 201402L\n#define MOODYCAMEL_CONSTEXPR_IF if constexpr\n#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]\n#else\n#define MOODYCAMEL_CONSTEXPR_IF if\n#define MOODYCAMEL_MAYBE_UNUSED\n#endif\n#endif\n\n// Exceptions\n#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED\n#if (defined(_MSC_VER) && defined(_CPPUNWIND)) ||   \\\n    (defined(__GNUC__) && defined(__EXCEPTIONS)) || \\\n    (!defined(_MSC_VER) && !defined(__GNUC__))\n#define MOODYCAMEL_EXCEPTIONS_ENABLED\n#endif\n#endif\n#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED\n#define MOODYCAMEL_TRY try\n#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__)\n#define MOODYCAMEL_RETHROW throw\n#define MOODYCAMEL_THROW(expr) throw(expr)\n#else\n#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true)\n#define MOODYCAMEL_CATCH(...) 
else MOODYCAMEL_CONSTEXPR_IF(false)\n#define MOODYCAMEL_RETHROW\n#define MOODYCAMEL_THROW(expr)\n#endif\n\n#ifndef MOODYCAMEL_NOEXCEPT\n#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)\n#define MOODYCAMEL_NOEXCEPT\n#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true\n#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true\n#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800\n// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when\n// it shouldn't :-( We have to assume *all* non-trivial constructors may throw\n// on VS2012!\n#define MOODYCAMEL_NOEXCEPT _NOEXCEPT\n#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)    \\\n  (std::is_rvalue_reference<valueType>::value &&           \\\n           std::is_move_constructible<type>::value         \\\n       ? std::is_trivially_move_constructible<type>::value \\\n       : std::is_trivially_copy_constructible<type>::value)\n#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr)      \\\n  ((std::is_rvalue_reference<valueType>::value &&              \\\n            std::is_move_assignable<type>::value               \\\n        ? std::is_trivially_move_assignable<type>::value ||    \\\n              std::is_nothrow_move_assignable<type>::value     \\\n        : std::is_trivially_copy_assignable<type>::value ||    \\\n              std::is_nothrow_copy_assignable<type>::value) && \\\n   MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))\n#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900\n#define MOODYCAMEL_NOEXCEPT _NOEXCEPT\n#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)       \\\n  (std::is_rvalue_reference<valueType>::value &&              \\\n           std::is_move_constructible<type>::value            \\\n       ? 
std::is_trivially_move_constructible<type>::value || \\\n             std::is_nothrow_move_constructible<type>::value  \\\n       : std::is_trivially_copy_constructible<type>::value || \\\n             std::is_nothrow_copy_constructible<type>::value)\n#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr)      \\\n  ((std::is_rvalue_reference<valueType>::value &&              \\\n            std::is_move_assignable<type>::value               \\\n        ? std::is_trivially_move_assignable<type>::value ||    \\\n              std::is_nothrow_move_assignable<type>::value     \\\n        : std::is_trivially_copy_assignable<type>::value ||    \\\n              std::is_nothrow_copy_assignable<type>::value) && \\\n   MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))\n#else\n#define MOODYCAMEL_NOEXCEPT noexcept\n#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)\n#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)\n#endif\n#endif\n\n#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n#ifdef MCDBGQ_USE_RELACY\n#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n#else\n// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a\n// crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 g++ <=4.7 doesn't\n// support thread_local either. 
Finally, iOS/ARM doesn't have support for it\n// either, and g++/ARM allows it to compile but it's unconfirmed to actually\n// work\n#if (!defined(_MSC_VER) || _MSC_VER >= 1900) &&                        \\\n    (!defined(__MINGW32__) && !defined(__MINGW64__) ||                 \\\n     !defined(__WINPTHREADS_VERSION)) &&                               \\\n    (!defined(__GNUC__) || __GNUC__ > 4 ||                             \\\n     (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) &&                        \\\n    (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \\\n    !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)\n// Assume `thread_local` is fully supported in all other C++11\n// compilers/platforms\n#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED  // tentatively enabled for now;\n                                                 // years ago several users\n                                                 // report having problems with\n                                                 // it on\n#endif\n#endif\n#endif\n\n// VS2012 doesn't support deleted functions.\n// In this case, we declare the function normally but don't define it. 
A link\n// error will be generated if the function is called.\n#ifndef MOODYCAMEL_DELETE_FUNCTION\n#if defined(_MSC_VER) && _MSC_VER < 1800\n#define MOODYCAMEL_DELETE_FUNCTION\n#else\n#define MOODYCAMEL_DELETE_FUNCTION = delete\n#endif\n#endif\n\nnamespace moodycamel {\nnamespace details {\n#ifndef MOODYCAMEL_ALIGNAS\n// VS2013 doesn't support alignas or alignof, and align() requires a constant\n// literal\n#if defined(_MSC_VER) && _MSC_VER <= 1800\n#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))\n#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)\n#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \\\n  typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type\ntemplate <int Align, typename T>\nstruct Vs2013Aligned {};  // default, unsupported alignment\ntemplate <typename T>\nstruct Vs2013Aligned<1, T> {\n  typedef __declspec(align(1)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<2, T> {\n  typedef __declspec(align(2)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<4, T> {\n  typedef __declspec(align(4)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<8, T> {\n  typedef __declspec(align(8)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<16, T> {\n  typedef __declspec(align(16)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<32, T> {\n  typedef __declspec(align(32)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<64, T> {\n  typedef __declspec(align(64)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<128, T> {\n  typedef __declspec(align(128)) T type;\n};\ntemplate <typename T>\nstruct Vs2013Aligned<256, T> {\n  typedef __declspec(align(256)) T type;\n};\n#else\ntemplate <typename T>\nstruct identity {\n  typedef T type;\n};\n#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)\n#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)\n#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \\\n  alignas(alignof(obj)) typename details::identity<T>::type\n#endif\n#endif\n}  // namespace 
details\n}  // namespace moodycamel\n\n\n// TSAN can falsely report races in lock-free code.  To enable TSAN to be used\n// from projects that use this one, we can apply per-function compile-time\n// suppression. See\n// https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer\n#define MOODYCAMEL_NO_TSAN\n#if defined(__has_feature)\n#if __has_feature(thread_sanitizer)\n#undef MOODYCAMEL_NO_TSAN\n#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize(\"thread\")))\n#endif  // TSAN\n#endif  // TSAN\n\n// Compiler-specific likely/unlikely hints\nnamespace moodycamel {\nnamespace details {\n#if defined(__GNUC__)\nstatic inline bool(likely)(bool x) {\n  return __builtin_expect((x), true);\n}\nstatic inline bool(unlikely)(bool x) {\n  return __builtin_expect((x), false);\n}\n#else\nstatic inline bool(likely)(bool x) {\n  return x;\n}\nstatic inline bool(unlikely)(bool x) {\n  return x;\n}\n#endif\n}  // namespace details\n}  // namespace moodycamel\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n#include \"internal/concurrentqueue_internal_debug.h\"\n#endif\n\nnamespace moodycamel {\nnamespace details {\ntemplate <typename T>\nstruct const_numeric_max {\n  static_assert(std::is_integral<T>::value,\n                \"const_numeric_max can only be used with integers\");\n  static const T value =\n      std::numeric_limits<T>::is_signed\n          ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) -\n                static_cast<T>(1)\n          : static_cast<T>(-1);\n};\n\n#if defined(__GLIBCXX__)\ntypedef ::max_align_t\n    std_max_align_t;  // libstdc++ forgot to add it to std:: for a while\n#else\ntypedef std::max_align_t std_max_align_t;  // Others (e.g. MSVC) insist it can\n                                           // *only* be accessed via std::\n#endif\n\n// Some platforms have incorrectly set max_align_t to a type with <8 bytes\n// alignment even while supporting 8-byte aligned scalar values (*cough* 32-bit\n// iOS). 
Work around this with our own union. See issue #64.\ntypedef union {\n  std_max_align_t x;\n  long long y;\n  void *z;\n} max_align_t;\n}  // namespace details\n\n// Default traits for the ConcurrentQueue. To change some of the\n// traits without re-implementing all of them, inherit from this\n// struct and shadow the declarations you wish to be different;\n// since the traits are used as a template type parameter, the\n// shadowed declarations will be used where defined, and the defaults\n// otherwise.\nstruct ConcurrentQueueDefaultTraits {\n  // General-purpose size type. std::size_t is strongly recommended.\n  typedef std::size_t size_t;\n\n  // The type used for the enqueue and dequeue indices. Must be at least as\n  // large as size_t. Should be significantly larger than the number of elements\n  // you expect to hold at once, especially if you have a high turnover rate;\n  // for example, on 32-bit x86, if you expect to have over a hundred million\n  // elements or pump several million elements through your queue in a very\n  // short space of time, using a 32-bit type *may* trigger a race condition.\n  // A 64-bit int type is recommended in that case, and in practice will\n  // prevent a race condition no matter the usage of the queue. Note that\n  // whether the queue is lock-free with a 64-bit int type depends on whether\n  // std::atomic<std::uint64_t> is lock-free, which is platform-specific.\n  typedef std::size_t index_t;\n\n  // Internally, all elements are enqueued and dequeued from multi-element\n  // blocks; this is the smallest controllable unit. If you expect few elements\n  // but many producers, a smaller block size should be favoured. For few\n  // producers and/or many elements, a larger block size is preferred. A sane\n  // default is provided. Must be a power of 2.\n  static const size_t BLOCK_SIZE = 32;\n\n  // For explicit producers (i.e. 
when using a producer token), the block is\n  // checked for being empty by iterating through a list of flags, one per\n  // element. For large block sizes, this is too inefficient, and switching to\n  // an atomic counter-based approach is faster. The switch is made for block\n  // sizes strictly larger than this threshold.\n  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;\n\n  // How many full blocks can be expected for a single explicit producer? This\n  // should reflect that number's maximum for optimal performance. Must be a\n  // power of 2.\n  static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;\n\n  // How many full blocks can be expected for a single implicit producer? This\n  // should reflect that number's maximum for optimal performance. Must be a\n  // power of 2.\n  static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;\n\n  // The initial size of the hash table mapping thread IDs to implicit\n  // producers. Note that the hash is resized every time it becomes half full.\n  // Must be a power of two, and either 0 or at least 1. If 0, implicit\n  // production (using the enqueue methods without an explicit producer token)\n  // is disabled.\n  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;\n\n  // Controls the number of items that an explicit consumer (i.e. one with a\n  // token) must consume before it causes all consumers to rotate and move on to\n  // the next internal queue.\n  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =\n      256;\n\n  // The maximum number of elements (inclusive) that can be enqueued to a\n  // sub-queue. Enqueue operations that would cause this limit to be surpassed\n  // will fail. Note that this limit is enforced at the block level (for\n  // performance reasons), i.e. 
it's rounded up to the nearest block size.\n  static const size_t MAX_SUBQUEUE_SIZE =\n      details::const_numeric_max<size_t>::value;\n\n  // The number of times to spin before sleeping when waiting on a semaphore.\n  // Recommended values are on the order of 1000-10000 unless the number of\n  // consumer threads exceeds the number of idle cores (in which case try\n  // 0-100). Only affects instances of the BlockingConcurrentQueue.\n  static const int MAX_SEMA_SPINS = 10000;\n\n  // Whether to recycle dynamically-allocated blocks into an internal free list\n  // or not. If false, only pre-allocated blocks (controlled by the constructor\n  // arguments) will be recycled, and all others will be `free`d back to the\n  // heap. Note that blocks consumed by explicit producers are only freed on\n  // destruction of the queue (not following destruction of the token)\n  // regardless of this trait.\n  static const bool RECYCLE_ALLOCATED_BLOCKS = false;\n\n\n#ifndef MCDBGQ_USE_RELACY\n  // Memory allocation can be customized if needed.\n  // malloc should return nullptr on failure, and handle alignment like\n  // std::malloc.\n#if defined(malloc) || defined(free)\n  // Gah, this is 2015, stop defining macros that break standard code already!\n  // Work around malloc/free being special macros:\n  static inline void *WORKAROUND_malloc(size_t size) {\n    return malloc(size);\n  }\n  static inline void WORKAROUND_free(void *ptr) {\n    return free(ptr);\n  }\n  static inline void *(malloc)(size_t size) {\n    return WORKAROUND_malloc(size);\n  }\n  static inline void(free)(void *ptr) {\n    return WORKAROUND_free(ptr);\n  }\n#else\n  static inline void *malloc(size_t size) {\n    return std::malloc(size);\n  }\n  static inline void free(void *ptr) {\n    return std::free(ptr);\n  }\n#endif\n#else\n  // Debug versions when running under the Relacy race detector (ignore\n  // these in user code)\n  static inline void *malloc(size_t size) {\n    return rl::rl_malloc(size, $);\n 
 }\n  static inline void free(void *ptr) {\n    return rl::rl_free(ptr, $);\n  }\n#endif\n};\n\n\n// When producing or consuming many elements, the most efficient way is to:\n//    1) Use one of the bulk-operation methods of the queue with a token\n//    2) Failing that, use the bulk-operation methods without a token\n//    3) Failing that, create a token and use that with the single-item methods\n//    4) Failing that, use the single-parameter methods of the queue\n// Having said that, don't create tokens willy-nilly -- ideally there should be\n// a maximum of one token per thread (of each kind).\nstruct ProducerToken;\nstruct ConsumerToken;\n\ntemplate <typename T, typename Traits>\nclass ConcurrentQueue;\ntemplate <typename T, typename Traits>\nclass BlockingConcurrentQueue;\nclass ConcurrentQueueTests;\n\n\nnamespace details {\nstruct ConcurrentQueueProducerTypelessBase {\n  ConcurrentQueueProducerTypelessBase *next;\n  std::atomic<bool> inactive;\n  ProducerToken *token;\n\n  ConcurrentQueueProducerTypelessBase()\n      : next(nullptr), inactive(false), token(nullptr) {}\n};\n\ntemplate <bool use32>\nstruct _hash_32_or_64 {\n  static inline std::uint32_t hash(std::uint32_t h) {\n    // MurmurHash3 finalizer -- see\n    // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp\n    // Since the thread ID is already unique, all we really want to do is\n    // propagate that uniqueness evenly across all the bits, so that we can use\n    // a subset of the bits while reducing collisions significantly\n    h ^= h >> 16;\n    h *= 0x85ebca6b;\n    h ^= h >> 13;\n    h *= 0xc2b2ae35;\n    return h ^ (h >> 16);\n  }\n};\ntemplate <>\nstruct _hash_32_or_64<1> {\n  static inline std::uint64_t hash(std::uint64_t h) {\n    h ^= h >> 33;\n    h *= 0xff51afd7ed558ccd;\n    h ^= h >> 33;\n    h *= 0xc4ceb9fe1a85ec53;\n    return h ^ (h >> 33);\n  }\n};\ntemplate <std::size_t size>\nstruct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {};\n\nstatic inline 
size_t hash_thread_id(thread_id_t id) {\n  static_assert(\n      sizeof(thread_id_t) <= 8,\n      \"Expected a platform where thread IDs are at most 64-bit values\");\n  return static_cast<size_t>(\n      hash_32_or_64<sizeof(\n          thread_id_converter<thread_id_t>::thread_id_hash_t)>::\n          hash(thread_id_converter<thread_id_t>::prehash(id)));\n}\n\ntemplate <typename T>\nstatic inline bool circular_less_than(T a, T b) {\n  static_assert(\n      std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,\n      \"circular_less_than is intended to be used only with unsigned integer \"\n      \"types\");\n  return static_cast<T>(a - b) >\n         static_cast<T>(static_cast<T>(1)\n                        << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));\n  // Note: extra parens around rhs of operator<< is MSVC bug:\n  // https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931\n  //       silencing the bug requires #pragma warning(disable: 4554) around the\n  //       calling code and has no effect when done here.\n}\n\ntemplate <typename U>\nstatic inline char *align_for(char *ptr) {\n  const std::size_t alignment = std::alignment_of<U>::value;\n  return ptr +\n         (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) %\n             alignment;\n}\n\ntemplate <typename T>\nstatic inline T ceil_to_pow_2(T x) {\n  static_assert(\n      std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,\n      \"ceil_to_pow_2 is intended to be used only with unsigned integer types\");\n\n  // Adapted from\n  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2\n  --x;\n  x |= x >> 1;\n  x |= x >> 2;\n  x |= x >> 4;\n  for (std::size_t i = 1; i < sizeof(T); i <<= 1) {\n    x |= x >> (i << 3);\n  }\n  ++x;\n  return x;\n}\n\ntemplate <typename T>\nstatic inline void swap_relaxed(std::atomic<T> &left, std::atomic<T> &right) {\n  T temp = left.load(std::memory_order_relaxed);\n  
left.store(right.load(std::memory_order_relaxed), std::memory_order_relaxed);\n  right.store(temp, std::memory_order_relaxed);\n}\n\ntemplate <typename T>\nstatic inline T const &nomove(T const &x) {\n  return x;\n}\n\ntemplate <bool Enable>\nstruct nomove_if {\n  template <typename T>\n  static inline T const &eval(T const &x) {\n    return x;\n  }\n};\n\ntemplate <>\nstruct nomove_if<false> {\n  template <typename U>\n  static inline auto eval(U &&x) -> decltype(std::forward<U>(x)) {\n    return std::forward<U>(x);\n  }\n};\n\ntemplate <typename It>\nstatic inline auto deref_noexcept(It &it) MOODYCAMEL_NOEXCEPT -> decltype(*it) {\n  return *it;\n}\n\n#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || \\\n    (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)\ntemplate <typename T>\nstruct is_trivially_destructible : std::is_trivially_destructible<T> {};\n#else\ntemplate <typename T>\nstruct is_trivially_destructible : std::has_trivial_destructor<T> {};\n#endif\n\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n#ifdef MCDBGQ_USE_RELACY\ntypedef RelacyThreadExitListener ThreadExitListener;\ntypedef RelacyThreadExitNotifier ThreadExitNotifier;\n#else\nclass ThreadExitNotifier;\n\nstruct ThreadExitListener {\n  typedef void (*callback_t)(void *);\n  callback_t callback;\n  void *userData;\n\n  ThreadExitListener *next;   // reserved for use by the ThreadExitNotifier\n  ThreadExitNotifier *chain;  // reserved for use by the ThreadExitNotifier\n};\n\nclass ThreadExitNotifier {\n public:\n  static void subscribe(ThreadExitListener *listener) {\n    auto &tlsInst = instance();\n    std::lock_guard<std::mutex> guard(mutex());\n    listener->next = tlsInst.tail;\n    listener->chain = &tlsInst;\n    tlsInst.tail = listener;\n  }\n\n  static void unsubscribe(ThreadExitListener *listener) {\n    std::lock_guard<std::mutex> guard(mutex());\n    if (!listener->chain) {\n      return;  // race with ~ThreadExitNotifier\n    }\n    auto &tlsInst = *listener->chain;\n    
listener->chain = nullptr;\n    ThreadExitListener **prev = &tlsInst.tail;\n    for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {\n      if (ptr == listener) {\n        *prev = ptr->next;\n        break;\n      }\n      prev = &ptr->next;\n    }\n  }\n\n private:\n  ThreadExitNotifier() : tail(nullptr) {}\n  ThreadExitNotifier(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION;\n  ThreadExitNotifier &operator=(ThreadExitNotifier const &)\n      MOODYCAMEL_DELETE_FUNCTION;\n\n  ~ThreadExitNotifier() {\n    // This thread is about to exit, let everyone know!\n    assert(this == &instance() &&\n           \"If this assert fails, you likely have a buggy compiler! Change the \"\n           \"preprocessor conditions such that \"\n           \"MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.\");\n    std::lock_guard<std::mutex> guard(mutex());\n    for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {\n      ptr->chain = nullptr;\n      ptr->callback(ptr->userData);\n    }\n  }\n\n  // Thread-local\n  static inline ThreadExitNotifier &instance() {\n    static thread_local ThreadExitNotifier notifier;\n    return notifier;\n  }\n\n  static inline std::mutex &mutex() {\n    // Must be static because the ThreadExitNotifier could be destroyed while\n    // unsubscribe is called\n    static std::mutex mutex;\n    return mutex;\n  }\n\n private:\n  ThreadExitListener *tail;\n};\n#endif\n#endif\n\ntemplate <typename T>\nstruct static_is_lock_free_num {\n  enum { value = 0 };\n};\ntemplate <>\nstruct static_is_lock_free_num<signed char> {\n  enum { value = ATOMIC_CHAR_LOCK_FREE };\n};\ntemplate <>\nstruct static_is_lock_free_num<short> {\n  enum { value = ATOMIC_SHORT_LOCK_FREE };\n};\ntemplate <>\nstruct static_is_lock_free_num<int> {\n  enum { value = ATOMIC_INT_LOCK_FREE };\n};\ntemplate <>\nstruct static_is_lock_free_num<long> {\n  enum { value = ATOMIC_LONG_LOCK_FREE };\n};\ntemplate <>\nstruct static_is_lock_free_num<long long> {\n  enum { 
value = ATOMIC_LLONG_LOCK_FREE };\n};\ntemplate <typename T>\nstruct static_is_lock_free\n    : static_is_lock_free_num<typename std::make_signed<T>::type> {};\ntemplate <>\nstruct static_is_lock_free<bool> {\n  enum { value = ATOMIC_BOOL_LOCK_FREE };\n};\ntemplate <typename U>\nstruct static_is_lock_free<U *> {\n  enum { value = ATOMIC_POINTER_LOCK_FREE };\n};\n}  // namespace details\n\n\nstruct ProducerToken {\n  template <typename T, typename Traits>\n  explicit ProducerToken(ConcurrentQueue<T, Traits> &queue);\n\n  template <typename T, typename Traits>\n  explicit ProducerToken(BlockingConcurrentQueue<T, Traits> &queue);\n\n  ProducerToken(ProducerToken &&other) MOODYCAMEL_NOEXCEPT\n      : producer(other.producer) {\n    other.producer = nullptr;\n    if (producer != nullptr) {\n      producer->token = this;\n    }\n  }\n\n  inline ProducerToken &operator=(ProducerToken &&other) MOODYCAMEL_NOEXCEPT {\n    swap(other);\n    return *this;\n  }\n\n  void swap(ProducerToken &other) MOODYCAMEL_NOEXCEPT {\n    std::swap(producer, other.producer);\n    if (producer != nullptr) {\n      producer->token = this;\n    }\n    if (other.producer != nullptr) {\n      other.producer->token = &other;\n    }\n  }\n\n  // A token is always valid unless:\n  //     1) Memory allocation failed during construction\n  //     2) It was moved via the move constructor\n  //        (Note: assignment does a swap, leaving both potentially valid)\n  //     3) The associated queue was destroyed\n  // Note that if valid() returns true, that only indicates\n  // that the token is valid for use with a specific queue,\n  // but not which one; that's up to the user to track.\n  inline bool valid() const {\n    return producer != nullptr;\n  }\n\n  ~ProducerToken() {\n    if (producer != nullptr) {\n      producer->token = nullptr;\n      producer->inactive.store(true, std::memory_order_release);\n    }\n  }\n\n  // Disable copying and assignment\n  ProducerToken(ProducerToken const &) 
MOODYCAMEL_DELETE_FUNCTION;\n  ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;\n\n private:\n  template <typename T, typename Traits>\n  friend class ConcurrentQueue;\n  friend class ConcurrentQueueTests;\n\n protected:\n  details::ConcurrentQueueProducerTypelessBase *producer;\n};\n\n\nstruct ConsumerToken {\n  template <typename T, typename Traits>\n  explicit ConsumerToken(ConcurrentQueue<T, Traits> &q);\n\n  template <typename T, typename Traits>\n  explicit ConsumerToken(BlockingConcurrentQueue<T, Traits> &q);\n\n  ConsumerToken(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT\n      : initialOffset(other.initialOffset),\n        lastKnownGlobalOffset(other.lastKnownGlobalOffset),\n        itemsConsumedFromCurrent(other.itemsConsumedFromCurrent),\n        currentProducer(other.currentProducer),\n        desiredProducer(other.desiredProducer) {}\n\n  inline ConsumerToken &operator=(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT {\n    swap(other);\n    return *this;\n  }\n\n  void swap(ConsumerToken &other) MOODYCAMEL_NOEXCEPT {\n    std::swap(initialOffset, other.initialOffset);\n    std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);\n    std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);\n    std::swap(currentProducer, other.currentProducer);\n    std::swap(desiredProducer, other.desiredProducer);\n  }\n\n  // Disable copying and assignment\n  ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;\n  ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;\n\n private:\n  template <typename T, typename Traits>\n  friend class ConcurrentQueue;\n  friend class ConcurrentQueueTests;\n\n private:  // but shared with ConcurrentQueue\n  std::uint32_t initialOffset;\n  std::uint32_t lastKnownGlobalOffset;\n  std::uint32_t itemsConsumedFromCurrent;\n  details::ConcurrentQueueProducerTypelessBase *currentProducer;\n  details::ConcurrentQueueProducerTypelessBase *desiredProducer;\n};\n\n// 
Need to forward-declare this swap because it's in a namespace.\n// See\n// http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces\ntemplate <typename T, typename Traits>\ninline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,\n                 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)\n    MOODYCAMEL_NOEXCEPT;\n\n\ntemplate <typename T, typename Traits = ConcurrentQueueDefaultTraits>\nclass ConcurrentQueue {\n public:\n  typedef ::moodycamel::ProducerToken producer_token_t;\n  typedef ::moodycamel::ConsumerToken consumer_token_t;\n\n  typedef typename Traits::index_t index_t;\n  typedef typename Traits::size_t size_t;\n\n  static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);\n  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD =\n      static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);\n  static const size_t EXPLICIT_INITIAL_INDEX_SIZE =\n      static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);\n  static const size_t IMPLICIT_INITIAL_INDEX_SIZE =\n      static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);\n  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE =\n      static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);\n  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =\n      static_cast<std::uint32_t>(\n          Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);\n#ifdef _MSC_VER\n#pragma warning(push)\n#pragma warning(disable : 4307)  // + integral constant overflow (that's what\n                                 // the ternary expression is for!)\n#pragma warning(disable : 4309)  // static_cast: Truncation of constant value\n#endif\n  static const size_t MAX_SUBQUEUE_SIZE =\n      (details::const_numeric_max<size_t>::value -\n           static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) <\n       BLOCK_SIZE)\n          ? 
details::const_numeric_max<size_t>::value\n          : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) +\n              (BLOCK_SIZE - 1)) /\n             BLOCK_SIZE * BLOCK_SIZE);\n#ifdef _MSC_VER\n#pragma warning(pop)\n#endif\n\n  static_assert(!std::numeric_limits<size_t>::is_signed &&\n                    std::is_integral<size_t>::value,\n                \"Traits::size_t must be an unsigned integral type\");\n  static_assert(!std::numeric_limits<index_t>::is_signed &&\n                    std::is_integral<index_t>::value,\n                \"Traits::index_t must be an unsigned integral type\");\n  static_assert(sizeof(index_t) >= sizeof(size_t),\n                \"Traits::index_t must be at least as wide as Traits::size_t\");\n  static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)),\n                \"Traits::BLOCK_SIZE must be a power of 2 (and at least 2)\");\n  static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) &&\n                    !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD &\n                      (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)),\n                \"Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a \"\n                \"power of 2 (and greater than 1)\");\n  static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) &&\n                    !(EXPLICIT_INITIAL_INDEX_SIZE &\n                      (EXPLICIT_INITIAL_INDEX_SIZE - 1)),\n                \"Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and \"\n                \"greater than 1)\");\n  static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) &&\n                    !(IMPLICIT_INITIAL_INDEX_SIZE &\n                      (IMPLICIT_INITIAL_INDEX_SIZE - 1)),\n                \"Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and \"\n                \"greater than 1)\");\n  static_assert(\n      (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) ||\n          !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE &\n            (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)),\n      
\"Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2\");\n  static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 ||\n                    INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1,\n                \"Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least \"\n                \"1 (or 0 to disable implicit enqueueing)\");\n\n public:\n  // Creates a queue with at least `capacity` element slots; note that the\n  // actual number of elements that can be inserted without additional memory\n  // allocation depends on the number of producers and the block size (e.g. if\n  // the block size is equal to `capacity`, only a single block will be\n  // allocated up-front, which means only a single producer will be able to\n  // enqueue elements without an extra allocation -- blocks aren't shared\n  // between producers). This method is not thread safe -- it is up to the user\n  // to ensure that the queue is fully constructed before it starts being used\n  // by other threads (this includes making the memory effects of construction\n  // visible, possibly with a memory barrier).\n  explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)\n      : producerListTail(nullptr),\n        producerCount(0),\n        initialBlockPoolIndex(0),\n        nextExplicitConsumerId(0),\n        globalExplicitConsumerOffset(0) {\n    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);\n    populate_initial_implicit_producer_hash();\n    populate_initial_block_list(capacity / BLOCK_SIZE +\n                                ((capacity & (BLOCK_SIZE - 1)) == 0 ? 
0 : 1));\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n    // Track all the producers using a fully-resolved typed list for\n    // each kind; this makes it possible to debug them starting from\n    // the root queue object (otherwise wacky casts are needed that\n    // don't compile in the debugger's expression evaluator).\n    explicitProducers.store(nullptr, std::memory_order_relaxed);\n    implicitProducers.store(nullptr, std::memory_order_relaxed);\n#endif\n  }\n\n  // Computes the correct amount of pre-allocated blocks for you based\n  // on the minimum number of elements you want available at any given\n  // time, and the maximum concurrent number of each type of producer.\n  ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers,\n                  size_t maxImplicitProducers)\n      : producerListTail(nullptr),\n        producerCount(0),\n        initialBlockPoolIndex(0),\n        nextExplicitConsumerId(0),\n        globalExplicitConsumerOffset(0) {\n    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);\n    populate_initial_implicit_producer_hash();\n    size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) *\n                        (maxExplicitProducers + 1) +\n                    2 * (maxExplicitProducers + maxImplicitProducers);\n    populate_initial_block_list(blocks);\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n    explicitProducers.store(nullptr, std::memory_order_relaxed);\n    implicitProducers.store(nullptr, std::memory_order_relaxed);\n#endif\n  }\n\n  // Note: The queue should not be accessed concurrently while it's\n  // being deleted. 
It's up to the user to synchronize this.\n  // This method is not thread safe.\n  ~ConcurrentQueue() {\n    // Destroy producers\n    auto ptr = producerListTail.load(std::memory_order_relaxed);\n    while (ptr != nullptr) {\n      auto next = ptr->next_prod();\n      if (ptr->token != nullptr) {\n        ptr->token->producer = nullptr;\n      }\n      destroy(ptr);\n      ptr = next;\n    }\n\n    // Destroy implicit producer hash tables\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {\n      auto hash = implicitProducerHash.load(std::memory_order_relaxed);\n      while (hash != nullptr) {\n        auto prev = hash->prev;\n        if (prev != nullptr) {  // The last hash is part of this object and was\n                                // not allocated dynamically\n          for (size_t i = 0; i != hash->capacity; ++i) {\n            hash->entries[i].~ImplicitProducerKVP();\n          }\n          hash->~ImplicitProducerHash();\n          (Traits::free)(hash);\n        }\n        hash = prev;\n      }\n    }\n\n    // Destroy global free list\n    auto block = freeList.head_unsafe();\n    while (block != nullptr) {\n      auto next = block->freeListNext.load(std::memory_order_relaxed);\n      if (block->dynamicallyAllocated) {\n        destroy(block);\n      }\n      block = next;\n    }\n\n    // Destroy initial free list\n    destroy_array(initialBlockPool, initialBlockPoolSize);\n  }\n\n  // Disable copying and copy assignment\n  ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION;\n  ConcurrentQueue &operator=(ConcurrentQueue const &)\n      MOODYCAMEL_DELETE_FUNCTION;\n\n  // Moving is supported, but note that it is *not* a thread-safe operation.\n  // Nobody can use the queue while it's being moved, and the memory effects\n  // of that move must be propagated to other threads before they can use it.\n  // Note: When a queue is moved, its tokens are still valid but can only be\n  // used with the destination queue (i.e. 
semantically they are moved along\n  // with the queue itself).\n  ConcurrentQueue(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT\n      : producerListTail(\n            other.producerListTail.load(std::memory_order_relaxed)),\n        producerCount(other.producerCount.load(std::memory_order_relaxed)),\n        initialBlockPoolIndex(\n            other.initialBlockPoolIndex.load(std::memory_order_relaxed)),\n        initialBlockPool(other.initialBlockPool),\n        initialBlockPoolSize(other.initialBlockPoolSize),\n        freeList(std::move(other.freeList)),\n        nextExplicitConsumerId(\n            other.nextExplicitConsumerId.load(std::memory_order_relaxed)),\n        globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(\n            std::memory_order_relaxed)) {\n    // Move the other one into this, and leave the other one as an empty queue\n    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);\n    populate_initial_implicit_producer_hash();\n    swap_implicit_producer_hashes(other);\n\n    other.producerListTail.store(nullptr, std::memory_order_relaxed);\n    other.producerCount.store(0, std::memory_order_relaxed);\n    other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);\n    other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n    explicitProducers.store(\n        other.explicitProducers.load(std::memory_order_relaxed),\n        std::memory_order_relaxed);\n    other.explicitProducers.store(nullptr, std::memory_order_relaxed);\n    implicitProducers.store(\n        other.implicitProducers.load(std::memory_order_relaxed),\n        std::memory_order_relaxed);\n    other.implicitProducers.store(nullptr, std::memory_order_relaxed);\n#endif\n\n    other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);\n    other.initialBlockPoolSize = 0;\n    other.initialBlockPool = nullptr;\n\n    reown_producers();\n  }\n\n  inline ConcurrentQueue 
&operator=(ConcurrentQueue &&other)\n      MOODYCAMEL_NOEXCEPT {\n    return swap_internal(other);\n  }\n\n  // Swaps this queue's state with the other's. Not thread-safe.\n  // Swapping two queues does not invalidate their tokens, however\n  // the tokens that were created for one queue must be used with\n  // only the swapped queue (i.e. the tokens are tied to the\n  // queue's movable state, not the object itself).\n  inline void swap(ConcurrentQueue &other) MOODYCAMEL_NOEXCEPT {\n    swap_internal(other);\n  }\n\n private:\n  ConcurrentQueue &swap_internal(ConcurrentQueue &other) {\n    if (this == &other) {\n      return *this;\n    }\n\n    details::swap_relaxed(producerListTail, other.producerListTail);\n    details::swap_relaxed(producerCount, other.producerCount);\n    details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);\n    std::swap(initialBlockPool, other.initialBlockPool);\n    std::swap(initialBlockPoolSize, other.initialBlockPoolSize);\n    freeList.swap(other.freeList);\n    details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);\n    details::swap_relaxed(globalExplicitConsumerOffset,\n                          other.globalExplicitConsumerOffset);\n\n    swap_implicit_producer_hashes(other);\n\n    reown_producers();\n    other.reown_producers();\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n    details::swap_relaxed(explicitProducers, other.explicitProducers);\n    details::swap_relaxed(implicitProducers, other.implicitProducers);\n#endif\n\n    return *this;\n  }\n\n public:\n  // Enqueues a single item (by copying it).\n  // Allocates memory if required. 
Only fails if memory allocation fails (or\n  // implicit production is disabled because\n  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or\n  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).\n  // Thread-safe.\n  inline bool enqueue(T const &item) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue<CanAlloc>(item);\n  }\n\n  // Enqueues a single item (by moving it, if possible).\n  // Allocates memory if required. Only fails if memory allocation fails (or\n  // implicit production is disabled because\n  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or\n  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).\n  // Thread-safe.\n  inline bool enqueue(T &&item) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue<CanAlloc>(std::move(item));\n  }\n\n  // Enqueues a single item (by copying it) using an explicit producer token.\n  // Allocates memory if required. Only fails if memory allocation fails (or\n  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).\n  // Thread-safe.\n  inline bool enqueue(producer_token_t const &token, T const &item) {\n    return inner_enqueue<CanAlloc>(token, item);\n  }\n\n  // Enqueues a single item (by moving it, if possible) using an explicit\n  // producer token. Allocates memory if required. Only fails if memory\n  // allocation fails (or Traits::MAX_SUBQUEUE_SIZE has been defined and would\n  // be surpassed). Thread-safe.\n  inline bool enqueue(producer_token_t const &token, T &&item) {\n    return inner_enqueue<CanAlloc>(token, std::move(item));\n  }\n\n  // Enqueues several items.\n  // Allocates memory if required. 
Only fails if memory allocation fails (or\n  // implicit production is disabled because\n  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or\n  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). Note:\n  // Use std::make_move_iterator if the elements should be moved instead of\n  // copied. Thread-safe.\n  template <typename It>\n  bool enqueue_bulk(It itemFirst, size_t count) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);\n  }\n\n  // Enqueues several items using an explicit producer token.\n  // Allocates memory if required. Only fails if memory allocation fails\n  // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).\n  // Note: Use std::make_move_iterator if the elements should be moved\n  // instead of copied.\n  // Thread-safe.\n  template <typename It>\n  bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) {\n    return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);\n  }\n\n  // Enqueues a single item (by copying it).\n  // Does not allocate memory (except for one-time implicit\n  // producer). 
Fails if not enough room to enqueue (or implicit\n  // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE\n  // is 0).\n  // Thread-safe.\n  inline bool try_enqueue(T const &item) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue<CannotAlloc>(item);\n  }\n\n  // Enqueues a single item (by moving it, if possible).\n  // Does not allocate memory (except for one-time implicit producer).\n  // Fails if not enough room to enqueue (or implicit production is\n  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).\n  // Thread-safe.\n  inline bool try_enqueue(T &&item) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue<CannotAlloc>(std::move(item));\n  }\n\n  // Enqueues a single item (by copying it) using an explicit producer token.\n  // Does not allocate memory. Fails if not enough room to enqueue.\n  // Thread-safe.\n  inline bool try_enqueue(producer_token_t const &token, T const &item) {\n    return inner_enqueue<CannotAlloc>(token, item);\n  }\n\n  // Enqueues a single item (by moving it, if possible) using an explicit\n  // producer token. Does not allocate memory. Fails if not enough room to\n  // enqueue. 
Thread-safe.\n  inline bool try_enqueue(producer_token_t const &token, T &&item) {\n    return inner_enqueue<CannotAlloc>(token, std::move(item));\n  }\n\n  // Enqueues several items.\n  // Does not allocate memory (except for one-time implicit producer).\n  // Fails if not enough room to enqueue (or implicit production is\n  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).\n  // Note: Use std::make_move_iterator if the elements should be moved\n  // instead of copied.\n  // Thread-safe.\n  template <typename It>\n  bool try_enqueue_bulk(It itemFirst, size_t count) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)\n    return false;\n    else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);\n  }\n\n  // Enqueues several items using an explicit producer token.\n  // Does not allocate memory. Fails if not enough room to enqueue.\n  // Note: Use std::make_move_iterator if the elements should be moved\n  // instead of copied.\n  // Thread-safe.\n  template <typename It>\n  bool try_enqueue_bulk(producer_token_t const &token, It itemFirst,\n                        size_t count) {\n    return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);\n  }\n\n\n  // Attempts to dequeue from the queue.\n  // Returns false if all producer streams appeared empty at the time they\n  // were checked (so, the queue is likely but not guaranteed to be empty).\n  // Never allocates. 
Thread-safe.\n  template <typename U>\n  bool try_dequeue(U &item) {\n    // Instead of simply trying each producer in turn (which could cause\n    // needless contention on the first producer), we score them heuristically.\n    size_t nonEmptyCount = 0;\n    ProducerBase *best = nullptr;\n    size_t bestSize = 0;\n    for (auto ptr = producerListTail.load(std::memory_order_acquire);\n         nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {\n      auto size = ptr->size_approx();\n      if (size > 0) {\n        if (size > bestSize) {\n          bestSize = size;\n          best = ptr;\n        }\n        ++nonEmptyCount;\n      }\n    }\n\n    // If there was at least one non-empty queue but it appears empty at the\n    // time we try to dequeue from it, we need to make sure every queue's been\n    // tried\n    if (nonEmptyCount > 0) {\n      if ((details::likely)(best->dequeue(item))) {\n        return true;\n      }\n      for (auto ptr = producerListTail.load(std::memory_order_acquire);\n           ptr != nullptr; ptr = ptr->next_prod()) {\n        if (ptr != best && ptr->dequeue(item)) {\n          return true;\n        }\n      }\n    }\n    return false;\n  }\n\n  // Attempts to dequeue from the queue.\n  // Returns false if all producer streams appeared empty at the time they\n  // were checked (so, the queue is likely but not guaranteed to be empty).\n  // This differs from the try_dequeue(item) method in that this one does\n  // not attempt to reduce contention by interleaving the order that producer\n  // streams are dequeued from. So, using this method can reduce overall\n  // throughput under contention, but will give more predictable results in\n  // single-threaded consumer scenarios. This is mostly only useful for internal\n  // unit tests. Never allocates. 
Thread-safe.\n  template <typename U>\n  bool try_dequeue_non_interleaved(U &item) {\n    for (auto ptr = producerListTail.load(std::memory_order_acquire);\n         ptr != nullptr; ptr = ptr->next_prod()) {\n      if (ptr->dequeue(item)) {\n        return true;\n      }\n    }\n    return false;\n  }\n\n  // Attempts to dequeue from the queue using an explicit consumer token.\n  // Returns false if all producer streams appeared empty at the time they\n  // were checked (so, the queue is likely but not guaranteed to be empty).\n  // Never allocates. Thread-safe.\n  template <typename U>\n  bool try_dequeue(consumer_token_t &token, U &item) {\n    // The idea is roughly as follows:\n    // - Every 256 items from one producer, make everyone rotate (increase the\n    //   global offset) -> this means the highest efficiency consumer dictates\n    //   the rotation speed of everyone else, more or less.\n    // - If you see that the global offset has changed, you must reset your\n    //   consumption counter and move to your designated place.\n    // - If there are no items where you're supposed to be, keep moving until\n    //   you find a producer with some items.\n    // - If the global offset has not changed but you've run out of items to\n    //   consume, move over from your current position until you find a\n    //   producer with something in it.\n\n    if (token.desiredProducer == nullptr ||\n        token.lastKnownGlobalOffset !=\n            globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {\n      if (!update_current_producer_after_rotation(token)) {\n        return false;\n      }\n    }\n\n    // If there was at least one non-empty queue but it appears empty at the\n    // time we try to dequeue from it, we need to make sure every queue's been\n    // tried\n    if (static_cast<ProducerBase *>(token.currentProducer)->dequeue(item)) {\n      if (++token.itemsConsumedFromCurrent ==\n          EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {\n        
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);\n      }\n      return true;\n    }\n\n    auto tail = producerListTail.load(std::memory_order_acquire);\n    auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();\n    if (ptr == nullptr) {\n      ptr = tail;\n    }\n    while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {\n      if (ptr->dequeue(item)) {\n        token.currentProducer = ptr;\n        token.itemsConsumedFromCurrent = 1;\n        return true;\n      }\n      ptr = ptr->next_prod();\n      if (ptr == nullptr) {\n        ptr = tail;\n      }\n    }\n    return false;\n  }\n\n  // Attempts to dequeue several elements from the queue.\n  // Returns the number of items actually dequeued.\n  // Returns 0 if all producer streams appeared empty at the time they\n  // were checked (so, the queue is likely but not guaranteed to be empty).\n  // Never allocates. Thread-safe.\n  template <typename It>\n  size_t try_dequeue_bulk(It itemFirst, size_t max) {\n    size_t count = 0;\n    for (auto ptr = producerListTail.load(std::memory_order_acquire);\n         ptr != nullptr; ptr = ptr->next_prod()) {\n      count += ptr->dequeue_bulk(itemFirst, max - count);\n      if (count == max) {\n        break;\n      }\n    }\n    return count;\n  }\n\n  // Attempts to dequeue several elements from the queue using an explicit\n  // consumer token. Returns the number of items actually dequeued. Returns 0 if\n  // all producer streams appeared empty at the time they were checked (so, the\n  // queue is likely but not guaranteed to be empty). 
Never allocates.\n  // Thread-safe.\n  template <typename It>\n  size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max) {\n    if (token.desiredProducer == nullptr ||\n        token.lastKnownGlobalOffset !=\n            globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {\n      if (!update_current_producer_after_rotation(token)) {\n        return 0;\n      }\n    }\n\n    size_t count = static_cast<ProducerBase *>(token.currentProducer)\n                       ->dequeue_bulk(itemFirst, max);\n    if (count == max) {\n      if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >=\n          EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {\n        globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);\n      }\n      return max;\n    }\n    token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);\n    max -= count;\n\n    auto tail = producerListTail.load(std::memory_order_acquire);\n    auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();\n    if (ptr == nullptr) {\n      ptr = tail;\n    }\n    while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {\n      auto dequeued = ptr->dequeue_bulk(itemFirst, max);\n      count += dequeued;\n      if (dequeued != 0) {\n        token.currentProducer = ptr;\n        token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);\n      }\n      if (dequeued == max) {\n        break;\n      }\n      max -= dequeued;\n      ptr = ptr->next_prod();\n      if (ptr == nullptr) {\n        ptr = tail;\n      }\n    }\n    return count;\n  }\n\n\n  // Attempts to dequeue from a specific producer's inner queue.\n  // If you happen to know which producer you want to dequeue from, this\n  // is significantly faster than using the general-case try_dequeue methods.\n  // Returns false if the producer's queue appeared empty at the time it\n  // was checked (so, the queue is likely but not guaranteed to be empty).\n  
// Never allocates. Thread-safe.\n  template <typename U>\n  inline bool try_dequeue_from_producer(producer_token_t const &producer,\n                                        U &item) {\n    return static_cast<ExplicitProducer *>(producer.producer)->dequeue(item);\n  }\n\n  // Attempts to dequeue several elements from a specific producer's inner\n  // queue. Returns the number of items actually dequeued. If you happen to know\n  // which producer you want to dequeue from, this is significantly faster than\n  // using the general-case try_dequeue methods. Returns 0 if the producer's\n  // queue appeared empty at the time it was checked (so, the queue is likely\n  // but not guaranteed to be empty). Never allocates. Thread-safe.\n  template <typename It>\n  inline size_t try_dequeue_bulk_from_producer(producer_token_t const &producer,\n                                               It itemFirst, size_t max) {\n    return static_cast<ExplicitProducer *>(producer.producer)\n        ->dequeue_bulk(itemFirst, max);\n  }\n\n\n  // Returns an estimate of the total number of elements currently in the queue.\n  // This estimate is only accurate if the queue has completely stabilized\n  // before it is called (i.e. all enqueue and dequeue operations have completed\n  // and their memory effects are visible on the calling thread, and no further\n  // operations start while this method is being called). 
Thread-safe.\n  size_t size_approx() const {\n    size_t size = 0;\n    for (auto ptr = producerListTail.load(std::memory_order_acquire);\n         ptr != nullptr; ptr = ptr->next_prod()) {\n      size += ptr->size_approx();\n    }\n    return size;\n  }\n\n\n  // Returns true if the underlying atomic variables used by\n  // the queue are lock-free (they should be on most platforms).\n  // Thread-safe.\n  static constexpr bool is_lock_free() {\n    return details::static_is_lock_free<bool>::value == 2 &&\n           details::static_is_lock_free<size_t>::value == 2 &&\n           details::static_is_lock_free<std::uint32_t>::value == 2 &&\n           details::static_is_lock_free<index_t>::value == 2 &&\n           details::static_is_lock_free<void *>::value == 2 &&\n           details::static_is_lock_free<typename details::thread_id_converter<\n               details::thread_id_t>::thread_id_numeric_size_t>::value == 2;\n  }\n\n\n private:\n  friend struct ProducerToken;\n  friend struct ConsumerToken;\n  struct ExplicitProducer;\n  friend struct ExplicitProducer;\n  struct ImplicitProducer;\n  friend struct ImplicitProducer;\n  friend class ConcurrentQueueTests;\n\n  enum AllocationMode { CanAlloc, CannotAlloc };\n\n\n  ///////////////////////////////\n  // Queue methods\n  ///////////////////////////////\n\n  template <AllocationMode canAlloc, typename U>\n  inline bool inner_enqueue(producer_token_t const &token, U &&element) {\n    return static_cast<ExplicitProducer *>(token.producer)\n        ->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(\n            std::forward<U>(element));\n  }\n\n  template <AllocationMode canAlloc, typename U>\n  inline bool inner_enqueue(U &&element) {\n    auto producer = get_or_add_implicit_producer();\n    return producer == nullptr\n               ? 
false\n               : producer->ConcurrentQueue::ImplicitProducer::template enqueue<\n                     canAlloc>(std::forward<U>(element));\n  }\n\n  template <AllocationMode canAlloc, typename It>\n  inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst,\n                                 size_t count) {\n    return static_cast<ExplicitProducer *>(token.producer)\n        ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(\n            itemFirst, count);\n  }\n\n  template <AllocationMode canAlloc, typename It>\n  inline bool inner_enqueue_bulk(It itemFirst, size_t count) {\n    auto producer = get_or_add_implicit_producer();\n    return producer == nullptr\n               ? false\n               : producer->ConcurrentQueue::ImplicitProducer::\n                     template enqueue_bulk<canAlloc>(itemFirst, count);\n  }\n\n  inline bool update_current_producer_after_rotation(consumer_token_t &token) {\n    // Ah, there's been a rotation, figure out where we should be!\n    auto tail = producerListTail.load(std::memory_order_acquire);\n    if (token.desiredProducer == nullptr && tail == nullptr) {\n      return false;\n    }\n    auto prodCount = producerCount.load(std::memory_order_relaxed);\n    auto globalOffset =\n        globalExplicitConsumerOffset.load(std::memory_order_relaxed);\n    if ((details::unlikely)(token.desiredProducer == nullptr)) {\n      // Aha, first time we're dequeueing anything.\n      // Figure out our local position\n      // Note: offset is from start, not end, but we're traversing from end --\n      // subtract from count first\n      std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);\n      token.desiredProducer = tail;\n      for (std::uint32_t i = 0; i != offset; ++i) {\n        token.desiredProducer =\n            static_cast<ProducerBase *>(token.desiredProducer)->next_prod();\n        if (token.desiredProducer == nullptr) {\n          token.desiredProducer = 
tail;\n        }\n      }\n    }\n\n    std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;\n    if (delta >= prodCount) {\n      delta = delta % prodCount;\n    }\n    for (std::uint32_t i = 0; i != delta; ++i) {\n      token.desiredProducer =\n          static_cast<ProducerBase *>(token.desiredProducer)->next_prod();\n      if (token.desiredProducer == nullptr) {\n        token.desiredProducer = tail;\n      }\n    }\n\n    token.lastKnownGlobalOffset = globalOffset;\n    token.currentProducer = token.desiredProducer;\n    token.itemsConsumedFromCurrent = 0;\n    return true;\n  }\n\n\n  ///////////////////////////\n  // Free list\n  ///////////////////////////\n\n  template <typename N>\n  struct FreeListNode {\n    FreeListNode() : freeListRefs(0), freeListNext(nullptr) {}\n\n    std::atomic<std::uint32_t> freeListRefs;\n    std::atomic<N *> freeListNext;\n  };\n\n  // A simple CAS-based lock-free free list. Not the fastest thing in the world\n  // under heavy contention, but simple and correct (assuming nodes are never\n  // freed until after the free list is destroyed), and fairly speedy under low\n  // contention.\n  template <typename N>  // N must inherit FreeListNode or have the same fields\n                         // (and initialization of them)\n  struct FreeList {\n    FreeList() : freeListHead(nullptr) {}\n    FreeList(FreeList &&other)\n        : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) {\n      other.freeListHead.store(nullptr, std::memory_order_relaxed);\n    }\n    void swap(FreeList &other) {\n      details::swap_relaxed(freeListHead, other.freeListHead);\n    }\n\n    FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;\n    FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;\n\n    inline void add(N *node) {\n#ifdef MCDBGQ_NOLOCKFREE_FREELIST\n      debug::DebugLock lock(mutex);\n#endif\n      // We know that the should-be-on-freelist bit is 0 at this point, so it's\n      // safe to set it 
using a fetch_add\n      if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST,\n                                       std::memory_order_acq_rel) == 0) {\n        // Oh look! We were the last ones referencing this node, and we know\n        // we want to add it to the free list, so let's do it!\n        add_knowing_refcount_is_zero(node);\n      }\n    }\n\n    inline N *try_get() {\n#ifdef MCDBGQ_NOLOCKFREE_FREELIST\n      debug::DebugLock lock(mutex);\n#endif\n      auto head = freeListHead.load(std::memory_order_acquire);\n      while (head != nullptr) {\n        auto prevHead = head;\n        auto refs = head->freeListRefs.load(std::memory_order_relaxed);\n        if ((refs & REFS_MASK) == 0 ||\n            !head->freeListRefs.compare_exchange_strong(\n                refs, refs + 1, std::memory_order_acquire)) {\n          head = freeListHead.load(std::memory_order_acquire);\n          continue;\n        }\n\n        // Good, reference count has been incremented (it wasn't at zero), which\n        // means we can read the next and not worry about it changing between\n        // now and the time we do the CAS\n        auto next = head->freeListNext.load(std::memory_order_relaxed);\n        if (freeListHead.compare_exchange_strong(head, next,\n                                                 std::memory_order_acquire,\n                                                 std::memory_order_relaxed)) {\n          // Yay, got the node. 
This means it was on the list, which means\n          // shouldBeOnFreeList must be false no matter the refcount (because\n          // nobody else knows it's been taken off yet, it can't have been put\n          // back on).\n          assert((head->freeListRefs.load(std::memory_order_relaxed) &\n                  SHOULD_BE_ON_FREELIST) == 0);\n\n          // Decrease refcount twice, once for our ref, and once for the list's\n          // ref\n          head->freeListRefs.fetch_sub(2, std::memory_order_release);\n          return head;\n        }\n\n        // OK, the head must have changed on us, but we still need to decrease\n        // the refcount we increased. Note that we don't need to release any\n        // memory effects, but we do need to ensure that the reference count\n        // decrement happens-after the CAS on the head.\n        refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);\n        if (refs == SHOULD_BE_ON_FREELIST + 1) {\n          add_knowing_refcount_is_zero(prevHead);\n        }\n      }\n\n      return nullptr;\n    }\n\n    // Useful for traversing the list when there's no contention (e.g. to\n    // destroy remaining nodes)\n    N *head_unsafe() const {\n      return freeListHead.load(std::memory_order_relaxed);\n    }\n\n   private:\n    inline void add_knowing_refcount_is_zero(N *node) {\n      // Since the refcount is zero, and nobody can increase it once it's zero\n      // (except us, and we run only one copy of this method per node at a time,\n      // i.e. 
the single thread case), then we know we can safely change the\n      // next pointer of the node; however, once the refcount is back above\n      // zero, then other threads could increase it (happens under heavy\n      // contention, when the refcount goes to zero in between a load and a\n      // refcount increment of a node in try_get, then back up to something\n      // non-zero, then the refcount increment is done by the other thread) --\n      // so, if the CAS to add the node to the actual list fails, decrease the\n      // refcount and leave the add operation to the next thread who puts the\n      // refcount back at zero (which could be us, hence the loop).\n      auto head = freeListHead.load(std::memory_order_relaxed);\n      while (true) {\n        node->freeListNext.store(head, std::memory_order_relaxed);\n        node->freeListRefs.store(1, std::memory_order_release);\n        if (!freeListHead.compare_exchange_strong(head, node,\n                                                  std::memory_order_release,\n                                                  std::memory_order_relaxed)) {\n          // Hmm, the add failed, but we can only try again when the refcount\n          // goes back to zero\n          if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1,\n                                           std::memory_order_acq_rel) == 1) {\n            continue;\n          }\n        }\n        return;\n      }\n    }\n\n   private:\n    // Implemented like a stack, but where node order doesn't matter (nodes are\n    // inserted out of order under contention)\n    std::atomic<N *> freeListHead;\n\n    static const std::uint32_t REFS_MASK = 0x7FFFFFFF;\n    static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;\n\n#ifdef MCDBGQ_NOLOCKFREE_FREELIST\n    debug::DebugMutex mutex;\n#endif\n  };\n\n\n  ///////////////////////////\n  // Block\n  ///////////////////////////\n\n  enum InnerQueueContext { implicit_context = 0, explicit_context = 1 
};\n\n  struct Block {\n    Block()\n        : next(nullptr),\n          elementsCompletelyDequeued(0),\n          freeListRefs(0),\n          freeListNext(nullptr),\n          dynamicallyAllocated(true) {\n#ifdef MCDBGQ_TRACKMEM\n      owner = nullptr;\n#endif\n    }\n\n    template <InnerQueueContext context>\n    inline bool is_empty() const {\n      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&\n                              BLOCK_SIZE <=\n                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {\n        // Check flags\n        for (size_t i = 0; i < BLOCK_SIZE; ++i) {\n          if (!emptyFlags[i].load(std::memory_order_relaxed)) {\n            return false;\n          }\n        }\n\n        // Aha, empty; make sure we have all other memory effects that happened\n        // before the empty flags were set\n        std::atomic_thread_fence(std::memory_order_acquire);\n        return true;\n      }\n      else {\n        // Check counter\n        if (elementsCompletelyDequeued.load(std::memory_order_relaxed) ==\n            BLOCK_SIZE) {\n          std::atomic_thread_fence(std::memory_order_acquire);\n          return true;\n        }\n        assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <=\n               BLOCK_SIZE);\n        return false;\n      }\n    }\n\n    // Returns true if the block is now empty (does not apply in explicit\n    // context)\n    template <InnerQueueContext context>\n    inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) {\n      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&\n                              BLOCK_SIZE <=\n                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {\n        // Set flag\n        assert(!emptyFlags[BLOCK_SIZE - 1 -\n                           static_cast<size_t>(\n                               i & static_cast<index_t>(BLOCK_SIZE - 1))]\n                    .load(std::memory_order_relaxed));\n        emptyFlags[BLOCK_SIZE - 1 -\n 
                  static_cast<size_t>(i &\n                                       static_cast<index_t>(BLOCK_SIZE - 1))]\n            .store(true, std::memory_order_release);\n        return false;\n      }\n      else {\n        // Increment counter\n        auto prevVal =\n            elementsCompletelyDequeued.fetch_add(1, std::memory_order_acq_rel);\n        assert(prevVal < BLOCK_SIZE);\n        return prevVal == BLOCK_SIZE - 1;\n      }\n    }\n\n    // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping\n    // and count > 0). Returns true if the block is now empty (does not apply in\n    // explicit context).\n    template <InnerQueueContext context>\n    inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i,\n                               size_t count) {\n      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&\n                              BLOCK_SIZE <=\n                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {\n        // Set flags\n        std::atomic_thread_fence(std::memory_order_release);\n        i = BLOCK_SIZE - 1 -\n            static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) -\n            count + 1;\n        for (size_t j = 0; j != count; ++j) {\n          assert(!emptyFlags[i + j].load(std::memory_order_relaxed));\n          emptyFlags[i + j].store(true, std::memory_order_relaxed);\n        }\n        return false;\n      }\n      else {\n        // Increment counter\n        auto prevVal = elementsCompletelyDequeued.fetch_add(\n            count, std::memory_order_acq_rel);\n        assert(prevVal + count <= BLOCK_SIZE);\n        return prevVal + count == BLOCK_SIZE;\n      }\n    }\n\n    template <InnerQueueContext context>\n    inline void set_all_empty() {\n      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&\n                              BLOCK_SIZE <=\n                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {\n        // Set all flags\n        for 
(size_t i = 0; i != BLOCK_SIZE; ++i) {\n          emptyFlags[i].store(true, std::memory_order_relaxed);\n        }\n      }\n      else {\n        // Reset counter\n        elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);\n      }\n    }\n\n    template <InnerQueueContext context>\n    inline void reset_empty() {\n      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&\n                              BLOCK_SIZE <=\n                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {\n        // Reset flags\n        for (size_t i = 0; i != BLOCK_SIZE; ++i) {\n          emptyFlags[i].store(false, std::memory_order_relaxed);\n        }\n      }\n      else {\n        // Reset counter\n        elementsCompletelyDequeued.store(0, std::memory_order_relaxed);\n      }\n    }\n\n    inline T *operator[](index_t idx) MOODYCAMEL_NOEXCEPT {\n      return static_cast<T *>(static_cast<void *>(elements)) +\n             static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));\n    }\n    inline T const *operator[](index_t idx) const MOODYCAMEL_NOEXCEPT {\n      return static_cast<T const *>(static_cast<void const *>(elements)) +\n             static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));\n    }\n\n   private:\n    static_assert(std::alignment_of<T>::value <= sizeof(T),\n                  \"The queue does not support types with an alignment greater \"\n                  \"than their size at this time\");\n    MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;\n\n   public:\n    Block *next;\n    std::atomic<size_t> elementsCompletelyDequeued;\n    std::atomic<bool> emptyFlags\n        [BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1];\n\n   public:\n    std::atomic<std::uint32_t> freeListRefs;\n    std::atomic<Block *> freeListNext;\n    bool dynamicallyAllocated;  // Perhaps a better name for this would be\n                                // 'isNotPartOfInitialBlockPool'\n\n#ifdef MCDBGQ_TRACKMEM\n    void *owner;\n#endif\n  };\n  static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value,\n                \"Internal error: Blocks must be at least as aligned as the \"\n                \"type they are wrapping\");\n\n\n#ifdef MCDBGQ_TRACKMEM\n public:\n  struct MemStats;\n\n private:\n#endif\n\n  ///////////////////////////\n  // Producer base\n  ///////////////////////////\n\n  struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase {\n    ProducerBase(ConcurrentQueue *parent_, bool isExplicit_)\n        : tailIndex(0),\n          headIndex(0),\n          dequeueOptimisticCount(0),\n          dequeueOvercommit(0),\n          tailBlock(nullptr),\n          isExplicit(isExplicit_),\n          parent(parent_) {}\n\n    virtual ~ProducerBase() {}\n\n    template <typename U>\n    inline bool dequeue(U &element) {\n      if (isExplicit) {\n        return static_cast<ExplicitProducer *>(this)->dequeue(element);\n      } else {\n        return static_cast<ImplicitProducer *>(this)->dequeue(element);\n      }\n    }\n\n    template <typename It>\n    inline size_t dequeue_bulk(It &itemFirst, size_t max) {\n      if (isExplicit) {\n        return static_cast<ExplicitProducer *>(this)->dequeue_bulk(itemFirst,\n                                                                   max);\n      } else {\n        return static_cast<ImplicitProducer *>(this)->dequeue_bulk(itemFirst,\n                                                                   max);\n      }\n    }\n\n    inline ProducerBase *next_prod() const {\n      return static_cast<ProducerBase *>(next);\n    }\n\n    inline size_t size_approx() const {\n      auto tail = 
tailIndex.load(std::memory_order_relaxed);\n      auto head = headIndex.load(std::memory_order_relaxed);\n      return details::circular_less_than(head, tail)\n                 ? static_cast<size_t>(tail - head)\n                 : 0;\n    }\n\n    inline index_t getTail() const {\n      return tailIndex.load(std::memory_order_relaxed);\n    }\n\n   protected:\n    std::atomic<index_t> tailIndex;  // Where to enqueue to next\n    std::atomic<index_t> headIndex;  // Where to dequeue from next\n\n    std::atomic<index_t> dequeueOptimisticCount;\n    std::atomic<index_t> dequeueOvercommit;\n\n    Block *tailBlock;\n\n   public:\n    bool isExplicit;\n    ConcurrentQueue *parent;\n\n   protected:\n#ifdef MCDBGQ_TRACKMEM\n    friend struct MemStats;\n#endif\n  };\n\n\n  ///////////////////////////\n  // Explicit queue\n  ///////////////////////////\n\n  struct ExplicitProducer : public ProducerBase {\n    explicit ExplicitProducer(ConcurrentQueue *parent_)\n        : ProducerBase(parent_, true),\n          blockIndex(nullptr),\n          pr_blockIndexSlotsUsed(0),\n          pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),\n          pr_blockIndexFront(0),\n          pr_blockIndexEntries(nullptr),\n          pr_blockIndexRaw(nullptr) {\n      size_t poolBasedIndexSize =\n          details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;\n      if (poolBasedIndexSize > pr_blockIndexSize) {\n        pr_blockIndexSize = poolBasedIndexSize;\n      }\n\n      new_block_index(0);  // This creates an index with double the number of\n                           // current entries, i.e. 
EXPLICIT_INITIAL_INDEX_SIZE\n    }\n\n    ~ExplicitProducer() {\n      // Destruct any elements not yet dequeued.\n      // Since we're in the destructor, we can assume all elements\n      // are either completely dequeued or completely not (no halfways).\n      if (this->tailBlock !=\n          nullptr) {  // Note this means there must be a block index too\n        // First find the block that's partially dequeued, if any\n        Block *halfDequeuedBlock = nullptr;\n        if ((this->headIndex.load(std::memory_order_relaxed) &\n             static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {\n          // The head's not on a block boundary, meaning a block somewhere is\n          // partially dequeued (or the head block is the tail block and was\n          // fully dequeued, but the head/tail are still not on a boundary)\n          size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) &\n                     (pr_blockIndexSize - 1);\n          while (details::circular_less_than<index_t>(\n              pr_blockIndexEntries[i].base + BLOCK_SIZE,\n              this->headIndex.load(std::memory_order_relaxed))) {\n            i = (i + 1) & (pr_blockIndexSize - 1);\n          }\n          assert(details::circular_less_than<index_t>(\n              pr_blockIndexEntries[i].base,\n              this->headIndex.load(std::memory_order_relaxed)));\n          halfDequeuedBlock = pr_blockIndexEntries[i].block;\n        }\n\n        // Start at the head block (note the first line in the loop gives us the\n        // head from the tail on the first iteration)\n        auto block = this->tailBlock;\n        do {\n          block = block->next;\n          if (block->ConcurrentQueue::Block::template is_empty<\n                  explicit_context>()) {\n            continue;\n          }\n\n          size_t i = 0;  // Offset into block\n          if (block == halfDequeuedBlock) {\n            i = static_cast<size_t>(\n                this->headIndex.load(std::memory_order_relaxed) 
&\n                static_cast<index_t>(BLOCK_SIZE - 1));\n          }\n\n          // Walk through all the items in the block; if this is the tail block,\n          // we need to stop when we reach the tail index\n          auto lastValidIndex =\n              (this->tailIndex.load(std::memory_order_relaxed) &\n               static_cast<index_t>(BLOCK_SIZE - 1)) == 0\n                  ? BLOCK_SIZE\n                  : static_cast<size_t>(\n                        this->tailIndex.load(std::memory_order_relaxed) &\n                        static_cast<index_t>(BLOCK_SIZE - 1));\n          while (i != BLOCK_SIZE &&\n                 (block != this->tailBlock || i != lastValidIndex)) {\n            (*block)[i++]->~T();\n          }\n        } while (block != this->tailBlock);\n      }\n\n      // Destroy all blocks that we own\n      if (this->tailBlock != nullptr) {\n        auto block = this->tailBlock;\n        do {\n          auto nextBlock = block->next;\n          this->parent->add_block_to_free_list(block);\n          block = nextBlock;\n        } while (block != this->tailBlock);\n      }\n\n      // Destroy the block indices\n      auto header = static_cast<BlockIndexHeader *>(pr_blockIndexRaw);\n      while (header != nullptr) {\n        auto prev = static_cast<BlockIndexHeader *>(header->prev);\n        header->~BlockIndexHeader();\n        (Traits::free)(header);\n        header = prev;\n      }\n    }\n\n    template <AllocationMode allocMode, typename U>\n    inline bool enqueue(U &&element) {\n      index_t currentTailIndex =\n          this->tailIndex.load(std::memory_order_relaxed);\n      index_t newTailIndex = 1 + currentTailIndex;\n      if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {\n        // We reached the end of a block, start a new one\n        auto startBlock = this->tailBlock;\n        auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;\n        if (this->tailBlock != nullptr &&\n            
this->tailBlock->next->ConcurrentQueue::Block::template is_empty<\n                explicit_context>()) {\n          // We can re-use the block ahead of us, it's empty!\n          this->tailBlock = this->tailBlock->next;\n          this->tailBlock->ConcurrentQueue::Block::template reset_empty<\n              explicit_context>();\n\n          // We'll put the block on the block index (guaranteed to be room since\n          // we're conceptually removing the last block from it first -- except\n          // instead of removing then adding, we can just overwrite). Note that\n          // there must be a valid block index here, since even if allocation\n          // failed in the ctor, it would have been re-attempted when adding the\n          // first block to the queue; since there is such a block, a block\n          // index must have been successfully allocated.\n        } else {\n          // Whatever head value we see here is >= the last value we saw here\n          // (relatively), and <= its current value. Since we have the most\n          // recent tail, the head must be\n          // <= to it.\n          auto head = this->headIndex.load(std::memory_order_relaxed);\n          assert(!details::circular_less_than<index_t>(currentTailIndex, head));\n          if (!details::circular_less_than<index_t>(\n                  head, currentTailIndex + BLOCK_SIZE) ||\n              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&\n               (MAX_SUBQUEUE_SIZE == 0 ||\n                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {\n            // We can't enqueue in another block because there's not enough\n            // leeway -- the tail could surpass the head by the time the block\n            // fills up! 
(Or we'll exceed the size limit, if the second part of\n            // the condition was true.)\n            return false;\n          }\n          // We're going to need a new block; check that the block index has\n          // room\n          if (pr_blockIndexRaw == nullptr ||\n              pr_blockIndexSlotsUsed == pr_blockIndexSize) {\n            // Hmm, the circular block index is already full -- we'll need\n            // to allocate a new index. Note pr_blockIndexRaw can only be\n            // nullptr if the initial allocation failed in the constructor.\n\n            MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {\n              return false;\n            }\n            else if (!new_block_index(pr_blockIndexSlotsUsed)) {\n              return false;\n            }\n          }\n\n          // Insert a new block in the circular linked list\n          auto newBlock =\n              this->parent\n                  ->ConcurrentQueue::template requisition_block<allocMode>();\n          if (newBlock == nullptr) {\n            return false;\n          }\n#ifdef MCDBGQ_TRACKMEM\n          newBlock->owner = this;\n#endif\n          newBlock->ConcurrentQueue::Block::template reset_empty<\n              explicit_context>();\n          if (this->tailBlock == nullptr) {\n            newBlock->next = newBlock;\n          } else {\n            newBlock->next = this->tailBlock->next;\n            this->tailBlock->next = newBlock;\n          }\n          this->tailBlock = newBlock;\n          ++pr_blockIndexSlotsUsed;\n        }\n\n        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(\n            T, U,\n            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {\n          // The constructor may throw. 
We want the element not to appear in the\n          // queue in that case (without corrupting the queue):\n          MOODYCAMEL_TRY {\n            new ((*this->tailBlock)[currentTailIndex])\n                T(std::forward<U>(element));\n          }\n          MOODYCAMEL_CATCH(...) {\n            // Revert change to the current block, but leave the new block\n            // available for next time\n            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;\n            this->tailBlock =\n                startBlock == nullptr ? this->tailBlock : startBlock;\n            MOODYCAMEL_RETHROW;\n          }\n        }\n        else {\n          (void)startBlock;\n          (void)originalBlockIndexSlotsUsed;\n        }\n\n        // Add block to block index\n        auto &entry = blockIndex.load(std::memory_order_relaxed)\n                          ->entries[pr_blockIndexFront];\n        entry.base = currentTailIndex;\n        entry.block = this->tailBlock;\n        blockIndex.load(std::memory_order_relaxed)\n            ->front.store(pr_blockIndexFront, std::memory_order_release);\n        pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);\n\n        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(\n            T, U,\n            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {\n          this->tailIndex.store(newTailIndex, std::memory_order_release);\n          return true;\n        }\n      }\n\n      // Enqueue\n      new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));\n\n      this->tailIndex.store(newTailIndex, std::memory_order_release);\n      return true;\n    }\n\n    template <typename U>\n    bool dequeue(U &element) {\n      auto tail = this->tailIndex.load(std::memory_order_relaxed);\n      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);\n      if (details::circular_less_than<index_t>(\n              this->dequeueOptimisticCount.load(std::memory_order_relaxed) -\n     
             overcommit,\n              tail)) {\n        // Might be something to dequeue, let's give it a try\n\n        // Note that this if is purely for performance purposes in the common\n        // case when the queue is empty and the values are eventually consistent\n        // -- we may enter here spuriously.\n\n        // Note that whatever the values of overcommit and tail are, they are\n        // not going to change (unless we change them) and must be the same\n        // value at this point (inside the if) as when the if condition was\n        // evaluated.\n\n        // We insert an acquire fence here to synchronize-with the release upon\n        // incrementing dequeueOvercommit below. This ensures that whatever the\n        // value loaded into overcommit, the load of dequeueOptimisticCount\n        // in the fetch_add below will result in a value at least as recent as\n        // that (and therefore at least as large). Note that I believe a\n        // compiler (signal) fence here would be sufficient due to the nature of\n        // fetch_add (all read-modify-write operations are guaranteed to work on\n        // the latest value in the modification order), but unfortunately that\n        // can't be shown to be correct using only the C++11 standard. 
See\n        // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case\n        std::atomic_thread_fence(std::memory_order_acquire);\n\n        // Increment optimistic counter, then check if it went over the boundary\n        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(\n            1, std::memory_order_relaxed);\n\n        // Note that since dequeueOvercommit must be <= dequeueOptimisticCount\n        // (because dequeueOvercommit is only ever incremented after\n        // dequeueOptimisticCount -- this is enforced in the `else` block\n        // below), and since we now have a version of dequeueOptimisticCount\n        // that is at least as recent as overcommit (due to the release upon\n        // incrementing dequeueOvercommit and the acquire above that\n        // synchronizes with it), overcommit <= myDequeueCount. However, we\n        // can't assert this since both dequeueOptimisticCount and\n        // dequeueOvercommit may (independently) overflow; in such a case,\n        // though, the logic still holds since the difference between the two is\n        // maintained.\n\n        // Note that we reload tail here in case it changed; it will be the same\n        // value as before or greater, since this load is sequenced after\n        // (happens after) the earlier load above. This is supported by\n        // read-read coherency (as defined in the standard), explained here:\n        // http://en.cppreference.com/w/cpp/atomic/memory_order\n        tail = this->tailIndex.load(std::memory_order_acquire);\n        if ((details::likely)(details::circular_less_than<index_t>(\n                myDequeueCount - overcommit, tail))) {\n          // Guaranteed to be at least one element to dequeue!\n\n          // Get the index. Note that since there's guaranteed to be at least\n          // one element, this will never exceed tail. 
We need to do an\n          // acquire-release fence here since it's possible that whatever\n          // condition got us to this point was for an earlier enqueued element\n          // (that we already see the memory effects for), but that by the time\n          // we increment somebody else has incremented it, and we need to see\n          // the memory effects for *that* element, which in such a case is\n          // necessarily visible on the thread that incremented it in the first\n          // place with the more current condition (they must have acquired a\n          // tail that is at least as recent).\n          auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);\n\n\n          // Determine which block the element is in\n\n          auto localBlockIndex = blockIndex.load(std::memory_order_acquire);\n          auto localBlockIndexHead =\n              localBlockIndex->front.load(std::memory_order_acquire);\n\n          // We need to be careful here about subtracting and dividing because\n          // of index wrap-around. 
When an index wraps, we need to preserve the\n          // sign of the offset when dividing it by the block size (in order to\n          // get a correct signed block count offset in all cases):\n          auto headBase = localBlockIndex->entries[localBlockIndexHead].base;\n          auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);\n          auto offset = static_cast<size_t>(\n              static_cast<typename std::make_signed<index_t>::type>(\n                  blockBaseIndex - headBase) /\n              static_cast<typename std::make_signed<index_t>::type>(\n                  BLOCK_SIZE));\n          auto block = localBlockIndex\n                           ->entries[(localBlockIndexHead + offset) &\n                                     (localBlockIndex->size - 1)]\n                           .block;\n\n          // Dequeue\n          auto &el = *((*block)[index]);\n          if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {\n            // Make sure the element is still fully dequeued and destroyed even\n            // if the assignment throws\n            struct Guard {\n              Block *block;\n              index_t index;\n\n              ~Guard() {\n                (*block)[index]->~T();\n                block->ConcurrentQueue::Block::template set_empty<\n                    explicit_context>(index);\n              }\n            } guard = {block, index};\n\n            element = std::move(el);  // NOLINT\n          } else {\n            element = std::move(el);  // NOLINT\n            el.~T();                  // NOLINT\n            block->ConcurrentQueue::Block::template set_empty<explicit_context>(\n                index);\n          }\n\n          return true;\n        } else {\n          // Wasn't anything to dequeue after all; make the effective dequeue\n          // count eventually consistent\n          this->dequeueOvercommit.fetch_add(\n              1, std::memory_order_release);  // Release so that the 
fetch_add\n                                              // on dequeueOptimisticCount is\n                                              // guaranteed to happen before\n                                              // this write\n        }\n      }\n\n      return false;\n    }\n\n    template <AllocationMode allocMode, typename It>\n    bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) {\n      // First, we need to make sure we have enough room to enqueue all of the\n      // elements; this means pre-allocating blocks and putting them in the\n      // block index (but only if all the allocations succeeded).\n      index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);\n      auto startBlock = this->tailBlock;\n      auto originalBlockIndexFront = pr_blockIndexFront;\n      auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;\n\n      Block *firstAllocatedBlock = nullptr;\n\n      // Figure out how many blocks we'll need to allocate, and do so\n      size_t blockBaseDiff =\n          ((startTailIndex + count - 1) &\n           ~static_cast<index_t>(BLOCK_SIZE - 1)) -\n          ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));\n      index_t currentTailIndex =\n          (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);\n      if (blockBaseDiff > 0) {\n        // Allocate as many blocks as possible from ahead\n        while (blockBaseDiff > 0 && this->tailBlock != nullptr &&\n               this->tailBlock->next != firstAllocatedBlock &&\n               this->tailBlock->next->ConcurrentQueue::Block::template is_empty<\n                   explicit_context>()) {\n          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);\n          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);\n\n          this->tailBlock = this->tailBlock->next;\n          firstAllocatedBlock = firstAllocatedBlock == nullptr\n                                    ? 
this->tailBlock\n                                    : firstAllocatedBlock;\n\n          auto &entry = blockIndex.load(std::memory_order_relaxed)\n                            ->entries[pr_blockIndexFront];\n          entry.base = currentTailIndex;\n          entry.block = this->tailBlock;\n          pr_blockIndexFront =\n              (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);\n        }\n\n        // Now allocate as many blocks as necessary from the block pool\n        while (blockBaseDiff > 0) {\n          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);\n          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);\n\n          auto head = this->headIndex.load(std::memory_order_relaxed);\n          assert(!details::circular_less_than<index_t>(currentTailIndex, head));\n          bool full =\n              !details::circular_less_than<index_t>(\n                  head, currentTailIndex + BLOCK_SIZE) ||\n              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&\n               (MAX_SUBQUEUE_SIZE == 0 ||\n                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));\n          if (pr_blockIndexRaw == nullptr ||\n              pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {\n            MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {\n              // Failed to allocate, undo changes (but keep injected blocks)\n              pr_blockIndexFront = originalBlockIndexFront;\n              pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;\n              this->tailBlock =\n                  startBlock == nullptr ? 
firstAllocatedBlock : startBlock;\n              return false;\n            }\n            else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {\n              // Failed to allocate, undo changes (but keep injected blocks)\n              pr_blockIndexFront = originalBlockIndexFront;\n              pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;\n              this->tailBlock =\n                  startBlock == nullptr ? firstAllocatedBlock : startBlock;\n              return false;\n            }\n\n            // pr_blockIndexFront is updated inside new_block_index, so we need\n            // to update our fallback value too (since we keep the new index\n            // even if we later fail)\n            originalBlockIndexFront = originalBlockIndexSlotsUsed;\n          }\n\n          // Insert a new block in the circular linked list\n          auto newBlock =\n              this->parent\n                  ->ConcurrentQueue::template requisition_block<allocMode>();\n          if (newBlock == nullptr) {\n            pr_blockIndexFront = originalBlockIndexFront;\n            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;\n            this->tailBlock =\n                startBlock == nullptr ? firstAllocatedBlock : startBlock;\n            return false;\n          }\n\n#ifdef MCDBGQ_TRACKMEM\n          newBlock->owner = this;\n#endif\n          newBlock->ConcurrentQueue::Block::template set_all_empty<\n              explicit_context>();\n          if (this->tailBlock == nullptr) {\n            newBlock->next = newBlock;\n          } else {\n            newBlock->next = this->tailBlock->next;\n            this->tailBlock->next = newBlock;\n          }\n          this->tailBlock = newBlock;\n          firstAllocatedBlock = firstAllocatedBlock == nullptr\n                                    ? 
this->tailBlock\n                                    : firstAllocatedBlock;\n\n          ++pr_blockIndexSlotsUsed;\n\n          auto &entry = blockIndex.load(std::memory_order_relaxed)\n                            ->entries[pr_blockIndexFront];\n          entry.base = currentTailIndex;\n          entry.block = this->tailBlock;\n          pr_blockIndexFront =\n              (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);\n        }\n\n        // Excellent, all allocations succeeded. Reset each block's emptiness\n        // before we fill them up, and publish the new block index front\n        auto block = firstAllocatedBlock;\n        while (true) {\n          block->ConcurrentQueue::Block::template reset_empty<\n              explicit_context>();\n          if (block == this->tailBlock) {\n            break;\n          }\n          block = block->next;\n        }\n\n        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(\n            T, decltype(*itemFirst),\n            new (static_cast<T *>(nullptr))\n                T(details::deref_noexcept(itemFirst)))) {\n          blockIndex.load(std::memory_order_relaxed)\n              ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),\n                            std::memory_order_release);\n        }\n      }\n\n      // Enqueue, one block at a time\n      index_t newTailIndex = startTailIndex + static_cast<index_t>(count);\n      currentTailIndex = startTailIndex;\n      auto endBlock = this->tailBlock;\n      this->tailBlock = startBlock;\n      assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||\n             firstAllocatedBlock != nullptr || count == 0);\n      if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&\n          firstAllocatedBlock != nullptr) {\n        this->tailBlock = firstAllocatedBlock;\n      }\n      while (true) {\n        index_t stopIndex =\n            (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n            
static_cast<index_t>(BLOCK_SIZE);\n        if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {\n          stopIndex = newTailIndex;\n        }\n        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(\n            T, decltype(*itemFirst),\n            new (static_cast<T *>(nullptr))\n                T(details::deref_noexcept(itemFirst)))) {\n          while (currentTailIndex != stopIndex) {\n            new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);\n          }\n        }\n        else {\n          MOODYCAMEL_TRY {\n            while (currentTailIndex != stopIndex) {\n              // Must use copy constructor even if move constructor is available\n              // because we may have to revert if there's an exception.\n              // Sorry about the horrible templated next line, but it was the\n              // only way to disable moving *at compile time*, which is\n              // important because a type may only define a (noexcept) move\n              // constructor, and so calls to the cctor will not compile, even\n              // if they are in an if branch that will never be executed\n              new ((*this->tailBlock)[currentTailIndex]) T(\n                  details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(\n                      T, decltype(*itemFirst),\n                      new (static_cast<T *>(nullptr)) T(details::deref_noexcept(\n                          itemFirst)))>::eval(*itemFirst));\n              ++currentTailIndex;\n              ++itemFirst;\n            }\n          }\n          MOODYCAMEL_CATCH(...) 
{\n            // Oh dear, an exception's been thrown -- destroy the elements that\n            // were enqueued so far and revert the entire bulk operation (we'll\n            // keep any allocated blocks in our linked list for later, though).\n            auto constructedStopIndex = currentTailIndex;\n            auto lastBlockEnqueued = this->tailBlock;\n\n            pr_blockIndexFront = originalBlockIndexFront;\n            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;\n            this->tailBlock =\n                startBlock == nullptr ? firstAllocatedBlock : startBlock;\n\n            if (!details::is_trivially_destructible<T>::value) {\n              auto block = startBlock;\n              if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==\n                  0) {\n                block = firstAllocatedBlock;\n              }\n              currentTailIndex = startTailIndex;\n              while (true) {\n                stopIndex =\n                    (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                    static_cast<index_t>(BLOCK_SIZE);\n                if (details::circular_less_than<index_t>(constructedStopIndex,\n                                                         stopIndex)) {\n                  stopIndex = constructedStopIndex;\n                }\n                while (currentTailIndex != stopIndex) {\n                  (*block)[currentTailIndex++]->~T();\n                }\n                if (block == lastBlockEnqueued) {\n                  break;\n                }\n                block = block->next;\n              }\n            }\n            MOODYCAMEL_RETHROW;\n          }\n        }\n\n        if (this->tailBlock == endBlock) {\n          assert(currentTailIndex == newTailIndex);\n          break;\n        }\n        this->tailBlock = this->tailBlock->next;\n      }\n\n      MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(\n          T, decltype(*itemFirst),\n          new 
(static_cast<T *>(nullptr))\n              T(details::deref_noexcept(itemFirst)))) {\n        if (firstAllocatedBlock != nullptr)\n          blockIndex.load(std::memory_order_relaxed)\n              ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),\n                            std::memory_order_release);\n      }\n\n      this->tailIndex.store(newTailIndex, std::memory_order_release);\n      return true;\n    }\n\n    template <typename It>\n    size_t dequeue_bulk(It &itemFirst, size_t max) {\n      auto tail = this->tailIndex.load(std::memory_order_relaxed);\n      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);\n      auto desiredCount = static_cast<size_t>(\n          tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -\n                  overcommit));\n      if (details::circular_less_than<size_t>(0, desiredCount)) {\n        desiredCount = desiredCount < max ? desiredCount : max;\n        std::atomic_thread_fence(std::memory_order_acquire);\n\n        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(\n            desiredCount, std::memory_order_relaxed);\n\n        tail = this->tailIndex.load(std::memory_order_acquire);\n        auto actualCount =\n            static_cast<size_t>(tail - (myDequeueCount - overcommit));\n        if (details::circular_less_than<size_t>(0, actualCount)) {\n          actualCount = desiredCount < actualCount ? desiredCount : actualCount;\n          if (actualCount < desiredCount) {\n            this->dequeueOvercommit.fetch_add(desiredCount - actualCount,\n                                              std::memory_order_release);\n          }\n\n          // Get the first index. 
Note that since there's guaranteed to be at\n          // least actualCount elements, this will never exceed tail.\n          auto firstIndex =\n              this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);\n\n          // Determine which block the first element is in\n          auto localBlockIndex = blockIndex.load(std::memory_order_acquire);\n          auto localBlockIndexHead =\n              localBlockIndex->front.load(std::memory_order_acquire);\n\n          auto headBase = localBlockIndex->entries[localBlockIndexHead].base;\n          auto firstBlockBaseIndex =\n              firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);\n          auto offset = static_cast<size_t>(\n              static_cast<typename std::make_signed<index_t>::type>(\n                  firstBlockBaseIndex - headBase) /\n              static_cast<typename std::make_signed<index_t>::type>(\n                  BLOCK_SIZE));\n          auto indexIndex =\n              (localBlockIndexHead + offset) & (localBlockIndex->size - 1);\n\n          // Iterate the blocks and dequeue\n          auto index = firstIndex;\n          do {\n            auto firstIndexInBlock = index;\n            index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                               static_cast<index_t>(BLOCK_SIZE);\n            endIndex =\n                details::circular_less_than<index_t>(\n                    firstIndex + static_cast<index_t>(actualCount), endIndex)\n                    ? 
firstIndex + static_cast<index_t>(actualCount)\n                    : endIndex;\n            auto block = localBlockIndex->entries[indexIndex].block;\n            if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,\n                                           details::deref_noexcept(itemFirst) =\n                                               std::move((*(*block)[index])))) {\n              while (index != endIndex) {\n                auto &el = *((*block)[index]);\n                *itemFirst++ = std::move(el);\n                el.~T();\n                ++index;\n              }\n            } else {\n              MOODYCAMEL_TRY {\n                while (index != endIndex) {\n                  auto &el = *((*block)[index]);\n                  *itemFirst = std::move(el);\n                  ++itemFirst;\n                  el.~T();\n                  ++index;\n                }\n              }\n              MOODYCAMEL_CATCH(...) {\n                // It's too late to revert the dequeue, but we can make sure\n                // that all the dequeued objects are properly destroyed and the\n                // block index (and empty count) are properly updated before we\n                // propagate the exception\n                do {\n                  block = localBlockIndex->entries[indexIndex].block;\n                  while (index != endIndex) {\n                    (*block)[index++]->~T();\n                  }\n                  block->ConcurrentQueue::Block::template set_many_empty<\n                      explicit_context>(\n                      firstIndexInBlock,\n                      static_cast<size_t>(endIndex - firstIndexInBlock));\n                  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);\n\n                  firstIndexInBlock = index;\n                  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                             static_cast<index_t>(BLOCK_SIZE);\n                  endIndex =\n                      
details::circular_less_than<index_t>(\n                          firstIndex + static_cast<index_t>(actualCount),\n                          endIndex)\n                          ? firstIndex + static_cast<index_t>(actualCount)\n                          : endIndex;\n                } while (index != firstIndex + actualCount);\n\n                MOODYCAMEL_RETHROW;\n              }\n            }\n            block->ConcurrentQueue::Block::template set_many_empty<\n                explicit_context>(\n                firstIndexInBlock,\n                static_cast<size_t>(endIndex - firstIndexInBlock));\n            indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);\n          } while (index != firstIndex + actualCount);\n\n          return actualCount;\n        } else {\n          // Wasn't anything to dequeue after all; make the effective dequeue\n          // count eventually consistent\n          this->dequeueOvercommit.fetch_add(desiredCount,\n                                            std::memory_order_release);\n        }\n      }\n\n      return 0;\n    }\n\n   private:\n    struct BlockIndexEntry {\n      index_t base;\n      Block *block;\n    };\n\n    struct BlockIndexHeader {\n      size_t size;\n      std::atomic<size_t>\n          front;  // Current slot (not next, like pr_blockIndexFront)\n      BlockIndexEntry *entries;\n      void *prev;\n    };\n\n\n    bool new_block_index(size_t numberOfFilledSlotsToExpose) {\n      auto prevBlockSizeMask = pr_blockIndexSize - 1;\n\n      // Create the new block\n      pr_blockIndexSize <<= 1;\n      auto newRawPtr = static_cast<char *>((Traits::malloc)(\n          sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value -\n          1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));\n      if (newRawPtr == nullptr) {\n        pr_blockIndexSize >>= 1;  // Reset to allow graceful retry\n        return false;\n      }\n\n      auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry *>(\n  
        details::align_for<BlockIndexEntry>(newRawPtr +\n                                              sizeof(BlockIndexHeader)));\n\n      // Copy in all the old indices, if any\n      size_t j = 0;\n      if (pr_blockIndexSlotsUsed != 0) {\n        auto i =\n            (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;\n        do {\n          newBlockIndexEntries[j++] = pr_blockIndexEntries[i];\n          i = (i + 1) & prevBlockSizeMask;\n        } while (i != pr_blockIndexFront);\n      }\n\n      // Update everything\n      auto header = new (newRawPtr) BlockIndexHeader;\n      header->size = pr_blockIndexSize;\n      header->front.store(numberOfFilledSlotsToExpose - 1,\n                          std::memory_order_relaxed);\n      header->entries = newBlockIndexEntries;\n      header->prev = pr_blockIndexRaw;  // we link the new block to the old one\n                                        // so we can free it later\n\n      pr_blockIndexFront = j;\n      pr_blockIndexEntries = newBlockIndexEntries;\n      pr_blockIndexRaw = newRawPtr;\n      blockIndex.store(header, std::memory_order_release);\n\n      return true;\n    }\n\n   private:\n    std::atomic<BlockIndexHeader *> blockIndex;\n\n    // To be used by producer only -- consumer must use the ones in referenced\n    // by blockIndex\n    size_t pr_blockIndexSlotsUsed;\n    size_t pr_blockIndexSize;\n    size_t pr_blockIndexFront;  // Next slot (not current)\n    BlockIndexEntry *pr_blockIndexEntries;\n    void *pr_blockIndexRaw;\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n   public:\n    ExplicitProducer *nextExplicitProducer;\n\n   private:\n#endif\n\n#ifdef MCDBGQ_TRACKMEM\n    friend struct MemStats;\n#endif\n  };\n\n\n  //////////////////////////////////\n  // Implicit queue\n  //////////////////////////////////\n\n  struct ImplicitProducer : public ProducerBase {\n    ImplicitProducer(ConcurrentQueue *parent_)\n        : ProducerBase(parent_, false),\n          
nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),\n          blockIndex(nullptr) {\n      new_block_index();\n    }\n\n    ~ImplicitProducer() {\n      // Note that since we're in the destructor we can assume that all\n      // enqueue/dequeue operations completed already; this means that all\n      // undequeued elements are placed contiguously across contiguous blocks,\n      // and that only the first and last remaining blocks can be only partially\n      // empty (all other remaining blocks must be completely full).\n\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n      // Unregister ourselves for thread termination notification\n      if (!this->inactive.load(std::memory_order_relaxed)) {\n        details::ThreadExitNotifier::unsubscribe(&threadExitListener);\n      }\n#endif\n\n      // Destroy all remaining elements!\n      auto tail = this->tailIndex.load(std::memory_order_relaxed);\n      auto index = this->headIndex.load(std::memory_order_relaxed);\n      Block *block = nullptr;\n      assert(index == tail || details::circular_less_than(index, tail));\n      bool forceFreeLastBlock =\n          index != tail;  // If we enter the loop, then the last (tail) block\n                          // will not be freed\n      while (index != tail) {\n        if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ||\n            block == nullptr) {\n          if (block != nullptr) {\n            // Free the old block\n            this->parent->add_block_to_free_list(block);\n          }\n\n          block = get_block_index_entry_for_index(index)->value.load(\n              std::memory_order_relaxed);\n        }\n\n        ((*block)[index])->~T();\n        ++index;\n      }\n      // Even if the queue is empty, there's still one block that's not on the\n      // free list (unless the head index reached the end of it, in which case\n      // the tail will be poised to create a new block).\n      if (this->tailBlock != nullptr &&\n          (forceFreeLastBlock 
||\n           (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {\n        this->parent->add_block_to_free_list(this->tailBlock);\n      }\n\n      // Destroy block index\n      auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);\n      if (localBlockIndex != nullptr) {\n        for (size_t i = 0; i != localBlockIndex->capacity; ++i) {\n          localBlockIndex->index[i]->~BlockIndexEntry();\n        }\n        do {\n          auto prev = localBlockIndex->prev;\n          localBlockIndex->~BlockIndexHeader();\n          (Traits::free)(localBlockIndex);\n          localBlockIndex = prev;\n        } while (localBlockIndex != nullptr);\n      }\n    }\n\n    template <AllocationMode allocMode, typename U>\n    inline bool enqueue(U &&element) {\n      index_t currentTailIndex =\n          this->tailIndex.load(std::memory_order_relaxed);\n      index_t newTailIndex = 1 + currentTailIndex;\n      if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {\n        // We reached the end of a block, start a new one\n        auto head = this->headIndex.load(std::memory_order_relaxed);\n        assert(!details::circular_less_than<index_t>(currentTailIndex, head));\n        if (!details::circular_less_than<index_t>(\n                head, currentTailIndex + BLOCK_SIZE) ||\n            (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&\n             (MAX_SUBQUEUE_SIZE == 0 ||\n              MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {\n          return false;\n        }\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n        debug::DebugLock lock(mutex);\n#endif\n        // Find out where we'll be inserting this block in the block index\n        BlockIndexEntry *idxEntry;\n        if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {\n          return false;\n        }\n\n        // Get ahold of a new block\n        auto newBlock =\n            this->parent\n                
->ConcurrentQueue::template requisition_block<allocMode>();\n        if (newBlock == nullptr) {\n          rewind_block_index_tail();\n          idxEntry->value.store(nullptr, std::memory_order_relaxed);\n          return false;\n        }\n#ifdef MCDBGQ_TRACKMEM\n        newBlock->owner = this;\n#endif\n        newBlock\n            ->ConcurrentQueue::Block::template reset_empty<implicit_context>();\n\n        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(\n            T, U,\n            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {\n          // May throw, try to insert now before we publish the fact that we\n          // have this new block\n          MOODYCAMEL_TRY {\n            new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));\n          }\n          MOODYCAMEL_CATCH(...) {\n            rewind_block_index_tail();\n            idxEntry->value.store(nullptr, std::memory_order_relaxed);\n            this->parent->add_block_to_free_list(newBlock);\n            MOODYCAMEL_RETHROW;\n          }\n        }\n\n        // Insert the new block into the index\n        idxEntry->value.store(newBlock, std::memory_order_relaxed);\n\n        this->tailBlock = newBlock;\n\n        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(\n            T, U,\n            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {\n          this->tailIndex.store(newTailIndex, std::memory_order_release);\n          return true;\n        }\n      }\n\n      // Enqueue\n      new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));\n\n      this->tailIndex.store(newTailIndex, std::memory_order_release);\n      return true;\n    }\n\n    template <typename U>\n    bool dequeue(U &element) {\n      // See ExplicitProducer::dequeue for rationale and explanation\n      index_t tail = this->tailIndex.load(std::memory_order_relaxed);\n      index_t overcommit =\n          this->dequeueOvercommit.load(std::memory_order_relaxed);\n      if 
(details::circular_less_than<index_t>(\n              this->dequeueOptimisticCount.load(std::memory_order_relaxed) -\n                  overcommit,\n              tail)) {\n        std::atomic_thread_fence(std::memory_order_acquire);\n\n        index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(\n            1, std::memory_order_relaxed);\n        tail = this->tailIndex.load(std::memory_order_acquire);\n        if ((details::likely)(details::circular_less_than<index_t>(\n                myDequeueCount - overcommit, tail))) {\n          index_t index =\n              this->headIndex.fetch_add(1, std::memory_order_acq_rel);\n\n          // Determine which block the element is in\n          auto entry = get_block_index_entry_for_index(index);\n\n          // Dequeue\n          auto block = entry->value.load(std::memory_order_relaxed);\n          auto &el = *((*block)[index]);\n\n          if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n            // Note: Acquiring the mutex with every dequeue instead of only when\n            // a block is released is very sub-optimal, but it is, after all,\n            // purely debug code.\n            debug::DebugLock lock(producer->mutex);\n#endif\n            struct Guard {\n              Block *block;\n              index_t index;\n              BlockIndexEntry *entry;\n              ConcurrentQueue *parent;\n\n              ~Guard() {\n                (*block)[index]->~T();\n                if (block->ConcurrentQueue::Block::template set_empty<\n                        implicit_context>(index)) {\n                  entry->value.store(nullptr, std::memory_order_relaxed);\n                  parent->add_block_to_free_list(block);\n                }\n              }\n            } guard = {block, index, entry, this->parent};\n\n            element = std::move(el);  // NOLINT\n          } else {\n            element = std::move(el);  // NOLINT\n    
        el.~T();                  // NOLINT\n\n            if (block->ConcurrentQueue::Block::template set_empty<\n                    implicit_context>(index)) {\n              {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n                debug::DebugLock lock(mutex);\n#endif\n                // Add the block back into the global free pool (and remove from\n                // block index)\n                entry->value.store(nullptr, std::memory_order_relaxed);\n              }\n              this->parent->add_block_to_free_list(\n                  block);  // releases the above store\n            }\n          }\n\n          return true;\n        } else {\n          this->dequeueOvercommit.fetch_add(1, std::memory_order_release);\n        }\n      }\n\n      return false;\n    }\n\n#ifdef _MSC_VER\n#pragma warning(push)\n#pragma warning(disable : 4706)  // assignment within conditional expression\n#endif\n    template <AllocationMode allocMode, typename It>\n    bool enqueue_bulk(It itemFirst, size_t count) {\n      // First, we need to make sure we have enough room to enqueue all of the\n      // elements; this means pre-allocating blocks and putting them in the\n      // block index (but only if all the allocations succeeded).\n\n      // Note that the tailBlock we start off with may not be owned by us any\n      // more; this happens if it was filled up exactly to the top (setting\n      // tailIndex to the first index of the next block which is not yet\n      // allocated), then dequeued completely (putting it on the free list)\n      // before we enqueue again.\n\n      index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);\n      auto startBlock = this->tailBlock;\n      Block *firstAllocatedBlock = nullptr;\n      auto endBlock = this->tailBlock;\n\n      // Figure out how many blocks we'll need to allocate, and do so\n      size_t blockBaseDiff =\n          ((startTailIndex + count - 1) &\n           ~static_cast<index_t>(BLOCK_SIZE 
- 1)) -\n          ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));\n      index_t currentTailIndex =\n          (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);\n      if (blockBaseDiff > 0) {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n        debug::DebugLock lock(mutex);\n#endif\n        do {\n          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);\n          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);\n\n          // Find out where we'll be inserting this block in the block index\n          BlockIndexEntry *idxEntry =\n              nullptr;  // initialization here unnecessary but compiler can't\n                        // always tell\n          Block *newBlock;\n          bool indexInserted = false;\n          auto head = this->headIndex.load(std::memory_order_relaxed);\n          assert(!details::circular_less_than<index_t>(currentTailIndex, head));\n          bool full =\n              !details::circular_less_than<index_t>(\n                  head, currentTailIndex + BLOCK_SIZE) ||\n              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&\n               (MAX_SUBQUEUE_SIZE == 0 ||\n                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));\n\n          if (full ||\n              !(indexInserted = insert_block_index_entry<allocMode>(\n                    idxEntry, currentTailIndex)) ||\n              (newBlock =\n                   this->parent->ConcurrentQueue::template requisition_block<\n                       allocMode>()) == nullptr) {\n            // Index allocation or block allocation failed; revert any other\n            // allocations and index insertions done so far for this operation\n            if (indexInserted) {\n              rewind_block_index_tail();\n              idxEntry->value.store(nullptr, std::memory_order_relaxed);\n            }\n            currentTailIndex =\n                (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);\n   
         for (auto block = firstAllocatedBlock; block != nullptr;\n                 block = block->next) {\n              currentTailIndex += static_cast<index_t>(BLOCK_SIZE);\n              idxEntry = get_block_index_entry_for_index(currentTailIndex);\n              idxEntry->value.store(nullptr, std::memory_order_relaxed);\n              rewind_block_index_tail();\n            }\n            this->parent->add_blocks_to_free_list(firstAllocatedBlock);\n            this->tailBlock = startBlock;\n\n            return false;\n          }\n\n#ifdef MCDBGQ_TRACKMEM\n          newBlock->owner = this;\n#endif\n          newBlock->ConcurrentQueue::Block::template reset_empty<\n              implicit_context>();\n          newBlock->next = nullptr;\n\n          // Insert the new block into the index\n          idxEntry->value.store(newBlock, std::memory_order_relaxed);\n\n          // Store the chain of blocks so that we can undo if later allocations\n          // fail, and so that we can find the blocks when we do the actual\n          // enqueueing\n          if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||\n              firstAllocatedBlock != nullptr) {\n            assert(this->tailBlock != nullptr);\n            this->tailBlock->next = newBlock;\n          }\n          this->tailBlock = newBlock;\n          endBlock = newBlock;\n          firstAllocatedBlock =\n              firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock;\n        } while (blockBaseDiff > 0);\n      }\n\n      // Enqueue, one block at a time\n      index_t newTailIndex = startTailIndex + static_cast<index_t>(count);\n      currentTailIndex = startTailIndex;\n      this->tailBlock = startBlock;\n      assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||\n             firstAllocatedBlock != nullptr || count == 0);\n      if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&\n          firstAllocatedBlock != nullptr) {\n        this->tailBlock = firstAllocatedBlock;\n      }\n      while (true) {\n        index_t stopIndex =\n            (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n            static_cast<index_t>(BLOCK_SIZE);\n        if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {\n          stopIndex = newTailIndex;\n        }\n        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(\n            T, decltype(*itemFirst),\n            new (static_cast<T *>(nullptr))\n                T(details::deref_noexcept(itemFirst)))) {\n          while (currentTailIndex != stopIndex) {\n            new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);\n          }\n        }\n        else {\n          MOODYCAMEL_TRY {\n            while (currentTailIndex != stopIndex) {\n              new ((*this->tailBlock)[currentTailIndex]) T(\n                  details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(\n                      T, decltype(*itemFirst),\n                      new (static_cast<T *>(nullptr)) T(details::deref_noexcept(\n                          itemFirst)))>::eval(*itemFirst));\n              ++currentTailIndex;\n              ++itemFirst;\n            }\n          }\n          MOODYCAMEL_CATCH(...) 
{\n            auto constructedStopIndex = currentTailIndex;\n            auto lastBlockEnqueued = this->tailBlock;\n\n            if (!details::is_trivially_destructible<T>::value) {\n              auto block = startBlock;\n              if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==\n                  0) {\n                block = firstAllocatedBlock;\n              }\n              currentTailIndex = startTailIndex;\n              while (true) {\n                stopIndex =\n                    (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                    static_cast<index_t>(BLOCK_SIZE);\n                if (details::circular_less_than<index_t>(constructedStopIndex,\n                                                         stopIndex)) {\n                  stopIndex = constructedStopIndex;\n                }\n                while (currentTailIndex != stopIndex) {\n                  (*block)[currentTailIndex++]->~T();\n                }\n                if (block == lastBlockEnqueued) {\n                  break;\n                }\n                block = block->next;\n              }\n            }\n\n            currentTailIndex =\n                (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);\n            for (auto block = firstAllocatedBlock; block != nullptr;\n                 block = block->next) {\n              currentTailIndex += static_cast<index_t>(BLOCK_SIZE);\n              auto idxEntry = get_block_index_entry_for_index(currentTailIndex);\n              idxEntry->value.store(nullptr, std::memory_order_relaxed);\n              rewind_block_index_tail();\n            }\n            this->parent->add_blocks_to_free_list(firstAllocatedBlock);\n            this->tailBlock = startBlock;\n            MOODYCAMEL_RETHROW;\n          }\n        }\n\n        if (this->tailBlock == endBlock) {\n          assert(currentTailIndex == newTailIndex);\n          break;\n        }\n        this->tailBlock = 
this->tailBlock->next;\n      }\n      this->tailIndex.store(newTailIndex, std::memory_order_release);\n      return true;\n    }\n#ifdef _MSC_VER\n#pragma warning(pop)\n#endif\n\n    template <typename It>\n    size_t dequeue_bulk(It &itemFirst, size_t max) {\n      auto tail = this->tailIndex.load(std::memory_order_relaxed);\n      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);\n      auto desiredCount = static_cast<size_t>(\n          tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -\n                  overcommit));\n      if (details::circular_less_than<size_t>(0, desiredCount)) {\n        desiredCount = desiredCount < max ? desiredCount : max;\n        std::atomic_thread_fence(std::memory_order_acquire);\n\n        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(\n            desiredCount, std::memory_order_relaxed);\n\n        tail = this->tailIndex.load(std::memory_order_acquire);\n        auto actualCount =\n            static_cast<size_t>(tail - (myDequeueCount - overcommit));\n        if (details::circular_less_than<size_t>(0, actualCount)) {\n          actualCount = desiredCount < actualCount ? desiredCount : actualCount;\n          if (actualCount < desiredCount) {\n            this->dequeueOvercommit.fetch_add(desiredCount - actualCount,\n                                              std::memory_order_release);\n          }\n\n          // Get the first index. 
Note that since there's guaranteed to be at\n          // least actualCount elements, this will never exceed tail.\n          auto firstIndex =\n              this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);\n\n          // Iterate the blocks and dequeue\n          auto index = firstIndex;\n          BlockIndexHeader *localBlockIndex;\n          auto indexIndex =\n              get_block_index_index_for_index(index, localBlockIndex);\n          do {\n            auto blockStartIndex = index;\n            index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                               static_cast<index_t>(BLOCK_SIZE);\n            endIndex =\n                details::circular_less_than<index_t>(\n                    firstIndex + static_cast<index_t>(actualCount), endIndex)\n                    ? firstIndex + static_cast<index_t>(actualCount)\n                    : endIndex;\n\n            auto entry = localBlockIndex->index[indexIndex];\n            auto block = entry->value.load(std::memory_order_relaxed);\n            if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,\n                                           details::deref_noexcept(itemFirst) =\n                                               std::move((*(*block)[index])))) {\n              while (index != endIndex) {\n                auto &el = *((*block)[index]);\n                *itemFirst++ = std::move(el);\n                el.~T();\n                ++index;\n              }\n            } else {\n              MOODYCAMEL_TRY {\n                while (index != endIndex) {\n                  auto &el = *((*block)[index]);\n                  *itemFirst = std::move(el);\n                  ++itemFirst;\n                  el.~T();\n                  ++index;\n                }\n              }\n              MOODYCAMEL_CATCH(...) 
{\n                do {\n                  entry = localBlockIndex->index[indexIndex];\n                  block = entry->value.load(std::memory_order_relaxed);\n                  while (index != endIndex) {\n                    (*block)[index++]->~T();\n                  }\n\n                  if (block->ConcurrentQueue::Block::template set_many_empty<\n                          implicit_context>(\n                          blockStartIndex,\n                          static_cast<size_t>(endIndex - blockStartIndex))) {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n                    debug::DebugLock lock(mutex);\n#endif\n                    entry->value.store(nullptr, std::memory_order_relaxed);\n                    this->parent->add_block_to_free_list(block);\n                  }\n                  indexIndex =\n                      (indexIndex + 1) & (localBlockIndex->capacity - 1);\n\n                  blockStartIndex = index;\n                  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +\n                             static_cast<index_t>(BLOCK_SIZE);\n                  endIndex =\n                      details::circular_less_than<index_t>(\n                          firstIndex + static_cast<index_t>(actualCount),\n                          endIndex)\n                          ? 
firstIndex + static_cast<index_t>(actualCount)\n                          : endIndex;\n                } while (index != firstIndex + actualCount);\n\n                MOODYCAMEL_RETHROW;\n              }\n            }\n            if (block->ConcurrentQueue::Block::template set_many_empty<\n                    implicit_context>(\n                    blockStartIndex,\n                    static_cast<size_t>(endIndex - blockStartIndex))) {\n              {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n                debug::DebugLock lock(mutex);\n#endif\n                // Note that the set_many_empty above did a release, meaning\n                // that anybody who acquires the block we're about to free can\n                // use it safely since our writes (and reads!) will have\n                // happened-before then.\n                entry->value.store(nullptr, std::memory_order_relaxed);\n              }\n              this->parent->add_block_to_free_list(\n                  block);  // releases the above store\n            }\n            indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);\n          } while (index != firstIndex + actualCount);\n\n          return actualCount;\n        } else {\n          this->dequeueOvercommit.fetch_add(desiredCount,\n                                            std::memory_order_release);\n        }\n      }\n\n      return 0;\n    }\n\n   private:\n    // The block size must be > 1, so any number with the low bit set is an\n    // invalid block base index\n    static const index_t INVALID_BLOCK_BASE = 1;\n\n    struct BlockIndexEntry {\n      std::atomic<index_t> key;\n      std::atomic<Block *> value;\n    };\n\n    struct BlockIndexHeader {\n      size_t capacity;\n      std::atomic<size_t> tail;\n      BlockIndexEntry *entries;\n      BlockIndexEntry **index;\n      BlockIndexHeader *prev;\n    };\n\n    template <AllocationMode allocMode>\n    inline bool insert_block_index_entry(BlockIndexEntry 
*&idxEntry,\n                                         index_t blockStartIndex) {\n      auto localBlockIndex =\n          blockIndex.load(std::memory_order_relaxed);  // We're the only writer\n                                                       // thread, relaxed is OK\n      if (localBlockIndex == nullptr) {\n        return false;  // this can happen if new_block_index failed in the\n                       // constructor\n      }\n      size_t newTail =\n          (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &\n          (localBlockIndex->capacity - 1);\n      idxEntry = localBlockIndex->index[newTail];\n      if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||\n          idxEntry->value.load(std::memory_order_relaxed) == nullptr) {\n        idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);\n        localBlockIndex->tail.store(newTail, std::memory_order_release);\n        return true;\n      }\n\n      // No room in the old block index, try to allocate another one!\n      MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {\n        return false;\n      }\n      else if (!new_block_index()) {\n        return false;\n      }\n      else {\n        localBlockIndex = blockIndex.load(std::memory_order_relaxed);\n        newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &\n                  (localBlockIndex->capacity - 1);\n        idxEntry = localBlockIndex->index[newTail];\n        assert(idxEntry->key.load(std::memory_order_relaxed) ==\n               INVALID_BLOCK_BASE);\n        idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);\n        localBlockIndex->tail.store(newTail, std::memory_order_release);\n        return true;\n      }\n    }\n\n    inline void rewind_block_index_tail() {\n      auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);\n      localBlockIndex->tail.store(\n          (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) &\n            
  (localBlockIndex->capacity - 1),\n          std::memory_order_relaxed);\n    }\n\n    inline BlockIndexEntry *get_block_index_entry_for_index(\n        index_t index) const {\n      BlockIndexHeader *localBlockIndex;\n      auto idx = get_block_index_index_for_index(index, localBlockIndex);\n      return localBlockIndex->index[idx];\n    }\n\n    inline size_t get_block_index_index_for_index(\n        index_t index, BlockIndexHeader *&localBlockIndex) const {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n      debug::DebugLock lock(mutex);\n#endif\n      index &= ~static_cast<index_t>(BLOCK_SIZE - 1);\n      localBlockIndex = blockIndex.load(std::memory_order_acquire);\n      auto tail = localBlockIndex->tail.load(std::memory_order_acquire);\n      auto tailBase =\n          localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);\n      assert(tailBase != INVALID_BLOCK_BASE);\n      // Note: Must use division instead of shift because the index may wrap\n      // around, causing a negative offset, whose negativity we want to preserve\n      auto offset = static_cast<size_t>(\n          static_cast<typename std::make_signed<index_t>::type>(index -\n                                                                tailBase) /\n          static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));\n      size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);\n      assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) ==\n                 index &&\n             localBlockIndex->index[idx]->value.load(\n                 std::memory_order_relaxed) != nullptr);\n      return idx;\n    }\n\n    bool new_block_index() {\n      auto prev = blockIndex.load(std::memory_order_relaxed);\n      size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;\n      auto entryCount = prev == nullptr ? 
nextBlockIndexCapacity : prevCapacity;\n      auto raw = static_cast<char *>((Traits::malloc)(\n          sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value -\n          1 + sizeof(BlockIndexEntry) * entryCount +\n          std::alignment_of<BlockIndexEntry *>::value - 1 +\n          sizeof(BlockIndexEntry *) * nextBlockIndexCapacity));\n      if (raw == nullptr) {\n        return false;\n      }\n\n      auto header = new (raw) BlockIndexHeader;\n      auto entries = reinterpret_cast<BlockIndexEntry *>(\n          details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));\n      auto index = reinterpret_cast<BlockIndexEntry **>(\n          details::align_for<BlockIndexEntry *>(\n              reinterpret_cast<char *>(entries) +\n              sizeof(BlockIndexEntry) * entryCount));\n      if (prev != nullptr) {\n        auto prevTail = prev->tail.load(std::memory_order_relaxed);\n        auto prevPos = prevTail;\n        size_t i = 0;\n        do {\n          prevPos = (prevPos + 1) & (prev->capacity - 1);\n          index[i++] = prev->index[prevPos];\n        } while (prevPos != prevTail);\n        assert(i == prevCapacity);\n      }\n      for (size_t i = 0; i != entryCount; ++i) {\n        new (entries + i) BlockIndexEntry;\n        entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);\n        index[prevCapacity + i] = entries + i;\n      }\n      header->prev = prev;\n      header->entries = entries;\n      header->index = index;\n      header->capacity = nextBlockIndexCapacity;\n      header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1),\n                         std::memory_order_relaxed);\n\n      blockIndex.store(header, std::memory_order_release);\n\n      nextBlockIndexCapacity <<= 1;\n\n      return true;\n    }\n\n   private:\n    size_t nextBlockIndexCapacity;\n    std::atomic<BlockIndexHeader *> blockIndex;\n\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n   public:\n    
details::ThreadExitListener threadExitListener;\n\n   private:\n#endif\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n   public:\n    ImplicitProducer *nextImplicitProducer;\n\n   private:\n#endif\n\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX\n    mutable debug::DebugMutex mutex;\n#endif\n#ifdef MCDBGQ_TRACKMEM\n    friend struct MemStats;\n#endif\n  };\n\n\n  //////////////////////////////////\n  // Block pool manipulation\n  //////////////////////////////////\n\n  void populate_initial_block_list(size_t blockCount) {\n    initialBlockPoolSize = blockCount;\n    if (initialBlockPoolSize == 0) {\n      initialBlockPool = nullptr;\n      return;\n    }\n\n    initialBlockPool = create_array<Block>(blockCount);\n    if (initialBlockPool == nullptr) {\n      initialBlockPoolSize = 0;\n    }\n    for (size_t i = 0; i < initialBlockPoolSize; ++i) {\n      initialBlockPool[i].dynamicallyAllocated = false;\n    }\n  }\n\n  inline Block *try_get_block_from_initial_pool() {\n    if (initialBlockPoolIndex.load(std::memory_order_relaxed) >=\n        initialBlockPoolSize) {\n      return nullptr;\n    }\n\n    auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);\n\n    return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr;\n  }\n\n  inline void add_block_to_free_list(Block *block) {\n#ifdef MCDBGQ_TRACKMEM\n    block->owner = nullptr;\n#endif\n    if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) {\n      destroy(block);\n    } else {\n      freeList.add(block);\n    }\n  }\n\n  inline void add_blocks_to_free_list(Block *block) {\n    while (block != nullptr) {\n      auto next = block->next;\n      add_block_to_free_list(block);\n      block = next;\n    }\n  }\n\n  inline Block *try_get_block_from_free_list() {\n    return freeList.try_get();\n  }\n\n  // Gets a free block from one of the memory pools, or allocates a new one (if\n  // applicable)\n  template <AllocationMode canAlloc>\n  Block *requisition_block() {\n    auto block = try_get_block_from_initial_pool();\n    if (block != nullptr) {\n      return block;\n    }\n\n    block = try_get_block_from_free_list();\n    if (block != nullptr) {\n      return block;\n    }\n\n    MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc) {\n      return create<Block>();\n    }\n    else {\n      return nullptr;\n    }\n  }\n\n\n#ifdef MCDBGQ_TRACKMEM\n public:\n  struct MemStats {\n    size_t allocatedBlocks;\n    size_t usedBlocks;\n    size_t freeBlocks;\n    size_t ownedBlocksExplicit;\n    size_t ownedBlocksImplicit;\n    size_t implicitProducers;\n    size_t explicitProducers;\n    size_t elementsEnqueued;\n    size_t blockClassBytes;\n    size_t queueClassBytes;\n    size_t implicitBlockIndexBytes;\n    size_t explicitBlockIndexBytes;\n\n    friend class ConcurrentQueue;\n\n   private:\n    static MemStats getFor(ConcurrentQueue *q) {\n      MemStats stats = {0};\n\n      stats.elementsEnqueued = q->size_approx();\n\n      auto block = q->freeList.head_unsafe();\n      while (block != nullptr) {\n        ++stats.allocatedBlocks;\n        ++stats.freeBlocks;\n        block = block->freeListNext.load(std::memory_order_relaxed);\n      }\n\n      for (auto ptr = 
q->producerListTail.load(std::memory_order_acquire);\n           ptr != nullptr; ptr = ptr->next_prod()) {\n        bool implicit = dynamic_cast<ImplicitProducer *>(ptr) != nullptr;\n        stats.implicitProducers += implicit ? 1 : 0;\n        stats.explicitProducers += implicit ? 0 : 1;\n\n        if (implicit) {\n          auto prod = static_cast<ImplicitProducer *>(ptr);\n          stats.queueClassBytes += sizeof(ImplicitProducer);\n          auto head = prod->headIndex.load(std::memory_order_relaxed);\n          auto tail = prod->tailIndex.load(std::memory_order_relaxed);\n          auto hash = prod->blockIndex.load(std::memory_order_relaxed);\n          if (hash != nullptr) {\n            for (size_t i = 0; i != hash->capacity; ++i) {\n              if (hash->index[i]->key.load(std::memory_order_relaxed) !=\n                      ImplicitProducer::INVALID_BLOCK_BASE &&\n                  hash->index[i]->value.load(std::memory_order_relaxed) !=\n                      nullptr) {\n                ++stats.allocatedBlocks;\n                ++stats.ownedBlocksImplicit;\n              }\n            }\n            stats.implicitBlockIndexBytes +=\n                hash->capacity *\n                sizeof(typename ImplicitProducer::BlockIndexEntry);\n            for (; hash != nullptr; hash = hash->prev) {\n              stats.implicitBlockIndexBytes +=\n                  sizeof(typename ImplicitProducer::BlockIndexHeader) +\n                  hash->capacity *\n                      sizeof(typename ImplicitProducer::BlockIndexEntry *);\n            }\n          }\n          for (; details::circular_less_than<index_t>(head, tail);\n               head += BLOCK_SIZE) {\n            // auto block = prod->get_block_index_entry_for_index(head);\n            ++stats.usedBlocks;\n          }\n        } else {\n          auto prod = static_cast<ExplicitProducer *>(ptr);\n          stats.queueClassBytes += sizeof(ExplicitProducer);\n          auto tailBlock = 
prod->tailBlock;\n          bool wasNonEmpty = false;\n          if (tailBlock != nullptr) {\n            auto block = tailBlock;\n            do {\n              ++stats.allocatedBlocks;\n              if (!block->ConcurrentQueue::Block::template is_empty<\n                      explicit_context>() ||\n                  wasNonEmpty) {\n                ++stats.usedBlocks;\n                wasNonEmpty = wasNonEmpty || block != tailBlock;\n              }\n              ++stats.ownedBlocksExplicit;\n              block = block->next;\n            } while (block != tailBlock);\n          }\n          auto index = prod->blockIndex.load(std::memory_order_relaxed);\n          while (index != nullptr) {\n            stats.explicitBlockIndexBytes +=\n                sizeof(typename ExplicitProducer::BlockIndexHeader) +\n                index->size *\n                    sizeof(typename ExplicitProducer::BlockIndexEntry);\n            index = static_cast<typename ExplicitProducer::BlockIndexHeader *>(\n                index->prev);\n          }\n        }\n      }\n\n      auto freeOnInitialPool =\n          q->initialBlockPoolIndex.load(std::memory_order_relaxed) >=\n                  q->initialBlockPoolSize\n              ? 0\n              : q->initialBlockPoolSize -\n                    q->initialBlockPoolIndex.load(std::memory_order_relaxed);\n      stats.allocatedBlocks += freeOnInitialPool;\n      stats.freeBlocks += freeOnInitialPool;\n\n      stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;\n      stats.queueClassBytes += sizeof(ConcurrentQueue);\n\n      return stats;\n    }\n  };\n\n  // For debugging only. 
Not thread-safe.\n  MemStats getMemStats() {\n    return MemStats::getFor(this);\n  }\n\n private:\n  friend struct MemStats;\n#endif\n\n\n  //////////////////////////////////\n  // Producer list manipulation\n  //////////////////////////////////\n\n  ProducerBase *recycle_or_create_producer(bool isExplicit) {\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH\n    debug::DebugLock lock(implicitProdMutex);\n#endif\n    // Try to re-use one first\n    for (auto ptr = producerListTail.load(std::memory_order_acquire);\n         ptr != nullptr; ptr = ptr->next_prod()) {\n      if (ptr->inactive.load(std::memory_order_relaxed) &&\n          ptr->isExplicit == isExplicit) {\n        bool expected = true;\n        if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false,\n                                                  std::memory_order_acquire,\n                                                  std::memory_order_relaxed)) {\n          // We caught one! It's been marked as activated, the caller can have\n          // it\n          return ptr;\n        }\n      }\n    }\n\n    return add_producer(\n        isExplicit ? 
static_cast<ProducerBase *>(create<ExplicitProducer>(this))\n                   : create<ImplicitProducer>(this));\n  }\n\n  ProducerBase *add_producer(ProducerBase *producer) {\n    // Handle failed memory allocation\n    if (producer == nullptr) {\n      return nullptr;\n    }\n\n    producerCount.fetch_add(1, std::memory_order_relaxed);\n\n    // Add it to the lock-free list\n    auto prevTail = producerListTail.load(std::memory_order_relaxed);\n    do {\n      producer->next = prevTail;\n    } while (!producerListTail.compare_exchange_weak(\n        prevTail, producer, std::memory_order_release,\n        std::memory_order_relaxed));\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n    if (producer->isExplicit) {\n      auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);\n      do {\n        static_cast<ExplicitProducer *>(producer)->nextExplicitProducer =\n            prevTailExplicit;\n      } while (!explicitProducers.compare_exchange_weak(\n          prevTailExplicit, static_cast<ExplicitProducer *>(producer),\n          std::memory_order_release, std::memory_order_relaxed));\n    } else {\n      auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);\n      do {\n        static_cast<ImplicitProducer *>(producer)->nextImplicitProducer =\n            prevTailImplicit;\n      } while (!implicitProducers.compare_exchange_weak(\n          prevTailImplicit, static_cast<ImplicitProducer *>(producer),\n          std::memory_order_release, std::memory_order_relaxed));\n    }\n#endif\n\n    return producer;\n  }\n\n  void reown_producers() {\n    // After another instance is moved-into/swapped-with this one, all the\n    // producers we stole still think their parents are the other queue.\n    // So fix them up!\n    for (auto ptr = producerListTail.load(std::memory_order_relaxed);\n         ptr != nullptr; ptr = ptr->next_prod()) {\n      ptr->parent = this;\n    }\n  }\n\n\n  //////////////////////////////////\n  // Implicit 
producer hash\n  //////////////////////////////////\n\n  struct ImplicitProducerKVP {\n    std::atomic<details::thread_id_t> key;\n    ImplicitProducer *value;  // No need for atomicity since it's only read by\n                              // the thread that sets it in the first place\n\n    ImplicitProducerKVP() : value(nullptr) {}\n\n    ImplicitProducerKVP(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT {\n      key.store(other.key.load(std::memory_order_relaxed),\n                std::memory_order_relaxed);\n      value = other.value;\n    }\n\n    inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other)\n        MOODYCAMEL_NOEXCEPT {\n      swap(other);\n      return *this;\n    }\n\n    inline void swap(ImplicitProducerKVP &other) MOODYCAMEL_NOEXCEPT {\n      if (this != &other) {\n        details::swap_relaxed(key, other.key);\n        std::swap(value, other.value);\n      }\n    }\n  };\n\n  template <typename XT, typename XTraits>\n  friend void moodycamel::swap(\n      typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &,\n      typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &)\n      MOODYCAMEL_NOEXCEPT;\n\n  struct ImplicitProducerHash {\n    size_t capacity;\n    ImplicitProducerKVP *entries;\n    ImplicitProducerHash *prev;\n  };\n\n  inline void populate_initial_implicit_producer_hash() {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {\n      return;\n    }\n    else {\n      implicitProducerHashCount.store(0, std::memory_order_relaxed);\n      auto hash = &initialImplicitProducerHash;\n      hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;\n      hash->entries = &initialImplicitProducerHashEntries[0];\n      for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {\n        initialImplicitProducerHashEntries[i].key.store(\n            details::invalid_thread_id, std::memory_order_relaxed);\n      }\n      hash->prev = nullptr;\n      implicitProducerHash.store(hash, 
std::memory_order_relaxed);\n    }\n  }\n\n  void swap_implicit_producer_hashes(ConcurrentQueue &other) {\n    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {\n      return;\n    }\n    else {\n      // Swap (assumes our implicit producer hash is initialized)\n      initialImplicitProducerHashEntries.swap(\n          other.initialImplicitProducerHashEntries);\n      initialImplicitProducerHash.entries =\n          &initialImplicitProducerHashEntries[0];\n      other.initialImplicitProducerHash.entries =\n          &other.initialImplicitProducerHashEntries[0];\n\n      details::swap_relaxed(implicitProducerHashCount,\n                            other.implicitProducerHashCount);\n\n      details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);\n      if (implicitProducerHash.load(std::memory_order_relaxed) ==\n          &other.initialImplicitProducerHash) {\n        implicitProducerHash.store(&initialImplicitProducerHash,\n                                   std::memory_order_relaxed);\n      } else {\n        ImplicitProducerHash *hash;\n        for (hash = implicitProducerHash.load(std::memory_order_relaxed);\n             hash->prev != &other.initialImplicitProducerHash;\n             hash = hash->prev) {\n          continue;\n        }\n        hash->prev = &initialImplicitProducerHash;\n      }\n      if (other.implicitProducerHash.load(std::memory_order_relaxed) ==\n          &initialImplicitProducerHash) {\n        other.implicitProducerHash.store(&other.initialImplicitProducerHash,\n                                         std::memory_order_relaxed);\n      } else {\n        ImplicitProducerHash *hash;\n        for (hash = other.implicitProducerHash.load(std::memory_order_relaxed);\n             hash->prev != &initialImplicitProducerHash; hash = hash->prev) {\n          continue;\n        }\n        hash->prev = &other.initialImplicitProducerHash;\n      }\n    }\n  }\n\n  // Only fails (returns nullptr) if memory 
allocation fails\n  ImplicitProducer *get_or_add_implicit_producer() {\n    // Note that since the data is essentially thread-local (key is thread ID),\n    // there's a reduced need for fences (memory ordering is already consistent\n    // for any individual thread), except for the current table itself.\n\n    // Start by looking for the thread ID in the current and all previous hash\n    // tables. If it's not found, it must not be in there yet, since this same\n    // thread would have added it previously to one of the tables that we\n    // traversed.\n\n    // Code and algorithm adapted from\n    // http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table\n\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH\n    debug::DebugLock lock(implicitProdMutex);\n#endif\n\n    auto id = details::thread_id();\n    auto hashedId = details::hash_thread_id(id);\n\n    auto mainHash = implicitProducerHash.load(std::memory_order_acquire);\n    assert(\n        mainHash !=\n        nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)\n    for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {\n      // Look for the id in this hash\n      auto index = hashedId;\n      while (true) {  // Not an infinite loop because at least one slot is free\n                      // in the hash table\n        index &= hash->capacity - 1u;\n\n        auto probedKey =\n            hash->entries[index].key.load(std::memory_order_relaxed);\n        if (probedKey == id) {\n          // Found it! If we had to search several hashes deep, though, we\n          // should lazily add it to the current main hash table to avoid the\n          // extended search next time. 
Note there's guaranteed to be room in\n          // the current hash table since every subsequent table implicitly\n          // reserves space for all previous tables (there's only one\n          // implicitProducerHashCount).\n          auto value = hash->entries[index].value;\n          if (hash != mainHash) {\n            index = hashedId;\n            while (true) {\n              index &= mainHash->capacity - 1u;\n              auto empty = details::invalid_thread_id;\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n              auto reusable = details::invalid_thread_id2;\n              if (mainHash->entries[index].key.compare_exchange_strong(\n                      empty, id, std::memory_order_seq_cst,\n                      std::memory_order_relaxed) ||\n                  mainHash->entries[index].key.compare_exchange_strong(\n                      reusable, id, std::memory_order_seq_cst,\n                      std::memory_order_relaxed)) {\n#else\n              if (mainHash->entries[index].key.compare_exchange_strong(\n                      empty, id, std::memory_order_seq_cst,\n                      std::memory_order_relaxed)) {\n#endif\n                mainHash->entries[index].value = value;\n                break;\n              }\n              ++index;\n            }\n          }\n\n          return value;\n        }\n        if (probedKey == details::invalid_thread_id) {\n          break;  // Not in this hash table\n        }\n        ++index;\n      }\n    }\n\n    // Insert!\n    auto newCount =\n        1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);\n    while (true) {\n      // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)\n      if (newCount >= (mainHash->capacity >> 1) &&\n          !implicitProducerHashResizeInProgress.test_and_set(\n              std::memory_order_acquire)) {\n        // We've acquired the resize lock, try to allocate a bigger hash table.\n        // Note the acquire fence synchronizes with the 
release fence at the end\n        // of this block, and hence when we reload implicitProducerHash it must\n        // be the most recent version (it only gets changed within this locked\n        // block).\n        mainHash = implicitProducerHash.load(std::memory_order_acquire);\n        if (newCount >= (mainHash->capacity >> 1)) {\n          size_t newCapacity = mainHash->capacity << 1;\n          while (newCount >= (newCapacity >> 1)) {\n            newCapacity <<= 1;\n          }\n          auto raw = static_cast<char *>(\n              (Traits::malloc)(sizeof(ImplicitProducerHash) +\n                               std::alignment_of<ImplicitProducerKVP>::value -\n                               1 + sizeof(ImplicitProducerKVP) * newCapacity));\n          if (raw == nullptr) {\n            // Allocation failed\n            implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);\n            implicitProducerHashResizeInProgress.clear(\n                std::memory_order_relaxed);\n            return nullptr;\n          }\n\n          auto newHash = new (raw) ImplicitProducerHash;\n          newHash->capacity = static_cast<size_t>(newCapacity);\n          newHash->entries = reinterpret_cast<ImplicitProducerKVP *>(\n              details::align_for<ImplicitProducerKVP>(\n                  raw + sizeof(ImplicitProducerHash)));\n          for (size_t i = 0; i != newCapacity; ++i) {\n            new (newHash->entries + i) ImplicitProducerKVP;\n            newHash->entries[i].key.store(details::invalid_thread_id,\n                                          std::memory_order_relaxed);\n          }\n          newHash->prev = mainHash;\n          implicitProducerHash.store(newHash, std::memory_order_release);\n          implicitProducerHashResizeInProgress.clear(std::memory_order_release);\n          mainHash = newHash;\n        } else {\n          implicitProducerHashResizeInProgress.clear(std::memory_order_release);\n        }\n      }\n\n      // If it's < 
three-quarters full, add to the old one anyway so that we\n      // don't have to wait for the next table to finish being allocated by\n      // another thread (and if we just finished allocating above, the condition\n      // will always be true)\n      if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {\n        auto producer =\n            static_cast<ImplicitProducer *>(recycle_or_create_producer(false));\n        if (producer == nullptr) {\n          implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);\n          return nullptr;\n        }\n\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n        producer->threadExitListener.callback =\n            &ConcurrentQueue::implicit_producer_thread_exited_callback;\n        producer->threadExitListener.userData = producer;\n        details::ThreadExitNotifier::subscribe(&producer->threadExitListener);\n#endif\n\n        auto index = hashedId;\n        while (true) {\n          index &= mainHash->capacity - 1u;\n          auto empty = details::invalid_thread_id;\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n          auto reusable = details::invalid_thread_id2;\n          if (mainHash->entries[index].key.compare_exchange_strong(\n                  reusable, id, std::memory_order_seq_cst,\n                  std::memory_order_relaxed)) {\n            implicitProducerHashCount.fetch_sub(\n                1,\n                std::memory_order_relaxed);  // already counted as a used slot\n            mainHash->entries[index].value = producer;\n            break;\n          }\n#endif\n          if (mainHash->entries[index].key.compare_exchange_strong(\n                  empty, id, std::memory_order_seq_cst,\n                  std::memory_order_relaxed)) {\n            mainHash->entries[index].value = producer;\n            break;\n          }\n          ++index;\n        }\n        return producer;\n      }\n\n      // Hmm, the old hash is quite full and somebody else is busy allocating 
a\n      // new one. We need to wait for the allocating thread to finish (if it\n      // succeeds, we add, if not, we try to allocate ourselves).\n      mainHash = implicitProducerHash.load(std::memory_order_acquire);\n    }\n  }\n\n#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED\n  void implicit_producer_thread_exited(ImplicitProducer *producer) {\n    // Remove from hash\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH\n    debug::DebugLock lock(implicitProdMutex);\n#endif\n    auto hash = implicitProducerHash.load(std::memory_order_acquire);\n    assert(hash != nullptr);  // The thread exit listener is only registered if\n                              // we were added to a hash in the first place\n    auto id = details::thread_id();\n    auto hashedId = details::hash_thread_id(id);\n    details::thread_id_t probedKey;\n\n    // We need to traverse all the hashes just in case other threads aren't on\n    // the current one yet and are trying to add an entry thinking there's a\n    // free slot (because they reused a producer)\n    for (; hash != nullptr; hash = hash->prev) {\n      auto index = hashedId;\n      do {\n        index &= hash->capacity - 1u;\n        probedKey = id;\n        if (hash->entries[index].key.compare_exchange_strong(\n                probedKey, details::invalid_thread_id2,\n                std::memory_order_seq_cst, std::memory_order_relaxed)) {\n          break;\n        }\n        ++index;\n      } while (\n          probedKey !=\n          details::invalid_thread_id);  // Can happen if the hash has changed\n                                        // but we weren't put back in it yet, or\n                                        // if we weren't added to this hash in\n                                        // the first place\n    }\n\n    // Mark the queue as being recyclable\n    producer->inactive.store(true, std::memory_order_release);\n  }\n\n  static void implicit_producer_thread_exited_callback(void *userData) {\n    auto producer = 
static_cast<ImplicitProducer *>(userData);\n    auto queue = producer->parent;\n    queue->implicit_producer_thread_exited(producer);\n  }\n#endif\n\n  //////////////////////////////////\n  // Utility functions\n  //////////////////////////////////\n\n  template <typename TAlign>\n  static inline void *aligned_malloc(size_t size) {\n    MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <=\n                            std::alignment_of<details::max_align_t>::value)\n    return (Traits::malloc)(size);\n    else {\n      size_t alignment = std::alignment_of<TAlign>::value;\n      void *raw = (Traits::malloc)(size + alignment - 1 + sizeof(void *));\n      if (!raw) return nullptr;\n      char *ptr = details::align_for<TAlign>(reinterpret_cast<char *>(raw) +\n                                             sizeof(void *));\n      *(reinterpret_cast<void **>(ptr) - 1) = raw;\n      return ptr;\n    }\n  }\n\n  template <typename TAlign>\n  static inline void aligned_free(void *ptr) {\n    MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <=\n                            std::alignment_of<details::max_align_t>::value)\n    return (Traits::free)(ptr);\n    else(Traits::free)(ptr ? *(reinterpret_cast<void **>(ptr) - 1) : nullptr);\n  }\n\n  template <typename U>\n  static inline U *create_array(size_t count) {\n    assert(count > 0);\n    U *p = static_cast<U *>(aligned_malloc<U>(sizeof(U) * count));\n    if (p == nullptr) return nullptr;\n\n    for (size_t i = 0; i != count; ++i) new (p + i) U();\n    return p;\n  }\n\n  template <typename U>\n  static inline void destroy_array(U *p, size_t count) {\n    if (p != nullptr) {\n      assert(count > 0);\n      for (size_t i = count; i != 0;) (p + --i)->~U();\n    }\n    aligned_free<U>(p);\n  }\n\n  template <typename U>\n  static inline U *create() {\n    void *p = aligned_malloc<U>(sizeof(U));\n    return p != nullptr ? 
new (p) U : nullptr;\n  }\n\n  template <typename U, typename A1>\n  static inline U *create(A1 &&a1) {\n    void *p = aligned_malloc<U>(sizeof(U));\n    return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;\n  }\n\n  template <typename U>\n  static inline void destroy(U *p) {\n    if (p != nullptr) p->~U();\n    aligned_free<U>(p);\n  }\n\n private:\n  std::atomic<ProducerBase *> producerListTail;\n  std::atomic<std::uint32_t> producerCount;\n\n  std::atomic<size_t> initialBlockPoolIndex;\n  Block *initialBlockPool;\n  size_t initialBlockPoolSize;\n\n#ifndef MCDBGQ_USEDEBUGFREELIST\n  FreeList<Block> freeList;\n#else\n  debug::DebugFreeList<Block> freeList;\n#endif\n\n  std::atomic<ImplicitProducerHash *> implicitProducerHash;\n  std::atomic<size_t>\n      implicitProducerHashCount;  // Number of slots logically used\n  ImplicitProducerHash initialImplicitProducerHash;\n  std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE>\n      initialImplicitProducerHashEntries;\n  std::atomic_flag implicitProducerHashResizeInProgress;\n\n  std::atomic<std::uint32_t> nextExplicitConsumerId;\n  std::atomic<std::uint32_t> globalExplicitConsumerOffset;\n\n#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH\n  debug::DebugMutex implicitProdMutex;\n#endif\n\n#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG\n  std::atomic<ExplicitProducer *> explicitProducers;\n  std::atomic<ImplicitProducer *> implicitProducers;\n#endif\n};\n\n\ntemplate <typename T, typename Traits>\nProducerToken::ProducerToken(ConcurrentQueue<T, Traits> &queue)\n    : producer(queue.recycle_or_create_producer(true)) {\n  if (producer != nullptr) {\n    producer->token = this;\n  }\n}\n\ntemplate <typename T, typename Traits>\nProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits> &queue)\n    : producer(reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)\n                   ->recycle_or_create_producer(true)) {\n  if (producer != nullptr) {\n    producer->token = this;\n  }\n}\n\ntemplate 
<typename T, typename Traits>\nConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits> &queue)\n    : itemsConsumedFromCurrent(0),\n      currentProducer(nullptr),\n      desiredProducer(nullptr) {\n  initialOffset =\n      queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);\n  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);\n}\n\ntemplate <typename T, typename Traits>\nConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits> &queue)\n    : itemsConsumedFromCurrent(0),\n      currentProducer(nullptr),\n      desiredProducer(nullptr) {\n  initialOffset =\n      reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)\n          ->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);\n  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);\n}\n\ntemplate <typename T, typename Traits>\ninline void swap(ConcurrentQueue<T, Traits> &a,\n                 ConcurrentQueue<T, Traits> &b) MOODYCAMEL_NOEXCEPT {\n  a.swap(b);\n}\n\ninline void swap(ProducerToken &a, ProducerToken &b) MOODYCAMEL_NOEXCEPT {\n  a.swap(b);\n}\n\ninline void swap(ConsumerToken &a, ConsumerToken &b) MOODYCAMEL_NOEXCEPT {\n  a.swap(b);\n}\n\ntemplate <typename T, typename Traits>\ninline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,\n                 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)\n    MOODYCAMEL_NOEXCEPT {\n  a.swap(b);\n}\n\n}  // namespace moodycamel\n\n#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)\n#pragma warning(pop)\n#endif\n\n#if defined(__GNUC__) && !defined(__INTEL_COMPILER)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "src/include/zvec/ailego/container/blob.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <string>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! AiLego Blob Wrap\n */\nclass BlobWrap {\n public:\n  //! Constructor\n  BlobWrap(void) : buffer_(nullptr), size_(0u) {}\n\n  //! Constructor\n  BlobWrap(const BlobWrap &rhs) : buffer_(rhs.buffer_), size_(rhs.size_) {}\n\n  //! Constructor\n  BlobWrap(BlobWrap &&rhs) : buffer_(rhs.buffer_), size_(rhs.size_) {\n    rhs.buffer_ = nullptr;\n    rhs.size_ = 0u;\n  }\n\n  //! Constructor\n  BlobWrap(const void *buf, size_t len)\n      : buffer_(const_cast<void *>(buf)), size_(len) {}\n\n  //! Constructor\n  BlobWrap(const std::string &buf)\n      : buffer_(const_cast<char *>(buf.data())), size_(buf.size()) {}\n\n  //! Destructor\n  ~BlobWrap(void) {}\n\n  //! Assignment\n  BlobWrap &operator=(const BlobWrap &rhs) {\n    buffer_ = rhs.buffer_;\n    size_ = rhs.size_;\n    return *this;\n  }\n\n  //! Assignment\n  BlobWrap &operator=(BlobWrap &&rhs) {\n    buffer_ = rhs.buffer_;\n    size_ = rhs.size_;\n    rhs.buffer_ = nullptr;\n    rhs.size_ = 0u;\n    return *this;\n  }\n\n  //! Test if the blob is valid\n  bool is_valid(void) const {\n    return (buffer_ && size_);\n  }\n\n  //! Mount a buffer as blob\n  void mount(void *buf, size_t len) {\n    buffer_ = buf;\n    size_ = len;\n  }\n\n  //! 
Mount a string as blob\n  void mount(std::string &buf) {\n    buffer_ = const_cast<char *>(buf.data());\n    size_ = buf.size();\n  }\n\n  //! Umount the buffer of blob\n  void umount(void) {\n    buffer_ = nullptr;\n    size_ = 0u;\n  }\n\n  //! Retrieve buffer of blob\n  void *buffer(void) {\n    return buffer_;\n  }\n\n  //! Retrieve buffer of blob\n  const void *buffer(void) const {\n    return buffer_;\n  }\n\n  //! Retrieve size of blob\n  size_t size(void) const {\n    return size_;\n  }\n\n  //! Copy a buffer into blob\n  void copy(const void *buf, size_t len) {\n    memcpy(buffer_, buf, std::min(size_, len));\n  }\n\n  //! Copy a blob to blob\n  void copy(const BlobWrap &rhs) {\n    memcpy(buffer_, rhs.buffer_, std::min(size_, rhs.size_));\n  }\n\n  //! Copy a string to blob\n  void copy(const std::string &str) {\n    memcpy(buffer_, str.data(), std::min(size_, str.size()));\n  }\n\n  //! Zero the buffer of blob\n  void zero(void) {\n    memset(buffer_, 0, size_);\n  }\n\n private:\n  void *buffer_;\n  size_t size_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/container/cube.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <string>\n#include <typeinfo>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\nnamespace internal {\n\n/*! Cube Policy\n */\nstruct CubePolicy {\n  //! Destructor\n  virtual ~CubePolicy(void) {}\n\n  //! Assign `src` to `dst`\n  virtual void assign(const void *src, void **dst) = 0;\n\n  //! Cleanup value\n  virtual void cleanup(void **val) = 0;\n\n  //! Clone value of `src` to `dst`\n  virtual void clone(void *const *src, void **dst) = 0;\n\n  //! Move `src` to `dst`\n  virtual void move(void *src, void **dst) = 0;\n\n  //! Retrieve size\n  virtual size_t size(void) const = 0;\n\n  //! Retrieve type information\n  virtual const std::type_info &type(void) const = 0;\n\n  //! Retrieve value\n  virtual void *value(void **src) = 0;\n\n  //! Retrieve value\n  virtual const void *value(void *const *src) const = 0;\n};\n\n/*! Small Cube Policy\n */\ntemplate <typename T>\nstruct SmallCubePolicy : public CubePolicy {\n  //! Assign `src` to `dst`\n  void assign(const void *src, void **dst) {\n    new (dst) T(*reinterpret_cast<const T *>(src));\n  }\n\n  //! Cleanup value\n  void cleanup(void **val) {\n    reinterpret_cast<T *>(val)->~T();\n  }\n\n  //! 
Clone value of `src` to `dst`\n  void clone(void *const *src, void **dst) {\n    new (dst) T(*reinterpret_cast<const T *>(src));\n  }\n\n  //! Move `src` to `dst`\n  void move(void *src, void **dst) {\n    new (dst) T(std::move(*reinterpret_cast<T *>(src)));\n  }\n\n  //! Retrieve size\n  size_t size(void) const {\n    return sizeof(T);\n  }\n\n  //! Retrieve type information\n  const std::type_info &type(void) const {\n    return typeid(T);\n  }\n\n  //! Retrieve value\n  void *value(void **src) {\n    return reinterpret_cast<void *>(src);\n  }\n\n  //! Retrieve value\n  const void *value(void *const *src) const {\n    return reinterpret_cast<const void *>(src);\n  }\n};\n\n/*! Large Cube Policy\n */\ntemplate <typename T>\nstruct LargeCubePolicy : public CubePolicy {\n  //! Assign `src` to `dst`\n  void assign(const void *src, void **dst) {\n    *dst = new T(*reinterpret_cast<const T *>(src));\n  }\n\n  //! Cleanup value\n  void cleanup(void **val) {\n    delete (reinterpret_cast<T *>(*val));\n  }\n\n  //! Clone value of `src` to `dst`\n  void clone(void *const *src, void **dst) {\n    *dst = new T(**reinterpret_cast<T *const *>(src));\n  }\n\n  //! Move `src` to `dst`\n  void move(void *src, void **dst) {\n    *dst = new T(std::move(*reinterpret_cast<T *>(src)));\n  }\n\n  //! Retrieve size\n  size_t size(void) const {\n    return sizeof(T);\n  }\n\n  //! Retrieve type information\n  const std::type_info &type(void) const {\n    return typeid(T);\n  }\n\n  //! Retrieve value\n  void *value(void **src) {\n    return *src;\n  }\n\n  //! Retrieve value\n  const void *value(void *const *src) const {\n    return *src;\n  }\n};\n\n/*! Policy Selector\n */\ntemplate <typename T, typename = void>\nstruct PolicySelector {\n  typedef LargeCubePolicy<T> Type;\n};\n\n/*! 
Policy Selector\n */\ntemplate <typename T>\nstruct PolicySelector<\n    T, typename std::enable_if<sizeof(T) <= sizeof(void *)>::type> {\n  typedef SmallCubePolicy<T> Type;\n};\n\n}  // namespace internal\n\n/*! Cube class\n */\nclass Cube {\n public:\n  //! Constructor\n  Cube(void) : policy_(Cube::Policy<Cube::EmptyPolicy>()), object_(nullptr) {}\n\n  //! Constructor\n  template <typename T>\n  Cube(const T &rhs) : policy_(Cube::Policy<T>()), object_(nullptr) {\n    policy_->assign(&rhs, &object_);\n  }\n\n  //! Constructor\n  template <typename T, typename = typename std::enable_if<\n                            !std::is_same<Cube &, T>::value &&\n                            !std::is_same<T &, T>::value>::type>\n  Cube(T &&rhs) : policy_(Cube::Policy<T>()), object_(nullptr) {\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Constructor\n  Cube(const char *str)\n      : policy_(Cube::Policy<std::string>()), object_(nullptr) {\n    std::string rhs(str);\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Constructor\n  Cube(char str[]) : policy_(Cube::Policy<std::string>()), object_(nullptr) {\n    std::string rhs(str);\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Constructor\n  Cube(const Cube &rhs) : policy_(rhs.policy_), object_(nullptr) {\n    policy_->clone(&rhs.object_, &object_);\n  }\n\n  //! Constructor\n  Cube(Cube &&rhs) : policy_(rhs.policy_), object_(rhs.object_) {\n    rhs.policy_ = Cube::Policy<Cube::EmptyPolicy>();\n    rhs.object_ = nullptr;\n  }\n\n  //! Destructor\n  ~Cube(void) {\n    policy_->cleanup(&object_);\n  }\n\n  //! Assignment\n  template <typename T>\n  Cube &operator=(const T &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n  //! 
Assignment\n  template <typename T, typename = typename std::enable_if<\n                            !std::is_same<Cube &, T>::value &&\n                            !std::is_same<T &, T>::value>::type>\n  Cube &operator=(T &&rhs) {\n    this->assign(std::forward<T>(rhs));\n    return *this;\n  }\n\n  //! Assignment\n  Cube &operator=(const Cube &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n  //! Assignment\n  Cube &operator=(Cube &&rhs) {\n    this->assign(std::forward<Cube>(rhs));\n    return *this;\n  }\n\n  //! Assignment\n  Cube &operator=(const char *str) {\n    this->assign(str);\n    return *this;\n  }\n\n  //! Assignment\n  Cube &operator=(char str[]) {\n    this->assign(str);\n    return *this;\n  }\n\n  //! Retrieve object in original type\n  template <typename T>\n  operator T &() {\n    return this->cast<T>();\n  }\n\n  //! Retrieve object in original type\n  template <typename T>\n  operator const T &() const {\n    return this->cast<T>();\n  }\n\n  //! Assign content\n  template <typename T>\n  void assign(const T &rhs) {\n    policy_->cleanup(&object_);\n    policy_ = Cube::Policy<T>();\n    policy_->assign(&rhs, &object_);\n  }\n\n  //! Assign content\n  template <typename T, typename = typename std::enable_if<\n                            !std::is_same<Cube &, T>::value &&\n                            !std::is_same<T &, T>::value>::type>\n  void assign(T &&rhs) {\n    policy_->cleanup(&object_);\n    policy_ = Cube::Policy<T>();\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Assign content from another Cube\n  void assign(const Cube &rhs) {\n    policy_->cleanup(&object_);\n    policy_ = rhs.policy_;\n    policy_->clone(&rhs.object_, &object_);\n  }\n\n  //! 
Assign content from another Cube\n  void assign(Cube &&rhs) {\n    if (this != &rhs) {\n      policy_->cleanup(&object_);\n      policy_ = rhs.policy_;\n      object_ = rhs.object_;\n      rhs.policy_ = Cube::Policy<Cube::EmptyPolicy>();\n      rhs.object_ = nullptr;\n    }\n  }\n\n  //! Assign content\n  void assign(const char *str) {\n    policy_->cleanup(&object_);\n    policy_ = Cube::Policy<std::string>();\n    std::string rhs(str);\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Assign content\n  void assign(char str[]) {\n    policy_->cleanup(&object_);\n    policy_ = Cube::Policy<std::string>();\n    std::string rhs(str);\n    policy_->move(&rhs, &object_);\n  }\n\n  //! Swap the content with another Cube\n  Cube &swap(Cube &rhs) {\n    std::swap(policy_, rhs.policy_);\n    std::swap(object_, rhs.object_);\n    return *this;\n  }\n\n  //! Cast to the original type\n  template <typename T>\n  T &cast(void) {\n    if (policy_ != Cube::Policy<T>()) {\n      throw std::bad_cast();\n    }\n    return *reinterpret_cast<T *>(policy_->value(&object_));\n  }\n\n  //! Cast to the original type\n  template <typename T>\n  const T &cast(void) const {\n    if (policy_ != Cube::Policy<T>()) {\n      throw std::bad_cast();\n    }\n    return *reinterpret_cast<const T *>(policy_->value(&object_));\n  }\n\n  //! Cast to the original type (unsafe)\n  template <typename T>\n  T &unsafe_cast(void) {\n    return *reinterpret_cast<T *>(policy_->value(&object_));\n  }\n\n  //! Cast to the original type (unsafe)\n  template <typename T>\n  const T &unsafe_cast(void) const {\n    return *reinterpret_cast<const T *>(policy_->value(&object_));\n  }\n\n  //! Test if the Cube is empty\n  bool empty(void) const {\n    return (policy_ == Cube::Policy<Cube::EmptyPolicy>());\n  }\n\n  //! Reset Cube allocated memory\n  void reset(void) {\n    policy_->cleanup(&object_);\n    policy_ = Cube::Policy<Cube::EmptyPolicy>();\n    object_ = nullptr;\n  }\n\n  //! 
Test if the Cube is compatible with another one\n  bool compatible(const Cube &rhs) const {\n    return (policy_ == rhs.policy_ || policy_->type() == rhs.policy_->type());\n  }\n\n  //! Test if the Cube is compatible with another one\n  template <typename T>\n  bool compatible(void) const {\n    return (policy_ == Cube::Policy<T>() ||\n            policy_->type() == Cube::Policy<T>()->type());\n  }\n\n  //! Retrieve size\n  size_t size(void) const {\n    return (!this->empty() ? policy_->size() : 0u);\n  }\n\n  //! Retrieve type information\n  const std::type_info &type(void) const {\n    return (!this->empty() ? policy_->type() : typeid(void));\n  }\n\n protected:\n  /*! Empty Policy\n   */\n  struct EmptyPolicy {};\n\n  //! Make a static policy object\n  template <typename T>\n  static internal::CubePolicy *MakePolicy(void) {\n    static typename internal::PolicySelector<T>::Type policy;\n    return (&policy);\n  }\n\n  //! Retrieve a static policy object\n  template <typename T>\n  static internal::CubePolicy *Policy(void) {\n    return MakePolicy<typename UnderlyingType<T>::type>();\n  }\n\n private:\n  //! Members\n  internal::CubePolicy *policy_;\n  void *object_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/container/heap.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <functional>\n#include <limits>\n#include <utility>\n#include <vector>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Heap Adapter\n */\ntemplate <typename T, typename TCompare = std::less<T>,\n          typename TBase = std::vector<T>>\nclass Heap : public TBase {\n public:\n  //! Constructor\n  Heap(void)\n      : TBase(), limit_(std::numeric_limits<size_t>::max()), compare_() {}\n\n  //! Constructor\n  template <typename... Args>\n  Heap(size_t max, Args &&...args)\n      : TBase(),\n        limit_(std::max<size_t>(max, 1u)),\n        compare_(std::forward<Args>(args)...) {\n    TBase::reserve(limit_);\n  }\n\n  //! Constructor\n  Heap(const Heap &rhs)\n      : TBase(rhs), limit_(rhs.limit_), compare_(rhs.compare_) {}\n\n  //! Constructor\n  Heap(Heap &&rhs)\n      : TBase(std::move(rhs)),\n        limit_(rhs.limit_),\n        compare_(std::move(rhs.compare_)) {}\n\n  //! Constructor\n  Heap(const TBase &rhs)\n      : TBase(rhs), limit_(std::numeric_limits<size_t>::max()), compare_() {\n    std::make_heap(TBase::begin(), TBase::end(), compare_);\n  }\n\n  //! Constructor\n  Heap(TBase &&rhs)\n      : TBase(std::move(rhs)),\n        limit_(std::numeric_limits<size_t>::max()),\n        compare_() {\n    std::make_heap(TBase::begin(), TBase::end(), compare_);\n  }\n\n  //! 
Assignment\n  Heap &operator=(const Heap &rhs) {\n    TBase::operator=(static_cast<const TBase &>(rhs));\n    limit_ = rhs.limit_;\n    compare_ = rhs.compare_;\n    return *this;\n  }\n\n  //! Assignment\n  Heap &operator=(Heap &&rhs) {\n    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));\n    limit_ = rhs.limit_;\n    compare_ = std::move(rhs.compare_);\n    return *this;\n  }\n\n  //! Exchange the content\n  void swap(Heap &rhs) {\n    TBase::swap(static_cast<TBase &>(rhs));\n    std::swap(limit_, rhs.limit_);\n    std::swap(compare_, rhs.compare_);\n  }\n\n  //! Pop the front element\n  void pop(void) {\n    if (TBase::size() > 1) {\n      auto last = TBase::end() - 1;\n      this->replace_heap(TBase::begin(), last, std::move(*last));\n    }\n    TBase::pop_back();\n  }\n\n  //! Insert a new element into the heap\n  template <class... TArgs>\n  void emplace(TArgs &&...args) {\n    if (this->full()) {\n      typename std::remove_reference<T>::type val(std::forward<TArgs>(args)...);\n\n      auto first = TBase::begin();\n      if (compare_(val, *first)) {\n        this->replace_heap(first, TBase::end(), std::move(val));\n      }\n    } else {\n      TBase::emplace_back(std::forward<TArgs>(args)...);\n      std::push_heap(TBase::begin(), TBase::end(), compare_);\n    }\n  }\n\n  //! Insert a new element into the heap\n  void push(const T &val) {\n    if (this->full()) {\n      auto first = TBase::begin();\n      if (compare_(val, *first)) {\n        this->replace_heap(first, TBase::end(), val);\n      }\n    } else {\n      TBase::push_back(val);\n      std::push_heap(TBase::begin(), TBase::end(), compare_);\n    }\n  }\n\n  //! 
Insert a new element into the heap\n  void push(T &&val) {\n    if (this->full()) {\n      auto first = TBase::begin();\n      if (compare_(val, *first)) {\n        this->replace_heap(first, TBase::end(), std::move(val));\n      }\n    } else {\n      TBase::push_back(std::move(val));\n      std::push_heap(TBase::begin(), TBase::end(), compare_);\n    }\n  }\n\n  //! Retrieve the limit of heap\n  size_t limit(void) const {\n    return limit_;\n  }\n\n  //! Limit the heap with max size\n  void limit(size_t max) {\n    limit_ = std::max<size_t>(max, 1u);\n    TBase::reserve(limit_);\n  }\n\n  //! Unlimit the size of heap\n  void unlimit(void) {\n    limit_ = std::numeric_limits<size_t>::max();\n  }\n\n  //! Check whether the heap is full\n  bool full(void) const {\n    return (TBase::size() == limit_);\n  }\n\n  //! Update the heap\n  void update(void) {\n    std::make_heap(TBase::begin(), TBase::end(), compare_);\n    while (limit_ < TBase::size()) {\n      this->pop();\n    }\n  }\n\n  //! Sort the elements in the heap\n  void sort(void) {\n    std::sort(TBase::begin(), TBase::end(), compare_);\n  }\n\n protected:\n  //! 
Replace the top element of heap\n  template <typename TRandomIterator, typename TValue>\n  void replace_heap(TRandomIterator first, TRandomIterator last, TValue &&val) {\n    using _DistanceType =\n        typename std::iterator_traits<TRandomIterator>::difference_type;\n\n    _DistanceType hole = 0;\n    _DistanceType count = _DistanceType(last - first);\n\n    if (count > 1) {\n      _DistanceType child = (hole << 1) + 1;\n\n      while (child < count) {\n        _DistanceType right_child = child + 1;\n\n        if (right_child < count &&\n            compare_(*(first + child), *(first + right_child))) {\n          child = right_child;\n        }\n        if (!compare_(val, *(first + child))) {\n          break;\n        }\n        *(first + hole) = std::move(*(first + child));\n        hole = child;\n        child = (hole << 1) + 1;\n      }\n    }\n    *(first + hole) = std::forward<TValue>(val);\n  }\n\n private:\n  size_t limit_;\n  TCompare compare_;\n};\n\n/*! Key Value Heap Comparer\n */\ntemplate <typename TKey, typename TValue, typename TCompare = std::less<TValue>>\nstruct KeyValueHeapComparer {\n  //! Function call\n  bool operator()(const std::pair<TKey, TValue> &lhs,\n                  const std::pair<TKey, TValue> &rhs) const {\n    return compare_(lhs.second, rhs.second);\n  }\n\n private:\n  TCompare compare_;\n};\n\n/*! Key Value Heap\n */\ntemplate <typename TKey, typename TValue, typename TCompare = std::less<TValue>>\nusing KeyValueHeap =\n    Heap<std::pair<TKey, TValue>, KeyValueHeapComparer<TKey, TValue, TCompare>>;\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/container/hypercube.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <map>\n#include <string>\n#include <zvec/ailego/container/cube.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Hypercube\n */\nclass Hypercube {\n public:\n  //! Constructor\n  Hypercube(void) : cubes_() {}\n\n  //! Constructor\n  Hypercube(const Hypercube &rhs) : cubes_(rhs.cubes_) {}\n\n  //! Constructor\n  Hypercube(Hypercube &&rhs) : cubes_() {\n    cubes_.swap(rhs.cubes_);\n  }\n\n  //! Destructor\n  ~Hypercube(void) {}\n\n  //! Assignment\n  Hypercube &operator=(const Hypercube &rhs) {\n    cubes_ = rhs.cubes_;\n    return *this;\n  }\n\n  //! Assignment\n  Hypercube &operator=(Hypercube &&rhs) {\n    cubes_ = std::move(rhs.cubes_);\n    return *this;\n  }\n\n  //! Overloaded operator []\n  Cube &operator[](const std::string &key) {\n    return cubes_[key];\n  }\n\n  //! Overloaded operator []\n  Cube &operator[](std::string &&key) {\n    return cubes_[std::forward<std::string>(key)];\n  }\n\n  //! Test if the element is exist\n  bool has(const std::string &key) const {\n    return (cubes_.find(key) != cubes_.end());\n  }\n\n  //! Test if the hyper cube is empty\n  bool empty(void) const {\n    return cubes_.empty();\n  }\n\n  //! Insert a key-value pair into map\n  bool insert(const std::string &key, Cube &&val) {\n    return cubes_.emplace(key, std::forward<Cube>(val)).second;\n  }\n\n  //! 
Insert a key-value pair into map\n  bool insert(std::string &&key, Cube &&val) {\n    return cubes_\n        .emplace(std::forward<std::string>(key), std::forward<Cube>(val))\n        .second;\n  }\n\n  //! Insert a key-value pair into map\n  template <typename T>\n  bool insert(const std::string &key, T &&val) {\n    return cubes_.emplace(key, Cube(std::forward<T>(val))).second;\n  }\n\n  //! Insert a key-value pair into map\n  template <typename T>\n  bool insert(std::string &&key, T &&val) {\n    return cubes_\n        .emplace(std::forward<std::string>(key), Cube(std::forward<T>(val)))\n        .second;\n  }\n\n  //! Insert or assign a key-value pair to map\n  void insert_or_assign(const std::string &key, Cube &&val) {\n    auto it = cubes_.lower_bound(key);\n    if (it != cubes_.end() && it->first == key) {\n      it->second = std::forward<Cube>(val);\n    } else {\n      cubes_.emplace_hint(it, key, std::forward<Cube>(val));\n    }\n  }\n\n  //! Insert or assign a key-value pair to map\n  void insert_or_assign(std::string &&key, Cube &&val) {\n    auto it = cubes_.lower_bound(key);\n    if (it != cubes_.end() && it->first == key) {\n      it->second = std::forward<Cube>(val);\n    } else {\n      cubes_.emplace_hint(it, std::forward<std::string>(key),\n                          std::forward<Cube>(val));\n    }\n  }\n\n  //! Insert or assign a key-value pair to map\n  template <typename T>\n  void insert_or_assign(const std::string &key, T &&val) {\n    auto it = cubes_.lower_bound(key);\n    if (it != cubes_.end() && it->first == key) {\n      it->second = Cube(std::forward<T>(val));\n    } else {\n      cubes_.emplace_hint(it, key, Cube(std::forward<T>(val)));\n    }\n  }\n\n  //! 
Insert or assign a key-value pair to map\n  template <typename T>\n  void insert_or_assign(std::string &&key, T &&val) {\n    auto it = cubes_.lower_bound(key);\n    if (it != cubes_.end() && it->first == key) {\n      it->second = Cube(std::forward<T>(val));\n    } else {\n      cubes_.emplace_hint(it, std::forward<std::string>(key),\n                          Cube(std::forward<T>(val)));\n    }\n  }\n\n  //! Clear the map\n  void clear(void) {\n    cubes_.clear();\n  }\n\n  //! Swap the map\n  void swap(Hypercube &rhs) {\n    cubes_.swap(rhs.cubes_);\n  }\n\n  //! Erase the pair via a key\n  bool erase(const std::string &key) {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      cubes_.erase(iter);\n      return true;\n    }\n    return false;\n  }\n\n  //! Retrieve the value via a key\n  bool get(const std::string &key, Cube *out) const {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      *out = iter->second;\n      return true;\n    }\n    return false;\n  }\n\n  //! Retrieve the value via a key\n  Cube *get(const std::string &key) {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      return &iter->second;\n    }\n    return nullptr;\n  }\n\n  //! Retrieve the value via a key\n  const Cube *get(const std::string &key) const {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      return &iter->second;\n    }\n    return nullptr;\n  }\n\n  //! Retrieve the value via a key\n  template <typename T>\n  bool get(const std::string &key, T *out) const {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      if (iter->second.compatible<T>()) {\n        *out = iter->second.unsafe_cast<T>();\n        return true;\n      }\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value via a key\n  template <typename T>\n  T &get(const std::string &key, T &def) {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      if (iter->second.compatible<T>()) {\n        return iter->second.unsafe_cast<T>();\n      }\n    }\n    return def;\n  }\n\n  //! Retrieve the value via a key\n  template <typename T>\n  const T &get(const std::string &key, const T &def) const {\n    auto iter = cubes_.find(key);\n    if (iter != cubes_.end()) {\n      if (iter->second.compatible<T>()) {\n        return iter->second.unsafe_cast<T>();\n      }\n    }\n    return def;\n  }\n\n  //! Merge another hyper cube\n  void merge(const Hypercube &rhs) {\n    for (const auto &it : rhs.cubes_) {\n      auto iter = cubes_.find(it.first);\n      if (iter != cubes_.end()) {\n        iter->second = it.second;\n      } else {\n        cubes_.emplace(it.first, it.second);\n      }\n    }\n  }\n\n  //! Merge another hyper cube\n  void merge(Hypercube &&rhs) {\n    for (auto &it : rhs.cubes_) {\n      auto iter = cubes_.find(it.first);\n      if (iter != cubes_.end()) {\n        iter->second = std::move(it.second);\n      } else {\n        cubes_.emplace(std::move(it.first), std::move(it.second));\n      }\n    }\n  }\n\n  //! Retrieve the cubes\n  const std::map<std::string, Cube> &cubes(void) const {\n    return cubes_;\n  }\n\n  //! Retrieve the cubes\n  std::map<std::string, Cube> *mutable_cubes(void) {\n    return &cubes_;\n  }\n\n private:\n  std::map<std::string, Cube> cubes_;\n};\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/container/params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/container/hypercube.h>\n\nnamespace zvec {\nnamespace ailego {\n\n//! Trying compatible with T\n#define _TRYING_COMPATIBLE(cube, T, out)                                     \\\n  if (cube->compatible<T>())                                                 \\\n  return (                                                                   \\\n      *out = static_cast<typename std::remove_pointer<decltype(out)>::type>( \\\n          cube->unsafe_cast<T>()),                                           \\\n      true)\n\n//! Trying compatible with T (Boolean)\n#define _TRYING_COMPATIBLE_BOOL(cube, T, out) \\\n  if (cube->compatible<T>()) return (*out = !!cube->unsafe_cast<T>(), true)\n\n//! Trying compatible with T (String)\n#define _TRYING_COMPATIBLE_STRING(cube, T, out) \\\n  if (cube->compatible<T>())                    \\\n  return (out->assign(std::to_string(cube->unsafe_cast<T>())), true)\n\n//! Trying convert string\n#define _TRYING_CONVERT_STRING(cube, out)                                      \\\n  if (cube->compatible<std::string>())                                         \\\n  return (*out = Params::StringCast<std::remove_pointer<decltype(out)>::type>( \\\n              cube->unsafe_cast<std::string>()),                               \\\n          true)\n\n/*! 
Index Params\n */\nclass Params {\n public:\n  //! Constructor\n  Params(void) : hypercube_() {}\n\n  //! Constructor\n  Params(const Params &rhs) : hypercube_(rhs.hypercube_) {}\n\n  //! Constructor\n  Params(Params &&rhs) : hypercube_() {\n    hypercube_.swap(rhs.hypercube_);\n  }\n\n  //! Destructor\n  ~Params(void) {}\n\n  //! Assignment\n  Params &operator=(const Params &rhs) {\n    hypercube_ = rhs.hypercube_;\n    return *this;\n  }\n\n  //! Assignment\n  Params &operator=(Params &&rhs) {\n    hypercube_.swap(rhs.hypercube_);\n    return *this;\n  }\n\n  //! Overloaded operator []\n  ailego::Cube &operator[](const std::string &key) {\n    return hypercube_[key];\n  }\n\n  //! Overloaded operator []\n  ailego::Cube &operator[](std::string &&key) {\n    return hypercube_[std::move(key)];\n  }\n\n  //! Test if the element exists\n  bool has(const std::string &key) const {\n    return hypercube_.has(key);\n  }\n\n  //! Test if the map is empty\n  bool empty(void) const {\n    return hypercube_.empty();\n  }\n\n  //! Clear the map\n  void clear(void) {\n    hypercube_.clear();\n  }\n\n  //! Erase the pair via a key\n  bool erase(const std::string &key) {\n    return hypercube_.erase(key);\n  }\n\n  //! Merge another index params\n  void merge(const Params &rhs) {\n    hypercube_.merge(rhs.hypercube_);\n  }\n\n  //! Merge another index params\n  void merge(Params &&rhs) {\n    hypercube_.merge(std::move(rhs.hypercube_));\n  }\n\n  //! Insert the value of key in T (returns false if the key already exists)\n  template <typename T>\n  bool insert(const std::string &key, T &&val) {\n    return hypercube_.insert<T>(key, std::forward<T>(val));\n  }\n\n  //! Insert the value of key in T (returns false if the key already exists)\n  template <typename T>\n  bool insert(std::string &&key, T &&val) {\n    return hypercube_.insert<T>(std::forward<std::string>(key),\n                                std::forward<T>(val));\n  }\n\n  //! 
Set the value of key in T\n  template <typename T>\n  void set(const std::string &key, T &&val) {\n    hypercube_.insert_or_assign<T>(key, std::forward<T>(val));\n  }\n\n  //! Set the value of key in T\n  template <typename T>\n  void set(std::string &&key, T &&val) {\n    hypercube_.insert_or_assign<T>(std::forward<std::string>(key),\n                                   std::forward<T>(val));\n  }\n\n  //! Retrieve the value in boolean\n  bool get(const std::string &key, bool *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE_BOOL(cube, char, out);\n      _TRYING_COMPATIBLE_BOOL(cube, unsigned char, out);\n      _TRYING_COMPATIBLE_BOOL(cube, signed char, out);\n      _TRYING_COMPATIBLE_BOOL(cube, short int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, unsigned int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, long int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, long long int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE_BOOL(cube, float, out);\n      _TRYING_COMPATIBLE_BOOL(cube, double, out);\n      _TRYING_COMPATIBLE_BOOL(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'char'\n  bool get(const std::string &key, char *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in 'unsigned char'\n  bool get(const std::string &key, unsigned char *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  
//! Retrieve the value in 'signed char'\n  bool get(const std::string &key, signed char *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'short int'\n  bool get(const std::string &key, short int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'unsigned short int'\n  bool get(const std::string &key, unsigned short int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'int'\n  bool get(const std::string &key, int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in 'unsigned int'\n  bool get(const std::string &key, unsigned int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'long int'\n  bool get(const std::string &key, long int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in 'unsigned long int'\n  bool get(const std::string &key, unsigned long int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return 
false;\n  }\n\n  //! Retrieve the value in 'long long int'\n  bool get(const std::string &key, long long int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'unsigned long long int'\n  bool get(const std::string &key, unsigned long long int *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'float'\n  bool get(const std::string &key, float *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in 'double'\n  bool get(const std::string &key, double *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in 'long double'\n  bool get(const std::string &key, long double *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, long double, out);\n      _TRYING_COMPATIBLE(cube, double, out);\n      _TRYING_COMPATIBLE(cube, float, out);\n      _TRYING_COMPATIBLE(cube, long long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE(cube, long int, out);\n      _TRYING_COMPATIBLE(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE(cube, int, out);\n      _TRYING_COMPATIBLE(cube, unsigned int, out);\n      _TRYING_COMPATIBLE(cube, short int, out);\n      _TRYING_COMPATIBLE(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE(cube, char, out);\n      _TRYING_COMPATIBLE(cube, unsigned char, out);\n      _TRYING_COMPATIBLE(cube, signed char, out);\n      _TRYING_COMPATIBLE(cube, bool, out);\n      _TRYING_CONVERT_STRING(cube, out);\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value in string\n  bool get(const std::string &key, std::string *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, std::string, out);\n      _TRYING_COMPATIBLE_STRING(cube, bool, out);\n      _TRYING_COMPATIBLE_STRING(cube, char, out);\n      _TRYING_COMPATIBLE_STRING(cube, unsigned char, out);\n      _TRYING_COMPATIBLE_STRING(cube, signed char, out);\n      _TRYING_COMPATIBLE_STRING(cube, short int, out);\n      _TRYING_COMPATIBLE_STRING(cube, unsigned short int, out);\n      _TRYING_COMPATIBLE_STRING(cube, int, out);\n      _TRYING_COMPATIBLE_STRING(cube, unsigned int, out);\n      _TRYING_COMPATIBLE_STRING(cube, long int, out);\n      _TRYING_COMPATIBLE_STRING(cube, unsigned long int, out);\n      _TRYING_COMPATIBLE_STRING(cube, long long int, out);\n      _TRYING_COMPATIBLE_STRING(cube, unsigned long long int, out);\n      _TRYING_COMPATIBLE_STRING(cube, float, out);\n      _TRYING_COMPATIBLE_STRING(cube, double, out);\n      _TRYING_COMPATIBLE_STRING(cube, long double, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in T\n  template <typename T>\n  bool get(const std::string &key, T *out) const {\n    const ailego::Cube *cube = hypercube_.get(key);\n    if (cube) {\n      _TRYING_COMPATIBLE(cube, T, out);\n    }\n    return false;\n  }\n\n  //! Retrieve the value in boolean\n  bool get_as_bool(const std::string &key) const {\n    bool result = false;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in int8\n  int8_t get_as_int8(const std::string &key) const {\n    int8_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in int16\n  int16_t get_as_int16(const std::string &key) const {\n    int16_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! 
Retrieve the value in int32\n  int32_t get_as_int32(const std::string &key) const {\n    int32_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in int64\n  int64_t get_as_int64(const std::string &key) const {\n    int64_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in uint8\n  uint8_t get_as_uint8(const std::string &key) const {\n    uint8_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in uint16\n  uint16_t get_as_uint16(const std::string &key) const {\n    uint16_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in uint32\n  uint32_t get_as_uint32(const std::string &key) const {\n    uint32_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in uint64\n  uint64_t get_as_uint64(const std::string &key) const {\n    uint64_t result = 0;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in float\n  float get_as_float(const std::string &key) const {\n    float result = 0.0f;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in double\n  double get_as_double(const std::string &key) const {\n    double result = 0.0f;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the value in string\n  std::string get_as_string(const std::string &key) const {\n    std::string result;\n    this->get(key, &result);\n    return result;\n  }\n\n  //! Retrieve the debug string\n  std::string debug_string(void) const {\n    std::string str;\n    SerializeToBuffer(*this, &str);\n    return str;\n  }\n\n  //! Retrieve the map of parameters\n  const ailego::Hypercube &hypercube(void) const {\n    return hypercube_;\n  }\n\n  //! Retrieve the map of parameters\n  ailego::Hypercube *mutable_hypercube(void) {\n    return &hypercube_;\n  }\n\n  //! 
Parse parameters from buffer (Json format)\n  static bool ParseFromBuffer(const std::string &buf, Params *params);\n\n  //! Parse parameters from OS environment\n  static void ParseFromEnvironment(Params *params);\n\n  //! Serialize parameters into buffer\n  static void SerializeToBuffer(const Params &params, std::string *buf);\n\n protected:\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, float>::value, T>::type {\n    return std::strtof(str.c_str(), nullptr);\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, double>::value, T>::type {\n    return std::strtod(str.c_str(), nullptr);\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, long double>::value, T>::type {\n    return std::strtold(str.c_str(), nullptr);\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, char>::value, T>::type {\n    return static_cast<char>(std::strtol(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, signed char>::value, T>::type {\n    return static_cast<signed char>(std::strtol(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, unsigned char>::value, T>::type {\n    return static_cast<unsigned char>(std::strtoul(str.c_str(), nullptr, 0));\n  }\n\n  //! 
Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, short int>::value, T>::type {\n    return static_cast<short int>(std::strtol(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, int>::value, T>::type {\n    return static_cast<int>(std::strtol(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, long int>::value, T>::type {\n    return static_cast<long int>(std::strtol(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, long long int>::value, T>::type {\n    return static_cast<long long int>(std::strtoll(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, unsigned short int>::value,\n                              T>::type {\n    return static_cast<unsigned short int>(\n        std::strtoul(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, unsigned int>::value, T>::type {\n    return static_cast<unsigned int>(std::strtoul(str.c_str(), nullptr, 0));\n  }\n\n  //! 
Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, unsigned long int>::value,\n                              T>::type {\n    return static_cast<unsigned long int>(\n        std::strtoul(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, unsigned long long int>::value,\n                              T>::type {\n    return static_cast<unsigned long long int>(\n        std::strtoull(str.c_str(), nullptr, 0));\n  }\n\n  //! Convert string type to another type\n  template <typename T>\n  static auto StringCast(const std::string &str) ->\n      typename std::enable_if<std::is_same<T, bool>::value, T>::type {\n    if (str.empty()) {\n      return false;\n    }\n    char c = str[0];\n    if (c == 'Y' || c == 'T' || c == 'y' || c == 't') {\n      return true;\n    }\n    return !!std::strtof(str.c_str(), nullptr);\n  }\n\n private:\n  ailego::Hypercube hypercube_;\n};\n\n#undef _TRYING_COMPATIBLE\n#undef _TRYING_COMPATIBLE\n#undef _TRYING_COMPATIBLE_BOOL\n#undef _TRYING_COMPATIBLE_STRING\n#undef _TRYING_CONVERT_STRING\n\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/container/vector.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <initializer_list>\n#include <stdexcept>\n#include <string>\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Fixed Vector\n */\ntemplate <typename T, size_t N>\nclass FixedVector {\n public:\n  enum { MAX_SIZE = N };\n\n  //! Constructor\n  template <typename... U>\n  FixedVector(U... vals) : data_{vals...} {}\n\n  //! Overloaded operator []\n  T &operator[](size_t i) {\n    return data_[i];\n  }\n\n  //! Overloaded operator []\n  constexpr const T &operator[](size_t i) const {\n    return data_[i];\n  }\n\n  //! Retrieve data pointer\n  T *data(void) {\n    return data_;\n  }\n\n  //! Retrieve data pointer\n  const T *data(void) const {\n    return data_;\n  }\n\n  //! Retrieve count of elements in vector\n  constexpr size_t size(void) const {\n    return MAX_SIZE;\n  }\n\n  //! Convert a array pointer to vector pointer\n  static FixedVector *Cast(T arr[N]) {\n    return reinterpret_cast<FixedVector<T, N> *>(arr);\n  }\n\n  //! Convert a array pointer to vector pointer\n  static const FixedVector *Cast(const T arr[N]) {\n    return reinterpret_cast<const FixedVector<T, N> *>(arr);\n  }\n\n private:\n  //! Data member\n  T data_[N];\n};\n\n/*! 
Numerical Vector Adapter\n */\ntemplate <typename T, typename TBase = std::string,\n          typename =\n              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>\nclass NumericalVector : public TBase {\n public:\n  typedef typename std::remove_cv<T>::type ValueType;\n  typedef ValueType *iterator;\n  typedef const ValueType *const_iterator;\n\n  //! Constructor\n  NumericalVector(void) : TBase() {}\n\n  //! Constructor\n  explicit NumericalVector(size_t dim) : TBase() {\n    this->resize(dim);\n  }\n\n  //! Constructor\n  NumericalVector(size_t dim, const ValueType &val) : TBase() {\n    this->resize(dim, val);\n  }\n\n  //! Constructor\n  NumericalVector(const NumericalVector &rhs) : TBase(rhs) {}\n\n  //! Constructor\n  NumericalVector(NumericalVector &&rhs) : TBase(std::forward<TBase>(rhs)) {}\n\n  //! Constructor\n  NumericalVector(const TBase &rhs) : TBase(rhs) {\n    if (TBase::size() % sizeof(T) != 0) {\n      throw std::length_error(\"Unmatched length\");\n    }\n  }\n\n  //! Constructor\n  NumericalVector(TBase &&rhs) : TBase(std::move(rhs)) {\n    if (TBase::size() % sizeof(T) != 0) {\n      throw std::length_error(\"Unmatched length\");\n    }\n  }\n\n  //! Constructor\n  NumericalVector(std::initializer_list<ValueType> il) : TBase() {\n    for (const auto &it : il) {\n      TBase::append(reinterpret_cast<const char *>(&it), sizeof(ValueType));\n    }\n  }\n\n  //! Assignment\n  NumericalVector &operator=(const NumericalVector &rhs) {\n    TBase::operator=(static_cast<const TBase &>(rhs));\n    return *this;\n  }\n\n  //! Assignment\n  NumericalVector &operator=(NumericalVector &&rhs) {\n    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));\n    return *this;\n  }\n\n  //! Assignment\n  NumericalVector &operator=(const TBase &rhs) {\n    TBase::operator=(rhs);\n    return *this;\n  }\n\n  //! Assignment\n  NumericalVector &operator=(TBase &&rhs) {\n    TBase::operator=(std::move(rhs));\n    return *this;\n  }\n\n  //! 
Overloaded operator []\n  ValueType &operator[](size_t i) {\n    return *(this->data() + i);\n  }\n\n  //! Overloaded operator []\n  const ValueType &operator[](size_t i) const {\n    return *(this->data() + i);\n  }\n\n  //! Appends a copy of value\n  NumericalVector &append(const ValueType &val) {\n    TBase::append(reinterpret_cast<const char *>(&val), sizeof(ValueType));\n    return *this;\n  }\n\n  //! Append a copy of value\n  void append(std::initializer_list<ValueType> il) {\n    for (const auto &it : il) {\n      TBase::append(reinterpret_cast<const char *>(&it), sizeof(ValueType));\n    }\n  }\n\n  //! Assign content to vector\n  void assign(const ValueType *vec, size_t len) {\n    TBase::assign(reinterpret_cast<const char *>(vec), len * sizeof(ValueType));\n  }\n\n  //! Assign content to vector\n  void assign(size_t n, const ValueType &val) {\n    this->clear();\n    this->resize(n, val);\n  }\n\n  //! Assign content to vector\n  void assign(std::initializer_list<ValueType> il) {\n    this->clear();\n    for (const auto &it : il) {\n      TBase::append(reinterpret_cast<const char *>(&it), sizeof(ValueType));\n    }\n  }\n\n  //! Retrieve element\n  ValueType &at(size_t i) {\n    return *(this->data() + i);\n  }\n\n  //! Retrieve element\n  const ValueType &at(size_t i) const {\n    return *(this->data() + i);\n  }\n\n  //! Access last element\n  ValueType &back(void) {\n    return *(this->rbegin());\n  }\n\n  //! Access last element\n  const ValueType &back(void) const {\n    return *(this->rbegin());\n  }\n\n  //! Retrieve iterator to beginning\n  iterator begin(void) {\n    return this->data();\n  }\n\n  //! Retrieve iterator to beginning\n  const_iterator begin(void) const {\n    return this->data();\n  }\n\n  //! Retrieve size of allocated storage\n  size_t capacity(void) const {\n    return (TBase::capacity() / sizeof(ValueType));\n  }\n\n  //! Clear the vector\n  void clear(void) {\n    TBase::clear();\n  }\n\n  //! 
Retrieve pointer of data\n  ValueType *data(void) {\n    return reinterpret_cast<ValueType *>(&(TBase::operator[](0)));\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *data(void) const {\n    return reinterpret_cast<const ValueType *>(TBase::data());\n  }\n\n  //! Test if vector is empty\n  bool empty(void) const {\n    return TBase::empty();\n  }\n\n  //! An iterator to the past-the-end\n  iterator end(void) {\n    return (this->data() + this->size());\n  }\n\n  //! An iterator to the past-the-end\n  const_iterator end(void) const {\n    return (this->data() + this->size());\n  }\n\n  //! Access first element\n  ValueType &front(void) {\n    return *(this->begin());\n  }\n\n  //! Access first element\n  const ValueType &front(void) const {\n    return *(this->begin());\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t n) {\n    TBase::reserve(n * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n) {\n    TBase::resize(n * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n, const ValueType &val) {\n    size_t count = this->size();\n\n    TBase::resize(n * sizeof(ValueType));\n    for (size_t i = count; i < n; ++i) {\n      *(this->data() + i) = val;\n    }\n  }\n\n  //! Retrieve dimension of vector\n  size_t size(void) const {\n    return (TBase::size() / sizeof(ValueType));\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return (TBase::size() / sizeof(ValueType));\n  }\n\n  //! Retrieve size of vector in bytes\n  size_t bytes(void) const {\n    return TBase::size();\n  }\n\n  //! Swap vector values\n  void swap(NumericalVector &vec) {\n    TBase::swap(static_cast<TBase &>(vec));\n  }\n};\n\n/*! 
Nibble Vector Adapter\n */\ntemplate <typename T, typename TBase = std::string,\n          typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass NibbleVector : public TBase {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n  using StoreType = typename std::make_unsigned<ValueType>::type;\n\n  //! const_iterator of Nibble Vector\n  class const_iterator {\n   public:\n    //! Constructor\n    const_iterator(void) : i_(0), owner_(nullptr) {}\n\n    //! Constructor\n    const_iterator(const NibbleVector *owner, size_t i)\n        : i_(i), owner_(owner) {}\n\n    //! Equality\n    bool operator==(const const_iterator &rhs) const {\n      return (i_ == rhs.i_);\n    }\n\n    //! No equality\n    bool operator!=(const const_iterator &rhs) const {\n      return (i_ != rhs.i_);\n    }\n\n    //! Increment (Prefix)\n    const_iterator &operator++() {\n      ++i_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    const_iterator operator++(int) {\n      const_iterator tmp = *this;\n      ++i_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_iterator &operator--() {\n      --i_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    const_iterator operator--(int) {\n      const_iterator tmp = *this;\n      --i_;\n      return tmp;\n    }\n\n    //! operator \"+=\"\n    const_iterator &operator+=(size_t offset) {\n      i_ += offset;\n      return *this;\n    }\n\n    //! operator \"-=\"\n    const_iterator &operator-=(size_t offset) {\n      i_ -= offset;\n      return *this;\n    }\n\n    //! Indirection (Signed integral)\n    ValueType operator*() const {\n      return owner_->element<ValueType>(i_);\n    }\n\n   private:\n    size_t i_;\n    const NibbleVector *owner_;\n  };\n\n  //! Constructor\n  NibbleVector(void) : TBase() {}\n\n  //! Constructor\n  explicit NibbleVector(size_t dim) : TBase() {\n    this->resize(dim);\n  }\n\n  //! 
Constructor\n  NibbleVector(size_t dim, ValueType val) : TBase() {\n    this->resize(dim, val);\n  }\n\n  //! Constructor\n  NibbleVector(const NibbleVector &rhs) : TBase(rhs) {}\n\n  //! Constructor\n  NibbleVector(NibbleVector &&rhs) : TBase(std::forward<TBase>(rhs)) {}\n\n  //! Constructor\n  NibbleVector(const TBase &rhs) : TBase(rhs) {}\n\n  //! Constructor\n  NibbleVector(TBase &&rhs) : TBase(std::move(rhs)) {}\n\n  //! Constructor\n  NibbleVector(std::initializer_list<ValueType> il) : TBase() {\n    this->resize(il.size());\n\n    size_t index = 0;\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n\n    for (auto val : il) {\n      arr[index >> 1] |= ((uint8_t)(val & 0xf) << ((index & 1) << 2));\n      ++index;\n    }\n  }\n\n  //! Assignment\n  NibbleVector &operator=(const NibbleVector &rhs) {\n    TBase::operator=(static_cast<const TBase &>(rhs));\n    return *this;\n  }\n\n  //! Assignment\n  NibbleVector &operator=(NibbleVector &&rhs) {\n    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));\n    return *this;\n  }\n\n  //! Assignment\n  NibbleVector &operator=(const TBase &rhs) {\n    TBase::operator=(rhs);\n    return *this;\n  }\n\n  //! Assignment\n  NibbleVector &operator=(TBase &&rhs) {\n    TBase::operator=(std::move(rhs));\n    return *this;\n  }\n\n  //! Overloaded operator [] (Signed integral)\n  ValueType operator[](size_t i) const {\n    return this->at(i);\n  }\n\n  //! Appends a copy of value\n  NibbleVector &append(ValueType lo, ValueType hi) {\n    TBase::push_back(((uint8_t)(hi & 0xf) << 4) | (uint8_t)(lo & 0xf));\n    return *this;\n  }\n\n  //! Append a copy of value\n  void append(std::initializer_list<ValueType> il) {\n    size_t index = this->size();\n    this->resize(index + il.size());\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    for (auto val : il) {\n      arr[index >> 1] |= ((uint8_t)(val & 0xf) << ((index & 1) << 2));\n      ++index;\n    }\n  }\n\n  //! 
Assign content to vector\n  void assign(const ValueType *vec, size_t len) {\n    this->clear();\n    this->resize(len);\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    for (size_t i = 0; i != len; ++i) {\n      arr[i >> 1] |= ((uint8_t)(vec[i] & 0xf) << ((i & 1) << 2));\n    }\n  }\n\n  //! Assign content to vector\n  void assign(size_t n, ValueType val) {\n    this->clear();\n    this->resize(n, val);\n  }\n\n  //! Assign content to vector\n  void assign(std::initializer_list<ValueType> il) {\n    this->clear();\n    this->resize(il.size());\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    size_t index = 0;\n\n    for (auto val : il) {\n      arr[index >> 1] |= ((uint8_t)(val & 0xf) << ((index & 1) << 2));\n      ++index;\n    }\n  }\n\n  //! Set a element\n  void set(size_t i, ValueType val) {\n    uint8_t *it = reinterpret_cast<uint8_t *>(&(TBase::operator[](i >> 1)));\n    if (i & 1) {\n      *it = (*it & 0x0f) | ((uint8_t)(val & 0xf) << 4);\n    } else {\n      *it = (*it & 0xf0) | (uint8_t)(val & 0xf);\n    }\n  }\n\n  //! Retrieve element\n  ValueType at(size_t i) const {\n    return this->element<ValueType>(i);\n  }\n\n  //! Access last element\n  ValueType back(void) const {\n    return this->at(this->size() - 1);\n  }\n\n  //! Retrieve iterator to beginning\n  const_iterator begin(void) const {\n    return const_iterator(this, 0);\n  }\n\n  //! Retrieve size of allocated storage\n  size_t capacity(void) const {\n    return (TBase::capacity() << 1);\n  }\n\n  //! Clear the vector\n  void clear(void) {\n    TBase::clear();\n  }\n\n  //! Retrieve pointer of data\n  StoreType *data(void) {\n    return reinterpret_cast<StoreType *>(&(TBase::operator[](0)));\n  }\n\n  //! Retrieve pointer of data\n  const StoreType *data(void) const {\n    return reinterpret_cast<const StoreType *>(TBase::data());\n  }\n\n  //! 
Test if vector is empty\n  bool empty(void) const {\n    return TBase::empty();\n  }\n\n  //! An iterator to the past-the-end\n  const_iterator end(void) const {\n    return const_iterator(this, this->size());\n  }\n\n  //! Access first element\n  ValueType front(void) const {\n    return this->at(0);\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t n) {\n    TBase::reserve((n + (sizeof(ValueType) << 1) - 1) /\n                   (sizeof(ValueType) << 1) * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n) {\n    TBase::resize((n + (sizeof(ValueType) << 1) - 1) /\n                  (sizeof(ValueType) << 1) * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n, ValueType val) {\n    TBase::resize((n + (sizeof(ValueType) << 1) - 1) /\n                      (sizeof(ValueType) << 1) * sizeof(ValueType),\n                  ((uint8_t)(val & 0xf) << 4) | (uint8_t)(val & 0xf));\n  }\n\n  //! Retrieve dimension of vector\n  size_t size(void) const {\n    return (TBase::size() << 1);\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return (TBase::size() << 1);\n  }\n\n  //! Retrieve size of vector in bytes\n  size_t bytes(void) const {\n    return TBase::size();\n  }\n\n  //! Swap vector values\n  void swap(NibbleVector &vec) {\n    TBase::swap(static_cast<TBase &>(vec));\n  }\n\n protected:\n  //! Retrieve element (Signed integral)\n  template <typename U>\n  auto element(size_t i) const ->\n      typename std::enable_if<std::is_signed<U>::value, U>::type {\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());\n    return (static_cast<int8_t>(arr[i >> 1] << (~(i << 2) & 4)) >> 4);\n  }\n\n  //! 
Retrieve element (Unsigned integral)\n  template <typename U>\n  auto element(size_t i) const ->\n      typename std::enable_if<std::is_unsigned<U>::value, U>::type {\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());\n    return ((arr[i >> 1] >> ((i & 1) << 2)) & 0xf);\n  }\n};\n\n/*! Binary Vector Adapter\n */\ntemplate <typename T, typename TBase = std::string,\n          typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass BinaryVector : public TBase {\n public:\n  //! Type of value\n  using ValueType = typename std::remove_cv<T>::type;\n\n  //! const_iterator of Binary Vector\n  class const_iterator {\n   public:\n    //! Constructor\n    const_iterator(void) : i_(0), arr_(nullptr) {}\n\n    //! Constructor\n    const_iterator(const void *buf, size_t i)\n        : i_(i), arr_(reinterpret_cast<const uint8_t *>(buf)) {}\n\n    //! Equality\n    bool operator==(const const_iterator &rhs) const {\n      return (i_ == rhs.i_);\n    }\n\n    //! No equality\n    bool operator!=(const const_iterator &rhs) const {\n      return (i_ != rhs.i_);\n    }\n\n    //! Increment (Prefix)\n    const_iterator &operator++() {\n      ++i_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    const_iterator operator++(int) {\n      const_iterator tmp = *this;\n      ++i_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_iterator &operator--() {\n      --i_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    const_iterator operator--(int) {\n      const_iterator tmp = *this;\n      --i_;\n      return tmp;\n    }\n\n    //! operator \"+=\"\n    const_iterator &operator+=(size_t offset) {\n      i_ += offset;\n      return *this;\n    }\n\n    //! operator \"-=\"\n    const_iterator &operator-=(size_t offset) {\n      i_ -= offset;\n      return *this;\n    }\n\n    //! Indirection (eg. 
*iter)\n    bool operator*() const {\n      return ((arr_[i_ >> 3] & (1u << (i_ & 7))) != 0);\n    }\n\n   private:\n    size_t i_;\n    const uint8_t *arr_;\n  };\n\n  //! Constructor\n  BinaryVector(void) : TBase() {}\n\n  //! Constructor\n  explicit BinaryVector(size_t dim) : TBase() {\n    this->resize(dim);\n  }\n\n  //! Constructor\n  BinaryVector(size_t dim, bool val) : TBase() {\n    this->resize(dim, val);\n  }\n\n  //! Constructor\n  BinaryVector(const BinaryVector &rhs) : TBase(rhs) {}\n\n  //! Constructor\n  BinaryVector(BinaryVector &&rhs) : TBase(std::move(rhs)) {}\n\n  //! Constructor\n  BinaryVector(const TBase &rhs) : TBase(rhs) {\n    if (TBase::size() % sizeof(T) != 0) {\n      throw std::length_error(\"Unmatched length\");\n    }\n  }\n\n  //! Constructor\n  BinaryVector(TBase &&rhs) : TBase(std::move(rhs)) {\n    if (TBase::size() % sizeof(T) != 0) {\n      throw std::length_error(\"Unmatched length\");\n    }\n  }\n\n  //! Constructor\n  BinaryVector(std::initializer_list<bool> il) : TBase() {\n    this->resize(il.size());\n\n    size_t index = 0;\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n\n    for (auto val : il) {\n      if (val) {\n        arr[index >> 3] |= (uint8_t)(1u << (index & 7));\n      }\n      ++index;\n    }\n  }\n\n  //! Assignment\n  BinaryVector &operator=(const BinaryVector &rhs) {\n    TBase::operator=(static_cast<const TBase &>(rhs));\n    return *this;\n  }\n\n  //! Assignment\n  BinaryVector &operator=(BinaryVector &&rhs) {\n    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));\n    return *this;\n  }\n\n  //! Assignment\n  BinaryVector &operator=(const TBase &rhs) {\n    TBase::operator=(rhs);\n    return *this;\n  }\n\n  //! Assignment\n  BinaryVector &operator=(TBase &&rhs) {\n    TBase::operator=(std::move(rhs));\n    return *this;\n  }\n\n  //! 
Overloaded operator []\n  bool operator[](size_t i) const {\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());\n    return ((arr[i >> 3] & (1u << (i & 7))) != 0);\n  }\n\n  //! Assign content to vector\n  void assign(const bool *vec, size_t len) {\n    this->clear();\n    this->resize(len);\n\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    for (size_t i = 0; i < len; ++i) {\n      bool val = vec[i];\n      if (val) {\n        arr[i >> 3] |= (1u << (i & 7));\n      }\n    }\n  }\n\n  //! Assign content to vector\n  void assign(size_t n, bool val) {\n    this->clear();\n    this->resize(n, val);\n  }\n\n  //! Assign content to vector\n  void assign(std::initializer_list<bool> il) {\n    this->clear();\n    this->resize(il.size());\n\n    size_t index = 0;\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    for (auto val : il) {\n      if (val) {\n        arr[index >> 3] |= (uint8_t)(1u << (index & 7));\n      }\n      ++index;\n    }\n  }\n\n  //! Retrieve element\n  bool at(size_t i) const {\n    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());\n    return ((arr[i >> 3] & (1u << (i & 7))) != 0);\n  }\n\n  //! Set a bit\n  void set(size_t i) {\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    arr[i >> 3] |= (uint8_t)(1u << (i & 7));\n  }\n\n  //! Reset a bit\n  void reset(size_t i) {\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    arr[i >> 3] &= (uint8_t)(~(1u << (i & 7)));\n  }\n\n  //! Toggle a bit\n  void flip(size_t i) {\n    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));\n    arr[i >> 3] ^= (uint8_t)(1u << (i & 7));\n  }\n\n  //! Access last element\n  bool back(void) const {\n    return this->at(this->size() - 1);\n  }\n\n  //! Retrieve const_iterator to beginning\n  const_iterator begin(void) const {\n    return const_iterator(this->data(), 0);\n  }\n\n  //! 
Retrieve size of allocated storage\n  size_t capacity(void) const {\n    return (TBase::capacity() << 3);\n  }\n\n  //! Clear the vector\n  void clear(void) {\n    TBase::clear();\n  }\n\n  //! Retrieve pointer of data\n  ValueType *data(void) {\n    return reinterpret_cast<ValueType *>(&(TBase::operator[](0)));\n  }\n\n  //! Retrieve pointer of data\n  const ValueType *data(void) const {\n    return reinterpret_cast<const ValueType *>(TBase::data());\n  }\n\n  //! Test if vector is empty\n  bool empty(void) const {\n    return TBase::empty();\n  }\n\n  //! An const_iterator to the past-the-end\n  const_iterator end(void) const {\n    return const_iterator(this->data(), this->size());\n  }\n\n  //! Access first element\n  bool front(void) const {\n    return this->at(0);\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t n) {\n    TBase::reserve((n + (sizeof(ValueType) << 3) - 1) /\n                   (sizeof(ValueType) << 3) * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n) {\n    TBase::resize((n + (sizeof(ValueType) << 3) - 1) /\n                  (sizeof(ValueType) << 3) * sizeof(ValueType));\n  }\n\n  //! Resize the vector to a length of n elements\n  void resize(size_t n, bool val) {\n    TBase::resize((n + (sizeof(ValueType) << 3) - 1) /\n                      (sizeof(ValueType) << 3) * sizeof(ValueType),\n                  val ? 0xffu : 0u);\n  }\n\n  //! Retrieve dimension of vector\n  size_t size(void) const {\n    return (TBase::size() << 3);\n  }\n\n  //! Retrieve dimension of vector\n  size_t dimension(void) const {\n    return (TBase::size() << 3);\n  }\n\n  //! Retrieve size of vector in bytes\n  size_t bytes(void) const {\n    return TBase::size();\n  }\n\n  //! Swap vector values\n  void swap(BinaryVector &vec) {\n    TBase::swap(static_cast<TBase &>(vec));\n  }\n};\n\n/*! 
Hybrid Vector Adapter\n */\ntemplate <typename T, typename TBase = std::string,\n          typename =\n              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>\nclass HybridVector : public NumericalVector<T, TBase> {\n public:\n  typedef typename std::remove_cv<T>::type ValueType;\n  typedef ValueType *iterator;\n  typedef const ValueType *const_iterator;\n\n  //! Constructor\n  HybridVector(void) = default;\n\n  //! Constructor\n  explicit HybridVector(size_t dim) : NumericalVector<T, TBase>(dim) {}\n\n  //! Constructor\n  HybridVector(const HybridVector &rhs)\n      : NumericalVector<T, TBase>(rhs),\n        sparse_count_(rhs.sparse_count_),\n        sparse_indices_(rhs.sparse_indices_),\n        sparse_data_(rhs.sparse_data_) {}\n\n  //! Constructor\n  HybridVector(HybridVector &&rhs)\n      : NumericalVector<T, TBase>(std::forward<NumericalVector<T, TBase>>(rhs)),\n        sparse_count_(rhs.sparse_count_),\n        sparse_indices_(std::move(rhs.sparse_indices_)),\n        sparse_data_(std::move(rhs.sparse_data_)) {}\n\n  //! Assignment\n  HybridVector &operator=(const HybridVector &rhs) {\n    NumericalVector<T, TBase>::operator=(\n        static_cast<const NumericalVector<T, TBase> &>(rhs));\n    sparse_count_ = rhs.sparse_count_;\n    sparse_indices_ = rhs.sparse_indices_;\n    sparse_data_ = rhs.sparse_data_;\n\n    return *this;\n  }\n\n  //! 
Assignment\n  HybridVector &operator=(HybridVector &&rhs) {\n    NumericalVector<T, TBase>::operator=(\n        std::move(static_cast<NumericalVector<T, TBase> &&>(rhs)));\n    sparse_count_ = rhs.sparse_count_;\n    sparse_indices_ = std::move(rhs.sparse_indices_);\n    sparse_data_ = std::move(rhs.sparse_data_);\n\n    return *this;\n  }\n\n  size_t sparse_count() const {\n    return sparse_count_;\n  }\n\n  const uint32_t *sparse_indices() const {\n    return reinterpret_cast<const uint32_t *>(sparse_indices_.data());\n  }\n\n  const ValueType *sparse_data(void) const {\n    return reinterpret_cast<const ValueType *>(sparse_data_.data());\n  }\n\n  //! Request a change in capacity\n  void resize_for_sparse(size_t n) {\n    sparse_indices_.resize(n * sizeof(uint32_t));\n    sparse_data_.resize(n * sizeof(ValueType));\n  }\n\n  int add_sparses(const NumericalVector<uint32_t> &indexes,\n                  const NumericalVector<ValueType> &values) {\n    sparse_count_ = indexes.size();\n\n    sparse_indices_ = (const std::string &)indexes;\n    sparse_data_ = (const std::string &)values;\n\n    return 0;\n  }\n\n private:\n  //! Data Member\n  size_t sparse_count_;\n  std::string sparse_indices_;\n  std::string sparse_data_;\n};\n\n/*! Sparse Vector Adapter\n */\ntemplate <typename T>\nclass SparseVector {\n public:\n  typedef typename std::remove_cv<T>::type ValueType;\n  typedef ValueType *iterator;\n  typedef const ValueType *const_iterator;\n\n  //! Constructor\n  SparseVector(void) = default;\n\n  size_t sparse_count() const {\n    return sparse_count_;\n  }\n\n  const uint32_t *sparse_indices() const {\n    return reinterpret_cast<const uint32_t *>(sparse_indices_.data());\n  }\n\n  const ValueType *sparse_data(void) const {\n    return reinterpret_cast<const ValueType *>(sparse_data_.data());\n  }\n\n  //! 
Request a change in capacity\n  void resize_for_sparse(size_t n) {\n    sparse_indices_.resize(n * sizeof(uint32_t));\n    sparse_data_.resize(n * sizeof(ValueType));\n  }\n\n  int add_sparses(const NumericalVector<uint32_t> &indexes,\n                  const NumericalVector<ValueType> &values) {\n    sparse_count_ = indexes.size();\n\n    sparse_indices_ = (const std::string &)indexes;\n    sparse_data_ = (const std::string &)values;\n\n    return 0;\n  }\n\n private:\n  //! Data Member\n  size_t sparse_count_;\n  std::string sparse_indices_;\n  std::string sparse_data_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/encoding/json/mod_json.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <math.h>\n#include <stdbool.h>\n#include <stdint.h>\n\n#if !defined(__cplusplus) && defined(_MSC_VER)\n#if !defined(inline)\n#define inline __inline\n#endif\n#endif\n\n#if defined(__cplusplus)\nextern \"C\" {\n#endif\n\n#define MOD_JSON_FALSE (false)\n#define MOD_JSON_TRUE (true)\n#define MOD_JSON_INFINITY (INFINITY)\n\n/*! JSON Type\n */\nenum mod_json_type {\n  mod_json_type_null = 0,\n  mod_json_type_boolean = 1,\n  mod_json_type_integer = 2,\n  mod_json_type_float = 3,\n  mod_json_type_string = 4,\n  mod_json_type_array = 5,\n  mod_json_type_object = 6\n};\n\n/*! JSON Token State\n */\nenum mod_json_state {\n  mod_json_state_null = 0,\n  mod_json_state_start = 1,\n  mod_json_state_finish = 2,\n  mod_json_state_array_start = 3,\n  mod_json_state_array_half = 4,\n  mod_json_state_array_finish = 5,\n  mod_json_state_object_start = 6,\n  mod_json_state_object_half1 = 7,\n  mod_json_state_object_half2 = 8,\n  mod_json_state_object_finish = 9,\n  mod_json_state_max = 10\n};\n\n/*! 
JSON Token Error Code\n */\nenum mod_json_error {\n  mod_json_error_null = 0,\n  mod_json_error_invalid = 1,\n  mod_json_error_state = 2,\n  mod_json_error_empty = 3,\n  mod_json_error_break = 4,\n  mod_json_error_depth = 5,\n  mod_json_error_trunc = 6,\n  mod_json_error_start = 7,\n  mod_json_error_array = 8,\n  mod_json_error_object = 9,\n  mod_json_error_key = 10,\n  mod_json_error_value = 11,\n  mod_json_error_quote = 12\n};\n\n/*! JSON Token Event\n */\nenum mod_json_event {\n  mod_json_event_null = 0,\n  mod_json_event_field = 1,\n  mod_json_event_object = 2,\n  mod_json_event_array = 3,\n  mod_json_event_boolean = 4,\n  mod_json_event_integer = 5,\n  mod_json_event_float = 6,\n  mod_json_event_string = 7\n};\n\ntypedef unsigned int mod_json_size_t;\ntypedef int mod_json_ssize_t;\ntypedef bool mod_json_boolean_t;\ntypedef char mod_json_char_t;\ntypedef const char mod_json_cchar_t;\ntypedef unsigned char mod_json_uchar_t;\ntypedef long long mod_json_integer_t;\ntypedef double mod_json_float_t;\ntypedef void mod_json_void_t;\ntypedef enum mod_json_type mod_json_type_t;\ntypedef union mod_json_any mod_json_any_t;\ntypedef struct mod_json_value mod_json_value_t;\ntypedef struct mod_json_string mod_json_string_t;\ntypedef struct mod_json_array mod_json_array_t;\ntypedef struct mod_json_object mod_json_object_t;\ntypedef struct mod_json_pair mod_json_pair_t;\ntypedef struct mod_json_option mod_json_option_t;\ntypedef enum mod_json_state mod_json_state_t;\ntypedef enum mod_json_error mod_json_error_t;\ntypedef enum mod_json_event mod_json_event_t;\ntypedef struct mod_json_token mod_json_token_t;\n\n/*! Callback function when parsing JSON\n */\ntypedef int (*mod_json_event_proc)(mod_json_token_t *tok, mod_json_void_t *val,\n                                   mod_json_size_t len);\n\n/*! 
JSON Any\n */\nunion mod_json_any {\n  mod_json_object_t *c_obj;\n  mod_json_array_t *c_arr;\n  mod_json_string_t *c_str;\n  mod_json_float_t c_float;\n  mod_json_boolean_t c_bool;\n  mod_json_integer_t c_int;\n};\n\n/*! JSON Value\n */\nstruct mod_json_value {\n  mod_json_ssize_t refer;\n  mod_json_type_t type;\n  mod_json_any_t data;\n};\n\n/*! JSON String\n */\nstruct mod_json_string {\n  mod_json_ssize_t refer;\n  mod_json_size_t size;\n  mod_json_char_t *first;\n  mod_json_char_t *last;\n};\n\n/*! JSON Array\n */\nstruct mod_json_array {\n  mod_json_ssize_t refer;\n  mod_json_size_t size;\n  mod_json_value_t **first;\n  mod_json_value_t **last;\n};\n\n/*! JSON Pair\n */\nstruct mod_json_pair {\n  mod_json_string_t *key;\n  mod_json_value_t *val;\n};\n\n/*! JSON Object\n */\nstruct mod_json_object {\n  mod_json_ssize_t refer;\n  mod_json_size_t size;\n  mod_json_pair_t *first;\n  mod_json_pair_t *last;\n};\n\n#define MOD_JSON_COMMENT 0x0001  /* Enable comments */\n#define MOD_JSON_UNSTRICT 0x0002 /* Enable loose JSON string */\n#define MOD_JSON_SIMPLE 0x0004   /* Enable simple format */\n#define MOD_JSON_SQUOTE 0x0008   /* Enable single quotes support */\n\n/*! 
JSON Option\n */\nstruct mod_json_option {\n  mod_json_size_t options;\n  mod_json_size_t object_depth;\n  mod_json_size_t array_depth;\n};\n\n/**\n *  \\brief           Create and set a JSON null value\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_null(void);\n\n/**\n *  \\brief           Create and set a JSON object value\n *  \\param obj       The value to be assigned\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_object(mod_json_object_t *obj);\n\n/**\n *  \\brief           Create and set a JSON array value\n *  \\param arr       The value to be assigned\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_array(mod_json_array_t *arr);\n\n/**\n *  \\brief           Create and set a JSON string value\n *  \\param str       The value to be assigned\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_string(mod_json_string_t *str);\n\n/**\n *  \\brief           Create and set a JSON string buffer\n *  \\param buf       The pointer of string buffer\n *  \\param len       The length of string buffer\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_buffer(mod_json_cchar_t *buf,\n                                            mod_json_size_t len);\n\n/**\n *  \\brief           Create and set a JSON integer value\n *  \\param num       The value to be assigned\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_integer(mod_json_integer_t num);\n\n/**\n *  \\brief           Create and set a JSON float value\n *  \\param dbl       The value to be assigned\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_set_float(mod_json_float_t dbl);\n\n/**\n *  \\brief           Create and set a JSON boolean value\n *  \\param bol       The value to be assigned\n *  \\return          Null indicates failure.\n 
*/\nmod_json_value_t *mod_json_value_set_boolean(mod_json_boolean_t bol);\n\n/**\n *  \\brief           Assign a JSON value as null\n *  \\param val       The pointer of value\n */\nvoid mod_json_value_assign_null(mod_json_value_t *val);\n\n/**\n *  \\brief           Assign a JSON value as an object\n *  \\param val       The pointer of value\n *  \\param obj       The value to be assigned\n */\nvoid mod_json_value_assign_object(mod_json_value_t *val,\n                                  mod_json_object_t *obj);\n\n/**\n *  \\brief           Assign a JSON value as an array\n *  \\param val       The pointer of value\n *  \\param arr       The value to be assigned\n */\nvoid mod_json_value_assign_array(mod_json_value_t *val, mod_json_array_t *arr);\n\n/**\n *  \\brief           Assign a JSON value as a string\n *  \\param val       The pointer of value\n *  \\param str       The value to be assigned\n */\nvoid mod_json_value_assign_string(mod_json_value_t *val,\n                                  mod_json_string_t *str);\n\n/**\n *  \\brief           Assign a JSON value as an integer\n *  \\param val       The pointer of value\n *  \\param num       The value to be assigned\n */\nvoid mod_json_value_assign_integer(mod_json_value_t *val,\n                                   mod_json_integer_t num);\n\n/**\n *  \\brief           Assign a JSON value as a float\n *  \\param val       The pointer of value\n *  \\param dbl       The value to be assigned\n */\nvoid mod_json_value_assign_float(mod_json_value_t *val, mod_json_float_t dbl);\n\n/**\n *  \\brief           Assign a JSON value as a boolean\n *  \\param val       The pointer of value\n *  \\param bol       The value to be assigned\n */\nvoid mod_json_value_assign_boolean(mod_json_value_t *val,\n                                   mod_json_boolean_t bol);\n\n/**\n *  \\brief           Assign a new JSON value\n *  \\param dst       The pointer of destination value (can't be null)\n *  \\param src       The pointer of 
source value (can be null)\n */\nvoid mod_json_value_assign(mod_json_value_t *dst, mod_json_value_t *src);\n\n/**\n *  \\brief           Merge a JSON value into another one\n *  \\param dst       The pointer of destination value (can't be null)\n *  \\param src       The pointer of source value (can be null)\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_value_merge(mod_json_value_t *dst, mod_json_value_t *src);\n\n/**\n *  \\brief           Retrieve object of a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates unmatched type or empty.\n */\nmod_json_object_t *mod_json_value_object(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve array of a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates unmatched type or empty.\n */\nmod_json_array_t *mod_json_value_array(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve string of a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates unmatched type or empty.\n */\nmod_json_string_t *mod_json_value_string(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve c-string of a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates unmatched type or empty.\n */\nmod_json_cchar_t *mod_json_value_cstring(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve float of a JSON value\n *  \\param val       The pointer of value\n *  \\return          It will try converting the unmatched\n                     value to float. 
If nothing be done,\n                     returns zero by default.\n */\nmod_json_float_t mod_json_value_float(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve boolean of a JSON value\n *  \\param val       The pointer of value\n *  \\return          If string, object or array is not empty,\n                     number(integer or float) does not equal\n                     to zero, it returns true.\n */\nmod_json_boolean_t mod_json_value_boolean(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve integer of a JSON value\n *  \\param val       The pointer of value\n *  \\return          It will try converting the unmatched\n                     value to integer. If nothing be done,\n                     returns zero by default.\n */\nmod_json_integer_t mod_json_value_integer(mod_json_value_t *val);\n\n/**\n *  \\brief           Clone a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_value_clone(mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve non-zero if they are equal\n *  \\param lhs       The pointer of left value\n *  \\param rhs       The pointer of right value\n *  \\return          1 indicates true, 0 indicates false.\n */\nmod_json_boolean_t mod_json_value_is_equal(mod_json_value_t *lhs,\n                                           mod_json_value_t *rhs);\n\n/**\n *  \\brief           Unset or destroy a JSON value\n *  \\param val       The pointer of value\n */\nvoid mod_json_value_unset(mod_json_value_t *val);\n\n/**\n *  \\brief           Increase reference count of a JSON value\n *  \\param val       The pointer of value\n *  \\return          The original pointer of value\n */\nstatic inline mod_json_value_t *mod_json_value_get(mod_json_value_t *val) {\n  ++val->refer;\n  return val;\n}\n\n/**\n *  \\brief           Decrease reference count of a JSON value\n *  \\param val       The pointer of value\n *  \\return          The new 
number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_value_put(mod_json_value_t *val) {\n  return (--val->refer);\n}\n\n/**\n *  \\brief           Retrieve refer-counter of a JSON value\n *  \\param val       The pointer of value\n *  \\return          The number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_value_refer(mod_json_value_t *val) {\n  return (val ? val->refer : -1);\n}\n\n/**\n *  \\brief           Set the refer-counter as leaked\n *  \\param val       The pointer of value\n */\nstatic inline void mod_json_value_set_leaked(mod_json_value_t *val) {\n  val->refer = 0;\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is leaked\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_leaked(\n    mod_json_value_t *val) {\n  return (val->refer <= 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is shared\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_shared(\n    mod_json_value_t *val) {\n  return (val->refer > 1);\n}\n\n/**\n *  \\brief           Grab (get or clone) a JSON value\n *  \\param val       The pointer of value\n *  \\return          Null indicates failure\n */\nstatic inline mod_json_value_t *mod_json_value_grab(mod_json_value_t *val) {\n  /* Is it leaked? 
*/\n  if (!mod_json_value_is_leaked(val)) {\n    return mod_json_value_get(val);\n  }\n  return mod_json_value_clone(val);\n}\n\n/**\n *  \\brief           Retrieve type of a JSON value\n *  \\param val       The pointer of value\n *  \\return          The code of type\n */\nstatic inline mod_json_type_t mod_json_value_type(mod_json_value_t *val) {\n  return (val->type);\n}\n\n/**\n *  \\brief           Retrieve non-zero if a JSON value is null\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_null(mod_json_value_t *val) {\n  return (val ? val->type == mod_json_type_null : MOD_JSON_TRUE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON array\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_array(\n    mod_json_value_t *val) {\n  return (val ? val->type == mod_json_type_array : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON object\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_object(\n    mod_json_value_t *val) {\n  return (val ? val->type == mod_json_type_object : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON string\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_string(\n    mod_json_value_t *val) {\n  return (val ? 
val->type == mod_json_type_string : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON float\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_float(\n    mod_json_value_t *val) {\n  return (val ? val->type == mod_json_type_float : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON boolean\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_boolean(\n    mod_json_value_t *val) {\n  return (val ? val->type == mod_json_type_boolean : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Retrieve non-zero if it is a JSON integer\n *  \\param val       The pointer of value\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_value_is_integer(\n    mod_json_value_t *val) {\n  return (val ? 
val->type == mod_json_type_integer : MOD_JSON_FALSE);\n}\n\n/**\n *  \\brief           Request a change in capacity\n *  \\param str       The pointer of string\n *  \\param n         The requested size of capacity\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_string_reserve(mod_json_string_t *str, mod_json_size_t n);\n\n/**\n *  \\brief           Create and set a JSON string\n *  \\param cstr      The pointer of c-string\n *  \\param len       The length of c-string\n *  \\return          Null indicates failure.\n */\nmod_json_string_t *mod_json_string_set(mod_json_cchar_t *cstr,\n                                       mod_json_size_t len);\n\n/**\n *  \\brief           Assign new content to a JSON string\n *  \\param str       The pointer of string\n *  \\param cstr      The pointer of c-string\n *  \\param len       The length of c-string\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_string_assign(mod_json_string_t *str, mod_json_cchar_t *cstr,\n                           mod_json_size_t len);\n\n/**\n *  \\brief           Clone a JSON string\n *  \\param str       The pointer of string\n *  \\return          Null indicates failure.\n */\nstatic inline mod_json_string_t *mod_json_string_clone(mod_json_string_t *str) {\n  return (str ? 
mod_json_string_set(str->first,\n                                    (mod_json_size_t)(str->last - str->first))\n              : (mod_json_string_t *)0);\n}\n\n/**\n *  \\brief           Unset or destroy a JSON string\n *  \\param str       The pointer of string\n */\nvoid mod_json_string_unset(mod_json_string_t *str);\n\n/**\n *  \\brief           Reset a JSON string\n *  \\param str       The pointer of string\n */\nvoid mod_json_string_reset(mod_json_string_t *str);\n\n/**\n *  \\brief           Append a c-string to a JSON string\n *  \\param str       The pointer of string\n *  \\param cstr      The pointer of c-string\n *  \\param len       The length of c-string\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_string_append(mod_json_string_t *str, mod_json_cchar_t *cstr,\n                           mod_json_size_t len);\n\n/**\n *  \\brief           Add a copy of a JSON string\n *  \\param str       The main string\n *  \\param val       The appended string\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_string_add(mod_json_string_t *str, mod_json_string_t *val);\n\n/**\n *  \\brief           Retrieve HASH of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The value of HASH\n */\nmod_json_size_t mod_json_string_hash(mod_json_string_t *str);\n\n/**\n *  \\brief           Compare two JSON strings (case sensitive)\n *  \\param str1      The first string\n *  \\param str2      The second string\n *  \\return          0 indicates equal.\n */\nint mod_json_string_compare(mod_json_string_t *str1, mod_json_string_t *str2);\n\n/**\n *  \\brief           Convert a JSON string to an integer\n *  \\param str       The pointer of string\n *  \\return          If nothing be done, returns zero by default.\n */\nmod_json_integer_t mod_json_string_integer(mod_json_string_t *str);\n\n/**\n *  \\brief           Convert a JSON string to a float\n *  \\param str       The 
pointer of string\n *  \\return          If nothing be done, returns zero by default.\n */\nmod_json_float_t mod_json_string_float(mod_json_string_t *str);\n\n/**\n *  \\brief           Encode a JSON string\n *  \\param src       The pointer of source string\n *  \\return          Null indicates failure.\n */\nmod_json_string_t *mod_json_string_encode(mod_json_string_t *src);\n\n/**\n *  \\brief           Decode a JSON string\n *  \\param src       The pointer of source string\n *  \\return          Null indicates failure.\n */\nmod_json_string_t *mod_json_string_decode(mod_json_string_t *src);\n\n/**\n *  \\brief           Increase reference count of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The original pointer of string\n */\nstatic inline mod_json_string_t *mod_json_string_get(mod_json_string_t *str) {\n  ++str->refer;\n  return str;\n}\n\n/**\n *  \\brief           Decrease reference count of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The new number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_string_put(mod_json_string_t *str) {\n  return (--str->refer);\n}\n\n/**\n *  \\brief           Retrieve refer-counter of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_string_refer(mod_json_string_t *str) {\n  return (str ? 
str->refer : -1);\n}\n\n/**\n *  \\brief           Set the refer-counter as leaked\n *  \\param str       The pointer of string\n */\nstatic inline void mod_json_string_set_leaked(mod_json_string_t *str) {\n  str->refer = 0;\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is leaked\n *  \\param str       The pointer of string\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_string_is_leaked(\n    mod_json_string_t *str) {\n  return (str->refer <= 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is shared\n *  \\param str       The pointer of string\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_string_is_shared(\n    mod_json_string_t *str) {\n  return (str->refer > 1);\n}\n\n/**\n *  \\brief           Grab (get or clone) a JSON string\n *  \\param str       The pointer of string\n *  \\return          Null indicates failure\n */\nstatic inline mod_json_string_t *mod_json_string_grab(mod_json_string_t *str) {\n  /* Is it leaked? */\n  if (!mod_json_string_is_leaked(str)) {\n    return mod_json_string_get(str);\n  }\n  return mod_json_string_clone(str);\n}\n\n/**\n *  \\brief           Retrieve c-string of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The pointer of c-string\n */\nstatic inline mod_json_cchar_t *mod_json_string_cstr(mod_json_string_t *str) {\n  return (str ? str->first : (mod_json_cchar_t *)0);\n}\n\n/**\n *  \\brief           Retrieve data pointer of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The pointer of data\n */\nstatic inline mod_json_char_t *mod_json_string_data(mod_json_string_t *str) {\n  return (str ? 
str->first : (mod_json_char_t *)0);\n}\n\n/**\n *  \\brief           Retrieve capacity of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The size of allocated storage\n */\nstatic inline mod_json_size_t mod_json_string_capacity(mod_json_string_t *str) {\n  return (str ? (str->size - 1) : 0);\n}\n\n/**\n *  \\brief           Retrieve length of a JSON string\n *  \\param str       The pointer of string\n *  \\return          The length of string\n */\nstatic inline mod_json_size_t mod_json_string_length(mod_json_string_t *str) {\n  return (str ? (mod_json_size_t)(str->last - str->first) : 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if a JSON string is empty\n *  \\param str       The pointer of string\n *  \\return          0 indicates non-empty\n */\nstatic inline mod_json_boolean_t mod_json_string_empty(mod_json_string_t *str) {\n  return (mod_json_string_length(str) == 0);\n}\n\n/**\n *  \\brief           Create and set a JSON array\n *  \\param size      The initialized size of array\n *  \\return          Null indicates failure.\n */\nmod_json_array_t *mod_json_array_set(mod_json_size_t size);\n\n/**\n *  \\brief           Clone a JSON array\n *  \\param arr       The pointer of array\n *  \\return          Null indicates failure.\n */\nmod_json_array_t *mod_json_array_clone(mod_json_array_t *arr);\n\n/**\n *  \\brief           Retrieve non-zero if they are equal\n *  \\param lhs       The pointer of left array\n *  \\param rhs       The pointer of right array\n *  \\return          1 indicates true, 0 indicates false.\n */\nmod_json_boolean_t mod_json_array_is_equal(mod_json_array_t *lhs,\n                                           mod_json_array_t *rhs);\n\n/**\n *  \\brief           Unset or destroy a JSON array\n *  \\param arr       The pointer of array\n */\nvoid mod_json_array_unset(mod_json_array_t *arr);\n\n/**\n *  \\brief           Reset a JSON array\n *  \\param arr       The pointer of array\n */\nvoid 
mod_json_array_reset(mod_json_array_t *arr);\n\n/**\n *  \\brief           Create and set a JSON array (default parameters)\n *  \\return          Null indicates failure.\n */\nstatic inline mod_json_array_t *mod_json_array_set_default(void) {\n  return mod_json_array_set(0);\n}\n\n/**\n *  \\brief           Increase reference count of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The original pointer of array\n */\nstatic inline mod_json_array_t *mod_json_array_get(mod_json_array_t *arr) {\n  ++arr->refer;\n  return arr;\n}\n\n/**\n *  \\brief           Decrease reference count of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The new number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_array_put(mod_json_array_t *arr) {\n  return (--arr->refer);\n}\n\n/**\n *  \\brief           Retrieve refer-counter of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_array_refer(mod_json_array_t *arr) {\n  return (arr ? 
arr->refer : -1);\n}\n\n/**\n *  \\brief           Set the refer-counter as leaked\n *  \\param arr       The pointer of array\n */\nstatic inline void mod_json_array_set_leaked(mod_json_array_t *arr) {\n  arr->refer = 0;\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is leaked\n *  \\param arr       The pointer of array\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_array_is_leaked(\n    mod_json_array_t *arr) {\n  return (arr->refer <= 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is shared\n *  \\param arr       The pointer of array\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_array_is_shared(\n    mod_json_array_t *arr) {\n  return (arr->refer > 1);\n}\n\n/**\n *  \\brief           Grab (get or clone) a JSON array\n *  \\param arr       The pointer of array\n *  \\return          Null indicates failure\n */\nstatic inline mod_json_array_t *mod_json_array_grab(mod_json_array_t *arr) {\n  /* Is it leaked? */\n  if (!mod_json_array_is_leaked(arr)) {\n    return mod_json_array_get(arr);\n  }\n  return mod_json_array_clone(arr);\n}\n\n/**\n *  \\brief           Retrieve count of elements in a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The count of elements\n */\nstatic inline mod_json_size_t mod_json_array_count(mod_json_array_t *arr) {\n  return (arr ? (mod_json_size_t)(arr->last - arr->first) : 0);\n}\n\n/**\n *  \\brief           Retrieve capacity of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The size of allocated storage\n */\nstatic inline mod_json_size_t mod_json_array_capacity(mod_json_array_t *arr) {\n  return (arr ? 
arr->size : 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if a JSON array is empty\n *  \\param arr       The pointer of array\n *  \\return          0 indicates non-empty\n */\nstatic inline mod_json_boolean_t mod_json_array_empty(mod_json_array_t *arr) {\n  return (mod_json_array_count(arr) == 0);\n}\n\n/**\n *  \\brief           Retrieve the begin of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The pointer of begin\n */\nstatic inline mod_json_value_t **mod_json_array_begin(mod_json_array_t *arr) {\n  return (arr->first);\n}\n\n/**\n *  \\brief           Retrieve the reverse begin of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The pointer of reverse begin\n */\nstatic inline mod_json_value_t **mod_json_array_rbegin(mod_json_array_t *arr) {\n  return (arr->last - 1);\n}\n\n/**\n *  \\brief           Retrieve the end of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The pointer of end\n */\nstatic inline mod_json_value_t **mod_json_array_end(mod_json_array_t *arr) {\n  return (arr->last);\n}\n\n/**\n *  \\brief           Retrieve the reverse end of a JSON array\n *  \\param arr       The pointer of array\n *  \\return          The pointer of reverse end\n */\nstatic inline mod_json_value_t **mod_json_array_rend(mod_json_array_t *arr) {\n  return (arr->first - 1);\n}\n\n/**\n *  \\brief           Request a change in capacity\n *  \\param arr       The pointer of array\n *  \\param n         The requested size of capacity\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_array_reserve(mod_json_array_t *arr, mod_json_size_t n);\n\n/**\n *  \\brief           Reverse the order of the elements in an array\n *  \\param arr       The pointer of array\n */\nvoid mod_json_array_reverse(mod_json_array_t *arr);\n\n/**\n *  \\brief           Push a value into a JSON 
array\n *  \\param arr       The pointer of array\n *  \\param val       The pointer of value\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_array_push(mod_json_array_t *arr, mod_json_value_t *val);\n\n/**\n *  \\brief           Pop the last element from a JSON array\n *  \\param arr       The pointer of array\n */\nvoid mod_json_array_pop(mod_json_array_t *arr);\n\n/**\n *  \\brief           Remove the first element of a JSON array\n *  \\param arr       The pointer of array\n */\nvoid mod_json_array_shift(mod_json_array_t *arr);\n\n/**\n *  \\brief           Retrieve a value in a JSON array\n *  \\param arr       The pointer of array\n *  \\param id        The index (start from zero)\n *  \\return          Null indicates that no element was found.\n */\nmod_json_value_t *mod_json_array_at(mod_json_array_t *arr, mod_json_size_t id);\n\n/**\n *  \\brief           Merge a JSON array into another one\n *  \\param dst       The pointer of destination array (can't be null)\n *  \\param src       The pointer of source array (can't be null)\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_array_merge(mod_json_array_t *dst, mod_json_array_t *src);\n\n/**\n *  \\brief           Resize a JSON array so that it contains n elements\n *  \\param arr       The pointer of array\n *  \\param n         The new size, expressed in number of elements\n *  \\param val       The pointer of value assigned (can be null)\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_array_resize(mod_json_array_t *arr, mod_json_size_t n,\n                          mod_json_value_t *val);\n\n/**\n *  \\brief           Retrieve key of a JSON pair\n *  \\param pair      The pointer of pair\n *  \\return          The key of pair\n */\nstatic inline mod_json_string_t *mod_json_pair_key(mod_json_pair_t *pair) {\n  return (pair->key);\n}\n\n/**\n *  \\brief           Retrieve value of a JSON pair\n *  \\param pair  
    The pointer of pair\n *  \\return          The value of pair\n */\nstatic inline mod_json_value_t *mod_json_pair_value(mod_json_pair_t *pair) {\n  return (pair->val);\n}\n\n/**\n *  \\brief           Create and set a JSON object\n *  \\param size      The initialized size of object\n *  \\return          Null indicates failure.\n */\nmod_json_object_t *mod_json_object_set(mod_json_size_t size);\n\n/**\n *  \\brief           Clone a JSON object\n *  \\param obj       The pointer of object\n *  \\return          Null indicates failure.\n */\nmod_json_object_t *mod_json_object_clone(mod_json_object_t *obj);\n\n/**\n *  \\brief           Retrieve non-zero if they are equal\n *  \\param lhs       The pointer of left object\n *  \\param rhs       The pointer of right object\n *  \\return          1 indicates true, 0 indicates false.\n */\nmod_json_boolean_t mod_json_object_is_equal(mod_json_object_t *lhs,\n                                            mod_json_object_t *rhs);\n\n/**\n *  \\brief           Unset or destroy a JSON object\n *  \\param obj       The pointer of object\n */\nvoid mod_json_object_unset(mod_json_object_t *obj);\n\n/**\n *  \\brief           Reset a JSON object\n *  \\param obj       The pointer of object\n */\nvoid mod_json_object_reset(mod_json_object_t *obj);\n\n/**\n *  \\brief           Create and set a JSON object (default parameters)\n *  \\return          Null indicates failure.\n */\nstatic inline mod_json_object_t *mod_json_object_set_default(void) {\n  return mod_json_object_set(0);\n}\n\n/**\n *  \\brief           Increase reference count of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The original pointer of object\n */\nstatic inline mod_json_object_t *mod_json_object_get(mod_json_object_t *obj) {\n  ++obj->refer;\n  return obj;\n}\n\n/**\n *  \\brief           Decrease reference count of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The new number of 
refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_object_put(mod_json_object_t *obj) {\n  return (--obj->refer);\n}\n\n/**\n *  \\brief           Retrieve refer-counter of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The number of refer-counter\n */\nstatic inline mod_json_ssize_t mod_json_object_refer(mod_json_object_t *obj) {\n  return (obj ? obj->refer : -1);\n}\n\n/**\n *  \\brief           Set the refer-counter as leaked\n *  \\param obj       The pointer of object\n */\nstatic inline void mod_json_object_set_leaked(mod_json_object_t *obj) {\n  obj->refer = 0;\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is leaked\n *  \\param obj       The pointer of object\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_object_is_leaked(\n    mod_json_object_t *obj) {\n  return (obj->refer <= 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if refer-counter is shared\n *  \\param obj       The pointer of object\n *  \\return          1 indicates TRUE, 0 indicates FALSE\n */\nstatic inline mod_json_boolean_t mod_json_object_is_shared(\n    mod_json_object_t *obj) {\n  return (obj->refer > 1);\n}\n\n/**\n *  \\brief           Grab (get or clone) a JSON object\n *  \\param obj       The pointer of object\n *  \\return          Null indicates failure\n */\nstatic inline mod_json_object_t *mod_json_object_grab(mod_json_object_t *obj) {\n  /* Is it leaked? */\n  if (!mod_json_object_is_leaked(obj)) {\n    return mod_json_object_get(obj);\n  }\n  return mod_json_object_clone(obj);\n}\n\n/**\n *  \\brief           Retrieve count of elements in a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The count of elements\n */\nstatic inline mod_json_size_t mod_json_object_count(mod_json_object_t *obj) {\n  return (obj ? 
(mod_json_size_t)(obj->last - obj->first) : 0);\n}\n\n/**\n *  \\brief           Retrieve non-zero if a JSON object is empty\n *  \\param obj       The pointer of object\n *  \\return          0 indicates non-empty\n */\nstatic inline mod_json_boolean_t mod_json_object_empty(mod_json_object_t *obj) {\n  return (mod_json_object_count(obj) == 0);\n}\n\n/**\n *  \\brief           Retrieve the begin of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The pointer of begin\n */\nstatic inline mod_json_pair_t *mod_json_object_begin(mod_json_object_t *obj) {\n  return (obj->first);\n}\n\n/**\n *  \\brief           Retrieve the reverse begin of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The pointer of reverse begin\n */\nstatic inline mod_json_pair_t *mod_json_object_rbegin(mod_json_object_t *obj) {\n  return (obj->last - 1);\n}\n\n/**\n *  \\brief           Retrieve the end of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The pointer of end\n */\nstatic inline mod_json_pair_t *mod_json_object_end(mod_json_object_t *obj) {\n  return (obj->last);\n}\n\n/**\n *  \\brief           Retrieve the reverse end of a JSON object\n *  \\param obj       The pointer of object\n *  \\return          The pointer of reverse end\n */\nstatic inline mod_json_pair_t *mod_json_object_rend(mod_json_object_t *obj) {\n  return (obj->first - 1);\n}\n\n/**\n *  \\brief           Insert a pair into a JSON object\n *  \\param obj       The pointer of object\n *  \\param key       The string of key\n *  \\param val       The pointer of value\n *  \\return          The pair inserted, Null indicates failure.\n */\nmod_json_pair_t *mod_json_object_insert(mod_json_object_t *obj,\n                                        mod_json_string_t *key,\n                                        mod_json_value_t *val);\n\n/**\n *  \\brief           Assign a pair into a JSON object\n *  \\param obj       The 
pointer of object\n *  \\param key       The string of key\n *  \\param val       The pointer of value\n *  \\return          The pair assigned, Null indicates failure.\n */\nmod_json_pair_t *mod_json_object_assign(mod_json_object_t *obj,\n                                        mod_json_string_t *key,\n                                        mod_json_value_t *val);\n\n/**\n *  \\brief           Touch a pair in a JSON object\n *  \\param obj       The pointer of object\n *  \\param key       The c-string of key\n */\nmod_json_pair_t *mod_json_object_touch(mod_json_object_t *obj,\n                                       mod_json_cchar_t *key);\n\n/**\n *  \\brief           Erase a pair from a JSON object\n *  \\param obj       The pointer of object\n *  \\param key       The c-string of key\n */\nvoid mod_json_object_erase(mod_json_object_t *obj, mod_json_cchar_t *key);\n\n/**\n *  \\brief           Get a value in a JSON object\n *  \\param obj       The pointer of object\n *  \\param key       The c-string of key\n *  \\return          Null indicates failure.\n */\nmod_json_value_t *mod_json_object_at(mod_json_object_t *obj,\n                                     mod_json_cchar_t *key);\n\n/**\n *  \\brief           Find a pair in a JSON object\n *  \\param obj       The pointer of object\n *  \\param key       The c-string of key\n *  \\return          Null indicates failure.\n */\nmod_json_pair_t *mod_json_object_find(mod_json_object_t *obj,\n                                      mod_json_cchar_t *key);\n\n/**\n *  \\brief           Merge a JSON object into another one\n *  \\param dst       The pointer of destination object (can't be null)\n *  \\param src       The pointer of source object (can't be null)\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_object_merge(mod_json_object_t *dst, mod_json_object_t *src);\n\n/**\n *  \\brief           Create a JSON token\n *  \\param opt       The options of parser\n *  \\return        
  The pointer of token, Null indicates failure.\n */\nmod_json_token_t *mod_json_token_create(mod_json_option_t *opt);\n\n/**\n *  \\brief           Destroy a JSON token\n *  \\param tok       The pointer of token\n */\nvoid mod_json_token_destroy(mod_json_token_t *tok);\n\n/**\n *  \\brief           Parse a c-string with a JSON token\n *  \\param tok       The pointer of token\n *  \\param cstr      The pointer of c-string\n *  \\return          0 indicates success, -1 indicates failure.\n */\nint mod_json_token_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr);\n\n/**\n *  \\brief           Retrieve error of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The code of error\n */\nmod_json_error_t mod_json_token_error(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve error context of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The pointer of context, null indicates non-errors\n */\nmod_json_cchar_t *mod_json_token_context(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve state of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of state\n */\nmod_json_state_t mod_json_token_state(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve object depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of object depth\n */\nmod_json_size_t mod_json_token_object_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve array depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of array depth\n */\nmod_json_size_t mod_json_token_array_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve max object depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of max object depth\n */\nmod_json_size_t mod_json_token_max_object_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief        
   Retrieve max array depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of max array depth\n */\nmod_json_size_t mod_json_token_max_array_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of depth\n */\nmod_json_size_t mod_json_token_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve max depth of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of max depth\n */\nmod_json_size_t mod_json_token_max_depth(mod_json_token_t *tok);\n\n/**\n *  \\brief           Retrieve parameter of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The value of parameter\n */\nmod_json_void_t *mod_json_token_param(mod_json_token_t *tok);\n\n/**\n *  \\brief           Set parameter of a JSON token\n *  \\param tok       The pointer of token\n *  \\param param     The value of parameter\n */\nvoid mod_json_token_set_param(mod_json_token_t *tok, mod_json_void_t *param);\n\n/**\n *  \\brief           Register callback function of a JSON token\n *  \\param tok       The pointer of token\n *  \\param proc      The pointer of callback function\n */\nvoid mod_json_token_set_event(mod_json_token_t *tok, mod_json_event_proc proc);\n\n/**\n *  \\brief           Retrieve event code of a JSON token\n *  \\param tok       The pointer of token\n *  \\return          The code of event\n */\nmod_json_event_t mod_json_token_event(mod_json_token_t *tok);\n\n/**\n *  \\brief           Parse a c-string with a JSON token\n *  \\param tok       The pointer of token\n *  \\param cstr      The pointer of c-string\n *  \\return          The pointer of value, Null indicates failure.\n */\nmod_json_value_t *mod_json_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr);\n\n/**\n *  \\brief           Parse a c-string simply\n *  \\param cstr      The pointer of 
c-string\n *  \\param opts      The options of parser\n *  \\return          The pointer of value, Null indicates failure.\n */\nmod_json_value_t *mod_json_parse_simply(mod_json_cchar_t *cstr,\n                                        mod_json_size_t opts);\n\n/**\n *  \\brief           Dump a JSON value in string\n *  \\param val       The pointer of value\n *  \\return          Null indicates failure.\n */\nmod_json_string_t *mod_json_dump(mod_json_value_t *val);\n\n#if defined(__cplusplus)\n} /* extern \"C\" */\n#endif\n"
  },
  {
    "path": "src/include/zvec/ailego/encoding/json/mod_json_plus.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cfloat>\n#include <cstring>\n#include <stdexcept>\n#include <string>\n#include \"mod_json.h\"\n\nnamespace zvec {\nnamespace ailego {\n\n/*! JSON String\n */\nclass JsonString {\n public:\n  typedef mod_json_size_t size_type;\n  typedef mod_json_ssize_t ssize_type;\n  typedef mod_json_float_t float_type;\n  typedef mod_json_integer_t integer_type;\n\n  //! Constructor\n  JsonString(void) : str_(0) {}\n\n  //! Constructor\n  JsonString(const JsonString &rhs) : str_(0) {\n    if (rhs.str_) {\n      str_ = mod_json_string_grab(rhs.str_);\n    }\n  }\n\n#if __cplusplus >= 201103L\n  //! Constructor\n  JsonString(JsonString &&rhs) : str_(rhs.str_) {\n    rhs.str_ = 0;\n  }\n#endif\n\n  //! Constructor\n  JsonString(const char *cstr) {\n    str_ = cstr ? mod_json_string_set(cstr, (mod_json_size_t)std::strlen(cstr))\n                : 0;\n  }\n\n  //! Constructor\n  JsonString(const char *cstr, size_type len) {\n    str_ = mod_json_string_set(cstr, len);\n  }\n\n  //! Constructor\n  JsonString(const std::string &str) {\n    str_ = mod_json_string_set(str.c_str(), (mod_json_size_t)str.size());\n  }\n\n  //! Destructor\n  ~JsonString(void) {\n    mod_json_string_unset(str_);\n  }\n\n  //! 
Assign new contents to the string, replacing its current content\n  JsonString &operator=(const JsonString &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the string, replacing its current content\n  JsonString &operator=(JsonString &&rhs) {\n    this->assign(std::move(rhs));\n    return *this;\n  }\n#endif\n\n  //! Assign new contents to the string, replacing its current content\n  JsonString &operator=(const char *cstr) {\n    this->assign(cstr);\n    return *this;\n  }\n\n  //! Assign new contents to the string, replacing its current content\n  JsonString &operator=(const std::string &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n  //! Append a JSON string\n  JsonString &operator+=(const JsonString &str) {\n    this->append(str);\n    return *this;\n  }\n\n  //! Append a c-style string\n  JsonString &operator+=(const char *cstr) {\n    this->append(cstr);\n    return *this;\n  }\n\n  //! Append a character to string\n  JsonString &operator+=(char c) {\n    this->append(c);\n    return *this;\n  }\n\n  //! Equality\n  bool operator==(const JsonString &rhs) const {\n    return (mod_json_string_compare(str_, rhs.str_) == 0);\n  }\n\n  //! No equality\n  bool operator!=(const JsonString &rhs) const {\n    return !(*this == rhs);\n  }\n\n  //! Retrieve the character at index n\n  char &operator[](size_type n) {\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonString::operator[]\");\n    }\n    return *(str_->first + n);\n  }\n\n  //! Retrieve the character at index n\n  const char &operator[](size_type n) const {\n    return *(str_->first + n);\n  }\n\n  //! Retrieve non-zero if the string is valid\n  bool is_valid(void) const {\n    return (str_ != (mod_json_string_t *)0);\n  }\n\n  //! Retrieve non-zero if the string is empty\n  bool empty(void) const {\n    return mod_json_string_empty(str_);\n  }\n\n  //! 
Assign a JSON string\n  void assign(const JsonString &rhs) {\n    mod_json_string_unset(str_);\n    str_ = rhs.str_ ? mod_json_string_grab(rhs.str_) : 0;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign a JSON string\n  void assign(JsonString &&rhs) {\n    mod_json_string_unset(str_);\n    str_ = rhs.str_;\n    rhs.str_ = 0;\n  }\n#endif\n\n  //! Assign a c-style string\n  void assign(const char *cstr) {\n    if (cstr) {\n      if (!copy_on_write() ||\n          mod_json_string_assign(str_, cstr,\n                                 (mod_json_size_t)std::strlen(cstr)) != 0) {\n        throw std::runtime_error(\"JsonString::assign\");\n      }\n    }\n  }\n\n  //! Assign a c-style string\n  void assign(const char *cstr, size_type len) {\n    if (!copy_on_write() || mod_json_string_assign(str_, cstr, len) != 0) {\n      throw std::runtime_error(\"JsonString::assign\");\n    }\n  }\n\n  //! Assign a STL-style string\n  void assign(const std::string &str) {\n    if (!copy_on_write() ||\n        mod_json_string_assign(str_, str.c_str(),\n                               (mod_json_size_t)str.size()) != 0) {\n      throw std::runtime_error(\"JsonString::assign\");\n    }\n  }\n\n  //! Append a JSON string\n  void append(const JsonString &str) {\n    if (str.str_) {\n      if (!copy_on_write() || mod_json_string_add(str_, str.str_) != 0) {\n        throw std::runtime_error(\"JsonString::append\");\n      }\n    }\n  }\n\n  //! Append a c-style string\n  void append(const char *cstr) {\n    if (cstr) {\n      if (!copy_on_write() ||\n          mod_json_string_append(str_, cstr,\n                                 (mod_json_size_t)std::strlen(cstr)) != 0) {\n        throw std::runtime_error(\"JsonString::append\");\n      }\n    }\n  }\n\n  //! Append a c-style string\n  void append(const char *cstr, size_type len) {\n    if (!copy_on_write() || mod_json_string_append(str_, cstr, len) != 0) {\n      throw std::runtime_error(\"JsonString::append\");\n    }\n  }\n\n  //! 
Append a STL-style string\n  void append(const std::string &str) {\n    if (!copy_on_write() ||\n        mod_json_string_append(str_, str.c_str(),\n                               (mod_json_size_t)str.size()) != 0) {\n      throw std::runtime_error(\"JsonString::append\");\n    }\n  }\n\n  //! Append a character to string\n  void append(char c) {\n    if (!copy_on_write() || mod_json_string_append(str_, &c, 1) != 0) {\n      throw std::runtime_error(\"JsonString::append\");\n    }\n  }\n\n  //! Retrieve the character at index n\n  char &at(size_type n) {\n    if (this->size() <= n) {\n      throw std::out_of_range(\"JsonString::at\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonString::at\");\n    }\n    return *(str_->first + n);\n  }\n\n  //! Retrieve the character at index n\n  const char &at(size_type n) const {\n    if (this->size() <= n) {\n      throw std::out_of_range(\"JsonString::at\");\n    }\n    return *(str_->first + n);\n  }\n\n  //! Request a change in capacity\n  void reserve(size_type n) {\n    if (!copy_on_write() || mod_json_string_reserve(str_, n) != 0) {\n      throw std::runtime_error(\"JsonString::reserve\");\n    }\n  }\n\n  //! Clear the JSON string\n  void clear(void) {\n    mod_json_string_unset(str_);\n    str_ = 0;\n  }\n\n  //! Exchange the content with another JSON string\n  void swap(JsonString &rhs) {\n    mod_json_string_t *str = str_;\n    str_ = rhs.str_;\n    rhs.str_ = str;\n  }\n\n  //! Retrieve the data pointer\n  char *data(void) {\n    return mod_json_string_data(str_);\n  }\n\n  //! Retrieve the data pointer\n  const char *data(void) const {\n    return mod_json_string_data(str_);\n  }\n\n  //! Retrieve HASH of a JSON string\n  size_type hash(void) const {\n    return mod_json_string_hash(str_);\n  }\n\n  //! Compare two JSON strings (case sensitive)\n  int compare(const JsonString &rhs) const {\n    return mod_json_string_compare(str_, rhs.str_);\n  }\n\n  //! 
Compare two strings (case sensitive)\n  int compare(const char *cstr) const {\n    const char *self = this->c_str();\n    if (self && cstr) {\n      return std::strcmp(self, cstr);\n    }\n\n    // particular case\n    if (!self && cstr) {\n      return -1;\n    } else if (self && !cstr) {\n      return 1;\n    }\n    return 0;\n  }\n\n  // Encode a JSON string\n  JsonString encode(void) const {\n    JsonString ret;\n    ret.str_ = mod_json_string_encode(str_);\n    return ret;\n  }\n\n  // Decode a JSON string\n  JsonString decode(void) const {\n    JsonString ret;\n    ret.str_ = mod_json_string_decode(str_);\n    return ret;\n  }\n\n  //! Retrieve the capacity of string\n  size_type capacity(void) const {\n    return mod_json_string_capacity(str_);\n  }\n\n  //! Retrieve the length of string\n  size_type size(void) const {\n    return mod_json_string_length(str_);\n  }\n\n  //! Retrieve the length of string\n  size_type length(void) const {\n    return mod_json_string_length(str_);\n  }\n\n  //! Retrieve refer-counter of string\n  ssize_type refer(void) const {\n    return mod_json_string_refer(str_);\n  }\n\n  //! Retrieve the c-style string\n  const char *c_str(void) const {\n    return mod_json_string_cstr(str_);\n  }\n\n  //! Convert string to float\n  float_type as_float(void) const {\n    return mod_json_string_float(str_);\n  }\n\n  //! Convert string to integer\n  integer_type as_integer(void) const {\n    return mod_json_string_integer(str_);\n  }\n\n  //! Retrieve string as a STL string\n  std::string as_stl_string(void) const {\n    if (!this->empty()) {\n      return std::string(this->data(), this->size());\n    }\n    return std::string();\n  }\n\n protected:\n  //! 
Clone the string for writing\n  bool copy_on_write(void) {\n    if (str_) {\n      if (mod_json_string_is_shared(str_)) {\n        mod_json_string_put(str_);\n        str_ = mod_json_string_clone(str_);\n      }\n    } else {\n      str_ = mod_json_string_set(\"\", 0);\n    }\n    return (str_ != 0);\n  }\n\n  //! Clone the value and leak it\n  bool copy_and_leak(void) {\n    if (copy_on_write()) {\n      mod_json_string_set_leaked(str_);\n      return true;\n    }\n    return false;\n  }\n\n private:\n  mod_json_string_t *str_;\n};\n\nclass JsonArray;\nclass JsonObject;\n\n/*! JSON Value\n */\nclass JsonValue {\n public:\n  typedef mod_json_size_t size_type;\n  typedef mod_json_ssize_t ssize_type;\n  typedef mod_json_float_t float_type;\n  typedef mod_json_integer_t integer_type;\n\n  //! Constructor\n  JsonValue(void) : val_(0) {}\n\n  //! Constructor\n  explicit JsonValue(const bool &val) {\n    val_ = mod_json_value_set_boolean((mod_json_boolean_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const signed char &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const char &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const short int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const long int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const long long int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const float &val) {\n    val_ = mod_json_value_set_float((mod_json_float_t)val);\n  }\n\n  //! 
Constructor\n  explicit JsonValue(const double &val) {\n    val_ = mod_json_value_set_float((mod_json_float_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const long double &val) {\n    val_ = mod_json_value_set_float((mod_json_float_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const unsigned char &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const unsigned short int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const unsigned int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const unsigned long int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  explicit JsonValue(const unsigned long long int &val) {\n    val_ = mod_json_value_set_integer((mod_json_integer_t)val);\n  }\n\n  //! Constructor\n  JsonValue(const JsonString &val) {\n    val_ = mod_json_value_set_string(*(mod_json_string_t **)&val);\n  }\n\n  //! Constructor\n  JsonValue(const char *val) {\n    val_ = mod_json_value_set_buffer(\n        val, val ? (mod_json_size_t)std::strlen(val) : 0);\n  }\n\n  //! Constructor\n  JsonValue(const char *val, size_type len) {\n    val_ = mod_json_value_set_buffer(val, len);\n  }\n\n  //! Constructor\n  JsonValue(const std::string &val) {\n    val_ = mod_json_value_set_buffer(val.data(), (mod_json_size_t)val.size());\n  }\n\n  //! Constructor\n  JsonValue(const JsonArray &val) {\n    val_ = mod_json_value_set_array(*(mod_json_array_t **)&val);\n  }\n\n  //! Constructor\n  JsonValue(const JsonObject &val) {\n    val_ = mod_json_value_set_object(*(mod_json_object_t **)&val);\n  }\n\n  //! Constructor\n  JsonValue(const JsonValue &rhs) : val_(0) {\n    if (rhs.val_) {\n      val_ = mod_json_value_grab(rhs.val_);\n    }\n  }\n\n#if __cplusplus >= 201103L\n  //! 
Constructor\n  JsonValue(JsonValue &&rhs) : val_(rhs.val_) {\n    rhs.val_ = 0;\n  }\n#endif\n\n  //! Destructor\n  ~JsonValue(void) {\n    mod_json_value_unset(val_);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const JsonValue &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(JsonValue &&rhs) {\n    this->assign(std::move(rhs));\n    return *this;\n  }\n#endif\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const bool &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const signed char &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const char &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const short int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const long int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const long long int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const float &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! 
Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const double &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const long double &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const unsigned char &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const unsigned short int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const unsigned int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const unsigned long int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const unsigned long long int &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const JsonString &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const char *val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const std::string &val) {\n    this->assign(val);\n    return *this;\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const JsonArray &arr) {\n    this->assign(arr);\n    return *this;\n  }\n\n  //! 
Assign new contents to the value, replacing its current content\n  JsonValue &operator=(const JsonObject &obj) {\n    this->assign(obj);\n    return *this;\n  }\n\n  //! Equality\n  bool operator==(const JsonValue &rhs) const {\n    return mod_json_value_is_equal(val_, rhs.val_);\n  }\n\n  //! No equality\n  bool operator!=(const JsonValue &rhs) const {\n    return !(*this == rhs);\n  }\n\n  //! Treat self value as object by force, retrieving value of a key\n  JsonValue &operator[](const char *key) {\n    return this->get_value(key);\n  }\n\n  //! Retrieve a copy of the value by a key\n  JsonValue operator[](const char *key) const {\n    return this->get_value(key);\n  }\n\n  //! Treat self value as object by force, retrieving value of a key\n  JsonValue &operator[](const JsonString &key) {\n    return this->get_value(key.c_str());\n  }\n\n  //! Retrieve a copy of the value by a key\n  JsonValue operator[](const JsonString &key) const {\n    return this->get_value(key.c_str());\n  }\n\n  //! Treat self value as object by force, retrieving value of a key\n  JsonValue &operator[](const std::string &key) {\n    return this->get_value(key.c_str());\n  }\n\n  //! Retrieve a copy of the value by a key\n  JsonValue operator[](const std::string &key) const {\n    return this->get_value(key.c_str());\n  }\n\n  //! Treat self value as array by force, retrieving value at index n\n  JsonValue &operator[](size_type n) {\n    return this->get_value(n);\n  }\n\n  //! Retrieve a copy of the value at index n\n  JsonValue operator[](size_type n) const {\n    return this->get_value(n);\n  }\n\n  //! Retrieve non-zero if the value is valid\n  bool is_valid(void) const {\n    return (val_ != (mod_json_value_t *)0);\n  }\n\n  //! Retrieve non-zero if the value is an object\n  bool is_object(void) const {\n    return mod_json_value_is_object(val_);\n  }\n\n  //! 
Retrieve non-zero if the value is an array\n  bool is_array(void) const {\n    return mod_json_value_is_array(val_);\n  }\n\n  //! Retrieve non-zero if the value is a string\n  bool is_string(void) const {\n    return mod_json_value_is_string(val_);\n  }\n\n  //! Retrieve non-zero if the value is null\n  bool is_null(void) const {\n    return mod_json_value_is_null(val_);\n  }\n\n  //! Retrieve non-zero if the value is a float\n  bool is_float(void) const {\n    return mod_json_value_is_float(val_);\n  }\n\n  //! Retrieve non-zero if the value is an integer\n  bool is_integer(void) const {\n    return mod_json_value_is_integer(val_);\n  }\n\n  //! Retrieve non-zero if the value is a boolean\n  bool is_boolean(void) const {\n    return mod_json_value_is_boolean(val_);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const JsonValue &rhs) {\n    mod_json_value_unset(val_);\n    val_ = rhs.val_ ? mod_json_value_grab(rhs.val_) : 0;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the value, replacing its current content\n  void assign(JsonValue &&rhs) {\n    mod_json_value_unset(val_);\n    val_ = rhs.val_;\n    rhs.val_ = 0;\n  }\n#endif\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const bool &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_boolean(val_, (mod_json_boolean_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const signed char &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! 
Assign new contents to the value, replacing its current content\n  void assign(const char &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const short int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const long int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const long long int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const float &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_float(val_, (mod_json_float_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const double &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_float(val_, (mod_json_float_t)val);\n  }\n\n  //! 
Assign new contents to the value, replacing its current content\n  void assign(const long double &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_float(val_, (mod_json_float_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const unsigned char &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const unsigned short int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const unsigned int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const unsigned long int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const unsigned long long int &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const JsonString &val) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_string(val_, *(mod_json_string_t **)&val);\n  }\n\n  //! 
Assign new contents to the value, replacing its current content\n  void assign(const char *val) {\n    JsonString str(val);\n    if (!str.is_valid() || !copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const char *val, size_type len) {\n    JsonString str(val, len);\n    if (!str.is_valid() || !copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const std::string &val) {\n    JsonString str(val);\n    if (!str.is_valid() || !copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::assign\");\n    }\n    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);\n  }\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const JsonArray &arr);\n\n  //! Assign new contents to the value, replacing its current content\n  void assign(const JsonObject &obj);\n\n  //! Retrieve refer-counter of JSON value\n  ssize_type refer(void) const {\n    return mod_json_value_refer(val_);\n  }\n\n  //! Retrieve value as JSON format string\n  JsonString as_json_string(void) const {\n    mod_json_string_t *tmp = mod_json_dump(val_);\n    JsonString ret = *reinterpret_cast<JsonString *>(&tmp);\n    if (tmp) {\n      mod_json_string_unset(tmp);\n    }\n    return ret;\n  }\n\n  //! Retrieve value as a STL string\n  std::string as_stl_string(void) const {\n    if (is_string()) {\n      return to_string().as_stl_string();\n    }\n    return std::string();\n  }\n\n  //! 
Retrieve value as JSON string\n  const JsonString &as_string(void) const {\n    if (!is_string()) {\n      throw std::logic_error(\"JsonValue::as_string\");\n    }\n    return to_string();\n  }\n\n  //! Retrieve value as c-style string\n  const char *as_c_string(void) const {\n    return mod_json_value_cstring(val_);\n  }\n\n  //! Retrieve value as JSON string\n  JsonString &as_string(void) {\n    if (!is_string()) {\n      throw std::logic_error(\"JsonValue::as_string\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonValue::as_string\");\n    }\n    return to_string();\n  }\n\n  //! Retrieve value as JSON array\n  const JsonArray &as_array(void) const {\n    if (!is_array()) {\n      throw std::logic_error(\"JsonValue::as_array\");\n    }\n    return to_array();\n  }\n\n  //! Retrieve value as JSON array\n  JsonArray &as_array(void) {\n    if (!is_array()) {\n      throw std::logic_error(\"JsonValue::as_array\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonValue::as_array\");\n    }\n    return to_array();\n  }\n\n  //! Retrieve value as JSON object\n  const JsonObject &as_object(void) const {\n    if (!is_object()) {\n      throw std::logic_error(\"JsonValue::as_object\");\n    }\n    return to_object();\n  }\n\n  //! Retrieve value as JSON object\n  JsonObject &as_object(void) {\n    if (!is_object()) {\n      throw std::logic_error(\"JsonValue::as_object\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonValue::as_object\");\n    }\n    return to_object();\n  }\n\n  //! Retrieve value as float\n  float_type as_float(void) const {\n    return mod_json_value_float(val_);\n  }\n\n  //! Retrieve value as integer\n  integer_type as_integer(void) const {\n    return mod_json_value_integer(val_);\n  }\n\n  //! Retrieve value as boolean\n  bool as_bool(void) const {\n    return mod_json_value_boolean(val_);\n  }\n\n  //! 
Exchange the content with another JSON value\n  void swap(JsonValue &rhs) {\n    mod_json_value_t *val = val_;\n    val_ = rhs.val_;\n    rhs.val_ = val;\n  }\n\n  //! Merge another JSON value\n  void merge(const JsonValue &rhs) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonValue::merge\");\n    }\n    mod_json_value_merge(val_, rhs.val_);\n  }\n\n  //! Parse a string as a JSON value\n  bool parse(const char *str) {\n    mod_json_token_t *tok = mod_json_token_create(NULL);\n\n    if (tok) {\n      mod_json_value_t *jval = mod_json_parse(tok, str);\n\n      mod_json_token_destroy(tok);\n      if (jval) {\n        *this = *reinterpret_cast<JsonValue *>(&jval);\n        mod_json_value_unset(jval);\n        return is_valid();\n      }\n    }\n    return false;\n  }\n\n  //! Parse a string as a JSON value\n  bool parse(const JsonString &str) {\n    return this->parse(str.c_str());\n  }\n\n  //! Parse a string as a JSON value\n  bool parse(const std::string &str) {\n    return this->parse(str.c_str());\n  }\n\n protected:\n  //! Clone the value for writing\n  bool copy_on_write(void) {\n    if (val_) {\n      if (mod_json_value_is_shared(val_)) {\n        mod_json_value_put(val_);\n        val_ = mod_json_value_clone(val_);\n      }\n    } else {\n      val_ = mod_json_value_set_null();\n    }\n    return (val_ != 0);\n  }\n\n  //! Clone the value and leak it\n  bool copy_and_leak(void) {\n    if (copy_on_write()) {\n      mod_json_value_set_leaked(val_);\n      return true;\n    }\n    return false;\n  }\n\n  //! Convert value to JSON object\n  JsonObject &to_object(void);\n\n  //! Convert value to JSON object\n  const JsonObject &to_object(void) const;\n\n  //! Convert value to JSON array\n  JsonArray &to_array(void);\n\n  //! Convert value to JSON array\n  const JsonArray &to_array(void) const;\n\n  //! Convert value to JSON string\n  JsonString &to_string(void);\n\n  //! 
Convert value to JSON string\n  const JsonString &to_string(void) const;\n\n  //! Treat self value as object by force, retrieving value of a key\n  JsonValue &get_value(const char *key);\n\n  //! Retrieve a reference of value by a key\n  JsonValue get_value(const char *key) const;\n\n  //! Treat self value as array by force, retrieving value at index n\n  JsonValue &get_value(size_type n);\n\n  //! Retrieve a reference of value at index n\n  JsonValue get_value(size_type n) const;\n\n  //! Set the new array to the value, replacing its current content\n  void set_value(const JsonArray &val);\n\n  //! Set the new object to the value, replacing its current content\n  void set_value(const JsonObject &val);\n\n private:\n  mod_json_value_t *val_;\n};\n\n/*! JSON Array\n */\nclass JsonArray {\n public:\n  typedef mod_json_size_t size_type;\n  typedef mod_json_ssize_t ssize_type;\n\n  class iterator;\n  class const_iterator;\n  class reverse_iterator;\n  class const_reverse_iterator;\n\n  /*! Const iterator of JSON Array\n   */\n  class const_iterator {\n   public:\n    //! Constructor\n    const_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const const_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const const_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    const_iterator &operator++() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    const_iterator operator++(int) {\n      const_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_iterator &operator--() {\n      --iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    const_iterator operator--(int) {\n      const_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. 
*iter)\n    const JsonValue &operator*() const {\n      return *reinterpret_cast<const JsonValue *>(iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    const JsonValue *operator->() const {\n      return reinterpret_cast<const JsonValue *>(iter_);\n    }\n\n    //! Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonArray;\n    friend class JsonArray::iterator;\n    friend class JsonArray::reverse_iterator;\n    friend class JsonArray::const_reverse_iterator;\n\n    //! Constructor for friends\n    const_iterator(mod_json_value_t *const *iter) : iter_(iter) {}\n\n   private:\n    mod_json_value_t *const *iter_;\n  };\n\n  /*! iterator of JSON Array\n   */\n  class iterator {\n   public:\n    //! Constructor\n    iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    iterator &operator++() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    iterator operator++(int) {\n      iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    iterator &operator--() {\n      --iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    iterator operator--(int) {\n      iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    JsonValue &operator*() const {\n      return *reinterpret_cast<JsonValue *>(iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    JsonValue *operator->() const {\n      return reinterpret_cast<JsonValue *>(iter_);\n    }\n\n    //! Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n    //! 
Retrieve as reverse iterator\n    operator reverse_iterator() const {\n      return reverse_iterator(iter_);\n    }\n\n    //! Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonArray;\n    friend class JsonArray::reverse_iterator;\n\n    //! Constructor for friends\n    iterator(mod_json_value_t **iter) : iter_(iter) {}\n\n   private:\n    mod_json_value_t **iter_;\n  };\n\n  /*! Const Reverse iterator of JSON Array\n   */\n  class const_reverse_iterator {\n   public:\n    //! Constructor\n    const_reverse_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const const_reverse_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const const_reverse_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    const_reverse_iterator &operator++() {\n      --iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    const_reverse_iterator operator++(int) {\n      const_reverse_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_reverse_iterator &operator--() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    const_reverse_iterator operator--(int) {\n      const_reverse_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    const JsonValue &operator*() const {\n      return *reinterpret_cast<const JsonValue *>(iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    const JsonValue *operator->() const {\n      return reinterpret_cast<const JsonValue *>(iter_);\n    }\n\n    //! 
Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonArray;\n    friend class JsonArray::iterator;\n    friend class JsonArray::const_iterator;\n    friend class JsonArray::reverse_iterator;\n\n    //! Constructor for friends\n    const_reverse_iterator(mod_json_value_t *const *iter) : iter_(iter) {}\n\n   private:\n    mod_json_value_t *const *iter_;\n  };\n\n  /*! Reverse iterator of JSON Array\n   */\n  class reverse_iterator {\n   public:\n    //! Constructor\n    reverse_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const reverse_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const reverse_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    reverse_iterator &operator++() {\n      --iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    reverse_iterator operator++(int) {\n      reverse_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    reverse_iterator &operator--() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    reverse_iterator operator--(int) {\n      reverse_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    JsonValue &operator*() const {\n      return *reinterpret_cast<JsonValue *>(iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    JsonValue *operator->() const {\n      return reinterpret_cast<JsonValue *>(iter_);\n    }\n\n    //! Retrieve as iterator\n    operator iterator() const {\n      return iterator(iter_);\n    }\n\n    //! Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n    //! 
Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonArray;\n    friend class JsonArray::iterator;\n\n    //! Constructor for friends\n    reverse_iterator(mod_json_value_t **iter) : iter_(iter) {}\n\n   private:\n    mod_json_value_t **iter_;\n  };\n\n  //! Constructor\n  JsonArray(void) : arr_(0) {}\n\n  //! Constructor\n  JsonArray(const JsonArray &rhs) : arr_(0) {\n    if (rhs.arr_) {\n      arr_ = mod_json_array_grab(rhs.arr_);\n    }\n  }\n\n#if __cplusplus >= 201103L\n  //! Constructor\n  JsonArray(JsonArray &&rhs) : arr_(rhs.arr_) {\n    rhs.arr_ = 0;\n  }\n#endif\n\n  //! Destructor\n  ~JsonArray(void) {\n    mod_json_array_unset(arr_);\n  }\n\n  //! Assign new contents to the array, replacing its current content\n  JsonArray &operator=(const JsonArray &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the array, replacing its current content\n  JsonArray &operator=(JsonArray &&rhs) {\n    this->assign(std::move(rhs));\n    return *this;\n  }\n#endif\n\n  //! Equality\n  bool operator==(const JsonArray &rhs) const {\n    return mod_json_array_is_equal(arr_, rhs.arr_);\n  }\n\n  //! No equality\n  bool operator!=(const JsonArray &rhs) const {\n    return !(*this == rhs);\n  }\n\n  //! Retrieve the value at index n, if no one exists, throw an exception.\n  JsonValue &operator[](size_type n) {\n    return this->at(n);\n  }\n\n  //! Retrieve the value at index n, if no one exists, return a null value.\n  JsonValue operator[](size_type n) const {\n    return ((n < this->size()) ? this->get_value(n) : JsonValue());\n  }\n\n  //! Retrieve non-zero if the array is valid\n  bool is_valid(void) const {\n    return (arr_ != (mod_json_array_t *)0);\n  }\n\n  //! Retrieve non-zero if the array is empty\n  bool empty(void) const {\n    return mod_json_array_empty(arr_);\n  }\n\n  //! 
Retrieve the size of JSON array\n  size_type size(void) const {\n    return mod_json_array_count(arr_);\n  }\n\n  //! Retrieve the capacity of JSON array\n  size_type capacity(void) const {\n    return mod_json_array_capacity(arr_);\n  }\n\n  //! Retrieve refer-counter of JSON array\n  ssize_type refer(void) const {\n    return mod_json_array_refer(arr_);\n  }\n\n  //! Assign new contents to the array, replacing its current content\n  void assign(const JsonArray &rhs) {\n    mod_json_array_unset(arr_);\n    arr_ = rhs.arr_ ? mod_json_array_grab(rhs.arr_) : 0;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the array, replacing its current content\n  void assign(JsonArray &&rhs) {\n    mod_json_array_unset(arr_);\n    arr_ = rhs.arr_;\n    rhs.arr_ = 0;\n  }\n#endif\n\n  //! Request a change in capacity\n  void reserve(size_type n) {\n    if (!copy_on_write() || mod_json_array_reserve(arr_, n) != 0) {\n      throw std::runtime_error(\"JsonArray::reserve\");\n    }\n  }\n\n  //! Reverse the order of the elements\n  void reverse(void) {\n    if (arr_ && copy_on_write()) {\n      mod_json_array_reverse(arr_);\n    }\n  }\n\n  //! Push a value to array\n  void push(const JsonValue &val) {\n    JsonValue tmp(val);\n\n    if (!copy_on_write() ||\n        mod_json_array_push(arr_, *((mod_json_value_t **)&tmp)) != 0) {\n      throw std::runtime_error(\"JsonArray::push\");\n    }\n  }\n\n  //! Pop the last element from array\n  void pop(void) {\n    if (arr_) {\n      if (!copy_on_write()) {\n        throw std::runtime_error(\"JsonArray::pop\");\n      }\n      mod_json_array_pop(arr_);\n    }\n  }\n\n  //! Remove the first element of array\n  void shift(void) {\n    if (arr_) {\n      if (!copy_on_write()) {\n        throw std::runtime_error(\"JsonArray::shift\");\n      }\n      mod_json_array_shift(arr_);\n    }\n  }\n\n  //! 
Retrieve the value at index n\n  JsonValue &at(size_type n) {\n    if (this->size() <= n) {\n      throw std::out_of_range(\"JsonArray::at\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonArray::at\");\n    }\n    return this->get_value(n);\n  }\n\n  //! Retrieve the value at index n\n  const JsonValue &at(size_type n) const {\n    if (this->size() <= n) {\n      throw std::out_of_range(\"JsonArray::at\");\n    }\n    return this->get_value(n);\n  }\n\n  //! Retrieve a reference to the first element\n  JsonValue &front(void) {\n    if (this->size() <= 0) {\n      throw std::out_of_range(\"JsonArray::front\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonArray::front\");\n    }\n    return this->get_value(0);\n  }\n\n  //! Retrieve a reference to the first element\n  const JsonValue &front(void) const {\n    if (this->size() <= 0) {\n      throw std::out_of_range(\"JsonArray::front\");\n    }\n    return this->get_value(0);\n  }\n\n  //! Retrieve a reference to the last element\n  JsonValue &back(void) {\n    if (this->size() <= 0) {\n      throw std::out_of_range(\"JsonArray::back\");\n    }\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonArray::back\");\n    }\n    return this->get_value(this->size() - 1);\n  }\n\n  //! Retrieve a reference to the last element\n  const JsonValue &back(void) const {\n    if (this->size() <= 0) {\n      throw std::out_of_range(\"JsonArray::back\");\n    }\n    return this->get_value(this->size() - 1);\n  }\n\n  //! Clear the JSON array\n  void clear(void) {\n    mod_json_array_unset(arr_);\n    arr_ = 0;\n  }\n\n  //! Exchange the content with another JSON array\n  void swap(JsonArray &rhs) {\n    mod_json_array_t *arr = arr_;\n    arr_ = rhs.arr_;\n    rhs.arr_ = arr;\n  }\n\n  //! 
Merge another JSON array\n  void merge(const JsonArray &rhs) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonArray::merge\");\n    }\n    mod_json_array_merge(arr_, rhs.arr_);\n  }\n\n  //! Resize a JSON array so that it contains n elements\n  void resize(size_type n, const JsonValue &val = JsonValue()) {\n    if (!copy_on_write() ||\n        mod_json_array_resize(arr_, n, *((mod_json_value_t **)&val)) != 0) {\n      throw std::runtime_error(\"JsonArray::resize\");\n    }\n  }\n\n  //! Retrieve an iterator pointing to the first element\n  iterator begin(void) {\n    if (copy_and_leak()) {\n      return iterator(mod_json_array_begin(arr_));\n    }\n    return iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the first element\n  const_iterator begin(void) const {\n    if (arr_) {\n      return const_iterator(mod_json_array_begin(arr_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the first element\n  const_iterator cbegin(void) const {\n    if (arr_) {\n      return const_iterator(mod_json_array_begin(arr_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a reverse iterator pointing to the last element\n  reverse_iterator rbegin(void) {\n    if (copy_and_leak()) {\n      return reverse_iterator(mod_json_array_rbegin(arr_));\n    }\n    return reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the last element\n  const_reverse_iterator rbegin(void) const {\n    if (arr_) {\n      return const_reverse_iterator(mod_json_array_rbegin(arr_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the last element\n  const_reverse_iterator crbegin(void) const {\n    if (arr_) {\n      return const_reverse_iterator(mod_json_array_rbegin(arr_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! 
Retrieve an iterator pointing to the past-the-end element\n  iterator end(void) {\n    if (copy_and_leak()) {\n      return iterator(mod_json_array_end(arr_));\n    }\n    return iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the past-the-end element\n  const_iterator end(void) const {\n    if (arr_) {\n      return const_iterator(mod_json_array_end(arr_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the past-the-end element\n  const_iterator cend(void) const {\n    if (arr_) {\n      return const_iterator(mod_json_array_end(arr_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a reverse iterator pointing to the past-the-end element\n  reverse_iterator rend(void) {\n    if (copy_and_leak()) {\n      return reverse_iterator(mod_json_array_rend(arr_));\n    }\n    return reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the past-the-end element\n  const_reverse_iterator rend(void) const {\n    if (arr_) {\n      return const_reverse_iterator(mod_json_array_rend(arr_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the past-the-end element\n  const_reverse_iterator crend(void) const {\n    if (arr_) {\n      return const_reverse_iterator(mod_json_array_rend(arr_));\n    }\n    return const_reverse_iterator();\n  }\n\n protected:\n  //! Clone the array for writing\n  bool copy_on_write(void) {\n    if (arr_) {\n      if (mod_json_array_is_shared(arr_)) {\n        mod_json_array_put(arr_);\n        arr_ = mod_json_array_clone(arr_);\n      }\n    } else {\n      arr_ = mod_json_array_set_default();\n    }\n    return (arr_ != 0);\n  }\n\n  //! Clone the array and leak it\n  bool copy_and_leak(void) {\n    if (copy_on_write()) {\n      mod_json_array_set_leaked(arr_);\n      return true;\n    }\n    return false;\n  }\n\n  //! 
Retrieve the value at index n\n  JsonValue &get_value(size_type n) {\n    return *reinterpret_cast<JsonValue *>(arr_->first + n);\n  }\n\n  //! Retrieve the value at index n\n  const JsonValue &get_value(size_type n) const {\n    return *reinterpret_cast<JsonValue *>(arr_->first + n);\n  }\n\n private:\n  mod_json_array_t *arr_;\n};\n\n/*! JSON Pair\n */\nclass JsonPair {\n public:\n  //! Constructor\n  JsonPair(void) : pair_(0) {}\n\n  //! Retrieve non-zero if the pair is valid\n  bool is_valid(void) const {\n    return (pair_ != (mod_json_pair_t *)0);\n  }\n\n  //! Retrieve the key of pair\n  const JsonString &key(void) const {\n    return *reinterpret_cast<JsonString *>(&pair_->key);\n  }\n\n  //! Retrieve the value of pair\n  JsonValue &value(void) {\n    return *reinterpret_cast<JsonValue *>(&pair_->val);\n  }\n\n  //! Retrieve the value of pair\n  const JsonValue &value(void) const {\n    return *reinterpret_cast<JsonValue *>(&pair_->val);\n  }\n\n protected:\n  friend class JsonObject;\n\n  //! Constructor for friends\n  JsonPair(mod_json_pair_t *pair) : pair_(pair) {}\n\n  //! Constructor for friends\n  JsonPair(const JsonPair &rhs) : pair_(rhs.pair_) {}\n\n private:\n  mod_json_pair_t *pair_;\n};\n\n/*! JSON Object\n */\nclass JsonObject {\n public:\n  typedef mod_json_size_t size_type;\n  typedef mod_json_ssize_t ssize_type;\n\n  class iterator;\n  class const_iterator;\n  class reverse_iterator;\n  class const_reverse_iterator;\n\n  /*! Const iterator of JSON Object\n   */\n  class const_iterator {\n   public:\n    //! Constructor\n    const_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const const_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const const_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    const_iterator &operator++() {\n      ++iter_;\n      return *this;\n    }\n\n    //! 
Increment (Suffix)\n    const_iterator operator++(int) {\n      const_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_iterator &operator--() {\n      --iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    const_iterator operator--(int) {\n      const_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    const JsonPair &operator*() const {\n      return *reinterpret_cast<const JsonPair *>(&iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    const JsonPair *operator->() const {\n      return reinterpret_cast<const JsonPair *>(&iter_);\n    }\n\n    //! Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonObject;\n    friend class JsonObject::iterator;\n    friend class JsonObject::reverse_iterator;\n    friend class JsonObject::const_reverse_iterator;\n\n    //! Constructor for friends\n    const_iterator(const mod_json_pair_t *iter) : iter_(iter) {}\n\n   private:\n    const mod_json_pair_t *iter_;\n  };\n\n  /*! iterator of JSON Object\n   */\n  class iterator {\n   public:\n    //! Constructor\n    iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    iterator &operator++() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    iterator operator++(int) {\n      iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    iterator &operator--() {\n      --iter_;\n      return *this;\n    }\n\n    //! 
Decrement (Suffix)\n    iterator operator--(int) {\n      iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    JsonPair &operator*() const {\n      return *reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    JsonPair *operator->() const {\n      return reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);\n    }\n\n    //! Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n    //! Retrieve as reverse iterator\n    operator reverse_iterator() const {\n      return reverse_iterator(iter_);\n    }\n\n    //! Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonObject;\n    friend class JsonObject::reverse_iterator;\n\n    //! Constructor for friends\n    iterator(mod_json_pair_t *iter) : iter_(iter) {}\n\n   private:\n    mod_json_pair_t *iter_;\n  };\n\n  /*! Const Reverse iterator of JSON Object\n   */\n  class const_reverse_iterator {\n   public:\n    //! Constructor\n    const_reverse_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const const_reverse_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const const_reverse_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    const_reverse_iterator &operator++() {\n      --iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    const_reverse_iterator operator++(int) {\n      const_reverse_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    const_reverse_iterator &operator--() {\n      ++iter_;\n      return *this;\n    }\n\n    //! 
Decrement (Suffix)\n    const_reverse_iterator operator--(int) {\n      const_reverse_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    const JsonPair &operator*() const {\n      return *reinterpret_cast<const JsonPair *>(&iter_);\n    }\n\n    //! Structure dereference (eg. iter->)\n    const JsonPair *operator->() const {\n      return reinterpret_cast<const JsonPair *>(&iter_);\n    }\n\n    //! Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonObject;\n    friend class JsonObject::iterator;\n    friend class JsonObject::const_iterator;\n    friend class JsonObject::reverse_iterator;\n\n    //! Constructor for friends\n    const_reverse_iterator(const mod_json_pair_t *iter) : iter_(iter) {}\n\n   private:\n    const mod_json_pair_t *iter_;\n  };\n\n  /*! Reverse iterator of JSON Object\n   */\n  class reverse_iterator {\n   public:\n    //! Constructor\n    reverse_iterator(void) : iter_(0) {}\n\n    //! Equality\n    bool operator==(const reverse_iterator &rhs) const {\n      return (iter_ == rhs.iter_);\n    }\n\n    //! No equality\n    bool operator!=(const reverse_iterator &rhs) const {\n      return (iter_ != rhs.iter_);\n    }\n\n    //! Increment (Prefix)\n    reverse_iterator &operator++() {\n      --iter_;\n      return *this;\n    }\n\n    //! Increment (Suffix)\n    reverse_iterator operator++(int) {\n      reverse_iterator tmp = *this;\n      --iter_;\n      return tmp;\n    }\n\n    //! Decrement (Prefix)\n    reverse_iterator &operator--() {\n      ++iter_;\n      return *this;\n    }\n\n    //! Decrement (Suffix)\n    reverse_iterator operator--(int) {\n      reverse_iterator tmp = *this;\n      ++iter_;\n      return tmp;\n    }\n\n    //! Indirection (eg. *iter)\n    JsonPair &operator*() const {\n      return *reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);\n    }\n\n    //! 
Structure dereference (eg. iter->)\n    JsonPair *operator->() const {\n      return reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);\n    }\n\n    //! Retrieve as iterator\n    operator iterator() const {\n      return iterator(iter_);\n    }\n\n    //! Retrieve as const iterator\n    operator const_iterator() const {\n      return const_iterator(iter_);\n    }\n\n    //! Retrieve as const reverse iterator\n    operator const_reverse_iterator() const {\n      return const_reverse_iterator(iter_);\n    }\n\n   protected:\n    friend class JsonObject;\n    friend class JsonArray::iterator;\n\n    //! Constructor for friends\n    reverse_iterator(mod_json_pair_t *iter) : iter_(iter) {}\n\n   private:\n    mod_json_pair_t *iter_;\n  };\n\n  //! Constructor\n  JsonObject(void) : obj_(0) {}\n\n  //! Constructor\n  JsonObject(const JsonObject &rhs) : obj_(0) {\n    if (rhs.obj_) {\n      obj_ = mod_json_object_grab(rhs.obj_);\n    }\n  }\n\n#if __cplusplus >= 201103L\n  //! Constructor\n  JsonObject(JsonObject &&rhs) : obj_(rhs.obj_) {\n    rhs.obj_ = 0;\n  }\n#endif\n\n  //! Destructor\n  ~JsonObject(void) {\n    mod_json_object_unset(obj_);\n  }\n\n  //! Assign new contents to the object, replacing its current content\n  JsonObject &operator=(const JsonObject &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n\n#if __cplusplus >= 201103L\n  //! Assign new contents to the object, replacing its current content\n  JsonObject &operator=(JsonObject &&rhs) {\n    this->assign(std::move(rhs));\n    return *this;\n  }\n#endif\n\n  //! Equality\n  bool operator==(const JsonObject &rhs) const {\n    return mod_json_object_is_equal(obj_, rhs.obj_);\n  }\n\n  //! No equality\n  bool operator!=(const JsonObject &rhs) const {\n    return !(*this == rhs);\n  }\n\n  //! 
Retrieve the value of a key, if no one exists, create a new one.\n  JsonValue &operator[](const char *key) {\n    if (!key) {\n      throw std::invalid_argument(\"JsonObject::operator[]\");\n    }\n\n    if (!copy_and_leak()) {\n      throw std::runtime_error(\"JsonObject::operator[]\");\n    }\n\n    JsonPair pair(mod_json_object_touch(obj_, key));\n    if (!pair.is_valid()) {\n      throw std::runtime_error(\"JsonObject::operator[]\");\n    }\n    return pair.value();\n  }\n\n  //! Retrieve the value of a key, if no one exists, return a null value.\n  JsonValue operator[](const char *key) const {\n    if (!key) {\n      throw std::invalid_argument(\"JsonObject::operator[]\");\n    }\n\n    JsonPair pair(mod_json_object_find(obj_, key));\n    return (pair.is_valid() ? pair.value() : JsonValue());\n  }\n\n  //! Retrieve the value of a key, if no one exists, create a new one.\n  JsonValue &operator[](const JsonString &key) {\n    return (*this)[key.c_str()];\n  }\n\n  //! Retrieve the value of a key, if no one exists, return a null value.\n  JsonValue operator[](const JsonString &key) const {\n    return (*this)[key.c_str()];\n  }\n\n  //! Retrieve non-zero if the object is valid\n  bool is_valid(void) const {\n    return (obj_ != (mod_json_object_t *)0);\n  }\n\n  //! Retrieve non-zero if the object is empty\n  bool empty(void) const {\n    return mod_json_object_empty(obj_);\n  }\n\n  //! Retrieve the size of JSON object\n  size_type size(void) const {\n    return mod_json_object_count(obj_);\n  }\n\n  //! Retrieve refer-counter of JSON object\n  ssize_type refer(void) const {\n    return mod_json_object_refer(obj_);\n  }\n\n  //! Assign new contents to the object, replacing its current content\n  void assign(const JsonObject &rhs) {\n    mod_json_object_unset(obj_);\n    obj_ = rhs.obj_ ? mod_json_object_grab(rhs.obj_) : 0;\n  }\n\n#if __cplusplus >= 201103L\n  //! 
Assign new contents to the object, replacing its current content\n  void assign(JsonObject &&rhs) {\n    mod_json_object_unset(obj_);\n    obj_ = rhs.obj_;\n    rhs.obj_ = 0;\n  }\n#endif\n\n  //! Clear the JSON object\n  void clear(void) {\n    mod_json_object_unset(obj_);\n    obj_ = 0;\n  }\n\n  //! Set the value of a key\n  bool set(const JsonString &key, const JsonValue &val) {\n    JsonValue tmp(val);\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonObject::set\");\n    }\n    return (mod_json_object_insert(obj_, *(mod_json_string_t **)&key,\n                                   *(mod_json_value_t **)&tmp) !=\n            (mod_json_pair_t *)0);\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, JsonValue *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = pair.value();\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, JsonString *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid() || !pair.value().is_string()) {\n      return false;\n    }\n    *val = pair.value().as_string();\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, std::string *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid() || !pair.value().is_string()) {\n      return false;\n    }\n    *val = pair.value().as_stl_string();\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, JsonArray *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid() || !pair.value().is_array()) {\n      return false;\n    }\n    *val = pair.value().as_array();\n    return true;\n  }\n\n  //! 
Retrieve the value of a key\n  bool get(const char *key, JsonObject *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid() || !pair.value().is_object()) {\n      return false;\n    }\n    *val = pair.value().as_object();\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, bool *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = pair.value().as_bool();\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, signed char *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<signed char>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, char *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<char>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, short int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<short int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, long int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<long int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! 
Retrieve the value of a key\n  bool get(const char *key, long long int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<long long int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, unsigned char *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<unsigned char>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, unsigned short int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<unsigned short int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, unsigned int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<unsigned int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, unsigned long int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<unsigned long int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, unsigned long long int *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<unsigned long long int>(pair.value().as_integer());\n    return true;\n  }\n\n  //! 
Retrieve the value of a key\n  bool get(const char *key, float *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<float>(pair.value().as_float());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, double *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<double>(pair.value().as_float());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  bool get(const char *key, long double *val) const {\n    const JsonPair pair(mod_json_object_find(obj_, key));\n    if (!pair.is_valid()) {\n      return false;\n    }\n    *val = static_cast<long double>(pair.value().as_float());\n    return true;\n  }\n\n  //! Retrieve the value of a key\n  template <typename T>\n  bool get(const JsonString &key, T *val) const {\n    return this->get(key.c_str(), val);\n  }\n\n  //! Retrieve the value of a key\n  template <typename T>\n  bool get(const std::string &key, T *val) const {\n    return this->get(key.c_str(), val);\n  }\n\n  //! Delete a key-value pair from JSON object\n  void unset(const char *key) {\n    if (obj_ && key) {\n      if (!copy_on_write()) {\n        throw std::runtime_error(\"JsonObject::unset\");\n      }\n      mod_json_object_erase(obj_, key);\n    }\n  }\n\n  //! Retrieve non-zero if the key exists in JSON object\n  bool has(const char *key) const {\n    return (mod_json_object_find(obj_, key) != (mod_json_pair_t *)0);\n  }\n\n  //! Exchange the content with another JSON object\n  void swap(JsonObject &rhs) {\n    mod_json_object_t *obj = obj_;\n    obj_ = rhs.obj_;\n    rhs.obj_ = obj;\n  }\n\n  //! Merge another JSON object\n  void merge(const JsonObject &rhs) {\n    if (!copy_on_write()) {\n      throw std::runtime_error(\"JsonObject::merge\");\n    }\n    mod_json_object_merge(obj_, rhs.obj_);\n  }\n\n  //! 
Retrieve an iterator pointing to the first element\n  iterator begin(void) {\n    if (copy_and_leak()) {\n      return iterator(mod_json_object_begin(obj_));\n    }\n    return iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the first element\n  const_iterator begin(void) const {\n    if (obj_) {\n      return const_iterator(mod_json_object_begin(obj_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the first element\n  const_iterator cbegin(void) const {\n    if (obj_) {\n      return const_iterator(mod_json_object_begin(obj_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a reverse iterator pointing to the last element\n  reverse_iterator rbegin(void) {\n    if (copy_and_leak()) {\n      return reverse_iterator(mod_json_object_rbegin(obj_));\n    }\n    return reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the last element\n  const_reverse_iterator rbegin(void) const {\n    if (obj_) {\n      return const_reverse_iterator(mod_json_object_rbegin(obj_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the last element\n  const_reverse_iterator crbegin(void) const {\n    if (obj_) {\n      return const_reverse_iterator(mod_json_object_rbegin(obj_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! Retrieve an iterator pointing to the past-the-end element\n  iterator end(void) {\n    if (copy_and_leak()) {\n      return iterator(mod_json_object_end(obj_));\n    }\n    return iterator();\n  }\n\n  //! Retrieve a const iterator pointing to the past-the-end element\n  const_iterator end(void) const {\n    if (obj_) {\n      return const_iterator(mod_json_object_end(obj_));\n    }\n    return const_iterator();\n  }\n\n  //! 
Retrieve a const iterator pointing to the past-the-end element\n  const_iterator cend(void) const {\n    if (obj_) {\n      return const_iterator(mod_json_object_end(obj_));\n    }\n    return const_iterator();\n  }\n\n  //! Retrieve a reverse iterator pointing to the past-the-end element\n  reverse_iterator rend(void) {\n    if (copy_and_leak()) {\n      return reverse_iterator(mod_json_object_rend(obj_));\n    }\n    return reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the past-the-end element\n  const_reverse_iterator rend(void) const {\n    if (obj_) {\n      return const_reverse_iterator(mod_json_object_rend(obj_));\n    }\n    return const_reverse_iterator();\n  }\n\n  //! Retrieve a const reverse iterator pointing to the past-the-end element\n  const_reverse_iterator crend(void) const {\n    if (obj_) {\n      return const_reverse_iterator(mod_json_object_rend(obj_));\n    }\n    return const_reverse_iterator();\n  }\n\n protected:\n  //! Clone the object for writing\n  bool copy_on_write(void) {\n    if (obj_) {\n      if (mod_json_object_is_shared(obj_)) {\n        mod_json_object_put(obj_);\n        obj_ = mod_json_object_clone(obj_);\n      }\n    } else {\n      obj_ = mod_json_object_set_default();\n    }\n    return (obj_ != 0);\n  }\n\n  //! Clone the object and leak it\n  bool copy_and_leak(void) {\n    if (copy_on_write()) {\n      mod_json_object_set_leaked(obj_);\n      return true;\n    }\n    return false;\n  }\n\n private:\n  mod_json_object_t *obj_;\n};\n\n//! Assign new contents to the value, replacing its current content\ninline void JsonValue::assign(const JsonArray &arr) {\n  this->set_value(arr);\n}\n\n//! Assign new contents to the value, replacing its current content\ninline void JsonValue::assign(const JsonObject &obj) {\n  this->set_value(obj);\n}\n\n//! Convert value to JSON object\ninline JsonObject &JsonValue::to_object(void) {\n  return *reinterpret_cast<JsonObject *>(&val_->data.c_obj);\n}\n\n//! 
Convert value to JSON object\ninline const JsonObject &JsonValue::to_object(void) const {\n  return *reinterpret_cast<JsonObject *>(&val_->data.c_obj);\n}\n\n//! Convert value to JSON array\ninline JsonArray &JsonValue::to_array(void) {\n  return *reinterpret_cast<JsonArray *>(&val_->data.c_arr);\n}\n\n//! Convert value to JSON array\ninline const JsonArray &JsonValue::to_array(void) const {\n  return *reinterpret_cast<JsonArray *>(&val_->data.c_arr);\n}\n\n//! Convert value to JSON string\ninline JsonString &JsonValue::to_string(void) {\n  return *reinterpret_cast<JsonString *>(&val_->data.c_str);\n}\n\n//! Convert value to JSON string\ninline const JsonString &JsonValue::to_string(void) const {\n  return *reinterpret_cast<JsonString *>(&val_->data.c_str);\n}\n\n//! Treat self value as object by force, retrieving value of a key\ninline JsonValue &JsonValue::get_value(const char *key) {\n  if (!is_object()) {\n    *this = JsonObject();\n  }\n  if (!copy_and_leak()) {\n    throw std::runtime_error(\"JsonValue::get_value\");\n  }\n  return (to_object())[key];\n}\n\n//! Retrieve the value of a key, or a default-constructed value if self is not an object\ninline JsonValue JsonValue::get_value(const char *key) const {\n  return (is_object() ? (to_object())[key] : JsonValue());\n}\n\n//! Retrieve a reference to the value at index n; throws std::logic_error if self is not an array\ninline JsonValue &JsonValue::get_value(size_type n) {\n  if (!is_array()) {\n    throw std::logic_error(\"JsonValue::get_value\");\n  }\n  if (!copy_and_leak()) {\n    throw std::runtime_error(\"JsonValue::get_value\");\n  }\n  return (to_array())[n];\n}\n\n//! Retrieve the value at index n, or a default-constructed value if self is not an array\ninline JsonValue JsonValue::get_value(size_type n) const {\n  return (is_array() ? (to_array())[n] : JsonValue());\n}\n\n//! 
Set the new array to the value, replacing its current content\ninline void JsonValue::set_value(const JsonArray &val) {\n  if (!copy_on_write()) {\n    throw std::runtime_error(\"JsonValue::set_value\");\n  }\n  mod_json_value_assign_array(val_, *(mod_json_array_t **)&val);\n}\n\n//! Set the new object to the value, replacing its current content\ninline void JsonValue::set_value(const JsonObject &val) {\n  if (!copy_on_write()) {\n    throw std::runtime_error(\"JsonValue::set_value\");\n  }\n  mod_json_value_assign_object(val_, *(mod_json_object_t **)&val);\n}\n\n/*! JSON Parser\n */\nclass JsonParser {\n public:\n  typedef mod_json_size_t size_type;\n\n  //! Constructor\n  JsonParser(void)\n      : state_(mod_json_state_null), error_(mod_json_error_null), context_(0) {\n    option_.options = 0;\n    option_.object_depth = 0;\n    option_.array_depth = 0;\n  }\n\n  //! Destructor\n  ~JsonParser(void) {}\n\n  //! Set the max object depth\n  void set_object_depth(size_type depth) {\n    option_.object_depth = depth;\n  }\n\n  //! Set the max array depth\n  void set_array_depth(size_type depth) {\n    option_.array_depth = depth;\n  }\n\n  //! Enable/Disable comments\n  void set_comment(bool enable = true) {\n    if (enable) {\n      option_.options |= MOD_JSON_COMMENT;\n    } else {\n      option_.options &= ~MOD_JSON_COMMENT;\n    }\n  }\n\n  //! Enable/Disable loose strings\n  void set_unstrict(bool enable = true) {\n    if (enable) {\n      option_.options |= MOD_JSON_UNSTRICT;\n    } else {\n      option_.options &= ~MOD_JSON_UNSTRICT;\n    }\n  }\n\n  //! Enable/Disable simple format\n  void set_simple(bool enable = true) {\n    if (enable) {\n      option_.options |= MOD_JSON_SIMPLE;\n    } else {\n      option_.options &= ~MOD_JSON_SIMPLE;\n    }\n  }\n\n  //! 
Enable/Disable single quotes support\n  void set_squote(bool enable = true) {\n    if (enable) {\n      option_.options |= MOD_JSON_SQUOTE;\n    } else {\n      option_.options &= ~MOD_JSON_SQUOTE;\n    }\n  }\n\n  //! Convert a string to a JSON value\n  bool parse(const char *str, JsonValue *out) {\n    mod_json_token_t *tok;\n\n    state_ = mod_json_state_null;\n    error_ = mod_json_error_null;\n    context_ = str;\n\n    tok = mod_json_token_create(&option_);\n    if (tok) {\n      mod_json_value_t *jval;\n\n      jval = mod_json_parse(tok, str);\n\n      /* save information of token */\n      state_ = mod_json_token_state(tok);\n      error_ = mod_json_token_error(tok);\n      context_ = mod_json_token_context(tok);\n      mod_json_token_destroy(tok);\n\n      if (jval) {\n        *out = *reinterpret_cast<JsonValue *>(&jval);\n        mod_json_value_unset(jval);\n\n        return out->is_valid();\n      }\n    }\n    return false;\n  }\n\n  //! Retrieve the error code of parser\n  int error(void) const {\n    return (int)error_;\n  }\n\n  //! Retrieve the state code of parser\n  int state(void) const {\n    return (int)state_;\n  }\n\n  //! Retrieve the context of parser\n  const char *context(void) const {\n    return context_;\n  }\n\n private:\n  mod_json_option_t option_;\n  mod_json_state_t state_;\n  mod_json_error_t error_;\n  mod_json_cchar_t *context_;\n};\n\n/*! JSON Dumper\n */\nclass JsonDumper {\n public:\n  //! Constructor\n  JsonDumper(void) : str_() {}\n\n  //! Destructor\n  ~JsonDumper(void) {}\n\n  //! Dump a JSON value to string\n  bool dump(const JsonValue &val) {\n    mod_json_string_t *str;\n\n    str = mod_json_dump(*((mod_json_value_t **)&val));\n    str_ = *reinterpret_cast<JsonString *>(&str);\n    if (str) {\n      mod_json_string_unset(str);\n      return true;\n    }\n    return false;\n  }\n\n  //! Retrieve result of dumper\n  JsonString &result(void) {\n    return str_;\n  }\n\n  //! 
Retrieve result of dumper\n  const JsonString &result(void) const {\n    return str_;\n  }\n\n private:\n  JsonString str_;\n};\n\n//! Equality\nstatic inline bool operator==(const ailego::JsonString &lhs, const char *rhs) {\n  const char *self = lhs.c_str();\n  if (self == rhs) {\n    return true;\n  }\n\n  if (self && rhs) {\n    return (std::strcmp(self, rhs) == 0);\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const char *lhs, const ailego::JsonString &rhs) {\n  return (rhs == lhs);\n}\n\n//! Equality\nstatic inline bool operator==(const ailego::JsonString &lhs,\n                              const std::string &rhs) {\n  std::size_t ls = lhs.size();\n  std::size_t rs = rhs.size();\n  if (ls == 0 && rs == 0) {\n    return true;\n  }\n\n  if (ls == rs) {\n    const char *ld = lhs.data();\n    const char *rd = rhs.data();\n\n    if (ld && rd) {\n      return (std::memcmp(ld, rd, ls) == 0);\n    }\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const std::string &lhs, const JsonString &rhs) {\n  return (rhs == lhs);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonString &lhs, const JsonValue &rhs) {\n  return (rhs.is_string() ? lhs == rhs.as_string() : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const JsonString &rhs) {\n  return (lhs.is_string() ? lhs.as_string() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonArray &lhs, const JsonValue &rhs) {\n  return (rhs.is_array() ? lhs == rhs.as_array() : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const JsonArray &rhs) {\n  return (lhs.is_array() ? lhs.as_array() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonObject &lhs, const JsonValue &rhs) {\n  return (rhs.is_object() ? lhs == rhs.as_object() : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const JsonObject &rhs) {\n  return (lhs.is_object() ? 
lhs.as_object() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const bool &rhs) {\n  return (lhs.is_boolean() ? lhs.as_bool() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const bool &lhs, const JsonValue &rhs) {\n  return (rhs.is_boolean() ? lhs == rhs.as_bool() : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const signed char &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const signed char &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const char &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const char &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const short int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const short int &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! 
Equality\nstatic inline bool operator==(const int &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const long int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const long int &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const long long int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const long long int &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const float &rhs) {\n  if (lhs.is_float()) {\n    double diff = static_cast<double>(lhs.as_float() - rhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const float &lhs, const JsonValue &rhs) {\n  if (rhs.is_float()) {\n    double diff = static_cast<double>(rhs.as_float() - lhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const double &rhs) {\n  if (lhs.is_float()) {\n    double diff = static_cast<double>(lhs.as_float() - rhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! 
Equality\nstatic inline bool operator==(const double &lhs, const JsonValue &rhs) {\n  if (rhs.is_float()) {\n    double diff = static_cast<double>(rhs.as_float() - lhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const long double &rhs) {\n  if (lhs.is_float()) {\n    double diff = static_cast<double>(lhs.as_float() - rhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const long double &lhs, const JsonValue &rhs) {\n  if (rhs.is_float()) {\n    double diff = static_cast<double>(rhs.as_float() - lhs);\n    return ((diff < DBL_EPSILON) && (diff > -DBL_EPSILON));\n  }\n  return false;\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const unsigned char &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const unsigned char &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs,\n                              const unsigned short int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const unsigned short int &lhs,\n                              const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const unsigned int &rhs) {\n  return (lhs.is_integer()\n              ? 
lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const unsigned int &lhs, const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs,\n                              const unsigned long int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const unsigned long int &lhs,\n                              const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs,\n                              const unsigned long long int &rhs) {\n  return (lhs.is_integer()\n              ? lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs)\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const unsigned long long int &lhs,\n                              const JsonValue &rhs) {\n  return (rhs.is_integer()\n              ? static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer()\n              : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const char *rhs) {\n  return (lhs.is_string() ? lhs.as_string() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const char *lhs, const JsonValue &rhs) {\n  return (rhs.is_string() ? lhs == rhs.as_string() : false);\n}\n\n//! Equality\nstatic inline bool operator==(const JsonValue &lhs, const std::string &rhs) {\n  return (lhs.is_string() ? lhs.as_string() == rhs : false);\n}\n\n//! Equality\nstatic inline bool operator==(const std::string &lhs, const JsonValue &rhs) {\n  return (rhs.is_string() ? 
lhs == rhs.as_string() : false);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonString &lhs, const char *rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const char *lhs, const JsonString &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonString &lhs, const std::string &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const std::string &lhs, const JsonString &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonString &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const JsonString &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonArray &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const JsonArray &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonObject &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const JsonObject &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const bool &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const bool &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const signed char &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const signed char &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const char &rhs) {\n  return !(lhs == rhs);\n}\n\n//! 
No equality\nstatic inline bool operator!=(const char &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const short int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const short int &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const int &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const long int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const long int &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const long long int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const long long int &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const float &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const float &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const double &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const double &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const long double &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const long double &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const unsigned char &rhs) {\n  return !(lhs == rhs);\n}\n\n//! 
No equality\nstatic inline bool operator!=(const unsigned char &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs,\n                              const unsigned short int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const unsigned short int &lhs,\n                              const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const unsigned int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const unsigned int &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs,\n                              const unsigned long int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const unsigned long int &lhs,\n                              const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs,\n                              const unsigned long long int &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const unsigned long long int &lhs,\n                              const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const char *rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const char *lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const JsonValue &lhs, const std::string &rhs) {\n  return !(lhs == rhs);\n}\n\n//! No equality\nstatic inline bool operator!=(const std::string &lhs, const JsonValue &rhs) {\n  return !(lhs == rhs);\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/encoding/json.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/encoding/json/mod_json_plus.h>\n"
  },
  {
    "path": "src/include/zvec/ailego/hash/crc32c.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Crc32c Hash\n */\nstruct Crc32c {\n  //! Compute the CRC32C checksum for the source data buffer\n  static uint32_t Hash(const void *data, size_t len, uint32_t crc);\n\n  //! Compute the CRC32C checksum for the source data buffer\n  static inline uint32_t Hash(const void *data, size_t len) {\n    return Hash(data, len, 0u);\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/hash/jump_hash.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\nnamespace zvec {\nnamespace ailego {\n\n//! Jump consistent hash algorithm (https://arxiv.org/pdf/1406.2294.pdf)\nstatic inline int32_t JumpHash(uint64_t key, int32_t num_buckets) {\n  int64_t b = 1, j = 0;\n  while (j < num_buckets) {\n    b = j;\n    key = key * 2862933555777941757ULL + 1;\n    j = (int64_t)(double(b + 1) *\n                  (double(1LL << 31) / double((key >> 33) + 1)));\n  }\n  return (int32_t)b;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/internal/platform.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#if defined(_WIN32) || defined(_WIN64)\n#include <sdkddkver.h>\n#endif\n\n#include <sys/types.h>\n#include <limits.h>\n#include <stdbool.h>\n#include <stddef.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <time.h>\n\n#if defined(_MSC_VER)\n#include <intrin.h>\n#else\n#include <strings.h>\n#include <unistd.h>\n#if defined(__x86_64__) || defined(__i386)\n#include <x86intrin.h>\n#endif\n#if defined(__ARM_NEON)\n#include <arm_neon.h>\n#endif\n#if defined(__ARM_FEATURE_CRC32)\n#include <arm_acle.h>\n#endif\n#endif\n\n#if defined(__cplusplus)\nextern \"C\" {\n#endif\n\n#ifndef NDEBUG\n#define AILEGO_DEBUG\n#endif\n\n//! 
Fixed Intel intrinsics macro in MSVC\n#if defined(_MSC_VER)\n#if (_M_IX86_FP == 2 || defined(_M_AMD64) || defined(_M_X64))\n#define __SSE__ 1\n#define __SSE2__ 1\n#if _MSC_VER >= 1500\n#define __SSE3__ 1\n#define __SSSE3__ 1\n#define __SSE4_1__ 1\n#define __SSE4_2__ 1\n#endif\n#elif _M_IX86_FP == 1\n#define __SSE__ 1\n#endif\n#endif  // _MSC_VER\n\n#if defined(_WIN32) || defined(_WIN64)\n#if defined(_WIN64)\n#define AILEGO_M64\n#else\n#define AILEGO_M32\n#endif\n#endif\n\n#if defined(__GNUC__)\n#if defined(__x86_64__) || defined(__aarch64__) || defined(__ppc64__)\n#define AILEGO_M64\n#else\n#define AILEGO_M32\n#endif\n#endif\n\n#ifndef AILEGO_ALIGNED\n#if defined(_MSC_VER)\n#define AILEGO_ALIGNED(x) __declspec(align(x))\n#define AILEGO_DEPRECATED __declspec(deprecated)\n#elif defined(__GNUC__)\n#define AILEGO_ALIGNED(x) __attribute__((aligned(x)))\n#define AILEGO_DEPRECATED __attribute__((deprecated))\n#else\n#define AILEGO_ALIGNED(x)\n#define AILEGO_DEPRECATED\n#endif\n#endif\n\n//! Add 'inline' for MSVC\n#if defined(_MSC_VER) && !defined(__cplusplus)\n#if !defined(inline)\n#define inline __inline\n#endif\n#endif\n\n//! Add 'ssize_t' for MSVC\n#if defined(_MSC_VER)\ntypedef intptr_t ssize_t;\n#endif\n\n#if defined(_MSC_VER)\n//! Returns the number of trailing 0-bits in x\nstatic inline int ailego_ctz32(uint32_t x) {\n  unsigned long r = 0;\n  _BitScanForward(&r, x);\n  return (int)r;\n}\n\n//! Returns the number of leading 0-bits in x\nstatic inline int ailego_clz32(uint32_t x) {\n  unsigned long r = 0;\n  _BitScanReverse(&r, x);\n  return (31 - (int)r);\n}\n\n#if defined(AILEGO_M64)\n//! Returns the number of trailing 0-bits in x\nstatic inline int ailego_ctz64(uint64_t x) {\n  unsigned long r = 0;\n  _BitScanForward64(&r, x);\n  return (int)r;\n}\n\n//! Returns the number of leading 0-bits in x\nstatic inline int ailego_clz64(uint64_t x) {\n  unsigned long r = 0;\n  _BitScanReverse64(&r, x);\n  return (63 - (int)r);\n}\n#else\n//! 
Returns the number of trailing 0-bits in x\nstatic inline int ailego_ctz64(uint64_t x) {\n  unsigned long r = 0;\n  unsigned long m = (unsigned long)x;\n  _BitScanForward(&r, m);\n  if (r == 0) {\n    m = (unsigned long)(x >> 32);\n    _BitScanForward(&r, m);\n    if (r != 0) {\n      r += 32;\n    }\n  }\n  return (int)r;\n}\n\n//! Returns the number of leading 0-bits in x\nstatic inline int ailego_clz64(uint64_t x) {\n  unsigned long r = 0;\n  unsigned long m = (unsigned long)(x >> 32);\n  _BitScanReverse(&r, m);\n  if (r != 0) {\n    return (31 - (int)r);\n  }\n  m = (unsigned long)x;\n  _BitScanReverse(&r, m);\n  return (63 - (int)r);\n}\n#endif  // AILEGO_M64\n\n//! Counts the number of one bits\n#define ailego_popcount32(x) (__popcnt(x))\n#define ailego_popcount64(x) (__popcnt64(x))\n#define ailego_likely(x) (x)\n#define ailego_unlikely(x) (x)\n#ifdef __SSE__\n#define ailego_prefetch(p) _mm_prefetch((p), 0)\n#else\n#define ailego_prefetch(p) ((void)(p))\n#endif\n#else  // !_MSC_VER\n#define ailego_ctz32(x) (__builtin_ctz(x))\n#define ailego_ctz64(x) (__builtin_ctzll(x))\n#define ailego_clz32(x) (__builtin_clz(x))\n#define ailego_clz64(x) (__builtin_clzll(x))\n#define ailego_popcount32(x) (__builtin_popcount(x))\n#define ailego_popcount64(x) (__builtin_popcountl(x))\n#define ailego_likely(x) (__builtin_expect(!!(x), 1))\n#define ailego_unlikely(x) (__builtin_expect(!!(x), 0))\n#define ailego_prefetch(p) (__builtin_prefetch((p)))\n#endif  // _MSC_VER\n\n#if defined(AILEGO_M64)\n#define ailego_ctz ailego_ctz64\n#define ailego_clz ailego_clz64\n#define ailego_popcount ailego_popcount64\n#else\n#define ailego_ctz ailego_ctz32\n#define ailego_clz ailego_clz32\n#define ailego_popcount ailego_popcount32\n#endif  // AILEGO_M64\n\n#if defined(__arm__) || defined(__aarch64__)\n// ARMv7 Architecture Reference Manual (for YIELD)\n// ARM Compiler toolchain Compiler Reference (for __yield() instrinsic)\n#if defined(__CC_ARM)\n#define ailego_yield() __yield()\n#else\n#define 
ailego_yield() __asm__ __volatile__(\"yield\")\n#endif  // __CC_ARM\n#elif defined(__SSE2__)\n#define ailego_yield() _mm_pause()\n#else\n#define ailego_yield() ((void)0)\n#endif  // __arm__ || __aarch64__\n\n#if defined(_MSC_VER)\n#define ailego_aligned_malloc(SIZE, ALIGN) \\\n  _aligned_malloc((size_t)(SIZE), (ALIGN))\n#define ailego_aligned_free _aligned_free\n#else  // !_MSC_VER\n#if defined(_ISOC11_SOURCE)\n#define ailego_aligned_malloc(SIZE, ALIGN) \\\n  aligned_alloc((ALIGN), (size_t)(SIZE))\n#else  // !_ISOC11_SOURCE\n#define ailego_aligned_malloc(SIZE, ALIGN) \\\n  ailego_posix_malloc((size_t)(SIZE), (ALIGN))\n#endif  // _ISOC11_SOURCE\n#define ailego_aligned_free free\n#endif  // _MSC_VER\n\n#if !defined(__SANITIZE_ADDRESS__)\n#if defined(__has_feature)\n#if __has_feature(address_sanitizer)\n#define __SANITIZE_ADDRESS__ 1\n#endif  // address_sanitizer\n#endif  // __has_feature\n#endif  // !__SANITIZE_ADDRESS__\n\n#if !defined(__SANITIZE_ADDRESS__)\n#if !defined(ailego_malloc)\n#if defined(__AVX512F__)\n#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 64)\n#elif defined(__AVX__)\n#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 32)\n#elif defined(__SSE__)\n#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)\n#elif defined(__ARM_NEON)\n#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)\n#endif\n#endif  // !ailego_malloc\n#if (defined(__SSE__) || defined(__ARM_NEON)) && !defined(ailego_free)\n#define ailego_free ailego_aligned_free\n#endif\n#endif  // !__SANITIZE_ADDRESS__\n\n#ifndef ailego_malloc\n#define ailego_malloc(SIZE) malloc((size_t)(SIZE))\n#endif\n#ifndef ailego_free\n#define ailego_free free\n#endif\n\n#ifndef ailego_offsetof\n#define ailego_offsetof(TYPE, MEMBER) ((size_t) & ((TYPE *)0)->MEMBER)\n#endif\n\n#ifndef ailego_align\n#define ailego_align(SIZE, BOUND) (((SIZE) + ((BOUND) - 1)) & ~((BOUND) - 1))\n#endif\n\n#ifndef ailego_align8\n#define ailego_align8(SIZE) ailego_align(SIZE, 8)\n#endif\n\n#ifndef 
ailego_min\n#define ailego_min(A, B) (((A) < (B)) ? (A) : (B))\n#endif\n\n#ifndef ailego_max\n#define ailego_max(A, B) (((A) > (B)) ? (A) : (B))\n#endif\n\n#ifndef ailego_malloc_object\n#define ailego_malloc_object(TYPE) ((TYPE *)ailego_malloc(sizeof(TYPE)))\n#endif\n#ifndef ailego_malloc_array\n#define ailego_malloc_array(TYPE, SIZE) \\\n  ((TYPE *)ailego_malloc(SIZE * sizeof(TYPE)))\n#endif\n\n#ifndef ailego_minus_if_ne_zero\n#define ailego_minus_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) return (-1)\n#endif\n\n#ifndef ailego_zero_if_ne_zero\n#define ailego_zero_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) return (0)\n#endif\n\n#ifndef ailego_null_if_ne_zero\n#define ailego_null_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) return (NULL)\n#endif\n\n#ifndef ailego_false_if_ne_zero\n#define ailego_false_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) return (false)\n#endif\n\n#ifndef ailego_return_if_ne_zero\n#define ailego_return_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) return\n#endif\n\n#ifndef ailego_break_if_ne_zero\n#define ailego_break_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) break\n#endif\n\n#ifndef ailego_continue_if_ne_zero\n#define ailego_continue_if_ne_zero(COND) \\\n  if (ailego_unlikely((COND) != 0)) continue\n#endif\n\n#ifndef ailego_do_if_ne_zero\n#define ailego_do_if_ne_zero(COND) if (ailego_unlikely((COND) != 0))\n#endif\n\n#ifndef ailego_minus_if_lt_zero\n#define ailego_minus_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) return (-1)\n#endif\n\n#ifndef ailego_zero_if_lt_zero\n#define ailego_zero_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) return (0)\n#endif\n\n#ifndef ailego_null_if_lt_zero\n#define ailego_null_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) return (NULL)\n#endif\n\n#ifndef ailego_false_if_lt_zero\n#define ailego_false_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) return (false)\n#endif\n\n#ifndef 
ailego_return_if_lt_zero\n#define ailego_return_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) return\n#endif\n\n#ifndef ailego_break_if_lt_zero\n#define ailego_break_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) break\n#endif\n\n#ifndef ailego_continue_if_lt_zero\n#define ailego_continue_if_lt_zero(COND) \\\n  if (ailego_unlikely((COND) < 0)) continue\n#endif\n\n#ifndef ailego_do_if_lt_zero\n#define ailego_do_if_lt_zero(COND) if (ailego_unlikely((COND) < 0))\n#endif\n\n#ifndef ailego_minus_if_false\n#define ailego_minus_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) return (-1)\n#endif\n\n#ifndef ailego_zero_if_false\n#define ailego_zero_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) return (0)\n#endif\n\n#ifndef ailego_null_if_false\n#define ailego_null_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) return (NULL)\n#endif\n\n#ifndef ailego_false_if_false\n#define ailego_false_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) return (false)\n#endif\n\n#ifndef ailego_return_if_false\n#define ailego_return_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) return\n#endif\n\n#ifndef ailego_break_if_false\n#define ailego_break_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) break\n#endif\n\n#ifndef ailego_continue_if_false\n#define ailego_continue_if_false(COND) \\\n  if (ailego_unlikely(!(COND))) continue\n#endif\n\n#ifndef ailego_do_if_false\n#define ailego_do_if_false(COND) if (ailego_unlikely(!(COND)))\n#endif\n\n#ifndef ailego_compile_assert\n#define ailego_compile_assert(COND, MSG) \\\n  typedef char Static_Assertion_##MSG[(!!(COND)) * 2 - 1]\n#endif\n\n#ifndef ailego_static_assert3\n#define ailego_static_assert3(COND, LINE) \\\n  ailego_compile_assert(COND, At_Line_##LINE)\n#endif\n\n#ifndef ailego_static_assert2\n#define ailego_static_assert2(COND, LINE) ailego_static_assert3(COND, LINE)\n#endif\n\n#ifndef ailego_static_assert\n#define ailego_static_assert(COND) ailego_static_assert2(COND, __LINE__)\n#endif\n\n//! 
Abort and report if an assertion is failed\n#ifndef ailego_assert_abort\n#define ailego_assert_abort(COND, MSG)                                         \\\n  (void)(ailego_likely(COND) || (ailego_assert_report(__FILE__, __FUNCTION__,  \\\n                                                      __LINE__, #COND, (MSG)), \\\n                                 abort(), 0))\n#endif\n\n#ifdef AILEGO_DEBUG\n#ifndef ailego_assert\n#define ailego_assert(COND) ailego_assert_abort(COND, \"\")\n#endif\n#ifndef ailego_assert_with\n#define ailego_assert_with(COND, MSG) ailego_assert_abort(COND, MSG)\n#endif\n#else  // !AILEGO_DEBUG\n#ifndef ailego_assert\n#define ailego_assert(COND) ((void)0)\n#endif\n#ifndef ailego_assert_with\n#define ailego_assert_with(COND, MSG) ((void)0)\n#endif\n#endif  // AILEGO_DEBUG\n\n#ifndef ailego_check\n#define ailego_check(COND) ailego_assert_abort(COND, \"\")\n#endif\n#ifndef ailego_check_with\n#define ailego_check_with(COND, MSG) ailego_assert_abort(COND, MSG)\n#endif\n\n#ifndef _MSC_VER\n//! Allocates memory on a specified alignment boundary\nstatic inline void *ailego_posix_malloc(size_t size, size_t align) {\n  void *ptr;\n  ailego_null_if_ne_zero(posix_memalign(&ptr, align, size));\n  return ptr;\n}\n#endif\n\n//! Report an assertion is failed\nstatic inline void ailego_assert_report(const char *file, const char *func,\n                                        int line, const char *cond,\n                                        const char *msg) {\n  fprintf(stderr, \"Assertion failed: (%s) in %s(), %s line %d. 
%s\\n\", cond,\n          func, file, line, msg);\n}\n\n// because the behavior of conversion from negative float to unsigned integer\n// is undefined (on arm, result will be zero), it's necessary to convert it\n// to signed integer firstly\nstatic inline uint8_t static_cast_from_float_to_uint8(float data) {\n  return static_cast<uint8_t>(static_cast<int32_t>(data));\n}\n\nstatic inline uint16_t static_cast_from_float_to_uint16(float data) {\n  return static_cast<uint16_t>(static_cast<int32_t>(data));\n}\n\n#ifdef __cplusplus\n} /* extern \"C\" */\n#endif\n"
  },
  {
    "path": "src/include/zvec/ailego/io/file.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/utility/file_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! File Utility\n */\nclass File {\n public:\n  //! Native Handle in OS\n  typedef FileHelper::NativeHandle NativeHandle;\n\n  //! Invalid Handle\n  static constexpr NativeHandle InvalidHandle = (NativeHandle)(-1);\n\n  //! Specifies the position in a file to use for seeking.\n  enum struct Origin { Begin = 0, Current = 1, End = 2 };\n\n  //! Options of memory mapping\n  enum {\n    MMAP_READONLY = 1,\n    MMAP_SHARED = 2,\n    MMAP_LOCKED = 4,\n    MMAP_WARMUP = 8,\n    MMAP_POPULATE = 16,\n    MMAP_HUGE_PAGE = 32,\n  };\n\n  //! Constructor\n  File(void) : native_handle_(File::InvalidHandle), read_only_(false) {}\n\n  //! Constructor\n  File(File &&rhs) {\n    read_only_ = rhs.read_only_;\n    native_handle_ = rhs.native_handle_;\n    rhs.read_only_ = false;\n    rhs.native_handle_ = File::InvalidHandle;\n  }\n\n  //! Destructor\n  ~File(void) {\n    this->close();\n  }\n\n  //! Assignment\n  File &operator=(File &&rhs) {\n    read_only_ = rhs.read_only_;\n    native_handle_ = rhs.native_handle_;\n    rhs.read_only_ = false;\n    rhs.native_handle_ = File::InvalidHandle;\n    return *this;\n  }\n\n  //! Test if the file is valid\n  bool is_valid(void) const {\n    return (native_handle_ != File::InvalidHandle);\n  }\n\n  //! 
Retrieve non-zero if memory region is read only\n  bool read_only(void) const {\n    return read_only_;\n  }\n\n  //! Retrieve native handle\n  NativeHandle native_handle(void) const {\n    return native_handle_;\n  }\n\n  //! Create a local file\n  bool create(const char *path, size_t size, bool direct);\n\n  //! Open a local file\n  bool open(const char *path, bool rdonly, bool direct);\n\n  //! Close the local file\n  void close(void);\n\n  //! Reset the file\n  void reset(void);\n\n  //! Write data into the file\n  size_t write(const void *data, size_t len);\n\n  //! Write data into the file\n  size_t write(ssize_t off, const void *data, size_t len);\n\n  //! Read data from the file\n  size_t read(void *buf, size_t len);\n\n  //! Read data from the file\n  size_t read(ssize_t off, void *buf, size_t len);\n\n  //! Synchronize memory with physical storage\n  bool flush(void);\n\n  //! Sets the current position of the file to the given value\n  bool seek(ssize_t off, Origin origin);\n\n  //! Truncate the file to a specified length\n  bool truncate(size_t len);\n\n  //! Retrieve size of file\n  size_t size(void) const;\n\n  //! Retrieve offset of file\n  ssize_t offset(void) const;\n\n  //! Create a local file\n  bool create(const char *path, size_t len) {\n    return this->create(path, len, false);\n  }\n\n  //! Create a local file\n  bool create(const std::string &path, size_t len, bool direct) {\n    return this->create(path.c_str(), len, direct);\n  }\n\n  //! Create a local file\n  bool create(const std::string &path, size_t len) {\n    return this->create(path.c_str(), len);\n  }\n\n  //! Open a local file\n  bool open(const char *path, bool rdonly) {\n    return this->open(path, rdonly, false);\n  }\n\n  //! Open a local file\n  bool open(const std::string &path, bool rdonly, bool direct) {\n    return this->open(path.c_str(), rdonly, direct);\n  }\n\n  //! 
Open a local file\n  bool open(const std::string &path, bool rdonly) {\n    return this->open(path.c_str(), rdonly);\n  }\n\n  //! Map a region of file into memory\n  void *map(ssize_t off, size_t len, int opts) {\n    if (read_only_) {\n      opts |= File::MMAP_READONLY;\n    }\n    return File::MemoryMap(native_handle_, off, len, opts);\n  }\n\n  //! Map a region of file into memory\n  static void *MemoryMap(NativeHandle handle, ssize_t off, size_t len,\n                         int opts);\n\n  //! Map an anonymous region into memory\n  static void *MemoryMap(size_t len, int opts);\n\n  //! Remap the region into memory\n  static void *MemoryRemap(void *oldptr, size_t oldsize, void *newptr,\n                           size_t newsize);\n\n  //! Unmap a mapping region\n  static void MemoryUnmap(void *addr, size_t len);\n\n  //! Synchronize a memory map\n  static bool MemoryFlush(void *addr, size_t len);\n\n  //! Lock the memory region into RAM\n  static bool MemoryLock(void *addr, size_t len);\n\n  //! Unlock the memory region in RAM\n  static bool MemoryUnlock(void *addr, size_t len);\n\n  //! Warm up a memory region\n  static void MemoryWarmup(void *addr, size_t len);\n\n  //! Delete a name and possibly the file it refers to\n  static bool Delete(const char *path) {\n    return FileHelper::DeleteFile(path);\n  }\n\n  //! Delete a name and possibly the file it refers to\n  static bool Delete(const std::string &path) {\n    return FileHelper::DeleteFile(path.c_str());\n  }\n\n  //! Change the name or location of a file\n  static bool Rename(const char *oldpath, const char *newpath) {\n    return FileHelper::RenameFile(oldpath, newpath);\n  }\n\n  //! Change the name or location of a file\n  static bool Rename(const std::string &oldpath, const std::string &newpath) {\n    return FileHelper::RenameFile(oldpath.c_str(), newpath.c_str());\n  }\n\n  //! 
Retrieve the base name from a path\n  static const char *BaseName(const char *path) {\n    return FileHelper::BaseName(path);\n  }\n\n  //! Retrieve the base name from a path\n  static const char *BaseName(const std::string &path) {\n    return BaseName(path.c_str());\n  }\n\n  //! Make directories' path\n  static bool MakePath(const char *path) {\n    return FileHelper::MakePath(path);\n  }\n\n  //! Make directories' path\n  static bool MakePath(const std::string &path) {\n    return FileHelper::MakePath(path.c_str());\n  }\n\n  //! Remove a file or a directory (includes files & subdirectories)\n  static bool RemovePath(const char *path) {\n    return FileHelper::RemovePath(path);\n  }\n\n  //! Remove a file or a directory (includes files & subdirectories)\n  static bool RemovePath(const std::string &path) {\n    return FileHelper::RemovePath(path.c_str());\n  }\n\n  //! Remove a directory (includes files & subdirectories)\n  static bool RemoveDirectory(const char *path) {\n    return FileHelper::RemoveDirectory(path);\n  }\n\n  //! Remove a directory (includes files & subdirectories)\n  static bool RemoveDirectory(const std::string &path) {\n    return FileHelper::RemoveDirectory(path.c_str());\n  }\n\n  //! Retrieve non-zero if the path exists\n  static bool IsExist(const char *path) {\n    return FileHelper::IsExist(path);\n  }\n\n  //! Retrieve non-zero if the path exists\n  static bool IsExist(const std::string &path) {\n    return FileHelper::IsExist(path.c_str());\n  }\n\n  //! Retrieve non-zero if the path is a regular file\n  static bool IsRegular(const char *path) {\n    return FileHelper::IsRegular(path);\n  }\n\n  //! Retrieve non-zero if the path is a regular file\n  static bool IsRegular(const std::string &path) {\n    return FileHelper::IsRegular(path.c_str());\n  }\n\n  //! Retrieve non-zero if the path is a directory\n  static bool IsDirectory(const char *path) {\n    return FileHelper::IsDirectory(path);\n  }\n\n  //! 
Retrieve non-zero if the path is a directory\n  static bool IsDirectory(const std::string &path) {\n    return FileHelper::IsDirectory(path.c_str());\n  }\n\n  //! Retrieve non-zero if the path is a symbolic link\n  static bool IsSymbolicLink(const char *path) {\n    return FileHelper::IsSymbolicLink(path);\n  }\n\n  //! Retrieve non-zero if the path is a symbolic link\n  static bool IsSymbolicLink(const std::string &path) {\n    return FileHelper::IsSymbolicLink(path.c_str());\n  }\n\n  //! Retrieve non-zero if two paths are pointing to the same file\n  static bool IsSame(const char *path1, const char *path2) {\n    return FileHelper::IsSame(path1, path2);\n  }\n\n  //! Retrieve non-zero if two paths are pointing to the same file\n  static bool IsSame(const std::string &path1, const std::string &path2) {\n    return FileHelper::IsSame(path1.c_str(), path2.c_str());\n  }\n\n private:\n  //! Disable them\n  File(const File &) = delete;\n  File &operator=(const File &) = delete;\n\n  //! Members\n  NativeHandle native_handle_;\n  bool read_only_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/io/mmap_file.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/io/file.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Memory Mapping File\n */\nclass MMapFile {\n public:\n  //! Constructor\n  MMapFile(void)\n      : read_only_(false), region_(nullptr), region_size_(0), offset_(0) {}\n\n  //! Constructor\n  MMapFile(MMapFile &&rhs) {\n    read_only_ = rhs.read_only_;\n    region_ = rhs.region_;\n    region_size_ = rhs.region_size_;\n    offset_ = rhs.offset_;\n    rhs.read_only_ = false;\n    rhs.region_ = nullptr;\n    rhs.region_size_ = 0;\n    rhs.offset_ = 0;\n  }\n\n  //! Destructor\n  ~MMapFile(void) {\n    this->close();\n  }\n\n  //! Assignment\n  MMapFile &operator=(MMapFile &&rhs) {\n    read_only_ = rhs.read_only_;\n    region_ = rhs.region_;\n    region_size_ = rhs.region_size_;\n    offset_ = rhs.offset_;\n    rhs.read_only_ = false;\n    rhs.region_ = nullptr;\n    rhs.region_size_ = 0;\n    rhs.offset_ = 0;\n    return *this;\n  }\n\n  //! Test if the file is valid\n  bool is_valid(void) const {\n    return (region_ != nullptr);\n  }\n\n  //! Retrieve non-zero if memory region is read only\n  bool read_only(void) const {\n    return read_only_;\n  }\n\n  //! 
Create a memory mapping file\n  bool create(const char *path, size_t len) {\n    ailego_false_if_false(!region_ && path);\n\n    File file;\n    ailego_false_if_false(file.create(path, len));\n\n    region_ = File::MemoryMap(file.native_handle(), 0, len, File::MMAP_SHARED);\n    ailego_false_if_false(region_);\n\n    read_only_ = false;\n    region_size_ = len;\n    return true;\n  }\n\n  //! Create a memory mapping file\n  bool create(const std::string &path, size_t len) {\n    return this->create(path.c_str(), len);\n  }\n\n  //! Open a memory mapping file\n  bool open(const char *path, bool rdonly, bool shared) {\n    ailego_false_if_false(!region_ && path);\n\n    File file;\n    ailego_false_if_false(file.open(path, rdonly, false));\n\n    size_t len = file.size();\n    int opts = 0;\n    if (rdonly) {\n      opts |= File::MMAP_READONLY;\n    }\n    if (shared) {\n      opts |= File::MMAP_SHARED;\n    }\n    region_ = File::MemoryMap(file.native_handle(), 0, len, opts);\n    ailego_false_if_false(region_);\n\n    read_only_ = rdonly;\n    region_size_ = len;\n    return true;\n  }\n\n  //! Open a memory mapping file\n  bool open(const std::string &path, bool rdonly, bool shared) {\n    return this->open(path.c_str(), rdonly, shared);\n  }\n\n  //! Open a memory mapping file\n  bool open(const char *path, bool rdonly) {\n    return this->open(path, rdonly, false);\n  }\n\n  //! Open a memory mapping file\n  bool open(const std::string &path, bool rdonly) {\n    return this->open(path, rdonly, false);\n  }\n\n  //! Close the memory mapping file\n  void close(void) {\n    File::MemoryUnmap(region_, region_size_);\n    region_ = nullptr;\n    region_size_ = 0;\n    offset_ = 0;\n  }\n\n  //! Synchronize memory with physical storage\n  bool flush(void) {\n    return File::MemoryFlush(region_, region_size_);\n  }\n\n  //! Lock the memory region into RAM\n  bool lock(void) {\n    return File::MemoryLock(region_, region_size_);\n  }\n\n  //! 
Unlock the memory region in RAM\n  bool unlock(void) {\n    return File::MemoryUnlock(region_, region_size_);\n  }\n\n  //! Warm up the memory region\n  void warmup(void) {\n    File::MemoryWarmup(region_, region_size_);\n  }\n\n  //! Reset the file\n  void reset(void) {\n    offset_ = 0;\n  }\n\n  //! Write data into the storage\n  size_t write(const void *data, size_t len) {\n    if (offset_ + len > region_size_) {\n      len = region_size_ - offset_;\n    }\n    memcpy((uint8_t *)region_ + offset_, data, len);\n    offset_ += len;\n    return len;\n  }\n\n  //! Write data into the storage\n  size_t write(size_t off, const void *data, size_t len) {\n    if (off + len > region_size_) {\n      if (off > region_size_) {\n        off = region_size_;\n      }\n      len = region_size_ - off;\n    }\n    memcpy((uint8_t *)region_ + off, data, len);\n    return len;\n  }\n\n  //! Read data from the storage (Zero-copy)\n  size_t read(const void **data, size_t len) {\n    if (offset_ + len > region_size_) {\n      len = region_size_ - offset_;\n    }\n    *data = (uint8_t *)region_ + offset_;\n    offset_ += len;\n    return len;\n  }\n\n  //! Read data from the storage (Zero-copy)\n  size_t read(size_t off, const void **data, size_t len) {\n    if (off + len > region_size_) {\n      if (off > region_size_) {\n        off = region_size_;\n      }\n      len = region_size_ - off;\n    }\n    *data = (uint8_t *)region_ + off;\n    return len;\n  }\n\n  //! Read data from the storage\n  size_t read(void *data, size_t len) {\n    if (offset_ + len > region_size_) {\n      len = region_size_ - offset_;\n    }\n    memcpy(data, (uint8_t *)region_ + offset_, len);\n    offset_ += len;\n    return len;\n  }\n\n  //! 
Read data from the storage\n  size_t read(size_t off, void *data, size_t len) {\n    if (off + len > region_size_) {\n      if (off > region_size_) {\n        off = region_size_;\n      }\n      len = region_size_ - off;\n    }\n    memcpy(data, (uint8_t *)region_ + off, len);\n    return len;\n  }\n\n  //! Retrieve memory region of file\n  void *region(void) const {\n    return region_;\n  }\n\n  //! Retrieve region size of file\n  size_t size(void) const {\n    return region_size_;\n  }\n\n  //! Retrieve offset of file\n  size_t offset(void) const {\n    return offset_;\n  }\n\n private:\n  //! Disable them\n  MMapFile(const MMapFile &) = delete;\n  MMapFile &operator=(const MMapFile &) = delete;\n\n  //! Members\n  bool read_only_;\n  void *region_;\n  size_t region_size_;\n  size_t offset_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/logger/logger.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdarg>\n#include <memory>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/pattern/factory.h>\n\n//! Register Index Logger\n#define FACTORY_REGISTER_LOGGER_ALIAS(__NAME__, __IMPL__, ...)      \\\n  AILEGO_FACTORY_REGISTER(__NAME__, zvec::ailego::Logger, __IMPL__, \\\n                          ##__VA_ARGS__)\n\n//! Register Index Logger\n#define FACTORY_REGISTER_LOGGER(__IMPL__, ...) \\\n  FACTORY_REGISTER_LOGGER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n#define PROXIMA_LOG_IMPL(level, format, ...)                             \\\n  do {                                                                   \\\n    if (zvec::ailego::LoggerBroker::IsLevelEnabled(level)) {             \\\n      zvec::ailego::LoggerBroker::Log(level, __FILE__, __LINE__, format, \\\n                                      ##__VA_ARGS__);                    \\\n    }                                                                    \\\n  } while (0)\n\n//! Log Debug Message\n#ifndef LOG_DEBUG\n#define LOG_DEBUG(format, ...) \\\n  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_DEBUG, format, ##__VA_ARGS__)\n#endif\n\n//! Log Information Message\n#ifndef LOG_INFO\n#define LOG_INFO(format, ...) \\\n  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_INFO, format, ##__VA_ARGS__)\n#endif\n\n//! 
Log Warn Message\n#ifndef LOG_WARN\n#define LOG_WARN(format, ...) \\\n  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_WARN, format, ##__VA_ARGS__)\n#endif\n\n//! Log Error Message\n#ifndef LOG_ERROR\n#define LOG_ERROR(format, ...) \\\n  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_ERROR, format, ##__VA_ARGS__)\n#endif\n\n//! Log Fatal Message\n#ifndef LOG_FATAL\n#define LOG_FATAL(format, ...) \\\n  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_FATAL, format, ##__VA_ARGS__)\n#endif\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Index Logger\n */\nstruct Logger {\n  //! Index Logger Pointer\n  typedef std::shared_ptr<Logger> Pointer;\n\n  static const int LEVEL_DEBUG;\n  static const int LEVEL_INFO;\n  static const int LEVEL_WARN;\n  static const int LEVEL_ERROR;\n  static const int LEVEL_FATAL;\n\n  //! Retrieve string of level\n  static const char *LevelString(int level) {\n    static const char *info[] = {\"DEBUG\", \" INFO\", \" WARN\", \"ERROR\", \"FATAL\"};\n    if (level < (int)(sizeof(info) / sizeof(info[0]))) {\n      return info[level];\n    }\n    return \"\";\n  }\n\n  //! Retrieve symbol of level\n  static char LevelSymbol(int level) {\n    static const char info[5] = {'D', 'I', 'W', 'E', 'F'};\n    if (level < (int)(sizeof(info) / sizeof(info[0]))) {\n      return info[level];\n    }\n    return ' ';\n  }\n\n  //! Destructor\n  virtual ~Logger(void) {}\n\n  //! Initialize Logger\n  virtual int init(const Params &params) = 0;\n\n  //! Cleanup Logger\n  virtual int cleanup(void) = 0;\n\n  //! Log Message\n  virtual void log(int level, const char *file, int line, const char *format,\n                   va_list args) = 0;\n};\n\n/*! Index Logger Broker\n */\nclass LoggerBroker {\n public:\n  //! Register Logger\n  static Logger::Pointer Register(Logger::Pointer logger) {\n    Logger::Pointer ret = std::move(logger_);\n    logger_ = std::move(logger);\n    return ret;\n  }\n\n  //! 
Register Logger with init params\n  static int Register(Logger::Pointer logger, const ailego::Params &params) {\n    //! Cleanup the previous, before initializing the new one\n    if (logger_) {\n      logger_->cleanup();\n    }\n    logger_ = std::move(logger);\n    return logger_->init(params);\n  }\n\n  //! Unregister Logger\n  static void Unregister(void) {\n    logger_ = nullptr;\n  }\n\n  //! Set Level of Logger\n  static void SetLevel(int level) {\n    logger_level_ = level;\n  }\n\n  //! Check if log level is enabled\n  static bool IsLevelEnabled(int level) {\n    return logger_level_ <= level && logger_;\n  }\n\n  //! Log Message\n  __attribute__((format(printf, 4, 5))) static void Log(\n      int level, const char *file, int line, const char *format, ...) {\n    if (IsLevelEnabled(level)) {\n      va_list args;\n      va_start(args, format);\n      logger_->log(level, file, line, format, args);\n      va_end(args);\n    }\n  }\n\n private:\n  //! Disable them\n  LoggerBroker(void) = delete;\n  LoggerBroker(const LoggerBroker &) = delete;\n  LoggerBroker(LoggerBroker &&) = delete;\n\n  //! Members\n  static int logger_level_;\n  static Logger::Pointer logger_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/math_batch/utils.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstddef>\nnamespace zvec::ailego::DistanceBatch {\n\ntypedef void (*DistanceBatchQueryPreprocessFunc)(void *query, size_t dim);\n\n}  // namespace zvec::ailego::DistanceBatch"
  },
  {
    "path": "src/include/zvec/ailego/parallel/thread_pool.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <mutex>\n#include <queue>\n#include <thread>\n#include <utility>\n#include <vector>\n#include <zvec/ailego/pattern/closure.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Thread Pool\n */\nclass ThreadPool {\n public:\n  /*! Thread Pool Task Group\n   */\n  class TaskGroup : public std::enable_shared_from_this<TaskGroup> {\n   public:\n    using Pointer = std::shared_ptr<TaskGroup>;\n\n    //! Constructor\n    TaskGroup(ThreadPool *pool) : pool_(pool) {}\n\n    //! Push a task to the queue\n    void enqueue(const ClosureHandler &handle) {\n      pool_->enqueue(handle, this->shared_from_this(), nullptr);\n    }\n\n    //! Push a task to the queue\n    void enqueue(ClosureHandler &&handle) {\n      pool_->enqueue(std::move(handle), this->shared_from_this(), nullptr);\n    }\n\n    //! Submit a task to the queue\n    void submit(ClosureHandler &&handle) {\n      return enqueue_and_wake(std::move(handle));\n    }\n\n    //! Push a task to the queue\n    void enqueue_and_wake(const ClosureHandler &handle) {\n      pool_->enqueue_and_wake(handle, this->shared_from_this(), nullptr);\n    }\n\n    //! 
Push a task to the queue\n    void enqueue_and_wake(ClosureHandler &&handle) {\n      pool_->enqueue_and_wake(std::move(handle), this->shared_from_this(),\n                              nullptr);\n    }\n\n    //! Execute a function as a task in pool\n    template <typename... TArgs>\n    void execute_and_wait(TArgs &&...args) {\n      ThreadPool::TaskControl ctrl;\n      pool_->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...),\n                              this->shared_from_this(), &ctrl);\n      ctrl.wait();\n    }\n\n    //! Execute a function as a task in pool\n    template <typename... TArgs>\n    void execute(TArgs &&...args) {\n      this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));\n    }\n\n    //! Wait until all tasks in group finished\n    void wait_finish(void) {\n      std::unique_lock<std::mutex> lock(mutex_);\n      cond_.wait(lock, [this]() { return this->is_finished(); });\n    }\n\n    //! Check if the group is finished\n    bool is_finished(void) const {\n      return (active_count_ == 0 && pending_count_ == 0);\n    }\n\n    //! Retrieve count of pending tasks in group\n    size_t pending_count(void) const {\n      return pending_count_.load(std::memory_order_relaxed);\n    }\n\n    //! Retrieve count of active tasks in group\n    size_t active_count(void) const {\n      return active_count_.load(std::memory_order_relaxed);\n    }\n\n   protected:\n    friend class ThreadPool;\n\n    //! Mark a task enqueued\n    void mark_task_enqueued(void) {\n      ++pending_count_;\n    }\n\n    //! Mark a task actived\n    void mark_task_actived(void) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      ++active_count_;\n      --pending_count_;\n    }\n\n    //! Notify a task finished\n    void notify(void) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      if (--active_count_ == 0 && pending_count_ == 0) {\n        cond_.notify_all();\n      }\n    }\n\n   private:\n    //! 
Members\n    ThreadPool *pool_{nullptr};\n    std::atomic_uint active_count_{0};\n    std::atomic_uint pending_count_{0};\n    std::mutex mutex_{};\n    std::condition_variable cond_{};\n  };\n\n  //! Constructor\n  explicit ThreadPool(uint32_t size, bool binding);\n\n  //! Constructor\n  explicit ThreadPool(bool binding)\n      : ThreadPool{std::max(std::thread::hardware_concurrency(), 1u), binding} {\n  }\n\n  //! Constructor\n  ThreadPool(void) : ThreadPool{false} {}\n\n  //! Destructor\n  ~ThreadPool(void) {\n    this->stop();\n\n    // Join all threads\n    for (auto it = pool_.begin(); it != pool_.end(); ++it) {\n      if (it->joinable()) {\n        it->join();\n      }\n    }\n  }\n\n  //! Retrieve thread count in pool\n  size_t count(void) const {\n    return pool_.size();\n  }\n\n  //! Stop all threads\n  void stop(void) {\n    // Set stop flag as true, then wake all threads\n    stopping_ = true;\n    std::lock_guard<std::mutex> lock(queue_mutex_);\n    work_cond_.notify_all();\n  }\n\n  //! Push a task to the queue\n  void enqueue(const ClosureHandler &handle) {\n    this->enqueue(handle, nullptr);\n  }\n\n  //! Push a task to the queue\n  void enqueue(ClosureHandler &&handle) {\n    this->enqueue(std::move(handle), nullptr);\n  }\n\n  //! Push a task to the queue\n  void enqueue_and_wake(const ClosureHandler &handle) {\n    this->enqueue_and_wake(handle, nullptr);\n  }\n\n  //! Push a task to the queue\n  void enqueue_and_wake(ClosureHandler &&handle) {\n    this->enqueue_and_wake(std::move(handle), nullptr);\n  }\n\n  //! Execute a function as a task in pool\n  template <typename... TArgs>\n  void execute_and_wait(TArgs &&...args) {\n    ThreadPool::TaskControl ctrl;\n    this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...), &ctrl);\n    ctrl.wait();\n  }\n\n  //! Execute a function as a task in pool\n  template <typename... 
TArgs>\n  void execute(TArgs &&...args) {\n    this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));\n  }\n\n  //! Wake any one thread\n  void wake_any(void) {\n    std::lock_guard<std::mutex> lock(queue_mutex_);\n    work_cond_.notify_one();\n  }\n\n  //! Wake all threads\n  void wake_all(void) {\n    std::lock_guard<std::mutex> lock(queue_mutex_);\n    work_cond_.notify_all();\n  }\n\n  //! Wait until all threads finished processing\n  void wait_finish(void) {\n    std::unique_lock<std::mutex> lock(wait_mutex_);\n    finished_cond_.wait(lock, [this]() { return this->is_finished(); });\n  }\n\n  //! Wait until all threads stopped processing\n  void wait_stop(void) {\n    std::unique_lock<std::mutex> lock(wait_mutex_);\n    stopped_cond_.wait(lock, [this]() { return this->is_stopped(); });\n  }\n\n  //! Make a task group\n  TaskGroup::Pointer make_group(void) {\n    return std::make_shared<TaskGroup>(this);\n  }\n\n  //! Check if the pool is finished\n  bool is_finished(void) const {\n    return (active_count_ == 0 && pending_count_ == 0);\n  }\n\n  //! Check if the pool is stopped\n  bool is_stopped(void) const {\n    return (worker_count_ == 0);\n  }\n\n  //! Retrieve count of worker in pool\n  size_t worker_count(void) const {\n    return worker_count_.load(std::memory_order_relaxed);\n  }\n\n  //! Retrieve count of pending tasks in pool\n  size_t pending_count(void) const {\n    return pending_count_.load(std::memory_order_relaxed);\n  }\n\n  //! Retrieve count of active tasks in pool\n  size_t active_count(void) const {\n    return active_count_.load(std::memory_order_relaxed);\n  }\n\n  //! Get the thread index via thread id\n  int indexof(const std::thread::id &thread_id) const {\n    for (size_t i = 0; i < pool_.size(); ++i) {\n      if (pool_[i].get_id() == thread_id) {\n        return static_cast<int>(i);\n      }\n    }\n    return -1;\n  }\n\n  //! 
Get the current work thread index\n  int indexof_this(void) const {\n    return this->indexof(std::this_thread::get_id());\n  }\n\n  //! Bind threads to processors\n  void bind(void);\n\n  //! Unbind threads of processors\n  void unbind(void);\n\n protected:\n  //! Thread task control\n  class TaskControl {\n   public:\n    //! Notify task finished\n    void notify(void) {\n      finished_ = true;\n      std::lock_guard<std::mutex> lock(mutex_);\n      cond_.notify_one();\n    }\n\n    //! Wait until task finished\n    void wait(void) {\n      std::unique_lock<std::mutex> lock(mutex_);\n      cond_.wait(lock, [this]() { return finished_.load(); });\n    }\n\n   private:\n    std::atomic_bool finished_{false};\n    std::mutex mutex_{};\n    std::condition_variable cond_{};\n  };\n\n  //! Thread task\n  struct Task {\n    // Constructor\n    Task(const ClosureHandler &h, TaskControl *c) : handle(h), control(c) {}\n\n    // Constructor\n    Task(ClosureHandler &&h, TaskControl *c)\n        : handle(std::move(h)), control(c) {}\n\n    // Constructor\n    Task(const ClosureHandler &h, TaskGroup::Pointer &&g, TaskControl *c)\n        : handle(h), group(std::move(g)), control(c) {}\n\n    // Constructor\n    Task(ClosureHandler &&h, TaskGroup::Pointer &&g, TaskControl *c)\n        : handle(std::move(h)), group(std::move(g)), control(c) {}\n\n    // Constructor\n    Task(void) {}\n\n    //! Members\n    ClosureHandler handle{};\n    TaskGroup::Pointer group{nullptr};\n    TaskControl *control{nullptr};\n  };\n\n  //! Thread worker callback\n  void worker(void);\n\n  //! Pick a task from queue\n  bool picking(Task *task);\n\n  //! Push a task to the queue\n  template <typename T>\n  void enqueue(T &&handle, TaskControl *ctrl) {\n    if (handle) {\n      std::lock_guard<std::mutex> lock(queue_mutex_);\n      ++pending_count_;\n      queue_.emplace(std::forward<T>(handle), ctrl);\n    }\n  }\n\n  //! 
Push a task to the queue with group\n  template <typename T>\n  void enqueue(T &&handle, TaskGroup::Pointer &&group, TaskControl *ctrl) {\n    if (handle) {\n      std::lock_guard<std::mutex> lock(queue_mutex_);\n      ++pending_count_;\n      group->mark_task_enqueued();\n      queue_.emplace(std::forward<T>(handle), std::move(group), ctrl);\n    }\n  }\n\n  //! Push a task to the queue\n  template <typename T>\n  void enqueue_and_wake(T &&handle, TaskControl *ctrl) {\n    if (handle) {\n      std::lock_guard<std::mutex> lock(queue_mutex_);\n      ++pending_count_;\n      queue_.emplace(std::forward<T>(handle), ctrl);\n      work_cond_.notify_one();\n    }\n  }\n\n  //! Push a task to the queue with group\n  template <typename T>\n  void enqueue_and_wake(T &&handle, TaskGroup::Pointer &&group,\n                        TaskControl *ctrl) {\n    if (handle) {\n      std::lock_guard<std::mutex> lock(queue_mutex_);\n      ++pending_count_;\n      group->mark_task_enqueued();\n      queue_.emplace(std::forward<T>(handle), std::move(group), ctrl);\n      work_cond_.notify_one();\n    }\n  }\n\n private:\n  //! Disable them\n  ThreadPool(const ThreadPool &) = delete;\n  ThreadPool(ThreadPool &&) = delete;\n  ThreadPool &operator=(const ThreadPool &) = delete;\n\n  //! Members\n  std::queue<Task> queue_{};\n  std::atomic_bool stopping_{false};\n  std::atomic_uint worker_count_{0};\n  std::atomic_uint active_count_{0};\n  std::atomic_uint pending_count_{0};\n  std::mutex queue_mutex_{};\n  std::mutex wait_mutex_{};\n  std::condition_variable work_cond_{};\n  std::condition_variable finished_cond_{};\n  std::condition_variable stopped_cond_{};\n  std::vector<std::thread> pool_{};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/parallel/thread_queue.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <mutex>\n#include <queue>\n#include <thread>\n#include <utility>\n#include <vector>\n#include <zvec/ailego/hash/jump_hash.h>\n#include <zvec/ailego/pattern/closure.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Thread Queue (One Thread One Queue)\n */\nclass ThreadQueue {\n public:\n  /*! Thread Worker (One Thread One Worker)\n   */\n  class ThreadWorker {\n   public:\n    //! Constructor\n    ThreadWorker(ThreadQueue *owner) : owner_(owner) {}\n\n    //! Destructor\n    ~ThreadWorker(void) {\n      // Join the current thread\n      if (thread_.joinable()) {\n        thread_.join();\n      }\n    }\n\n    //! Push a task to the queue\n    template <typename T>\n    void enqueue(T &&handle) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      queue_.emplace(std::forward<T>(handle));\n    }\n\n    //! Push a task to the queue\n    template <typename T>\n    void enqueue_and_wake(T &&handle) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      queue_.emplace(std::forward<T>(handle));\n      cond_.notify_one();\n    }\n\n    //! Execute a function as a task\n    template <typename... TArgs>\n    void execute(TArgs &&...args) {\n      this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));\n    }\n\n    //! 
Push a task to the queue with high priority\n    template <typename T>\n    void enqueue_high_priority(T &&handle) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      queue_high_priority_.emplace(std::forward<T>(handle));\n    }\n\n    //! Push a task to the queue with high priority and wake\n    template <typename T>\n    void enqueue_high_priority_and_wake(T &&handle) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      queue_high_priority_.emplace(std::forward<T>(handle));\n      cond_.notify_one();\n    }\n\n    //! Execute a task in high priority\n    template <typename... TArgs>\n    void execute_high_priority(TArgs &&...args) {\n      this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));\n    }\n\n    //! Wake the thread\n    void wake(void) {\n      std::lock_guard<std::mutex> lock(mutex_);\n      cond_.notify_one();\n    }\n\n    //! Notify thread stopped\n    void stop(void) {\n      // Set stop flag as true, then wake the thread\n      stopping_ = true;\n      std::lock_guard<std::mutex> lock(mutex_);\n      cond_.notify_one();\n    }\n\n   protected:\n    //! Thread worker callback\n    void worker(void) {\n      owner_->mark_worker_started();\n\n      ClosureHandler task;\n      while (this->picking(&task)) {\n        // Run the task\n        if (task) {\n          task->run();\n          task = nullptr;\n        }\n      }\n      owner_->mark_worker_stopped();\n    }\n\n    //! 
Pick a task from queue\n    bool picking(ClosureHandler *task) {\n      std::unique_lock<std::mutex> latch(mutex_);\n      cond_.wait(latch, [this]() {\n        return (queue_.size() > 0 || queue_high_priority_.size() > 0 ||\n                stopping_);\n      });\n      if (stopping_) {\n        return false;\n      }\n\n      if (!queue_high_priority_.empty()) {\n        *task = std::move(queue_high_priority_.front());\n        queue_high_priority_.pop();\n      } else {\n        *task = std::move(queue_.front());\n        queue_.pop();\n      }\n\n      return true;\n    }\n\n   private:\n    //! Disable them\n    ThreadWorker(void) = delete;\n    ThreadWorker(ThreadWorker &&) = delete;\n    ThreadWorker(const ThreadWorker &) = delete;\n    ThreadWorker &operator=(const ThreadWorker &) = delete;\n\n    //! Members\n    ThreadQueue *owner_{nullptr};\n    std::queue<ClosureHandler> queue_{};\n    std::queue<ClosureHandler> queue_high_priority_{};\n    std::atomic_bool stopping_{false};\n    std::mutex mutex_{};\n    std::condition_variable cond_{};\n    std::thread thread_{&ThreadWorker::worker, this};\n  };\n\n  //! Constructor\n  ThreadQueue(void)\n      : ThreadQueue{std::max(std::thread::hardware_concurrency(), 1u)} {}\n\n  //! Constructor\n  explicit ThreadQueue(uint32_t size) {\n    for (uint32_t i = 0u; i < size; ++i) {\n      threads_.emplace_back(new ThreadWorker(this));\n    }\n  }\n\n  //! Destructor\n  ~ThreadQueue(void) {\n    this->stop();\n    // Cleanup threads\n    for (auto it = threads_.begin(); it != threads_.end(); ++it) {\n      delete *it;\n    }\n  }\n\n  //! operator []\n  ThreadWorker &operator[](size_t i) {\n    return *(threads_[i]);\n  }\n\n  //! Stop the thread\n  void stop(void) {\n    // Stop all workers\n    for (auto it = threads_.begin(); it != threads_.end(); ++it) {\n      (*it)->stop();\n    }\n  }\n\n  //! 
Wake all worker threads\n  void wake(void) {\n    for (auto it = threads_.begin(); it != threads_.end(); ++it) {\n      (*it)->wake();\n    }\n  }\n\n  //! Wait until all threads stopped processing\n  void wait_stop(void) {\n    std::unique_lock<std::mutex> lock(wait_mutex_);\n    stopped_cond_.wait(lock, [this]() { return this->is_stopped(); });\n  }\n\n  //! Check if the pool is stopped\n  bool is_stopped(void) const {\n    return (worker_count_ == 0);\n  }\n\n  //! Retrieve count of worker in queue\n  size_t worker_count(void) const {\n    return worker_count_.load(std::memory_order_relaxed);\n  }\n\n  //! Retrieve thread count in queue\n  size_t count(void) const {\n    return threads_.size();\n  }\n\n  //! Push a task to the queue\n  template <typename T>\n  void enqueue(uint64_t key, T &&handle) {\n    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]->enqueue(\n        std::forward<T>(handle));\n  }\n\n  //! Push a task to the queue\n  template <typename T>\n  void enqueue_and_wake(uint64_t key, T &&handle) {\n    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]\n        ->enqueue_and_wake(std::forward<T>(handle));\n  }\n\n  //! Execute a function as a task in pool\n  template <typename... TArgs>\n  void execute(uint64_t key, TArgs &&...args) {\n    this->enqueue_and_wake(key, Closure::New(std::forward<TArgs>(args)...));\n  }\n\n  //! Push a task to the queue with high priority\n  template <typename T>\n  void enqueue_high_priority(uint64_t key, T &&handle) {\n    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]\n        ->enqueue_high_priority(std::forward<T>(handle));\n  }\n\n  //! Push a task to the queue with high priority and wake\n  template <typename T>\n  void enqueue_high_priority_and_wake(uint64_t key, T &&handle) {\n    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]\n        ->enqueue_high_priority_and_wake(std::forward<T>(handle));\n  }\n\n  //! 
Execute a function as a task in pool with high priority\n  template <typename... TArgs>\n  void execute_high_priority(uint64_t key, TArgs &&...args) {\n    this->enqueue_high_priority_and_wake(\n        key, Closure::New(std::forward<TArgs>(args)...));\n  }\n\n protected:\n  //! Mark a worker started\n  void mark_worker_started(void) {\n    ++worker_count_;\n  }\n\n  //! Mark a worker stopped\n  void mark_worker_stopped(void) {\n    // Decrease count of workers\n    std::lock_guard<std::mutex> lock(wait_mutex_);\n    if (--worker_count_ == 0) {\n      stopped_cond_.notify_all();\n    }\n  }\n\n private:\n  //! Disable them\n  ThreadQueue(const ThreadQueue &) = delete;\n  ThreadQueue(ThreadQueue &&) = delete;\n  ThreadQueue &operator=(const ThreadQueue &) = delete;\n\n  //! Members\n  std::atomic_uint worker_count_{0};\n  std::mutex wait_mutex_{};\n  std::condition_variable stopped_cond_{};\n  std::vector<ThreadWorker *> threads_{};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/pattern/closure.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <tuple>\n#include <type_traits>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Callback Validator (declaration)\n */\ntemplate <typename TFunc>\nstruct CallbackValidator;\n\n/*! Callback Validator (function pointer)\n */\ntemplate <typename R, typename... TParams>\nstruct CallbackValidator<R (*)(TParams...)> {\n  enum { Value = true };\n};\n\n/*! Callback Validator (function)\n */\ntemplate <typename R, typename... TParams>\nstruct CallbackValidator<R(TParams...)> : CallbackValidator<R (*)(TParams...)> {\n};\n\n/*! Callback Validator (member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackValidator<R (T::*)(TParams...)>\n    : CallbackValidator<R (*)(TParams...)> {};\n\n/*! Callback Validator (constable member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackValidator<R (T::*)(TParams...) const>\n    : CallbackValidator<R (*)(TParams...)> {};\n\n/*! Callback Validator (volatile member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackValidator<R (T::*)(TParams...) volatile>\n    : CallbackValidator<R (*)(TParams...)> {};\n\n/*! Callback Validator (constable volatile member function pointer)\n */\ntemplate <typename T, typename R, typename... 
TParams>\nstruct CallbackValidator<R (T::*)(TParams...) const volatile>\n    : CallbackValidator<R (*)(TParams...)> {};\n\n/*! Callback Validator\n */\ntemplate <typename TFunc>\nstruct CallbackValidator {\n protected:\n  using FalseType = long;\n  using TrueType = char;\n\n  //! Check if the class contains operator()\n  template <typename T>\n  static TrueType &Validate(decltype(&T::operator()));\n\n  //! Check if the class contains operator()\n  template <typename T>\n  static FalseType &Validate(...);\n\n public:\n  enum { Value = (sizeof(Validate<TFunc>(nullptr)) == sizeof(TrueType)) };\n};\n\n/*! Callback Validator (left reference)\n */\ntemplate <typename TFunc>\nstruct CallbackValidator<TFunc &> : CallbackValidator<TFunc> {};\n\n/*! Callback Validator (right reference)\n */\ntemplate <typename TFunc>\nstruct CallbackValidator<TFunc &&> : CallbackValidator<TFunc> {};\n\n/*! Callback Traits (declaration)\n */\ntemplate <typename TFunc>\nstruct CallbackTraits;\n\n/*! Callback Traits (function pointer)\n */\ntemplate <typename R, typename... TParams>\nstruct CallbackTraits<R (*)(TParams...)> {\n  using Type = R (*)(TParams...);\n  using ResultType = R;\n  using TupleType = std::tuple<typename std::decay<TParams>::type...>;\n\n  //! Callback Traits Parameter\n  template <size_t N>\n  struct Parameter {\n    using Type = typename std::tuple_element<N, std::tuple<TParams...>>::type;\n  };\n\n  //! Number of parameters\n  enum { Arity = sizeof...(TParams) };\n};\n\n/*! Callback Traits (function)\n */\ntemplate <typename R, typename... TParams>\nstruct CallbackTraits<R(TParams...)> : CallbackTraits<R (*)(TParams...)> {\n  using Type = R (*)(TParams...);\n};\n\n/*! Callback Traits (member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackTraits<R (T::*)(TParams...)>\n    : CallbackTraits<R (*)(TParams...)> {\n  using Type = R (T::*)(TParams...);\n};\n\n/*! 
Callback Traits (constable member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackTraits<R (T::*)(TParams...) const>\n    : CallbackTraits<R (*)(TParams...)> {\n  using Type = R (T::*)(TParams...) const;\n};\n\n/*! Callback Traits (volatile member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackTraits<R (T::*)(TParams...) volatile>\n    : CallbackTraits<R (*)(TParams...)> {\n  using Type = R (T::*)(TParams...) volatile;\n};\n\n/*! Callback Traits (constable volatile member function pointer)\n */\ntemplate <typename T, typename R, typename... TParams>\nstruct CallbackTraits<R (T::*)(TParams...) const volatile>\n    : CallbackTraits<R (*)(TParams...)> {\n  using Type = R (T::*)(TParams...) const volatile;\n};\n\n/*! Callback Traits\n */\ntemplate <typename TFunc>\nstruct CallbackTraits : CallbackTraits<decltype(&TFunc::operator())> {\n  using Type = TFunc;\n};\n\n/*! Callback Traits (left reference)\n */\ntemplate <typename TFunc>\nstruct CallbackTraits<TFunc &> : CallbackTraits<TFunc> {};\n\n/*! Callback Traits (right reference)\n */\ntemplate <typename TFunc>\nstruct CallbackTraits<TFunc &&> : CallbackTraits<TFunc> {};\n\n/*! Callback Functor\n */\ntemplate <typename TFunc>\nstruct CallbackFunctor {\n  using Traits = CallbackTraits<TFunc>;\n  using Type = typename Traits::Type;\n  using ResultType = typename Traits::ResultType;\n  using TupleType = typename Traits::TupleType;\n\n  //! Tuple Index Maker\n  template <size_t N, size_t... I>\n  struct TupleIndexMaker : TupleIndexMaker<N - 1, N - 1, I...> {};\n\n  //! Tuple Index\n  template <size_t...>\n  struct TupleIndex {};\n\n  //! Tuple Index Maker (special)\n  template <size_t... I>\n  struct TupleIndexMaker<0, I...> {\n    using Type = TupleIndex<I...>;\n  };\n\n  //! Run the callback function\n  template <size_t... 
I>\n  static ResultType Run(Type &impl, TupleType &tuple, TupleIndex<I...>) {\n    return (impl)(std::forward<typename Traits::template Parameter<I>::Type>(\n        std::get<I>(tuple))...);\n  }\n\n  //! Run the callback member function\n  template <typename T, size_t... I>\n  static ResultType Run(T *obj, Type &impl, TupleType &tuple,\n                        TupleIndex<I...>) {\n    return (obj->*impl)(\n        std::forward<typename Traits::template Parameter<I>::Type>(\n            std::get<I>(tuple))...);\n  }\n\n  //! Run the callback function\n  static ResultType Run(Type &impl, TupleType &tuple) {\n    return Run(impl, tuple, typename TupleIndexMaker<Traits::Arity>::Type());\n  }\n\n  //! Run the callback member function\n  template <typename T>\n  static ResultType Run(T *obj, Type &impl, TupleType &tuple) {\n    return Run(obj, impl, tuple,\n               typename TupleIndexMaker<Traits::Arity>::Type());\n  }\n};\n\n/*! Callback Object\n */\ntemplate <typename T>\nstruct CallbackObject {\n  using Type = typename std::remove_reference<T>::type;\n};\n\n/*! Callback (declaration)\n */\ntemplate <typename R>\nclass Callback;\n\n/*! Callback (void)\n */\ntemplate <>\nclass Callback<void> {\n public:\n  using Pointer = std::shared_ptr<Callback<void>>;\n\n  //! Destructor\n  virtual ~Callback(void) {}\n\n  //! Function call\n  void operator()(void) {\n    this->run();\n  }\n\n  //! Run the callback function\n  virtual void run(void) = 0;\n\n  //! Create callback closure (member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static typename Callback<R>::Pointer New(T *obj, R (T::*impl)(TParams...),\n                                           TArgs &&...args);\n\n  //! Create callback closure (constable member function pointer)\n  template <typename T, typename R, typename... TParams, typename... 
TArgs>\n  static typename Callback<R>::Pointer New(const T *obj,\n                                           R (T::*impl)(TParams...) const,\n                                           TArgs &&...args);\n\n  //! Create callback closure (volatile member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static typename Callback<R>::Pointer New(volatile T *obj,\n                                           R (T::*impl)(TParams...) volatile,\n                                           TArgs &&...args);\n\n  //! Create callback closure (constable volatile member function pointer)\n  template <typename T, typename R, typename... TParams, typename... TArgs>\n  static typename Callback<R>::Pointer New(const volatile T *obj,\n                                           R (T::*impl)(TParams...)\n                                               const volatile,\n                                           TArgs &&...args);\n\n  //! Create callback closure (function)\n  template <\n      typename TFunc, typename... TArgs,\n      typename = typename std::enable_if<CallbackValidator<TFunc>::Value>::type>\n  static typename Callback<typename CallbackTraits<TFunc>::ResultType>::Pointer\n  New(TFunc &&impl, TArgs &&...args);\n};\n\n/*! Callback\n */\ntemplate <typename R>\nclass Callback : public Callback<void> {\n public:\n  using Pointer = std::shared_ptr<Callback<R>>;\n  using Callback<void>::run;\n\n  //! Function call\n  void operator()(void) {\n    this->run();\n  }\n\n  //! Function call with return\n  void operator()(R *r) {\n    this->run(r);\n  }\n\n  //! Run the callback function\n  virtual void run(R *) = 0;\n\n protected:\n  //! Constructor\n  Callback(void) {};\n};\n\n/*! Callback Implementation\n */\ntemplate <typename T, typename R, typename TFunc>\nclass CallbackImpl : public Callback<R> {\n public:\n  using Object = CallbackObject<T>;\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor\n  template <typename... 
TArgs>\n  CallbackImpl(typename Object::Type *obj, const typename Functor::Type &impl,\n               TArgs &&...args)\n      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(typename Object::Type *obj, typename Functor::Type &&impl,\n               TArgs &&...args)\n      : obj_(obj),\n        impl_(std::move(impl)),\n        tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Run the callback function\n  void run(void) override {\n    Functor::Run(obj_, impl_, tuple_);\n  }\n\n  //! Run the callback function\n  void run(R *r) override {\n    *r = Functor::Run(obj_, impl_, tuple_);\n  }\n\n protected:\n  //! Disable them\n  CallbackImpl(void) = delete;\n  CallbackImpl(const CallbackImpl &) = delete;\n  CallbackImpl(CallbackImpl &&) = delete;\n  CallbackImpl &operator=(const CallbackImpl &) = delete;\n\n private:\n  typename Object::Type *obj_;\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n};\n\n/*! Callback Implementation\n */\ntemplate <typename T, typename TFunc>\nclass CallbackImpl<T, void, TFunc> : public Callback<void> {\n public:\n  using Object = CallbackObject<T>;\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(typename Object::Type *obj, const typename Functor::Type &impl,\n               TArgs &&...args)\n      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(typename Object::Type *obj, typename Functor::Type &&impl,\n               TArgs &&...args)\n      : obj_(obj),\n        impl_(std::move(impl)),\n        tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Run the callback function\n  void run(void) override {\n    Functor::Run(obj_, impl_, tuple_);\n  }\n\n protected:\n  //! 
Disable them\n  CallbackImpl(void) = delete;\n  CallbackImpl(const CallbackImpl &) = delete;\n  CallbackImpl(CallbackImpl &&) = delete;\n  CallbackImpl &operator=(const CallbackImpl &) = delete;\n\n private:\n  typename Object::Type *obj_;\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n};\n\n/*! Callback Implementation\n */\ntemplate <typename R, typename TFunc>\nclass CallbackImpl<void, R, TFunc> : public Callback<R> {\n public:\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(const typename Functor::Type &impl, TArgs &&...args)\n      : impl_(impl), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(typename Functor::Type &&impl, TArgs &&...args)\n      : impl_(std::move(impl)), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Run the callback function\n  void run(void) override {\n    Functor::Run(impl_, tuple_);\n  }\n\n  //! Run the callback function\n  void run(R *r) override {\n    *r = Functor::Run(impl_, tuple_);\n  }\n\n protected:\n  //! Disable them\n  CallbackImpl(void) = delete;\n  CallbackImpl(const CallbackImpl &) = delete;\n  CallbackImpl(CallbackImpl &&) = delete;\n  CallbackImpl &operator=(const CallbackImpl &) = delete;\n\n private:\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n};\n\n/*! Callback Implementation\n */\ntemplate <typename TFunc>\nclass CallbackImpl<void, void, TFunc> : public Callback<void> {\n public:\n  using Functor = CallbackFunctor<TFunc>;\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(const typename Functor::Type &impl, TArgs &&...args)\n      : impl_(impl), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! Constructor\n  template <typename... TArgs>\n  CallbackImpl(typename Functor::Type &&impl, TArgs &&...args)\n      : impl_(std::move(impl)), tuple_(std::forward<TArgs>(args)...) {}\n\n  //! 
Run the callback function\n  void run(void) override {\n    Functor::Run(impl_, tuple_);\n  }\n\n protected:\n  //! Disable them\n  CallbackImpl(void) = delete;\n  CallbackImpl(const CallbackImpl &) = delete;\n  CallbackImpl(CallbackImpl &&) = delete;\n  CallbackImpl &operator=(const CallbackImpl &) = delete;\n\n private:\n  typename Functor::Type impl_;\n  typename Functor::TupleType tuple_;\n};\n\n//! Create callback closure (member function pointer)\ntemplate <typename T, typename R, typename... TParams, typename... TArgs>\ntypename Callback<R>::Pointer Callback<void>::New(T *obj,\n                                                  R (T::*impl)(TParams...),\n                                                  TArgs &&...args) {\n  return std::make_shared<CallbackImpl<T, R, decltype(impl)>>(\n      obj, impl, std::forward<TArgs>(args)...);\n}\n\n//! Create callback closure (constable member function pointer)\ntemplate <typename T, typename R, typename... TParams, typename... TArgs>\ntypename Callback<R>::Pointer Callback<void>::New(const T *obj,\n                                                  R (T::*impl)(TParams...)\n                                                      const,\n                                                  TArgs &&...args) {\n  return std::make_shared<CallbackImpl<const T, R, decltype(impl)>>(\n      obj, impl, std::forward<TArgs>(args)...);\n}\n\n//! Create callback closure (volatile member function pointer)\ntemplate <typename T, typename R, typename... TParams, typename... TArgs>\ntypename Callback<R>::Pointer Callback<void>::New(\n    volatile T *obj, R (T::*impl)(TParams...) volatile, TArgs &&...args) {\n  return std::make_shared<CallbackImpl<volatile T, R, decltype(impl)>>(\n      obj, impl, std::forward<TArgs>(args)...);\n}\n\n//! Create callback closure (constable volatile member function pointer)\ntemplate <typename T, typename R, typename... TParams, typename... 
TArgs>\ntypename Callback<R>::Pointer Callback<void>::New(const volatile T *obj,\n                                                  R (T::*impl)(TParams...)\n                                                      const volatile,\n                                                  TArgs &&...args) {\n  return std::make_shared<CallbackImpl<const volatile T, R, decltype(impl)>>(\n      obj, impl, std::forward<TArgs>(args)...);\n}\n\n//! Create callback closure (function)\ntemplate <typename TFunc, typename... TArgs, typename>\ntypename Callback<typename CallbackTraits<TFunc>::ResultType>::Pointer\nCallback<void>::New(TFunc &&impl, TArgs &&...args) {\n  return std::make_shared<CallbackImpl<\n      void, typename CallbackTraits<TFunc>::ResultType, decltype(impl)>>(\n      std::forward<TFunc>(impl), std::forward<TArgs>(args)...);\n}\n\n//! Callback Handler\ntemplate <typename R>\nusing CallbackHandler = typename Callback<R>::Pointer;\n\n//! Closure\nusing Closure = Callback<void>;\n\n//! Closure Handler\nusing ClosureHandler = Closure::Pointer;\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/pattern/expected.hpp",
    "content": "///\n// expected - An implementation of std::expected with extensions\n// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama)\n//\n// Documentation available at http://tl.tartanllama.xyz/\n//\n// To the extent possible under law, the author(s) have dedicated all\n// copyright and related and neighboring rights to this software to the\n// public domain worldwide. This software is distributed without any warranty.\n//\n// You should have received a copy of the CC0 Public Domain Dedication\n// along with this software. If not, see\n// <http://creativecommons.org/publicdomain/zero/1.0/>.\n///\n\n#ifndef TL_EXPECTED_HPP\n#define TL_EXPECTED_HPP\n\n#define TL_EXPECTED_VERSION_MAJOR 1\n#define TL_EXPECTED_VERSION_MINOR 2\n#define TL_EXPECTED_VERSION_PATCH 0\n\n#include <exception>\n#include <functional>\n#include <type_traits>\n#include <utility>\n\n#if defined(__EXCEPTIONS) || defined(_CPPUNWIND)\n#define TL_EXPECTED_EXCEPTIONS_ENABLED\n#endif\n\n#if (defined(_MSC_VER) && _MSC_VER == 1900)\n#define TL_EXPECTED_MSVC2015\n#define TL_EXPECTED_MSVC2015_CONSTEXPR\n#else\n#define TL_EXPECTED_MSVC2015_CONSTEXPR constexpr\n#endif\n\n#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \\\n     !defined(__clang__))\n#define TL_EXPECTED_GCC49\n#endif\n\n#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \\\n     !defined(__clang__))\n#define TL_EXPECTED_GCC54\n#endif\n\n#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \\\n     !defined(__clang__))\n#define TL_EXPECTED_GCC55\n#endif\n\n#if !defined(TL_ASSERT)\n// can't have assert in constexpr in C++11 and GCC 4.9 has a compiler bug\n#if (TL_CPLUSPLUS > 201103L) && !defined(TL_EXPECTED_GCC49)\n#include <cassert>\n#define TL_ASSERT(x) assert(x)\n#else\n#define TL_ASSERT(x)\n#endif\n#endif\n\n#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \\\n     !defined(__clang__))\n// GCC < 5 doesn't support overloading on const&& for member 
functions\n\n#define TL_EXPECTED_NO_CONSTRR\n// GCC < 5 doesn't support some standard C++11 type traits\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \\\n  std::has_trivial_copy_constructor<T>\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \\\n  std::has_trivial_copy_assign<T>\n\n// This one will be different for GCC 5.7 if it's ever supported\n#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \\\n  std::is_trivially_destructible<T>\n\n// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks\n// std::vector for non-copyable types\n#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__))\n#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX\n#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX\nnamespace tl {\nnamespace detail {\ntemplate <class T>\nstruct is_trivially_copy_constructible\n    : std::is_trivially_copy_constructible<T> {};\n#ifdef _GLIBCXX_VECTOR\ntemplate <class T, class A>\nstruct is_trivially_copy_constructible<std::vector<T, A>> : std::false_type {};\n#endif\n}  // namespace detail\n}  // namespace tl\n#endif\n\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \\\n  tl::detail::is_trivially_copy_constructible<T>\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \\\n  std::is_trivially_copy_assignable<T>\n#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \\\n  std::is_trivially_destructible<T>\n#else\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \\\n  std::is_trivially_copy_constructible<T>\n#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \\\n  std::is_trivially_copy_assignable<T>\n#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \\\n  std::is_trivially_destructible<T>\n#endif\n\n#ifdef _MSVC_LANG\n#define TL_CPLUSPLUS _MSVC_LANG\n#else\n#define TL_CPLUSPLUS __cplusplus\n#endif\n\n#if TL_CPLUSPLUS > 201103L\n#define TL_EXPECTED_CXX14\n#endif\n\n#ifdef TL_EXPECTED_GCC49\n#define TL_EXPECTED_GCC49_CONSTEXPR\n#else\n#define TL_EXPECTED_GCC49_CONSTEXPR constexpr\n#endif\n\n#if 
(TL_CPLUSPLUS == 201103L || defined(TL_EXPECTED_MSVC2015) || \\\n     defined(TL_EXPECTED_GCC49))\n#define TL_EXPECTED_11_CONSTEXPR\n#else\n#define TL_EXPECTED_11_CONSTEXPR constexpr\n#endif\n\n#if TL_CPLUSPLUS >= 201703L\n#define TL_EXPECTED_NODISCARD [[nodiscard]]\n#else\n#define TL_EXPECTED_NODISCARD\n#endif\n\nnamespace tl {\ntemplate <class T, class E>\nclass TL_EXPECTED_NODISCARD expected;\n\n#ifndef TL_MONOSTATE_INPLACE_MUTEX\n#define TL_MONOSTATE_INPLACE_MUTEX\nclass monostate {};\n\nstruct in_place_t {\n  explicit in_place_t() = default;\n};\nstatic constexpr in_place_t in_place{};\n#endif\n\ntemplate <class E>\nclass unexpected {\n public:\n  static_assert(!std::is_same<E, void>::value, \"E must not be void\");\n\n  unexpected() = delete;\n  constexpr explicit unexpected(const E &e) : m_val(e) {}\n\n  constexpr explicit unexpected(E &&e) : m_val(std::move(e)) {}\n\n  template <class... Args, typename std::enable_if<std::is_constructible<\n                               E, Args &&...>::value>::type * = nullptr>\n  constexpr explicit unexpected(Args &&...args)\n      : m_val(std::forward<Args>(args)...) {}\n  template <\n      class U, class... Args,\n      typename std::enable_if<std::is_constructible<\n          E, std::initializer_list<U> &, Args &&...>::value>::type * = nullptr>\n  constexpr explicit unexpected(std::initializer_list<U> l, Args &&...args)\n      : m_val(l, std::forward<Args>(args)...) 
{}\n\n  constexpr const E &value() const & {\n    return m_val;\n  }\n  TL_EXPECTED_11_CONSTEXPR E &value() & {\n    return m_val;\n  }\n  TL_EXPECTED_11_CONSTEXPR E &&value() && {\n    return std::move(m_val);\n  }\n  constexpr const E &&value() const && {\n    return std::move(m_val);\n  }\n\n private:\n  E m_val;\n};\n\n#ifdef __cpp_deduction_guides\ntemplate <class E>\nunexpected(E) -> unexpected<E>;\n#endif\n\ntemplate <class E>\nconstexpr bool operator==(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() == rhs.value();\n}\ntemplate <class E>\nconstexpr bool operator!=(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() != rhs.value();\n}\ntemplate <class E>\nconstexpr bool operator<(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() < rhs.value();\n}\ntemplate <class E>\nconstexpr bool operator<=(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() <= rhs.value();\n}\ntemplate <class E>\nconstexpr bool operator>(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() > rhs.value();\n}\ntemplate <class E>\nconstexpr bool operator>=(const unexpected<E> &lhs, const unexpected<E> &rhs) {\n  return lhs.value() >= rhs.value();\n}\n\ntemplate <class E>\nunexpected<typename std::decay<E>::type> make_unexpected(E &&e) {\n  return unexpected<typename std::decay<E>::type>(std::forward<E>(e));\n}\n\nstruct unexpect_t {\n  unexpect_t() = default;\n};\nstatic constexpr unexpect_t unexpect{};\n\nnamespace detail {\ntemplate <typename E>\n[[noreturn]] TL_EXPECTED_11_CONSTEXPR void throw_exception(E &&e) {\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n  throw std::forward<E>(e);\n#else\n  (void)e;\n#ifdef _MSC_VER\n  __assume(0);\n#else\n  __builtin_unreachable();\n#endif\n#endif\n}\n\n#ifndef TL_TRAITS_MUTEX\n#define TL_TRAITS_MUTEX\n// C++14-style aliases for brevity\ntemplate <class T>\nusing remove_const_t = typename std::remove_const<T>::type;\ntemplate <class 
T>\nusing remove_reference_t = typename std::remove_reference<T>::type;\ntemplate <class T>\nusing decay_t = typename std::decay<T>::type;\ntemplate <bool E, class T = void>\nusing enable_if_t = typename std::enable_if<E, T>::type;\ntemplate <bool B, class T, class F>\nusing conditional_t = typename std::conditional<B, T, F>::type;\n\n// std::conjunction from C++17\ntemplate <class...>\nstruct conjunction : std::true_type {};\ntemplate <class B>\nstruct conjunction<B> : B {};\ntemplate <class B, class... Bs>\nstruct conjunction<B, Bs...>\n    : std::conditional<bool(B::value), conjunction<Bs...>, B>::type {};\n\n#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L\n#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND\n#endif\n\n// In C++11 mode, there's an issue in libc++'s std::mem_fn\n// which results in a hard-error when using it in a noexcept expression\n// in some cases. This is a check to workaround the common failing case.\n#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND\ntemplate <class T>\nstruct is_pointer_to_non_const_member_func : std::false_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...)>\n    : std::true_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &>\n    : std::true_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &&>\n    : std::true_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile>\n    : std::true_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile &>\n    : std::true_type {};\ntemplate <class T, class Ret, class... Args>\nstruct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) 
volatile &&>\n    : std::true_type {};\n\ntemplate <class T>\nstruct is_const_or_const_ref : std::false_type {};\ntemplate <class T>\nstruct is_const_or_const_ref<T const &> : std::true_type {};\ntemplate <class T>\nstruct is_const_or_const_ref<T const> : std::true_type {};\n#endif\n\n// std::invoke from C++17\n// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround\ntemplate <\n    typename Fn, typename... Args,\n#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND\n    typename = enable_if_t<!(is_pointer_to_non_const_member_func<Fn>::value &&\n                             is_const_or_const_ref<Args...>::value)>,\n#endif\n    typename = enable_if_t<std::is_member_pointer<decay_t<Fn>>::value>, int = 0>\nconstexpr auto invoke(Fn &&f, Args &&...args) noexcept(\n    noexcept(std::mem_fn(f)(std::forward<Args>(args)...)))\n    -> decltype(std::mem_fn(f)(std::forward<Args>(args)...)) {\n  return std::mem_fn(f)(std::forward<Args>(args)...);\n}\n\ntemplate <typename Fn, typename... Args,\n          typename = enable_if_t<!std::is_member_pointer<decay_t<Fn>>::value>>\nconstexpr auto invoke(Fn &&f, Args &&...args) noexcept(\n    noexcept(std::forward<Fn>(f)(std::forward<Args>(args)...)))\n    -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...)) {\n  return std::forward<Fn>(f)(std::forward<Args>(args)...);\n}\n\n// std::invoke_result from C++17\ntemplate <class F, class, class... Us>\nstruct invoke_result_impl;\n\ntemplate <class F, class... Us>\nstruct invoke_result_impl<\n    F,\n    decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...), void()),\n    Us...> {\n  using type =\n      decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...));\n};\n\ntemplate <class F, class... Us>\nusing invoke_result = invoke_result_impl<F, void, Us...>;\n\ntemplate <class F, class... 
Us>\nusing invoke_result_t = typename invoke_result<F, Us...>::type;\n\n#if defined(_MSC_VER) && _MSC_VER <= 1900\n// TODO make a version which works with MSVC 2015\ntemplate <class T, class U = T>\nstruct is_swappable : std::true_type {};\n\ntemplate <class T, class U = T>\nstruct is_nothrow_swappable : std::true_type {};\n#else\n// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept\nnamespace swap_adl_tests {\n// if swap ADL finds this then it would call std::swap otherwise (same\n// signature)\nstruct tag {};\n\ntemplate <class T>\ntag swap(T &, T &);\ntemplate <class T, std::size_t N>\ntag swap(T (&a)[N], T (&b)[N]);\n\n// helper functions to test if an unqualified swap is possible, and if it\n// becomes std::swap\ntemplate <class, class>\nstd::false_type can_swap(...) noexcept(false);\ntemplate <class T, class U,\n          class = decltype(swap(std::declval<T &>(), std::declval<U &>()))>\nstd::true_type can_swap(int) noexcept(noexcept(swap(std::declval<T &>(),\n                                                    std::declval<U &>())));\n\ntemplate <class, class>\nstd::false_type uses_std(...);\ntemplate <class T, class U>\nstd::is_same<decltype(swap(std::declval<T &>(), std::declval<U &>())), tag>\nuses_std(int);\n\ntemplate <class T>\nstruct is_std_swap_noexcept\n    : std::integral_constant<bool,\n                             std::is_nothrow_move_constructible<T>::value &&\n                                 std::is_nothrow_move_assignable<T>::value> {};\n\ntemplate <class T, std::size_t N>\nstruct is_std_swap_noexcept<T[N]> : is_std_swap_noexcept<T> {};\n\ntemplate <class T, class U>\nstruct is_adl_swap_noexcept\n    : std::integral_constant<bool, noexcept(can_swap<T, U>(0))> {};\n}  // namespace swap_adl_tests\n\ntemplate <class T, class U = T>\nstruct is_swappable\n    : std::integral_constant<\n          bool,\n          decltype(detail::swap_adl_tests::can_swap<T, U>(0))::value 
&&\n              (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value ||\n               (std::is_move_assignable<T>::value &&\n                std::is_move_constructible<T>::value))> {};\n\ntemplate <class T, std::size_t N>\nstruct is_swappable<T[N], T[N]>\n    : std::integral_constant<\n          bool,\n          decltype(detail::swap_adl_tests::can_swap<T[N], T[N]>(0))::value &&\n              (!decltype(detail::swap_adl_tests::uses_std<T[N], T[N]>(\n                   0))::value ||\n               is_swappable<T, T>::value)> {};\n\ntemplate <class T, class U = T>\nstruct is_nothrow_swappable\n    : std::integral_constant<\n          bool,\n          is_swappable<T, U>::value &&\n              ((decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value &&\n                detail::swap_adl_tests::is_std_swap_noexcept<T>::value) ||\n               (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value &&\n                detail::swap_adl_tests::is_adl_swap_noexcept<T, U>::value))> {};\n#endif\n#endif\n\n// Trait for checking if a type is a tl::expected\ntemplate <class T>\nstruct is_expected_impl : std::false_type {};\ntemplate <class T, class E>\nstruct is_expected_impl<expected<T, E>> : std::true_type {};\ntemplate <class T>\nusing is_expected = is_expected_impl<decay_t<T>>;\n\ntemplate <class T, class E, class U>\nusing expected_enable_forward_value = detail::enable_if_t<\n    std::is_constructible<T, U &&>::value &&\n    !std::is_same<detail::decay_t<U>, in_place_t>::value &&\n    !std::is_same<expected<T, E>, detail::decay_t<U>>::value &&\n    !std::is_same<unexpected<E>, detail::decay_t<U>>::value>;\n\ntemplate <class T, class E, class U, class G, class UR, class GR>\nusing expected_enable_from_other = detail::enable_if_t<\n    std::is_constructible<T, UR>::value &&\n    std::is_constructible<E, GR>::value &&\n    !std::is_constructible<T, expected<U, G> &>::value &&\n    !std::is_constructible<T, expected<U, G> &&>::value &&\n    
!std::is_constructible<T, const expected<U, G> &>::value &&\n    !std::is_constructible<T, const expected<U, G> &&>::value &&\n    !std::is_convertible<expected<U, G> &, T>::value &&\n    !std::is_convertible<expected<U, G> &&, T>::value &&\n    !std::is_convertible<const expected<U, G> &, T>::value &&\n    !std::is_convertible<const expected<U, G> &&, T>::value>;\n\ntemplate <class T, class U>\nusing is_void_or = conditional_t<std::is_void<T>::value, std::true_type, U>;\n\ntemplate <class T>\nusing is_copy_constructible_or_void =\n    is_void_or<T, std::is_copy_constructible<T>>;\n\ntemplate <class T>\nusing is_move_constructible_or_void =\n    is_void_or<T, std::is_move_constructible<T>>;\n\ntemplate <class T>\nusing is_copy_assignable_or_void = is_void_or<T, std::is_copy_assignable<T>>;\n\ntemplate <class T>\nusing is_move_assignable_or_void = is_void_or<T, std::is_move_assignable<T>>;\n\n}  // namespace detail\n\nnamespace detail {\nstruct no_init_t {};\nstatic constexpr no_init_t no_init{};\n\n// Implements the storage of the values, and ensures that the destructor is\n// trivial if it can be.\n//\n// This specialization is for where neither `T` or `E` is trivially\n// destructible, so the destructors must be called on destruction of the\n// `expected`\ntemplate <class T, class E, bool = std::is_trivially_destructible<T>::value,\n          bool = std::is_trivially_destructible<E>::value>\nstruct expected_storage_base {\n  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}\n  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =\n                nullptr>\n  constexpr expected_storage_base(in_place_t, Args &&...args)\n      : m_val(std::forward<Args>(args)...), m_has_val(true) {}\n\n  template <class U, class... 
Args,\n            detail::enable_if_t<std::is_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,\n                                  Args &&...args)\n      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  ~expected_storage_base() {\n    if (m_has_val) {\n      m_val.~T();\n    } else {\n      m_unexpect.~unexpected<E>();\n    }\n  }\n  union {\n    T m_val;\n    unexpected<E> m_unexpect;\n    char m_no_init;\n  };\n  bool m_has_val;\n};\n\n// This specialization is for when both `T` and `E` are trivially-destructible,\n// so the destructor of the `expected` can be trivial.\ntemplate <class T, class E>\nstruct expected_storage_base<T, E, true, true> {\n  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}\n  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =\n                nullptr>\n  constexpr expected_storage_base(in_place_t, Args &&...args)\n      : m_val(std::forward<Args>(args)...), m_has_val(true) {}\n\n  template <class U, class... 
Args,\n            detail::enable_if_t<std::is_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,\n                                  Args &&...args)\n      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  expected_storage_base(const expected_storage_base &) = default;\n  expected_storage_base(expected_storage_base &&) = default;\n  expected_storage_base &operator=(const expected_storage_base &) = default;\n  expected_storage_base &operator=(expected_storage_base &&) = default;\n  ~expected_storage_base() = default;\n  union {\n    T m_val;\n    unexpected<E> m_unexpect;\n    char m_no_init;\n  };\n  bool m_has_val;\n};\n\n// T is trivial, E is not.\ntemplate <class T, class E>\nstruct expected_storage_base<T, E, true, false> {\n  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}\n  TL_EXPECTED_MSVC2015_CONSTEXPR expected_storage_base(no_init_t)\n      : m_no_init(), m_has_val(false) {}\n\n  template <class... 
Args,\n            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =\n                nullptr>\n  constexpr expected_storage_base(in_place_t, Args &&...args)\n      : m_val(std::forward<Args>(args)...), m_has_val(true) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,\n                                  Args &&...args)\n      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  expected_storage_base(const expected_storage_base &) = default;\n  expected_storage_base(expected_storage_base &&) = default;\n  expected_storage_base &operator=(const expected_storage_base &) = default;\n  expected_storage_base &operator=(expected_storage_base &&) = default;\n  ~expected_storage_base() {\n    if (!m_has_val) {\n      m_unexpect.~unexpected<E>();\n    }\n  }\n\n  union {\n    T m_val;\n    unexpected<E> m_unexpect;\n    char m_no_init;\n  };\n  bool m_has_val;\n};\n\n// E is trivial, T is not.\ntemplate <class T, class E>\nstruct expected_storage_base<T, E, false, true> {\n  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) 
{}\n  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =\n                nullptr>\n  constexpr expected_storage_base(in_place_t, Args &&...args)\n      : m_val(std::forward<Args>(args)...), m_has_val(true) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,\n                                  Args &&...args)\n      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... 
Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  expected_storage_base(const expected_storage_base &) = default;\n  expected_storage_base(expected_storage_base &&) = default;\n  expected_storage_base &operator=(const expected_storage_base &) = default;\n  expected_storage_base &operator=(expected_storage_base &&) = default;\n  ~expected_storage_base() {\n    if (m_has_val) {\n      m_val.~T();\n    }\n  }\n  union {\n    T m_val;\n    unexpected<E> m_unexpect;\n    char m_no_init;\n  };\n  bool m_has_val;\n};\n\n// `T` is `void`, `E` is trivially-destructible\ntemplate <class E>\nstruct expected_storage_base<void, E, false, true> {\n#if __GNUC__ <= 5\n// no constexpr for GCC 4/5 bug\n#else\n  TL_EXPECTED_MSVC2015_CONSTEXPR\n#endif\n  expected_storage_base() : m_has_val(true) {}\n\n  constexpr expected_storage_base(no_init_t) : m_val(), m_has_val(false) {}\n\n  constexpr expected_storage_base(in_place_t) : m_has_val(true) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... 
Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  expected_storage_base(const expected_storage_base &) = default;\n  expected_storage_base(expected_storage_base &&) = default;\n  expected_storage_base &operator=(const expected_storage_base &) = default;\n  expected_storage_base &operator=(expected_storage_base &&) = default;\n  ~expected_storage_base() = default;\n  struct dummy {};\n  union {\n    unexpected<E> m_unexpect;\n    dummy m_val;\n  };\n  bool m_has_val;\n};\n\n// `T` is `void`, `E` is not trivially-destructible\ntemplate <class E>\nstruct expected_storage_base<void, E, false, false> {\n  constexpr expected_storage_base() : m_dummy(), m_has_val(true) {}\n  constexpr expected_storage_base(no_init_t) : m_dummy(), m_has_val(false) {}\n\n  constexpr expected_storage_base(in_place_t) : m_dummy(), m_has_val(true) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)\n      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}\n\n  template <class U, class... 
Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected_storage_base(unexpect_t,\n                                           std::initializer_list<U> il,\n                                           Args &&...args)\n      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}\n\n  expected_storage_base(const expected_storage_base &) = default;\n  expected_storage_base(expected_storage_base &&) = default;\n  expected_storage_base &operator=(const expected_storage_base &) = default;\n  expected_storage_base &operator=(expected_storage_base &&) = default;\n  ~expected_storage_base() {\n    if (!m_has_val) {\n      m_unexpect.~unexpected<E>();\n    }\n  }\n\n  union {\n    unexpected<E> m_unexpect;\n    char m_dummy;\n  };\n  bool m_has_val;\n};\n\n// This base class provides some handy member functions which can be used in\n// further derived classes\ntemplate <class T, class E>\nstruct expected_operations_base : expected_storage_base<T, E> {\n  using expected_storage_base<T, E>::expected_storage_base;\n\n  template <class... Args>\n  void construct(Args &&...args) noexcept {\n    new (std::addressof(this->m_val)) T(std::forward<Args>(args)...);\n    this->m_has_val = true;\n  }\n\n  template <class Rhs>\n  void construct_with(Rhs &&rhs) noexcept {\n    new (std::addressof(this->m_val)) T(std::forward<Rhs>(rhs).get());\n    this->m_has_val = true;\n  }\n\n  template <class... 
Args>\n  void construct_error(Args &&...args) noexcept {\n    new (std::addressof(this->m_unexpect))\n        unexpected<E>(std::forward<Args>(args)...);\n    this->m_has_val = false;\n  }\n\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n\n  // These assign overloads ensure that the most efficient assignment\n  // implementation is used while maintaining the strong exception guarantee.\n  // The problematic case is where rhs has a value, but *this does not.\n  //\n  // This overload handles the case where we can just copy-construct `T`\n  // directly into place without throwing.\n  template <class U = T,\n            detail::enable_if_t<std::is_nothrow_copy_constructible<U>::value>\n                * = nullptr>\n  void assign(const expected_operations_base &rhs) noexcept {\n    if (!this->m_has_val && rhs.m_has_val) {\n      geterr().~unexpected<E>();\n      construct(rhs.get());\n    } else {\n      assign_common(rhs);\n    }\n  }\n\n  // This overload handles the case where we can attempt to create a copy of\n  // `T`, then no-throw move it into place if the copy was successful.\n  template <class U = T,\n            detail::enable_if_t<!std::is_nothrow_copy_constructible<U>::value &&\n                                std::is_nothrow_move_constructible<U>::value>\n                * = nullptr>\n  void assign(const expected_operations_base &rhs) noexcept {\n    if (!this->m_has_val && rhs.m_has_val) {\n      T tmp = rhs.get();\n      geterr().~unexpected<E>();\n      construct(std::move(tmp));\n    } else {\n      assign_common(rhs);\n    }\n  }\n\n  // This overload is the worst-case, where we have to move-construct the\n  // unexpected value into temporary storage, then try to copy the T into place.\n  // If the construction succeeds, then everything is fine, but if it throws,\n  // then we move the old unexpected value back into place before rethrowing the\n  // exception.\n  template <class U = T,\n            
detail::enable_if_t<!std::is_nothrow_copy_constructible<U>::value &&\n                                !std::is_nothrow_move_constructible<U>::value>\n                * = nullptr>\n  void assign(const expected_operations_base &rhs) {\n    if (!this->m_has_val && rhs.m_has_val) {\n      auto tmp = std::move(geterr());\n      geterr().~unexpected<E>();\n\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n      try {\n        construct(rhs.get());\n      } catch (...) {\n        geterr() = std::move(tmp);\n        throw;\n      }\n#else\n      construct(rhs.get());\n#endif\n    } else {\n      assign_common(rhs);\n    }\n  }\n\n  // These overloads do the same as above, but for rvalues\n  template <class U = T,\n            detail::enable_if_t<std::is_nothrow_move_constructible<U>::value>\n                * = nullptr>\n  void assign(expected_operations_base &&rhs) noexcept {\n    if (!this->m_has_val && rhs.m_has_val) {\n      geterr().~unexpected<E>();\n      construct(std::move(rhs).get());\n    } else {\n      assign_common(std::move(rhs));\n    }\n  }\n\n  template <class U = T,\n            detail::enable_if_t<!std::is_nothrow_move_constructible<U>::value>\n                * = nullptr>\n  void assign(expected_operations_base &&rhs) {\n    if (!this->m_has_val && rhs.m_has_val) {\n      auto tmp = std::move(geterr());\n      geterr().~unexpected<E>();\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n      try {\n        construct(std::move(rhs).get());\n      } catch (...) 
{\n        geterr() = std::move(tmp);\n        throw;\n      }\n#else\n      construct(std::move(rhs).get());\n#endif\n    } else {\n      assign_common(std::move(rhs));\n    }\n  }\n\n#else\n\n  // If exceptions are disabled then we can just copy-construct\n  void assign(const expected_operations_base &rhs) noexcept {\n    if (!this->m_has_val && rhs.m_has_val) {\n      geterr().~unexpected<E>();\n      construct(rhs.get());\n    } else {\n      assign_common(rhs);\n    }\n  }\n\n  void assign(expected_operations_base &&rhs) noexcept {\n    if (!this->m_has_val && rhs.m_has_val) {\n      geterr().~unexpected<E>();\n      construct(std::move(rhs).get());\n    } else {\n      assign_common(std::move(rhs));\n    }\n  }\n\n#endif\n\n  // The common part of move/copy assigning\n  template <class Rhs>\n  void assign_common(Rhs &&rhs) {\n    if (this->m_has_val) {\n      if (rhs.m_has_val) {\n        get() = std::forward<Rhs>(rhs).get();\n      } else {\n        destroy_val();\n        construct_error(std::forward<Rhs>(rhs).geterr());\n      }\n    } else {\n      if (!rhs.m_has_val) {\n        geterr() = std::forward<Rhs>(rhs).geterr();\n      }\n    }\n  }\n\n  bool has_value() const {\n    return this->m_has_val;\n  }\n\n  TL_EXPECTED_11_CONSTEXPR T &get() & {\n    return this->m_val;\n  }\n  constexpr const T &get() const & {\n    return this->m_val;\n  }\n  TL_EXPECTED_11_CONSTEXPR T &&get() && {\n    return std::move(this->m_val);\n  }\n#ifndef TL_EXPECTED_NO_CONSTRR\n  constexpr const T &&get() const && {\n    return std::move(this->m_val);\n  }\n#endif\n\n  TL_EXPECTED_11_CONSTEXPR unexpected<E> &geterr() & {\n    return this->m_unexpect;\n  }\n  constexpr const unexpected<E> &geterr() const & {\n    return this->m_unexpect;\n  }\n  TL_EXPECTED_11_CONSTEXPR unexpected<E> &&geterr() && {\n    return std::move(this->m_unexpect);\n  }\n#ifndef TL_EXPECTED_NO_CONSTRR\n  constexpr const unexpected<E> &&geterr() const && {\n    return std::move(this->m_unexpect);\n  
}\n#endif\n\n  TL_EXPECTED_11_CONSTEXPR void destroy_val() {\n    get().~T();\n  }\n};\n\n// This base class provides some handy member functions which can be used in\n// further derived classes\ntemplate <class E>\nstruct expected_operations_base<void, E> : expected_storage_base<void, E> {\n  using expected_storage_base<void, E>::expected_storage_base;\n\n  template <class... Args>\n  void construct() noexcept {\n    this->m_has_val = true;\n  }\n\n  // This function doesn't use its argument, but needs it so that code in\n  // levels above this can work independently of whether T is void\n  template <class Rhs>\n  void construct_with(Rhs &&) noexcept {\n    this->m_has_val = true;\n  }\n\n  template <class... Args>\n  void construct_error(Args &&...args) noexcept {\n    new (std::addressof(this->m_unexpect))\n        unexpected<E>(std::forward<Args>(args)...);\n    this->m_has_val = false;\n  }\n\n  template <class Rhs>\n  void assign(Rhs &&rhs) noexcept {\n    if (!this->m_has_val) {\n      if (rhs.m_has_val) {\n        geterr().~unexpected<E>();\n        construct();\n      } else {\n        geterr() = std::forward<Rhs>(rhs).geterr();\n      }\n    } else {\n      if (!rhs.m_has_val) {\n        construct_error(std::forward<Rhs>(rhs).geterr());\n      }\n    }\n  }\n\n  bool has_value() const {\n    return this->m_has_val;\n  }\n\n  TL_EXPECTED_11_CONSTEXPR unexpected<E> &geterr() & {\n    return this->m_unexpect;\n  }\n  constexpr const unexpected<E> &geterr() const & {\n    return this->m_unexpect;\n  }\n  TL_EXPECTED_11_CONSTEXPR unexpected<E> &&geterr() && {\n    return std::move(this->m_unexpect);\n  }\n#ifndef TL_EXPECTED_NO_CONSTRR\n  constexpr const unexpected<E> &&geterr() const && {\n    return std::move(this->m_unexpect);\n  }\n#endif\n\n  TL_EXPECTED_11_CONSTEXPR void destroy_val() {\n    // no-op\n  }\n};\n\n// This class manages conditionally having a trivial copy constructor\n// This specialization is for when T and E are trivially copy 
constructible\ntemplate <class T, class E,\n          bool = is_void_or<T, TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(\n                                   T)>::value &&\n                 TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value,\n          bool = (is_copy_constructible_or_void<T>::value &&\n                  std::is_copy_constructible<E>::value)>\nstruct expected_copy_base : expected_operations_base<T, E> {\n  using expected_operations_base<T, E>::expected_operations_base;\n};\n\n// This specialization is for when T or E are non-trivially copy constructible\ntemplate <class T, class E>\nstruct expected_copy_base<T, E, false, true> : expected_operations_base<T, E> {\n  using expected_operations_base<T, E>::expected_operations_base;\n\n  expected_copy_base() = default;\n  expected_copy_base(const expected_copy_base &rhs)\n      : expected_operations_base<T, E>(no_init) {\n    if (rhs.has_value()) {\n      this->construct_with(rhs);\n    } else {\n      this->construct_error(rhs.geterr());\n    }\n  }\n\n  expected_copy_base(expected_copy_base &&rhs) = default;\n  expected_copy_base &operator=(const expected_copy_base &rhs) = default;\n  expected_copy_base &operator=(expected_copy_base &&rhs) = default;\n};\n\n// This class manages conditionally having a trivial move constructor\n// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it\n// doesn't implement an analogue to std::is_trivially_move_constructible. 
We\n// have to make do with a non-trivial move constructor even if T is trivially\n// move constructible\n#ifndef TL_EXPECTED_GCC49\ntemplate <class T, class E,\n          bool =\n              is_void_or<T, std::is_trivially_move_constructible<T>>::value &&\n              std::is_trivially_move_constructible<E>::value>\nstruct expected_move_base : expected_copy_base<T, E> {\n  using expected_copy_base<T, E>::expected_copy_base;\n};\n#else\ntemplate <class T, class E, bool = false>\nstruct expected_move_base;\n#endif\ntemplate <class T, class E>\nstruct expected_move_base<T, E, false> : expected_copy_base<T, E> {\n  using expected_copy_base<T, E>::expected_copy_base;\n\n  expected_move_base() = default;\n  expected_move_base(const expected_move_base &rhs) = default;\n\n  expected_move_base(expected_move_base &&rhs) noexcept(\n      std::is_nothrow_move_constructible<T>::value)\n      : expected_copy_base<T, E>(no_init) {\n    if (rhs.has_value()) {\n      this->construct_with(std::move(rhs));\n    } else {\n      this->construct_error(std::move(rhs.geterr()));\n    }\n  }\n  expected_move_base &operator=(const expected_move_base &rhs) = default;\n  expected_move_base &operator=(expected_move_base &&rhs) = default;\n};\n\n// This class manages conditionally having a trivial copy assignment operator\ntemplate <\n    class T, class E,\n    bool =\n        is_void_or<\n            T, conjunction<TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T),\n                           TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T),\n                           TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T)>>::value &&\n        TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(E)::value &&\n        TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value &&\n        TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(E)::value,\n    bool = (is_copy_constructible_or_void<T>::value &&\n            std::is_copy_constructible<E>::value &&\n            is_copy_assignable_or_void<T>::value &&\n            
std::is_copy_assignable<E>::value)>\nstruct expected_copy_assign_base : expected_move_base<T, E> {\n  using expected_move_base<T, E>::expected_move_base;\n};\n\ntemplate <class T, class E>\nstruct expected_copy_assign_base<T, E, false, true> : expected_move_base<T, E> {\n  using expected_move_base<T, E>::expected_move_base;\n\n  expected_copy_assign_base() = default;\n  expected_copy_assign_base(const expected_copy_assign_base &rhs) = default;\n\n  expected_copy_assign_base(expected_copy_assign_base &&rhs) = default;\n  expected_copy_assign_base &operator=(const expected_copy_assign_base &rhs) {\n    this->assign(rhs);\n    return *this;\n  }\n  expected_copy_assign_base &operator=(expected_copy_assign_base &&rhs) =\n      default;\n};\n\n// This class manages conditionally having a trivial move assignment operator\n// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it\n// doesn't implement an analogue to std::is_trivially_move_assignable. We have\n// to make do with a non-trivial move assignment operator even if T is trivially\n// move assignable\n#ifndef TL_EXPECTED_GCC49\ntemplate <\n    class T, class E,\n    bool = is_void_or<\n               T, conjunction<std::is_trivially_destructible<T>,\n                              std::is_trivially_move_constructible<T>,\n                              std::is_trivially_move_assignable<T>>>::value &&\n           std::is_trivially_destructible<E>::value &&\n           std::is_trivially_move_constructible<E>::value &&\n           std::is_trivially_move_assignable<E>::value>\nstruct expected_move_assign_base : expected_copy_assign_base<T, E> {\n  using expected_copy_assign_base<T, E>::expected_copy_assign_base;\n};\n#else\ntemplate <class T, class E, bool = false>\nstruct expected_move_assign_base;\n#endif\n\ntemplate <class T, class E>\nstruct expected_move_assign_base<T, E, false>\n    : expected_copy_assign_base<T, E> {\n  using expected_copy_assign_base<T, E>::expected_copy_assign_base;\n\n  
expected_move_assign_base() = default;\n  expected_move_assign_base(const expected_move_assign_base &rhs) = default;\n\n  expected_move_assign_base(expected_move_assign_base &&rhs) = default;\n\n  expected_move_assign_base &operator=(const expected_move_assign_base &rhs) =\n      default;\n\n  expected_move_assign_base &operator=(\n      expected_move_assign_base\n          &&rhs) noexcept(std::is_nothrow_move_constructible<T>::value &&\n                          std::is_nothrow_move_assignable<T>::value) {\n    this->assign(std::move(rhs));\n    return *this;\n  }\n};\n\n// expected_delete_ctor_base will conditionally delete copy and move\n// constructors depending on whether T is copy/move constructible\ntemplate <class T, class E,\n          bool EnableCopy = (is_copy_constructible_or_void<T>::value &&\n                             std::is_copy_constructible<E>::value),\n          bool EnableMove = (is_move_constructible_or_void<T>::value &&\n                             std::is_move_constructible<E>::value)>\nstruct expected_delete_ctor_base {\n  expected_delete_ctor_base() = default;\n  expected_delete_ctor_base(const expected_delete_ctor_base &) = default;\n  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default;\n  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =\n      default;\n  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =\n      default;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_ctor_base<T, E, true, false> {\n  expected_delete_ctor_base() = default;\n  expected_delete_ctor_base(const expected_delete_ctor_base &) = default;\n  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete;\n  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =\n      default;\n  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =\n      default;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_ctor_base<T, E, 
false, true> {\n  expected_delete_ctor_base() = default;\n  expected_delete_ctor_base(const expected_delete_ctor_base &) = delete;\n  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default;\n  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =\n      default;\n  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =\n      default;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_ctor_base<T, E, false, false> {\n  expected_delete_ctor_base() = default;\n  expected_delete_ctor_base(const expected_delete_ctor_base &) = delete;\n  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete;\n  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =\n      default;\n  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =\n      default;\n};\n\n// expected_delete_assign_base will conditionally delete copy and move\n// constructors depending on whether T and E are copy/move constructible +\n// assignable\ntemplate <class T, class E,\n          bool EnableCopy = (is_copy_constructible_or_void<T>::value &&\n                             std::is_copy_constructible<E>::value &&\n                             is_copy_assignable_or_void<T>::value &&\n                             std::is_copy_assignable<E>::value),\n          bool EnableMove = (is_move_constructible_or_void<T>::value &&\n                             std::is_move_constructible<E>::value &&\n                             is_move_assignable_or_void<T>::value &&\n                             std::is_move_assignable<E>::value)>\nstruct expected_delete_assign_base {\n  expected_delete_assign_base() = default;\n  expected_delete_assign_base(const expected_delete_assign_base &) = default;\n  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =\n      default;\n  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =\n      default;\n  
expected_delete_assign_base &operator=(\n      expected_delete_assign_base &&) noexcept = default;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_assign_base<T, E, true, false> {\n  expected_delete_assign_base() = default;\n  expected_delete_assign_base(const expected_delete_assign_base &) = default;\n  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =\n      default;\n  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =\n      default;\n  expected_delete_assign_base &operator=(\n      expected_delete_assign_base &&) noexcept = delete;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_assign_base<T, E, false, true> {\n  expected_delete_assign_base() = default;\n  expected_delete_assign_base(const expected_delete_assign_base &) = default;\n  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =\n      default;\n  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =\n      delete;\n  expected_delete_assign_base &operator=(\n      expected_delete_assign_base &&) noexcept = default;\n};\n\ntemplate <class T, class E>\nstruct expected_delete_assign_base<T, E, false, false> {\n  expected_delete_assign_base() = default;\n  expected_delete_assign_base(const expected_delete_assign_base &) = default;\n  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =\n      default;\n  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =\n      delete;\n  expected_delete_assign_base &operator=(\n      expected_delete_assign_base &&) noexcept = delete;\n};\n\n// This is needed to be able to construct the expected_default_ctor_base which\n// follows, while still conditionally deleting the default constructor.\nstruct default_constructor_tag {\n  explicit constexpr default_constructor_tag() = default;\n};\n\n// expected_default_ctor_base will ensure that expected has a deleted default\n// constructor if T is not default constructible.\n// 
This specialization is for when T is default constructible\ntemplate <class T, class E,\n          bool Enable =\n              std::is_default_constructible<T>::value || std::is_void<T>::value>\nstruct expected_default_ctor_base {\n  constexpr expected_default_ctor_base() noexcept = default;\n  constexpr expected_default_ctor_base(\n      expected_default_ctor_base const &) noexcept = default;\n  constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept =\n      default;\n  expected_default_ctor_base &operator=(\n      expected_default_ctor_base const &) noexcept = default;\n  expected_default_ctor_base &operator=(\n      expected_default_ctor_base &&) noexcept = default;\n\n  constexpr explicit expected_default_ctor_base(default_constructor_tag) {}\n};\n\n// This specialization is for when T is not default constructible\ntemplate <class T, class E>\nstruct expected_default_ctor_base<T, E, false> {\n  constexpr expected_default_ctor_base() noexcept = delete;\n  constexpr expected_default_ctor_base(\n      expected_default_ctor_base const &) noexcept = default;\n  constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept =\n      default;\n  expected_default_ctor_base &operator=(\n      expected_default_ctor_base const &) noexcept = default;\n  expected_default_ctor_base &operator=(\n      expected_default_ctor_base &&) noexcept = default;\n\n  constexpr explicit expected_default_ctor_base(default_constructor_tag) {}\n};\n}  // namespace detail\n\ntemplate <class E>\nclass bad_expected_access : public std::exception {\n public:\n  explicit bad_expected_access(E e) : m_val(std::move(e)) {}\n\n  virtual const char *what() const noexcept override {\n    return \"Bad expected access\";\n  }\n\n  const E &error() const & {\n    return m_val;\n  }\n  E &error() & {\n    return m_val;\n  }\n  const E &&error() const && {\n    return std::move(m_val);\n  }\n  E &&error() && {\n    return std::move(m_val);\n  }\n\n private:\n  E 
m_val;\n};\n\n/// An `expected<T, E>` object is an object that contains the storage for\n/// another object and manages the lifetime of this contained object `T`.\n/// Alternatively it could contain the storage for another unexpected object\n/// `E`. The contained object may not be initialized after the expected object\n/// has been initialized, and may not be destroyed before the expected object\n/// has been destroyed. The initialization state of the contained object is\n/// tracked by the expected object.\ntemplate <class T, class E>\nclass TL_EXPECTED_NODISCARD expected\n    : private detail::expected_move_assign_base<T, E>,\n      private detail::expected_delete_ctor_base<T, E>,\n      private detail::expected_delete_assign_base<T, E>,\n      private detail::expected_default_ctor_base<T, E> {\n  static_assert(!std::is_reference<T>::value, \"T must not be a reference\");\n  static_assert(!std::is_same<T, std::remove_cv<in_place_t>::type>::value,\n                \"T must not be in_place_t\");\n  static_assert(!std::is_same<T, std::remove_cv<unexpect_t>::type>::value,\n                \"T must not be unexpect_t\");\n  static_assert(\n      !std::is_same<T, typename std::remove_cv<unexpected<E>>::type>::value,\n      \"T must not be unexpected<E>\");\n  static_assert(!std::is_reference<E>::value, \"E must not be a reference\");\n\n  T *valptr() {\n    return std::addressof(this->m_val);\n  }\n  const T *valptr() const {\n    return std::addressof(this->m_val);\n  }\n  unexpected<E> *errptr() {\n    return std::addressof(this->m_unexpect);\n  }\n  const unexpected<E> *errptr() const {\n    return std::addressof(this->m_unexpect);\n  }\n\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR U &val() {\n    return this->m_val;\n  }\n  TL_EXPECTED_11_CONSTEXPR unexpected<E> &err() {\n    return this->m_unexpect;\n  }\n\n  template <class U = T,\n            
detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  constexpr const U &val() const {\n    return this->m_val;\n  }\n  constexpr const unexpected<E> &err() const {\n    return this->m_unexpect;\n  }\n\n  using impl_base = detail::expected_move_assign_base<T, E>;\n  using ctor_base = detail::expected_default_ctor_base<T, E>;\n\n public:\n  typedef T value_type;\n  typedef E error_type;\n  typedef unexpected<E> unexpected_type;\n\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) & {\n    return and_then_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) && {\n    return and_then_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto and_then(F &&f) const & {\n    return and_then_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  constexpr auto and_then(F &&f) const && {\n    return and_then_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n\n#else\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto and_then(\n      F &&f) & -> decltype(and_then_impl(std::declval<expected &>(),\n                                         std::forward<F>(f))) {\n    return and_then_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto and_then(\n      F &&f) && -> decltype(and_then_impl(std::declval<expected &&>(),\n                                          std::forward<F>(f))) {\n    return and_then_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto and_then(\n      F &&f) const & -> decltype(and_then_impl(std::declval<expected const &>(),\n                                               std::forward<F>(f))) {\n    return and_then_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  
template <class F>\n  constexpr auto and_then(F &&f)\n      const && -> decltype(and_then_impl(std::declval<expected const &&>(),\n                                         std::forward<F>(f))) {\n    return and_then_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n#endif\n\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto map(F &&f) & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto map(F &&f) && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto map(F &&f) const & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto map(F &&f) const && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n#else\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(\n      std::declval<expected &>(), std::declval<F &&>()))\n  map(F &&f) & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval<expected>(),\n                                                      std::declval<F &&>()))\n  map(F &&f) && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr decltype(expected_map_impl(std::declval<const expected &>(),\n                                       std::declval<F &&>()))\n  map(F &&f) const & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  constexpr decltype(expected_map_impl(std::declval<const expected &&>(),\n                                       std::declval<F &&>()))\n  map(F &&f) const && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  
}\n#endif\n#endif\n\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto transform(F &&f) const & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto transform(F &&f) const && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n#else\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(\n      std::declval<expected &>(), std::declval<F &&>()))\n  transform(F &&f) & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval<expected>(),\n                                                      std::declval<F &&>()))\n  transform(F &&f) && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr decltype(expected_map_impl(std::declval<const expected &>(),\n                                       std::declval<F &&>()))\n  transform(F &&f) const & {\n    return expected_map_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  constexpr decltype(expected_map_impl(std::declval<const expected &&>(),\n                                       std::declval<F &&>()))\n  transform(F &&f) const && {\n    return expected_map_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n#endif\n\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) 
& {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto map_error(F &&f) const & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto map_error(F &&f) const && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n#else\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &>(),\n                                                   std::declval<F &&>()))\n  map_error(F &&f) & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &&>(),\n                                                   std::declval<F &&>()))\n  map_error(F &&f) && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr decltype(map_error_impl(std::declval<const expected &>(),\n                                    std::declval<F &&>()))\n  map_error(F &&f) const & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  constexpr decltype(map_error_impl(std::declval<const expected &&>(),\n                                    std::declval<F &&>()))\n  map_error(F &&f) const && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n#endif\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n 
 template <class F>\n  constexpr auto transform_error(F &&f) const & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  constexpr auto transform_error(F &&f) const && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n#else\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &>(),\n                                                   std::declval<F &&>()))\n  transform_error(F &&f) & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n  template <class F>\n  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &&>(),\n                                                   std::declval<F &&>()))\n  transform_error(F &&f) && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n  template <class F>\n  constexpr decltype(map_error_impl(std::declval<const expected &>(),\n                                    std::declval<F &&>()))\n  transform_error(F &&f) const & {\n    return map_error_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  constexpr decltype(map_error_impl(std::declval<const expected &&>(),\n                                    std::declval<F &&>()))\n  transform_error(F &&f) const && {\n    return map_error_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n#endif\n  template <class F>\n  expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & {\n    return or_else_impl(*this, std::forward<F>(f));\n  }\n\n  template <class F>\n  expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) && {\n    return or_else_impl(std::move(*this), std::forward<F>(f));\n  }\n\n  template <class F>\n  expected constexpr or_else(F &&f) const & {\n    return or_else_impl(*this, std::forward<F>(f));\n  }\n\n#ifndef TL_EXPECTED_NO_CONSTRR\n  template <class F>\n  expected constexpr or_else(F &&f) const && {\n    return or_else_impl(std::move(*this), std::forward<F>(f));\n  }\n#endif\n  constexpr 
expected() = default;\n  constexpr expected(const expected &rhs) = default;\n  constexpr expected(expected &&rhs) = default;\n  expected &operator=(const expected &rhs) = default;\n  expected &operator=(expected &&rhs) = default;\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =\n                nullptr>\n  constexpr expected(in_place_t, Args &&...args)\n      : impl_base(in_place, std::forward<Args>(args)...),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr expected(in_place_t, std::initializer_list<U> il, Args &&...args)\n      : impl_base(in_place, il, std::forward<Args>(args)...),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <class G = E,\n            detail::enable_if_t<std::is_constructible<E, const G &>::value> * =\n                nullptr,\n            detail::enable_if_t<!std::is_convertible<const G &, E>::value> * =\n                nullptr>\n  explicit constexpr expected(const unexpected<G> &e)\n      : impl_base(unexpect, e.value()),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <\n      class G = E,\n      detail::enable_if_t<std::is_constructible<E, const G &>::value> * =\n          nullptr,\n      detail::enable_if_t<std::is_convertible<const G &, E>::value> * = nullptr>\n  constexpr expected(unexpected<G> const &e)\n      : impl_base(unexpect, e.value()),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <\n      class G = E,\n      detail::enable_if_t<std::is_constructible<E, G &&>::value> * = nullptr,\n      detail::enable_if_t<!std::is_convertible<G &&, E>::value> * = nullptr>\n  explicit constexpr expected(unexpected<G> &&e) noexcept(\n      std::is_nothrow_constructible<E, G &&>::value)\n      : impl_base(unexpect, 
std::move(e.value())),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <\n      class G = E,\n      detail::enable_if_t<std::is_constructible<E, G &&>::value> * = nullptr,\n      detail::enable_if_t<std::is_convertible<G &&, E>::value> * = nullptr>\n  constexpr expected(unexpected<G> &&e) noexcept(\n      std::is_nothrow_constructible<E, G &&>::value)\n      : impl_base(unexpect, std::move(e.value())),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <class... Args,\n            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =\n                nullptr>\n  constexpr explicit expected(unexpect_t, Args &&...args)\n      : impl_base(unexpect, std::forward<Args>(args)...),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_constructible<\n                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  constexpr explicit expected(unexpect_t, std::initializer_list<U> il,\n                              Args &&...args)\n      : impl_base(unexpect, il, std::forward<Args>(args)...),\n        ctor_base(detail::default_constructor_tag{}) {}\n\n  template <class U, class G,\n            detail::enable_if_t<!(std::is_convertible<U const &, T>::value &&\n                                  std::is_convertible<G const &, E>::value)> * =\n                nullptr,\n            detail::expected_enable_from_other<T, E, U, G, const U &, const G &>\n                * = nullptr>\n  explicit TL_EXPECTED_11_CONSTEXPR expected(const expected<U, G> &rhs)\n      : ctor_base(detail::default_constructor_tag{}) {\n    if (rhs.has_value()) {\n      this->construct(*rhs);\n    } else {\n      this->construct_error(rhs.error());\n    }\n  }\n\n  template <class U, class G,\n            detail::enable_if_t<(std::is_convertible<U const &, T>::value &&\n                                 std::is_convertible<G const &, E>::value)> * =\n     
           nullptr,\n            detail::expected_enable_from_other<T, E, U, G, const U &, const G &>\n                * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR expected(const expected<U, G> &rhs)\n      : ctor_base(detail::default_constructor_tag{}) {\n    if (rhs.has_value()) {\n      this->construct(*rhs);\n    } else {\n      this->construct_error(rhs.error());\n    }\n  }\n\n  template <\n      class U, class G,\n      detail::enable_if_t<!(std::is_convertible<U &&, T>::value &&\n                            std::is_convertible<G &&, E>::value)> * = nullptr,\n      detail::expected_enable_from_other<T, E, U, G, U &&, G &&> * = nullptr>\n  explicit TL_EXPECTED_11_CONSTEXPR expected(expected<U, G> &&rhs)\n      : ctor_base(detail::default_constructor_tag{}) {\n    if (rhs.has_value()) {\n      this->construct(std::move(*rhs));\n    } else {\n      this->construct_error(std::move(rhs.error()));\n    }\n  }\n\n  template <\n      class U, class G,\n      detail::enable_if_t<(std::is_convertible<U &&, T>::value &&\n                           std::is_convertible<G &&, E>::value)> * = nullptr,\n      detail::expected_enable_from_other<T, E, U, G, U &&, G &&> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR expected(expected<U, G> &&rhs)\n      : ctor_base(detail::default_constructor_tag{}) {\n    if (rhs.has_value()) {\n      this->construct(std::move(*rhs));\n    } else {\n      this->construct_error(std::move(rhs.error()));\n    }\n  }\n\n  template <\n      class U = T,\n      detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr,\n      detail::expected_enable_forward_value<T, E, U> * = nullptr>\n  explicit TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v)\n      : expected(in_place, std::forward<U>(v)) {}\n\n  template <\n      class U = T,\n      detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr,\n      detail::expected_enable_forward_value<T, E, U> * = nullptr>\n  TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v)\n      : expected(in_place, 
std::forward<U>(v)) {}\n\n  template <\n      class U = T, class G = T,\n      detail::enable_if_t<std::is_nothrow_constructible<T, U &&>::value> * =\n          nullptr,\n      detail::enable_if_t<!std::is_void<G>::value> * = nullptr,\n      detail::enable_if_t<\n          (!std::is_same<expected<T, E>, detail::decay_t<U>>::value &&\n           !detail::conjunction<std::is_scalar<T>,\n                                std::is_same<T, detail::decay_t<U>>>::value &&\n           std::is_constructible<T, U>::value &&\n           std::is_assignable<G &, U>::value &&\n           std::is_nothrow_move_constructible<E>::value)> * = nullptr>\n  expected &operator=(U &&v) {\n    if (has_value()) {\n      val() = std::forward<U>(v);\n    } else {\n      err().~unexpected<E>();\n      ::new (valptr()) T(std::forward<U>(v));\n      this->m_has_val = true;\n    }\n\n    return *this;\n  }\n\n  template <\n      class U = T, class G = T,\n      detail::enable_if_t<!std::is_nothrow_constructible<T, U &&>::value> * =\n          nullptr,\n      detail::enable_if_t<!std::is_void<U>::value> * = nullptr,\n      detail::enable_if_t<\n          (!std::is_same<expected<T, E>, detail::decay_t<U>>::value &&\n           !detail::conjunction<std::is_scalar<T>,\n                                std::is_same<T, detail::decay_t<U>>>::value &&\n           std::is_constructible<T, U>::value &&\n           std::is_assignable<G &, U>::value &&\n           std::is_nothrow_move_constructible<E>::value)> * = nullptr>\n  expected &operator=(U &&v) {\n    if (has_value()) {\n      val() = std::forward<U>(v);\n    } else {\n      auto tmp = std::move(err());\n      err().~unexpected<E>();\n\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n      try {\n        ::new (valptr()) T(std::forward<U>(v));\n        this->m_has_val = true;\n      } catch (...) 
{\n        err() = std::move(tmp);\n        throw;\n      }\n#else\n      ::new (valptr()) T(std::forward<U>(v));\n      this->m_has_val = true;\n#endif\n    }\n\n    return *this;\n  }\n\n  template <class G = E,\n            detail::enable_if_t<std::is_nothrow_copy_constructible<G>::value &&\n                                std::is_assignable<G &, G>::value> * = nullptr>\n  expected &operator=(const unexpected<G> &rhs) {\n    if (!has_value()) {\n      err() = rhs;\n    } else {\n      this->destroy_val();\n      ::new (errptr()) unexpected<E>(rhs);\n      this->m_has_val = false;\n    }\n\n    return *this;\n  }\n\n  template <class G = E,\n            detail::enable_if_t<std::is_nothrow_move_constructible<G>::value &&\n                                std::is_move_assignable<G>::value> * = nullptr>\n  expected &operator=(unexpected<G> &&rhs) noexcept {\n    if (!has_value()) {\n      err() = std::move(rhs);\n    } else {\n      this->destroy_val();\n      ::new (errptr()) unexpected<E>(std::move(rhs));\n      this->m_has_val = false;\n    }\n\n    return *this;\n  }\n\n  template <class... Args, detail::enable_if_t<std::is_nothrow_constructible<\n                               T, Args &&...>::value> * = nullptr>\n  void emplace(Args &&...args) {\n    if (has_value()) {\n      val().~T();\n    } else {\n      err().~unexpected<E>();\n      this->m_has_val = true;\n    }\n    ::new (valptr()) T(std::forward<Args>(args)...);\n  }\n\n  template <class... Args, detail::enable_if_t<!std::is_nothrow_constructible<\n                               T, Args &&...>::value> * = nullptr>\n  void emplace(Args &&...args) {\n    if (has_value()) {\n      val().~T();\n      ::new (valptr()) T(std::forward<Args>(args)...);\n    } else {\n      auto tmp = std::move(err());\n      err().~unexpected<E>();\n\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n      try {\n        ::new (valptr()) T(std::forward<Args>(args)...);\n        this->m_has_val = true;\n      } catch (...) 
{\n        err() = std::move(tmp);\n        throw;\n      }\n#else\n      ::new (valptr()) T(std::forward<Args>(args)...);\n      this->m_has_val = true;\n#endif\n    }\n  }\n\n  template <class U, class... Args,\n            detail::enable_if_t<std::is_nothrow_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  void emplace(std::initializer_list<U> il, Args &&...args) {\n    if (has_value()) {\n      T t(il, std::forward<Args>(args)...);\n      val() = std::move(t);\n    } else {\n      err().~unexpected<E>();\n      ::new (valptr()) T(il, std::forward<Args>(args)...);\n      this->m_has_val = true;\n    }\n  }\n\n  template <class U, class... Args,\n            detail::enable_if_t<!std::is_nothrow_constructible<\n                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>\n  void emplace(std::initializer_list<U> il, Args &&...args) {\n    if (has_value()) {\n      T t(il, std::forward<Args>(args)...);\n      val() = std::move(t);\n    } else {\n      auto tmp = std::move(err());\n      err().~unexpected<E>();\n\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n      try {\n        ::new (valptr()) T(il, std::forward<Args>(args)...);\n        this->m_has_val = true;\n      } catch (...) 
{\n        err() = std::move(tmp);\n        throw;\n      }\n#else\n      ::new (valptr()) T(il, std::forward<Args>(args)...);\n      this->m_has_val = true;\n#endif\n    }\n  }\n\n private:\n  using t_is_void = std::true_type;\n  using t_is_not_void = std::false_type;\n  using t_is_nothrow_move_constructible = std::true_type;\n  using move_constructing_t_can_throw = std::false_type;\n  using e_is_nothrow_move_constructible = std::true_type;\n  using move_constructing_e_can_throw = std::false_type;\n\n  void swap_where_both_have_value(expected & /*rhs*/, t_is_void) noexcept {\n    // swapping void is a no-op\n  }\n\n  void swap_where_both_have_value(expected &rhs, t_is_not_void) {\n    using std::swap;\n    swap(val(), rhs.val());\n  }\n\n  void swap_where_only_one_has_value(expected &rhs, t_is_void) noexcept(\n      std::is_nothrow_move_constructible<E>::value) {\n    ::new (errptr()) unexpected_type(std::move(rhs.err()));\n    rhs.err().~unexpected_type();\n    std::swap(this->m_has_val, rhs.m_has_val);\n  }\n\n  void swap_where_only_one_has_value(expected &rhs, t_is_not_void) {\n    swap_where_only_one_has_value_and_t_is_not_void(\n        rhs, typename std::is_nothrow_move_constructible<T>::type{},\n        typename std::is_nothrow_move_constructible<E>::type{});\n  }\n\n  void swap_where_only_one_has_value_and_t_is_not_void(\n      expected &rhs, t_is_nothrow_move_constructible,\n      e_is_nothrow_move_constructible) noexcept {\n    auto temp = std::move(val());\n    val().~T();\n    ::new (errptr()) unexpected_type(std::move(rhs.err()));\n    rhs.err().~unexpected_type();\n    ::new (rhs.valptr()) T(std::move(temp));\n    std::swap(this->m_has_val, rhs.m_has_val);\n  }\n\n  void swap_where_only_one_has_value_and_t_is_not_void(\n      expected &rhs, t_is_nothrow_move_constructible,\n      move_constructing_e_can_throw) {\n    auto temp = std::move(val());\n    val().~T();\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n    try {\n      ::new (errptr()) 
unexpected_type(std::move(rhs.err()));\n      rhs.err().~unexpected_type();\n      ::new (rhs.valptr()) T(std::move(temp));\n      std::swap(this->m_has_val, rhs.m_has_val);\n    } catch (...) {\n      val() = std::move(temp);\n      throw;\n    }\n#else\n    ::new (errptr()) unexpected_type(std::move(rhs.err()));\n    rhs.err().~unexpected_type();\n    ::new (rhs.valptr()) T(std::move(temp));\n    std::swap(this->m_has_val, rhs.m_has_val);\n#endif\n  }\n\n  void swap_where_only_one_has_value_and_t_is_not_void(\n      expected &rhs, move_constructing_t_can_throw,\n      e_is_nothrow_move_constructible) {\n    auto temp = std::move(rhs.err());\n    rhs.err().~unexpected_type();\n#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED\n    try {\n      ::new (rhs.valptr()) T(std::move(val()));\n      val().~T();\n      ::new (errptr()) unexpected_type(std::move(temp));\n      std::swap(this->m_has_val, rhs.m_has_val);\n    } catch (...) {\n      rhs.err() = std::move(temp);\n      throw;\n    }\n#else\n    ::new (rhs.valptr()) T(std::move(val()));\n    val().~T();\n    ::new (errptr()) unexpected_type(std::move(temp));\n    std::swap(this->m_has_val, rhs.m_has_val);\n#endif\n  }\n\n public:\n  template <class OT = T, class OE = E>\n  detail::enable_if_t<detail::is_swappable<OT>::value &&\n                      detail::is_swappable<OE>::value &&\n                      (std::is_nothrow_move_constructible<OT>::value ||\n                       std::is_nothrow_move_constructible<OE>::value)>\n  swap(expected &rhs) noexcept(std::is_nothrow_move_constructible<T>::value &&\n                               detail::is_nothrow_swappable<T>::value &&\n                               std::is_nothrow_move_constructible<E>::value &&\n                               detail::is_nothrow_swappable<E>::value) {\n    if (has_value() && rhs.has_value()) {\n      swap_where_both_have_value(rhs, typename std::is_void<T>::type{});\n    } else if (!has_value() && rhs.has_value()) {\n      rhs.swap(*this);\n    } 
else if (has_value()) {\n      swap_where_only_one_has_value(rhs, typename std::is_void<T>::type{});\n    } else {\n      using std::swap;\n      swap(err(), rhs.err());\n    }\n  }\n\n  constexpr const T *operator->() const {\n    TL_ASSERT(has_value());\n    return valptr();\n  }\n  TL_EXPECTED_11_CONSTEXPR T *operator->() {\n    TL_ASSERT(has_value());\n    return valptr();\n  }\n\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  constexpr const U &operator*() const & {\n    TL_ASSERT(has_value());\n    return val();\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR U &operator*() & {\n    TL_ASSERT(has_value());\n    return val();\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  constexpr const U &&operator*() const && {\n    TL_ASSERT(has_value());\n    return std::move(val());\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR U &&operator*() && {\n    TL_ASSERT(has_value());\n    return std::move(val());\n  }\n\n  constexpr bool has_value() const noexcept {\n    return this->m_has_val;\n  }\n  constexpr explicit operator bool() const noexcept {\n    return this->m_has_val;\n  }\n\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR const U &value() const & {\n    if (!has_value())\n      detail::throw_exception(bad_expected_access<E>(err().value()));\n    return val();\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR U &value() & {\n    if (!has_value())\n      detail::throw_exception(bad_expected_access<E>(err().value()));\n    return val();\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  
TL_EXPECTED_11_CONSTEXPR const U &&value() const && {\n    if (!has_value())\n      detail::throw_exception(bad_expected_access<E>(std::move(err()).value()));\n    return std::move(val());\n  }\n  template <class U = T,\n            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>\n  TL_EXPECTED_11_CONSTEXPR U &&value() && {\n    if (!has_value())\n      detail::throw_exception(bad_expected_access<E>(std::move(err()).value()));\n    return std::move(val());\n  }\n\n  constexpr const E &error() const & {\n    TL_ASSERT(!has_value());\n    return err().value();\n  }\n  TL_EXPECTED_11_CONSTEXPR E &error() & {\n    TL_ASSERT(!has_value());\n    return err().value();\n  }\n  constexpr const E &&error() const && {\n    TL_ASSERT(!has_value());\n    return std::move(err().value());\n  }\n  TL_EXPECTED_11_CONSTEXPR E &&error() && {\n    TL_ASSERT(!has_value());\n    return std::move(err().value());\n  }\n\n  template <class U>\n  constexpr T value_or(U &&v) const & {\n    static_assert(std::is_copy_constructible<T>::value &&\n                      std::is_convertible<U &&, T>::value,\n                  \"T must be copy-constructible and convertible to from U&&\");\n    return bool(*this) ? **this : static_cast<T>(std::forward<U>(v));\n  }\n  template <class U>\n  TL_EXPECTED_11_CONSTEXPR T value_or(U &&v) && {\n    static_assert(std::is_move_constructible<T>::value &&\n                      std::is_convertible<U &&, T>::value,\n                  \"T must be move-constructible and convertible to from U&&\");\n    return bool(*this) ? 
std::move(**this) : static_cast<T>(std::forward<U>(v));\n  }\n};\n\nnamespace detail {\ntemplate <class Exp>\nusing exp_t = typename detail::decay_t<Exp>::value_type;\ntemplate <class Exp>\nusing err_t = typename detail::decay_t<Exp>::error_type;\ntemplate <class Exp, class Ret>\nusing ret_t = expected<Ret, err_t<Exp>>;\n\n#ifdef TL_EXPECTED_CXX14\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>()))>\nconstexpr auto and_then_impl(Exp &&exp, F &&f) {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n\n  return exp.has_value()\n             ? detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp))\n             : Ret(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>()))>\nconstexpr auto and_then_impl(Exp &&exp, F &&f) {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n\n  return exp.has_value() ? detail::invoke(std::forward<F>(f))\n                         : Ret(unexpect, std::forward<Exp>(exp).error());\n}\n#else\ntemplate <class>\nstruct TC;\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>())),\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr>\nauto and_then_impl(Exp &&exp, F &&f) -> Ret {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n\n  return exp.has_value()\n             ? 
detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp))\n             : Ret(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>())),\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr>\nconstexpr auto and_then_impl(Exp &&exp, F &&f) -> Ret {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n\n  return exp.has_value() ? detail::invoke(std::forward<F>(f))\n                         : Ret(unexpect, std::forward<Exp>(exp).error());\n}\n#endif\n\n#ifdef TL_EXPECTED_CXX14\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto expected_map_impl(Exp &&exp, F &&f) {\n  using result = ret_t<Exp, detail::decay_t<Ret>>;\n  return exp.has_value() ? 
result(detail::invoke(std::forward<F>(f),\n                                                 *std::forward<Exp>(exp)))\n                         : result(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto expected_map_impl(Exp &&exp, F &&f) {\n  using result = expected<void, err_t<Exp>>;\n  if (exp.has_value()) {\n    detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp));\n    return result();\n  }\n\n  return result(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto expected_map_impl(Exp &&exp, F &&f) {\n  using result = ret_t<Exp, detail::decay_t<Ret>>;\n  return exp.has_value() ? 
result(detail::invoke(std::forward<F>(f)))\n                         : result(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto expected_map_impl(Exp &&exp, F &&f) {\n  using result = expected<void, err_t<Exp>>;\n  if (exp.has_value()) {\n    detail::invoke(std::forward<F>(f));\n    return result();\n  }\n\n  return result(unexpect, std::forward<Exp>(exp).error());\n}\n#else\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\n\nconstexpr auto expected_map_impl(Exp &&exp,\n                                 F &&f) -> ret_t<Exp, detail::decay_t<Ret>> {\n  using result = ret_t<Exp, detail::decay_t<Ret>>;\n\n  return exp.has_value() ? 
result(detail::invoke(std::forward<F>(f),\n                                                 *std::forward<Exp>(exp)))\n                         : result(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              *std::declval<Exp>())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\n\nauto expected_map_impl(Exp &&exp, F &&f) -> expected<void, err_t<Exp>> {\n  if (exp.has_value()) {\n    detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp));\n    return {};\n  }\n\n  return unexpected<err_t<Exp>>(std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\n\nconstexpr auto expected_map_impl(Exp &&exp,\n                                 F &&f) -> ret_t<Exp, detail::decay_t<Ret>> {\n  using result = ret_t<Exp, detail::decay_t<Ret>>;\n\n  return exp.has_value() ? 
result(detail::invoke(std::forward<F>(f)))\n                         : result(unexpect, std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\n\nauto expected_map_impl(Exp &&exp, F &&f) -> expected<void, err_t<Exp>> {\n  if (exp.has_value()) {\n    detail::invoke(std::forward<F>(f));\n    return {};\n  }\n\n  return unexpected<err_t<Exp>>(std::forward<Exp>(exp).error());\n}\n#endif\n\n#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \\\n    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto map_error_impl(Exp &&exp, F &&f) {\n  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;\n  return exp.has_value()\n             ? 
result(*std::forward<Exp>(exp))\n             : result(unexpect, detail::invoke(std::forward<F>(f),\n                                               std::forward<Exp>(exp).error()));\n}\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto map_error_impl(Exp &&exp, F &&f) {\n  using result = expected<exp_t<Exp>, monostate>;\n  if (exp.has_value()) {\n    return result(*std::forward<Exp>(exp));\n  }\n\n  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());\n  return result(unexpect, monostate{});\n}\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto map_error_impl(Exp &&exp, F &&f) {\n  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;\n  return exp.has_value()\n             ? 
result()\n             : result(unexpect, detail::invoke(std::forward<F>(f),\n                                               std::forward<Exp>(exp).error()));\n}\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto map_error_impl(Exp &&exp, F &&f) {\n  using result = expected<exp_t<Exp>, monostate>;\n  if (exp.has_value()) {\n    return result();\n  }\n\n  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());\n  return result(unexpect, monostate{});\n}\n#else\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto map_error_impl(Exp &&exp, F &&f)\n    -> expected<exp_t<Exp>, detail::decay_t<Ret>> {\n  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;\n\n  return exp.has_value()\n             ? 
result(*std::forward<Exp>(exp))\n             : result(unexpect, detail::invoke(std::forward<F>(f),\n                                               std::forward<Exp>(exp).error()));\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto map_error_impl(Exp &&exp, F &&f) -> expected<exp_t<Exp>, monostate> {\n  using result = expected<exp_t<Exp>, monostate>;\n  if (exp.has_value()) {\n    return result(*std::forward<Exp>(exp));\n  }\n\n  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());\n  return result(unexpect, monostate{});\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto map_error_impl(Exp &&exp, F &&f)\n    -> expected<exp_t<Exp>, detail::decay_t<Ret>> {\n  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;\n\n  return exp.has_value()\n             ? 
result()\n             : result(unexpect, detail::invoke(std::forward<F>(f),\n                                               std::forward<Exp>(exp).error()));\n}\n\ntemplate <class Exp, class F,\n          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\nauto map_error_impl(Exp &&exp, F &&f) -> expected<exp_t<Exp>, monostate> {\n  using result = expected<exp_t<Exp>, monostate>;\n  if (exp.has_value()) {\n    return result();\n  }\n\n  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());\n  return result(unexpect, monostate{});\n}\n#endif\n\n#ifdef TL_EXPECTED_CXX14\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nconstexpr auto or_else_impl(Exp &&exp, F &&f) {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n  return exp.has_value() ? std::forward<Exp>(exp)\n                         : detail::invoke(std::forward<F>(f),\n                                          std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\ndetail::decay_t<Exp> or_else_impl(Exp &&exp, F &&f) {\n  return exp.has_value() ? 
std::forward<Exp>(exp)\n                         : (detail::invoke(std::forward<F>(f),\n                                           std::forward<Exp>(exp).error()),\n                            std::forward<Exp>(exp));\n}\n#else\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>\nauto or_else_impl(Exp &&exp, F &&f) -> Ret {\n  static_assert(detail::is_expected<Ret>::value, \"F must return an expected\");\n  return exp.has_value() ? std::forward<Exp>(exp)\n                         : detail::invoke(std::forward<F>(f),\n                                          std::forward<Exp>(exp).error());\n}\n\ntemplate <class Exp, class F,\n          class Ret = decltype(detail::invoke(std::declval<F>(),\n                                              std::declval<Exp>().error())),\n          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>\ndetail::decay_t<Exp> or_else_impl(Exp &&exp, F &&f) {\n  return exp.has_value() ? std::forward<Exp>(exp)\n                         : (detail::invoke(std::forward<F>(f),\n                                           std::forward<Exp>(exp).error()),\n                            std::forward<Exp>(exp));\n}\n#endif\n}  // namespace detail\n\ntemplate <class T, class E, class U, class F>\nconstexpr bool operator==(const expected<T, E> &lhs,\n                          const expected<U, F> &rhs) {\n  return (lhs.has_value() != rhs.has_value())\n             ? false\n             : (!lhs.has_value() ? lhs.error() == rhs.error() : *lhs == *rhs);\n}\ntemplate <class T, class E, class U, class F>\nconstexpr bool operator!=(const expected<T, E> &lhs,\n                          const expected<U, F> &rhs) {\n  return (lhs.has_value() != rhs.has_value())\n             ? true\n             : (!lhs.has_value() ? 
lhs.error() != rhs.error() : *lhs != *rhs);\n}\ntemplate <class E, class F>\nconstexpr bool operator==(const expected<void, E> &lhs,\n                          const expected<void, F> &rhs) {\n  return (lhs.has_value() != rhs.has_value())\n             ? false\n             : (!lhs.has_value() ? lhs.error() == rhs.error() : true);\n}\ntemplate <class E, class F>\nconstexpr bool operator!=(const expected<void, E> &lhs,\n                          const expected<void, F> &rhs) {\n  return (lhs.has_value() != rhs.has_value())\n             ? true\n             : (!lhs.has_value() ? lhs.error() != rhs.error() : false);\n}\n\ntemplate <class T, class E, class U>\nconstexpr bool operator==(const expected<T, E> &x, const U &v) {\n  return x.has_value() ? *x == v : false;\n}\ntemplate <class T, class E, class U>\nconstexpr bool operator==(const U &v, const expected<T, E> &x) {\n  return x.has_value() ? *x == v : false;\n}\ntemplate <class T, class E, class U>\nconstexpr bool operator!=(const expected<T, E> &x, const U &v) {\n  return x.has_value() ? *x != v : true;\n}\ntemplate <class T, class E, class U>\nconstexpr bool operator!=(const U &v, const expected<T, E> &x) {\n  return x.has_value() ? *x != v : true;\n}\n\ntemplate <class T, class E>\nconstexpr bool operator==(const expected<T, E> &x, const unexpected<E> &e) {\n  return x.has_value() ? false : x.error() == e.value();\n}\ntemplate <class T, class E>\nconstexpr bool operator==(const unexpected<E> &e, const expected<T, E> &x) {\n  return x.has_value() ? false : x.error() == e.value();\n}\ntemplate <class T, class E>\nconstexpr bool operator!=(const expected<T, E> &x, const unexpected<E> &e) {\n  return x.has_value() ? true : x.error() != e.value();\n}\ntemplate <class T, class E>\nconstexpr bool operator!=(const unexpected<E> &e, const expected<T, E> &x) {\n  return x.has_value() ? 
true : x.error() != e.value();\n}\n\ntemplate <class T, class E,\n          detail::enable_if_t<(std::is_void<T>::value ||\n                               std::is_move_constructible<T>::value) &&\n                              detail::is_swappable<T>::value &&\n                              std::is_move_constructible<E>::value &&\n                              detail::is_swappable<E>::value> * = nullptr>\nvoid swap(expected<T, E> &lhs,\n          expected<T, E> &rhs) noexcept(noexcept(lhs.swap(rhs))) {\n  lhs.swap(rhs);\n}\n}  // namespace tl\n\n#endif\n"
  },
  {
    "path": "src/include/zvec/ailego/pattern/factory.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstring>\n#include <functional>\n#include <map>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <vector>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Factory\n */\ntemplate <typename TBase>\nclass Factory {\n public:\n  /*! Factory Register\n   */\n  template <typename TImpl, typename = typename std::enable_if<\n                                std::is_base_of<TBase, TImpl>::value>::type>\n  class Register {\n   public:\n    //! Constructor\n    Register(const char *key) {\n      Factory::Instance()->set(key, [] { return Register::Construct(); });\n    }\n\n    //! Constructor\n    template <typename... TArgs>\n    Register(const char *key, TArgs &&...args) {\n      std::tuple<TArgs...> tuple(std::forward<TArgs>(args)...);\n\n      Factory::Instance()->set(key, [tuple] {\n        return Register::Construct(\n            tuple, typename TupleIndexMaker<sizeof...(TArgs)>::Type());\n      });\n    }\n\n   protected:\n    //! Tuple Index Maker\n    template <size_t N, size_t... I>\n    struct TupleIndexMaker : TupleIndexMaker<N - 1, N - 1, I...> {};\n\n    //! Tuple Index\n    template <size_t...>\n    struct TupleIndex {};\n\n    //! Tuple Index Maker (special)\n    template <size_t... I>\n    struct TupleIndexMaker<0, I...> {\n      typedef TupleIndex<I...> Type;\n    };\n\n    //! 
Construct a register object\n    template <typename... TArgs, size_t... I>\n    static TImpl *Construct(const std::tuple<TArgs...> &tuple,\n                            TupleIndex<I...>) {\n      return new (std::nothrow) TImpl(std::get<I>(tuple)...);\n    }\n\n    //! Construct a register object\n    static TImpl *Construct(void) {\n      return new (std::nothrow) TImpl();\n    }\n  };\n\n  //! Produce an instance (c_ptr)\n  static TBase *Make(const char *key) {\n    return Factory::Instance()->produce(key);\n  }\n\n  //! Produce an instance (shared_ptr)\n  static std::shared_ptr<TBase> MakeShared(const char *key) {\n    return std::shared_ptr<TBase>(Factory::Make(key));\n  }\n\n  //! Produce an instance (unique_ptr)\n  static std::unique_ptr<TBase> MakeUnique(const char *key) {\n    return std::unique_ptr<TBase>(Factory::Make(key));\n  }\n\n  //! Test if the class exists\n  static bool Has(const char *key) {\n    return Factory::Instance()->has(key);\n  }\n\n  //! Retrieve classes in factory\n  static std::vector<std::string> Classes(void) {\n    return Factory::Instance()->classes();\n  }\n\n protected:\n  //! Constructor\n  Factory(void) : map_() {}\n\n  //! Retrieve the singleton factory\n  static Factory *Instance(void) {\n    static Factory factory;\n    return (&factory);\n  }\n\n  //! Inserts a new class into map\n  template <typename TFunc>\n  void set(const char *key, TFunc &&func) {\n    map_[key] = std::forward<TFunc>(func);\n  }\n\n  //! Produce an instance\n  TBase *produce(const char *key) {\n    auto iter = map_.find(key);\n    if (iter != map_.end()) {\n      return iter->second();\n    }\n    return nullptr;\n  }\n\n  //! Test if the class exists\n  bool has(const char *key) {\n    return (map_.find(key) != map_.end());\n  }\n\n  //! 
Retrieve classes in factory\n  std::vector<std::string> classes(void) const {\n    std::vector<std::string> vec;\n    for (const auto &it : map_) {\n      vec.push_back(std::string(it.first));\n    }\n    return vec;\n  }\n\n private:\n  //! Disable them\n  Factory(const Factory &);\n  Factory(Factory &&);\n  Factory &operator=(const Factory &);\n\n  /*! Key Comparer\n   */\n  struct KeyComparer {\n    bool operator()(const char *lhs, const char *rhs) const {\n      return (std::strcmp(lhs, rhs) < 0);\n    }\n  };\n\n  //! Don't use a variable buffer to store the key.\n  //! The key must be stored in a static buffer.\n  std::map<const char *, std::function<TBase *()>, KeyComparer> map_;\n};\n\n//! Factory Register\n#define AILEGO_FACTORY_REGISTER(__NAME__, __BASE__, __IMPL__, ...) \\\n  static ailego::Factory<__BASE__>::Register<__IMPL__>             \\\n      __ailegoFactoryRegister_##__NAME__(#__NAME__, ##__VA_ARGS__)\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/pattern/singleton.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <type_traits>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Singleton (C++11)\n */\ntemplate <typename T>\nclass Singleton {\n public:\n  using ObjectType = typename std::remove_reference<T>::type;\n\n  //! Retrieve instance of object\n  static ObjectType &Instance(void) noexcept(\n      std::is_nothrow_constructible<ObjectType>::value) {\n    // Since it's a static variable, if the class has already been created,\n    // it won't be created again. And it is thread-safe in C++11.\n    static ObjectType obj;\n    return obj;\n  }\n\n protected:\n  //! Constructor (Allow inheritance)\n  Singleton(void) {}\n\n private:\n  //! Disable them\n  Singleton(Singleton const &) = delete;\n  Singleton(Singleton &&) = delete;\n  Singleton &operator=(Singleton const &) = delete;\n  Singleton &operator=(Singleton &&) = delete;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/string/string_concat_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdlib>\n#include <vector>\n#include <zvec/ailego/string/string_view.h>\n\nnamespace zvec {\nnamespace ailego {\nnamespace internal {\n\n//! Helper class to convert integer and float types to string, facilitating\n//! string concatenation memory allocation.\nclass Alphameric {\n public:\n  //! Deals with int, int8_t, int16_t, int32_t, bool, short, signed char, non\n  //! class enum\n  Alphameric(int n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%d\", n)) {}\n\n  //! Deals with unsigned int, uint8_t, uint16_t, uint32_t, unsigned short,\n  //! unsigned char\n  Alphameric(unsigned int n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%u\", n)) {}\n\n  //! Deals with long, int32_t, int64_t\n  Alphameric(long n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%ld\", n)) {}\n\n  //! Deals with unsigned long, uint32_t, uint64_t\n  Alphameric(unsigned long n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%lu\", n)) {}\n\n  //! Deals with long long, int64_t\n  Alphameric(long long n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%lld\", n)) {}\n\n  //! Deals with unsigned long long, uint64_t\n  Alphameric(unsigned long long n)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%llu\", n)) {}\n\n  //! 
Deals with float, with 6 precision digit the same as std::to_string\n  Alphameric(float f)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%g\", f)) {}\n\n  //! Deals with double, with 6 precision digit the same as std::to_string\n  Alphameric(double f)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%g\", f)) {}\n\n  //! Deals with long double, with 6 precision digit the same as std::to_string\n  Alphameric(long double f)\n      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), \"%Lg\", f)) {}\n\n  //! Deals with const char*\n  Alphameric(const char *s) : view_(s) {}\n\n  //! Deals with std::string\n  Alphameric(const std::string &s) : view_(s) {}\n\n  //! Deals with StringView\n  Alphameric(StringView s) : view_(s) {}\n\n  // Use string literals \":\" instead of character literals ':'.\n  Alphameric(char c) = delete;\n  Alphameric(const Alphameric &) = delete;\n  Alphameric &operator=(const Alphameric &) = delete;\n\n  //! Deals with enum class with non int underlying type\n  template <typename T,\n            typename = typename std::enable_if<\n                std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>\n  Alphameric(T e)\n      : Alphameric(static_cast<typename std::underlying_type<T>::type>(e)) {}\n\n  //! Deals with std::vector<bool> subscript reference\n  template <typename T,\n            typename std::enable_if<\n                std::is_class<T>::value &&\n                (std::is_same<T, std::vector<bool>::reference>::value ||\n                 std::is_same<T, std::vector<bool>::const_reference>::value)>::\n                type * = nullptr>\n  Alphameric(T e) : Alphameric(static_cast<bool>(e)) {}\n\n  //! string size\n  size_t size() const {\n    return view_.size();\n  }\n\n  //! string data\n  const char *data() const {\n    return view_.data();\n  }\n\n  //! 
string view\n  StringView view() const {\n    return view_;\n  }\n\n private:\n  static constexpr int kBufferSize = 32;\n  char buffer_[kBufferSize];\n  StringView view_;\n};\n\n}  // namespace internal\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/string/string_view.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstring>\n#include <string>\n\nnamespace zvec {\nnamespace ailego {\n\n//! StringView provides a lightweight view into the string data provided by\n//! a `std::string`, double-quoted string literal, character array, or even\n//! another `StringView`.\n//!\n//! A `StringView` does *not* own the string to which it\n//! points, and that data cannot be modified through the view.\nclass StringView {\n public:\n  //! Default constructor\n  StringView() = default;\n\n  //! Construct from c-string\n  StringView(const char *str)\n      : data_(str), size_(str != nullptr ? strlen(str) : 0) {}\n\n  //! Construct from [str, str+s)\n  StringView(const char *str, size_t len) : data_(str), size_(len) {}\n\n  //! Construct from std::string\n  StringView(const std::string &str) : data_(str.data()), size_(str.size()) {}\n\n  //! Retrieve data of string\n  const char *data() const {\n    return data_;\n  }\n\n  //! Retrieve size of string\n  size_t size() const {\n    return size_;\n  }\n\n  //! Retrieve non-zero if it is empty\n  bool empty() const {\n    return size_ == 0;\n  }\n\n private:\n  const char *data_{nullptr};\n  size_t size_{0};\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/utility/file_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstring>\n#include <string>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! File Helper Module\n */\nstruct FileHelper {\n#if defined(_WIN32) || defined(_WIN64)\n  //! Native Handle in Windows\n  typedef void *NativeHandle;\n#else\n  //! Native Handle in POSIX\n  typedef int NativeHandle;\n#endif\n\n  //! Invalid Handle\n  static constexpr NativeHandle InvalidHandle = (NativeHandle)(-1);\n\n  //! Retrieve the path of self process\n  static bool GetSelfPath(std::string *path);\n\n  //! Retrieve the final path for the specified file\n  static bool GetFilePath(NativeHandle handle, std::string *path);\n\n  //! Retrieve current working directory\n  static bool GetWorkingDirectory(std::string *path);\n\n  //! Get the size of a file\n  static bool GetFileSize(const char *path, size_t *psz);\n\n  //! Delete a name and possibly the file it refers to\n  static bool DeleteFile(const char *path);\n\n  //! Change the name or location of a file\n  static bool RenameFile(const char *oldpath, const char *newpath);\n\n  //! Make directories' path\n  static bool MakePath(const char *path);\n\n  //! Remove a file or a directory (includes files & subdirectories)\n  static bool RemovePath(const char *path);\n\n  //! Remove a directory (includes files & subdirectories)\n  static bool RemoveDirectory(const char *path);\n\n  //! 
Retrieve non-zero if the path exists\n  static bool IsExist(const char *path);\n\n  //! Retrieve non-zero if the path is a regular file\n  static bool IsRegular(const char *path);\n\n  //! Retrieve non-zero if the path is a directory\n  static bool IsDirectory(const char *path);\n\n  //! Retrieve non-zero if the path is a symbolic link\n  static bool IsSymbolicLink(const char *path);\n\n  //! Retrieve non-zero if two paths are pointing to the same file\n  static bool IsSame(const char *path1, const char *path2);\n\n  //! Retrieve the size of a file\n  static size_t FileSize(const char *path) {\n    size_t file_size = 0;\n    GetFileSize(path, &file_size);\n    return file_size;\n  }\n\n  //! Retrieve the base name from a path\n  static const char *BaseName(const char *path) {\n    const char *output = std::strrchr(path, '/');\n    if (!output) {\n      output = std::strrchr(path, '\\\\');\n    }\n    return (output ? output + 1 : path);\n  }\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/utility/float_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Float Helper\n */\nstruct FloatHelper {\n  //! Convert FP16 to FP32\n  static float ToFP32(uint16_t val);\n\n  //! Convert FP16 to FP32 (array)\n  static void ToFP32(const uint16_t *arr, size_t size, float *out);\n\n  //! Convert FP16 to FP32 with normalization (array)\n  static void ToFP32(const uint16_t *arr, size_t size, float norm, float *out);\n\n  //! Convert FP32 to FP16\n  static uint16_t ToFP16(float val);\n\n  //! Convert FP32 to FP16 (array)\n  static void ToFP16(const float *arr, size_t size, uint16_t *out);\n\n  //! Convert FP32 to FP16 with normalization (array)\n  static void ToFP16(const float *arr, size_t size, float norm, uint16_t *out);\n\n  //! Convert FP16 to FP32 with normalization\n  static inline float ToFP32(uint16_t val, float norm) {\n    return (FloatHelper::ToFP32(val) / norm);\n  }\n\n  //! Convert FP32 to FP16 with normalization\n  static inline uint16_t ToFP16(float val, float norm) {\n    return FloatHelper::ToFP16(val / norm);\n  }\n};\n\n#if !defined(__aarch64__)\n/*! Half-Precision Floating Point\n */\nclass Float16 {\n public:\n  //! Constructor\n  Float16(void) : value_(0) {}\n\n  //! Constructor\n  Float16(float val) : value_(FloatHelper::ToFP16(val)) {}\n\n  //! 
Constructor\n  Float16(double val) : value_(FloatHelper::ToFP16(static_cast<float>(val))) {}\n\n  //! Assignment\n  Float16 &operator=(float val) {\n    this->value_ = FloatHelper::ToFP16(val);\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator+=(float val) {\n    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) + val);\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator-=(float val) {\n    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) - val);\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator*=(float val) {\n    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) * val);\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator/=(float val) {\n    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) / val);\n    return *this;\n  }\n\n  //! Retrieve value in FP32\n  operator float() const {\n    return FloatHelper::ToFP32(this->value_);\n  }\n\n  //! Equal operator\n  bool operator==(const Float16 &rhs) const {\n    return this->value_ == rhs.value_;\n  }\n\n  //! Not equal operator\n  bool operator!=(const Float16 &rhs) const {\n    return this->value_ != rhs.value_;\n  }\n\n  //! Less than operator\n  bool operator<(const Float16 &rhs) const {\n    return FloatHelper::ToFP32(this->value_) < FloatHelper::ToFP32(rhs.value_);\n  }\n\n  //! Less than or equal operator\n  bool operator<=(const Float16 &rhs) const {\n    return FloatHelper::ToFP32(this->value_) <= FloatHelper::ToFP32(rhs.value_);\n  }\n\n  //! Greater than operator\n  bool operator>(const Float16 &rhs) const {\n    return FloatHelper::ToFP32(this->value_) > FloatHelper::ToFP32(rhs.value_);\n  }\n\n  //! Greater than or equal operator\n  bool operator>=(const Float16 &rhs) const {\n    return FloatHelper::ToFP32(this->value_) >= FloatHelper::ToFP32(rhs.value_);\n  }\n\n  //! 
Calculate the absolute value\n  static inline Float16 Absolute(const Float16 &x) {\n    Float16 abs;\n    abs.value_ = static_cast<uint16_t>(x.value_ & 0x7fff);\n    return abs;\n  }\n\n private:\n  uint16_t value_;\n};\n#else\n/*! Half-Precision Floating Point\n */\nclass Float16 {\n public:\n  //! Constructor\n  Float16(void) : value_(0) {}\n\n  //! Constructor\n  Float16(__fp16 val) : value_(val) {}\n\n  //! Assignment\n  Float16 &operator=(__fp16 val) {\n    this->value_ = val;\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator+=(__fp16 val) {\n    this->value_ = this->value_ + val;\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator-=(__fp16 val) {\n    this->value_ = this->value_ - val;\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator*=(__fp16 val) {\n    this->value_ = this->value_ * val;\n    return *this;\n  }\n\n  //! Assignment\n  Float16 &operator/=(__fp16 val) {\n    this->value_ = this->value_ / val;\n    return *this;\n  }\n\n  //! Retrieve value in FP16\n  operator __fp16() const {\n    return this->value_;\n  }\n\n  //! Equal operator\n  bool operator==(const Float16 &rhs) const {\n    return this->value_ == rhs.value_;\n  }\n\n  //! Not equal operator\n  bool operator!=(const Float16 &rhs) const {\n    return this->value_ != rhs.value_;\n  }\n\n  //! Less than operator\n  bool operator<(const Float16 &rhs) const {\n    return this->value_ < rhs.value_;\n  }\n\n  //! Less than or equal operator\n  bool operator<=(const Float16 &rhs) const {\n    return this->value_ <= rhs.value_;\n  }\n\n  //! Greater than operator\n  bool operator>(const Float16 &rhs) const {\n    return this->value_ > rhs.value_;\n  }\n\n  //! Greater than or equal operator\n  bool operator>=(const Float16 &rhs) const {\n    return this->value_ >= rhs.value_;\n  }\n\n  //! 
Calculate the absolute value\n  static inline Float16 Absolute(const Float16 &x) {\n    Float16 abs(x.value_);\n    uint16_t *p = reinterpret_cast<uint16_t *>(&abs.value_);\n    *p &= 0x7fff;\n    return abs;\n  }\n\n private:\n  __fp16 value_;\n};\n#endif\n\n// Check size of Float16\nstatic_assert(sizeof(Float16) == 2, \"Float16 must be aligned with 2 bytes\");\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/utility/string_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <string>\n#include <vector>\n#include <zvec/ailego/string/string_concat_helper.h>\n#include <zvec/ailego/utility/string_helper_impl.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! String Helper\n */\nstruct StringHelper {\n  //! Return true if the `ref` starts with the given prefix\n  static bool StartsWith(const std::string &ref, const std::string &prefix);\n\n  //! Return true if the `ref` ends with the given suffix\n  static bool EndsWith(const std::string &ref, const std::string &suffix);\n\n  //! Split a string into a vector of T\n  //! NOTE: delim better NOT contain valid symbol for T,\n  //!       i.e. digits + - for integers,\n  //!            digits + - E e . for floating numbers\n  //!       
otherwise there will be performance overhead.\n  template <typename T>\n  static void Split(const std::string &str, char delim, std::vector<T> *out) {\n    return details::SplitImpl<char, T>(str, delim, out);\n  }\n  template <typename T>\n  static void Split(const std::string &str, const char *delim,\n                    std::vector<T> *out) {\n    return details::SplitImpl<const char *, T>(str, delim, out);\n  }\n  template <typename T>\n  static void Split(const std::string &str, const std::string &delim,\n                    std::vector<T> *out) {\n    return details::SplitImpl<const std::string &, T>(str, delim, out);\n  }\n\n  template <typename T>\n  static void Split(const std::string &str, char delim, std::vector<T> *out,\n                    bool skip_empty) {\n    return details::SplitImpl<char, T>(str, delim, out, skip_empty);\n  }\n  template <typename T>\n  static void Split(const std::string &str, const char *delim,\n                    std::vector<T> *out, bool skip_empty) {\n    return details::SplitImpl<const char *, T>(str, delim, out, skip_empty);\n  }\n  template <typename T>\n  static void Split(const std::string &str, const std::string &delim,\n                    std::vector<T> *out, bool skip_empty) {\n    return details::SplitImpl<const std::string &, T>(str, delim, out,\n                                                      skip_empty);\n  }\n\n  // Trim from start (in place)\n  static void LeftTrim(std::string &str);\n\n  // Trim from end (in place)\n  static void RightTrim(std::string &str);\n\n  // Trim from both ends (in place)\n  static void Trim(std::string &str);\n\n  // Trim from start (copying)\n  static std::string CopyLeftTrim(std::string str);\n\n  // Trim from end (copying)\n  static std::string CopyRightTrim(std::string str);\n\n  // Trim from both ends (copying)\n  static std::string CopyTrim(std::string str);\n\n  //! Compare ignore case\n  static bool CompareIgnoreCase(const std::string &a, const std::string &b);\n\n  //! 
Convert string to floating-point number (double)\n  static bool ToDouble(const std::string &str, double *val) {\n    char *endptr = nullptr;\n    *val = std::strtod(str.c_str(), &endptr);\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to floating-point number (float)\n  static bool ToFloat(const std::string &str, float *val) {\n    char *endptr = nullptr;\n    *val = std::strtof(str.c_str(), &endptr);\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to integer number (int8_t)\n  static bool ToInt8(const std::string &str, int8_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<int8_t>(std::strtol(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to integer number (int16_t)\n  static bool ToInt16(const std::string &str, int16_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<int16_t>(std::strtol(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to integer number (int32_t)\n  static bool ToInt32(const std::string &str, int32_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<int32_t>(std::strtol(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to integer number (int64_t)\n  static bool ToInt64(const std::string &str, int64_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<int64_t>(std::strtoll(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to unsigned integer number (uint8_t)\n  static bool ToUint8(const std::string &str, uint8_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<uint8_t>(std::strtoul(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! 
Convert string to unsigned integer number (uint16_t)\n  static bool ToUint16(const std::string &str, uint16_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<uint16_t>(std::strtoul(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to unsigned integer number (uint32_t)\n  static bool ToUint32(const std::string &str, uint32_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<uint32_t>(std::strtoul(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert string to unsigned integer number (uint64_t)\n  static bool ToUint64(const std::string &str, uint64_t *val) {\n    char *endptr = nullptr;\n    *val = static_cast<uint64_t>(std::strtoull(str.c_str(), &endptr, 0));\n    return (endptr && *endptr == '\\0');\n  }\n\n  //! Convert floating-point number to string (double)\n  static std::string ToString(double val) {\n    return std::to_string(val);\n  }\n\n  //! Convert floating-point number to string (float)\n  static std::string ToString(float val) {\n    return std::to_string(val);\n  }\n\n  //! Convert integer number to string (int8_t)\n  static std::string ToString(int8_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert integer number to string (int16_t)\n  static std::string ToString(int16_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert integer number to string (int32_t)\n  static std::string ToString(int32_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert integer number to string (int64_t)\n  static std::string ToString(int64_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert unsigned integer number to string (uint8_t)\n  static std::string ToString(uint8_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert unsigned integer number to string (uint16_t)\n  static std::string ToString(uint16_t val) {\n    return std::to_string(val);\n  }\n\n  //! 
Convert unsigned integer number to string (uint32_t)\n  static std::string ToString(uint32_t val) {\n    return std::to_string(val);\n  }\n\n  //! Convert unsigned integer number to string (uint64_t)\n  static std::string ToString(uint64_t val) {\n    return std::to_string(val);\n  }\n\n  //! Concatenation of arbitrary number of std::string, c-string, integers,\n  //! floating point numbers with one memory allocation.\n  //! E.g. auto s = Concat(\"foo\", 123, std::string(\"bar\"), 3.14159);\n  //!\n  //! Do not do the following, use Append instead\n  //! str = Concat(str, ...);\n  //! str.append(Concat(str, ...));\n  //! str += Concat(str, ...);\n  //!\n  //! NOTE: char literal(e.g. ':') is not allowed,\n  //! use string literal(e.g. \":\") instead.\n  static std::string Concat() {\n    return {};\n  }\n  static std::string Concat(const internal::Alphameric &a);\n  static std::string Concat(const internal::Alphameric &a,\n                            const internal::Alphameric &b);\n  static std::string Concat(const internal::Alphameric &a,\n                            const internal::Alphameric &b,\n                            const internal::Alphameric &c);\n  static std::string Concat(const internal::Alphameric &a,\n                            const internal::Alphameric &b,\n                            const internal::Alphameric &c,\n                            const internal::Alphameric &d);\n  // Support 5 or more arguments\n  template <typename... T>\n  static std::string Concat(const internal::Alphameric &a,\n                            const internal::Alphameric &b,\n                            const internal::Alphameric &c,\n                            const internal::Alphameric &d,\n                            const internal::Alphameric &e, const T &...args) {\n    std::string result;\n    Append(&result, a, b, c, d, e, args...);\n    return result;\n  }\n\n  //! Append arbitrary number of std::string, c-string, integers,\n  //! 
floating point numbers to existing string with one memory allocation.\n  //! E.g. Append(&str, \"foo\", 123, std::string(\"bar\"), 3.14159);\n  //!\n  //! WARNING: Append requires that none of the arguments be a reference to\n  //! destination str.\n  //!\n  //! Do not do the following\n  //! std::string s = \"foo\";\n  //! Append(&s, s);\n  //!\n  //! NOTE: char literal(e.g. ':') is not allowed,\n  //! use string literal(e.g. \":\") instead.\n  static void Append(std::string *) {}\n  static void Append(std::string *str, const internal::Alphameric &a);\n  static void Append(std::string *str, const internal::Alphameric &a,\n                     const internal::Alphameric &b);\n  static void Append(std::string *str, const internal::Alphameric &a,\n                     const internal::Alphameric &b,\n                     const internal::Alphameric &c);\n  static void Append(std::string *str, const internal::Alphameric &a,\n                     const internal::Alphameric &b,\n                     const internal::Alphameric &c,\n                     const internal::Alphameric &d);\n  // Support 5 or more arguments\n  template <typename... T>\n  static void Append(std::string *str, const internal::Alphameric &a,\n                     const internal::Alphameric &b,\n                     const internal::Alphameric &c,\n                     const internal::Alphameric &d,\n                     const internal::Alphameric &e, const T &...args) {\n    AppendViews(str,\n                {a.view(), b.view(), c.view(), d.view(), e.view(),\n                 static_cast<const internal::Alphameric &>(args).view()...});\n  }\n\n  //! 
Append list of StringView to str.\n  static void AppendViews(std::string *str,\n                          std::initializer_list<StringView> views);\n};\n\ninline std::string StringHelper::Concat(const internal::Alphameric &a) {\n  std::string result;\n  Append(&result, a);\n  return result;\n}\n\ninline std::string StringHelper::Concat(const internal::Alphameric &a,\n                                        const internal::Alphameric &b) {\n  std::string result;\n  Append(&result, a, b);\n  return result;\n}\n\ninline std::string StringHelper::Concat(const internal::Alphameric &a,\n                                        const internal::Alphameric &b,\n                                        const internal::Alphameric &c) {\n  std::string result;\n  Append(&result, a, b, c);\n  return result;\n}\n\ninline std::string StringHelper::Concat(const internal::Alphameric &a,\n                                        const internal::Alphameric &b,\n                                        const internal::Alphameric &c,\n                                        const internal::Alphameric &d) {\n  std::string result;\n  Append(&result, a, b, c, d);\n  return result;\n}\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/utility/string_helper_impl.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstring>\n#include <string>\n#include <vector>\n\nnamespace zvec {\nnamespace ailego {\nnamespace details {\n\n//! Convert string to integers or floating point numbers\ntemplate <typename T>\nstatic T CStringToType(const char *begin, char **endptr) {\n  static_assert(\n      std::is_same<T, int32_t>::value || std::is_same<T, int16_t>::value ||\n          std::is_same<T, int8_t>::value || std::is_same<T, int64_t>::value ||\n          std::is_same<T, uint64_t>::value ||\n          std::is_same<T, uint32_t>::value ||\n          std::is_same<T, uint16_t>::value || std::is_same<T, uint8_t>::value ||\n          std::is_same<T, float>::value || std::is_same<T, double>::value,\n      \"type not supported\");\n  if (std::is_same<T, int32_t>::value || std::is_same<T, int16_t>::value ||\n      std::is_same<T, int8_t>::value) {\n    return static_cast<T>(strtol(begin, endptr, 0));\n  } else if (std::is_same<T, int64_t>::value) {\n    return static_cast<T>(strtoll(begin, endptr, 0));\n  } else if (std::is_same<T, uint32_t>::value ||\n             std::is_same<T, uint16_t>::value ||\n             std::is_same<T, uint8_t>::value) {\n    return static_cast<T>(strtoul(begin, endptr, 0));\n  } else if (std::is_same<T, uint64_t>::value) {\n    return static_cast<T>(strtoull(begin, endptr, 0));\n  } else if (std::is_same<T, 
float>::value) {\n    return static_cast<T>(strtof(begin, endptr));\n  } else {\n    return static_cast<T>(strtod(begin, endptr));\n  }\n}\n\n//! Convert [begin, end) to T\n//! If [end, ) contains valid T symbol, extra overhead will be incurred by\n//! constructing std::string\ntemplate <typename T>\nstruct StringToType {\n  T operator()(const char *begin, const char *end) {\n    char *eptr = nullptr;\n    auto v = CStringToType<T>(begin, &eptr);\n    if (eptr > end) {\n      // NOTE: [begin, end) is not 0 terminated\n      // If delimiter contains valid T symbol, eptr might point to location\n      // after end.\n      // We create string here, which is guaranteed to be 0 terminated.\n      std::string s{begin, end};\n      return CStringToType<T>(s.c_str(), &eptr);\n    }\n    return v;\n  }\n};\n\n//! Specialization for std::string\ntemplate <>\nstruct StringToType<std::string> {\n  std::string operator()(const char *begin, const char *end) {\n    return {begin, end};\n  }\n};\n\n//! Return delimiter length.\ntemplate <typename T>\nstruct DelimiterLen {\n  size_t operator()(T delimiter);\n};\n\n//! Return delimiter length for char.\ntemplate <>\nstruct DelimiterLen<char> {\n  size_t operator()(char) {\n    return 1;\n  }\n};\n\n//! Return delimiter length for const char*.\ntemplate <>\nstruct DelimiterLen<const char *> {\n  size_t operator()(const char *delimiter) {\n    return delimiter == nullptr ? 0 : std::strlen(delimiter);\n  }\n};\n\n//! Return delimiter length for std::string.\ntemplate <>\nstruct DelimiterLen<const std::string &> {\n  size_t operator()(const std::string &delimiter) {\n    return delimiter.size();\n  }\n};\n\n//! 
Split implementation.\ntemplate <typename D, typename T,\n          typename = typename std::enable_if<\n              std::is_same<char, D>::value ||\n                  std::is_same<const std::string &, D>::value ||\n                  std::is_same<const char *, D>::value,\n              D>::type>\nstatic void SplitImpl(const std::string &str, D delim, std::vector<T> *out) {\n  StringToType<T> func;\n  out->clear();\n\n  auto s = str.data();\n  size_t delimiter_len = DelimiterLen<D>()(delim);\n  if (delimiter_len != 0) {\n    size_t a = 0, b = str.find(delim);\n    while (b != std::string::npos) {\n      out->push_back(func(s + a, s + b));\n      a = b + delimiter_len;\n      b = str.find(delim, a);\n    }\n    out->push_back(func(s + a, s + str.length()));\n  } else {\n    out->push_back(func(s + 0, s + str.length()));\n  }\n}\n\n//! Split implementation.\ntemplate <typename D, typename T,\n          typename = typename std::enable_if<\n              std::is_same<char, D>::value ||\n                  std::is_same<const std::string &, D>::value ||\n                  std::is_same<const char *, D>::value,\n              D>::type>\nstatic void SplitImpl(const std::string &str, D delim, std::vector<T> *out,\n                      bool skip_empty) {\n  StringToType<T> func;\n  out->clear();\n\n  auto s = str.data();\n  size_t delimiter_len = DelimiterLen<D>()(delim);\n  if (delimiter_len != 0) {\n    size_t a = 0, b = str.find(delim);\n    while (b != std::string::npos) {\n      if (!skip_empty || b - a > 0) {\n        out->push_back(func(s + a, s + b));\n      }\n\n      a = b + delimiter_len;\n      b = str.find(delim, a);\n    }\n    if (!skip_empty || str.length() - a > 0) {\n      out->push_back(func(s + a, s + str.length()));\n    }\n  } else {\n    if (!skip_empty || str.length() > 0) {\n      out->push_back(func(s + 0, s + str.length()));\n    }\n  }\n}\n}  // namespace details\n}  // namespace ailego\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/ailego/utility/time_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <zvec/ailego/internal/platform.h>\n\nnamespace zvec {\nnamespace ailego {\n\n/*! Monotime\n */\nstruct Monotime {\n  //! Retrieve monotonic time in nanoseconds\n  static uint64_t NanoSeconds(void);\n\n  //! Retrieve monotonic time in microseconds\n  static uint64_t MicroSeconds(void);\n\n  //! Retrieve monotonic time in milliseconds\n  static uint64_t MilliSeconds(void);\n\n  //! Retrieve monotonic time in seconds\n  static uint64_t Seconds(void);\n};\n\n/*! Realtime\n */\nstruct Realtime {\n  //! Retrieve system time in nanoseconds\n  static uint64_t NanoSeconds(void);\n\n  //! Retrieve system time in microseconds\n  static uint64_t MicroSeconds(void);\n\n  //! Retrieve system time in milliseconds\n  static uint64_t MilliSeconds(void);\n\n  //! Retrieve system time in seconds\n  static uint64_t Seconds(void);\n\n  //! Retrieve a timestamp as a specific local time format\n  static size_t Localtime(uint64_t stamp, const char *format, char *buf,\n                          size_t len);\n\n  //! Retrieve a timestamp as a specific GMT time format\n  static size_t Gmtime(uint64_t stamp, const char *format, char *buf,\n                       size_t len);\n\n  //! Retrieve local time in string\n  static size_t Localtime(const char *format, char *buf, size_t len);\n\n  //! 
Retrieve GMT time in string\n  static size_t Gmtime(const char *format, char *buf, size_t len);\n\n  //! Retrieve local time in string\n  static size_t Localtime(char *buf, size_t len) {\n    return Localtime(\"%Y-%m-%d %H:%M:%S\", buf, len);\n  }\n\n  //! Retrieve GMT time in string\n  static size_t Gmtime(char *buf, size_t len) {\n    return Gmtime(\"%Y-%m-%d %H:%M:%S\", buf, len);\n  }\n\n  //! Retrieve local time in string\n  static std::string Localtime(void) {\n    char str[32];\n    Localtime(str, sizeof(str));\n    return std::string(str);\n  }\n\n  //! Retrieve GMT time in string\n  static std::string Gmtime(void) {\n    char str[32];\n    Gmtime(str, sizeof(str));\n    return std::string(str);\n  }\n\n  //! Retrieve a timestamp as a specific local time format\n  static size_t Localtime(uint64_t stamp, char *buf, size_t len) {\n    return Localtime(stamp, \"%Y-%m-%d %H:%M:%S\", buf, len);\n  }\n\n  //! Retrieve a timestamp as a specific GMT time format\n  static size_t Gmtime(uint64_t stamp, char *buf, size_t len) {\n    return Gmtime(stamp, \"%Y-%m-%d %H:%M:%S\", buf, len);\n  }\n\n  //! Retrieve a timestamp as a specific local time format\n  static std::string Localtime(uint64_t stamp) {\n    char str[32];\n    Localtime(stamp, str, sizeof(str));\n    return std::string(str);\n  }\n\n  //! Retrieve a timestamp as a specific GMT time format\n  static std::string Gmtime(uint64_t stamp) {\n    char str[32];\n    Gmtime(stamp, str, sizeof(str));\n    return std::string(str);\n  }\n};\n\n/*! Thread-specific CPU time\n */\nstruct CPUtime {\n  //! Retrieve CPU time in nanoseconds\n  static uint64_t NanoSeconds(void);\n\n  //! Retrieve CPU time in microseconds\n  static uint64_t MicroSeconds(void);\n\n  //! Retrieve CPU time in milliseconds\n  static uint64_t MilliSeconds(void);\n\n  //! Retrieve CPU time in seconds\n  static uint64_t Seconds(void);\n};\n\n/*! Elapsed Time\n */\nclass ElapsedTime {\n public:\n  //! 
Constructor\n  ElapsedTime(void) : stamp_(Monotime::NanoSeconds()) {}\n\n  //! Retrieve the elapsed time in nanoseconds\n  uint64_t nano_seconds(void) const {\n    return (Monotime::NanoSeconds() - stamp_);\n  }\n\n  //! Retrieve the elapsed time in microseconds\n  uint64_t micro_seconds(void) const {\n    return (this->nano_seconds() / 1000u);\n  }\n\n  //! Retrieve the elapsed time in milliseconds\n  uint64_t milli_seconds(void) const {\n    return (this->nano_seconds() / 1000000u);\n  }\n\n  //! Retrieve the elapsed time in seconds\n  uint64_t seconds(void) const {\n    return (this->nano_seconds() / 1000000000u);\n  }\n\n  //! Update time stamp\n  void reset(void) {\n    stamp_ = Monotime::NanoSeconds();\n  }\n\n private:\n  uint64_t stamp_;\n};\n\n/*! Elapsed CPU Time\n */\nclass ElapsedCPUTime {\n public:\n  //! Constructor\n  ElapsedCPUTime(void) : stamp_(CPUtime::NanoSeconds()) {}\n\n  //! Retrieve the elapsed time in nanoseconds\n  uint64_t nano_seconds(void) const {\n    return (CPUtime::NanoSeconds() - stamp_);\n  }\n\n  //! Retrieve the elapsed time in microseconds\n  uint64_t micro_seconds(void) const {\n    return (this->nano_seconds() / 1000u);\n  }\n\n  //! Retrieve the elapsed time in milliseconds\n  uint64_t milli_seconds(void) const {\n    return (this->nano_seconds() / 1000000u);\n  }\n\n  //! Retrieve the elapsed time in seconds\n  uint64_t seconds(void) const {\n    return (this->nano_seconds() / 1000000000u);\n  }\n\n  //! Update time stamp\n  void reset(void) {\n    stamp_ = CPUtime::NanoSeconds();\n  }\n\n private:\n  uint64_t stamp_;\n};\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/ailego/utility/type_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <functional>\n#include <type_traits>\n#include <zvec/ailego/utility/float_helper.h>\n\nnamespace zvec {\nnamespace ailego {\n\n//! Determines if a type is an arithmetic type (includes Float16)\ntemplate <typename T>\nstruct IsArithmetic\n    : std::integral_constant<bool, std::is_arithmetic<T>::value ||\n                                       std::is_same<T, Float16>::value> {};\n\n//! Determines if a type is a signed arithmetic type (includes Float16)\ntemplate <typename T>\nstruct IsSignedArithmetic\n    : std::integral_constant<bool, std::is_signed<T>::value ||\n                                       std::is_same<T, Float16>::value> {};\n\n//! Determines if a type is a unsigned arithmetic type (includes Float16)\ntemplate <typename T>\nstruct IsUnsignedArithmetic\n    : std::integral_constant<bool, std::is_unsigned<T>::value> {};\n\n//! 
Determines if a type is a floating-point type (includes Float16)\ntemplate <typename T>\nstruct IsFloatingPoint\n    : std::integral_constant<bool, std::is_floating_point<T>::value ||\n                                       std::is_same<T, Float16>::value> {};\n\n#if __GNUC__ >= 5 || defined(_MSC_VER) || defined(__clang__)\ntemplate <typename T>\nusing IsTriviallyCopyable = std::is_trivially_copyable<T>;\n#else\ntemplate <typename T>\nusing IsTriviallyCopyable = std::has_trivial_copy_constructor<T>;\n#endif\n\n#if __cplusplus >= 201703L  // C++17\n\n//! Determines if a type can be invoked with the specified argument types\ntemplate <typename TFunc, typename... TArgs>\nusing IsInvocable = std::is_invocable<TFunc, TArgs...>;\n\n//! Determines if a type can be invoked with the specified argument types\ntemplate <typename R, typename TFunc, typename... TArgs>\nusing IsInvocableWithResult = std::is_invocable_r<R, TFunc, TArgs...>;\n\n#else\n//! Determines if a type can be invoked with the specified argument types\ntemplate <typename TFunc, typename... TArgs>\nstruct IsInvocable\n    : std::is_constructible<std::function<void(TArgs...)>,\n                            std::reference_wrapper<\n                                typename std::remove_reference<TFunc>::type> > {\n};\n\n//! Determines if a type can be invoked with the specified argument types\ntemplate <typename R, typename TFunc, typename... TArgs>\nstruct IsInvocableWithResult\n    : std::is_constructible<std::function<R(TArgs...)>,\n                            std::reference_wrapper<\n                                typename std::remove_reference<TFunc>::type> > {\n};\n#endif\n\n//! Fixed underlying_type used with conditional\ntemplate <typename T, bool = std::is_enum<T>::value>\nstruct UnderlyingType {\n  typedef typename std::remove_cv<T>::type type;\n};\n\n//! 
Fixed underlying_type used with conditional\ntemplate <typename T>\nstruct UnderlyingType<T, true> {\n  typedef typename std::underlying_type<T>::type type;\n};\n\n#if __cplusplus >= 201703L  // C++17\n\n//! Variadic logical AND metafunction\ntemplate <typename... TConds>\nusing Conjunction = std::conjunction<TConds...>;\n\n//! Variadic logical OR metafunction\ntemplate <typename... TConds>\nusing Disjunction = std::disjunction<TConds...>;\n\n#else\n//! Variadic logical AND metafunction\ntemplate <typename... TConds>\nstruct Conjunction : std::true_type {};\n\n//! Variadic logical AND metafunction\ntemplate <typename TCond, typename... TConds>\nstruct Conjunction<TCond, TConds...>\n    : std::conditional<TCond::value, Conjunction<TConds...>,\n                       std::false_type>::type {};\n\n//! Variadic logical OR metafunction\ntemplate <typename... TConds>\nstruct Disjunction : std::false_type {};\n\n//! Variadic logical OR metafunction\ntemplate <typename TCond, typename... TConds>\nstruct Disjunction<TCond, TConds...>\n    : std::conditional<TCond::value, std::true_type,\n                       Disjunction<TConds...> >::type {};\n#endif\n\n}  // namespace ailego\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_builder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_runner.h>\n\nnamespace zvec {\nnamespace core {\n\nclass IndexBuilder : public IndexRunner {\n public:\n  typedef std::shared_ptr<IndexBuilder> Pointer;\n\n  //! Destructor\n  virtual ~IndexBuilder(void) {}\n\n  //! Initialize the builder\n  virtual int init(const IndexMeta & /*meta*/,\n                   const ailego::Params & /*params*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Train and build the index\n  static int TrainAndBuild(const IndexBuilder::Pointer &builder,\n                           IndexHolder::Pointer holder) {\n    auto two_pass_holder = IndexHelper::MakeTwoPassHolder(std::move(holder));\n    int ret = builder->train(two_pass_holder);\n    if (ret == 0) {\n      ret = builder->build(std::move(two_pass_holder));\n    }\n    return ret;\n  }\n\n  //! 
Train, build and dump the index\n  static int TrainBuildAndDump(const IndexBuilder::Pointer &builder,\n                               IndexHolder::Pointer holder,\n                               const IndexDumper::Pointer &dumper) {\n    int ret = IndexBuilder::TrainAndBuild(builder, std::move(holder));\n    if (ret == 0) {\n      ret = builder->dump(dumper);\n    }\n    return ret;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_bundle.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <map>\n#include <memory>\n#include <string>\n#include <zvec/ailego/container/blob.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/io/mmap_file.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Bundle\n */\nstruct IndexBundle {\n  //! Index Bundle Pointer\n  typedef std::shared_ptr<IndexBundle> Pointer;\n\n  //! Destructor\n  virtual ~IndexBundle(void) {}\n\n  //! Retrieve index buffer via key\n  virtual ailego::BlobWrap get(const std::string &key) const = 0;\n\n  //! Test if the key is exist\n  virtual bool has(const std::string &key) const = 0;\n\n  //! Retrieve all\n  virtual std::map<std::string, ailego::BlobWrap> all(void) const = 0;\n\n  //! Retrieve the count of indexes\n  virtual size_t count(void) const = 0;\n};\n\n/*! Trivial Index Bundle\n */\nclass TrivialIndexBundle : public IndexBundle {\n public:\n  //! Trivial Index Bundle Pointer\n  typedef std::shared_ptr<TrivialIndexBundle> Pointer;\n\n  //! Retrieve index buffer via key\n  virtual ailego::BlobWrap get(const std::string &key) const {\n    auto iter = map_.find(key);\n    if (iter != map_.end()) {\n      return iter->second;\n    }\n    return ailego::BlobWrap();\n  }\n\n  //! Test if the key is exist\n  virtual bool has(const std::string &key) const {\n    return (map_.find(key) != map_.end());\n  }\n\n  //! 
Retrieve all\n  virtual std::map<std::string, ailego::BlobWrap> all(void) const {\n    return map_;\n  }\n\n  //! Retrieve the count of indexes\n  virtual size_t count(void) const {\n    return map_.size();\n  }\n\n  //! Set an index buffer in bundle\n  void set(const std::string &key, const ailego::BlobWrap &blob) {\n    map_[key] = blob;\n  }\n\n  //! Set an index buffer in bundle\n  void set(std::string &&key, const ailego::BlobWrap &blob) {\n    map_[std::move(key)] = blob;\n  }\n\n  //! Set an index buffer in bundle\n  void set(const std::string &key, const void *buf, size_t len) {\n    map_[key] = ailego::BlobWrap(buf, len);\n  }\n\n  //! Set an index buffer in bundle\n  void set(std::string &&key, const void *buf, size_t len) {\n    map_[std::move(key)] = ailego::BlobWrap(buf, len);\n  }\n\n private:\n  std::map<std::string, ailego::BlobWrap> map_;\n};\n\n/*! Memory Index Bundle\n */\nclass MemoryIndexBundle : public IndexBundle {\n public:\n  //! Memory Index Bundle Pointer\n  typedef std::shared_ptr<MemoryIndexBundle> Pointer;\n\n  //! Retrieve index buffer via key\n  virtual ailego::BlobWrap get(const std::string &key) const {\n    auto iter = map_.find(key);\n    if (iter != map_.end()) {\n      return ailego::BlobWrap(iter->second.data(), iter->second.size());\n    }\n    return ailego::BlobWrap();\n  }\n\n  //! Test if the key is exist\n  virtual bool has(const std::string &key) const {\n    return (map_.find(key) != map_.end());\n  }\n\n  //! Retrieve all\n  virtual std::map<std::string, ailego::BlobWrap> all(void) const {\n    std::map<std::string, ailego::BlobWrap> result;\n    for (const auto &it : map_) {\n      result.emplace(it.first,\n                     ailego::BlobWrap(it.second.data(), it.second.size()));\n    }\n    return result;\n  }\n\n  //! Retrieve the count of indexes\n  virtual size_t count(void) const {\n    return map_.size();\n  }\n\n  //! 
Set an index buffer in bundle\n  void set(const std::string &key, const std::string &buf) {\n    map_[key] = buf;\n  }\n\n  //! Set an index buffer in bundle\n  void set(std::string &&key, const std::string &buf) {\n    map_[std::move(key)] = buf;\n  }\n\n  //! Set an index buffer in bundle\n  void set(const std::string &key, std::string &&buf) {\n    map_[key] = std::move(buf);\n  }\n\n  //! Set an index buffer in bundle\n  void set(std::string &&key, std::string &&buf) {\n    map_[std::move(key)] = std::move(buf);\n  }\n\n  //! Set an index buffer in bundle\n  void set(const std::string &key, const void *buf, size_t len) {\n    map_[key].assign(reinterpret_cast<const char *>(buf), len);\n  }\n\n  //! Set an index buffer in bundle\n  void set(std::string &&key, const void *buf, size_t len) {\n    map_[std::move(key)].assign(reinterpret_cast<const char *>(buf), len);\n  }\n\n private:\n  std::map<std::string, std::string> map_;\n};\n\n/*! MMap File Index Bundle\n */\nclass MMapFileIndexBundle : public IndexBundle {\n public:\n  //! MMap File Index Bundle Pointer\n  typedef std::shared_ptr<MMapFileIndexBundle> Pointer;\n\n  //! Retrieve index buffer via key\n  virtual ailego::BlobWrap get(const std::string &key) const {\n    auto iter = map_.find(key);\n    if (iter != map_.end()) {\n      return ailego::BlobWrap(iter->second.region(), iter->second.size());\n    }\n    return ailego::BlobWrap();\n  }\n\n  //! Test if the key is exist\n  virtual bool has(const std::string &key) const {\n    return (map_.find(key) != map_.end());\n  }\n\n  //! Retrieve all\n  virtual std::map<std::string, ailego::BlobWrap> all(void) const {\n    std::map<std::string, ailego::BlobWrap> result;\n    for (const auto &it : map_) {\n      result.emplace(it.first,\n                     ailego::BlobWrap(it.second.region(), it.second.size()));\n    }\n    return result;\n  }\n\n  //! Retrieve the count of indexes\n  virtual size_t count(void) const {\n    return map_.size();\n  }\n\n  //! 
Create a memory mapping file in bundle\n  bool create(const std::string &prefix, const std::string &key, size_t len) {\n    ailego::MMapFile file;\n    if (!file.create(prefix + '/' + key, len)) {\n      return false;\n    }\n    map_[key] = std::move(file);\n    return true;\n  }\n\n  //! Create a memory mapping file in bundle\n  bool create(const std::string &prefix, std::string &&key, size_t len) {\n    ailego::MMapFile file;\n    if (!file.create(prefix + '/' + key, len)) {\n      return false;\n    }\n    map_[std::move(key)] = std::move(file);\n    return true;\n  }\n\n  //! Create a memory mapping file in bundle\n  bool create(const std::string &path, size_t len) {\n    ailego::MMapFile file;\n    if (!file.create(path, len)) {\n      return false;\n    }\n    map_[ailego::File::BaseName(path)] = std::move(file);\n    return true;\n  }\n\n  //! Open a memory mapping file in bundle\n  bool open(const std::string &prefix, const std::string &key, bool rdonly) {\n    ailego::MMapFile file;\n    if (!file.open(prefix + '/' + key, rdonly)) {\n      return false;\n    }\n    map_[key] = std::move(file);\n    return true;\n  }\n\n  //! Open a memory mapping file in bundle\n  bool open(const std::string &prefix, std::string &&key, bool rdonly) {\n    ailego::MMapFile file;\n    if (!file.open(prefix + '/' + key, rdonly)) {\n      return false;\n    }\n    map_[std::move(key)] = std::move(file);\n    return true;\n  }\n\n  //! Open a memory mapping file in bundle\n  bool open(const std::string &path, bool rdonly) {\n    ailego::MMapFile file;\n    if (!file.open(path, rdonly)) {\n      return false;\n    }\n    map_[ailego::File::BaseName(path)] = std::move(file);\n    return true;\n  }\n\n private:\n  std::map<std::string, ailego::MMapFile> map_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_cluster.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_bundle.h>\n#include <zvec/core/framework/index_features.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_module.h>\n#include <zvec/core/framework/index_threads.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Cluster\n */\nstruct IndexCluster : public IndexModule {\n public:\n  //! Index Cluster Pointer\n  typedef std::shared_ptr<IndexCluster> Pointer;\n\n  /*! Index Cluster Centroid\n   */\n  class Centroid {\n   public:\n    //! Constructor\n    Centroid(void)\n        : buffer_(), score_(0.0), follows_(0), similars_(), subitems_() {}\n\n    //! Constructor\n    Centroid(const void *feat, size_t bytes)\n        : buffer_(std::string(reinterpret_cast<const char *>(feat), bytes)),\n          score_(0.0),\n          follows_(0),\n          similars_(),\n          subitems_() {}\n\n    //! Constructor\n    Centroid(const Centroid &rhs)\n        : buffer_(rhs.buffer_),\n          score_(rhs.score_),\n          follows_(rhs.follows_),\n          similars_(rhs.similars_),\n          subitems_(rhs.subitems_) {}\n\n    //! 
Constructor\n    Centroid(Centroid &&rhs)\n        : buffer_(std::move(rhs.buffer_)),\n          score_(rhs.score_),\n          follows_(rhs.follows_),\n          similars_(std::move(rhs.similars_)),\n          subitems_(std::move(rhs.subitems_)) {}\n\n    //! Assignment\n    Centroid &operator=(const Centroid &rhs) {\n      buffer_ = rhs.buffer_;\n      score_ = rhs.score_;\n      follows_ = rhs.follows_;\n      similars_ = rhs.similars_;\n      subitems_ = rhs.subitems_;\n      return *this;\n    }\n\n    //! Assignment\n    Centroid &operator=(Centroid &&rhs) {\n      buffer_ = std::move(rhs.buffer_);\n      score_ = rhs.score_;\n      follows_ = rhs.follows_;\n      similars_ = std::move(rhs.similars_);\n      subitems_ = std::move(rhs.subitems_);\n      return *this;\n    }\n\n    //! Less than\n    bool operator<(const Centroid &rhs) const {\n      return (this->score_ < rhs.score_);\n    }\n\n    //! Test if matchs the meta\n    bool is_matched(const IndexMeta &meta) const {\n      if (buffer_.size() != meta.element_size()) {\n        return false;\n      }\n      for (const auto &it : subitems_) {\n        if (!it.is_matched(meta)) {\n          return false;\n        }\n      }\n      return true;\n    }\n\n    //! Set feature of centroid\n    void set_feature(const void *feat, size_t bytes) {\n      buffer_.assign(std::string(reinterpret_cast<const char *>(feat), bytes));\n    }\n\n    //! Set feature of centroid\n    template <typename T>\n    void set_feature(const ailego::NumericalVector<T> &feat) {\n      buffer_.assign(feat);\n    }\n\n    //! Set feature of centroid\n    template <typename T>\n    void set_feature(ailego::NumericalVector<T> &&feat) {\n      buffer_.assign(std::forward<ailego::NumericalVector<T>>(feat));\n    }\n\n    //! Set score of centroid\n    void set_score(double val) {\n      score_ = val;\n    }\n\n    //! Set follows of centroid\n    void set_follows(size_t count) {\n      follows_ = count;\n    }\n\n    //! 
Set similars of centroid\n    void set_similars(const std::vector<const void *> &feats) {\n      similars_ = feats;\n    }\n\n    //! Set similars of centroid\n    void set_similars(std::vector<const void *> &&feats) {\n      similars_ = std::move(feats);\n    }\n\n    //! Set subitems of centroid\n    void set_subitems(const std::vector<Centroid> &cents) {\n      subitems_ = cents;\n    }\n\n    //! Set subitems of centroid\n    void set_subitems(std::vector<Centroid> &&cents) {\n      subitems_ = std::move(cents);\n    }\n\n    //! Retrieve feature buffer\n    std::string *mutable_buffer(void) {\n      return &buffer_;\n    }\n\n    //! Retrieve feature buffer\n    const std::string &buffer(void) const {\n      return buffer_;\n    }\n\n    //! Retrieve feature vector\n    template <typename T>\n    ailego::NumericalVector<T> *mutable_vector(void) {\n      return static_cast<ailego::NumericalVector<T> *>(&buffer_);\n    }\n\n    //! Retrieve feature vector\n    template <typename T>\n    const ailego::NumericalVector<T> &vector(void) const {\n      return static_cast<const ailego::NumericalVector<T> &>(buffer_);\n    }\n\n    //! Retrieve feature pointer\n    const void *feature(void) const {\n      return buffer_.data();\n    }\n\n    //! Retrieve size of centroid in bytes\n    size_t size(void) const {\n      return buffer_.size();\n    }\n\n    //! Retrieve score of centroid\n    double score(void) const {\n      return score_;\n    }\n\n    //! Retrieve follows' count of centroid\n    size_t follows(void) const {\n      return follows_;\n    }\n\n    //! Retrieve similars of centroid\n    const std::vector<const void *> &similars(void) const {\n      return similars_;\n    }\n\n    //! Retrieve similars of centroid\n    std::vector<const void *> *mutable_similars(void) {\n      return &similars_;\n    }\n\n    //! Retrieve the sub centroids\n    const std::vector<Centroid> &subitems(void) const {\n      return subitems_;\n    }\n\n    //! 
Retrieve the sub centroids\n    std::vector<Centroid> *mutable_subitems(void) {\n      return &subitems_;\n    }\n\n    //! Retrieve the count of subitems (includes children's children)\n    size_t subcount(void) const {\n      size_t total = subitems_.size();\n      for (const auto &it : subitems_) {\n        total += it.subcount();\n      }\n      return total;\n    }\n\n   private:\n    //! Members\n    std::string buffer_;\n    double score_;\n    size_t follows_;\n    std::vector<const void *> similars_;\n    std::vector<Centroid> subitems_;\n  };\n\n  //! Index Cluster Centroid List\n  typedef std::vector<Centroid> CentroidList;\n\n  //! Destructor\n  virtual ~IndexCluster(void) {}\n\n  //! Deserialize centroids from bundle\n  static int Deserialize(const IndexMeta &meta, IndexBundle::Pointer bundle,\n                         CentroidList *cents);\n\n  //! Serialize centroids into bundle\n  static int Serialize(const IndexMeta &meta, const CentroidList &cents,\n                       IndexBundle::Pointer *out);\n\n  //! Initialize Cluster\n  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;\n\n  //! Cleanup Cluster\n  virtual int cleanup(void) = 0;\n\n  //! Reset Cluster\n  virtual int reset(void) = 0;\n\n  //! Update Cluster\n  virtual int update(const ailego::Params &params) = 0;\n\n  //! Suggest dividing to K clusters\n  virtual void suggest(uint32_t k) = 0;\n\n  //! Mount features\n  virtual int mount(IndexFeatures::Pointer feats) = 0;\n\n  //! Cluster\n  virtual int cluster(CentroidList &cents) {\n    return this->cluster(nullptr, cents);\n  }\n\n  //! Cluster\n  virtual int cluster(IndexThreads::Pointer threads, CentroidList &cents) = 0;\n\n  //! Classify\n  virtual int classify(CentroidList &cents) {\n    return this->classify(nullptr, cents);\n  }\n\n  //! Classify\n  virtual int classify(IndexThreads::Pointer threads, CentroidList &cents) = 0;\n\n  //! 
Label\n  virtual int label(const CentroidList &cents, std::vector<uint32_t> *out) {\n    return this->label(nullptr, cents, out);\n  }\n\n  //! Label\n  virtual int label(IndexThreads::Pointer threads, const CentroidList &cents,\n                    std::vector<uint32_t> *out) = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_context.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_document.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_filter.h>\n#include <zvec/core/framework/index_groupby.h>\n#include <zvec/core/framework/index_metric.h>\n#include <zvec/core/framework/index_stats.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Profiler\n */\nstruct Profiler {\n  Profiler() = default;\n  ~Profiler() = default;\n\n  void add(const std::string &name, double time) {\n    timings[name] += time;\n  }\n\n  std::string display() const {\n    std::string info = \"================================================\\n\";\n\n    for (auto itr = timings.begin(); itr != timings.end(); ++itr) {\n      info +=\n          itr->first + std::string(\": \") + std::to_string(itr->second) + \" s\\n\";\n    }\n\n    info += \"================================================\\n\";\n\n    return info;\n  }\n\n  std::map<std::string, double> timings;\n};\n\n/*! Index Context\n */\nclass IndexContext {\n public:\n  //! Index Context Pointer\n  typedef std::unique_ptr<IndexContext> Pointer;\n\n  //! Index Context UPointer\n  typedef std::unique_ptr<IndexContext> UPointer;\n\n  /*! Index Context Stats\n   */\n  class Stats : public IndexStats {\n   public:\n    //! 
Set count of documents filtered\n    void set_filtered_count(size_t count) {\n      filtered_count_ = count;\n    }\n\n    //! Set count of documents dist calced\n    void set_dist_calced_count(size_t count) {\n      dist_calced_count_ = count;\n    }\n\n    //! Retrieve count of documents filtered\n    size_t filtered_count(void) const {\n      return filtered_count_;\n    }\n\n    //! Retrieve count of documents dist-calced\n    size_t dist_calced_count(void) const {\n      return dist_calced_count_;\n    }\n\n    //! Retrieve count of documents filtered (mutable)\n    size_t *mutable_filtered_count(void) {\n      return &filtered_count_;\n    }\n\n    //! Retrieve count of documents dist-calced (mutable)\n    size_t *mutable_dist_calced_count(void) {\n      return &dist_calced_count_;\n    }\n\n    void clear() {\n      this->clear_attributes();\n\n      filtered_count_ = 0u;\n      dist_calced_count_ = 0u;\n    }\n\n   private:\n    //! Members\n    size_t filtered_count_{0u};\n    size_t dist_calced_count_{0u};\n  };\n\n  //! Constructor\n  IndexContext() {}\n\n  //! Constructor\n  IndexContext(IndexMetric::Pointer index_metric)\n      : index_metric_(std::move(index_metric)) {}\n\n  //! Destructor\n  virtual ~IndexContext(void) {}\n\n  //! Set topk of search result\n  virtual void set_topk(uint32_t topk) = 0;\n\n  virtual uint32_t topk() const {\n    return 0;\n  }\n\n  virtual void set_group_params(uint32_t /*group_mum*/,\n                                uint32_t /*group_topk*/){};\n\n  //! Set brute force threshold\n  virtual void set_bruteforce_threshold(uint32_t /*bruteforce_threshold*/) {}\n\n  //! Set mode of debug\n  virtual void set_debug_mode(bool /*enable*/) {}\n\n  //! Set fetch vector\n  virtual void set_fetch_vector(bool /*enable*/) {}\n\n  //! Retrieve search result\n  virtual const IndexDocumentList &result(void) const = 0;\n\n  //! 
Retrieve search result with index\n  virtual const IndexDocumentList &result(size_t /*index*/) const {\n    return this->result();\n  }\n\n  //! Retrieve mutable result with index\n  virtual IndexDocumentList *mutable_result(size_t idx) = 0;\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(void) const {\n    // to make it compile\n    static const IndexGroupDocumentList empty_list{};\n    return empty_list;\n  };\n\n  //! Retrieve search group result with index\n  virtual const IndexGroupDocumentList &group_result(size_t /*idx*/) const {\n    return this->group_result();\n  }\n\n  //! Update the parameters of context\n  virtual int update(const ailego::Params & /*params*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Retrieve mode of debug\n  virtual bool debug_mode(void) const {\n    return false;\n  }\n\n  //! Retrieve debug information\n  virtual std::string debug_string(void) const {\n    return std::string();\n  }\n\n  //! Retrieve magic number\n  virtual uint32_t magic(void) const {\n    return 0;\n  }\n\n  //! Retrieve search filter\n  const IndexFilter &filter(void) const {\n    return filter_;\n  }\n\n  //! Retrieve fetch vector\n  virtual bool fetch_vector(void) const {\n    return false;\n  }\n\n  //! Reset context\n  virtual void reset(void) {}\n\n  //! Set the filter of context\n  template <typename T>\n  void set_filter(T &&func) {\n    filter_.set(std::forward<T>(func));\n  }\n\n  //! Reset the filter of context\n  void reset_filter(void) {\n    filter_.reset();\n  }\n\n  //! Retrieve search groupby\n  const IndexGroupBy &group_by(void) const {\n    return group_by_;\n  }\n\n  //! Set the groupby of context\n  template <typename T>\n  void set_group_by(T &&func) {\n    group_by_.set(std::forward<T>(func));\n  }\n\n  //! Reset the groupby of context\n  void reset_group_by(void) {\n    group_by_.reset();\n  }\n\n  //! 
Set threshold for RNN\n  void set_threshold(float val) {\n    if (index_metric_ && index_metric_->support_normalize()) {\n      index_metric_->denormalize(&val);\n    }\n\n    threshold_ = val;\n  }\n\n  //! Retrieve value of threshold for RNN\n  float threshold(void) const {\n    return threshold_;\n  }\n\n  //! Reset value of threshold for RNN\n  void reset_threshold(void) {\n    threshold_ = std::numeric_limits<float>::max();\n  }\n\n  //! Generate a global magic number\n  static uint32_t GenerateMagic(void);\n\n  //! Profiler\n  Profiler &profiler() {\n    return profiler_;\n  }\n\n private:\n  //! Members\n  IndexFilter filter_{};\n  IndexGroupBy group_by_{};\n  float threshold_{std::numeric_limits<float>::max()};\n\n\n  Profiler profiler_{};\n\n protected:\n  IndexMetric::Pointer index_metric_{nullptr};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/core/framework/index_converter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <atomic>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_stats.h>\n#include \"zvec/core/framework/index_reformer.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Converter\n */\nclass IndexConverter : public IndexModule {\n public:\n  //! Index Converter Pointer\n  typedef std::shared_ptr<IndexConverter> Pointer;\n\n  /*! Index Converter Stats\n   */\n  class Stats : public IndexStats {\n   public:\n    Stats() {}\n    Stats(const Stats &stats) {\n      *this = stats;\n    }\n    Stats &operator=(const Stats &stats) {\n      this->trained_count_.store(stats.trained_count_.load());\n      this->transformed_count_.store(stats.transformed_count_.load());\n      this->dumped_size_.store(stats.dumped_size_.load());\n      this->discarded_count_.store(stats.discarded_count_.load());\n      this->trained_costtime_.store(stats.trained_costtime_.load());\n      this->transformed_costtime_.store(stats.transformed_costtime_.load());\n      this->dumped_costtime_.store(stats.dumped_costtime_.load());\n      return *this;\n    }\n    //! Set count of documents trained\n    void set_trained_count(size_t count) {\n      trained_count_ = count;\n    }\n\n    //! 
Set count of documents transformed\n    void set_transformed_count(size_t count) {\n      transformed_count_ = count;\n    }\n\n    //! Set size of documents dumped\n    void set_dumped_size(size_t size) {\n      dumped_size_ = size;\n    }\n\n    //! Set count of documents discarded\n    void set_discarded_count(size_t count) {\n      discarded_count_ = count;\n    }\n\n    //! Set time cost of documents trained\n    void set_trained_costtime(uint64_t cost) {\n      trained_costtime_ = cost;\n    }\n\n    //! Set time cost of documents transformed\n    void set_transformed_costtime(uint64_t cost) {\n      transformed_costtime_ = cost;\n    }\n\n    //! Set time cost of documents dumped\n    void set_dumped_costtime(uint64_t cost) {\n      dumped_costtime_ = cost;\n    }\n\n    //! Retrieve count of documents trained\n    size_t trained_count(void) const {\n      return trained_count_;\n    }\n\n    //! Retrieve count of documents transformed\n    size_t transformed_count(void) const {\n      return transformed_count_;\n    }\n\n    //! Retrieve size of documents dumped\n    size_t dumped_size(void) const {\n      return dumped_size_;\n    }\n\n    //! Retrieve count of documents discarded\n    size_t discarded_count(void) const {\n      return discarded_count_;\n    }\n\n    //! Retrieve time cost of documents trained\n    uint64_t trained_costtime(void) const {\n      return trained_costtime_;\n    }\n\n    //! Retrieve time cost of documents transformed\n    uint64_t transformed_costtime(void) const {\n      return transformed_costtime_;\n    }\n\n    //! Retrieve time cost of documents dumped\n    uint64_t dumped_costtime(void) const {\n      return dumped_costtime_;\n    }\n\n    //! Retrieve count of documents trained (mutable)\n    std::atomic<size_t> *mutable_trained_count(void) {\n      return &trained_count_;\n    }\n\n    //! 
Retrieve count of documents transformed (mutable)\n    std::atomic<size_t> *mutable_transformed_count(void) {\n      return &transformed_count_;\n    }\n\n    //! Retrieve size of documents dumped (mutable)\n    std::atomic<size_t> *mutable_dumped_size(void) {\n      return &dumped_size_;\n    }\n\n    //! Retrieve count of documents discarded (mutable)\n    std::atomic<size_t> *mutable_discarded_count(void) {\n      return &discarded_count_;\n    }\n\n    //! Retrieve time cost of documents trained (mutable)\n    std::atomic<uint64_t> *mutable_trained_costtime(void) {\n      return &trained_costtime_;\n    }\n\n    //! Retrieve time cost of documents transformed (mutable)\n    std::atomic<uint64_t> *mutable_transformed_costtime(void) {\n      return &transformed_costtime_;\n    }\n\n    //! Retrieve time cost of documents dumped (mutable)\n    std::atomic<uint64_t> *mutable_dumped_costtime(void) {\n      return &dumped_costtime_;\n    }\n\n   private:\n    //! Members\n    std::atomic<size_t> trained_count_{0u};\n    std::atomic<size_t> transformed_count_{0u};\n    std::atomic<size_t> dumped_size_{0u};\n    std::atomic<size_t> discarded_count_{0u};\n    std::atomic<uint64_t> trained_costtime_{0u};\n    std::atomic<uint64_t> transformed_costtime_{0u};\n    std::atomic<uint64_t> dumped_costtime_{0u};\n  };\n\n  //! Destructor\n  virtual ~IndexConverter(void) {}\n\n  //! Initialize Converter\n  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;\n\n  //! Cleanup Converter\n  virtual int cleanup(void) = 0;\n\n  //! Train the data\n  virtual int train(IndexHolder::Pointer) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Train the data\n  virtual int train(IndexSparseHolder::Pointer) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Transform the data\n  virtual int transform(IndexHolder::Pointer) {\n    return IndexError_NotImplemented;\n  };\n\n  //! 
Transform the data\n  virtual int transform(IndexSparseHolder::Pointer) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) = 0;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const = 0;\n\n  //! Retrieve a holder as result\n  virtual IndexHolder::Pointer result(void) const {\n    return nullptr;\n  }\n\n  //! Retrieve a holder as result\n  virtual IndexSparseHolder::Pointer sparse_result(void) const {\n    return nullptr;\n  }\n\n  //! Retrieve Index Meta\n  virtual const IndexMeta &meta(void) const = 0;\n\n  //! Train and transform the index\n  static int TrainAndTransform(const IndexConverter::Pointer &converter,\n                               IndexHolder::Pointer holder);\n\n  //! Train, transform and dump the index\n  static int TrainTransformAndDump(const IndexConverter::Pointer &converter,\n                                   IndexHolder::Pointer holder,\n                                   const IndexDumper::Pointer &dumper);\n\n  //! Convert to reformer\n  virtual int to_reformer(IndexReformer::Pointer *) {\n    return IndexError_NotImplemented;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_document.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <string>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/core/framework/index_storage.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Sparse Document\n */\nclass IndexSparseDocument {\n public:\n  //! Constructor\n  IndexSparseDocument() = default;\n\n  //! Retrieve mutable sparse count\n  uint32_t *mutable_sparse_count(void) {\n    return &sparse_count_;\n  }\n\n  //! Retrieve mutable sparse indices\n  std::string *mutable_sparse_indices(void) {\n    return &sparse_indices_;\n  }\n\n  //! Retrieve mutable sparse values\n  std::string *mutable_sparse_values(void) {\n    return &sparse_values_;\n  }\n\n  //! Retrieve sparse count\n  uint32_t sparse_count(void) const {\n    return sparse_count_;\n  }\n\n  //! Retrieve sparse indices\n  const std::string &sparse_indices(void) const {\n    return sparse_indices_;\n  }\n\n  //! Retrieve sparse values\n  const std::string &sparse_values(void) const {\n    return sparse_values_;\n  }\n\n private:\n  uint32_t sparse_count_{0};\n  std::string sparse_indices_{};\n  std::string sparse_values_{};\n};\n\n/*! Index Document\n */\nclass IndexDocument {\n public:\n  //! Constructor\n  IndexDocument() = default;\n\n  //! Constructor\n  IndexDocument(uint64_t k, float v) : key_(k), score_(v) {}\n\n  //! 
Constructor\n  IndexDocument(uint64_t k, float v, uint32_t i)\n      : key_(k), score_(v), index_(i) {}\n\n  //! Constructor\n  IndexDocument(uint64_t k, float v, uint32_t i, const void *vector)\n      : key_(k), score_(v), index_(i), vector_(vector) {}\n\n  IndexDocument(uint64_t k, float v, uint32_t i,\n                IndexStorage::MemoryBlock vec_block)\n      : key_(k), score_(v), index_(i), vec_mem_block_(vec_block) {\n    vector_ = vec_mem_block_.data();\n    has_vec_mem_block_ = true;\n  }\n\n  //! Constructor\n  IndexDocument(uint64_t k, float v, uint32_t i, const void *vector,\n                IndexSparseDocument sparse_doc)\n      : key_(k),\n        score_(v),\n        index_(i),\n        vector_(vector),\n        sparse_doc_(std::move(sparse_doc)) {}\n\n  IndexDocument(uint64_t k, float v, uint32_t i,\n                IndexStorage::MemoryBlock vec_block,\n                IndexSparseDocument sparse_doc)\n      : key_(k),\n        score_(v),\n        index_(i),\n        vec_mem_block_(vec_block),\n        sparse_doc_(std::move(sparse_doc)) {\n    has_vec_mem_block_ = true;\n    vector_ = vec_mem_block_.data();\n  }\n\n  //! Constructor\n  IndexDocument(const IndexDocument &rhs)\n      : key_(rhs.key_),\n        score_(rhs.score_),\n        index_(rhs.index_),\n        vector_(rhs.vector_),\n        sparse_doc_{rhs.sparse_doc_} {\n    if (rhs.has_vec_mem_block_) {\n      vec_mem_block_ = rhs.vec_mem_block_;\n      has_vec_mem_block_ = true;\n    }\n  }\n\n  //! Assignment\n  IndexDocument &operator=(const IndexDocument &rhs) {\n    if (this != &rhs) {\n      key_ = rhs.key_;\n      score_ = rhs.score_;\n      index_ = rhs.index_;\n      vector_ = rhs.vector_;\n      if (rhs.has_vec_mem_block_) {\n        vec_mem_block_ = rhs.vec_mem_block_;\n        has_vec_mem_block_ = true;\n      }\n      sparse_doc_ = rhs.sparse_doc_;\n    }\n    return *this;\n  }\n\n  //! 
Less than\n  bool operator<(const IndexDocument &rhs) const {\n    return (this->score_ < rhs.score_);\n  }\n\n  //! Greater than\n  bool operator>(const IndexDocument &rhs) const {\n    return (this->score_ > rhs.score_);\n  }\n\n  //! Retrieve primary key\n  uint64_t key(void) const {\n    return key_;\n  }\n\n  //! Retrieve score value\n  float score(void) const {\n    return score_;\n  }\n\n  //! Retrieve index id\n  uint32_t index(void) const {\n    return index_;\n  }\n\n  //! Retrieve vector\n  const void *vector() const {\n    return vector_;\n  }\n\n  //! Retrieve sparse document\n  const IndexSparseDocument &sparse_doc() const {\n    return sparse_doc_;\n  }\n\n  //! Retrieve mutable primary key\n  uint64_t *mutable_key(void) {\n    return &key_;\n  }\n\n  //! Retrieve mutable score value\n  float *mutable_score(void) {\n    return &score_;\n  }\n\n  //! Retrieve mutable index id\n  uint32_t *mutable_index(void) {\n    return &index_;\n  }\n\n  //! Set primary key\n  void set_key(uint64_t val) {\n    key_ = val;\n  }\n\n  //! Set score value\n  void set_score(float val) {\n    score_ = val;\n  }\n\n  //! Set index id\n  void set_index(uint32_t val) {\n    index_ = val;\n  }\n\n private:\n  //! Data members\n  uint64_t key_{0u};\n  float score_{0.0f};\n  uint32_t index_{0u};\n  const void *vector_{nullptr};\n  bool has_vec_mem_block_{false};\n  mutable IndexStorage::MemoryBlock vec_mem_block_{};\n  IndexSparseDocument sparse_doc_{};\n};\n\n/*! Index Document Heap\n */\nclass IndexDocumentHeap : public ailego::Heap<IndexDocument> {\n public:\n  //! Constructor\n  IndexDocumentHeap(void) : ailego::Heap<IndexDocument>() {}\n\n  //! Constructor\n  IndexDocumentHeap(size_t max) : ailego::Heap<IndexDocument>(max) {}\n\n  //! Constructor\n  IndexDocumentHeap(size_t max, float val)\n      : ailego::Heap<IndexDocument>(max), threshold_(val) {}\n\n  //! 
Constructor\n  IndexDocumentHeap(const IndexDocumentHeap &rhs)\n      : ailego::Heap<IndexDocument>(rhs), threshold_(rhs.threshold_) {}\n\n  //! Constructor\n  IndexDocumentHeap(IndexDocumentHeap &&rhs)\n      : ailego::Heap<IndexDocument>(std::move(rhs)),\n        threshold_(rhs.threshold_) {}\n\n  //! Constructor\n  IndexDocumentHeap(const std::vector<IndexDocument> &rhs)\n      : ailego::Heap<IndexDocument>(rhs) {}\n\n  //! Constructor\n  IndexDocumentHeap(std::vector<IndexDocument> &&rhs)\n      : ailego::Heap<IndexDocument>(std::move(rhs)) {}\n\n  //! Insert a document into the heap\n  void emplace(uint64_t key, float score) {\n    if (score <= threshold_) {\n      ailego::Heap<IndexDocument>::emplace(key, score);\n    }\n  }\n\n  //! Insert a document into the heap\n  void emplace(uint64_t key, float score, uint32_t index) {\n    if (score <= threshold_) {\n      ailego::Heap<IndexDocument>::emplace(key, score, index);\n    }\n  }\n\n  //! Set threshold for RNN\n  void set_threshold(float val) {\n    threshold_ = val;\n  }\n\n  //! Retrieve value of threshold for RNN\n  float threshold(void) const {\n    return threshold_;\n  }\n\n private:\n  //! members\n  float threshold_{std::numeric_limits<float>::max()};\n};\n\nclass GroupIndexDocument {\n public:\n  GroupIndexDocument() = default;\n\n  GroupIndexDocument(const std::string &group_id,\n                     const std::vector<IndexDocument> &docs)\n      : group_id_(group_id), docs_(docs) {}\n\n  const std::string &group_id() const {\n    return group_id_;\n  }\n\n  const std::vector<IndexDocument> &docs() const {\n    return docs_;\n  }\n\n  std::string *mutable_group_id() {\n    return &group_id_;\n  }\n\n  std::vector<IndexDocument> *mutable_docs() {\n    return &docs_;\n  }\n\n  void set_group_id(const std::string &group_id) {\n    group_id_ = group_id;\n  }\n\n private:\n  std::string group_id_;\n  std::vector<IndexDocument> docs_;\n};\n\n/*! 
Index Document List\n */\nusing IndexDocumentList = std::vector<IndexDocument>;\nusing IndexGroupDocumentList = std::vector<GroupIndexDocument>;\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_dumper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_module.h>\n#include <zvec/core/framework/index_packer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Dumper\n */\nclass IndexDumper : public IndexModule {\n public:\n  //! Index Dumper Pointer\n  typedef std::shared_ptr<IndexDumper> Pointer;\n\n  //! Destructor\n  virtual ~IndexDumper(void) {}\n\n  //! Initialize dumper\n  virtual int init(const ailego::Params &params) = 0;\n\n  //! Cleanup dumper\n  virtual int cleanup(void) = 0;\n\n  //! Create a file for dumping\n  virtual int create(const std::string &path) = 0;\n\n  //! Close file\n  virtual int close(void) = 0;\n\n  //! Append a segment meta into table\n  virtual int append(const std::string &id, size_t data_size,\n                     size_t padding_size, uint32_t crc) = 0;\n\n  //! Write data to the storage\n  virtual size_t write(const void *data, size_t len) = 0;\n\n  //! Retrieve magic number of index\n  virtual uint32_t magic(void) const = 0;\n};\n\n/*! Index Segment Dumper\n */\nclass IndexSegmentDumper : public IndexDumper {\n public:\n  //! Index Segment Dumper Pointer\n  typedef std::shared_ptr<IndexSegmentDumper> Pointer;\n\n  //! 
Constructor\n  IndexSegmentDumper(IndexDumper::Pointer dumper, std::string segid)\n      : segment_id_(std::move(segid)), dumper_(std::move(dumper)) {}\n\n  //! Destructor\n  virtual ~IndexSegmentDumper(void) {\n    this->close_index();\n  }\n\n  //! Initialize dumper\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup dumper\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Create a file for dumping\n  int create(const std::string &segid) override {\n    if (dumped_size_ != 0) {\n      return IndexError_NoReady;\n    }\n\n    auto write_data = [&](const void *buf, size_t size) {\n      return this->write_to_dumper(buf, size);\n    };\n    if (!packer_.setup(write_data)) {\n      return IndexError_WriteData;\n    }\n    segment_id_ = segid;\n    return 0;\n  }\n\n  //! Close file\n  int close(void) override {\n    return this->close_index();\n  }\n\n  //! Append a segment meta into table\n  int append(const std::string &id, size_t data_size, size_t padding_size,\n             uint32_t crc) override {\n    stab_.emplace_back(id, data_size, padding_size, crc);\n    return 0;\n  }\n\n  //! Write data to the storage\n  size_t write(const void *data, size_t len) override {\n    auto write_data = [&](const void *buf, size_t size) {\n      return this->write_to_dumper(buf, size);\n    };\n\n    if (dumped_size_ == 0 && !packer_.setup(write_data)) {\n      return 0;\n    }\n    return packer_.pack(write_data, data, len);\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return packer_.magic();\n  }\n\n protected:\n  //! Write data to dumper\n  size_t write_to_dumper(const void *data, size_t len) {\n    size_t wrlen = dumper_->write(data, len);\n    dumped_size_ += wrlen;\n    return wrlen;\n  }\n\n  //! 
Close index file\n  int close_index(void) {\n    if (dumped_size_ == 0) {\n      return 0;\n    }\n\n    auto write_data = [&](const void *buf, size_t size) {\n      return this->write_to_dumper(buf, size);\n    };\n    if (!packer_.finish(write_data, stab_)) {\n      return IndexError_WriteData;\n    }\n    stab_.clear();\n\n    int ret = dumper_->append(segment_id_, dumped_size_, 0, 0);\n    dumped_size_ = 0u;\n    return ret;\n  }\n\n private:\n  size_t dumped_size_{0};\n  std::string segment_id_{};\n  IndexDumper::Pointer dumper_{};\n  IndexPacker packer_{};\n  std::vector<IndexPacker::SegmentMeta> stab_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_error.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <map>\n#include <zvec/ailego/pattern/expected.hpp>\n\nnamespace zvec {\nnamespace core {\n\n/*! Error\n */\n\nclass ErrorCode;\n\ntemplate <typename T>\nusing Result = tl::expected<T, ErrorCode>;\n\ntemplate <typename T>\nusing result_unexpect = tl::unexpected<ErrorCode>;\n\n/*! Index Error\n */\nclass IndexError {\n public:\n  /*! Index Error Code\n   */\n  class Code {\n   public:\n    //! Constructor\n    Code(int val, const char *str) : value_(-val), desc_(str) {\n      IndexError::Instance()->emplace(this);\n    }\n\n    //! Retrieve the value of code\n    operator int() const {\n      return (this->value_);\n    }\n\n    //! Retrieve the value of code\n    int value() const {\n      return (this->value_);\n    }\n\n    //! Retrieve the description of code\n    const char *desc() const {\n      return (this->desc_);\n    }\n\n   private:\n    int value_;\n    const char *desc_;\n  };\n\n  //! Retrieve the description of code\n  static const char *What(int val) {\n    return IndexError::Instance()->what(val);\n  }\n\n protected:\n  //! Constructor\n  IndexError(void) : map_() {}\n\n  //! Inserts a new code into map\n  void emplace(const IndexError::Code *code) {\n    map_.emplace(code->value(), code);\n  }\n\n  //! 
Retrieve the description of code\n  const char *what(int val) const {\n    auto iter = map_.find(val);\n    if (iter != map_.end()) {\n      return iter->second->desc();\n    }\n    return \"\";\n  }\n\n  //! Retrieve the singleton\n  static IndexError *Instance(void) {\n    static IndexError error;\n    return (&error);\n  }\n\n private:\n  //! Disable them\n  IndexError(const IndexError &) = delete;\n  IndexError(IndexError &&) = delete;\n  IndexError &operator=(const IndexError &) = delete;\n\n  //! Error code map\n  std::map<int, const IndexError::Code *> map_;\n};\n\n//! Index Error Code Define\n#define INDEX_ERROR_CODE_DEFINE(__NAME__, __VAL__, __DESC__)           \\\n  const IndexError::Code IndexError_##__NAME__((__VAL__), (__DESC__)); \\\n  const IndexError::Code &_IndexErrorCode_##__VAL__##_Register(        \\\n      IndexError_##__NAME__)\n\n//! Index Error Code Declare\n#define INDEX_ERROR_CODE_DECLARE(__NAME__) \\\n  extern const IndexError::Code IndexError_##__NAME__\n\n//! 
Build-in error code\nINDEX_ERROR_CODE_DECLARE(Success);      // Success\nINDEX_ERROR_CODE_DECLARE(Runtime);      // Runtime error\nINDEX_ERROR_CODE_DECLARE(Logic);        // Logic error\nINDEX_ERROR_CODE_DECLARE(Type);         // Type error\nINDEX_ERROR_CODE_DECLARE(System);       // System call error\nINDEX_ERROR_CODE_DECLARE(Cast);         // Cast error\nINDEX_ERROR_CODE_DECLARE(IO);           // IO error\nINDEX_ERROR_CODE_DECLARE(AuthExpired);  // Auth expired error\n\nINDEX_ERROR_CODE_DECLARE(NotImplemented);  // Not implemented\nINDEX_ERROR_CODE_DECLARE(Unsupported);     // Unsupported\nINDEX_ERROR_CODE_DECLARE(Denied);          // Permission denied\nINDEX_ERROR_CODE_DECLARE(Canceled);        // Operation canceled\nINDEX_ERROR_CODE_DECLARE(Overflow);        // Overflow\nINDEX_ERROR_CODE_DECLARE(Underflow);       // Underflow\nINDEX_ERROR_CODE_DECLARE(OutOfRange);      // Out of range\nINDEX_ERROR_CODE_DECLARE(NoBuffer);        // No buffer space available\nINDEX_ERROR_CODE_DECLARE(NoMemory);        // Not enough space\nINDEX_ERROR_CODE_DECLARE(NoParamFound);    // No parameter found\nINDEX_ERROR_CODE_DECLARE(NoReady);         // No ready\nINDEX_ERROR_CODE_DECLARE(NoExist);         // No exist\nINDEX_ERROR_CODE_DECLARE(Exist);           // Already exist\nINDEX_ERROR_CODE_DECLARE(Mismatch);        // Mismatch\nINDEX_ERROR_CODE_DECLARE(Duplicate);       // Duplicate\nINDEX_ERROR_CODE_DECLARE(Uninitialized);   // Uninitialized\n\nINDEX_ERROR_CODE_DECLARE(InvalidArgument);  // Invalid argument\nINDEX_ERROR_CODE_DECLARE(InvalidFormat);    // Invalid format\nINDEX_ERROR_CODE_DECLARE(InvalidLength);    // Invalid length\nINDEX_ERROR_CODE_DECLARE(InvalidChecksum);  // Invalid checksum\nINDEX_ERROR_CODE_DECLARE(InvalidValue);     // Invalid value\n\nINDEX_ERROR_CODE_DECLARE(CreateDirectory);  // Create directory error\nINDEX_ERROR_CODE_DECLARE(OpenDirectory);    // Open directory error\nINDEX_ERROR_CODE_DECLARE(Serialize);        // Serialize 
error\nINDEX_ERROR_CODE_DECLARE(Deserialize);      // Deserialize error\nINDEX_ERROR_CODE_DECLARE(CreateFile);       // Create file error\nINDEX_ERROR_CODE_DECLARE(OpenFile);         // Open file error\nINDEX_ERROR_CODE_DECLARE(SeekFile);         // Seek file error\nINDEX_ERROR_CODE_DECLARE(CloseFile);        // Close file error\nINDEX_ERROR_CODE_DECLARE(TruncateFile);     // TruncateFile file error\nINDEX_ERROR_CODE_DECLARE(MMapFile);         // MMap file error\nINDEX_ERROR_CODE_DECLARE(FlushFile);        // Flush file error\nINDEX_ERROR_CODE_DECLARE(WriteData);        // Write data error\nINDEX_ERROR_CODE_DECLARE(ReadData);         // Read data error\n\nINDEX_ERROR_CODE_DECLARE(PackIndex);      // Read data error\nINDEX_ERROR_CODE_DECLARE(UnpackIndex);    // Read data error\nINDEX_ERROR_CODE_DECLARE(IndexLoaded);    // Index loaded\nINDEX_ERROR_CODE_DECLARE(NoIndexLoaded);  // No index loaded\nINDEX_ERROR_CODE_DECLARE(NoTrained);      // No trained\nINDEX_ERROR_CODE_DECLARE(IndexFull);      // Index full\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/core/framework/index_factory.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/pattern/factory.h>\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_converter.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_logger.h>\n#include <zvec/core/framework/index_metric.h>\n#include <zvec/core/framework/index_reducer.h>\n#include <zvec/core/framework/index_refiner.h>\n#include <zvec/core/framework/index_reformer.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_streamer.h>\n#include <zvec/core/framework/index_trainer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Factory\n */\nstruct IndexFactory {\n  //! Create a index Metric by name\n  static IndexMetric::Pointer CreateMetric(const std::string &name);\n\n  //! Test if the Metric is exist\n  static bool HasMetric(const std::string &name);\n\n  //! Retrieve all Metric classes\n  static std::vector<std::string> AllMetrics(void);\n\n  //! Create a index logger by name\n  static IndexLogger::Pointer CreateLogger(const std::string &name);\n\n  //! Test if the logger is exist\n  static bool HasLogger(const std::string &name);\n\n  //! Retrieve all logger classes\n  static std::vector<std::string> AllLoggers(void);\n\n  //! 
Create a index dumper by name\n  static IndexDumper::Pointer CreateDumper(const std::string &name);\n\n  //! Test if the dumper is exist\n  static bool HasDumper(const std::string &name);\n\n  //! Retrieve all dumper classes\n  static std::vector<std::string> AllDumpers(void);\n\n  //! Test if the container is exist\n  static bool HasContainer(const std::string &name);\n\n  //! Retrieve all container classes\n  static std::vector<std::string> AllContainers(void);\n\n  //! Create a index storage by name\n  static IndexStorage::Pointer CreateStorage(const std::string &name);\n\n  //! Test if the storage is exist\n  static bool HasStorage(const std::string &name);\n\n  //! Retrieve all storage classes\n  static std::vector<std::string> AllStorages(void);\n\n  //! Create a index converter by name\n  static IndexConverter::Pointer CreateConverter(const std::string &name);\n\n  //! Test if the converter is exist\n  static bool HasConverter(const std::string &name);\n\n  //! Retrieve all converter classes\n  static std::vector<std::string> AllConverters(void);\n\n  //! Create a index reformer by name\n  static IndexReformer::Pointer CreateReformer(const std::string &name);\n\n  //! Test if the reformer is exist\n  static bool HasReformer(const std::string &name);\n\n  //! Retrieve all reformer classes\n  static std::vector<std::string> AllReformers(void);\n\n  //! Create a index trainer by name\n  static IndexTrainer::Pointer CreateTrainer(const std::string &name);\n\n  //! Test if the trainer is exist\n  static bool HasTrainer(const std::string &name);\n\n  //! Retrieve all trainer classes\n  static std::vector<std::string> AllTrainers(void);\n\n  //! Create a index builder by name\n  static IndexBuilder::Pointer CreateBuilder(const std::string &name);\n\n  //! Test if the builder is exist\n  static bool HasBuilder(const std::string &name);\n\n  //! Retrieve all builder classes\n  static std::vector<std::string> AllBuilders(void);\n\n  //! 
Create a index searcher by name\n  static IndexSearcher::Pointer CreateSearcher(const std::string &name);\n\n  //! Test if the searcher is exist\n  static bool HasSearcher(const std::string &name);\n\n  //! Retrieve all searcher classes\n  static std::vector<std::string> AllSearchers(void);\n\n  //! Create a index streamer by name\n  static IndexStreamer::Pointer CreateStreamer(const std::string &name);\n\n  //! Test if the streamer is exist\n  static bool HasStreamer(const std::string &name);\n\n  //! Retrieve all streamer classes\n  static std::vector<std::string> AllStreamers(void);\n\n  //! Create a index reducer by name\n  static IndexReducer::Pointer CreateReducer(const std::string &name);\n\n  //! Test if the reducer is exist\n  static bool HasReducer(const std::string &name);\n\n  //! Retrieve all reducer classes\n  static std::vector<std::string> AllReducers(void);\n\n  //! Create a index cluster by name\n  static IndexCluster::Pointer CreateCluster(const std::string &name);\n\n  //! Test if the cluster is exist\n  static bool HasCluster(const std::string &name);\n\n  //! Retrieve all cluster classes\n  static std::vector<std::string> AllClusters(void);\n\n  //! Create a index streamer reducer by name\n  static IndexStreamerReducer::Pointer CreateStreamerReducer(\n      const std::string &name);\n\n  //! Test if the streamer reducer is exist\n  static bool HasStreamerReducer(const std::string &name);\n\n  //! Retrieve all streamer reducer classes\n  static std::vector<std::string> AllStreamerReducers(void);\n\n  //! Create a refiner by name\n  static IndexRefiner::Pointer CreateRefiner(const std::string &name);\n\n  //! Test if the refiner is exist\n  static bool HasRefiner(const std::string &name);\n\n  //! Retrieve all refiner classes\n  static std::vector<std::string> AllRefiners(void);\n};\n\n//! Register Index Metric\n#define INDEX_FACTORY_REGISTER_METRIC_ALIAS(__NAME__, __IMPL__, ...) 
\\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexMetric, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Metric\n#define INDEX_FACTORY_REGISTER_METRIC(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_METRIC_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Logger\n#define INDEX_FACTORY_REGISTER_LOGGER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexLogger, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Logger\n#define INDEX_FACTORY_REGISTER_LOGGER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_LOGGER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Dumper\n#define INDEX_FACTORY_REGISTER_DUMPER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexDumper, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Dumper\n#define INDEX_FACTORY_REGISTER_DUMPER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_DUMPER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Storage\n#define INDEX_FACTORY_REGISTER_STORAGE_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexStorage, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Storage\n#define INDEX_FACTORY_REGISTER_STORAGE(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_STORAGE_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Converter\n#define INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexConverter, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Converter\n#define INDEX_FACTORY_REGISTER_CONVERTER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Reformer\n#define INDEX_FACTORY_REGISTER_REFORMER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexReformer, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Reformer\n#define INDEX_FACTORY_REGISTER_REFORMER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_REFORMER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! 
Register Index Trainer\n#define INDEX_FACTORY_REGISTER_TRAINER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexTrainer, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Trainer\n#define INDEX_FACTORY_REGISTER_TRAINER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_TRAINER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Builder\n#define INDEX_FACTORY_REGISTER_BUILDER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexBuilder, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Builder\n#define INDEX_FACTORY_REGISTER_BUILDER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_BUILDER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Searcher\n#define INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexSearcher, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Searcher\n#define INDEX_FACTORY_REGISTER_SEARCHER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Streamer\n#define INDEX_FACTORY_REGISTER_STREAMER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexStreamer, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Streamer\n#define INDEX_FACTORY_REGISTER_STREAMER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_STREAMER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Reducer\n#define INDEX_FACTORY_REGISTER_REDUCER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexReducer, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Reducer\n#define INDEX_FACTORY_REGISTER_REDUCER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_REDUCER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Streamer Reducer\n#define INDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexStreamerReducer, __IMPL__,            \\\n                          ##__VA_ARGS__)\n\n//! 
Register Index Streamer Reducer\n#define INDEX_FACTORY_REGISTER_STREAMER_REDUCER(__IMPL__, ...)      \\\n  INDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(__IMPL__, __IMPL__, \\\n                                                ##__VA_ARGS__)\n\n//! Register Index Cluster\n#define INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexCluster, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Cluster\n#define INDEX_FACTORY_REGISTER_CLUSTER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Refiner\n#define INDEX_FACTORY_REGISTER_REFINER_ALIAS(__NAME__, __IMPL__, ...) \\\n  AILEGO_FACTORY_REGISTER(__NAME__, IndexRefiner, __IMPL__, ##__VA_ARGS__)\n\n//! Register Index Refiner\n#define INDEX_FACTORY_REGISTER_REFINER(__IMPL__, ...) \\\n  INDEX_FACTORY_REGISTER_REFINER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_features.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstring>\n#include <memory>\n#include <random>\n#include <string>\n#include <vector>\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Features\n */\nstruct IndexFeatures {\n  //! Index Features Pointer\n  typedef std::shared_ptr<IndexFeatures> Pointer;\n\n  //! Destructor\n  virtual ~IndexFeatures(void) {}\n\n  //! Retrieve feature via index\n  virtual const void *element(size_t i) const = 0;\n\n  //! Retrieve count of elements\n  virtual size_t count(void) const = 0;\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const = 0;\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const = 0;\n\n  //! Test if it is a compacted buffer\n  virtual bool is_compacted(void) const {\n    return false;\n  }\n\n  //! Retrieve pointer of compacted buffer\n  virtual const void *data(void) const {\n    return nullptr;\n  }\n\n  //! Retrieve size of feature\n  virtual size_t element_size(void) const {\n    return IndexMeta::ElementSizeof(this->data_type(), this->dimension());\n  }\n\n  //! Operator []\n  const void *operator[](size_t i) const {\n    return this->element(i);\n  }\n\n  //! 
Test if matchs the meta\n  bool is_matched(const IndexMeta &meta) const {\n    return (meta.data_type() == this->data_type() &&\n            meta.dimension() == this->dimension() &&\n            meta.element_size() == this->element_size());\n  }\n};\n\n/*! Coherent Index Features\n */\nclass CoherentIndexFeatures : public IndexFeatures {\n public:\n  //! Coherent Index Features Pointer\n  typedef std::shared_ptr<CoherentIndexFeatures> Pointer;\n\n  //! Constructor\n  CoherentIndexFeatures(void)\n      : features_buffer_(nullptr),\n        features_count_(0),\n        feature_size_(0),\n        feature_dimension_(0),\n        data_type_(IndexMeta::DataType::DT_UNDEFINED) {}\n\n  //! Constructor\n  CoherentIndexFeatures(const IndexMeta &meta)\n      : features_buffer_(nullptr),\n        features_count_(0),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  //! Constructor\n  CoherentIndexFeatures(const IndexMeta &meta, const void *buf, size_t len)\n      : features_buffer_(buf),\n        features_count_(len / meta.element_size()),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  //! Mount features\n  void mount(const IndexMeta &meta, const void *buf, size_t len) {\n    features_buffer_ = buf;\n    data_type_ = meta.data_type();\n    feature_size_ = meta.element_size();\n    feature_dimension_ = meta.dimension();\n    features_count_ = len / feature_size_;\n  }\n\n  //! Mount features\n  void mount(const void *buf, size_t len) {\n    features_buffer_ = buf;\n    features_count_ = len / feature_size_;\n  }\n\n  //! Retrieve count of elements\n  virtual size_t count(void) const {\n    return features_count_;\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const {\n    return feature_dimension_;\n  }\n\n  //! 
Retrieve feature via index\n  virtual const void *element(size_t i) const {\n    return (reinterpret_cast<const char *>(features_buffer_) +\n            feature_size_ * i);\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Test if it is a compacted buffer\n  virtual bool is_compacted(void) const {\n    return true;\n  }\n\n  //! Retrieve pointer of compacted buffer\n  virtual const void *data(void) const {\n    return features_buffer_;\n  }\n\n  //! Retrieve size of feature\n  virtual size_t element_size(void) const {\n    return feature_size_;\n  }\n\n private:\n  const void *features_buffer_;\n  size_t features_count_;\n  size_t feature_size_;\n  size_t feature_dimension_;\n  IndexMeta::DataType data_type_;\n};\n\n/*! Flexible Index Features\n */\nclass FlexibleIndexFeatures : public IndexFeatures {\n public:\n  //! Flexible Index Features Pointer\n  typedef std::shared_ptr<FlexibleIndexFeatures> Pointer;\n\n  //! Constructor\n  FlexibleIndexFeatures(void)\n      : features_(nullptr),\n        features_count_(0),\n        feature_size_(0),\n        feature_dimension_(0),\n        data_type_(IndexMeta::DataType::DT_UNDEFINED) {}\n\n  //! Constructor\n  FlexibleIndexFeatures(const IndexMeta &meta)\n      : features_(nullptr),\n        features_count_(0),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  //! Constructor\n  FlexibleIndexFeatures(const IndexMeta &meta, const void *const *feats,\n                        size_t feats_count)\n      : features_(feats),\n        features_count_(feats_count),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  //! 
Mount features\n  void mount(const IndexMeta &meta, const void *const *feats,\n             size_t feats_count) {\n    features_ = feats;\n    features_count_ = feats_count;\n    data_type_ = meta.data_type();\n    feature_size_ = meta.element_size();\n    feature_dimension_ = meta.dimension();\n  }\n\n  //! Mount features\n  void mount(const void *const *feats, size_t feats_count) {\n    features_ = feats;\n    features_count_ = feats_count;\n  }\n\n  //! Retrieve count of elements\n  virtual size_t count(void) const {\n    return features_count_;\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const {\n    return feature_dimension_;\n  }\n\n  //! Retrieve feature via index\n  virtual const void *element(size_t i) const {\n    return *(features_ + i);\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Retrieve size of feature\n  virtual size_t element_size(void) const {\n    return feature_size_;\n  }\n\n private:\n  const void *const *features_;\n  size_t features_count_;\n  size_t feature_size_;\n  size_t feature_dimension_;\n  IndexMeta::DataType data_type_;\n};\n\n/*! Gap Index Features\n */\nclass GapIndexFeatures : public IndexFeatures {\n public:\n  //! Gap Index Features Pointer\n  typedef std::shared_ptr<GapIndexFeatures> Pointer;\n\n  //! Constructor\n  GapIndexFeatures(const IndexMeta &meta)\n      : features_(),\n        bucket_limit_(0),\n        features_count_(0),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {\n    if (feature_size_ >= 1024 * 1024) {\n      bucket_limit_ = 64u;\n    } else {\n      bucket_limit_ = (1024 * 1024 * 64) / feature_size_;\n    }\n  }\n\n  //! 
Constructor\n  GapIndexFeatures(const GapIndexFeatures &rhs)\n      : features_(rhs.features_),\n        bucket_limit_(rhs.bucket_limit_),\n        features_count_(rhs.features_count_),\n        feature_size_(rhs.feature_size_),\n        feature_dimension_(rhs.feature_dimension_),\n        data_type_(rhs.data_type_) {}\n\n  //! Constructor\n  GapIndexFeatures(GapIndexFeatures &&rhs)\n      : features_(std::move(rhs.features_)),\n        bucket_limit_(rhs.bucket_limit_),\n        features_count_(rhs.features_count_),\n        feature_size_(rhs.feature_size_),\n        feature_dimension_(rhs.feature_dimension_),\n        data_type_(rhs.data_type_) {}\n\n  //! Assignment\n  GapIndexFeatures &operator=(const GapIndexFeatures &rhs) {\n    features_ = rhs.features_;\n    bucket_limit_ = rhs.bucket_limit_;\n    features_count_ = rhs.features_count_;\n    feature_size_ = rhs.feature_size_;\n    feature_dimension_ = rhs.feature_dimension_;\n    data_type_ = rhs.data_type_;\n    return *this;\n  }\n\n  //! Assignment\n  GapIndexFeatures &operator=(GapIndexFeatures &&rhs) {\n    features_ = std::move(rhs.features_);\n    bucket_limit_ = rhs.bucket_limit_;\n    features_count_ = rhs.features_count_;\n    feature_size_ = rhs.feature_size_;\n    feature_dimension_ = rhs.feature_dimension_;\n    data_type_ = rhs.data_type_;\n    return *this;\n  }\n\n  //! Append a feature\n  void emplace(const void *feat) {\n    if (features_count_ % bucket_limit_ == 0) {\n      std::string bucket;\n      bucket.reserve(bucket_limit_ * feature_size_);\n      bucket.assign(reinterpret_cast<const char *>(feat), feature_size_);\n      features_.push_back(std::move(bucket));\n    } else {\n      features_[features_count_ / bucket_limit_].append(\n          reinterpret_cast<const char *>(feat), feature_size_);\n    }\n    ++features_count_;\n  }\n\n  //! 
Replace a feature\n  void replace(size_t i, const void *feat) {\n    std::memcpy(const_cast<char *>(features_[i / bucket_limit_].data()) +\n                    feature_size_ * (i % bucket_limit_),\n                feat, feature_size_);\n  }\n\n  //! Clear the features\n  void clear(void) {\n    features_.clear();\n    features_count_ = 0;\n  }\n\n  //! Retrieve feature via index\n  void *at(size_t i) {\n    return (const_cast<char *>(features_[i / bucket_limit_].data()) +\n            feature_size_ * (i % bucket_limit_));\n  }\n\n  //! Retrieve feature via index\n  const void *at(size_t i) const {\n    return (features_[i / bucket_limit_].data() +\n            feature_size_ * (i % bucket_limit_));\n  }\n\n  //! Retrieve count of elements\n  virtual size_t count(void) const {\n    return features_count_;\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const {\n    return feature_dimension_;\n  }\n\n  //! Retrieve feature via index\n  virtual const void *element(size_t i) const {\n    return this->at(i);\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Test if it is a compacted buffer\n  virtual bool is_compacted(void) const {\n    return (features_.size() == 1u);\n  }\n\n  //! Retrieve pointer of compacted buffer\n  virtual const void *data(void) const {\n    return (features_.size() == 1u ? features_.front().data() : nullptr);\n  }\n\n  //! Retrieve size of feature\n  virtual size_t element_size(void) const {\n    return feature_size_;\n  }\n\n private:\n  //! Disable them\n  GapIndexFeatures(void) = delete;\n\n  //! Members\n  std::vector<std::string> features_;\n  size_t bucket_limit_;\n  size_t features_count_;\n  size_t feature_size_;\n  size_t feature_dimension_;\n  IndexMeta::DataType data_type_;\n};\n\n/*! Compact Index Features\n */\nclass CompactIndexFeatures : public IndexFeatures {\n public:\n  //! 
Compact Index Features Pointer\n  typedef std::shared_ptr<CompactIndexFeatures> Pointer;\n\n  //! Constructor\n  CompactIndexFeatures(const IndexMeta &meta)\n      : features_(),\n        feature_size_(meta.element_size()),\n        feature_dimension_(meta.dimension()),\n        data_type_(meta.data_type()) {}\n\n  //! Constructor\n  CompactIndexFeatures(const CompactIndexFeatures &rhs)\n      : features_(rhs.features_),\n        feature_size_(rhs.feature_size_),\n        feature_dimension_(rhs.feature_dimension_),\n        data_type_(rhs.data_type_) {}\n\n  //! Constructor\n  CompactIndexFeatures(CompactIndexFeatures &&rhs)\n      : features_(std::move(rhs.features_)),\n        feature_size_(rhs.feature_size_),\n        feature_dimension_(rhs.feature_dimension_),\n        data_type_(rhs.data_type_) {}\n\n  //! Assignment\n  CompactIndexFeatures &operator=(const CompactIndexFeatures &rhs) {\n    features_ = rhs.features_;\n    feature_size_ = rhs.feature_size_;\n    feature_dimension_ = rhs.feature_dimension_;\n    data_type_ = rhs.data_type_;\n    return *this;\n  }\n\n  //! Assignment\n  CompactIndexFeatures &operator=(CompactIndexFeatures &&rhs) {\n    features_ = std::move(rhs.features_);\n    feature_size_ = rhs.feature_size_;\n    feature_dimension_ = rhs.feature_dimension_;\n    data_type_ = rhs.data_type_;\n    return *this;\n  }\n\n  //! Append a feature\n  void emplace(const void *feat) {\n    features_.append(reinterpret_cast<const char *>(feat), feature_size_);\n  }\n\n  //! Replace a feature\n  void replace(size_t i, const void *feat) {\n    std::memcpy(const_cast<char *>(features_.data()) + feature_size_ * i, feat,\n                feature_size_);\n  }\n\n  //! Resize the container\n  void resize(size_t n) {\n    features_.resize(feature_size_ * n);\n  }\n\n  //! Reserve the container\n  void reserve(size_t n) {\n    features_.reserve(feature_size_ * n);\n  }\n\n  //! Clear the features\n  void clear(void) {\n    features_.clear();\n  }\n\n  //! 
Retrieve feature via index\n  void *at(size_t i) {\n    return (const_cast<char *>(features_.data()) + feature_size_ * i);\n  }\n\n  //! Retrieve feature via index\n  const void *at(size_t i) const {\n    return (features_.data() + feature_size_ * i);\n  }\n\n  //! Retrieve count of elements\n  virtual size_t count(void) const {\n    return (features_.size() / feature_size_);\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const {\n    return feature_dimension_;\n  }\n\n  //! Retrieve feature via index\n  virtual const void *element(size_t i) const {\n    return this->at(i);\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Test if it is a compacted buffer\n  virtual bool is_compacted(void) const {\n    return true;\n  }\n\n  //! Retrieve pointer of compacted buffer\n  virtual const void *data(void) const {\n    return features_.data();\n  }\n\n  //! Retrieve size of feature\n  virtual size_t element_size(void) const {\n    return feature_size_;\n  }\n\n private:\n  //! Disable them\n  CompactIndexFeatures(void) = delete;\n\n  //! Members\n  std::string features_;\n  size_t feature_size_;\n  size_t feature_dimension_;\n  IndexMeta::DataType data_type_;\n};\n\n/*! Sample Index Features\n */\ntemplate <typename TBase>\nclass SampleIndexFeatures : public TBase {\n public:\n  //! Sample Index Features Pointer\n  typedef std::shared_ptr<SampleIndexFeatures<TBase>> Pointer;\n\n  //! Constructor\n  SampleIndexFeatures(const IndexMeta &meta, size_t cnt)\n      : TBase(meta), samples_(std::max<size_t>(cnt, 1u)), total_(0), mt_() {}\n\n  //! Constructor\n  SampleIndexFeatures(const SampleIndexFeatures &rhs)\n      : TBase(rhs), samples_(rhs.samples_), total_(rhs.total_), mt_() {}\n\n  //! Constructor\n  SampleIndexFeatures(SampleIndexFeatures &&rhs)\n      : TBase(std::move(rhs)),\n        samples_(rhs.samples_),\n        total_(rhs.total_),\n        mt_() {}\n\n  //! 
Assignment\n  SampleIndexFeatures &operator=(const SampleIndexFeatures &rhs) {\n    TBase::operator=(static_cast<const TBase &>(rhs));\n    samples_ = rhs.samples_;\n    total_ = rhs.total_;\n    return *this;\n  }\n\n  //! Assignment\n  SampleIndexFeatures &operator=(SampleIndexFeatures &&rhs) {\n    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));\n    samples_ = rhs.samples_;\n    total_ = rhs.total_;\n    return *this;\n  }\n\n  //! Retrieve count of samples\n  size_t samples(void) const {\n    return samples_;\n  }\n\n  //! Retrieve count of total\n  size_t total(void) const {\n    return total_;\n  }\n\n  //! Append a feature\n  void emplace(const void *feat) {\n    if (TBase::count() >= samples_) {\n      std::uniform_int_distribution<size_t> dt(0, total_);\n      size_t i = dt(mt_);\n\n      if (i < samples_) {\n        TBase::replace(i, feat);\n      }\n    } else {\n      TBase::emplace(feat);\n    }\n    ++total_;\n  }\n\n  //! Clear the features\n  void clear(void) {\n    TBase::clear();\n    total_ = 0;\n  }\n\n private:\n  //! Disable them\n  SampleIndexFeatures(void) = delete;\n\n  //! Members\n  size_t samples_;\n  size_t total_;\n  std::mt19937 mt_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_filter.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <functional>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Filter\n */\nclass IndexFilter {\n public:\n  //! Constructor\n  IndexFilter(void) {}\n\n  //! Constructor\n  IndexFilter(const IndexFilter &rhs) : filter_(rhs.filter_) {}\n\n  //! Constructor\n  IndexFilter(IndexFilter &&rhs)\n      : filter_(std::forward<decltype(filter_)>(rhs.filter_)) {}\n\n  //! Copy assignment operator\n  IndexFilter &operator=(const IndexFilter &rhs) {\n    filter_ = rhs.filter_;\n    return *this;\n  }\n\n  //! Move assignment operator\n  IndexFilter &operator=(IndexFilter &&rhs) {\n    filter_ = std::forward<decltype(filter_)>(rhs.filter_);\n    return *this;\n  }\n\n  //! Function call\n  bool operator()(uint64_t key) const {\n    return (filter_ ? filter_(key) : false);\n  }\n\n  //! Set the filter function\n  template <typename T>\n  void set(T &&func) {\n    filter_ = std::forward<T>(func);\n  }\n\n  //! Reset the filter function\n  void reset(void) {\n    filter_ = nullptr;\n  }\n\n  //! Test if the function is valid\n  bool is_valid(void) const {\n    return (!!filter_);\n  }\n\n private:\n  //! Members\n  std::function<bool(uint64_t key)> filter_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_flow.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <zvec/core/framework/index_reformer.h>\n#include <zvec/core/framework/index_searcher.h>\nnamespace zvec {\nnamespace core {\n\n/*! Index Flow\n */\nclass IndexFlow {\n public:\n  /*! Index Flow Context\n   */\n  class Context {\n   public:\n    //! Index Flow Pointer\n    typedef std::unique_ptr<Context> Pointer;\n\n    //! Index Flow UPointer\n    typedef std::unique_ptr<Context> UPointer;\n\n    //! Retrieve searcher context\n    IndexSearcher::Context::Pointer &searcher_context(void) {\n      return searcher_context_;\n    }\n\n    //! Set topk of search result\n    void set_topk(uint32_t topk) {\n      return searcher_context_->set_topk(topk);\n    }\n\n    //! Retrieve search results\n    const IndexDocumentList &result(void) const {\n      return searcher_context_->result();\n    }\n\n    //! Retrieve search result with index\n    const IndexDocumentList &result(size_t index) const {\n      return searcher_context_->result(index);\n    }\n\n    //! Set the filter of context\n    template <typename T>\n    void set_filter(T &&func) {\n      searcher_context_->set_filter(std::forward<T>(func));\n    }\n\n    //! Reset the filter of context\n    void reset_filter(void) {\n      searcher_context_->reset_filter();\n    }\n\n    //! 
Set mode of debug\n    void set_debug_mode(bool enable) {\n      searcher_context_->set_debug_mode(enable);\n    }\n\n    //! Update the parameters of context\n    int update(const ailego::Params &params) {\n      return searcher_context_->update(params);\n    }\n\n    //! Retrieve debug information\n    std::string debug_string(void) const {\n      return searcher_context_->debug_string();\n    }\n\n    //! Retrieve magic number\n    uint32_t magic(void) const {\n      return searcher_context_->magic();\n    }\n\n    //! Retrieve mode of debug\n    bool debug_mode(void) const {\n      return searcher_context_->debug_mode();\n    }\n\n    //! Retrieve mutable features buffer\n    std::string *mutable_features(void) {\n      return &features_;\n    }\n\n    //! Retrieve features buffer\n    const std::string &features(void) const {\n      return features_;\n    }\n\n   protected:\n    friend class IndexFlow;\n\n    //! Constructor\n    Context(IndexSearcher::Context::Pointer &&ctx)\n        : searcher_context_(std::move(ctx)) {}\n\n   private:\n    IndexSearcher::Context::Pointer searcher_context_{};\n    std::string features_{};\n  };\n\n  //! Constructor\n  IndexFlow(void) {}\n\n  //! Constructor\n  IndexFlow(IndexFlow &&rhs)\n      : storage_(std::move(rhs.storage_)),\n        reformer_(std::move(rhs.reformer_)),\n        searcher_(std::move(rhs.searcher_)),\n        metric_(std::move(rhs.metric_)),\n        user_reformer_(std::move(rhs.user_reformer_)),\n        user_searcher_(std::move(rhs.user_searcher_)),\n        user_metric_name_(std::move(rhs.user_metric_name_)),\n        user_metric_params_(std::move(rhs.user_metric_params_)) {}\n\n  //! 
Assignment\n  IndexFlow &operator=(IndexFlow &&rhs) {\n    storage_ = std::move(rhs.storage_);\n    reformer_ = std::move(rhs.reformer_);\n    searcher_ = std::move(rhs.searcher_);\n    metric_ = std::move(rhs.metric_);\n    user_reformer_ = std::move(rhs.user_reformer_);\n    user_searcher_ = std::move(rhs.user_searcher_);\n    user_metric_name_ = std::move(rhs.user_metric_name_);\n    user_metric_params_ = std::move(rhs.user_metric_params_);\n    return *this;\n  }\n\n  //! Retrieve index meta\n  const IndexMeta &meta(void) const {\n    return meta_;\n  }\n\n  //! Retrieve index reformer\n  const IndexReformer::Pointer &reformer(void) const {\n    return reformer_;\n  }\n\n  //! Retrieve index searcher\n  const IndexSearcher::Pointer &searcher(void) const {\n    return searcher_;\n  }\n\n  //! Retrieve index metric\n  const IndexMetric::Pointer &metric(void) const {\n    return metric_;\n  }\n\n  //! Set the index storage (user)\n  int set_storage(const std::string &name, const ailego::Params &params);\n\n  //! Set the index reformer (user)\n  int set_reformer(const std::string &name, const ailego::Params &params);\n\n  //! Set the index searcher (user)\n  int set_searcher(const std::string &name, const ailego::Params &params);\n\n  //! Set the index searcher (user)\n  int set_searcher(IndexSearcher::Pointer searcher);\n\n  //! Set the index metric (user)\n  int set_metric(const std::string &name, const ailego::Params &params);\n\n  //! Load index\n  int load(const std::string &path);\n\n  //! Unload index\n  int unload(void);\n\n  //! Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const;\n\n  //! Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const;\n\n  //! 
Similarity brute force search\n  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,\n                     uint32_t count, Context::Pointer &context) const;\n\n  //! Similarity search\n  int search_impl(const void *query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const;\n\n  //! Similarity search (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search_bf(const ailego::Float16 *vec, size_t dim,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search_bf(const float *vec, size_t dim, Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (INT8)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT8>::type>\n  int search_bf(const int8_t *vec, size_t dim,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (INT4)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT4>::type>\n  int search_bf(const uint8_t *vec, size_t dim,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! 
Similarity search (BINARY)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_BINARY32>::type>\n  int search_bf(const uint32_t *vec, size_t dim,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search in batch (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search_bf(const ailego::Float16 *vec, size_t dim, size_t rows,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search_bf(const float *vec, size_t dim, size_t rows,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (INT8)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT8>::type>\n  int search_bf(const int8_t *vec, size_t dim, size_t rows,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (INT4)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT4>::type>\n  int search_bf(const uint8_t *vec, size_t dim, size_t rows,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! 
Similarity Search in batch (BINARY)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_BINARY32>::type>\n  int search_bf(const uint32_t *vec, size_t dim, size_t rows,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search(const ailego::Float16 *vec, size_t dim,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search(const float *vec, size_t dim, Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (INT8)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT8>::type>\n  int search(const int8_t *vec, size_t dim, Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search (INT4)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT4>::type>\n  int search(const uint8_t *vec, size_t dim, Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! 
Similarity search (BINARY32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_BINARY32>::type>\n  int search(const uint32_t *vec, size_t dim, Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);\n  }\n\n  //! Similarity search in batch (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search(const ailego::Float16 *vec, size_t dim, size_t rows,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search(const float *vec, size_t dim, size_t rows,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (INT8)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT8>::type>\n  int search(const int8_t *vec, size_t dim, size_t rows,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Similarity search in batch (INT4)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_INT4>::type>\n  int search(const uint8_t *vec, size_t dim, size_t rows,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! 
Similarity Search in batch (BINARY)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_BINARY32>::type>\n  int search(const uint32_t *vec, size_t dim, size_t rows,\n             Context::Pointer &context) const {\n    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);\n  }\n\n  //! Create a flow context\n  Context::Pointer create_context(void) const {\n    return Context::Pointer(new Context(searcher_->create_context()));\n  }\n\n private:\n  //! Disable them\n  IndexFlow(const IndexFlow &) = delete;\n  IndexFlow &operator=(const IndexFlow &) = delete;\n\n  int load_internal();\n\n  //! Members\n  IndexMeta meta_{};\n  IndexStorage::Pointer storage_{};\n  IndexReformer::Pointer reformer_{};\n  IndexSearcher::Pointer searcher_{};\n  IndexMetric::Pointer metric_{};\n  IndexReformer::Pointer user_reformer_{};\n  IndexSearcher::Pointer user_searcher_{};\n  std::string user_metric_name_{};\n  ailego::Params user_metric_params_{};\n};\n\n\n/*! Index Sparse Flow\n */\nclass IndexSparseFlow {\n public:\n  /*! Index Sparse Flow Context\n   */\n  class Context {\n   public:\n    //! Index Flow Pointer\n    typedef std::unique_ptr<Context> Pointer;\n\n    //! Index Flow UPointer\n    typedef std::unique_ptr<Context> UPointer;\n\n    //! Retrieve searcher context\n    IndexSearcher::Context::Pointer &searcher_context(void) {\n      return searcher_context_;\n    }\n\n    //! Set topk of search result\n    void set_topk(uint32_t topk) {\n      return searcher_context_->set_topk(topk);\n    }\n\n    //! Retrieve search results\n    const IndexDocumentList &result(void) const {\n      return searcher_context_->result();\n    }\n\n    //! Retrieve search result with index\n    const IndexDocumentList &result(size_t index) const {\n      return searcher_context_->result(index);\n    }\n\n    //! 
Set the filter of context\n    template <typename T>\n    void set_filter(T &&func) {\n      searcher_context_->set_filter(std::forward<T>(func));\n    }\n\n    //! Reset the filter of context\n    void reset_filter(void) {\n      searcher_context_->reset_filter();\n    }\n\n    //! Set mode of debug\n    void set_debug_mode(bool enable) {\n      searcher_context_->set_debug_mode(enable);\n    }\n\n    //! Update the parameters of context\n    int update(const ailego::Params &params) {\n      return searcher_context_->update(params);\n    }\n\n    //! Retrieve debug information\n    std::string debug_string(void) const {\n      return searcher_context_->debug_string();\n    }\n\n    //! Retrieve magic number\n    uint32_t magic(void) const {\n      return searcher_context_->magic();\n    }\n\n    //! Retrieve mode of debug\n    bool debug_mode(void) const {\n      return searcher_context_->debug_mode();\n    }\n\n    //! Retrieve mutable features buffer\n    std::string *mutable_features(void) {\n      return &features_;\n    }\n\n    //! Retrieve features buffer\n    const std::string &features(void) const {\n      return features_;\n    }\n\n   protected:\n    friend class IndexSparseFlow;\n\n    //! Constructor\n    Context(IndexSearcher::Context::Pointer &&ctx)\n        : searcher_context_(std::move(ctx)) {}\n\n   private:\n    IndexSearcher::Context::Pointer searcher_context_{};\n    std::string features_{};\n  };\n\n  //! Constructor\n  IndexSparseFlow(void) {}\n\n  //! Constructor\n  IndexSparseFlow(IndexSparseFlow &&rhs)\n      : storage_(std::move(rhs.storage_)),\n        reformer_(std::move(rhs.reformer_)),\n        searcher_(std::move(rhs.searcher_)),\n        metric_(std::move(rhs.metric_)),\n        user_reformer_(std::move(rhs.user_reformer_)),\n        user_searcher_(std::move(rhs.user_searcher_)),\n        user_metric_name_(std::move(rhs.user_metric_name_)),\n        user_metric_params_(std::move(rhs.user_metric_params_)) {}\n\n  //! 
Assignment\n  IndexSparseFlow &operator=(IndexSparseFlow &&rhs) {\n    storage_ = std::move(rhs.storage_);\n    reformer_ = std::move(rhs.reformer_);\n    searcher_ = std::move(rhs.searcher_);\n    metric_ = std::move(rhs.metric_);\n    user_reformer_ = std::move(rhs.user_reformer_);\n    user_searcher_ = std::move(rhs.user_searcher_);\n    user_metric_name_ = std::move(rhs.user_metric_name_);\n    user_metric_params_ = std::move(rhs.user_metric_params_);\n    return *this;\n  }\n\n  //! Retrieve index sparse meta\n  const IndexMeta &meta(void) const {\n    return meta_;\n  }\n\n  //! Retrieve index reformer\n  const IndexReformer::Pointer &reformer(void) const {\n    return reformer_;\n  }\n\n  //! Retrieve index searcher\n  const IndexSearcher::Pointer &searcher(void) const {\n    return searcher_;\n  }\n\n  //! Retrieve index metric\n  const IndexMetric::Pointer &metric(void) const {\n    return metric_;\n  }\n\n  //! Set the index storage (user)\n  int set_storage(const std::string &name, const ailego::Params &params);\n\n  //! Set the index reformer (user)\n  int set_reformer(const std::string &name, const ailego::Params &params);\n\n  //! Set the index searcher (user)\n  int set_searcher(const std::string &name, const ailego::Params &params);\n\n  //! Set the index searcher (user)\n  int set_searcher(IndexSearcher::Pointer searcher);\n\n  //! Set the index metric (user)\n  int set_metric(const std::string &name, const ailego::Params &params);\n\n  //! Load index\n  int load(const std::string &path);\n\n  //! Unload index\n  int unload(void);\n\n  //! Similarity search with sparse inputs\n  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  Context::Pointer &context) const;\n\n  //! 
Similarity search with sparse inputs\n  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,\n                  const void *sparse_query, const IndexQueryMeta &qmeta,\n                  uint32_t count, Context::Pointer &context) const;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta,\n                     Context::Pointer &context) const;\n\n  //! Similarity brute force search with sparse inputs\n  int search_bf_impl(const uint32_t *sparse_count,\n                     const uint32_t *sparse_indices, const void *sparse_query,\n                     const IndexQueryMeta &qmeta, uint32_t count,\n                     Context::Pointer &context) const;\n\n  //! Similarity search (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search_bf(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                const ailego::Float16 *sparse_query,\n                Context::Pointer &context) const {\n    return this->search_bf_impl(sparse_count, sparse_indices, sparse_query,\n                                IndexQueryMeta(DT), context);\n  }\n\n  //! Similarity search (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search_bf(const uint32_t sparse_count, const uint32_t *sparse_indices,\n                const float *sparse_query, Context::Pointer &context) const {\n    return this->search_bf_impl(sparse_count, sparse_indices, sparse_query,\n                                IndexQueryMeta(DT), context);\n  }\n\n  //! 
Similarity search (FP16)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP16>::type>\n  int search(const uint32_t sparse_count, const uint32_t *sparse_indices,\n             const ailego::Float16 *sparse_query,\n             Context::Pointer &context) const {\n    return this->search_impl(sparse_count, sparse_indices, sparse_query,\n                             IndexQueryMeta(DT), context);\n  }\n\n  //! Similarity search (FP32)\n  template <IndexMeta::DataType DT,\n            typename = typename std::enable_if<\n                DT == IndexMeta::DataType::DT_FP32>::type>\n  int search(const uint32_t sparse_count, const uint32_t *sparse_indices,\n             const float *sparse_query, Context::Pointer &context) const {\n    return this->search_impl(sparse_count, sparse_indices, sparse_query,\n                             IndexQueryMeta(DT), context);\n  }\n\n  //! Create a flow context\n  Context::Pointer create_context(void) const {\n    return Context::Pointer(new Context(searcher_->create_context()));\n  }\n\n private:\n  //! Disable them\n  IndexSparseFlow(const IndexSparseFlow &) = delete;\n  IndexSparseFlow &operator=(const IndexSparseFlow &) = delete;\n\n  int load_internal();\n\n  //! Members\n  IndexMeta meta_{};\n  IndexStorage::Pointer storage_{};\n  IndexReformer::Pointer reformer_{};\n  IndexSearcher::Pointer searcher_{};\n  IndexMetric::Pointer metric_{};\n  IndexReformer::Pointer user_reformer_{};\n  IndexSearcher::Pointer user_searcher_{};\n  std::string user_metric_name_{};\n  ailego::Params user_metric_params_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_format.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstring>\n#include <random>\n#include <string>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Format\n */\nstruct IndexFormat {\n  /*! Version Number\n   */\n  enum { FORMAT_VERSION = 0x0002 };\n\n  /*! Index Format Meta Header\n   */\n  struct MetaHeader {\n    uint32_t header_crc;\n    uint16_t reserved1_;\n    uint16_t version;\n    uint32_t revision;\n    uint32_t magic;\n    uint16_t meta_header_size;\n    uint16_t meta_footer_size;\n    uint32_t meta_footer_offset;\n    uint64_t content_offset;\n    uint64_t setup_time;\n    uint64_t reserved3_[3];\n  };\n\n  static_assert(sizeof(MetaHeader) % 32 == 0,\n                \"MetaHeader must be aligned with 32 bytes\");\n\n  /*! 
Index Format Meta Footer\n   */\n  struct MetaFooter {\n    uint32_t footer_crc;\n    uint32_t segments_meta_crc;\n    uint32_t content_crc;\n    uint32_t segment_count;\n    // meta section size\n    uint32_t segments_meta_size;\n    uint32_t reserved1_;\n    // segments' data section size\n    uint64_t content_size;\n    uint64_t content_padding_size;\n\n    uint64_t check_point;\n    uint64_t update_time;\n    uint64_t reserved2_[7];\n    uint64_t next_meta_header_offset;\n    uint64_t total_size;\n  };\n\n  static_assert(sizeof(MetaFooter) % 32 == 0,\n                \"MetaFooter must be aligned with 32 bytes\");\n\n  /*! Index Format Segment Meta\n   */\n  struct SegmentMeta {\n    uint32_t segment_id_offset;\n    // used only by immutable segments, e.g., IndexMeta, or searcher\n    uint32_t data_crc;\n    uint64_t data_index;\n    uint64_t data_size;\n    uint64_t padding_size;\n  };\n\n  static_assert(sizeof(SegmentMeta) % 32 == 0,\n                \"SegmentMeta must be aligned with 32 bytes\");\n\n  /*! Index Format Segment Meta Buffer\n   */\n  class SegmentMetaBuffer {\n   public:\n    //! Constructor\n    SegmentMetaBuffer(uint32_t count) : capacity_(count) {\n      buffer_.clear();\n      buffer_.resize(sizeof(SegmentMeta) * capacity_);\n    }\n\n    //! Append a segment meta into buffer\n    bool append(const std::string &id, size_t data_size, size_t padding_size,\n                uint32_t data_crc) {\n      if (count_ >= capacity_) {\n        return false;\n      }\n      SegmentMeta *meta = (SegmentMeta *)buffer_.data() + count_;\n      meta->segment_id_offset = static_cast<uint32_t>(buffer_.size());\n      meta->data_index = offset_;\n      meta->data_size = data_size;\n      meta->data_crc = data_crc;\n      meta->padding_size = padding_size;\n      buffer_.append(id.c_str(), std::strlen(id.c_str()) + 1);\n      count_ += 1;\n      offset_ += data_size + padding_size;\n      return true;\n    }\n\n    //! 
Resize the buffer\n    void resize(size_t val) {\n      buffer_.resize(val);\n    }\n\n    //! Retrieve pointer of data\n    const void *data(void) const {\n      return buffer_.data();\n    }\n\n    //! Retrieve size of data\n    size_t size(void) const {\n      return buffer_.size();\n    }\n\n    //! Retrieve crc of buffer\n    uint32_t crc(void) const {\n      return ailego::Crc32c::Hash(buffer_.data(), buffer_.size(), 0);\n    }\n\n   private:\n    //! Disable them\n    SegmentMetaBuffer(void) = delete;\n\n    //! Members\n    std::string buffer_{};\n    size_t offset_{0u};\n    uint32_t capacity_{0u};\n    uint32_t count_{0u};\n  };\n\n  //! Setup meta header structure\n  static void SetupMetaHeader(MetaHeader *header, uint32_t footer_offset,\n                              uint32_t content_offset) {\n    memset(header, 0, sizeof(MetaHeader));\n    header->version = IndexFormat::FORMAT_VERSION;\n    header->revision = 0;\n    header->magic = std::random_device()();\n    header->meta_header_size = sizeof(MetaHeader);\n    header->meta_footer_size = sizeof(MetaFooter);\n    header->meta_footer_offset = footer_offset;\n    header->content_offset = content_offset;\n    header->setup_time = ailego::Realtime::Seconds();\n    header->header_crc = ailego::Crc32c::Hash(header, sizeof(MetaHeader), 0);\n  }\n\n  static void UpdateMetaHeader(MetaHeader *header) {\n    header->header_crc = 0;\n    header->header_crc = ailego::Crc32c::Hash(header, sizeof(MetaHeader), 0);\n  }\n\n  //! Setup meta footer structure\n  static void SetupMetaFooter(MetaFooter *footer) {\n    memset(footer, 0, sizeof(MetaFooter));\n  }\n\n  //! 
Update meta footer structure\n  static void UpdateMetaFooter(MetaFooter *footer, uint64_t check_point) {\n    if (check_point != 0) {\n      footer->check_point = check_point;\n    }\n    footer->update_time = ailego::Realtime::Seconds();\n    footer->footer_crc = 0;\n    footer->footer_crc = ailego::Crc32c::Hash(footer, sizeof(MetaFooter), 0);\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_framework.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_bundle.h>\n#include <zvec/core/framework/index_cluster.h>\n#include <zvec/core/framework/index_converter.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_filter.h>\n#include <zvec/core/framework/index_flow.h>\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_mapping.h>\n#include <zvec/core/framework/index_memory.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_plugin.h>\n#include <zvec/core/framework/index_runner.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_trainer.h>\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_groupby.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <functional>\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index GroupBy\n */\nclass IndexGroupBy {\n public:\n  //! Function call\n  std::string operator()(uint64_t key) const {\n    return (group_by_ ? group_by_(key) : \"\");\n  }\n\n  //! Set the group by function\n  template <typename T>\n  void set(T &&func) {\n    group_by_ = std::forward<T>(func);\n  }\n\n  //! Reset the group by function\n  void reset(void) {\n    group_by_ = nullptr;\n  }\n\n  //! Test if the function is valid\n  bool is_valid(void) const {\n    return (!!group_by_);\n  }\n\n private:\n  //! Members\n  std::function<std::string(uint64_t key)> group_by_{};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/core/framework/index_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_storage.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Helper\n */\nstruct IndexHelper {\n  //! Serialize meta information to dumper\n  static int SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper,\n                               const std::string &key);\n\n  //! Serialize meta information to storage\n  static int SerializeToStorage(const IndexMeta &mt, IndexStorage *storage,\n                                const std::string &key);\n\n  //! Derialize meta information from storage\n  static int DeserializeFromStorage(IndexStorage *storage,\n                                    const std::string &key, IndexMeta *out);\n\n  //! Serialize meta information to dumper\n  static int SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper) {\n    return SerializeToDumper(mt, dumper, \"IndexMeta\");\n  }\n\n  //! Serialize meta information to storage\n  static int SerializeToStorage(const IndexMeta &mt, IndexStorage *storage) {\n    return SerializeToStorage(mt, storage, \"IndexMeta\");\n  }\n\n  //! 
Deserialize meta information from storage\n  static int DeserializeFromStorage(IndexStorage *storage, IndexMeta *out) {\n    return DeserializeFromStorage(storage, \"IndexMeta\", out);\n  }\n\n  //! Create a proxy holder that can be traversed twice.\n  static IndexHolder::Pointer MakeTwoPassHolder(IndexHolder::Pointer holder);\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_holder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstring>\n#include <list>\n#include <memory>\n#include <vector>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/core/framework/index_features.h>\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Holder\n */\nstruct IndexHolder {\n  //! Index Holder Pointer\n  typedef std::shared_ptr<IndexHolder> Pointer;\n\n  /*! Index Holder Iterator\n   */\n  struct Iterator {\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    virtual const void *data(void) const = 0;\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const = 0;\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const = 0;\n\n    //! Next iterator\n    virtual void next(void) = 0;\n  };\n\n  //! Destructor\n  virtual ~IndexHolder(void) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  virtual size_t count(void) const = 0;\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const = 0;\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const = 0;\n\n  //! 
Retrieve element size in bytes\n  virtual size_t element_size(void) const = 0;\n\n  //! Retrieve if it can multi-pass\n  virtual bool multipass(void) const = 0;\n\n  //! Create a new iterator\n  virtual Iterator::Pointer create_iterator(void) = 0;\n\n  //! Test if matches the meta\n  bool is_matched(const IndexMeta &meta) const {\n    return (this->data_type() == meta.data_type() &&\n            this->dimension() == meta.dimension() &&\n            this->element_size() == meta.element_size());\n  }\n};\n\n/*! Index Hybrid Holder\n */\nstruct IndexHybridHolder : public IndexHolder {\n  //! Index Hybrid Holder Pointer\n  typedef std::shared_ptr<IndexHybridHolder> Pointer;\n\n  /*! Index Holder Iterator\n   */\n  struct Iterator : public IndexHolder::Iterator {\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    virtual const void *data(void) const = 0;\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const = 0;\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const = 0;\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const = 0;\n\n    //! Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const = 0;\n\n    //! Retrieve sparse data\n    virtual const void *sparse_data() const = 0;\n\n    //! Next iterator\n    virtual void next(void) = 0;\n  };\n\n  //! Destructor\n  virtual ~IndexHybridHolder(void) {}\n\n  //! Retrieve sparse count summing up over all the docs\n  virtual size_t total_sparse_count(void) const = 0;\n\n  //! Create a new hybrid iterator\n  virtual Iterator::Pointer create_hybrid_iterator(void) = 0;\n};\n\n/*! Index Sparse Holder\n */\nstruct IndexSparseHolder {\n  //! Index Sparse Holder Pointer\n  typedef std::shared_ptr<IndexSparseHolder> Pointer;\n\n  /*! Index Holder Iterator\n   */\n  struct Iterator {\n    //! 
Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const = 0;\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const = 0;\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const = 0;\n\n    //! Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const = 0;\n\n    //! Retrieve sparse data\n    virtual const void *sparse_data() const = 0;\n\n    //! Next iterator\n    virtual void next(void) = 0;\n  };\n\n  //! Destructor\n  virtual ~IndexSparseHolder(void) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  virtual size_t count(void) const = 0;\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const = 0;\n\n  //! Retrieve if it can multi-pass\n  virtual bool multipass(void) const = 0;\n\n  //! Create a new iterator\n  virtual Iterator::Pointer create_iterator(void) = 0;\n\n  //! Test if matches the meta\n  bool is_matched(const IndexMeta &meta) const {\n    return (this->data_type() == meta.data_type());\n  }\n\n  //! Retrieve sparse count summing up over all the docs for reserving space\n  virtual size_t total_sparse_count(void) const = 0;\n};\n\n/*! One-Pass Numerical Index Holder\n */\ntemplate <typename T>\nclass OnePassNumericalIndexHolder : public IndexHolder {\n public:\n  /*! One-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(OnePassNumericalIndexHolder *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! 
Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.erase(features_iter_++);\n    }\n\n   private:\n    OnePassNumericalIndexHolder *holder_{nullptr};\n    typename std::list<std::pair<uint64_t, ailego::NumericalVector<T>>>::\n        iterator features_iter_{};\n  };\n\n  //! Constructor\n  OnePassNumericalIndexHolder(size_t dim) : dimension_(dim) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return dimension_ * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return false;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new OnePassNumericalIndexHolder::Iterator(this));\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, vec);\n    return true;\n  }\n\n  //! 
Append an element into holder\n  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n    return true;\n  }\n\n private:\n  //! Disable them\n  OnePassNumericalIndexHolder(void) = delete;\n\n  //! Members\n  size_t dimension_{0};\n  std::list<std::pair<uint64_t, ailego::NumericalVector<T>>> features_;\n};\n\n/*! Multi-Pass Numerical Index Holder\n */\ntemplate <typename T>\nclass MultiPassNumericalIndexHolder : public IndexHolder {\n public:\n  /*! Multi-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(MultiPassNumericalIndexHolder *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      ++features_iter_;\n    }\n\n   private:\n    MultiPassNumericalIndexHolder *holder_{nullptr};\n    typename std::vector<std::pair<uint64_t, ailego::NumericalVector<T>>>::\n        iterator features_iter_{};\n  };\n\n  //! Constructor\n  MultiPassNumericalIndexHolder(size_t dim) : dimension_(dim) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return dimension_ * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new MultiPassNumericalIndexHolder::Iterator(this));\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, vec);\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n    return true;\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t size) {\n    features_.reserve(size);\n  }\n\n  //! Get vector data pointer by index\n  const void *get_vector_by_index(size_t index) const {\n    if (index >= features_.size()) {\n      return nullptr;\n    }\n    return features_[index].second.data();\n  }\n\n protected:\n  //! Members\n  size_t dimension_{0};\n  std::vector<std::pair<uint64_t, ailego::NumericalVector<T>>> features_;\n\n private:\n  //! Disable them\n  MultiPassNumericalIndexHolder(void) = delete;\n};\n\n/*! One-Pass Binary Index Holder\n */\ntemplate <typename T>\nclass OnePassBinaryIndexHolder : public IndexHolder {\n public:\n  /*! One-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! 
Constructor\n    Iterator(OnePassBinaryIndexHolder *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.erase(features_iter_++);\n    }\n\n   private:\n    OnePassBinaryIndexHolder *holder_{nullptr};\n    typename std::list<std::pair<uint64_t, ailego::BinaryVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  OnePassBinaryIndexHolder(size_t dim) : dimension_(dim) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return (dimension_ + (sizeof(T) << 3) - 1) / (sizeof(T) << 3) * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return false;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new OnePassBinaryIndexHolder::Iterator(this));\n  }\n\n  //! 
Append an element into holder\n  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, vec);\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n    return true;\n  }\n\n private:\n  //! Disable them\n  OnePassBinaryIndexHolder(void) = delete;\n\n  //! Members\n  size_t dimension_{0};\n  std::list<std::pair<uint64_t, ailego::BinaryVector<T>>> features_;\n};\n\n/*! Multi-Pass Binary Index Holder\n */\ntemplate <typename T>\nclass MultiPassBinaryIndexHolder : public IndexHolder {\n public:\n  /*! Multi-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(MultiPassBinaryIndexHolder *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      ++features_iter_;\n    }\n\n   private:\n    MultiPassBinaryIndexHolder *holder_{nullptr};\n    typename std::vector<std::pair<uint64_t, ailego::BinaryVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  MultiPassBinaryIndexHolder(size_t dim) : dimension_(dim) {}\n\n  //! 
Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return (dimension_ + (sizeof(T) << 3) - 1) / (sizeof(T) << 3) * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new MultiPassBinaryIndexHolder::Iterator(this));\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, vec);\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n    return true;\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t size) {\n    features_.reserve(size);\n  }\n\n  //! Get vector data pointer by index\n  const void *get_vector_by_index(size_t index) const {\n    if (index >= features_.size()) {\n      return nullptr;\n    }\n    return features_[index].second.data();\n  }\n\n protected:\n  //! Members\n  size_t dimension_{0};\n  std::vector<std::pair<uint64_t, ailego::BinaryVector<T>>> features_;\n\n private:\n  //! Disable them\n  MultiPassBinaryIndexHolder(void) = delete;\n};\n\n/*! 
One-Pass Index Hybrid Holder\n */\ntemplate <typename T>\nclass OnePassIndexHybridHolderBase : public IndexHybridHolder {\n public:\n  /*! One-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHybridHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(OnePassIndexHybridHolderBase *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.erase(features_iter_++);\n    }\n\n    //! Retrieve sparse count\n    uint32_t sparse_count() const override {\n      return features_iter_->second.sparse_count();\n    }\n\n    //! Retrieve sparse indices\n    const uint32_t *sparse_indices() const override {\n      return features_iter_->second.sparse_indices();\n    }\n\n    //! Retrieve sparse data\n    const void *sparse_data() const override {\n      return features_iter_->second.sparse_data();\n    }\n\n   private:\n    OnePassIndexHybridHolderBase *holder_{nullptr};\n    typename std::list<std::pair<uint64_t, ailego::HybridVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  OnePassIndexHybridHolderBase(size_t dim) : dimension_(dim) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return dimension_ * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return false;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new OnePassIndexHybridHolderBase::Iterator(this));\n  }\n\n  //! Create a new hybrid iterator\n  IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) override {\n    return IndexHybridHolder::Iterator::Pointer(\n        new OnePassIndexHybridHolderBase::Iterator(this));\n  }\n\n  //! Retrieve sparse count summing up over all the docs\n  size_t total_sparse_count(void) const override {\n    return total_sparse_count_;\n    ;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::HybridVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, vec);\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::HybridVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n private:\n  //! Disable them\n  OnePassIndexHybridHolderBase(void) = delete;\n\n  //! Members\n  size_t dimension_{0};\n  std::list<std::pair<uint64_t, ailego::HybridVector<T>>> features_;\n  size_t total_sparse_count_{0};\n};\n\n/*! Multi-Pass Index Hybrid Holder Base\n */\ntemplate <typename T>\nclass MultiPassIndexHybridHolderBase : public IndexHybridHolder {\n public:\n  /*! 
Multi-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexHybridHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(MultiPassIndexHybridHolderBase *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    const void *data(void) const override {\n      return features_iter_->second.data();\n    }\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      ++features_iter_;\n    }\n\n    //! Retrieve sparse count\n    uint32_t sparse_count() const override {\n      return features_iter_->second.sparse_count();\n    }\n\n    //! Retrieve sparse indices\n    const uint32_t *sparse_indices() const override {\n      return features_iter_->second.sparse_indices();\n    }\n\n    //! Retrieve sparse data\n    const void *sparse_data() const override {\n      return features_iter_->second.sparse_data();\n    }\n\n   private:\n    MultiPassIndexHybridHolderBase *holder_{nullptr};\n    typename std::vector<std::pair<uint64_t, ailego::HybridVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  MultiPassIndexHybridHolderBase(size_t dim) : dimension_(dim) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return dimension_;\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! 
Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return dimension_ * sizeof(T);\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new MultiPassIndexHybridHolderBase::Iterator(this));\n  }\n\n  //! Create a new hybrid iterator\n  IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) override {\n    return IndexHybridHolder::Iterator::Pointer(\n        new MultiPassIndexHybridHolderBase::Iterator(this));\n  }\n\n  //! Retrieve sparse count summing up over all the docs\n  size_t total_sparse_count(void) const override {\n    return 0;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::HybridVector<T> &vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n\n    features_.emplace_back(key, vec);\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::HybridVector<T> &&vec) {\n    if (vec.size() != dimension_) {\n      return false;\n    }\n    features_.emplace_back(key, std::move(vec));\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t size) {\n    features_.reserve(size);\n  }\n\n private:\n  //! Disable them\n  MultiPassIndexHybridHolderBase(void) = delete;\n\n  //! Members\n  size_t dimension_{0};\n  std::vector<std::pair<uint64_t, ailego::HybridVector<T>>> features_;\n  size_t total_sparse_count_{0};\n};\n\n/*! One-Pass Index Sparse Holder\n */\ntemplate <typename T>\nclass OnePassIndexSparseHolderBase : public IndexSparseHolder {\n public:\n  /*! One-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexSparseHolder::Iterator {\n   public:\n    //! 
Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(OnePassIndexSparseHolderBase *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      holder_->features_.erase(features_iter_++);\n    }\n\n    //! Retrieve sparse count\n    uint32_t sparse_count() const override {\n      return features_iter_->second.sparse_count();\n    }\n\n    //! Retrieve sparse indices\n    const uint32_t *sparse_indices() const override {\n      return features_iter_->second.sparse_indices();\n    }\n\n    //! Retrieve sparse data\n    const void *sparse_data() const override {\n      return features_iter_->second.sparse_data();\n    }\n\n   private:\n    OnePassIndexSparseHolderBase *holder_{nullptr};\n    typename std::list<std::pair<uint64_t, ailego::SparseVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  OnePassIndexSparseHolderBase() {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return false;\n  }\n\n  //! Create a new iterator\n  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexSparseHolder::Iterator::Pointer(\n        new OnePassIndexSparseHolderBase::Iterator(this));\n  }\n\n  //! 
Retrieve sparse count summing up over all the docs\n  size_t total_sparse_count(void) const override {\n    return total_sparse_count_;\n    ;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::SparseVector<T> &vec) {\n    features_.emplace_back(key, vec);\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::SparseVector<T> &&vec) {\n    features_.emplace_back(key, std::move(vec));\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n private:\n  //! Members\n  std::list<std::pair<uint64_t, ailego::SparseVector<T>>> features_;\n  size_t total_sparse_count_{0};\n};\n\n/*! Multi-Pass Index Sparse Holder Base\n */\ntemplate <typename T>\nclass MultiPassIndexSparseHolderBase : public IndexSparseHolder {\n public:\n  /*! Multi-Pass Index Holder Iterator\n   */\n  class Iterator : public IndexSparseHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(MultiPassIndexSparseHolderBase *owner) : holder_(owner) {\n      features_iter_ = holder_->features_.begin();\n    }\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Test if the iterator is valid\n    bool is_valid(void) const override {\n      return (features_iter_ != holder_->features_.end());\n    }\n\n    //! Retrieve primary key\n    uint64_t key(void) const override {\n      return features_iter_->first;\n    }\n\n    //! Next iterator\n    void next(void) override {\n      ++features_iter_;\n    }\n\n    //! Retrieve sparse count\n    uint32_t sparse_count() const override {\n      return features_iter_->second.sparse_count();\n    }\n\n    //! Retrieve sparse indices\n    const uint32_t *sparse_indices() const override {\n      return features_iter_->second.sparse_indices();\n    }\n\n    //! 
Retrieve sparse data\n    const void *sparse_data() const override {\n      return features_iter_->second.sparse_data();\n    }\n\n   private:\n    MultiPassIndexSparseHolderBase *holder_{nullptr};\n    typename std::vector<std::pair<uint64_t, ailego::SparseVector<T>>>::iterator\n        features_iter_{};\n  };\n\n  //! Constructor\n  MultiPassIndexSparseHolderBase() {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  size_t count(void) const override {\n    return features_.size();\n  }\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_UNDEFINED;\n  }\n\n  //! Retrieve if it can multi-pass\n  bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexSparseHolder::Iterator::Pointer(\n        new MultiPassIndexSparseHolderBase::Iterator(this));\n  }\n\n  //! Retrieve sparse count summing up over all the docs\n  size_t total_sparse_count(void) const override {\n    return 0;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::SparseVector<T> &vec) {\n    features_.emplace_back(key, vec);\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::SparseVector<T> &&vec) {\n    features_.emplace_back(key, std::move(vec));\n\n    total_sparse_count_ += vec.sparse_count();\n\n    return true;\n  }\n\n  //! Request a change in capacity\n  void reserve(size_t size) {\n    features_.reserve(size);\n  }\n\n private:\n  //! Members\n  std::vector<std::pair<uint64_t, ailego::SparseVector<T>>> features_;\n  size_t total_sparse_count_{0};\n};\n\n/*! One-Pass Index Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct OnePassIndexHolder;\n\n/*! 
One-Pass Index Holder (BINARY32)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_BINARY32>\n    : public OnePassBinaryIndexHolder<uint32_t> {\n  //! Constructor\n  using OnePassBinaryIndexHolder::OnePassBinaryIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY32;\n  }\n};\n\n/*! One-Pass Index Holder (BINARY64)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_BINARY64>\n    : public OnePassBinaryIndexHolder<uint64_t> {\n  //! Constructor\n  using OnePassBinaryIndexHolder::OnePassBinaryIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY64;\n  }\n};\n\n/*! One-Pass Index Holder (FP16)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_FP16>\n    : public OnePassNumericalIndexHolder<ailego::Float16> {\n  //! Constructor\n  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! One-Pass Index Holder (FP32)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_FP32>\n    : public OnePassNumericalIndexHolder<float> {\n  //! Constructor\n  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! One-Pass Index Holder (FP64)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_FP64>\n    : public OnePassNumericalIndexHolder<double> {\n  //! Constructor\n  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! 
One-Pass Index Holder (INT8)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_INT8>\n    : public OnePassNumericalIndexHolder<int8_t> {\n  //! Constructor\n  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! One-Pass Index Holder (INT16)\n */\ntemplate <>\nstruct OnePassIndexHolder<IndexMeta::DataType::DT_INT16>\n    : public OnePassNumericalIndexHolder<int16_t> {\n  //! Constructor\n  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! Multi-Pass Index Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct MultiPassIndexHolder;\n\n/*! Multi-Pass Index Holder (BINARY32)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32>\n    : public MultiPassBinaryIndexHolder<uint32_t> {\n  //! Constructor\n  using MultiPassBinaryIndexHolder::MultiPassBinaryIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY32;\n  }\n};\n\n/*! Multi-Pass Index Holder (BINARY64)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY64>\n    : public MultiPassBinaryIndexHolder<uint64_t> {\n  //! Constructor\n  using MultiPassBinaryIndexHolder::MultiPassBinaryIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY64;\n  }\n};\n\n/*! Multi-Pass Index Holder (FP16)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_FP16>\n    : public MultiPassNumericalIndexHolder<ailego::Float16> {\n  //! Constructor\n  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! Multi-Pass Index Holder (FP32)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>\n    : public MultiPassNumericalIndexHolder<float> {\n  //! Constructor\n  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! Multi-Pass Index Holder (FP64)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_FP64>\n    : public MultiPassNumericalIndexHolder<double> {\n  //! Constructor\n  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! Multi-Pass Index Holder (INT8)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>\n    : public MultiPassNumericalIndexHolder<int8_t> {\n  //! Constructor\n  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! Multi-Pass Index Holder (INT16)\n */\ntemplate <>\nstruct MultiPassIndexHolder<IndexMeta::DataType::DT_INT16>\n    : public MultiPassNumericalIndexHolder<int16_t> {\n  //! Constructor\n  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! One-Pass Index Hybrid Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct OnePassIndexHybridHolder;\n\n/*! 
One-Pass Index Hybrid Holder (FP16)\n */\ntemplate <>\nstruct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP16>\n    : public OnePassIndexHybridHolderBase<ailego::Float16> {\n  //! Constructor\n  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! One-Pass Index Hybrid Holder (FP32)\n */\ntemplate <>\nstruct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP32>\n    : public OnePassIndexHybridHolderBase<float> {\n  //! Constructor\n  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! One-Pass Index Hybrid Holder (FP64)\n */\ntemplate <>\nstruct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP64>\n    : public OnePassIndexHybridHolderBase<double> {\n  //! Constructor\n  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! One-Pass Index Hybrid Holder (INT8)\n */\ntemplate <>\nstruct OnePassIndexHybridHolder<IndexMeta::DataType::DT_INT8>\n    : public OnePassIndexHybridHolderBase<int8_t> {\n  //! Constructor\n  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! One-Pass Index Hybrid Holder (INT16)\n */\ntemplate <>\nstruct OnePassIndexHybridHolder<IndexMeta::DataType::DT_INT16>\n    : public OnePassIndexHybridHolderBase<int16_t> {\n  //! Constructor\n  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! Multi-Pass Index Hybrid Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct MultiPassIndexHybridHolder;\n\n/*! Multi-Pass Index Hybrid Holder (FP16)\n */\ntemplate <>\nstruct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP16>\n    : public MultiPassIndexHybridHolderBase<ailego::Float16> {\n  //! Constructor\n  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! Multi-Pass Index Hybrid Holder (FP32)\n */\ntemplate <>\nstruct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP32>\n    : public MultiPassIndexHybridHolderBase<float> {\n  //! Constructor\n  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! Multi-Pass Index Hybrid Holder (FP64)\n */\ntemplate <>\nstruct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP64>\n    : public MultiPassIndexHybridHolderBase<double> {\n  //! Constructor\n  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! Multi-Pass Index Hybrid Holder (INT8)\n */\ntemplate <>\nstruct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_INT8>\n    : public MultiPassIndexHybridHolderBase<int8_t> {\n  //! Constructor\n  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! 
Multi-Pass Index Hybrid Holder (INT16)\n */\ntemplate <>\nstruct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_INT16>\n    : public MultiPassIndexHybridHolderBase<int16_t> {\n  //! Constructor\n  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! One-Pass Index Sparse Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct OnePassIndexSparseHolder;\n\n/*! One-Pass Index Sparse Holder (FP16)\n */\ntemplate <>\nstruct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP16>\n    : public OnePassIndexSparseHolderBase<ailego::Float16> {\n  //! Constructor\n  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! One-Pass Index Sparse Holder (FP32)\n */\ntemplate <>\nstruct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>\n    : public OnePassIndexSparseHolderBase<float> {\n  //! Constructor\n  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! One-Pass Index Sparse Holder (FP64)\n */\ntemplate <>\nstruct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP64>\n    : public OnePassIndexSparseHolderBase<double> {\n  //! Constructor\n  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! One-Pass Index Sparse Holder (INT8)\n */\ntemplate <>\nstruct OnePassIndexSparseHolder<IndexMeta::DataType::DT_INT8>\n    : public OnePassIndexSparseHolderBase<int8_t> {\n  //! 
Constructor\n  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! One-Pass Index Sparse Holder (INT16)\n */\ntemplate <>\nstruct OnePassIndexSparseHolder<IndexMeta::DataType::DT_INT16>\n    : public OnePassIndexSparseHolderBase<int16_t> {\n  //! Constructor\n  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! Multi-Pass Index Sparse Holder\n */\ntemplate <IndexMeta::DataType FT>\nstruct MultiPassIndexSparseHolder;\n\n/*! Multi-Pass Index Sparse Holder (FP16)\n */\ntemplate <>\nstruct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP16>\n    : public MultiPassIndexSparseHolderBase<ailego::Float16> {\n  //! Constructor\n  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! Multi-Pass Index Sparse Holder (FP32)\n */\ntemplate <>\nstruct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>\n    : public MultiPassIndexSparseHolderBase<float> {\n  //! Constructor\n  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! Multi-Pass Index Sparse Holder (FP64)\n */\ntemplate <>\nstruct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP64>\n    : public MultiPassIndexSparseHolderBase<double> {\n  //! Constructor\n  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! Multi-Pass Index Sparse Holder (INT8)\n */\ntemplate <>\nstruct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_INT8>\n    : public MultiPassIndexSparseHolderBase<int8_t> {\n  //! Constructor\n  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! Multi-Pass Index Sparse Holder (INT16)\n */\ntemplate <>\nstruct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_INT16>\n    : public MultiPassIndexSparseHolderBase<int16_t> {\n  //! Constructor\n  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! Random Access Index Holder\n */\nclass RandomAccessIndexHolder : public IndexHolder {\n public:\n  //! Index Holder Iterator Pointer\n  typedef std::shared_ptr<RandomAccessIndexHolder> Pointer;\n\n  /*! Random Access Index Holder Iterator\n   */\n  class Iterator : public IndexHolder::Iterator {\n   public:\n    //! Index Holder Iterator Pointer\n    typedef std::unique_ptr<Iterator> Pointer;\n\n    //! Constructor\n    Iterator(RandomAccessIndexHolder *owner) : holder_(owner) {}\n\n    //! Destructor\n    virtual ~Iterator(void) {}\n\n    //! Retrieve pointer of data\n    virtual const void *data(void) const override {\n      return holder_->element(id_);\n    }\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return id_ < holder_->count();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return holder_->key(id_);\n    }\n\n    //! 
Next iterator\n    virtual void next(void) override {\n      ++id_;\n    }\n\n   private:\n    //! Members\n    RandomAccessIndexHolder *holder_{nullptr};\n    uint32_t id_{0};\n  };\n\n  //! Constructor\n  RandomAccessIndexHolder(const IndexMeta &meta)\n      : features_(std::make_shared<CompactIndexFeatures>(meta)) {}\n\n  //! Retrieve count of elements in holder (-1 indicates unknown)\n  virtual size_t count(void) const override {\n    return features_->count();\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const override {\n    return features_->dimension();\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const override {\n    return features_->data_type();\n  }\n\n  //! Retrieve element size in bytes\n  virtual size_t element_size(void) const override {\n    return features_->element_size();\n  }\n\n  //! Retrieve if it can multi-pass\n  virtual bool multipass(void) const override {\n    return true;\n  }\n\n  //! Create a new iterator\n  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return IndexHolder::Iterator::Pointer(\n        new RandomAccessIndexHolder::Iterator(this));\n  }\n\n  void reserve(size_t elems) {\n    features_->reserve(elems);\n    keys_.reserve(elems);\n  }\n\n  //! Append an element into holder\n  void emplace(uint64_t pkey, const void *vec) {\n    features_->emplace(vec);\n    keys_.emplace_back(pkey);\n  }\n\n  //! Retrieve feature via local id\n  const void *element(size_t id) const {\n    return features_->element(id);\n  }\n\n  //! Retrieve key via local id\n  uint64_t key(size_t id) const {\n    ailego_assert_with(id < keys_.size(), \"Index Overflow\");\n    return keys_[id];\n  }\n\n private:\n  //! Disable them\n  RandomAccessIndexHolder(void) = delete;\n\n  //! Members\n  CompactIndexFeatures::Pointer features_{};\n  std::vector<uint64_t> keys_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_logger.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdarg>\n#include <memory>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"index_module.h\"\n\n#define ZVEC_LOG_IMPL(level, format, ...)                                   \\\n  do {                                                                      \\\n    if (zvec::core::IndexLoggerBroker::IsLevelEnabled(level)) {             \\\n      zvec::core::IndexLoggerBroker::Log(level, __FILE__, __LINE__, format, \\\n                                         ##__VA_ARGS__);                    \\\n    }                                                                       \\\n  } while (0)\n\n//! Log Debug Message\n#ifndef LOG_DEBUG\n#define LOG_DEBUG(format, ...) \\\n  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_DEBUG, format, ##__VA_ARGS__)\n#endif\n\n//! Log Information Message\n#ifndef LOG_INFO\n#define LOG_INFO(format, ...) \\\n  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_INFO, format, ##__VA_ARGS__)\n#endif\n\n//! Log Warn Message\n#ifndef LOG_WARN\n#define LOG_WARN(format, ...) \\\n  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_WARN, format, ##__VA_ARGS__)\n#endif\n\n//! Log Error Message\n#ifndef LOG_ERROR\n#define LOG_ERROR(format, ...) \\\n  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_ERROR, format, ##__VA_ARGS__)\n#endif\n\n//! 
Log Fatal Message\n#ifndef LOG_FATAL\n#define LOG_FATAL(format, ...) \\\n  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_FATAL, format, ##__VA_ARGS__)\n#endif\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Logger\n */\nstruct IndexLogger : public IndexModule {\n  //! Index Logger Pointer\n  typedef std::shared_ptr<IndexLogger> Pointer;\n\n  static const int LEVEL_DEBUG;\n  static const int LEVEL_INFO;\n  static const int LEVEL_WARN;\n  static const int LEVEL_ERROR;\n  static const int LEVEL_FATAL;\n\n  //! Retrieve string of level\n  static const char *LevelString(int level) {\n    static const char *info[] = {\"DEBUG\", \" INFO\", \" WARN\", \"ERROR\", \"FATAL\"};\n    if (level < (int)(sizeof(info) / sizeof(info[0]))) {\n      return info[level];\n    }\n    return \"\";\n  }\n\n  //! Retrieve symbol of level\n  static char LevelSymbol(int level) {\n    static const char info[5] = {'D', 'I', 'W', 'E', 'F'};\n    if (level < (int)(sizeof(info) / sizeof(info[0]))) {\n      return info[level];\n    }\n    return ' ';\n  }\n\n  //! Destructor\n  virtual ~IndexLogger(void) {}\n\n  //! Initialize Logger\n  virtual int init(const zvec::ailego::Params &params) = 0;\n\n  //! Cleanup Logger\n  virtual int cleanup(void) = 0;\n\n  //! Log Message\n  virtual void log(int level, const char *file, int line, const char *format,\n                   va_list args) = 0;\n};\n\n/*! Index Logger Broker\n */\nclass IndexLoggerBroker {\n public:\n  //! Register Logger\n  static IndexLogger::Pointer Register(IndexLogger::Pointer logger) {\n    IndexLogger::Pointer ret = std::move(logger_);\n    logger_ = std::move(logger);\n    return ret;\n  }\n\n  //! Register Logger with init params\n  static int Register(IndexLogger::Pointer logger,\n                      const ailego::Params &params) {\n    //! 
Cleanup the previous, before initializing the new one\n    if (logger_) {\n      logger_->cleanup();\n    }\n    logger_ = std::move(logger);\n    return logger_->init(params);\n  }\n\n  //! Unregister Logger\n  static void Unregister(void) {\n    logger_ = nullptr;\n  }\n\n  //! Set Level of Logger\n  static void SetLevel(int level) {\n    logger_level_ = level;\n  }\n\n  //! Check if log level is enabled\n  static bool IsLevelEnabled(int level) {\n    return logger_level_ <= level && logger_;\n  }\n\n  //! Log Message\n  __attribute__((format(printf, 4, 5))) static void Log(\n      int level, const char *file, int line, const char *format, ...) {\n    if (IsLevelEnabled(level)) {\n      va_list args;\n      va_start(args, format);\n      logger_->log(level, file, line, format, args);\n      va_end(args);\n    }\n  }\n\n private:\n  //! Disable them\n  IndexLoggerBroker(void) = delete;\n  IndexLoggerBroker(const IndexLoggerBroker &) = delete;\n  IndexLoggerBroker(IndexLoggerBroker &&) = delete;\n\n  //! Members\n  static int logger_level_;\n  static IndexLogger::Pointer logger_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_mapping.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <map>\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/core/framework/index_format.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Mapping\n */\nclass IndexMapping {\n public:\n  /*! Index Mapping Segment\n   */\n  class Segment {\n   public:\n    //! Constructor\n    Segment(void) {}\n\n    //! Constructor\n    Segment(IndexFormat::SegmentMeta *segmeta) : meta_(segmeta) {}\n\n    //! Flush the segment\n    bool flush(void) const {\n      ailego_false_if_false(this->meta_ && this->data_);\n      return ailego::File::MemoryFlush(\n          this->data_, this->meta_->data_size + this->meta_->padding_size);\n    }\n\n    //! Retrieve mapping address of the segment\n    void *data(void) const {\n      return data_;\n    }\n\n    //! Retrieve pointer of SegmentMeta\n    IndexFormat::SegmentMeta *meta(void) const {\n      return meta_;\n    }\n\n    //! Retrieve dirty flag of the segment\n    bool dirty(void) const {\n      return dirty_;\n    }\n\n    //! Set the segment as dirty\n    void set_dirty(void) const {\n      dirty_ = true;\n    }\n\n   private:\n    friend class IndexMapping;\n\n    //! Set the mapping address of the segment\n    void set_data(void *addr) {\n      data_ = addr;\n    }\n\n    //! 
Clear the dirty flag\n    void reset_dirty(void) const {\n      dirty_ = false;\n    }\n\n   private:\n    //! Members\n    IndexFormat::SegmentMeta *meta_{nullptr};\n    void *data_{nullptr};\n    mutable bool dirty_{false};\n  };\n\n  struct SegmentInfo {\n    Segment segment;\n    uint64_t segment_header_start_offset;\n    IndexFormat::MetaHeader *segment_header;\n  };\n\n  //! Constructor\n  IndexMapping(void) {}\n\n  //! Constructor\n  IndexMapping(IndexMapping &&rhs)\n      : segment_ids_offset_(rhs.segment_ids_offset_),\n        segment_start_(rhs.segment_start_),\n        header_(rhs.header_),\n        footer_(rhs.footer_),\n        segments_(std::move(rhs.segments_)),\n        file_(std::move(rhs.file_)) {\n    rhs.segment_ids_offset_ = 0;\n    rhs.segment_start_ = nullptr;\n    rhs.header_ = nullptr;\n    rhs.footer_ = nullptr;\n  }\n\n  //! Assignment\n  IndexMapping &operator=(IndexMapping &&rhs) {\n    segment_ids_offset_ = rhs.segment_ids_offset_;\n    segment_start_ = rhs.segment_start_;\n    header_ = rhs.header_;\n    footer_ = rhs.footer_;\n    segments_ = std::move(rhs.segments_);\n    file_ = std::move(rhs.file_);\n    rhs.segment_ids_offset_ = 0;\n    rhs.segment_start_ = nullptr;\n    rhs.header_ = nullptr;\n    rhs.footer_ = nullptr;\n    return *this;\n  }\n\n  //! Open a index file\n  int open(const std::string &path, bool cow, bool full_mode);\n\n  //! Create a index file\n  int create(const std::string &path, size_t segs_size);\n\n  //! Close the index\n  void close(void);\n\n  //! Refresh meta information (checksum, update time, etc.)\n  void refresh(uint64_t check_point);\n\n  //! Append a segment into index\n  int append(const std::string &id, size_t size);\n\n  //! Map a segment by id\n  Segment *map(const std::string &id, bool warmup, bool lock);\n\n  //! Unmap a segment by id\n  void unmap(const std::string &id);\n\n  //! Unmap all segments\n  void unmap_all(void);\n\n  //! Flush the index mapping\n  int flush(void);\n\n  //! 
Test if the segment exists\n  bool has(const std::string &id) const {\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! Retrieve count of segments\n  size_t segment_count(void) const {\n    return segments_.size();\n  }\n\n  //! Retrieve size of index mapping\n  size_t index_size(void) const {\n    return index_size_;\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const {\n    return (header_ ? header_->magic : 0);\n  }\n\n  //! Retrieve header information\n  const IndexFormat::MetaHeader &header(void) const {\n    return *header_;\n  }\n\n  //! Retrieve footer information\n  const IndexFormat::MetaFooter &footer(void) const {\n    return *footer_;\n  }\n\n  bool huge_page() const {\n    return huge_page_;\n  }\n\n protected:\n  //! Initialize index file mapping\n  int init_index_mapping(size_t len);\n\n  bool Ishugetlbfs(const std::string &path) const;\n\n  int init_meta_section();\n  int init_hugepage_meta_section();\n\n private:\n  //! Disable them\n  IndexMapping(const IndexMapping &) = delete;\n  IndexMapping &operator=(const IndexMapping &) = delete;\n\n  //! Members\n  uint32_t segment_ids_offset_{0};\n  IndexFormat::SegmentMeta *segment_start_{nullptr};\n  IndexFormat::MetaHeader *header_{nullptr};\n  std::map<uint64_t, IndexFormat::MetaHeader *> header_addr_map_{};\n  IndexFormat::MetaFooter *footer_{nullptr};\n  std::map<std::string, SegmentInfo> segments_{};\n  size_t index_size_{0u};\n  ailego::File file_{};\n  std::string path_;\n  bool copy_on_write_{false};\n  bool full_mode_{false};\n  bool header_dirty_{false};\n  bool huge_page_{false};\n  size_t seg_meta_capacity_{0u};\n  uint64_t current_header_start_offset_{0u};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/core/framework/index_memory.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <iterator>\n#include <map>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <utility>\n#include <vector>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Memory\n */\nclass IndexMemory {\n public:\n  /*! Index Memory Block\n   */\n  class Block {\n   public:\n    //! Constructor\n    Block(size_t sz) : buffer_(sz) {}\n\n    //! Constructor\n    Block(const Block &rhs) : buffer_(rhs.buffer_) {}\n\n    //! Constructor\n    Block(Block &&rhs) noexcept : buffer_(std::move(rhs.buffer_)) {}\n\n    //! Assignment\n    Block &operator=(const Block &rhs) {\n      buffer_ = rhs.buffer_;\n      return *this;\n    }\n\n    //! Assignment\n    Block &operator=(Block &&rhs) {\n      buffer_ = std::move(rhs.buffer_);\n      return *this;\n    }\n\n    //! Retrieve size of buffer\n    size_t size(void) const {\n      return buffer_.size();\n    }\n\n    //! Append data into the block\n    size_t append(const void *data, size_t len) {\n      std::copy(reinterpret_cast<const uint8_t *>(data),\n                reinterpret_cast<const uint8_t *>(data) + len,\n                std::back_inserter(buffer_));\n      return len;\n    }\n\n    //! 
Write data into the block\n    size_t write(size_t off, const void *data, size_t len) {\n      size_t region_size = buffer_.size();\n      if (off + len > region_size) {\n        if (off > region_size) {\n          off = region_size;\n        }\n        len = region_size - off;\n      }\n      std::copy(reinterpret_cast<const uint8_t *>(data),\n                reinterpret_cast<const uint8_t *>(data) + len,\n                buffer_.data() + off);\n      return len;\n    }\n\n    //! Fetch data from the storage (with own buffer)\n    size_t fetch(size_t off, void *buf, size_t len) const {\n      size_t region_size = buffer_.size();\n      if (off + len > region_size) {\n        if (off > region_size) {\n          off = region_size;\n        }\n        len = region_size - off;\n      }\n      std::copy(buffer_.data(), buffer_.data() + len,\n                reinterpret_cast<uint8_t *>(buf));\n      return len;\n    }\n\n    //! Read data from the storage (Zero-copy)\n    size_t read(size_t off, const void **data, size_t len) {\n      size_t region_size = buffer_.size();\n      if (off + len > region_size) {\n        if (off > region_size) {\n          off = region_size;\n        }\n        len = region_size - off;\n      }\n      *data = buffer_.data() + off;\n      return len;\n    }\n\n   private:\n    //! Members\n    std::vector<uint8_t> buffer_{};\n  };\n\n  /*! Index Memory Rope\n   */\n  class Rope {\n   public:\n    //! Index Memory Rope Pointer\n    typedef std::shared_ptr<Rope> Pointer;\n\n    //! Constructor\n    Rope(void) {}\n\n    //! Constructor\n    Rope(const Rope &rhs) : blocks_(rhs.blocks_) {}\n\n    //! Constructor\n    Rope(Rope &&rhs) : blocks_(std::move(rhs.blocks_)) {}\n\n    //! Assignment\n    Rope &operator=(const Rope &rhs) {\n      blocks_ = rhs.blocks_;\n      return *this;\n    }\n\n    //! Assignment\n    Rope &operator=(Rope &&rhs) {\n      blocks_ = std::move(rhs.blocks_);\n      return *this;\n    }\n\n    //! 
Retrieve the block at index n\n    Block &operator[](size_t n) {\n      return blocks_[n];\n    }\n\n    //! Retrieve the block at index n\n    const Block &operator[](size_t n) const {\n      return blocks_[n];\n    }\n\n    //! Retrieve count of blocks\n    size_t count(void) const {\n      return blocks_.size();\n    }\n\n    //! Retrieve memory size of rope\n    size_t size(void) const {\n      size_t sum = 0u;\n      for (const auto &it : blocks_) {\n        sum += it.size();\n      }\n      return sum;\n    }\n\n    //! Test if the rope is empty\n    bool empty(void) const {\n      return blocks_.empty();\n    }\n\n    //! Append a new memory block\n    Block &append(size_t init_size) {\n      return *blocks_.emplace(blocks_.end(), init_size);\n    }\n\n   private:\n    //! Members\n    std::vector<Block> blocks_{};\n  };\n\n  //! Constructor\n  IndexMemory(void) {}\n\n  //! Constructor\n  IndexMemory(IndexMemory &&rhs) {\n    std::lock_guard<std::mutex> latch(rhs.mutex_);\n    pool_ = std::move(rhs.pool_);\n  }\n\n  //! Assignment\n  IndexMemory &operator=(IndexMemory &&rhs) {\n    std::lock_guard<std::mutex> latch1(mutex_);\n    {\n      std::lock_guard<std::mutex> latch2(rhs.mutex_);\n      pool_ = std::move(rhs.pool_);\n    }\n    return *this;\n  }\n\n  //! Retrieve the singleton memory\n  static IndexMemory *Instance(void) {\n    static IndexMemory mem;\n    return (&mem);\n  }\n\n  //! Clear the memory\n  void clear(void) {\n    std::lock_guard<std::mutex> latch(mutex_);\n    pool_.clear();\n  }\n\n  //! Test if the element exists\n  bool has(const std::string &key) const {\n    std::lock_guard<std::mutex> latch(mutex_);\n    return (pool_.find(key) != pool_.end());\n  }\n\n  //! Create or overwrite a new memory rope\n  Rope::Pointer create(const std::string &key) {\n    std::lock_guard<std::mutex> latch(mutex_);\n    auto it = pool_.emplace(key, nullptr).first;\n    it->second = std::make_shared<Rope>();\n    return it->second;\n  }\n\n  //! 
Create or overwrite a new memory rope\n  Rope::Pointer create(std::string &&key) {\n    std::lock_guard<std::mutex> latch(mutex_);\n    auto it = pool_.emplace(std::move(key), nullptr).first;\n    it->second = std::make_shared<Rope>();\n    return it->second;\n  }\n\n  //! Open a memory rope (read only)\n  Rope::Pointer open(const std::string &key) const {\n    std::lock_guard<std::mutex> latch(mutex_);\n    auto it = pool_.find(key);\n    if (it == pool_.end()) {\n      return nullptr;\n    }\n    return it->second;\n  }\n\n  //! Remove a memory rope\n  void remove(const std::string &key) {\n    std::lock_guard<std::mutex> latch(mutex_);\n    auto it = pool_.find(key);\n    if (it != pool_.end()) {\n      pool_.erase(it);\n    }\n  }\n\n private:\n  //! Disable them\n  IndexMemory(const IndexMemory &) = delete;\n  IndexMemory &operator=(const IndexMemory &) = delete;\n\n  //! Members\n  std::map<std::string, Rope::Pointer> pool_{};\n  mutable std::mutex mutex_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_meta.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Meta\n */\nclass IndexMeta {\n public:\n  /*! Meta Types\n   */\n  enum MetaType { MT_UNDEFINED = 0, MT_DENSE = 1, MT_SPARSE = 2 };\n\n  /*! Data Types\n   */\n  enum DataType {\n    DT_UNDEFINED = 0,\n    DT_FP16 = 1,\n    DT_FP32 = 2,\n    DT_FP64 = 3,\n    DT_INT8 = 4,\n    DT_INT16 = 5,\n    DT_INT4 = 6,\n    DT_BINARY32 = 7,\n    DT_BINARY64 = 8,\n  };\n\n  /*! Major Orders\n   */\n  enum MajorOrder {\n    MO_UNDEFINED = 0,\n    MO_ROW = 1,\n    MO_COLUMN = 2,\n  };\n\n  //! Constructor\n  IndexMeta(void) {\n    this->set_meta(DataType::DT_FP32, 128u);\n    this->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  }\n\n  //! Constructor\n  IndexMeta(DataType data_type, uint32_t dim) {\n    meta_type_ = MT_DENSE;\n    this->set_meta(data_type, dim);\n    this->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  }\n\n  //! Constructor\n  IndexMeta(MetaType meta_type, DataType data_type) {\n    meta_type_ = meta_type;\n\n    this->set_meta(data_type, 0);\n    this->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  }\n\n  //! 
Constructor\n  IndexMeta(const IndexMeta &rhs)\n      : meta_type_{rhs.meta_type_},\n        major_order_(rhs.major_order_),\n        data_type_(rhs.data_type_),\n        dimension_(rhs.dimension_),\n        unit_size_(rhs.unit_size_),\n        element_size_(rhs.element_size_),\n        space_id_(rhs.space_id_),\n        metric_revision_(rhs.metric_revision_),\n        converter_revision_(rhs.converter_revision_),\n        reformer_revision_(rhs.reformer_revision_),\n        trainer_revision_(rhs.trainer_revision_),\n        builder_revision_(rhs.builder_revision_),\n        reducer_revision_(rhs.reducer_revision_),\n        searcher_revision_(rhs.searcher_revision_),\n        streamer_revision_(rhs.streamer_revision_),\n        metric_name_(rhs.metric_name_),\n        converter_name_(rhs.converter_name_),\n        reformer_name_(rhs.reformer_name_),\n        trainer_name_(rhs.trainer_name_),\n        builder_name_(rhs.builder_name_),\n        reducer_name_(rhs.reducer_name_),\n        searcher_name_(rhs.searcher_name_),\n        streamer_name_(rhs.streamer_name_),\n        metric_params_(rhs.metric_params_),\n        converter_params_(rhs.converter_params_),\n        reformer_params_(rhs.reformer_params_),\n        trainer_params_(rhs.trainer_params_),\n        builder_params_(rhs.builder_params_),\n        reducer_params_(rhs.reducer_params_),\n        searcher_params_(rhs.searcher_params_),\n        streamer_params_(rhs.streamer_params_),\n        attributes_(rhs.attributes_) {}\n\n  //! 
Constructor\n  IndexMeta(IndexMeta &&rhs)\n      : meta_type_{rhs.meta_type_},\n        major_order_(rhs.major_order_),\n        data_type_(rhs.data_type_),\n        dimension_(rhs.dimension_),\n        unit_size_(rhs.unit_size_),\n        element_size_(rhs.element_size_),\n        space_id_(rhs.space_id_),\n        metric_revision_(rhs.metric_revision_),\n        converter_revision_(rhs.converter_revision_),\n        reformer_revision_(rhs.reformer_revision_),\n        trainer_revision_(rhs.trainer_revision_),\n        builder_revision_(rhs.builder_revision_),\n        reducer_revision_(rhs.reducer_revision_),\n        searcher_revision_(rhs.searcher_revision_),\n        streamer_revision_(rhs.streamer_revision_),\n        metric_name_(std::move(rhs.metric_name_)),\n        converter_name_(std::move(rhs.converter_name_)),\n        reformer_name_(std::move(rhs.reformer_name_)),\n        trainer_name_(std::move(rhs.trainer_name_)),\n        builder_name_(std::move(rhs.builder_name_)),\n        reducer_name_(std::move(rhs.reducer_name_)),\n        searcher_name_(std::move(rhs.searcher_name_)),\n        streamer_name_(std::move(rhs.streamer_name_)),\n        metric_params_(std::move(rhs.metric_params_)),\n        converter_params_(std::move(rhs.converter_params_)),\n        reformer_params_(std::move(rhs.reformer_params_)),\n        trainer_params_(std::move(rhs.trainer_params_)),\n        builder_params_(std::move(rhs.builder_params_)),\n        reducer_params_(std::move(rhs.reducer_params_)),\n        searcher_params_(std::move(rhs.searcher_params_)),\n        streamer_params_(std::move(rhs.streamer_params_)),\n        attributes_(std::move(rhs.attributes_)) {}\n\n  //! 
Assignment\n  IndexMeta &operator=(const IndexMeta &rhs) {\n    meta_type_ = rhs.meta_type_;\n    major_order_ = rhs.major_order_;\n    data_type_ = rhs.data_type_;\n    dimension_ = rhs.dimension_;\n    unit_size_ = rhs.unit_size_;\n    element_size_ = rhs.element_size_;\n    space_id_ = rhs.space_id_;\n    metric_revision_ = rhs.metric_revision_;\n    converter_revision_ = rhs.converter_revision_;\n    reformer_revision_ = rhs.reformer_revision_;\n    trainer_revision_ = rhs.trainer_revision_;\n    builder_revision_ = rhs.builder_revision_;\n    reducer_revision_ = rhs.reducer_revision_;\n    searcher_revision_ = rhs.searcher_revision_;\n    streamer_revision_ = rhs.streamer_revision_;\n    metric_name_ = std::move(rhs.metric_name_);\n    converter_name_ = std::move(rhs.converter_name_);\n    reformer_name_ = std::move(rhs.reformer_name_);\n    trainer_name_ = std::move(rhs.trainer_name_);\n    builder_name_ = std::move(rhs.builder_name_);\n    reducer_name_ = std::move(rhs.reducer_name_);\n    searcher_name_ = std::move(rhs.searcher_name_);\n    streamer_name_ = std::move(rhs.streamer_name_);\n    metric_params_ = std::move(rhs.metric_params_);\n    converter_params_ = std::move(rhs.converter_params_);\n    reformer_params_ = std::move(rhs.reformer_params_);\n    trainer_params_ = std::move(rhs.trainer_params_);\n    builder_params_ = std::move(rhs.builder_params_);\n    reducer_params_ = std::move(rhs.reducer_params_);\n    searcher_params_ = std::move(rhs.searcher_params_);\n    streamer_params_ = std::move(rhs.streamer_params_);\n    attributes_ = std::move(rhs.attributes_);\n\n    return *this;\n  }\n\n  //! 
Assignment\n  IndexMeta &operator=(IndexMeta &&rhs) {\n    meta_type_ = rhs.meta_type_;\n    major_order_ = rhs.major_order_;\n    data_type_ = rhs.data_type_;\n    dimension_ = rhs.dimension_;\n    unit_size_ = rhs.unit_size_;\n    element_size_ = rhs.element_size_;\n    space_id_ = rhs.space_id_;\n    metric_revision_ = rhs.metric_revision_;\n    converter_revision_ = rhs.converter_revision_;\n    reformer_revision_ = rhs.reformer_revision_;\n    trainer_revision_ = rhs.trainer_revision_;\n    builder_revision_ = rhs.builder_revision_;\n    reducer_revision_ = rhs.reducer_revision_;\n    searcher_revision_ = rhs.searcher_revision_;\n    streamer_revision_ = rhs.streamer_revision_;\n    metric_name_ = std::move(rhs.metric_name_);\n    converter_name_ = std::move(rhs.converter_name_);\n    reformer_name_ = std::move(rhs.reformer_name_);\n    trainer_name_ = std::move(rhs.trainer_name_);\n    builder_name_ = std::move(rhs.builder_name_);\n    reducer_name_ = std::move(rhs.reducer_name_);\n    searcher_name_ = std::move(rhs.searcher_name_);\n    streamer_name_ = std::move(rhs.streamer_name_);\n    metric_params_ = std::move(rhs.metric_params_);\n    converter_params_ = std::move(rhs.converter_params_);\n    reformer_params_ = std::move(rhs.reformer_params_);\n    trainer_params_ = std::move(rhs.trainer_params_);\n    builder_params_ = std::move(rhs.builder_params_);\n    reducer_params_ = std::move(rhs.reducer_params_);\n    searcher_params_ = std::move(rhs.searcher_params_);\n    streamer_params_ = std::move(rhs.streamer_params_);\n    attributes_ = std::move(rhs.attributes_);\n\n    return *this;\n  }\n\n  //! 
Reset the meta\n  void clear(void) {\n    meta_type_ = MetaType::MT_DENSE;\n    major_order_ = MajorOrder::MO_UNDEFINED;\n    data_type_ = DataType::DT_UNDEFINED;\n    dimension_ = 0;\n    unit_size_ = 0;\n    element_size_ = 0;\n    space_id_ = 0;\n    metric_revision_ = 0;\n    converter_revision_ = 0;\n    reformer_revision_ = 0;\n    trainer_revision_ = 0;\n    builder_revision_ = 0;\n    reducer_revision_ = 0;\n    searcher_revision_ = 0;\n    streamer_revision_ = 0;\n    metric_name_.clear();\n    converter_name_.clear();\n    reformer_name_.clear();\n    trainer_name_.clear();\n    builder_name_.clear();\n    reducer_name_.clear();\n    searcher_name_.clear();\n    streamer_name_.clear();\n    metric_params_.clear();\n    converter_params_.clear();\n    reformer_params_.clear();\n    trainer_params_.clear();\n    builder_params_.clear();\n    reducer_params_.clear();\n    searcher_params_.clear();\n    streamer_params_.clear();\n    attributes_.clear();\n  }\n\n  //! Retrieve meta type\n  MetaType meta_type(void) const {\n    return meta_type_;\n  }\n\n  //! Retrieve major order information\n  MajorOrder major_order(void) const {\n    return major_order_;\n  }\n\n  //! Retrieve type information\n  DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Retrieve dimension\n  uint32_t dimension(void) const {\n    return dimension_;\n  }\n\n  //! Retrieve unit size in bytes\n  uint32_t unit_size(void) const {\n    return unit_size_;\n  }\n\n  //! Retrieve element size in bytes\n  uint32_t element_size(void) const {\n    return element_size_;\n  }\n\n  //! Retrieve space id\n  uint64_t space_id(void) const {\n    return space_id_;\n  }\n\n  //! Retrieve revision of metric\n  uint32_t metric_revision(void) const {\n    return metric_revision_;\n  }\n\n  //! Retrieve revision of converter\n  uint32_t converter_revision(void) const {\n    return converter_revision_;\n  }\n\n  //! 
Retrieve revision of reformer\n  uint32_t reformer_revision(void) const {\n    return reformer_revision_;\n  }\n\n  //! Retrieve revision of trainer\n  uint32_t trainer_revision(void) const {\n    return trainer_revision_;\n  }\n\n  //! Retrieve revision of builder\n  uint32_t builder_revision(void) const {\n    return builder_revision_;\n  }\n\n  //! Retrieve revision of searcher\n  uint32_t searcher_revision(void) const {\n    return searcher_revision_;\n  }\n\n  //! Retrieve revision of reducer\n  uint32_t reducer_revision(void) const {\n    return reducer_revision_;\n  }\n\n  //! Retrieve revision of streamer\n  uint32_t streamer_revision(void) const {\n    return streamer_revision_;\n  }\n\n  //! Retrieve name of metric\n  const std::string &metric_name(void) const {\n    return metric_name_;\n  }\n\n  //! Retrieve name of converter\n  const std::string &converter_name(void) const {\n    return converter_name_;\n  }\n\n  //! Retrieve name of reformer\n  const std::string &reformer_name(void) const {\n    return reformer_name_;\n  }\n\n  //! Retrieve name of trainer\n  const std::string &trainer_name(void) const {\n    return trainer_name_;\n  }\n\n  //! Retrieve name of builder\n  const std::string &builder_name(void) const {\n    return builder_name_;\n  }\n\n  //! Retrieve name of reducer\n  const std::string &reducer_name(void) const {\n    return reducer_name_;\n  }\n\n  //! Retrieve name of searcher\n  const std::string &searcher_name(void) const {\n    return searcher_name_;\n  }\n\n  //! Retrieve name of streamer\n  const std::string &streamer_name(void) const {\n    return streamer_name_;\n  }\n\n  //! Retrieve metric params\n  const ailego::Params &metric_params(void) const {\n    return metric_params_;\n  }\n\n  //! Retrieve converter params\n  const ailego::Params &converter_params(void) const {\n    return converter_params_;\n  }\n\n  //! 
Retrieve reformer params\n  const ailego::Params &reformer_params(void) const {\n    return reformer_params_;\n  }\n\n  //! Retrieve trainer params\n  const ailego::Params &trainer_params(void) const {\n    return trainer_params_;\n  }\n\n  //! Retrieve builder params\n  const ailego::Params &builder_params(void) const {\n    return builder_params_;\n  }\n\n  //! Retrieve reducer params\n  const ailego::Params &reducer_params(void) const {\n    return reducer_params_;\n  }\n\n  //! Retrieve searcher params\n  const ailego::Params &searcher_params(void) const {\n    return searcher_params_;\n  }\n\n  //! Retrieve streamer params\n  const ailego::Params &streamer_params(void) const {\n    return streamer_params_;\n  }\n\n  //! Retrieve attributes\n  const ailego::Params &attributes(void) const {\n    return attributes_;\n  }\n\n  //! Retrieve mutable attributes\n  ailego::Params *mutable_attributes(void) {\n    return &attributes_;\n  }\n\n  //! Set meta type\n  void set_meta_type(MetaType meta_type) {\n    meta_type_ = meta_type;\n  }\n\n  //! Set major order of features\n  void set_major_order(MajorOrder major_order) {\n    major_order_ = major_order;\n  }\n\n  //! Set dimension of feature\n  void set_dimension(uint32_t dim) {\n    dimension_ = dim;\n    element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dim);\n  }\n\n  //! Set meta information of feature\n  void set_data_type(DataType data_type) {\n    data_type_ = data_type;\n    unit_size_ = UnitSizeof(data_type);\n  }\n\n  //! Set meta information of feature\n  void set_meta(DataType data_type, uint32_t unit, uint32_t dim) {\n    data_type_ = data_type;\n    dimension_ = dim;\n    unit_size_ = unit;\n    element_size_ = ElementSizeof(data_type, unit, dim);\n  }\n\n  //! Set meta information of feature\n  void set_meta(DataType data_type, uint32_t dim) {\n    this->set_meta(data_type, UnitSizeof(data_type), dim);\n  }\n\n  //! 
Set information of metric\n  template <typename TName, typename TParams>\n  void set_metric(TName &&name, uint32_t rev, TParams &&params) {\n    metric_name_ = std::forward<TName>(name);\n    metric_revision_ = rev;\n    metric_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of converter\n  template <typename TName, typename TParams>\n  void set_converter(TName &&name, uint32_t rev, TParams &&params) {\n    converter_name_ = std::forward<TName>(name);\n    converter_revision_ = rev;\n    converter_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of reformer\n  template <typename TName, typename TParams>\n  void set_reformer(TName &&name, uint32_t rev, TParams &&params) {\n    reformer_name_ = std::forward<TName>(name);\n    reformer_revision_ = rev;\n    reformer_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of trainer\n  template <typename TName, typename TParams>\n  void set_trainer(TName &&name, uint32_t rev, TParams &&params) {\n    trainer_name_ = std::forward<TName>(name);\n    trainer_revision_ = rev;\n    trainer_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of builder\n  template <typename TName, typename TParams>\n  void set_builder(TName &&name, uint32_t rev, TParams &&params) {\n    builder_name_ = std::forward<TName>(name);\n    builder_revision_ = rev;\n    builder_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of reducer\n  template <typename TName, typename TParams>\n  void set_reducer(TName &&name, uint32_t rev, TParams &&params) {\n    reducer_name_ = std::forward<TName>(name);\n    reducer_revision_ = rev;\n    reducer_params_ = std::forward<TParams>(params);\n  }\n\n  //! 
Set information of searcher\n  template <typename TName, typename TParams>\n  void set_searcher(TName &&name, uint32_t rev, TParams &&params) {\n    searcher_name_ = std::forward<TName>(name);\n    searcher_revision_ = rev;\n    searcher_params_ = std::forward<TParams>(params);\n  }\n\n  //! Set information of streamer\n  template <typename TName, typename TParams>\n  void set_streamer(TName &&name, uint32_t rev, TParams &&params) {\n    streamer_name_ = std::forward<TName>(name);\n    streamer_revision_ = rev;\n    streamer_params_ = std::forward<TParams>(params);\n  }\n\n  //! Serialize meta information into buffer\n  void serialize(std::string *out) const;\n\n  //! Deserialize meta information from buffer\n  bool deserialize(const void *data, size_t len);\n\n  //! Calculate unit size of feature\n  static uint32_t UnitSizeof(DataType data_type) {\n    static const uint32_t unit_size_table[] = {\n        0u,                // DT_UNDEFINED\n        sizeof(uint16_t),  // DT_FP16\n        sizeof(float),     // DT_FP32\n        sizeof(double),    // DT_FP64\n        sizeof(int8_t),    // DT_INT8\n        sizeof(int16_t),   // DT_INT16\n        sizeof(uint8_t),   // DT_INT4\n        sizeof(uint32_t),  // DT_BINARY32\n        sizeof(uint64_t)   // DT_BINARY64\n    };\n    return unit_size_table[data_type];\n  }\n\n  //! Calculate align size of feature\n  static uint32_t AlignSizeof(DataType ft) {\n    static const uint32_t align_size_table[] = {\n        0u,                   // DT_UNDEFINED\n        sizeof(uint16_t),     // DT_FP16\n        sizeof(float),        // DT_FP32\n        sizeof(double),       // DT_FP64\n        sizeof(int8_t) * 4,   // DT_INT8\n        sizeof(int16_t),      // DT_INT16\n        sizeof(uint8_t) * 4,  // DT_INT4\n        sizeof(uint32_t),     // DT_BINARY32\n        sizeof(uint64_t)      // DT_BINARY64\n    };\n    return align_size_table[ft];\n  }\n\n  //! 
Calculate element size of feature\n  static uint32_t ElementSizeof(DataType data_type, uint32_t unit,\n                                uint32_t dim) {\n    switch (data_type) {\n      case DataType::DT_UNDEFINED:\n        return 0;\n      case DataType::DT_FP16:\n      case DataType::DT_FP32:\n      case DataType::DT_FP64:\n      case DataType::DT_INT8:\n      case DataType::DT_INT16:\n        return (dim * unit);\n      case DataType::DT_INT4:\n        return (dim + unit * 2 - 1) / (unit * 2) * unit;\n      case DataType::DT_BINARY32:\n      case DataType::DT_BINARY64:\n        return (dim + unit * 8 - 1) / (unit * 8) * unit;\n    }\n    return 0;\n  }\n\n  //! Calculate element size of vector\n  static uint32_t ElementSizeof(DataType data_type, uint32_t dim) {\n    return ElementSizeof(data_type, UnitSizeof(data_type), dim);\n  }\n\n private:\n  MetaType meta_type_{MetaType::MT_DENSE};\n  MajorOrder major_order_{MajorOrder::MO_UNDEFINED};\n  DataType data_type_{DataType::DT_UNDEFINED};\n  uint32_t dimension_{0};\n  uint32_t unit_size_{0};\n  uint32_t element_size_{0};\n  uint64_t space_id_{0};\n  uint32_t metric_revision_{0};\n  uint32_t converter_revision_{0};\n  uint32_t reformer_revision_{0};\n  uint32_t trainer_revision_{0};\n  uint32_t builder_revision_{0};\n  uint32_t reducer_revision_{0};\n  uint32_t searcher_revision_{0};\n  uint32_t streamer_revision_{0};\n\n  std::string metric_name_{};\n  std::string converter_name_{};\n  std::string reformer_name_{};\n  std::string trainer_name_{};\n  std::string builder_name_{};\n  std::string reducer_name_{};\n  std::string searcher_name_{};\n  std::string streamer_name_{};\n\n  ailego::Params metric_params_{};\n  ailego::Params converter_params_{};\n  ailego::Params reformer_params_{};\n  ailego::Params trainer_params_{};\n  ailego::Params builder_params_{};\n  ailego::Params reducer_params_{};\n  ailego::Params searcher_params_{};\n  ailego::Params streamer_params_{};\n  ailego::Params attributes_{};\n};\n\n/*! 
Index Query Meta\n */\nclass IndexQueryMeta {\n public:\n  //! Constructor\n  IndexQueryMeta(void) {}\n\n  //! Constructor\n  IndexQueryMeta(IndexMeta::MetaType meta_type, IndexMeta::DataType data_type,\n                 uint32_t unit, uint32_t dim)\n      : meta_type_(meta_type),\n        data_type_(data_type),\n        dimension_(dim),\n        unit_size_(unit),\n        element_size_(IndexMeta::ElementSizeof(data_type, unit, dim)) {}\n\n  //! Constructor\n  IndexQueryMeta(IndexMeta::DataType data_type, uint32_t dim)\n      : IndexQueryMeta{IndexMeta::MetaType::MT_DENSE, data_type,\n                       IndexMeta::UnitSizeof(data_type), dim} {}\n\n  //! Constructor\n  IndexQueryMeta(IndexMeta::DataType data_type)\n      : IndexQueryMeta{IndexMeta::MetaType::MT_SPARSE, data_type,\n                       IndexMeta::UnitSizeof(data_type), 0} {}\n\n  //! Constructor\n  IndexQueryMeta(IndexMeta::MetaType meta_type, IndexMeta::DataType data_type,\n                 uint32_t dim = 0)\n      : IndexQueryMeta{meta_type, data_type, IndexMeta::UnitSizeof(data_type),\n                       dim} {}\n\n  //! Retrieve meta type\n  IndexMeta::MetaType meta_type(void) const {\n    return meta_type_;\n  }\n\n  //! Retrieve data\n  IndexMeta::DataType data_type(void) const {\n    return data_type_;\n  }\n\n  //! Retrieve dimension of features\n  uint32_t dimension(void) const {\n    return dimension_;\n  }\n\n  //! Retrieve unit size of feature\n  uint32_t unit_size(void) const {\n    return unit_size_;\n  }\n\n  //! Retrieve element size of feature\n  uint32_t element_size(void) const {\n    return element_size_;\n  }\n\n  //! Set dimension of feature\n  void set_dimension(uint32_t dim) {\n    dimension_ = dim;\n    element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dim);\n  }\n\n  //! Set meta type\n  void set_meta_type(IndexMeta::MetaType meta_type) {\n    meta_type_ = meta_type;\n  }\n\n  //! 
Set data type\n  void set_data_type(IndexMeta::DataType data_type) {\n    data_type_ = data_type;\n  }\n\n  //! Set meta information of feature\n  void set_meta(IndexMeta::DataType data_type, uint32_t unit, uint32_t dim) {\n    data_type_ = data_type;\n    dimension_ = dim;\n    unit_size_ = unit;\n    element_size_ = IndexMeta::ElementSizeof(data_type, unit, dim);\n  }\n\n  //! Set meta information of feature\n  void set_meta(IndexMeta::DataType data_type, uint32_t dim) {\n    this->set_meta(data_type, IndexMeta::UnitSizeof(data_type), dim);\n  }\n\n private:\n  IndexMeta::MetaType meta_type_{IndexMeta::MetaType::MT_DENSE};\n  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};\n  uint32_t dimension_{0};\n  uint32_t unit_size_{0};\n  uint32_t element_size_{0};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_metric.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/math_batch/utils.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_module.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Metric\n */\nstruct IndexMetric : public IndexModule {\n  //! Index Metric Pointer\n  typedef std::shared_ptr<IndexMetric> Pointer;\n\n  //! Matrix Distance Function\n  typedef void (*MatrixDistanceHandle)(const void *m, const void *q, size_t dim,\n                                       float *out);\n\n  //! Matrix Distance Function Object\n  using MatrixDistance =\n      std::function<void(const void *m, const void *q, size_t dim, float *out)>;\n\n  //! Matrix Sparse Distance Function\n  typedef void (*MatrixSparseDistanceHandle)(const void *m_sparse_data,\n                                             const void *q_sparse_data,\n                                             float *out);\n\n  //! Matrix Sparse Distance Function Object\n  using MatrixSparseDistance = std::function<void(\n      const void *m_sparse_data, const void *q_sparse_data, float *out)>;\n\n\n  //! 
Matrix Batch Distance Function\n  typedef void (*MatrixBatchDistanceHandle)(const void **m, const void *q,\n                                            size_t num, size_t dim, float *out);\n\n  //! Matrix Batch Distance Function Object\n  using MatrixBatchDistance = std::function<void(\n      const void **m, const void *q, size_t num, size_t dim, float *out)>;\n\n  //! Destructor\n  virtual ~IndexMetric(void) {}\n\n  //! Initialize Metric\n  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;\n\n  //! Cleanup Metric\n  virtual int cleanup(void) = 0;\n\n  //! Retrieve if it matched\n  virtual bool is_matched(const IndexMeta &meta) const = 0;\n\n  //! Retrieve if it matched\n  virtual bool is_matched(const IndexMeta &meta,\n                          const IndexQueryMeta &qmeta) const = 0;\n\n  //! Retrieve distance function for query\n  virtual MatrixDistance distance(void) const {\n    return nullptr;\n  }\n\n  //! Retrieve sparse distance function for query\n  virtual MatrixSparseDistance sparse_distance(void) const {\n    return nullptr;\n  };\n\n  //! Retrieve batch distance function for query\n  virtual MatrixBatchDistance batch_distance(void) const {\n    return nullptr;\n  }\n\n  //! Retrieve distance function for index features\n  virtual MatrixDistance distance_matrix(size_t /*m*/, size_t /*n*/) const {\n    return nullptr;\n  }\n\n  //! Retrieve params of Metric\n  virtual const ailego::Params &params(void) const = 0;\n\n  //! Retrieve query metric object of this index metric\n  virtual Pointer query_metric(void) const = 0;\n\n  //! Normalize result\n  virtual void normalize(float *score) const {\n    (void)score;\n  }\n\n  //! Denormalize result\n  virtual void denormalize(float *score) const {\n    (void)score;\n  }\n\n  //! Retrieve if it supports normalization\n  virtual bool support_normalize(void) const {\n    return false;\n  }\n\n  //! 
Train the metric\n  virtual int train(const void *vec, size_t dim) {\n    (void)vec;\n    (void)dim;\n    return 0;\n  }\n\n  //! Retrieve if it supports training\n  virtual bool support_train(void) const {\n    return false;\n  }\n\n  //! Compute the distance between feature and query\n  float distance(const void *m, const void *q, size_t dim) const {\n    float dist;\n    (this->distance())(m, q, dim, &dist);\n    return dist;\n  }\n\n  using DistanceBatchQueryPreprocessFunc =\n      ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc;\n\n  virtual DistanceBatchQueryPreprocessFunc get_query_preprocess_func() const {\n    return nullptr;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_module.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Module\n */\nclass IndexModule {\n public:\n  //! Index Module Pointer\n  typedef std::shared_ptr<IndexModule> Pointer;\n\n  //! Destructor\n  virtual ~IndexModule(void) {}\n\n  //! Retrieve debug information\n  virtual std::string debug_string(void) const {\n    return std::string();\n  }\n\n  //! Retrieve name of module\n  const std::string &name(void) const {\n    return name_;\n  }\n\n  //! Retrieve revision of module\n  uint32_t revision(void) const {\n    return revision_;\n  }\n\n protected:\n  friend struct IndexFactory;\n\n  //! Set name of module\n  void set_name(const std::string &str) {\n    name_ = str;\n  }\n\n  //! Set revision of module\n  void set_revision(uint32_t val) {\n    revision_ = val;\n  }\n\n private:\n  //! Members\n  uint32_t revision_{0u};\n  std::string name_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_packer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/internal/platform.h>\n#include <zvec/ailego/utility/type_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_version.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Packer\n */\nclass IndexPacker {\n public:\n  /*! Index Packer Segment Meta\n   */\n  class SegmentMeta {\n   public:\n    //! Constructor\n    SegmentMeta(const std::string &str, size_t dsz, size_t psz, uint32_t crc)\n        : data_size_(dsz), padding_size_(psz), data_crc_(crc), id_(str) {}\n\n    //! Constructor\n    SegmentMeta(std::string &&str, size_t dsz, size_t psz, uint32_t crc)\n        : data_size_(dsz),\n          padding_size_(psz),\n          data_crc_(crc),\n          id_(std::forward<std::string>(str)) {}\n\n    //! Constructor\n    SegmentMeta(const SegmentMeta &rhs)\n        : data_size_(rhs.data_size_),\n          padding_size_(rhs.padding_size_),\n          data_crc_(rhs.data_crc_),\n          id_(rhs.id_) {}\n\n    //! Constructor\n    SegmentMeta(SegmentMeta &&rhs)\n        : data_size_(rhs.data_size_),\n          padding_size_(rhs.padding_size_),\n          data_crc_(rhs.data_crc_),\n          id_(std::move(rhs.id_)) {}\n\n    //! 
Retrieve id of segment\n    const std::string &id(void) const {\n      return id_;\n    }\n\n    //! Retrieve size of data\n    size_t data_size(void) const {\n      return data_size_;\n    }\n\n    //! Retrieve crc of data\n    uint32_t data_crc(void) const {\n      return data_crc_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const {\n      return padding_size_;\n    }\n\n   private:\n    size_t data_size_{0};\n    size_t padding_size_{0};\n    uint32_t data_crc_{0};\n    std::string id_{};\n  };\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const {\n    return magic_;\n  }\n\n  //! Reset the packer\n  void reset(void) {\n    magic_ = 0;\n    data_crc_ = 0u;\n    data_size_ = 0u;\n  }\n\n  //! Setup header of index package\n  template <typename TFunc>\n  bool setup(TFunc write_data) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,\n                                                size_t>::value,\n                  \"Invocable function type\");\n    IndexFormat::MetaHeader header;\n    IndexFormat::SetupMetaHeader(\n        &header, (uint32_t)(0 - sizeof(IndexFormat::MetaFooter)),\n        sizeof(IndexFormat::MetaHeader));\n\n    if (write_data(&header, sizeof(header)) != sizeof(header)) {\n      return false;\n    }\n    magic_ = header.magic;\n    return true;\n  }\n\n  //! Pack index data\n  template <typename TFunc>\n  size_t pack(TFunc write_data, const void *data, size_t len) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,\n                                                size_t>::value,\n                  \"Invocable function type\");\n    size_t wrlen = write_data(data, len);\n    if (wrlen > 0u) {\n      data_crc_ = ailego::Crc32c::Hash(data, wrlen, data_crc_);\n      data_size_ += wrlen;\n    }\n    return wrlen;\n  }\n\n  //! 
Finish packing data\n  template <typename TFunc>\n  bool finish(TFunc write_data, std::vector<SegmentMeta> &stab) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,\n                                                size_t>::value,\n                  \"Invocable function type\");\n\n    size_t content_size = 0u;\n    for (const auto &it : stab) {\n      content_size += it.data_size() + it.padding_size();\n    }\n\n    if (content_size != data_size_) {\n      return false;\n    }\n\n    if (!this->pack_version(write_data, stab)) {\n      return false;\n    }\n\n    // Write the padding if need\n    size_t content_padding_size = ailego_align(data_size_, 32) - data_size_;\n    if (content_padding_size) {\n      std::string padding(content_padding_size, '\\0');\n\n      if (write_data(padding.data(), padding.size()) != padding.size()) {\n        return false;\n      }\n    }\n\n    // Prepare segment meta buffer\n    IndexFormat::SegmentMetaBuffer buffer(stab.size());\n    for (const auto &it : stab) {\n      buffer.append(it.id(), it.data_size(), it.padding_size(), it.data_crc());\n    }\n    buffer.resize(ailego_align(buffer.size(), 32));\n\n    // Write segment table into file\n    if (write_data(buffer.data(), buffer.size()) != buffer.size()) {\n      return false;\n    }\n\n    // Update footer\n    IndexFormat::MetaFooter footer;\n    IndexFormat::SetupMetaFooter(&footer);\n    footer.segments_meta_crc = buffer.crc();\n    footer.content_crc = data_crc_;\n    footer.segment_count = stab.size();\n    footer.segments_meta_size = buffer.size();\n    footer.content_size = data_size_;\n    footer.content_padding_size = content_padding_size;\n    footer.total_size = footer.content_size + footer.content_padding_size +\n                        footer.segments_meta_size +\n                        sizeof(IndexFormat::MetaHeader) +\n                        sizeof(IndexFormat::MetaFooter);\n    IndexFormat::UpdateMetaFooter(&footer, 0);\n\n    // 
Write footer into file\n    if (write_data(&footer, sizeof(footer)) != sizeof(footer)) {\n      return false;\n    }\n    return true;\n  }\n\n  //! Pack index version\n  template <typename TFunc>\n  bool pack_version(TFunc write_data, std::vector<SegmentMeta> &stab) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,\n                                                size_t>::value,\n                  \"Invocable function type\");\n    std::string buffer(IndexVersion::Details());\n\n    size_t data_size = buffer.size();\n    uint32_t data_crc = ailego::Crc32c::Hash(buffer.data(), buffer.size(), 0);\n    buffer.resize((data_size + 31u) & ~31u);\n\n    if (write_data(buffer.data(), buffer.size()) != buffer.size()) {\n      return false;\n    }\n    data_crc_ = ailego::Crc32c::Hash(buffer.data(), buffer.size(), data_crc_);\n    data_size_ += buffer.size();\n    stab.emplace_back(std::string(\"IndexVersion\"), data_size,\n                      buffer.size() - data_size, data_crc);\n    return true;\n  }\n\n private:\n  uint32_t magic_{0u};\n  uint32_t data_crc_{0u};\n  size_t data_size_{0u};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_plugin.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <vector>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Plugin\n */\nclass IndexPlugin {\n public:\n  //! Constructor\n  IndexPlugin(void) : handle_(nullptr) {}\n\n  //! Constructor\n  IndexPlugin(IndexPlugin &&plugin) : handle_(plugin.handle_) {\n    plugin.handle_ = nullptr;\n  }\n\n  //! Constructor\n  explicit IndexPlugin(const std::string &path) : handle_(nullptr) {\n    this->load(path);\n  }\n\n  //! Destructor\n  ~IndexPlugin(void) {}\n\n  //! Test if the plugin is valid\n  bool is_valid(void) const {\n    return (!!handle_);\n  }\n\n  //! Retrieve the handle\n  void *handle(void) const {\n    return handle_;\n  }\n\n  //! Load the library path\n  bool load(const std::string &path);\n\n  //! Load the library path\n  bool load(const std::string &path, std::string *err);\n\n  //! Unload plugin\n  void unload(void);\n\n private:\n  //! Disable them\n  IndexPlugin(const IndexPlugin &) = delete;\n  IndexPlugin &operator=(const IndexPlugin &) = delete;\n\n  //! Members\n  void *handle_;\n};\n\n/*! Index Plugin Broker\n */\nclass IndexPluginBroker {\n public:\n  //! Constructor\n  IndexPluginBroker(void) : plugins_() {}\n\n  //! Constructor\n  IndexPluginBroker(IndexPluginBroker &&broker)\n      : plugins_(std::move(broker.plugins_)) {}\n\n  //! 
Destructor\n  ~IndexPluginBroker(void) {}\n\n  //! Emplace a plugin\n  bool emplace(IndexPlugin &&plugin);\n\n  //! Emplace a plugin via library path\n  bool emplace(const std::string &path) {\n    return this->emplace(IndexPlugin(path));\n  }\n\n  //! Emplace a plugin via library path\n  bool emplace(const std::string &path, std::string *err) {\n    IndexPlugin plugin;\n    if (!plugin.load(path, err)) {\n      return false;\n    }\n    return this->emplace(std::move(plugin));\n  }\n\n  //! Retrieve count of plugins in broker\n  size_t count(void) const {\n    return plugins_.size();\n  }\n\n private:\n  //! Disable them\n  IndexPluginBroker(const IndexPluginBroker &) = delete;\n  IndexPluginBroker &operator=(const IndexPluginBroker &) = delete;\n\n  //! Members\n  std::vector<IndexPlugin> plugins_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_provider.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <unordered_map>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_storage.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Provider\n */\nstruct IndexProvider : public IndexHolder {\n  //! Index Provider Pointer\n  typedef std::shared_ptr<IndexProvider> Pointer;\n\n  //! Destructor\n  virtual ~IndexProvider(void) {}\n\n  bool multipass() const override {\n    return true;\n  }\n\n public:  // Provider's unique method\n  //! Retrieve a vector using a primary key\n  virtual const void *get_vector(const uint64_t key) const = 0;\n\n  //! Retrieve a vector using a primary key\n  virtual int get_vector(const uint64_t /*key*/,\n                         IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Retrieve the owner class\n  virtual const std::string &owner_class(void) const = 0;\n};\n\n/*! Index SparseProvider\n */\nstruct IndexSparseProvider : IndexSparseHolder {\n  //! Index Provider Pointer\n  typedef std::shared_ptr<IndexSparseProvider> Pointer;\n\n  //! Destructor\n  virtual ~IndexSparseProvider(void) {}\n\n  bool multipass() const override {\n    return true;\n  }\n\n public:  // Provider's unique method\n  //! 
Retrieve a vector using a primary key\n  virtual int get_sparse_vector(uint64_t key, uint32_t *sparse_count,\n                                std::string *sparse_indices_buffer,\n                                std::string *sparse_values_buffer) const = 0;\n\n  //! Retrieve the owner class\n  virtual const std::string &owner_class(void) const = 0;\n};\n\n/*! Multi-Pass Numerical Index Provider\n */\ntemplate <typename T>\nclass MultiPassNumericalIndexProvider : public IndexProvider {\n public:\n  //! Constructor\n  explicit MultiPassNumericalIndexProvider(size_t dim)\n      : holder_(dim), owner_class_(\"MultiPassNumericalIndexProvider\") {}\n\n  //! Destructor\n  virtual ~MultiPassNumericalIndexProvider(void) {}\n\n  //! Retrieve count of elements in holder\n  size_t count(void) const override {\n    return holder_.count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return holder_.dimension();\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return holder_.element_size();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return holder_.create_iterator();\n  }\n\n  //! Retrieve a vector using a primary key\n  const void *get_vector(const uint64_t key) const override {\n    auto it = indice_map_.find(key);\n    if (it == indice_map_.end()) {\n      return nullptr;\n    }\n    return holder_.get_vector_by_index(it->second);\n  }\n\n  //! Retrieve a vector using a primary key\n  int get_vector(const uint64_t key,\n                 IndexStorage::MemoryBlock &block) const override {\n    const void *data = get_vector(key);\n    if (data == nullptr) {\n      return IndexError_NoExist;\n    }\n    block.reset(const_cast<void *>(data));\n    return 0;\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n  //! 
Append an element into holder\n  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {\n    if (!holder_.emplace(key, vec)) {\n      return false;\n    }\n    indice_map_[key] = static_cast<int>(holder_.count() - 1);\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {\n    if (!holder_.emplace(key, std::move(vec))) {\n      return false;\n    }\n    indice_map_[key] = static_cast<int>(holder_.count() - 1);\n    return true;\n  }\n\n private:\n  //! Members\n  MultiPassNumericalIndexHolder<T> holder_;\n  std::unordered_map<uint64_t, int> indice_map_;\n  std::string owner_class_;\n};\n\n/*! Multi-Pass Binary Index Provider\n */\ntemplate <typename T>\nclass MultiPassBinaryIndexProvider : public IndexProvider {\n public:\n  //! Constructor\n  explicit MultiPassBinaryIndexProvider(size_t dim)\n      : holder_(dim), owner_class_(\"MultiPassBinaryIndexProvider\") {}\n\n  //! Destructor\n  virtual ~MultiPassBinaryIndexProvider(void) {}\n\n  //! Retrieve count of elements in holder\n  size_t count(void) const override {\n    return holder_.count();\n  }\n\n  //! Retrieve dimension\n  size_t dimension(void) const override {\n    return holder_.dimension();\n  }\n\n  //! Retrieve element size in bytes\n  size_t element_size(void) const override {\n    return holder_.element_size();\n  }\n\n  //! Create a new iterator\n  IndexHolder::Iterator::Pointer create_iterator(void) override {\n    return holder_.create_iterator();\n  }\n\n  //! Retrieve a vector using a primary key\n  const void *get_vector(const uint64_t key) const override {\n    auto it = indice_map_.find(key);\n    if (it == indice_map_.end()) {\n      return nullptr;\n    }\n    return holder_.get_vector_by_index(it->second);\n  }\n\n  //! 
Retrieve a vector using a primary key\n  int get_vector(const uint64_t key,\n                 IndexStorage::MemoryBlock &block) const override {\n    const void *data = get_vector(key);\n    if (data == nullptr) {\n      return IndexError_NoExist;\n    }\n    block.reset(const_cast<void *>(data));\n    return 0;\n  }\n\n  //! Retrieve the owner class\n  const std::string &owner_class(void) const override {\n    return owner_class_;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {\n    if (!holder_.emplace(key, vec)) {\n      return false;\n    }\n    indice_map_[key] = static_cast<int>(holder_.count() - 1);\n    return true;\n  }\n\n  //! Append an element into holder\n  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {\n    if (!holder_.emplace(key, std::move(vec))) {\n      return false;\n    }\n    indice_map_[key] = static_cast<int>(holder_.count() - 1);\n    return true;\n  }\n\n private:\n  //! Members\n  MultiPassBinaryIndexHolder<T> holder_;\n  std::unordered_map<uint64_t, int> indice_map_;\n  std::string owner_class_;\n};\n\n/*! Multi-Pass Index Provider\n */\ntemplate <IndexMeta::DataType FT>\nstruct MultiPassIndexProvider;\n\n/*! Multi-Pass Index Provider (BINARY32)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY32>\n    : public MultiPassBinaryIndexProvider<uint32_t> {\n  //! Constructor\n  using MultiPassBinaryIndexProvider::MultiPassBinaryIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY32;\n  }\n};\n\n/*! Multi-Pass Index Provider (BINARY64)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY64>\n    : public MultiPassBinaryIndexProvider<uint64_t> {\n  //! Constructor\n  using MultiPassBinaryIndexProvider::MultiPassBinaryIndexProvider;\n\n  //! 
Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_BINARY64;\n  }\n};\n\n/*! Multi-Pass Index Provider (FP16)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_FP16>\n    : public MultiPassNumericalIndexProvider<ailego::Float16> {\n  //! Constructor\n  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP16;\n  }\n};\n\n/*! Multi-Pass Index Provider (FP32)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>\n    : public MultiPassNumericalIndexProvider<float> {\n  //! Constructor\n  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP32;\n  }\n};\n\n/*! Multi-Pass Index Provider (FP64)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_FP64>\n    : public MultiPassNumericalIndexProvider<double> {\n  //! Constructor\n  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_FP64;\n  }\n};\n\n/*! Multi-Pass Index Provider (INT8)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_INT8>\n    : public MultiPassNumericalIndexProvider<int8_t> {\n  //! Constructor\n  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT8;\n  }\n};\n\n/*! Multi-Pass Index Provider (INT16)\n */\ntemplate <>\nstruct MultiPassIndexProvider<IndexMeta::DataType::DT_INT16>\n    : public MultiPassNumericalIndexProvider<int16_t> {\n  //! 
Constructor\n  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;\n\n  //! Retrieve type information\n  IndexMeta::DataType data_type(void) const override {\n    return IndexMeta::DataType::DT_INT16;\n  }\n};\n\n/*! Convert IndexHolder to IndexProvider\n *  @param holder The IndexHolder to convert\n *  @return IndexProvider::Pointer\n */\ninline IndexProvider::Pointer convert_holder_to_provider(\n    const IndexHolder::Pointer &holder) {\n  if (!holder) {\n    return nullptr;\n  }\n\n  IndexMeta::DataType data_type = holder->data_type();\n  size_t dimension = holder->dimension();\n\n  switch (data_type) {\n    case IndexMeta::DataType::DT_FP16: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_FP16>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const ailego::Float16 *data =\n            static_cast<const ailego::Float16 *>(iter->data());\n        ailego::NumericalVector<ailego::Float16> vec(dimension);\n        std::memcpy(vec.data(), data, dimension * sizeof(ailego::Float16));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_FP32: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const float *data = static_cast<const float *>(iter->data());\n        ailego::NumericalVector<float> vec(dimension);\n        std::memcpy(vec.data(), data, dimension * sizeof(float));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_FP64: {\n      auto provider = std::make_shared<\n          
MultiPassIndexProvider<IndexMeta::DataType::DT_FP64>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const double *data = static_cast<const double *>(iter->data());\n        ailego::NumericalVector<double> vec(dimension);\n        std::memcpy(vec.data(), data, dimension * sizeof(double));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_INT8: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_INT8>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const int8_t *data = static_cast<const int8_t *>(iter->data());\n        ailego::NumericalVector<int8_t> vec(dimension);\n        std::memcpy(vec.data(), data, dimension * sizeof(int8_t));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_INT16: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_INT16>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const int16_t *data = static_cast<const int16_t *>(iter->data());\n        ailego::NumericalVector<int16_t> vec(dimension);\n        std::memcpy(vec.data(), data, dimension * sizeof(int16_t));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_BINARY32: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY32>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const uint32_t *data = 
static_cast<const uint32_t *>(iter->data());\n        size_t binary_size = (dimension + 31) / 32;\n        ailego::BinaryVector<uint32_t> vec(dimension);\n        std::memcpy(vec.data(), data, binary_size * sizeof(uint32_t));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    case IndexMeta::DataType::DT_BINARY64: {\n      auto provider = std::make_shared<\n          MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY64>>(dimension);\n      auto iter = holder->create_iterator();\n      while (iter->is_valid()) {\n        uint64_t key = iter->key();\n        const uint64_t *data = static_cast<const uint64_t *>(iter->data());\n        size_t binary_size = (dimension + 63) / 64;\n        ailego::BinaryVector<uint64_t> vec(dimension);\n        std::memcpy(vec.data(), data, binary_size * sizeof(uint64_t));\n        provider->emplace(key, std::move(vec));\n        iter->next();\n      }\n      return provider;\n    }\n\n    default:\n      return nullptr;\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_reducer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_builder.h>\n#include <zvec/core/framework/index_converter.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_filter.h>\n#include <zvec/core/framework/index_reformer.h>\n#include <zvec/core/framework/index_stats.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Reducer Base\n */\nclass IndexReducerBase : public IndexModule {\n public:\n  //! Index Reducer Pointer\n  typedef std::shared_ptr<IndexReducerBase> Pointer;\n\n  /*! Index Reducer Stats\n   */\n  class Stats : public IndexStats {\n   public:\n    Stats() {}\n    Stats(const Stats &stats) {\n      *this = stats;\n    }\n    Stats &operator=(const Stats &stats) {\n      this->loaded_count_.store(stats.loaded_count_.load());\n      this->dumped_count_.store(stats.dumped_count_.load());\n      this->filtered_count_.store(stats.filtered_count_.load());\n      this->duplicated_count_.store(stats.duplicated_count_.load());\n      this->reduced_costtime_.store(stats.reduced_costtime_.load());\n      this->dumped_costtime_.store(stats.dumped_costtime_.load());\n      return *this;\n    }\n    //! 
Set count of documents loaded\n    void set_loaded_count(size_t count) {\n      loaded_count_ = count;\n    }\n\n    //! Set count of documents dumped\n    void set_dumped_count(size_t count) {\n      dumped_count_ = count;\n    }\n\n    //! Set count of documents filtered\n    void set_filtered_count(size_t count) {\n      filtered_count_ = count;\n    }\n\n    //! Set count of documents duplicated\n    void set_duplicated_count(size_t count) {\n      duplicated_count_ = count;\n    }\n\n    //! Set time cost of documents reduced\n    void set_reduced_costtime(uint64_t cost) {\n      reduced_costtime_ = cost;\n    }\n\n    //! Set time cost of documents dumped\n    void set_dumped_costtime(uint64_t cost) {\n      dumped_costtime_ = cost;\n    }\n\n    //! Retrieve count of documents loaded\n    size_t loaded_count(void) const {\n      return loaded_count_;\n    }\n\n    //! Retrieve count of documents dumped\n    size_t dumped_count(void) const {\n      return dumped_count_;\n    }\n\n    //! Retrieve count of documents filtered\n    size_t filtered_count(void) const {\n      return filtered_count_;\n    }\n\n    //! Retrieve count of documents duplicated\n    size_t duplicated_count(void) const {\n      return duplicated_count_;\n    }\n\n    //! Retrieve time cost of documents reduced\n    uint64_t reduced_costtime(void) const {\n      return reduced_costtime_;\n    }\n\n    //! Retrieve time cost of documents dumped\n    uint64_t dumped_costtime(void) const {\n      return dumped_costtime_;\n    }\n\n    //! Retrieve count of documents loaded (mutable)\n    std::atomic<size_t> *mutable_loaded_count(void) {\n      return &loaded_count_;\n    }\n\n    //! Retrieve count of documents dumped (mutable)\n    std::atomic<size_t> *mutable_dumped_count(void) {\n      return &dumped_count_;\n    }\n\n    //! Retrieve count of documents filtered (mutable)\n    std::atomic<size_t> *mutable_filtered_count(void) {\n      return &filtered_count_;\n    }\n\n    //! 
Retrieve count of documents duplicated (mutable)\n    std::atomic<size_t> *mutable_duplicated_count(void) {\n      return &duplicated_count_;\n    }\n\n    //! Retrieve time cost of documents reduced (mutable)\n    std::atomic<uint64_t> *mutable_reduced_costtime(void) {\n      return &reduced_costtime_;\n    }\n\n    //! Retrieve time cost of documents dumped (mutable)\n    std::atomic<uint64_t> *mutable_dumped_costtime(void) {\n      return &dumped_costtime_;\n    }\n\n   private:\n    //! Members\n    std::atomic<size_t> loaded_count_{0u};\n    std::atomic<size_t> dumped_count_{0u};\n    std::atomic<size_t> filtered_count_{0u};\n    std::atomic<size_t> duplicated_count_{0u};\n    std::atomic<uint64_t> reduced_costtime_{0u};\n    std::atomic<uint64_t> dumped_costtime_{0u};\n  };\n\n  //! Destructor\n  virtual ~IndexReducerBase(void) = default;\n\n  //! Initialize Reducer\n  virtual int init(const ailego::Params &params) = 0;\n\n  //! Cleanup Reducer\n  virtual int cleanup(void) = 0;\n\n  //! Reduce operator (with filter)\n  virtual int reduce(const IndexFilter &filter) = 0;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) = 0;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const = 0;\n\n  //! Set thread pool\n  void set_thread_pool(ailego::ThreadPool *pool) {\n    thread_pool_ = pool;\n  }\n\n  //! Set stop flag\n  void set_stop_flag(std::atomic<bool> *stop_flag) {\n    stop_flag_ = stop_flag;\n  }\n\n protected:\n  ailego::ThreadPool *thread_pool_{nullptr};\n  std::atomic<bool> *stop_flag_{nullptr};\n};\n\n/*! Index Reducer\n */\nclass IndexReducer : public IndexReducerBase {\n public:\n  //! Index Reducer Pointer\n  typedef std::shared_ptr<IndexReducer> Pointer;\n\n  //! Destructor\n  virtual ~IndexReducer(void) = default;\n};\n\n/*! Index Sparse Reducer\n */\nclass IndexSparseReducer : public IndexReducerBase {\n public:\n  //! 
Index Reducer Pointer\n  typedef std::shared_ptr<IndexSparseReducer> Pointer;\n\n  //! Destructor\n  virtual ~IndexSparseReducer(void) = default;\n};\n\n/*! Index Streamer Reducer\n */\nclass IndexStreamerReducer : public IndexReducerBase {\n public:\n  //! Index Reducer Pointer\n  typedef std::shared_ptr<IndexStreamerReducer> Pointer;\n\n  virtual int set_target_streamer_wiht_info(\n      const IndexBuilder::Pointer builder,\n      const IndexStreamer::Pointer streamer,\n      const IndexConverter::Pointer converter,\n      const IndexReformer::Pointer reformer = nullptr,\n      const IndexQueryMeta &original_query_meta = IndexQueryMeta()) = 0;\n  virtual int feed_streamer_with_reformer(\n      IndexStreamer::Pointer streamer,\n      const IndexReformer::Pointer reformer) = 0;\n\n  virtual ~IndexStreamerReducer(void) = default;\n};\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_refiner.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_streamer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Refiner\n */\nclass IndexRefiner : public IndexModule {\n public:\n  //! Index Refiner Pointer\n  typedef std::shared_ptr<IndexRefiner> Pointer;\n\n  /*! Index Searcher Context\n   */\n  struct Context : public IndexContext {\n   public:\n    Context() = default;\n    ~Context() = default;\n\n    virtual int set_contexts(IndexRunner::Context::Pointer base_ctx,\n                             IndexRunner::Context::Pointer refine_ctx) = 0;\n  };\n\n  //! Initialize refiner with streamer\n  virtual int init(IndexRunner::Pointer base_runner,\n                   IndexRunner::Pointer refine_runner,\n                   const ailego::Params &params) = 0;\n\n  //! Cleanup\n  virtual int cleanup() = 0;\n\n  //! Create a context\n  virtual Context::Pointer create_context(void) const = 0;\n\n  //! 
Add a vector into index\n  virtual int add_impl(uint64_t key, const void *base_query,\n                       const IndexQueryMeta &base_qmeta,\n                       const void *refine_query,\n                       const IndexQueryMeta &refine_qmeta,\n                       Context::Pointer &context) = 0;\n\n  //! Similarity search\n  virtual int search_impl(const void *base_query,\n                          const IndexQueryMeta &base_qmeta,\n                          const void *refine_query,\n                          const IndexQueryMeta &refine_qmeta,\n                          Context::Pointer &context) const = 0;\n  //! Similarity search\n  virtual int search_impl(const void *base_query,\n                          const IndexQueryMeta &base_qmeta,\n                          const void *refine_query,\n                          const IndexQueryMeta &refine_qmeta, uint32_t count,\n                          Context::Pointer &context) const = 0;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *base_query,\n                             const IndexQueryMeta &base_qmeta,\n                             const void *refine_query,\n                             const IndexQueryMeta &refine_qmeta,\n                             Context::Pointer &context) const = 0;\n\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void *base_query,\n                             const IndexQueryMeta &base_qmeta,\n                             const void *refine_query,\n                             const IndexQueryMeta &refine_qmeta, uint32_t count,\n                             Context::Pointer &context) const = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_reformer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_document.h>\n#include <zvec/core/framework/index_meta.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Reformer\n */\nclass IndexReformer : public IndexModule {\n public:\n  //! Index Reformer Pointer\n  typedef std::shared_ptr<IndexReformer> Pointer;\n\n  //! Destructor\n  virtual ~IndexReformer(void) {}\n\n  //! Initialize Reformer\n  virtual int init(const ailego::Params &params) = 0;\n\n  //! Cleanup Reformer\n  virtual int cleanup(void) = 0;\n\n  //! Load index from container\n  virtual int load(IndexStorage::Pointer cntr) = 0;\n\n  //! Unload index\n  virtual int unload(void) = 0;\n\n  //! Transform a query\n  virtual int transform(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Transform queries\n  virtual int transform(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n                        std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Convert a record\n  virtual int convert(const void *record, const IndexQueryMeta &rmeta,\n                      std::string *out, IndexQueryMeta *ometa) const {\n    return this->transform(record, rmeta, out, ometa);\n  }\n\n  //! Convert records\n  virtual int convert(const void *records, const IndexQueryMeta &rmeta,\n                      uint32_t count, std::string *out,\n                      IndexQueryMeta *ometa) const {\n    return this->transform(records, rmeta, count, out, ometa);\n  }\n\n  //! Normalize results\n  virtual int normalize(const void * /*query*/,\n                        const IndexQueryMeta & /*qmeta*/,\n                        IndexDocumentList & /*result*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  virtual bool need_revert() const {\n    return false;\n  }\n\n  virtual int revert(const void * /*in*/, const IndexQueryMeta & /*qmeta*/,\n                     std::string * /*out*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Transform a query\n  virtual int transform(uint32_t /*sparse_count*/,\n                        const uint32_t * /*sparse_indices*/,\n                        const void * /*sparse_query*/,\n                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Transform queries\n  virtual int transform(const uint32_t * /*sparse_count*/,\n                        const uint32_t * /*sparse_indices*/,\n                        const void * /*sparse_query*/,\n                        const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n                        std::string * /*out*/,\n                        IndexQueryMeta * /*ometa*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Convert a record\n  virtual int convert(uint32_t sparse_count, const uint32_t *sparse_indices,\n                      const void *sparse_query, const IndexQueryMeta &qmeta,\n                      std::string *out, IndexQueryMeta *ometa) const {\n    return this->transform(sparse_count, sparse_indices, sparse_query, qmeta,\n                           out, ometa);\n  }\n\n  //! Convert records\n  virtual int convert(const uint32_t *sparse_count,\n                      const uint32_t *sparse_indices, const void *sparse_query,\n                      const IndexQueryMeta &qmeta, uint32_t count,\n                      std::string *out, IndexQueryMeta *ometa) const {\n    return this->transform(sparse_count, sparse_indices, sparse_query, qmeta,\n                           count, out, ometa);\n  }\n\n  virtual int revert(const uint32_t /*sparse_count*/,\n                     const uint32_t * /*sparse_indices*/,\n                     const void * /*sparse_query*/,\n                     const IndexQueryMeta & /*qmeta*/,\n                     std::string * /*sparse_query_out*/) const {\n    return 0;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_runner.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <atomic>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_metric.h>\n#include <zvec/core/framework/index_module.h>\n#include <zvec/core/framework/index_provider.h>\n#include <zvec/core/framework/index_stats.h>\n#include <zvec/core/framework/index_threads.h>\n#include <zvec/core/framework/index_trainer.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Runner\n */\nclass IndexRunner : public IndexModule {\n public:\n  //! Index Searcher Pointer\n  typedef std::shared_ptr<IndexRunner> Pointer;\n\n  /*! Index Searcher Context\n   */\n  using Context = IndexContext;\n\n  /*! Index Searcher Provider\n   */\n  using Provider = IndexProvider;\n\n  /*! Index Sparse Searcher Provider\n   */\n  using SparseProvider = IndexSparseProvider;\n\n  /*! 
Index Streamer Stats\n   */\n  class Stats : public IndexStats {\n   public:\n    Stats() {}\n    Stats(const Stats &stats) {\n      *this = stats;\n    }\n    Stats &operator=(const Stats &stats) {\n      this->revision_id_ = stats.revision_id_;\n\n      this->trained_count_.store(stats.trained_count_.load());\n      this->built_count_.store(stats.built_count_.load());\n      this->dumped_count_.store(stats.dumped_count_.load());\n      this->loaded_count_.store(stats.loaded_count_.load());\n      this->added_count_.store(stats.added_count_.load());\n      this->discarded_count_.store(stats.discarded_count_.load());\n      this->updated_count_.store(stats.updated_count_.load());\n      this->deleted_count_.store(stats.deleted_count_.load());\n\n      this->index_size_.store(stats.index_size_.load());\n      this->dumped_size_.store(stats.dumped_size_.load());\n\n      this->check_point_.store(stats.check_point_.load());\n\n      this->create_time_.store(stats.create_time_.load());\n      this->update_time_.store(stats.update_time_.load());\n      this->loaded_costtime_.store(stats.loaded_costtime_.load());\n      this->trained_costtime_.store(stats.trained_costtime_.load());\n      this->built_costtime_.store(stats.built_costtime_.load());\n      this->dumped_costtime_.store(stats.dumped_costtime_.load());\n\n      return *this;\n    }\n    //! Set revision id\n    void set_revision_id(size_t rev) {\n      revision_id_ = rev;\n    }\n\n    //! Set count of documents trained\n    void set_trained_count(size_t count) {\n      trained_count_ = count;\n    }\n\n    //! Set count of documents built\n    void set_built_count(size_t count) {\n      built_count_ = count;\n    }\n\n    //! Set count of documents dumped\n    void set_dumped_count(size_t count) {\n      dumped_count_ = count;\n    }\n\n    //! Set count of documents loaded\n    void set_loaded_count(size_t count) {\n      loaded_count_ = count;\n    }\n\n    //! 
Set count of documents added\n    void set_added_count(size_t count) {\n      added_count_ = count;\n    }\n\n    //! Set count of documents discarded\n    void set_discarded_count(size_t count) {\n      discarded_count_ = count;\n    }\n\n    //! Set count of documents updated\n    void set_updated_count(size_t count) {\n      updated_count_ = count;\n    }\n\n    //! Set count of documents deleted\n    void set_deleted_count(size_t count) {\n      deleted_count_ = count;\n    }\n\n    //! Set size of index\n    void set_index_size(size_t count) {\n      index_size_ = count;\n    }\n\n    //! Set size of index dumped\n    void set_dumped_size(size_t count) {\n      dumped_size_ = count;\n    }\n\n    //! Set check point of index\n    void set_check_point(uint64_t val) {\n      check_point_ = val;\n    }\n\n    //! Set create time of index\n    void set_create_time(uint64_t val) {\n      create_time_ = val;\n    }\n\n    //! Set update time of index\n    void set_update_time(uint64_t val) {\n      update_time_ = val;\n    }\n\n    //! Set loaded cost time of index\n    void set_loaded_costtime(uint64_t val) {\n      loaded_costtime_ = val;\n    }\n\n    //! Set trained cost time of index\n    void set_trained_costtime(uint64_t val) {\n      trained_costtime_ = val;\n    }\n\n    //! Set built cost time of index\n    void set_built_costtime(uint64_t val) {\n      built_costtime_ = val;\n    }\n\n    //! Set dumped cost time of index\n    void set_dumped_costtime(uint64_t val) {\n      dumped_costtime_ = val;\n    }\n\n    //! Retrieve revision id\n    size_t revision_id(void) const {\n      return revision_id_;\n    }\n\n    //! Retrieve count of documents trained\n    size_t trained_count(void) const {\n      return trained_count_;\n    }\n\n    //! Retrieve count of documents built\n    size_t built_count(void) const {\n      return built_count_;\n    }\n\n    //! Retrieve count of documents dumped\n    size_t dumped_count(void) const {\n      return dumped_count_;\n    }\n\n    //! 
Retrieve count of documents loaded\n    size_t loaded_count(void) const {\n      return loaded_count_;\n    }\n\n    //! Retrieve count of documents added\n    size_t added_count(void) const {\n      return added_count_;\n    }\n\n    //! Retrieve count of documents discarded\n    size_t discarded_count(void) const {\n      return discarded_count_;\n    }\n\n    //! Retrieve count of documents updated\n    size_t updated_count(void) const {\n      return updated_count_;\n    }\n\n    //! Retrieve count of documents deleted\n    size_t deleted_count(void) const {\n      return deleted_count_;\n    }\n\n    //! Retrieve size of index\n    size_t index_size(void) const {\n      return index_size_;\n    }\n\n    //! Retrieve size of index dumped\n    size_t dumped_size(void) const {\n      return dumped_size_;\n    }\n\n    //! Retrieve check point of index\n    uint64_t check_point(void) const {\n      return check_point_;\n    }\n\n    //! Retrieve create time of index\n    uint64_t create_time(void) const {\n      return create_time_;\n    }\n\n    //! Retrieve update time of index\n    uint64_t update_time(void) const {\n      return update_time_;\n    }\n\n    //! Retrieve loaded cost time of index\n    uint64_t loaded_costtime(void) const {\n      return loaded_costtime_;\n    }\n\n    //! Retrieve trained cost time of index\n    uint64_t trained_costtime(void) const {\n      return trained_costtime_;\n    }\n\n    //! Retrieve built cost time of index\n    uint64_t built_costtime(void) const {\n      return built_costtime_;\n    }\n\n    //! Retrieve dumped cost time of index\n    uint64_t dumped_costtime(void) const {\n      return dumped_costtime_;\n    }\n\n    //! Retrieve count of documents loaded (mutable)\n    std::atomic<size_t> *mutable_trained_count(void) {\n      return &loaded_count_;\n    }\n\n    //! Retrieve count of documents built (mutable)\n    std::atomic<size_t> *mutable_built_count(void) {\n      return &built_count_;\n    }\n\n    //! 
Retrieve count of documents dumped (mutable)\n    std::atomic<size_t> *mutable_dumped_count(void) {\n      return &dumped_count_;\n    }\n\n    //! Retrieve count of documents loaded (mutable)\n    std::atomic<size_t> *mutable_loaded_count(void) {\n      return &loaded_count_;\n    }\n\n    //! Retrieve count of documents added (mutable)\n    std::atomic<size_t> *mutable_added_count(void) {\n      return &added_count_;\n    }\n\n    //! Retrieve count of documents discarded (mutable)\n    std::atomic<size_t> *mutable_discarded_count(void) {\n      return &discarded_count_;\n    }\n\n    //! Retrieve count of documents updated (mutable)\n    std::atomic<size_t> *mutable_updated_count(void) {\n      return &updated_count_;\n    }\n\n    //! Retrieve count of documents deleted (mutable)\n    std::atomic<size_t> *mutable_deleted_count(void) {\n      return &deleted_count_;\n    }\n\n    //! Retrieve size of index (mutable)\n    std::atomic<size_t> *mutable_index_size(void) {\n      return &index_size_;\n    }\n\n    //! Retrieve size of index dumped (mutable)\n    std::atomic<size_t> *mutable_dumped_size(void) {\n      return &dumped_size_;\n    }\n\n    //! Retrieve check point of index (mutable)\n    std::atomic<uint64_t> *mutable_check_point(void) {\n      return &check_point_;\n    }\n\n    //! Retrieve create time of index (mutable)\n    std::atomic<uint64_t> *mutable_create_time(void) {\n      return &create_time_;\n    }\n\n    //! Retrieve update time of index (mutable)\n    std::atomic<uint64_t> *mutable_update_time(void) {\n      return &update_time_;\n    }\n\n    //! Retrieve loaded time of index (mutable)\n    std::atomic<uint64_t> *mutable_loaded_costtime(void) {\n      return &loaded_costtime_;\n    }\n\n    //! Retrieve trained costtime of index (mutable)\n    std::atomic<uint64_t> *mutable_trained_costtime(void) {\n      return &trained_costtime_;\n    }\n\n    //! 
Retrieve built costtime of index (mutable)\n    std::atomic<uint64_t> *mutable_built_costtime(void) {\n      return &built_costtime_;\n    }\n\n    //! Retrieve dump costtime of index (mutable)\n    std::atomic<uint64_t> *mutable_dumped_costtime(void) {\n      return &dumped_costtime_;\n    }\n\n    void clear() {\n      this->clear_attributes();\n\n      revision_id_ = 0u;\n\n      trained_count_ = 0u;\n      built_count_ = 0u;\n      dumped_count_ = 0u;\n      loaded_count_ = 0u;\n      added_count_ = 0u;\n      discarded_count_ = 0u;\n      updated_count_ = 0u;\n      deleted_count_ = 0u;\n\n      index_size_ = 0u;\n      dumped_size_ = 0u;\n      check_point_ = 0u;\n\n      create_time_ = 0u;\n      update_time_ = 0u;\n      loaded_costtime_ = 0u;\n      trained_costtime_ = 0u;\n      built_costtime_ = 0u;\n      dumped_costtime_ = 0u;\n    }\n\n   private:\n    //! Members\n    size_t revision_id_{0u};\n\n    std::atomic<size_t> trained_count_{0u};\n    std::atomic<size_t> built_count_{0u};\n    std::atomic<size_t> dumped_count_{0u};\n    std::atomic<size_t> loaded_count_{0u};\n    std::atomic<size_t> added_count_{0u};\n    std::atomic<size_t> discarded_count_{0u};\n    std::atomic<size_t> updated_count_{0u};\n    std::atomic<size_t> deleted_count_{0u};\n\n    std::atomic<size_t> index_size_{0u};\n    std::atomic<size_t> dumped_size_{0u};\n    std::atomic<uint64_t> check_point_{0u};\n\n    std::atomic<uint64_t> create_time_{0u};\n    std::atomic<uint64_t> update_time_{0u};\n    std::atomic<uint64_t> loaded_costtime_{0u};\n    std::atomic<uint64_t> trained_costtime_{0u};\n    std::atomic<uint64_t> built_costtime_{0u};\n    std::atomic<uint64_t> dumped_costtime_{0u};\n  };\n\n  //! Constructor\n  IndexRunner() = default;\n\n  //! Destructor\n  virtual ~IndexRunner() = default;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const = 0;\n\n  //! Cleanup Searcher\n  virtual int cleanup() = 0;\n\n  //! 
Unload Searcher\n  virtual int unload() {\n    return IndexError_NotImplemented;\n  }\n\n  //! Print debug info\n  virtual void print_debug_info() {};\n\n  //! Create a searcher context\n  virtual Context::Pointer create_context(void) const {\n    return Context::Pointer();\n  }\n\n  //! Create a searcher provider\n  virtual Provider::Pointer create_provider(void) const {\n    return Provider::Pointer();\n  }\n\n  //! Create a searcher sparse provider\n  virtual SparseProvider::Pointer create_sparse_provider(void) const {\n    return SparseProvider::Pointer();\n  }\n\n  //! Get vector by key\n  virtual const void *get_vector(uint64_t /*key*/) const {\n    return nullptr;\n  }\n\n  virtual int get_vector(const uint64_t /*key*/,\n                         IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Fetch vector by id\n  virtual const void *get_vector_by_id(uint32_t /*id*/) const {\n    return nullptr;\n  }\n\n  virtual int get_vector_by_id(const uint32_t /*id*/,\n                               IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int get_vector_by_key(const uint64_t /*key*/,\n                                IndexStorage::MemoryBlock & /*block*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Get vector by key\n  virtual int get_sparse_vector(uint64_t /*key*/, uint32_t * /*sparse_count*/,\n                                std::string * /*sparse_indices_buffer*/,\n                                std::string * /*sparse_values_buffer*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Fetch vector by id\n  virtual int get_sparse_vector_by_id(\n      uint32_t /*id*/, uint32_t * /*sparse_count*/,\n      std::string * /*sparse_indices_buffer*/,\n      std::string * /*sparse_values_buffer*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Add a vector into index\n  virtual int add_impl(uint64_t /*key*/, const void * /*query*/,\n                       const IndexQueryMeta & /*qmeta*/,\n                       Context::Pointer & /*context*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add a vector with id into index\n  virtual int add_with_id_impl(uint32_t /*id*/, const void * /*query*/,\n                               const IndexQueryMeta & /*qmeta*/,\n                               Context::Pointer & /*context*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Similarity search\n  virtual int search_impl(const void * /*query*/,\n                          const IndexQueryMeta & /*qmeta*/,\n                          Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n  //! Similarity search\n  virtual int search_impl(const void * /*query*/,\n                          const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n                          Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  virtual int search_impl(const uint32_t * /*sparse_count*/,\n                          const uint32_t * /*sparse_indices*/,\n                          const void * /*sparse_query*/,\n                          const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n                          Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Similarity search with sparse inputs\n  virtual int search_impl(const uint32_t /*sparse_count*/,\n                          const uint32_t * /*sparse_indices*/,\n                          const void * /*sparse_query*/,\n                          const IndexQueryMeta & /*qmeta*/,\n                          Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Similarity brute force search\n  virtual int search_bf_impl(const void * /*query*/,\n                             const IndexQueryMeta & /*qmeta*/,\n                             Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n  //! Similarity brute force search\n  virtual int search_bf_impl(const void * /*query*/,\n                             const IndexQueryMeta & /*qmeta*/,\n                             uint32_t /*count*/,\n                             Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add a vector into index with dense and sparse inputs\n  virtual int add_impl(uint64_t /* pkey */, const uint32_t /* sparse_count*/,\n                       const uint32_t * /* sparse_indices */,\n                       const void * /* sparse_query */,\n                       const IndexQueryMeta & /* qmeta */,\n                       Context::Pointer & /* context */) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Add a vector with id into index\n  virtual int add_with_id_impl(uint32_t /* id */,\n                               const uint32_t /* sparse_count*/,\n                               const uint32_t * /* sparse_indices */,\n                               const void * /* sparse_query */,\n                               const IndexQueryMeta & /* qmeta */,\n                               Context::Pointer & /* context */) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Bruteforce search with sparse inputs\n  virtual int search_bf_impl(const uint32_t * /*sparse_count*/,\n                             const uint32_t * /*sparse_indices*/,\n                             const void * /*sparse_query*/,\n                             const IndexQueryMeta & /*qmeta*/,\n                             uint32_t /*count*/,\n                             Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Bruteforce search with sparse inputs\n  virtual int search_bf_impl(const uint32_t /*sparse_count*/,\n                             const uint32_t * /*sparse_indices*/,\n                             const void * /*sparse_query*/,\n                             const IndexQueryMeta & /*qmeta*/,\n                             Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Similarity brute force search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,\n      const IndexQueryMeta &qmeta, Context::Pointer &context) const {\n    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);\n  }\n\n  //! Similarity brute force search by primary keys\n  virtual int search_bf_by_p_keys_impl(\n      const void * /*query*/,\n      const std::vector<std::vector<uint64_t>> & /*p_keys*/,\n      const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n      Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Linear search by primary keys with dense and sparse inputs\n  virtual int search_bf_by_p_keys_impl(\n      const uint32_t /* sparse_count */, const uint32_t * /* sparse_indices */,\n      const void * /* sparse_query */,\n      const std::vector<std::vector<uint64_t>> & /* p_keys */,\n      const IndexQueryMeta & /* qmeta */,\n      Context::Pointer & /* context */) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Linear search by primary keys with dense and sparse inputs\n  virtual int search_bf_by_p_keys_impl(\n      const uint32_t * /* sparse_count */,\n      const uint32_t * /* sparse_indices */, const void * /* sparse_query */,\n      const std::vector<std::vector<uint64_t>> & /*p_keys */,\n      const IndexQueryMeta & /* qmeta */, uint32_t /* count */,\n      Context::Pointer & /* context */) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! 
Linear search by primary keys with dense and sparse inputs\n  virtual int search_bf_by_p_keys_impl(\n      const void * /*dense_query*/, const uint32_t /*sparse_count*/,\n      const uint32_t * /*sparse_indices*/, const void * /*sparse_query*/,\n      const std::vector<std::vector<uint64_t>> & /*p_keys*/,\n      const IndexQueryMeta & /*qmeta*/, Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Linear search by primary keys with dense and sparse inputs\n  virtual int search_bf_by_p_keys_impl(\n      const void * /*dense_query*/, const uint32_t * /*sparse_count*/,\n      const uint32_t * /*sparse_indices*/, const void * /*sparse_query*/,\n      const std::vector<std::vector<uint64_t>> & /*p_keys*/,\n      const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,\n      Context::Pointer & /*context*/) const {\n    return IndexError_NotImplemented;\n  }\n\n  //! Update the vector in index\n  virtual int update_impl(uint64_t /*key*/, const void * /*query*/,\n                          const IndexQueryMeta & /*qmeta*/,\n                          Context::Pointer & /*context*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Delete the vector in index\n  virtual int remove_impl(uint64_t /*key*/, Context::Pointer & /*context*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Optimize the index\n  virtual int optimize_impl(IndexThreads::Pointer) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Delete the vector in index\n  int remove(uint64_t key, Context::Pointer &context) {\n    return this->remove_impl(key, context);\n  }\n\n  //! Optimize the index\n  int optimize(IndexThreads::Pointer threads) {\n    return this->optimize_impl(threads);\n  }\n\n  //! Train the data\n  virtual int train(IndexHolder::Pointer holder) {\n    return this->train(nullptr, std::move(holder));\n  }\n\n  //! 
Train the data\n  virtual int train(IndexThreads::Pointer /*threads*/,\n                    IndexHolder::Pointer /*holder*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Train the data\n  virtual int train(const IndexTrainer::Pointer & /*trainer*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Train the data\n  virtual int train(IndexSparseHolder::Pointer holder) {\n    return this->train(nullptr, std::move(holder));\n  }\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer /*threads*/,\n                    IndexSparseHolder::Pointer /*holder*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Build the index\n  virtual int build(IndexHolder::Pointer holder) {\n    return this->build(nullptr, std::move(holder));\n  }\n\n  //! Build the index\n  virtual int build(IndexThreads::Pointer /*threads*/,\n                    IndexHolder::Pointer /*holder*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Build the index\n  virtual int build(IndexSparseHolder::Pointer holder) {\n    return this->build(nullptr, std::move(holder));\n  }\n\n  //! Build the index\n  virtual int build(IndexThreads::Pointer /*threads*/,\n                    IndexSparseHolder::Pointer /*holder*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Build the index with indptr format\n  virtual int build(size_t count, const uint64_t *keys,\n                    const uint64_t *sparse_indptr,\n                    const uint32_t *sparse_indices, const void *sparse_data) {\n    return this->build(nullptr, count, keys, sparse_indptr, sparse_indices,\n                       sparse_data);\n  }\n\n  virtual int build(const IndexQueryMeta &qmeta, size_t count,\n                    const uint64_t *keys, const uint64_t *sparse_indptr,\n                    const uint32_t *sparse_indices, const void *sparse_data) {\n    return this->build(nullptr, qmeta, count, keys, sparse_indptr,\n                       sparse_indices, sparse_data);\n  }\n\n  //! 
Build the index with indptr format\n  virtual int build(IndexThreads::Pointer /*threads*/, size_t /*count*/,\n                    const uint64_t * /*keys*/,\n                    const uint64_t * /*sparse_indptr*/,\n                    const uint32_t * /*sparse_indices*/,\n                    const void * /*sparse_data*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Build the index with indptr format\n  virtual int build(IndexThreads::Pointer /*threads*/,\n                    const IndexQueryMeta & /*qmeta*/, size_t /*count*/,\n                    const uint64_t * /*keys*/,\n                    const uint64_t * /*sparse_indptr*/,\n                    const uint32_t * /*sparse_indices*/,\n                    const void * /*sparse_data*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {\n    return IndexError_NotImplemented;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_searcher.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_metric.h>\n#include <zvec/core/framework/index_module.h>\n#include <zvec/core/framework/index_provider.h>\n#include <zvec/core/framework/index_runner.h>\n#include <zvec/core/framework/index_stats.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Searcher\n */\nclass IndexSearcher : public IndexRunner {\n public:\n  //! Index Searcher Pointer\n  typedef std::shared_ptr<IndexSearcher> Pointer;\n\n  //! Constructor\n  IndexSearcher() = default;\n\n  //! Destructor\n  virtual ~IndexSearcher() = default;\n\n  //! Initialize Searcher\n  virtual int init(const ailego::Params & /*params*/) = 0;\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const = 0;\n\n  //! Retrieve params of index\n  virtual const ailego::Params &params(void) const = 0;\n\n  virtual int load(IndexStorage::Pointer /*container*/,\n                   IndexMetric::Pointer /*metric*/) {\n    return IndexError_NotImplemented;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_segment_storage.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_module.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_unpacker.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Segment Container\n */\nclass IndexSegmentStorage : public IndexStorage {\n public:\n  //! Index Segment Container Pointer\n  typedef std::shared_ptr<IndexSegmentStorage> Pointer;\n\n  /*! Index Container Segment\n   */\n  class Segment : public IndexStorage::Segment,\n                  public std::enable_shared_from_this<Segment> {\n   public:\n    //! Index Container Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Constructor\n    Segment(const Segment &rhs) = delete;\n\n    //! Constructor\n    Segment(const IndexStorage::Segment::Pointer &parent,\n            const IndexUnpacker::SegmentMeta &segment)\n        : data_offset_(segment.data_offset()),\n          data_size_(segment.data_size()),\n          padding_size_(segment.padding_size()),\n          region_size_(segment.data_size() + segment.padding_size()),\n          data_crc_(segment.data_crc()),\n          parent_(parent->clone()) {}\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    size_t data_size(void) const override {\n      return data_size_;\n    }\n\n    //! 
Retrieve crc of data\n    uint32_t data_crc(void) const override {\n      return data_crc_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const override {\n      return padding_size_;\n    }\n\n    size_t capacity(void) const override {\n      return region_size_;\n    }\n\n    //! Fetch data from segment (with own buffer)\n    size_t fetch(size_t offset, void *buf, size_t len) const override {\n      return parent_->fetch(data_offset_ + offset, buf, len);\n    }\n\n    //! Read data from segment\n    size_t read(size_t offset, const void **data, size_t len) override {\n      return parent_->read(data_offset_ + offset, data, len);\n    }\n\n    size_t read(size_t offset, MemoryBlock &data, size_t len) override {\n      const void **data_ptr = nullptr;\n      size_t ret = parent_->read(data_offset_ + offset, data_ptr, len);\n      data.reset((void *)*data_ptr);\n      return ret;\n    }\n\n    //! Read data from segment\n    bool read(SegmentData *iovec, size_t count) override {\n      for (SegmentData *it = iovec, *end = iovec + count; it != end; ++it) {\n        it->offset += data_offset_;\n      }\n      bool success = parent_->read(iovec, count);\n      for (SegmentData *it = iovec, *end = iovec + count; it != end; ++it) {\n        it->offset -= data_offset_;\n      }\n      return success;\n    }\n\n    size_t write(size_t, const void *, size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    size_t resize(size_t) override {\n      return IndexError_NotImplemented;\n    }\n\n    void update_data_crc(uint32_t) override {\n      return;\n    }\n\n    //! Clone the segment\n    IndexStorage::Segment::Pointer clone(void) override {\n      return shared_from_this();\n    }\n\n   private:\n    size_t data_offset_{0u};\n    size_t data_size_{0u};\n    size_t padding_size_{0u};\n    size_t region_size_{0u};\n    uint32_t data_crc_{0u};\n    IndexStorage::Segment::Pointer parent_{nullptr};\n  };\n\n  //! 
Constructor\n  IndexSegmentStorage(IndexStorage::Segment::Pointer &&seg)\n      : parent_(std::move(seg)) {}\n\n  //! Constructor\n  IndexSegmentStorage(const IndexStorage::Segment::Pointer &seg)\n      : parent_(seg) {}\n\n  //! Destructor\n  virtual ~IndexSegmentStorage(void) {}\n\n  //! Initialize container\n  int init(const ailego::Params &) override {\n    return 0;\n  }\n\n  //! Cleanup container\n  int cleanup(void) override {\n    return 0;\n  }\n\n  //! Load the current segment, ignore path\n  int open(const std::string &, bool) override {\n    if (!parent_) {\n      LOG_ERROR(\"Failed to load an empty segment\");\n      return IndexError_NoReady;\n    }\n\n    auto read_data = [this](size_t offset, const void **data, size_t len) {\n      return this->parent_->read(offset, data, len);\n    };\n\n    IndexUnpacker unpacker;\n    if (!unpacker.unpack(read_data, parent_->data_size(), false)) {\n      LOG_ERROR(\"Failed to unpack segment data\");\n      return IndexError_UnpackIndex;\n    }\n    segments_ = std::move(*unpacker.mutable_segments());\n    magic_ = unpacker.magic();\n    return 0;\n  }\n\n  //! Retrieve a segment by id\n  IndexStorage::Segment::Pointer get(const std::string &id, int) override {\n    if (!parent_) {\n      return IndexStorage::Segment::Pointer();\n    }\n    auto it = segments_.find(id);\n    if (it == segments_.end()) {\n      return IndexStorage::Segment::Pointer();\n    }\n    return std::make_shared<IndexSegmentStorage::Segment>(parent_, it->second);\n  }\n\n  //! Test if a segment exists\n  bool has(const std::string &id) const override {\n    return (segments_.find(id) != segments_.end());\n  }\n\n  //! 
Retrieve all segments\n  std::map<std::string, IndexStorage::Segment::Pointer> get_all(\n      void) const override {\n    std::map<std::string, IndexStorage::Segment::Pointer> result;\n    if (parent_) {\n      for (const auto &it : segments_) {\n        result.emplace(it.first, std::make_shared<IndexSegmentStorage::Segment>(\n                                     parent_, it.second));\n      }\n    }\n    return result;\n  }\n\n  //! Unload all indexes\n  int close(void) override {\n    parent_ = nullptr;\n    segments_.clear();\n    return 0;\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const override {\n    return magic_;\n  }\n\n  int flush(void) override {\n    return IndexError_NotImplemented;\n  }\n\n  int append(const std::string & /*id*/, size_t /*size*/) override {\n    return IndexError_NotImplemented;\n  }\n\n  void refresh(uint64_t) override {\n    return;\n  }\n\n  uint64_t check_point(void) const override {\n    return 0;\n  }\n\n private:\n  uint32_t magic_{0};\n  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};\n  IndexStorage::Segment::Pointer parent_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_stats.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/container/params.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Stats\n */\nclass IndexStats {\n public:\n  //! Test if the element exists\n  bool has_attribute(const std::string &key) const {\n    return attributes_.has(key);\n  }\n\n  //! Set the value of key in T\n  template <typename T>\n  bool set_attribute(const std::string &key, T &&val) {\n    return attributes_.set<T>(key, std::forward<T>(val));\n  }\n\n  //! Retrieve attribute with key\n  template <typename T>\n  bool get_attribute(const std::string &key, T *out) const {\n    return attributes_.get<T>(key, out);\n  }\n\n  //! Erase the pair via a key\n  bool erase_attribute(const std::string &key) {\n    return attributes_.erase(key);\n  }\n\n  //! Clear the attributes\n  void clear_attributes(void) {\n    attributes_.clear();\n  }\n\n  //! Retrieve attributes\n  const ailego::Params &attributes(void) const {\n    return attributes_;\n  }\n\n  //! Retrieve mutable attributes\n  ailego::Params *mutable_attributes(void) {\n    return &attributes_;\n  }\n\n private:\n  //! Members\n  ailego::Params attributes_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_storage.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/buffer/buffer_pool.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_module.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Storage\n */\nclass IndexStorage : public IndexModule {\n public:\n  //! Index Storage Pointer\n  typedef std::shared_ptr<IndexStorage> Pointer;\n\n  struct MemoryBlock {\n    enum MemoryBlockType {\n      MBT_UNKNOWN = 0,\n      MBT_MMAP = 1,\n      MBT_BUFFERPOOL = 2,\n    };\n\n    MemoryBlock() {}\n    MemoryBlock(ailego::VecBufferPoolHandle *buffer_pool_handle,\n                size_t block_id, void *data)\n        : type_(MemoryBlockType::MBT_BUFFERPOOL) {\n      buffer_pool_handle_ = buffer_pool_handle;\n      buffer_block_id_ = block_id;\n      data_ = data;\n    }\n    MemoryBlock(void *data) : type_(MemoryBlockType::MBT_MMAP), data_(data) {}\n\n    MemoryBlock(const MemoryBlock &rhs) {\n      switch (rhs.type_) {\n        case MemoryBlockType::MBT_MMAP:\n          this->reset(rhs.data_);\n          break;\n        case MemoryBlockType::MBT_BUFFERPOOL:\n          this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_, rhs.data_);\n          buffer_pool_handle_->acquire_one(buffer_block_id_);\n          break;\n        default:\n          break;\n      }\n    }\n\n    
MemoryBlock(MemoryBlock &&rhs) {\n      switch (rhs.type_) {\n        case MemoryBlockType::MBT_MMAP:\n          this->reset(std::move(rhs.data_));\n          break;\n        case MemoryBlockType::MBT_BUFFERPOOL:\n          this->reset(std::move(rhs.buffer_pool_handle_),\n                      std::move(rhs.buffer_block_id_), std::move(rhs.data_));\n          rhs.buffer_pool_handle_ = nullptr;\n          rhs.type_ = MemoryBlockType::MBT_UNKNOWN;\n          break;\n        default:\n          break;\n      }\n    }\n\n    MemoryBlock &operator=(const MemoryBlock &rhs) {\n      if (this != &rhs) {\n        switch (rhs.type_) {\n          case MemoryBlockType::MBT_MMAP:\n            this->reset(rhs.data_);\n            break;\n          case MemoryBlockType::MBT_BUFFERPOOL:\n            this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_,\n                        rhs.data_);\n            buffer_pool_handle_->acquire_one(buffer_block_id_);\n            break;\n          default:\n            break;\n        }\n      }\n      return *this;\n    }\n\n    MemoryBlock &operator=(MemoryBlock &&rhs) {\n      if (this != &rhs) {\n        switch (rhs.type_) {\n          case MemoryBlockType::MBT_MMAP:\n            this->reset(std::move(rhs.data_));\n            break;\n          case MemoryBlockType::MBT_BUFFERPOOL:\n            this->reset(std::move(rhs.buffer_pool_handle_),\n                        std::move(rhs.buffer_block_id_), std::move(rhs.data_));\n            rhs.buffer_pool_handle_ = nullptr;\n            rhs.type_ = MemoryBlockType::MBT_UNKNOWN;\n            break;\n          default:\n            break;\n        }\n      }\n      return *this;\n    }\n\n    ~MemoryBlock() {\n      switch (type_) {\n        case MemoryBlockType::MBT_MMAP:\n          break;\n        case MemoryBlockType::MBT_BUFFERPOOL:\n          if (buffer_pool_handle_) {\n            buffer_pool_handle_->release_one(buffer_block_id_);\n          }\n          break;\n        default:\n        
  break;\n      }\n      data_ = nullptr;\n    }\n\n    const void *data() const {\n      return data_;\n    }\n\n    void reset(ailego::VecBufferPoolHandle *buffer_pool_handle, size_t block_id,\n               void *data) {\n      if (type_ == MemoryBlockType::MBT_BUFFERPOOL) {\n        buffer_pool_handle_->release_one(buffer_block_id_);\n      }\n      type_ = MemoryBlockType::MBT_BUFFERPOOL;\n      buffer_pool_handle_ = buffer_pool_handle;\n      buffer_block_id_ = block_id;\n      data_ = data;\n    }\n\n    void reset(void *data) {\n      if (type_ == MemoryBlockType::MBT_BUFFERPOOL) {\n        buffer_pool_handle_->release_one(buffer_block_id_);\n        buffer_pool_handle_ = nullptr;\n      }\n      type_ = MemoryBlockType::MBT_MMAP;\n      data_ = data;\n    }\n\n    MemoryBlockType type_{MBT_UNKNOWN};\n    void *data_{nullptr};\n    mutable ailego::VecBufferPoolHandle *buffer_pool_handle_{nullptr};\n    size_t buffer_block_id_{0};\n  };\n\n  struct SegmentData {\n    //! Constructor\n    SegmentData(void) : offset(0u), length(0u), data(nullptr) {}\n\n    //! Constructor\n    SegmentData(size_t off, size_t len)\n        : offset(off), length(len), data(nullptr) {}\n\n    //! Members\n    size_t offset;\n    size_t length;\n    const void *data;\n  };\n\n  /*! Index Storage Segment\n   */\n  struct Segment {\n    //! Index Storage Pointer\n    typedef std::shared_ptr<Segment> Pointer;\n\n    //! Destructor\n    virtual ~Segment(void) {}\n\n    //! Retrieve size of data\n    virtual size_t data_size(void) const = 0;\n\n    //! Retrieve crc of data\n    virtual uint32_t data_crc(void) const = 0;\n\n    //! Retrieve size of padding\n    virtual size_t padding_size(void) const = 0;\n\n    //! Retrieve capacity of segment\n    virtual size_t capacity(void) const = 0;\n\n    //! Fetch data from segment (with own buffer)\n    virtual size_t fetch(size_t offset, void *buf, size_t len) const = 0;\n\n    //! 
Read data from segment\n    virtual size_t read(size_t offset, const void **data, size_t len) = 0;\n\n    virtual size_t read(size_t offset, MemoryBlock &data, size_t len) = 0;\n\n    virtual bool read(SegmentData *, size_t) {\n      return false;\n    }\n\n    //! Write data into the storage with offset\n    virtual size_t write(size_t offset, const void *data, size_t len) = 0;\n\n    //! Resize size of data\n    virtual size_t resize(size_t size) = 0;\n\n    //! Update crc of data\n    virtual void update_data_crc(uint32_t crc) = 0;\n\n    //! Clone the segment\n    virtual Pointer clone(void) = 0;\n  };\n\n  //! Destructor\n  virtual ~IndexStorage(void) {}\n\n  //! Initialize storage\n  virtual int init(const ailego::Params &params) = 0;\n\n  //! Cleanup storage\n  virtual int cleanup(void) = 0;\n\n  //! Open storage\n  virtual int open(const std::string &path, bool create) = 0;\n\n  //! Flush storage\n  virtual int flush(void) = 0;\n\n  //! Close storage\n  virtual int close(void) = 0;\n\n  //! Append a segment into storage\n  virtual int append(const std::string &id, size_t size) = 0;\n\n  //! Refresh meta information (checksum, update time, etc.)\n  virtual void refresh(uint64_t check_point) = 0;\n\n  //! Retrieve check point of storage\n  virtual uint64_t check_point(void) const = 0;\n\n  //! Retrieve a segment by id\n  virtual Segment::Pointer get(const std::string &id, int level = -1) = 0;\n\n  virtual std::map<std::string, Segment::Pointer> get_all(void) const {\n    // LOG_ERROR(\"get_all() Not Implemented\");\n    std::map<std::string, Segment::Pointer> result;\n    return result;\n  }\n\n  //! Test if a segment exists\n  virtual bool has(const std::string &id) const = 0;\n\n  //! Retrieve magic number of index\n  virtual uint32_t magic(void) const = 0;\n\n  //! huge page\n  virtual bool isHugePage(void) const {\n    return false;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_streamer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_helper.h>\n#include <zvec/core/framework/index_provider.h>\n#include <zvec/core/framework/index_runner.h>\n#include <zvec/core/framework/index_stats.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Streamer\n */\nclass IndexStreamer : public IndexRunner {\n public:\n  //! Index Streamer Pointer\n  typedef std::shared_ptr<IndexStreamer> Pointer;\n\n  //! Destructor\n  virtual ~IndexStreamer(void) = default;\n\n  //! Initialize the streamer\n  virtual int init(const IndexMeta & /*meta*/,\n                   const ailego::Params & /*params*/) {\n    return IndexError_NotImplemented;\n  }\n\n  //! Open an index from storage\n  virtual int open(IndexStorage::Pointer stg) = 0;\n\n  //! Flush index\n  virtual int flush(uint64_t check_point) = 0;\n\n  //! Close index\n  virtual int close(void) = 0;\n\n  //! Retrieve meta of index\n  virtual const IndexMeta &meta(void) const = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_threads.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <thread>\n#include <utility>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/pattern/closure.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Threads\n *  Index ThreadPool maintains multiple threads resources to execute the tasks\n *  concurrently\n */\nclass IndexThreads {\n public:\n  using Pointer = std::shared_ptr<IndexThreads>;\n\n  /*! Threads Task Group\n   *  Manage of a group of sub-tasks which can be seen as a big task,\n   *  so we can wait all sub-tasks finished, or get the status of them\n   */\n  class TaskGroup {\n   public:\n    using Pointer = std::shared_ptr<TaskGroup>;\n\n    //! Destructor\n    virtual ~TaskGroup(void) {}\n\n    //! Submit a task to be executed asynchronous\n    virtual void submit(ailego::ClosureHandler &&task) = 0;\n\n    //! Check if the group is finished\n    virtual bool is_finished(void) const = 0;\n\n    //! Wait until all tasks in group finished\n    virtual void wait_finish(void) = 0;\n  };\n\n  //! Destructor\n  virtual ~IndexThreads(void) {}\n\n  //! Retrieve thread count in pool\n  virtual size_t count(void) const = 0;\n\n  //! Stop all threads\n  virtual void stop(void) = 0;\n\n  //! Submit a task to be executed asynchronous\n  virtual void submit(ailego::ClosureHandler &&task) = 0;\n\n  //! 
Make a task group\n  virtual TaskGroup::Pointer make_group(void) = 0;\n\n  //! Get the current work thread index\n  virtual int indexof_this(void) const = 0;\n};\n\n/*! Single Queue Index Threads\n */\nclass SingleQueueIndexThreads : public IndexThreads {\n public:\n  /*! Single Queue Index Threads Task Group\n   */\n  class SingleQueueTaskGroup : public TaskGroup {\n   public:\n    using Pointer = std::shared_ptr<SingleQueueTaskGroup>;\n\n    //! Constructor\n    explicit SingleQueueTaskGroup(\n        ailego::ThreadPool::TaskGroup::Pointer task_group)\n        : task_group_(std::move(task_group)) {}\n\n    //! Submit a task to be executed asynchronous\n    void submit(ailego::ClosureHandler &&task) override {\n      while (task_group_->pending_count() >= kMaxQueueSize) {\n        std::this_thread::sleep_for(std::chrono::milliseconds(1));\n      }\n      task_group_->enqueue_and_wake(std::move(task));\n    }\n\n    //! Check if the group is finished\n    bool is_finished(void) const override {\n      return task_group_->is_finished();\n    }\n\n    //! Wait until all tasks in group finished\n    void wait_finish(void) override {\n      return task_group_->wait_finish();\n    }\n\n   private:\n    //! Members\n    ailego::ThreadPool::TaskGroup::Pointer task_group_{};\n  };\n\n  //! Constructor\n  SingleQueueIndexThreads(uint32_t size, bool binding)\n      : pool_(\n            size > 0 ? size : std::max(std::thread::hardware_concurrency(), 1u),\n            binding) {}\n\n  //! Constructor\n  explicit SingleQueueIndexThreads(bool binding)\n      : SingleQueueIndexThreads(0, binding) {}\n\n  //! Constructor\n  SingleQueueIndexThreads(void) : SingleQueueIndexThreads{false} {}\n\n  //! Destructor\n  virtual ~SingleQueueIndexThreads(void) {}\n\n  //! Retrieve thread count in pool\n  size_t count(void) const override {\n    return pool_.count();\n  }\n\n  //! Stop all threads\n  void stop(void) override {\n    pool_.stop();\n  }\n\n  //! 
Submit a task to be executed asynchronous\n  void submit(ailego::ClosureHandler &&task) override {\n    while (pool_.pending_count() >= kMaxQueueSize) {\n      std::this_thread::sleep_for(std::chrono::milliseconds(1));\n    }\n    pool_.enqueue_and_wake(std::move(task));\n  }\n\n  //! Make a task group\n  TaskGroup::Pointer make_group(void) override {\n    return std::make_shared<SingleQueueTaskGroup>(pool_.make_group());\n  }\n\n  //! Get the current work thread index\n  int indexof_this(void) const override {\n    return pool_.indexof_this();\n  }\n\n private:\n  static constexpr size_t kMaxQueueSize = 4096u;\n\n  //! Disable them\n  SingleQueueIndexThreads(const SingleQueueIndexThreads &) = delete;\n  SingleQueueIndexThreads(SingleQueueIndexThreads &&) = delete;\n  SingleQueueIndexThreads &operator=(const SingleQueueIndexThreads &) = delete;\n\n  //! Members\n  ailego::ThreadPool pool_{};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_trainer.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/core/framework/index_bundle.h>\n#include <zvec/core/framework/index_dumper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_stats.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/framework/index_threads.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Trainer\n */\nclass IndexTrainer : public IndexModule {\n public:\n  //! Index Trainer Pointer\n  typedef std::shared_ptr<IndexTrainer> Pointer;\n\n  /*! Index Trainer Stats\n   */\n  class Stats : public IndexStats {\n   public:\n    //! Set count of documents trained\n    void set_trained_count(size_t count) {\n      trained_count_ = count;\n    }\n\n    //! Set count of documents discarded\n    void set_discarded_count(size_t count) {\n      discarded_count_ = count;\n    }\n\n    //! Set time cost of documents trained\n    void set_trained_costtime(uint64_t cost) {\n      trained_costtime_ = cost;\n    }\n\n    //! Retrieve count of documents trained\n    size_t trained_count(void) const {\n      return trained_count_;\n    }\n\n    //! Retrieve count of documents discarded\n    size_t discarded_count(void) const {\n      return discarded_count_;\n    }\n\n    //! 
Retrieve time cost of documents trained\n    uint64_t trained_costtime(void) const {\n      return trained_costtime_;\n    }\n\n   private:\n    //! Members\n    size_t trained_count_{0u};\n    size_t discarded_count_{0u};\n    uint64_t trained_costtime_{0u};\n  };\n\n  //! Destructor\n  virtual ~IndexTrainer(void) {}\n\n  //! Initialize Trainer\n  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;\n\n  //! Cleanup Trainer\n  virtual int cleanup(void) = 0;\n\n  //! Train the data\n  virtual int train(IndexHolder::Pointer holder) {\n    return this->train(nullptr, holder);\n  }\n\n  //! Train the data\n  virtual int train(IndexThreads::Pointer threads,\n                    IndexHolder::Pointer holder) = 0;\n\n  //! Load index from container\n  virtual int load(IndexStorage::Pointer cntr) = 0;\n\n  //! Dump index into storage\n  virtual int dump(const IndexDumper::Pointer &dumper) = 0;\n\n  //! Retrieve Index Meta\n  virtual const IndexMeta &meta(void) const = 0;\n\n  //! Retrieve statistics\n  virtual const Stats &stats(void) const = 0;\n\n  //! Retrieve the output indexes\n  virtual IndexBundle::Pointer indexes(void) const = 0;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_unpacker.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <map>\n#include <zvec/ailego/utility/type_helper.h>\n#include <zvec/core/framework/index_error.h>\n#include <zvec/core/framework/index_format.h>\n#include <zvec/core/framework/index_logger.h>\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Unpacker\n */\nclass IndexUnpacker {\n public:\n  /*! Index Unpacker Segment Meta\n   */\n  class SegmentMeta {\n   public:\n    //! Constructor\n    SegmentMeta(size_t offset, size_t dsz, size_t psz, uint32_t crc)\n        : data_offset_(offset),\n          data_size_(dsz),\n          padding_size_(psz),\n          data_crc_(crc) {}\n\n    //! Retrieve offset of data\n    size_t data_offset(void) const {\n      return data_offset_;\n    }\n\n    //! Retrieve size of data\n    size_t data_size(void) const {\n      return data_size_;\n    }\n\n    //! Retrieve crc of data\n    uint32_t data_crc(void) const {\n      return data_crc_;\n    }\n\n    //! Retrieve size of padding\n    size_t padding_size(void) const {\n      return padding_size_;\n    }\n\n   private:\n    size_t data_offset_{0};\n    size_t data_size_{0};\n    size_t padding_size_{0};\n    uint32_t data_crc_{0};\n  };\n\n  //! Reset the unpacker\n  void reset(void) {\n    segments_.clear();\n  }\n\n  //! 
Retrieve segments of index package\n  const std::map<std::string, SegmentMeta> &segments(void) const {\n    return segments_;\n  }\n\n  //! Retrieve magic number of index\n  uint32_t magic(void) const {\n    return header_.magic;\n  }\n\n  //! Retrieve header of index package\n  const IndexFormat::MetaHeader &header(void) const {\n    return header_;\n  }\n\n  //! Retrieve footer of index package\n  const IndexFormat::MetaFooter &footer(void) const {\n    return footer_;\n  }\n\n  //! Retrieve version information\n  const std::string &version(void) const {\n    return version_;\n  }\n\n  //! Retrieve mutable segments of index package\n  std::map<std::string, SegmentMeta> *mutable_segments(void) {\n    return &segments_;\n  }\n\n  //! Unpack index data\n  template <typename TFunc>\n  bool unpack(TFunc read_data, size_t total, bool checksum) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n\n    while (true) {\n      if (!this->unpack_header(read_data)) {\n        LOG_ERROR(\"Failed to unpack index header\");\n        return false;\n      }\n      if (!this->unpack_footer(read_data, total)) {\n        LOG_ERROR(\"Failed to unpack index footer\");\n        return false;\n      }\n      if (!this->unpack_segments(read_data, total)) {\n        LOG_ERROR(\"Failed to unpack index segments' meta\");\n        return false;\n      }\n      if (checksum && !this->validate_checksum(read_data)) {\n        LOG_ERROR(\"Failed to validate checksum of index content\");\n        return false;\n      }\n      if (footer_.next_meta_header_offset == 0) {\n        break;\n      }\n      current_header_start_offset_ = footer_.next_meta_header_offset;\n    }\n\n    if (!this->unpack_version(read_data)) {\n      LOG_ERROR(\"Failed to unpack index version\");\n      return false;\n    }\n    return true;\n  }\n\n  //! 
Unpack index header\n  template <typename TFunc>\n  bool unpack_header(TFunc read_data) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n    const void *data = nullptr;\n    if (read_data(current_header_start_offset_, &data, sizeof(header_)) !=\n        sizeof(header_)) {\n      return false;\n    }\n\n    memcpy(&header_, data, sizeof(header_));\n    if (header_.meta_header_size != sizeof(header_)) {\n      return false;\n    }\n    if (ailego::Crc32c::Hash(&header_, sizeof(header_), header_.header_crc) !=\n        header_.header_crc) {\n      return false;\n    }\n    return true;\n  }\n\n  //! Unpack index footer\n  template <typename TFunc>\n  bool unpack_footer(TFunc read_data, size_t total) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n    if (header_.meta_footer_size != sizeof(footer_)) {\n      return false;\n    }\n\n    size_t footer_offset = ((int32_t)header_.meta_footer_offset < 0)\n                               ? 
total + (int32_t)header_.meta_footer_offset\n                               : header_.meta_footer_offset;\n    if (footer_offset + sizeof(footer_) > total) {\n      return false;\n    }\n\n    const void *data = nullptr;\n    if (read_data(current_header_start_offset_ + footer_offset, &data,\n                  sizeof(footer_)) != sizeof(footer_)) {\n      return false;\n    }\n\n    memcpy(&footer_, data, sizeof(footer_));\n    if (footer_.content_size + footer_.content_padding_size +\n            header_.content_offset >\n        footer_.total_size) {\n      return false;\n    }\n    if (ailego::Crc32c::Hash(&footer_, sizeof(footer_), footer_.footer_crc) !=\n        footer_.footer_crc) {\n      return false;\n    }\n    return true;\n  }\n\n  //! Unpack segments' meta\n  template <typename TFunc>\n  bool unpack_segments(TFunc read_data, size_t total) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n    if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >\n        footer_.segments_meta_size) {\n      return false;\n    }\n    size_t offset = ((int32_t)header_.meta_footer_offset < 0)\n                        ? 
total + (int32_t)header_.meta_footer_offset\n                        : header_.meta_footer_offset;\n    if (offset < footer_.segments_meta_size || offset > total) {\n      return false;\n    }\n    offset -= footer_.segments_meta_size;\n\n    const void *data = nullptr;\n    if (read_data(current_header_start_offset_ + offset, &data,\n                  footer_.segments_meta_size) != footer_.segments_meta_size) {\n      return false;\n    }\n    if (ailego::Crc32c::Hash(data, footer_.segments_meta_size, 0u) !=\n        footer_.segments_meta_crc) {\n      return false;\n    }\n\n    IndexFormat::SegmentMeta *seg = (IndexFormat::SegmentMeta *)data;\n    for (size_t i = 0; i < footer_.segment_count; ++i, ++seg) {\n      if (seg->segment_id_offset > footer_.segments_meta_size) {\n        return false;\n      }\n      if (seg->data_index > footer_.content_size) {\n        return false;\n      }\n      if (seg->data_index + seg->data_size > footer_.content_size) {\n        return false;\n      }\n      segments_.emplace(\n          std::string(reinterpret_cast<const char *>(data) +\n                      seg->segment_id_offset),\n          SegmentMeta(seg->data_index + header_.content_offset +\n                          current_header_start_offset_,\n                      seg->data_size, seg->padding_size, seg->data_crc));\n    }\n    return true;\n  }\n\n  //! 
Unpack index version\n  template <typename TFunc>\n  bool unpack_version(TFunc read_data) {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n\n    auto it = segments_.find(\"IndexVersion\");\n    if (it == segments_.end()) {\n      return false;\n    }\n\n    const SegmentMeta &segment = it->second;\n    const void *data = nullptr;\n\n    if (read_data(0 + segment.data_offset(), &data, segment.data_size()) !=\n        segment.data_size()) {\n      return false;\n    }\n    if (segment.data_crc() != 0u &&\n        ailego::Crc32c::Hash(data, segment.data_size(), 0u) !=\n            segment.data_crc()) {\n      return false;\n    }\n    version_.assign(reinterpret_cast<const char *>(data), segment.data_size());\n    return true;\n  }\n\n  //! Validate checksum of content\n  template <typename TFunc>\n  bool validate_checksum(TFunc read_data) const {\n    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,\n                                                const void **, size_t>::value,\n                  \"Invocable function type\");\n    if (footer_.content_crc == 0) {\n      return true;\n    }\n    const size_t block_size = 4096u;\n    const void *data = nullptr;\n    uint32_t checksum = 0u;\n    size_t total = footer_.content_size;\n    size_t offset = sizeof(header_);\n\n    while (total >= block_size) {\n      if (read_data(current_header_start_offset_ + offset, &data, block_size) !=\n          block_size) {\n        return false;\n      }\n      checksum = ailego::Crc32c::Hash(data, block_size, checksum);\n      total -= block_size;\n      offset += block_size;\n    }\n    if (read_data(current_header_start_offset_ + offset, &data, total) !=\n        total) {\n      return false;\n    }\n    checksum = ailego::Crc32c::Hash(data, total, checksum);\n    return (checksum == footer_.content_crc);\n  
}\n\n private:\n  IndexFormat::MetaHeader header_{};\n  IndexFormat::MetaFooter footer_{};\n  std::string version_{};\n  std::map<std::string, SegmentMeta> segments_{};\n  uint64_t current_header_start_offset_{0u};\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/framework/index_version.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\nnamespace zvec {\nnamespace core {\n\n/*! Index Version\n */\nstruct IndexVersion {\n  //! Retrieve the version number in string\n  static const char *String(void);\n\n  //! Retrieve the detailed version information\n  static const char *Details(void);\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/core/interface/constants.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n\nnamespace zvec::core_interface {\n\nconstexpr static uint32_t kDefaultHnswEfConstruction = 500;\nconstexpr static uint32_t kDefaultHnswNeighborCnt = 50;\n\nconstexpr static uint32_t kDefaultHnswEfSearch = 300;\n\nconstexpr const uint32_t kDefaultRabitqTotalBits = 7;\nconstexpr const uint32_t kDefaultRabitqNumClusters = 16;\n\n\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/include/zvec/core/interface/index.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <memory>\n#include <string>\n#include <variant>\n#include <vector>\n#include <zvec/core/framework/index_context.h>\n#include <zvec/core/framework/index_converter.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_filter.h>\n#include <zvec/core/framework/index_holder.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/framework/index_metric.h>\n#include <zvec/core/framework/index_reducer.h>\n#include <zvec/core/framework/index_reformer.h>\n#include <zvec/core/framework/index_searcher.h>\n#include <zvec/core/framework/index_storage.h>\n#include <zvec/core/interface/index_param.h>\n#include \"zvec/core/framework/index_provider.h\"\n\nnamespace zvec::core_interface {\n\nclass IndexFactory;\n\nstruct DenseVector {\n  const void *data;\n  // core::IndexQueryMeta meta;\n  // DenseVector(void *data) : data(data) {\n  //   meta.set_meta_type(core::IndexMeta::MetaType::MT_DENSE);\n  // };\n};\n\nstruct SparseVector {\n  uint32_t count;\n  const void *indices;\n  const void *values;\n\n  const uint32_t *get_indices() const {\n    return reinterpret_cast<const uint32_t *>(indices);\n  }\n\n  template <typename T = void>\n  const T *get_values() const {\n    return reinterpret_cast<const T *>(values);\n  }\n};\n\nstruct VectorData {\n  
std::variant<DenseVector, SparseVector> vector;\n\n  // DenseVector dense_vector;\n  // SparseVector sparse_vector;\n};\n\n// Used to pass mutable vectors\nstruct DenseVectorBuffer {\n  std::string data;  // use string to manage memory\n};\n\nstruct SparseVectorBuffer {\n  uint32_t count;\n  std::string indices;\n  std::string values;\n\n  uint32_t *get_indices() {\n    return reinterpret_cast<uint32_t *>(indices.data());\n  }\n\n  template <typename T = void>\n  T *get_values() {\n    return reinterpret_cast<T *>(values.data());\n  }\n};\n\nstruct VectorDataBuffer {\n  std::variant<DenseVectorBuffer, SparseVectorBuffer> vector_buffer;\n};\n\n\nstruct SearchResult {\n  core::IndexDocumentList doc_list_;\n  // use string to manage memory\n  std::vector<std::string> reverted_vector_list_{};\n  std::vector<std::string> reverted_sparse_values_list_{};\n};\n\nclass Index {\n public:\n  typedef std::shared_ptr<Index> Pointer;\n  virtual ~Index() = default;\n\n  // static Index::Pointer Create(const BaseIndexParam &param); //IndexFactory\n  virtual int Open(const std::string &file_path,\n                   StorageOptions storage_options);\n  int Close();\n  int Flush();\n  // virtual int Serialize(const std::string &file_path);\n  // virtual int Deserialize(const std::string &file_path);\n\n  // // TODO: use holder\n  // virtual int Build() = 0;\n  virtual int Train() {\n    is_trained_ = true;\n    return 0;\n  }\n\n  // virtual int Dump(const std::string &file_path) = 0;\n  virtual int Merge(const std::vector<Index::Pointer> &indexes,\n                    const IndexFilter &filter,\n                    const MergeOptions &options = {});\n  // TODO: static reduce\n\n  virtual int Add(const VectorData &vector, uint32_t doc_id);\n  virtual int Fetch(const uint32_t doc_id,\n                    VectorDataBuffer *vector_data_buffer);\n  virtual int Search(const VectorData &query,\n                     const BaseIndexQueryParam::Pointer &search_param,\n                     
SearchResult *result);\n\n  virtual BaseIndexParam::Pointer GetParam() const {\n    return std::make_shared<BaseIndexParam>(param_);\n  }\n\n  virtual bool IsTrained() const {\n    return is_trained_;\n  }\n\n  uint32_t GetDocCount() const {\n    if (streamer_ == nullptr) {\n      return -1;\n    }\n    if (is_sparse_) {\n      return streamer_->create_sparse_provider()->count();\n    } else {\n      return streamer_->create_provider()->count();\n    }\n  }\n\n  core::IndexStreamer::Pointer index_searcher() {\n    return streamer_;\n  }\n\n  core::IndexProvider::Pointer create_index_provider() const {\n    return streamer_->create_provider();\n  }\n\n  static std::string get_metric_name(MetricType metric_type, bool is_sparse);\n\n protected:\n  int _sparse_fetch(const uint32_t doc_id,\n                    VectorDataBuffer *vector_data_buffer);\n  virtual int _dense_fetch(const uint32_t doc_id,\n                           VectorDataBuffer *vector_data_buffer);\n\n  int _sparse_add(const VectorData &vector, const uint32_t doc_id,\n                  core::IndexContext::Pointer &context);\n  int _dense_add(const VectorData &vector, const uint32_t doc_id,\n                 core::IndexContext::Pointer &context);\n  int _sparse_search(const VectorData &query,\n                     const BaseIndexQueryParam::Pointer &search_param,\n                     SearchResult *result,\n                     core::IndexContext::Pointer &context);\n  int _dense_search(const VectorData &query,\n                    const BaseIndexQueryParam::Pointer &search_param,\n                    SearchResult *result, core::IndexContext::Pointer &context);\n  virtual int _prepare_for_search(\n      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,\n      core::IndexContext::Pointer &context) = 0;\n  virtual int _get_coarse_search_topk(\n      const BaseIndexQueryParam::Pointer &search_param);\n\n protected:\n  friend class IndexFactory;\n  Index() = default;\n  int Init(const 
BaseIndexParam &param);\n\n\n protected:\n  int ParseMetricName(const BaseIndexParam &param);\n  int CreateAndInitMetric(const BaseIndexParam &param);\n  int CreateAndInitConverterReformer(const QuantizerParam &param,\n                                     const BaseIndexParam &index_param);\n  virtual int CreateAndInitStreamer(const BaseIndexParam &param) = 0;\n\n protected:\n  bool init_context();\n  core::IndexContext::Pointer &acquire_context();\n  void release_context() {\n    // context_list_[get_context_index()]->reset();\n  }\n\n protected:\n  bool is_trained_{false};\n\n  BaseIndexParam param_;\n  ailego::Params proxima_index_params_{};\n  core::IndexMeta proxima_index_meta_{};  // IndexQueryMeta + other index config\n  core::IndexQueryMeta input_vector_meta_;     // input\n  core::IndexQueryMeta streamer_vector_meta_;  // after reformer.convert()\n\n  core::IndexBuilder::Pointer builder_{};\n  core::IndexStreamer::Pointer streamer_{};\n  core::IndexReformer::Pointer reformer_{};\n  core::IndexConverter::Pointer converter_{};  // for build()\n  core::IndexMetric::Pointer metric_{};        // to do normalization\n\n  size_t context_index_;\n  core::IndexStorage::Pointer storage_{};\n\n  bool is_open_{false};\n  bool is_sparse_{false};\n  bool is_huge_page_{false};\n  bool is_read_only_{false};\n};\n\n\nclass FlatIndex : public Index {\n public:\n  FlatIndex() = default;\n  // FlatIndex(const FlatIndexParam &param) : param_(param) {}\n  // FlatIndex(FlatIndexParam &&param) : param(std::move(param)) {}\n\n\n protected:\n  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;\n\n  virtual int _prepare_for_search(\n      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,\n      core::IndexContext::Pointer &context) override;\n\n private:\n  FlatIndexParam param_{};\n};\n\nclass IVFIndex : public Index {\n public:\n  IVFIndex() = default;\n\n protected:\n  virtual int CreateAndInitStreamer(const BaseIndexParam &param) 
override;\n\n  virtual int _prepare_for_search(\n      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,\n      core::IndexContext::Pointer &context) override;\n\n  virtual int Add(const VectorData &vector, uint32_t doc_id) override;\n\n  virtual int Train() override;\n\n  virtual int Open(const std::string &file_path,\n                   StorageOptions storage_options) override;\n\n  virtual int _dense_fetch(const uint32_t doc_id,\n                           VectorDataBuffer *vector_data_buffer) override;\n  virtual int Merge(const std::vector<Index::Pointer> &indexes,\n                    const IndexFilter &filter,\n                    const MergeOptions &options) override;\n  int GenerateHolder();\n\n private:\n  IVFIndexParam param_{};\n  std::mutex mutex_{};\n  std::vector<std::pair<uint64_t, std::string>> doc_cache_;\n  core::IndexHolder::Pointer holder_{};\n  std::string file_path_;\n};\n\n\nclass HNSWIndex : public Index {\n public:\n  HNSWIndex() = default;\n\n protected:\n  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;\n\n  virtual int _prepare_for_search(\n      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,\n      core::IndexContext::Pointer &context) override;\n  int _get_coarse_search_topk(\n      const BaseIndexQueryParam::Pointer &search_param) override;\n\n\n private:\n  HNSWIndexParam param_{};\n};\n\nclass HNSWRabitqIndex : public Index {\n public:\n  HNSWRabitqIndex() = default;\n\n protected:\n  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;\n\n  virtual int _prepare_for_search(\n      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,\n      core::IndexContext::Pointer &context) override;\n  int _get_coarse_search_topk(\n      const BaseIndexQueryParam::Pointer &search_param) override;\n\n private:\n  HNSWRabitqIndexParam param_{};\n};\n\n\n}  // namespace zvec::core_interface\n"
  },
  {
    "path": "src/include/zvec/core/interface/index_factory.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <zvec/core/interface/index.h>\n#include <zvec/core/interface/index_param.h>\n\nnamespace zvec::core_interface {\n\n// 索引的工厂类\nclass IndexFactory {\n public:\n  static Index::Pointer CreateAndInitIndex(const BaseIndexParam &param);\n\n  static BaseIndexParam::Pointer DeserializeIndexParamFromJson(\n      const std::string &json_str);\n\n\n  static std::string QueryParamSerializeToJson(\n      const BaseIndexQueryParam &param);\n\n\n  template <\n      typename QueryParamType,\n      std::enable_if_t<std::is_base_of_v<BaseIndexQueryParam, QueryParamType>,\n                       bool> = true>\n  static std::string QueryParamSerializeToJson(const QueryParamType &param,\n                                               bool omit_empty_value = false);\n\n  template <\n      typename QueryParamType,\n      std::enable_if_t<std::is_base_of_v<BaseIndexQueryParam, QueryParamType>,\n                       bool> = true>\n  static typename QueryParamType::Pointer QueryParamDeserializeFromJson(\n      const std::string &json_str);\n\n  // register() -- Index class should have a `create` interface\n};\n\n\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/include/zvec/core/interface/index_param.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <memory>\n#include <string>\n#include <vector>\n#include <zvec/ailego/encoding/json.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/core/framework/index_filter.h>\n#include <zvec/core/framework/index_meta.h>\n#include <zvec/core/interface/constants.h>\n#include \"zvec/core/framework/index_framework.h\"\n\nnamespace zvec::core_interface {\n#define MAX_DIMENSION 65536\n// #define MAX_EF_CONSTRUCTION 65536\n// #define MAX_EF_SEARCH 100\n\nclass IndexFactory;\nclass Index;\nclass BaseIndexParam;\nclass BaseIndexQueryParam;\n\nstruct StorageOptions {\n  enum class StorageType { kNone, kMMAP, kMemory, kBufferPool };\n\n  StorageType type = StorageType::kNone;\n  bool create_new = false;\n  bool read_only = false;\n};\n\nstruct MergeOptions {\n  uint32_t write_concurrency = 1;\n  ailego::ThreadPool *pool = nullptr;\n};\n\nusing IndexMeta = core::IndexMeta;\nusing IndexQueryMeta = core::IndexQueryMeta;\nusing DataType = core::IndexMeta::DataType;\nusing IndexFilter = core::IndexFilter;\n\n\n// 定义支持的索引类型\nenum class IndexType {\n  // to do: support factory's register, may change to\n  // `static constexpr std::string_view`, which may incur str comp overhead\n  kNone,\n  kFlat,\n  kIVF,  // it's actual a two-layer index\n  kHNSW,\n  kHNSWRabitq,\n};\n\nenum class IVFSearchMethod { kBF, 
kHNSW };\n\nenum class MetricType {\n  kNone,\n  kL2sq,  // Euclidean\n  kInnerProduct,\n  kCosine,\n  kMIPSL2sq  // spherical?\n};\n\nenum class QuantizerType {\n  kNone,\n  kPQ,        // Product Quantization\n  kQuickADC,  // TODO: +refiner ? // should be a type of index?\n  kAQ,\n  kFP16,\n  kInt8,\n  kInt4,\n  kRabitq,\n};\n\nstruct SerializableBase {\n  std::string SerializeToJson(bool omit_empty_value = false) const {\n    return zvec::ailego::JsonValue(SerializeToJsonObject(omit_empty_value))\n        .as_json_string()\n        .as_stl_string();\n  }\n\n  bool DeserializeFromJson(const std::string &json_str) {\n    ailego::JsonValue json_value;\n    if (!json_value.parse(json_str)) {\n      return false;\n    }\n    return DeserializeFromJsonObject(json_value.as_object());\n  }\n\n protected:\n  virtual ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const = 0;\n  virtual bool DeserializeFromJsonObject(\n      const ailego::JsonObject &json_obj) = 0;\n};\n\n// TODO: maybe a base class for quantizer?\nstruct QuantizerParam : public SerializableBase {\n  QuantizerType type = QuantizerType::kNone;\n  int num_subquantizers = 8;  // M\n  int num_bits = 8;           // bits per subquantizer\n\n  // Constructors\n  // QuantizerParam() = default;\n  QuantizerParam(QuantizerType t = QuantizerType::kNone, int subquantizers = 8,\n                 int bits = 8)\n      : type(t), num_subquantizers(subquantizers), num_bits(bits) {}\n\n\n protected:\n  friend class BaseIndexParam;\n  virtual ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const override;\n\n  virtual bool DeserializeFromJsonObject(\n      const ailego::JsonObject &json_obj) override;\n};\n\n// preprocessor\nenum class PreprocessorType {\n  kNone,\n  kPCA,\n  kOPQ,\n};\n\nstruct PreprocessorParam {\n  PreprocessorType type = PreprocessorType::kNone;\n\n  // Constructors\n  // PreprocessorParam() = default;\n  explicit 
PreprocessorParam(PreprocessorType t = PreprocessorType::kNone)\n      : type(t) {}\n};\n\nstruct RefinerParam {\n  using Pointer = std::shared_ptr<RefinerParam>;\n\n  float scale_factor_{0};\n  std::shared_ptr<Index> reference_index = nullptr;\n};\n\n// --- Query Parameters (can be passed to search methods) ---\nclass BaseIndexQueryParam {\n public:\n  using Pointer = std::shared_ptr<BaseIndexQueryParam>;\n\n  virtual ~BaseIndexQueryParam() = default;\n\n  uint32_t topk = 10;\n  bool fetch_vector = false;\n  std::shared_ptr<IndexFilter> filter = nullptr;\n  std::shared_ptr<std::vector<uint64_t>> bf_pks = nullptr;\n  float radius = 0.0f;\n  bool is_linear = false;\n  RefinerParam::Pointer refiner_param = nullptr;\n\n  virtual Pointer Clone() const = 0;\n};\n\nstruct FlatQueryParam : public BaseIndexQueryParam {\n  using Pointer = std::shared_ptr<FlatQueryParam>;\n\n  BaseIndexQueryParam::Pointer Clone() const override {\n    return std::make_shared<FlatQueryParam>(*this);\n  }\n};\n\nstruct HNSWQueryParam : public BaseIndexQueryParam {\n  using Pointer = std::shared_ptr<HNSWQueryParam>;\n\n  uint32_t ef_search = kDefaultHnswEfSearch;\n\n  BaseIndexQueryParam::Pointer Clone() const override {\n    return std::make_shared<HNSWQueryParam>(*this);\n  }\n};\n\nstruct HNSWRabitqQueryParam : public BaseIndexQueryParam {\n  using Pointer = std::shared_ptr<HNSWRabitqQueryParam>;\n\n  uint32_t ef_search = kDefaultHnswEfSearch;\n\n  BaseIndexQueryParam::Pointer Clone() const override {\n    return std::make_shared<HNSWRabitqQueryParam>(*this);\n  }\n};\n\nstruct IVFQueryParam : public BaseIndexQueryParam {\n  int nprobe = 10;\n  std::shared_ptr<BaseIndexQueryParam> l1QueryParam = nullptr;\n  std::shared_ptr<BaseIndexQueryParam> l2QueryParam = nullptr;\n\n  using Pointer = std::shared_ptr<IVFQueryParam>;\n\n  BaseIndexQueryParam::Pointer Clone() const override {\n    auto cloned_this = std::make_shared<IVFQueryParam>(*this);\n    cloned_this->l1QueryParam = l1QueryParam ? 
l1QueryParam->Clone() : nullptr;\n    cloned_this->l2QueryParam = l2QueryParam ? l2QueryParam->Clone() : nullptr;\n    return cloned_this;\n  }\n};\n\n// --- Construction Parameters ---\n// template<typename IndexQueryParamType>\nclass BaseIndexParam : public SerializableBase {\n public:\n  using Pointer = std::shared_ptr<BaseIndexParam>;\n\n  explicit BaseIndexParam(IndexType type = IndexType::kNone,\n                          MetricType metric = MetricType::kL2sq, int dim = 0,\n                          int ver = 0)\n      : index_type(type), metric_type(metric), dimension(dim), version(ver) {}\n\n  virtual ~BaseIndexParam() = default;\n\n  IndexType index_type = IndexType::kNone;\n  MetricType metric_type = MetricType::kL2sq;\n  int dimension = 0;  // [1, MAX_DIMENSION]\n  int version = 0;    // for compatibility\n  bool is_sparse = false;\n  bool is_huge_page = false;\n  DataType data_type = DataType::DT_UNDEFINED;\n  bool use_id_map = true;\n\n  // IndexMeta meta;\n  ailego::Params params;\n\n  // pipeline\n  PreprocessorParam preprocess_param;\n  QuantizerParam quantizer_param;\n\n  BaseIndexQueryParam::Pointer default_query_param = nullptr;\n  // virtual std::shared_ptr<BaseIndexQueryParam> GetDefaultQueryParam() const\n  // {\n  //   return std::make_shared<BaseIndexQueryParam>();\n  // }\n  //\n\n protected:\n  virtual bool DeserializeFromJsonObject(\n      const ailego::JsonObject &json_obj) override;\n  virtual ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const override;\n};\n\nstruct FlatIndexParam : public BaseIndexParam {\n  using Pointer = std::shared_ptr<FlatIndexParam>;\n  FlatIndexParam() : BaseIndexParam(IndexType::kFlat) {}\n\n  IndexMeta::MajorOrder major_order = IndexMeta::MajorOrder::MO_ROW;\n\n protected:\n  bool DeserializeFromJsonObject(const ailego::JsonObject &json_obj) override;\n  ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const override;\n};\n\nstruct 
IVFIndexParam : public BaseIndexParam {\n  using Pointer = std::shared_ptr<IVFIndexParam>;\n  int nlist = 1024;\n  int niters = 10;\n  std::shared_ptr<BaseIndexParam> l1Index = nullptr;\n  std::shared_ptr<BaseIndexParam> l2Index = nullptr;\n  bool use_soar = false;\n\n  // Constructors with delegation\n  IVFIndexParam() : BaseIndexParam(IndexType::kIVF) {}\n\n  IVFIndexParam(int nlist, int niters, std::shared_ptr<BaseIndexParam> l1Index,\n                std::shared_ptr<BaseIndexParam> l2Index)\n      : BaseIndexParam(IndexType::kIVF),\n        nlist(nlist),\n        niters(niters),\n        l1Index(std::move(l1Index)),\n        l2Index(std::move(l2Index)) {}\n\n  IVFIndexParam(MetricType metric, int dim, int nlist, int niters,\n                std::shared_ptr<BaseIndexParam> l1Index,\n                std::shared_ptr<BaseIndexParam> l2Index)\n      : BaseIndexParam(IndexType::kIVF, metric, dim),\n        nlist(nlist),\n        niters(niters),\n        l1Index(std::move(l1Index)),\n        l2Index(std::move(l2Index)) {}\n\n  // query param:\n  // topk of l1Index's param ==== IVFIndexQueryParam.nprobe\n  // topk of l2Index's param ==== IVFIndexQueryParam.topK\n\n  // IVFIndexParam.metric_type === l2Index's metric_type\n  // IVFIndexParam.quantization === l2Index's quantization\n};\n\nstruct HNSWIndexParam : public BaseIndexParam {\n  using Pointer = std::shared_ptr<HNSWIndexParam>;\n  int m = kDefaultHnswNeighborCnt;\n  int ef_construction = kDefaultHnswEfConstruction;\n\n  // Constructors with delegation\n  HNSWIndexParam() : BaseIndexParam(IndexType::kHNSW) {}\n\n  HNSWIndexParam(int m, int ef_construction)\n      : BaseIndexParam(IndexType::kHNSW),\n        m(m),\n        ef_construction(ef_construction) {}\n\n  HNSWIndexParam(MetricType metric, int dim, int m, int ef_construction)\n      : BaseIndexParam(IndexType::kHNSW, metric, dim),\n        m(m),\n        ef_construction(ef_construction) {}\n\n protected:\n  bool DeserializeFromJsonObject(const 
ailego::JsonObject &json_obj) override;\n  ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const override;\n};\n\nstruct HNSWRabitqIndexParam : public BaseIndexParam {\n  using Pointer = std::shared_ptr<HNSWRabitqIndexParam>;\n\n  // HNSW parameters\n  int m = kDefaultHnswNeighborCnt;\n  int ef_construction = kDefaultHnswEfConstruction;\n\n  // Rabitq parameters\n  int total_bits = kDefaultRabitqTotalBits;\n  int num_clusters = kDefaultRabitqNumClusters;\n  int sample_count = 0;\n  core::IndexProvider::Pointer provider = nullptr;\n  core::IndexReformer::Pointer reformer = nullptr;\n\n  // Constructors with delegation\n  HNSWRabitqIndexParam() : BaseIndexParam(IndexType::kHNSWRabitq) {}\n\n  HNSWRabitqIndexParam(int m, int ef_construction)\n      : BaseIndexParam(IndexType::kHNSWRabitq),\n        m(m),\n        ef_construction(ef_construction) {}\n\n  HNSWRabitqIndexParam(MetricType metric, int dim, int m, int ef_construction)\n      : BaseIndexParam(IndexType::kHNSWRabitq, metric, dim),\n        m(m),\n        ef_construction(ef_construction) {}\n\n protected:\n  bool DeserializeFromJsonObject(const ailego::JsonObject &json_obj) override;\n  ailego::JsonObject SerializeToJsonObject(\n      bool omit_empty_value = false) const override;\n};\n\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/include/zvec/core/interface/index_param_builders.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <zvec/core/interface/index_param.h>\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"zvec/core/interface/index.h\"\n\nnamespace zvec::core_interface {\n\n// struct ConditionalIndexParam {\n//     // predicate / rule / threshold\n//     // candidate\n// };\n\n\n// chaining calls builder\ntemplate <typename ActualIndexParamBuilderType, typename ActualIndexParamType>\nclass BaseIndexParamBuilder {  //  : public\n                               //  std::enable_shared_from_this<Resource>\n public:\n  BaseIndexParamBuilder() : param(std::make_shared<ActualIndexParamType>()) {};\n  virtual ~BaseIndexParamBuilder() = default;\n\n  ActualIndexParamBuilderType &WithVersion(int version) {\n    param.version = version;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithIndexType(IndexType index_type) {\n    param->index_type = index_type;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithMetricType(MetricType metric_type) {\n    param->metric_type = metric_type;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithDimension(int dimension) {\n    param->dimension = dimension;\n    return 
static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithPreprocessParam(\n      const PreprocessorParam &preprocess_param) {\n    param->preprocess_param =\n        std::make_shared<PreprocessorParam>(preprocess_param);\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithQuantizerParam(\n      const QuantizerParam &quantizer_param) {\n    param->quantizer_param = quantizer_param;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  // ActualIndexParamBuilderType &WithRefinerParam(\n  //     const RefinerParam &refiner_param) {\n  //   param->refiner_param = refiner_param;\n  //   return static_cast<ActualIndexParamBuilderType &>(*this);\n  // }\n  // ActualIndexParamBuilderType &WithDefaultQueryParam(\n  //     const BaseIndexQueryParam &default_query_param) {\n  //   param->default_query_param = default_query_param;\n  //   return static_cast<ActualIndexParamBuilderType &>(*this);\n  // }\n\n  ActualIndexParamBuilderType &WithIsSparse(bool is_sparse) {\n    param->is_sparse = is_sparse;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n  ActualIndexParamBuilderType &WithDataType(DataType data_type) {\n    param->data_type = data_type;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n\n  ActualIndexParamBuilderType &WithUseIDMap(bool use_id_map) {\n    param->use_id_map = use_id_map;\n    return static_cast<ActualIndexParamBuilderType &>(*this);\n  }\n\n  virtual std::shared_ptr<ActualIndexParamType> Build() = 0;\n\n protected:\n  std::shared_ptr<ActualIndexParamType> param;\n};\n\nclass FlatIndexParamBuilder\n    : public BaseIndexParamBuilder<FlatIndexParamBuilder, FlatIndexParam> {\n public:\n  FlatIndexParamBuilder() = default;\n  std::shared_ptr<FlatIndexParam> Build() override {\n    return param;\n  }\n};\n\nclass IVFIndexParamBuilder\n    : public BaseIndexParamBuilder<IVFIndexParamBuilder, IVFIndexParam> {\n 
public:\n  IVFIndexParamBuilder() = default;\n  IVFIndexParamBuilder &WithNList(int nlist) {\n    param->nlist = nlist;\n    return *this;\n  }\n  IVFIndexParamBuilder &WithNiters(int niters) {\n    param->niters = niters;\n    return *this;\n  }\n  IVFIndexParamBuilder &WithL1Index(const BaseIndexParam &l1Index) {\n    param->l1Index = std::make_shared<BaseIndexParam>(l1Index);\n    return *this;\n  }\n  IVFIndexParamBuilder &WithL2Index(const BaseIndexParam &l2Index) {\n    param->l2Index = std::make_shared<BaseIndexParam>(l2Index);\n    return *this;\n  }\n  IVFIndexParamBuilder &WithUseSoar(bool use_soar) {\n    param->use_soar = use_soar;\n    return *this;\n  }\n\n  std::shared_ptr<IVFIndexParam> Build() override {\n    return param;\n  }\n};\n\nclass HNSWIndexParamBuilder\n    : public BaseIndexParamBuilder<HNSWIndexParamBuilder, HNSWIndexParam> {\n public:\n  HNSWIndexParamBuilder() = default;\n  HNSWIndexParamBuilder &WithM(int m) {\n    param->m = m;\n    return *this;\n  }\n  HNSWIndexParamBuilder &WithEFConstruction(int ef_construction) {\n    param->ef_construction = ef_construction;\n    return *this;\n  }\n\n  std::shared_ptr<HNSWIndexParam> Build() override {\n    return param;\n  }\n};\n\nclass HNSWRabitqIndexParamBuilder\n    : public BaseIndexParamBuilder<HNSWRabitqIndexParamBuilder,\n                                   HNSWRabitqIndexParam> {\n public:\n  HNSWRabitqIndexParamBuilder() = default;\n  HNSWRabitqIndexParamBuilder &WithM(int m) {\n    param->m = m;\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithEFConstruction(int ef_construction) {\n    param->ef_construction = ef_construction;\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithTotalBits(int total_bits) {\n    param->total_bits = total_bits;\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithNumClusters(int num_clusters) {\n    param->num_clusters = num_clusters;\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithSampleCount(int sample_count) 
{\n    param->sample_count = sample_count;\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithReformer(\n      core::IndexReformer::Pointer reformer) {\n    param->reformer = std::move(reformer);\n    return *this;\n  }\n  HNSWRabitqIndexParamBuilder &WithProvider(\n      core::IndexProvider::Pointer provider) {\n    param->provider = std::move(provider);\n    return *this;\n  }\n  std::shared_ptr<HNSWRabitqIndexParam> Build() override {\n    return param;\n  }\n};\n\n//     class CompositeIndexParamBuilder : public\n//     BaseIndexParamBuilder<CompositeIndexParamBuilder, CompositeIndexParam>\n//     { public:\n//         CompositeIndexParamBuilder() = default;\n//         CompositeIndexParamBuilder &WithLayers(const\n//         std::vector<std::shared_ptr<BaseIndexParam>> &layers) {\n//             param.layers = layers;\n//             return *this;\n//         }\n//         // with layer\n//         CompositeIndexParamBuilder &WithLayer(const BaseIndexParam &layer)\n//         {\n//             param.layers.push_back(std::make_shared<BaseIndexParam>(layer));\n//             return *this;\n//         }\n\n//         CompositeIndexParamBuilder &WithLayer(const BaseIndexParam &layer,\n//                                               const BaseIndexQueryParam\n//                                               &default_query_param) {\n//             param.layers.push_back(std::make_shared<BaseIndexParam>(layer));\n//             param.layers.back()->default_query_param =\n//             std::make_shared<BaseIndexQueryParam>(default_query_param);\n//             return *this;\n//         }\n//         std::shared_ptr<CompositeIndexParam> Build() { return\n//         std::make_shared<CompositeIndexParam>(param); }\n\n//     private:\n//         CompositeIndexParam param;\n//     };\n\n\n#include <memory>\n#include <vector>\n\ntemplate <typename T, typename Derived>\nclass BaseIndexQueryParamBuilder {\n public:\n  // This allows derived builders to access the 
protected member\n  T m_param;\n\n  // Fluent setters for BaseIndexQueryParam fields\n  Derived &with_topk(int topk) {\n    m_param.topk = topk;\n    return static_cast<Derived &>(*this);\n  }\n\n  Derived &with_fetch_vector(bool fetch_vector) {\n    m_param.fetch_vector = fetch_vector;\n    return static_cast<Derived &>(*this);\n  }\n\n  Derived &with_filter(std::shared_ptr<IndexFilter> filter) {\n    m_param.filter = std::move(filter);\n    return static_cast<Derived &>(*this);\n  }\n\n  // Using a vector of uint64_t for the next one\n  Derived &with_bf_pks(std::shared_ptr<std::vector<uint64_t>> bf_pks) {\n    m_param.bf_pks = std::move(bf_pks);\n    return static_cast<Derived &>(*this);\n  }\n\n  Derived &with_radius(float radius) {\n    m_param.radius = radius;\n    return static_cast<Derived &>(*this);\n  }\n\n  Derived &with_is_linear(bool is_linear) {\n    m_param.is_linear = is_linear;\n    return static_cast<Derived &>(*this);\n  }\n\n  Derived &with_refiner_param(RefinerParam::Pointer refiner_param) {\n    m_param.refiner_param = std::move(refiner_param);\n    return static_cast<Derived &>(*this);\n  }\n};\n\n// FLAT builder (no extra fields, just inherits base functionality)\nclass FlatQueryParamBuilder\n    : public BaseIndexQueryParamBuilder<FlatQueryParam, FlatQueryParamBuilder> {\n public:\n  FlatQueryParam::Pointer build() {\n    return std::make_shared<FlatQueryParam>(std::move(m_param));\n  }\n};\n\n// Example Usage:\n// FlatQueryParam::Pointer flat_config = FlatQueryParamBuilder()\n//     .with_topk(20)\n//     .with_fetch_vector(true)\n//     .build();\n\n// HNSW builder (adds one specific field: ef_search)\nclass HNSWQueryParamBuilder\n    : public BaseIndexQueryParamBuilder<HNSWQueryParam, HNSWQueryParamBuilder> {\n public:\n  HNSWQueryParamBuilder &with_ef_search(int ef_search) {\n    m_param.ef_search = ef_search;\n    return *this;\n  }\n\n  HNSWQueryParam::Pointer build() {\n    return 
std::make_shared<HNSWQueryParam>(std::move(m_param));\n  }\n};\n\n// Example Usage:\n// HNSWQueryParam::Pointer hnsw_config = HNSWQueryParamBuilder()\n//     .with_topk(5)\n//     .with_ef_search(128) // HNSW specific\n//     .with_is_linear(false)\n//     .build();\n\n// IVF builder (adds specific fields: nprobe, l1QueryParam, l2QueryParam)\nclass IVFQueryParamBuilder\n    : public BaseIndexQueryParamBuilder<IVFQueryParam, IVFQueryParamBuilder> {\n public:\n  IVFQueryParamBuilder &with_nprobe(int nprobe) {\n    m_param.nprobe = nprobe;\n    return *this;\n  }\n\n  // Since l1QueryParam and l2QueryParam are shared_ptr to BaseIndexQueryParam,\n  // they can accept ANY derived configuration object.\n  IVFQueryParamBuilder &with_l1_query_param(\n      BaseIndexQueryParam::Pointer l1QueryParam) {\n    m_param.l1QueryParam = std::move(l1QueryParam);\n    return *this;\n  }\n\n  IVFQueryParamBuilder &with_l2_query_param(\n      BaseIndexQueryParam::Pointer l2QueryParam) {\n    m_param.l2QueryParam = std::move(l2QueryParam);\n    return *this;\n  }\n\n  IVFQueryParam::Pointer build() {\n    return std::make_shared<IVFQueryParam>(std::move(m_param));\n  }\n};\n\n// HNSW-Rabitq builder (adds ef_search field)\nclass HNSWRabitqQueryParamBuilder\n    : public BaseIndexQueryParamBuilder<HNSWRabitqQueryParam,\n                                        HNSWRabitqQueryParamBuilder> {\n public:\n  HNSWRabitqQueryParamBuilder &with_ef_search(int ef_search) {\n    m_param.ef_search = ef_search;\n    return *this;\n  }\n\n  HNSWRabitqQueryParam::Pointer build() {\n    return std::make_shared<HNSWRabitqQueryParam>(std::move(m_param));\n  }\n};\n\n// Example Usage:\n// // First, build the required nested params\n// auto nested_hnsw = HNSWQueryParamBuilder().with_ef_search(64).build();\n//\n// // Then, build the IVF param\n// IVFQueryParam::Pointer ivf_config = IVFQueryParamBuilder()\n//     .with_topk(10)\n//     .with_nprobe(50) // IVF specific\n//     .with_l1_query_param(nested_hnsw) 
// Set a nested config object\n//     .build();\n\n\nnamespace predefined {\n// some predefined index param builders, e.g., SCANN\nclass SCANNIndexParamBuilder {\n public:\n  // alias SCANNIIndexParam = xxxxx\n  std::shared_ptr<IVFIndexParam> Build() {\n    // SCANN\n    auto param_ptr =\n        IVFIndexParamBuilder()\n            .WithNList(40000)  //  10000000 -> 40000\n            .WithUseSoar(\n                true)  //  由于1个数据点可能对应2个partition，因此140个点中可能有重复，需要去重（保留一个取均值）\n            .WithQuantizerParam(QuantizerParam(QuantizerType::kQuickADC))\n            // .WithDefaultQueryParam(\n            //     IVFQueryParamBuilder().with_topk(140).with_nprobe(68).build())\n            // .WithRefinerParam(RefinerParam{\n            //     10,  // 140 -> 10\n            //     nullptr,\n            //     std::make_shared<QuantizerParam>(\n            //         QuantizerParam{QuantizerType::kFP16}),\n            // })\n            .WithL1Index(*(\n                IVFIndexParamBuilder()\n                    .WithMetricType(\n                        MetricType::kInnerProduct)  // Layer2  flat index\n                    .WithNList(700)                 //  40000 -> 700\n                    .WithQuantizerParam(\n                        QuantizerParam{QuantizerType::kQuickADC})\n                    // .WithDefaultQueryParam(IVFQueryParamBuilder()\n                    //                            .with_topk(68)\n                    //                            .with_nprobe(20)\n                    //                            .build())\n                    .WithL1Index(*(\n                        FlatIndexParamBuilder()\n                            .WithMetricType(MetricType::kL2sq)\n                            // implicit :\n                            // .WithDefaultQueryParam(FlatQueryParamBuilder().with_topk(20).build())\n                            .Build()))\n                    .Build()))\n            .Build();\n\n    return param_ptr;\n  }\n};\n\n}  // namespace 
predefined\n}  // namespace zvec::core_interface"
  },
  {
    "path": "src/include/zvec/db/collection.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <zvec/db/doc.h>\n#include <zvec/db/options.h>\n#include <zvec/db/stats.h>\n#include <zvec/db/status.h>\n\nnamespace zvec {\n\nclass Collection {\n public:\n  using Ptr = std::shared_ptr<Collection>;\n\n  /**\n   * @brief Create and open a collection.\n   *\n   * @param path The path to the collection.\n   * @param schema The schema of the collection.\n   * @param option The options of the collection.\n   * @return The collection OR an error.\n   */\n  static Result<Ptr> CreateAndOpen(const std::string &path,\n                                   const CollectionSchema &schema,\n                                   const CollectionOptions &option);\n\n  /**\n   * @brief Open an existing collection.\n   *\n   * @param path The path to the collection.\n   * @param option The options of the collection.\n   * @return The collection OR an error.\n   */\n  static Result<Ptr> Open(const std::string &path,\n                          const CollectionOptions &option);\n\n  virtual ~Collection();\n\n public:\n  virtual Status Destroy() = 0;\n\n  virtual Status Flush() = 0;\n\n  virtual Result<std::string> Path() const = 0;\n\n  virtual Result<CollectionStats> Stats() const = 0;\n\n  virtual Result<CollectionSchema> Schema() const = 0;\n\n  virtual Result<CollectionOptions> 
Options() const = 0;\n\n public:\n  virtual Status CreateIndex(\n      const std::string &column_name, const IndexParams::Ptr &index_params,\n      const CreateIndexOptions &options = CreateIndexOptions{0}) = 0;\n\n  virtual Status DropIndex(const std::string &column_name) = 0;\n\n  virtual Status Optimize(const OptimizeOptions &options = OptimizeOptions{\n                              0}) = 0;\n\n  virtual Status AddColumn(const FieldSchema::Ptr &column_schema,\n                           const std::string &expression,\n                           const AddColumnOptions &options = AddColumnOptions{\n                               0}) = 0;\n\n  virtual Status DropColumn(const std::string &column_name) = 0;\n\n  virtual Status AlterColumn(\n      const std::string &column_name, const std::string &rename,\n      const FieldSchema::Ptr &new_column_schema = nullptr,\n      const AlterColumnOptions &options = AlterColumnOptions{0}) = 0;\n\n  virtual Result<WriteResults> Insert(std::vector<Doc> &docs) = 0;\n\n  virtual Result<WriteResults> Upsert(std::vector<Doc> &docs) = 0;\n\n  virtual Result<WriteResults> Update(std::vector<Doc> &docs) = 0;\n\n  virtual Result<WriteResults> Delete(const std::vector<std::string> &pks) = 0;\n\n  virtual Status DeleteByFilter(const std::string &filter) = 0;\n\n  virtual Result<DocPtrList> Query(const VectorQuery &query) const = 0;\n\n  virtual Result<GroupResults> GroupByQuery(\n      const GroupByVectorQuery &query) const = 0;\n\n  virtual Result<DocPtrMap> Fetch(\n      const std::vector<std::string> &pks) const = 0;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/config.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <atomic>\n#include <cstdint>\n#include <memory>\n#include <zvec/ailego/pattern/singleton.h>\n#include <zvec/db/status.h>\n\nnamespace zvec {\n\nconst uint32_t MIN_LOG_FILE_SIZE = 128;\nconst uint32_t DEFAULT_LOG_FILE_SIZE = 2048;\nconst uint32_t DEFAULT_LOG_OVERDUE_DAYS = 7;\nconst std::string CONSOLE_LOG_TYPE_NAME = \"ConsoleLogger\";\nconst std::string FILE_LOG_TYPE_NAME = \"AppendLogger\";\nconst std::string DEFAULT_LOG_DIR = \"./logs\";\nconst std::string DEFAULT_LOG_BASENAME = \"zvec.log\";\n\nclass GlobalConfig : public ailego::Singleton<GlobalConfig> {\n  friend class ailego::Singleton<GlobalConfig>;\n\n public:\n  enum class LogLevel : uint8_t {\n    DEBUG = 0,\n    INFO,\n    WARN,\n    ERROR,\n    FATAL,\n  };\n\n  struct LogConfig {\n    LogLevel level;\n\n    LogConfig(LogLevel level) : level(level) {}\n    virtual ~LogConfig() = default;\n    virtual std::string GetLoggerType() const = 0;\n  };\n\n  // Console log configuration\n  struct ConsoleLogConfig : LogConfig {\n    ConsoleLogConfig(LogLevel level = LogLevel::WARN) : LogConfig{level} {}\n\n    std::string GetLoggerType() const override {\n      return CONSOLE_LOG_TYPE_NAME;\n    }\n  };\n\n  // File log configuration\n  struct FileLogConfig : LogConfig {\n    std::string dir;\n    std::string basename;\n    uint32_t file_size;  // MB\n    uint32_t 
overdue_days;\n\n    FileLogConfig(LogLevel level = LogLevel::WARN,\n                  std::string dir = DEFAULT_LOG_DIR,\n                  std::string basename = DEFAULT_LOG_BASENAME,\n                  uint32_t file_size = DEFAULT_LOG_FILE_SIZE,\n                  uint32_t overdue_days = DEFAULT_LOG_OVERDUE_DAYS)\n        : LogConfig{level},\n          dir{dir},\n          basename{basename},\n          file_size{file_size},\n          overdue_days(overdue_days) {}\n\n    std::string GetLoggerType() const override {\n      return FILE_LOG_TYPE_NAME;\n    }\n  };\n\n  // Configuration data structure\n  struct ConfigData {\n    uint64_t memory_limit_bytes;\n\n    // log\n    std::shared_ptr<LogConfig> log_config;\n\n    // query\n    uint32_t query_thread_count;\n    float invert_to_forward_scan_ratio;\n    float brute_force_by_keys_ratio;\n\n    // optimize\n    uint32_t optimize_thread_count;\n\n    ConfigData();\n  };\n\n  // Initialize the configuration (can only be called once)\n  Status Initialize(const ConfigData &config);\n\n  Status Validate(const ConfigData &config) const;\n\n  // Read-only accessors\n  uint64_t memory_limit_bytes() const noexcept;\n\n  const LogConfig &log_config() const noexcept {\n    return *config_.log_config;\n  }\n\n  std::string log_type() const noexcept {\n    return config_.log_config->GetLoggerType();\n  }\n\n  LogLevel log_level() const noexcept {\n    return config_.log_config->level;\n  }\n\n  // File log specific accessors (only valid when using FileLogConfig)\n  const std::string &log_dir() const noexcept {\n    const FileLogConfig *file_config =\n        dynamic_cast<const FileLogConfig *>(config_.log_config.get());\n    static const std::string empty_string = \"\";\n    return file_config ? 
file_config->dir : empty_string;\n  }\n\n  const std::string &log_file_basename() const noexcept {\n    const FileLogConfig *file_config =\n        dynamic_cast<const FileLogConfig *>(config_.log_config.get());\n    static const std::string empty_string = \"\";\n    return file_config ? file_config->basename : empty_string;\n  }\n\n  uint32_t log_file_size() const noexcept {\n    const FileLogConfig *file_config =\n        dynamic_cast<const FileLogConfig *>(config_.log_config.get());\n    return file_config ? file_config->file_size : 0;\n  }\n\n  uint32_t log_overdue_days() const noexcept {\n    const FileLogConfig *file_config =\n        dynamic_cast<const FileLogConfig *>(config_.log_config.get());\n    return file_config ? file_config->overdue_days : 0;\n  }\n\n  //! Query thread count\n  uint32_t query_thread_count() const noexcept {\n    return config_.query_thread_count;\n  }\n\n  //! Invert to forward scan ratio\n  float invert_to_forward_scan_ratio() const noexcept {\n    return config_.invert_to_forward_scan_ratio;\n  }\n\n  //! Brute force by keys ratio\n  float brute_force_by_keys_ratio() const noexcept {\n    return config_.brute_force_by_keys_ratio;\n  }\n\n  //! Optimize thread count\n  uint32_t optimize_thread_count() const noexcept {\n    return config_.optimize_thread_count;\n  }\n\n private:\n  // Configuration data\n  ConfigData config_;\n\n  // Atomic flag to ensure initialization happens only once\n  std::atomic<bool> initialized_{false};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/doc.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <sstream>\n#include <string>\n#include <unordered_map>\n#include <variant>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/db/query_params.h>\n#include <zvec/db/schema.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n\nnamespace zvec {\n\nusing float16_t = ailego::Float16;\n\nclass Doc {\n public:\n  using Value = std::variant<\n      std::monostate,  // 0 - represents null value\n      bool, int32_t, uint32_t, int64_t, uint64_t, float, double,  // 1~7\n      std::string,                                                // 8\n      std::vector<bool>,                                          // 9\n      std::vector<int8_t>,                                        // 10\n      std::vector<int16_t>,                                       // 11\n      std::vector<int32_t>,                                       // 12\n      std::vector<int64_t>,                                       // 13\n      std::vector<uint32_t>,                                      // 14\n      std::vector<uint64_t>,                                      // 15\n      std::vector<float16_t>,                                     // 16\n      std::vector<float>,                                         // 17\n      std::vector<double>,                                        // 18\n      
std::vector<std::string>,                                   // 19\n      std::pair<std::vector<uint32_t>, std::vector<float>>,       // 20\n      std::pair<std::vector<uint32_t>, std::vector<float16_t>>>;  // 21\n\n  using Ptr = std::shared_ptr<Doc>;\n\n  Doc() = default;\n  ~Doc() = default;\n\n  Doc(const Doc &) = default;\n  Doc &operator=(const Doc &) = default;\n  Doc(Doc &&) = default;\n  Doc &operator=(Doc &&) = default;\n\n public:\n  void set_pk(std::string pk) {\n    pk_ = std::move(pk);\n  }\n\n  std::string pk() const {\n    return pk_;\n  }\n\n  void set_score(float score) {\n    score_ = score;\n  }\n\n  float score() const {\n    return score_;\n  }\n\n  void set_doc_id(uint64_t doc_id) {\n    doc_id_ = doc_id;\n  }\n\n  uint64_t doc_id() const {\n    return doc_id_;\n  }\n\n  std::vector<std::string> field_names() const {\n    std::vector<std::string> names;\n    names.reserve(fields_.size());\n\n    for (const auto &[name, _] : fields_) {\n      names.emplace_back(name);\n    }\n\n    return names;\n  }\n\n  void set_operator(const Operator op) {\n    op_ = op;\n  }\n\n  Operator get_operator() {\n    return op_;\n  }\n\n  // Set field value\n  template <typename T>\n  bool set(const std::string &field_name, T value) {\n    // TODO: support char*\n    static_assert(is_valid_type_v<T>, \"Unsupported type\");\n    fields_[field_name] = std::move(value);\n    return true;\n  }\n\n  // Set field to null\n  void set_null(const std::string &field_name) {\n    fields_[field_name] = std::monostate{};\n  }\n\n  // Check if field exists\n  bool has(const std::string &field_name) const {\n    return fields_.find(field_name) != fields_.end();\n  }\n\n  // Check if field exists and is not null\n  bool has_value(const std::string &field_name) const {\n    auto it = fields_.find(field_name);\n    if (it == fields_.end()) {\n      return false;\n    }\n    return !std::holds_alternative<std::monostate>(it->second);\n  }\n\n  // Check if field is null\n  bool 
is_null(const std::string &field_name) const {\n    auto it = fields_.find(field_name);\n    if (it == fields_.end()) {\n      return false;  // Field does not exist is not equal to null\n    }\n    return std::holds_alternative<std::monostate>(it->second);\n  }\n\n  // Check if fields is empty\n  bool is_empty() const {\n    return fields_.empty();\n  }\n\n  // Field get status enumeration\n  enum class FieldGetStatus {\n    SUCCESS,       // Successfully got value\n    NOT_FOUND,     // Field does not exist\n    IS_NULL,       // Field exists but is null\n    TYPE_MISMATCH  // Field exists but type mismatch\n  };\n\n  // Field get result template class\n  template <typename T>\n  class FieldGetResult {\n   public:\n    // Constructor - success case\n    explicit FieldGetResult(const T &value)\n        : status_(FieldGetStatus::SUCCESS), value_(value) {}\n\n    // Constructor - error case\n    explicit FieldGetResult(FieldGetStatus status) : status_(status) {\n      if (status == FieldGetStatus::SUCCESS) {\n        throw std::invalid_argument(\"Use value constructor for SUCCESS status\");\n      }\n    }\n\n    // Get status\n    FieldGetStatus status() const {\n      return status_;\n    }\n\n    // Get value (only available when successful)\n    const T &value() const {\n      if (status_ != FieldGetStatus::SUCCESS) {\n        throw std::runtime_error(\"No value available\");\n      }\n      return value_;\n    }\n\n    // Check if successful\n    bool ok() const {\n      return status_ == FieldGetStatus::SUCCESS;\n    }\n\n    // Convert to optional\n    operator std::optional<T>() const {\n      if (status_ == FieldGetStatus::SUCCESS) {\n        return value_;\n      }\n      return std::nullopt;\n    }\n\n   private:\n    FieldGetStatus status_;\n    T value_;\n  };\n\n\n  // Get field value, distinguish between not found, null and type mismatch\n  // cases\n  template <typename T>\n  typename Doc::template FieldGetResult<T> get_field(\n      const 
std::string &field_name) const {\n    static_assert(is_valid_type_v<T>, \"Unsupported type\");\n\n    auto it = fields_.find(field_name);\n    if (it == fields_.end()) {\n      return FieldGetResult<T>(FieldGetStatus::NOT_FOUND);\n    }\n\n    if (std::holds_alternative<std::monostate>(it->second)) {\n      return FieldGetResult<T>(FieldGetStatus::IS_NULL);\n    }\n\n    try {\n      return FieldGetResult<T>(std::get<T>(it->second));\n    } catch (const std::bad_variant_access &) {\n      return FieldGetResult<T>(FieldGetStatus::TYPE_MISMATCH);\n    }\n  }\n\n  template <typename T>\n  std::optional<T> get(const std::string &field_name) const {\n    auto result = get_field<T>(field_name);\n    if (result.status() == FieldGetStatus::SUCCESS) {\n      return result.value();\n    }\n    return std::nullopt;\n  }\n\n  void remove(const std::string &field_name) {\n    fields_.erase(field_name);\n  }\n\n  Status validate(const CollectionSchema::Ptr &schema,\n                  bool is_update = false) const;\n\n  size_t memory_usage() const;\n\n  void clear() {\n    pk_.clear();\n    score_ = 0.0f;\n    doc_id_ = 0;\n    fields_.clear();\n  }\n\n  const std::string to_string() const {\n    std::stringstream ss;\n    ss << \"[op:\" << (uint32_t)op_ << \", doc_id: \" << doc_id_\n       << \", score: \" << score_ << \", pk: \" << pk_\n       << \", fields: \" << fields_.size() << \"]\";\n    return ss.str();\n  }\n\n  std::string to_detail_string() const;\n\n  bool operator==(const Doc &other) const;\n\n  bool operator!=(const Doc &other) const {\n    return !(*this == other);\n  }\n\n public:\n  std::vector<uint8_t> serialize() const;\n\n  static Doc::Ptr deserialize(const uint8_t *data, size_t size);\n  static Doc::Ptr deserialize(const std::vector<uint8_t> &data) {\n    return deserialize(data.data(), data.size());\n  }\n\n public:\n  void merge(const Doc &other) {\n    pk_ = other.pk_;\n    score_ = other.score_;\n    doc_id_ = other.doc_id_;\n    op_ = other.op_;\n    
for (const auto &[field_name, value] : other.fields_) {\n      fields_[field_name] = value;\n    }\n  }\n\n private:\n  static void serialize_value(std::vector<uint8_t> &buffer, const Value &value);\n\n  static Value deserialize_value(const uint8_t *&data, uint8_t type);\n  static Value deserialize_value(const uint8_t *&data);\n\n  static void write_to_buffer(std::vector<uint8_t> &buffer, const void *src,\n                              size_t size);\n\n  static void read_from_buffer(const uint8_t *&data, void *dest, size_t size);\n\n  struct ValueEqual;\n\n private:\n  std::string pk_;\n  float score_{0.0f};\n  uint64_t doc_id_;\n  Operator op_;\n\n  template <typename T>\n  static constexpr bool is_valid_type_v =\n      std::is_same_v<T, std::monostate> ||            // 0 - Added null support\n      std::is_same_v<T, bool> ||                      // 1\n      std::is_same_v<T, int32_t> ||                   // 2\n      std::is_same_v<T, uint32_t> ||                  // 3\n      std::is_same_v<T, int64_t> ||                   // 4\n      std::is_same_v<T, uint64_t> ||                  // 5\n      std::is_same_v<T, float> ||                     // 6\n      std::is_same_v<T, double> ||                    // 7\n      std::is_same_v<T, std::string> ||               // 8\n      std::is_same_v<T, std::vector<bool>> ||         // 9\n      std::is_same_v<T, std::vector<int8_t>> ||       // 10\n      std::is_same_v<T, std::vector<int16_t>> ||      // 11\n      std::is_same_v<T, std::vector<int32_t>> ||      // 12\n      std::is_same_v<T, std::vector<uint32_t>> ||     // 13\n      std::is_same_v<T, std::vector<int64_t>> ||      // 14\n      std::is_same_v<T, std::vector<uint64_t>> ||     // 15\n      std::is_same_v<T, std::vector<float16_t>> ||    // 16\n      std::is_same_v<T, std::vector<float>> ||        // 17\n      std::is_same_v<T, std::vector<double>> ||       // 18\n      std::is_same_v<T, std::vector<std::string>> ||  // 19\n      std::is_same_v<\n          T, 
std::pair<std::vector<uint32_t>, std::vector<float16_t>>> ||  // 21\n      std::is_same_v<\n          T, std::pair<std::vector<uint32_t>, std::vector<float>>>;  // 20\n\n  std::unordered_map<std::string, Value> fields_;\n};\n\nstd::string get_value_type_name(const Doc::Value &value, bool is_vector);\n\nusing DocPtrList = std::vector<Doc::Ptr>;\n\nusing DocPtrMap = std::unordered_map<std::string, Doc::Ptr>;\n\nusing WriteResults = std::vector<Status>;\n\nstruct VectorQuery {\n  int topk_;\n  std::string field_name_;\n  std::string query_vector_;  // fp16, void *\n  std::string query_sparse_indices_;\n  std::string query_sparse_values_;\n  std::string filter_;\n  bool include_vector_{false};\n  bool include_doc_id_{false};\n  // select * by default, select no field if output_fields_ is empty, select\n  // specific fields if output_fields_ is not empty\n  std::optional<std::vector<std::string>> output_fields_;\n  QueryParams::Ptr query_params_;\n\n  Status validate(const FieldSchema *schema) const;\n};\n\nstruct GroupByVectorQuery {\n  std::string field_name_;\n  std::string query_vector_;\n  std::string query_sparse_indices_;\n  std::string query_sparse_values_;\n  std::string filter_;\n  bool include_vector_;\n  // select * by default, select no field if output_fields_ is empty, select\n  // specific fields if output_fields_ is not empty\n  std::optional<std::vector<std::string>> output_fields_;\n  std::string group_by_field_name_;\n  uint32_t group_count_ = 2;\n  uint32_t group_topk_ = 3;\n  QueryParams::Ptr query_params_;\n};\n\nstruct GroupResult {\n  std::string group_by_value_;\n  std::vector<Doc> docs_;\n};\n\nusing GroupResults = std::vector<GroupResult>;\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/db/index_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <sstream>\n#include <string>\n#include <zvec/core/interface/constants.h>\n#include <zvec/db/type.h>\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n\nnamespace zvec {\n\n/*\n * Column index params\n */\nclass IndexParams {\n public:\n  using Ptr = std::shared_ptr<IndexParams>;\n\n  IndexParams(IndexType type) : type_(type) {}\n\n  virtual ~IndexParams() = default;\n\n  virtual Ptr clone() const = 0;\n\n  virtual bool operator==(const IndexParams &other) const = 0;\n\n  virtual std::string to_string() const = 0;\n\n  virtual bool operator!=(const IndexParams &other) const {\n    return !(*this == other);\n  }\n\n  bool is_vector_index_type() const {\n    return type_ == IndexType::FLAT || type_ == IndexType::HNSW ||\n           type_ == IndexType::HNSW_RABITQ || type_ == IndexType::IVF;\n  }\n\n  IndexType type() const {\n    return type_;\n  }\n\n protected:\n  IndexType type_;\n};\n\n/*\n * Scalar: Invert index params\n */\nclass InvertIndexParams : public IndexParams {\n public:\n  InvertIndexParams(bool enable_range_optimization = true,\n                    bool enable_extended_wildcard = false)\n      : IndexParams(IndexType::INVERT),\n        enable_range_optimization_(enable_range_optimization),\n        
enable_extended_wildcard_(enable_extended_wildcard) {}\n\n  using OPtr = std::shared_ptr<InvertIndexParams>;\n\n  Ptr clone() const override {\n    return std::make_shared<InvertIndexParams>(enable_range_optimization_,\n                                               enable_extended_wildcard_);\n  }\n\n  std::string to_string() const override;\n\n  bool operator==(const IndexParams &other) const override {\n    if (type() != other.type()) {\n      return false;\n    }\n    auto &other_invert = dynamic_cast<const InvertIndexParams &>(other);\n    return enable_range_optimization_ ==\n               other_invert.enable_range_optimization_ &&\n           enable_extended_wildcard_ == other_invert.enable_extended_wildcard_;\n  }\n\n  bool enable_range_optimization() const {\n    return enable_range_optimization_;\n  }\n\n  void set_enable_range_optimization(bool enable_range_optimization) {\n    enable_range_optimization_ = enable_range_optimization;\n  }\n\n  bool enable_extended_wildcard() const {\n    return enable_extended_wildcard_;\n  }\n\n  // Enables suffix and infix search.\n  // Note that prefix search is always enabled regardless of this setting.\n  void set_enable_extended_wildcard(bool enable_extended_wildcard) {\n    enable_extended_wildcard_ = enable_extended_wildcard;\n  }\n\n private:\n  bool enable_range_optimization_{false};\n  bool enable_extended_wildcard_{false};\n};\n\n/*\n * Column index params\n */\nclass VectorIndexParams : public IndexParams {\n public:\n  VectorIndexParams(IndexType type, MetricType metric_type,\n                    QuantizeType quantize_type = QuantizeType::UNDEFINED)\n      : IndexParams(type),\n        metric_type_(metric_type),\n        quantize_type_(quantize_type) {}\n\n  virtual ~VectorIndexParams() = default;\n\n  std::string vector_index_params_to_string(const std::string &class_name,\n                                            MetricType metric_type,\n                                            QuantizeType 
quantize_type) const;\n\n  MetricType metric_type() const {\n    return metric_type_;\n  }\n\n  void set_metric_type(MetricType metric_type) {\n    metric_type_ = metric_type;\n  }\n\n  QuantizeType quantize_type() const {\n    return quantize_type_;\n  }\n\n  void set_quantize_type(QuantizeType quantize_type) {\n    quantize_type_ = quantize_type;\n  }\n\n protected:\n  MetricType metric_type_;\n  QuantizeType quantize_type_;\n};\n\n/*\n * Vector: Hnsw index params\n */\nclass HnswIndexParams : public VectorIndexParams {\n public:\n  HnswIndexParams(\n      MetricType metric_type, int m = core_interface::kDefaultHnswNeighborCnt,\n      int ef_construction = core_interface::kDefaultHnswEfConstruction,\n      QuantizeType quantize_type = QuantizeType::UNDEFINED)\n      : VectorIndexParams(IndexType::HNSW, metric_type, quantize_type),\n        m_(m),\n        ef_construction_(ef_construction) {}\n\n  using OPtr = std::shared_ptr<HnswIndexParams>;\n\n public:\n  Ptr clone() const override {\n    return std::make_shared<HnswIndexParams>(metric_type_, m_, ef_construction_,\n                                             quantize_type_);\n  }\n\n  std::string to_string() const override {\n    auto base_str = vector_index_params_to_string(\"HnswIndexParams\",\n                                                  metric_type_, quantize_type_);\n    std::ostringstream oss;\n    oss << base_str << \",m:\" << m_ << \",ef_construction:\" << ef_construction_\n        << \"}\";\n    return oss.str();\n  }\n\n  bool operator==(const IndexParams &other) const override {\n    return type() == other.type() &&\n           metric_type() ==\n               static_cast<const HnswIndexParams &>(other).metric_type() &&\n           m_ == static_cast<const HnswIndexParams &>(other).m_ &&\n           ef_construction_ ==\n               static_cast<const HnswIndexParams &>(other).ef_construction_ &&\n           quantize_type() ==\n               static_cast<const HnswIndexParams 
&>(other).quantize_type();\n  }\n\n  void set_m(int m) {\n    m_ = m;\n  }\n  int m() const {\n    return m_;\n  }\n  void set_ef_construction(int ef_construction) {\n    ef_construction_ = ef_construction;\n  }\n  int ef_construction() const {\n    return ef_construction_;\n  }\n\n protected:\n  int m_;\n  int ef_construction_;\n};\n\nclass HnswRabitqIndexParams : public VectorIndexParams {\n public:\n  HnswRabitqIndexParams(\n      MetricType metric_type,\n      int total_bits = core_interface::kDefaultRabitqTotalBits,\n      int num_clusters = core_interface::kDefaultRabitqNumClusters,\n      int m = core_interface::kDefaultHnswNeighborCnt,\n      int ef_construction = core_interface::kDefaultHnswEfConstruction,\n      int sample_count = 0)\n      : VectorIndexParams(IndexType::HNSW_RABITQ, metric_type,\n                          QuantizeType::RABITQ),\n        total_bits_(total_bits),\n        num_clusters_(num_clusters),\n        sample_count_(sample_count),\n        m_(m),\n        ef_construction_(ef_construction) {}\n\n  using OPtr = std::shared_ptr<HnswRabitqIndexParams>;\n\n  Ptr clone() const override {\n    auto obj = std::make_shared<HnswRabitqIndexParams>(\n        metric_type_, total_bits_, num_clusters_, m_, ef_construction_,\n        sample_count_);\n    obj->set_rabitq_reformer(rabitq_reformer_);\n    obj->set_raw_vector_provider(raw_vector_provider_);\n    return obj;\n  }\n\n  std::string to_string() const override {\n    auto base_str = vector_index_params_to_string(\"HnswRabitqIndexParams\",\n                                                  metric_type_, quantize_type_);\n    std::ostringstream oss;\n    oss << base_str << \",total_bits:\" << total_bits_\n        << \",num_clusters:\" << num_clusters_\n        << \",sample_count:\" << sample_count_ << \",m:\" << m_\n        << \",ef_construction:\" << ef_construction_ << \"}\";\n    return oss.str();\n  }\n\n  bool operator==(const IndexParams &other) const override {\n    if (type() != 
other.type()) {\n      return false;\n    }\n    auto &other_rabitq = dynamic_cast<const HnswRabitqIndexParams &>(other);\n    return metric_type() == other_rabitq.metric_type() &&\n           quantize_type_ == other_rabitq.quantize_type_ &&\n           total_bits_ == other_rabitq.total_bits_ &&\n           num_clusters_ == other_rabitq.num_clusters_ &&\n           sample_count_ == other_rabitq.sample_count_ &&\n           m_ == other_rabitq.m_ &&\n           ef_construction_ == other_rabitq.ef_construction_;\n  }\n\n  void set_m(int m) {\n    m_ = m;\n  }\n  int m() const {\n    return m_;\n  }\n  void set_ef_construction(int ef_construction) {\n    ef_construction_ = ef_construction;\n  }\n  int ef_construction() const {\n    return ef_construction_;\n  }\n\n  void set_raw_vector_provider(\n      core::IndexProvider::Pointer raw_vector_provider) {\n    raw_vector_provider_ = std::move(raw_vector_provider);\n  }\n\n  void set_rabitq_reformer(core::IndexReformer::Pointer rabitq_reformer) {\n    rabitq_reformer_ = std::move(rabitq_reformer);\n  }\n  core::IndexReformer::Pointer rabitq_reformer() const {\n    return rabitq_reformer_;\n  }\n  core::IndexProvider::Pointer raw_vector_provider() const {\n    return raw_vector_provider_;\n  }\n\n  void set_total_bits(int total_bits) {\n    total_bits_ = total_bits;\n  }\n  int total_bits() const {\n    return total_bits_;\n  }\n\n  void set_num_clusters(int num_clusters) {\n    num_clusters_ = num_clusters;\n  }\n  int num_clusters() const {\n    return num_clusters_;\n  }\n\n  void set_sample_count(int sample_count) {\n    sample_count_ = sample_count;\n  }\n  int sample_count() const {\n    return sample_count_;\n  }\n\n private:\n  int total_bits_;\n  int num_clusters_;\n  int sample_count_;\n  int m_;\n  int ef_construction_;\n  core::IndexProvider::Pointer raw_vector_provider_;\n  core::IndexReformer::Pointer rabitq_reformer_;\n};\n\nclass FlatIndexParams : public VectorIndexParams {\n public:\n  
FlatIndexParams(MetricType metric_type,\n                  QuantizeType quantize_type = QuantizeType::UNDEFINED)\n      : VectorIndexParams(IndexType::FLAT, metric_type, quantize_type) {}\n\n  using OPtr = std::shared_ptr<FlatIndexParams>;\n\n public:\n  Ptr clone() const override {\n    return std::make_shared<FlatIndexParams>(metric_type_, quantize_type_);\n  }\n\n  std::string to_string() const override {\n    auto base_str = vector_index_params_to_string(\"FlatIndexParams\",\n                                                  metric_type_, quantize_type_);\n    std::ostringstream oss;\n    oss << base_str << \"}\";\n    return oss.str();\n  }\n\n  bool operator==(const IndexParams &other) const override {\n    return type() == other.type() &&\n           metric_type() ==\n               static_cast<const VectorIndexParams &>(other).metric_type() &&\n           quantize_type() ==\n               static_cast<const VectorIndexParams &>(other).quantize_type();\n  }\n};\n\n// define default index params\nconst FlatIndexParams DefaultVectorIndexParams(MetricType::IP);\n\ninline FlatIndexParams MakeDefaultVectorIndexParams(MetricType metric_type) {\n  return FlatIndexParams(metric_type);\n}\n\ninline FlatIndexParams MakeDefaultQuantVectorIndexParams(\n    MetricType metric_type, QuantizeType quantize_type) {\n  return FlatIndexParams(metric_type, quantize_type);\n}\n\nclass IVFIndexParams : public VectorIndexParams {\n public:\n  IVFIndexParams(MetricType metric_type, int n_list = 1024, int n_iters = 10,\n                 bool use_soar = false,\n                 QuantizeType quantize_type = QuantizeType::UNDEFINED)\n      : VectorIndexParams(IndexType::IVF, metric_type, quantize_type),\n        n_list_(n_list),\n        n_iters_(n_iters),\n        use_soar_(use_soar) {}\n\n  using OPtr = std::shared_ptr<IVFIndexParams>;\n\n public:\n  Ptr clone() const override {\n    return std::make_shared<IVFIndexParams>(metric_type_, n_list_, n_iters_,\n                             
               use_soar_, quantize_type_);\n  }\n\n  std::string to_string() const override {\n    auto base_str = vector_index_params_to_string(\"IVFIndexParams\",\n                                                  metric_type_, quantize_type_);\n    std::ostringstream oss;\n    oss << base_str << \",n_list:\" << n_list_ << \",n_iters:\" << n_iters_ << \"}\";\n    return oss.str();\n  }\n\n  int n_list() const {\n    return n_list_;\n  }\n\n  void set_n_list(int n_list) {\n    n_list_ = n_list;\n  }\n\n  int n_iters() const {\n    return n_iters_;\n  }\n\n  void set_n_iters(int n_iters) {\n    n_iters_ = n_iters;\n  }\n\n  bool use_soar() const {\n    return use_soar_;\n  }\n\n  void set_use_soar(bool use_soar) {\n    use_soar_ = use_soar;\n  }\n\n  bool operator==(const IndexParams &other) const override {\n    return type() == other.type() &&\n           metric_type() ==\n               static_cast<const IVFIndexParams &>(other).metric_type() &&\n           n_list_ == static_cast<const IVFIndexParams &>(other).n_list_ &&\n           n_iters_ == static_cast<const IVFIndexParams &>(other).n_iters_ &&\n           use_soar_ == static_cast<const IVFIndexParams &>(other).use_soar_ &&\n           quantize_type() ==\n               static_cast<const IVFIndexParams &>(other).quantize_type();\n  }\n\n private:\n  int n_list_;\n  int n_iters_;\n  bool use_soar_;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/options.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n\nnamespace zvec {\n\nconst uint32_t DEFAULT_MAX_BUFFER_SIZE = 64 * 1024 * 1024;  // 64M\n\nstruct CollectionOptions {\n  bool read_only_{false};\n  bool enable_mmap_{true};  // ignored when loading a collection\n  uint32_t max_buffer_size_{\n      DEFAULT_MAX_BUFFER_SIZE};  // ignored when read_only=true\n\n  bool operator==(const CollectionOptions &other) const {\n    return read_only_ == other.read_only_ &&\n           enable_mmap_ == other.enable_mmap_ &&\n           max_buffer_size_ == other.max_buffer_size_;\n  }\n\n  bool operator!=(const CollectionOptions &other) const {\n    return !(*this == other);\n  }\n\n  CollectionOptions() = default;\n\n  CollectionOptions(bool read_only, bool enable_mmap,\n                    uint32_t max_buffer_size = DEFAULT_MAX_BUFFER_SIZE)\n      : read_only_(read_only),\n        enable_mmap_(enable_mmap),\n        max_buffer_size_(max_buffer_size) {}\n};\n\nstruct SegmentOptions {\n  bool read_only_;\n  bool enable_mmap_;\n  uint32_t max_buffer_size_{DEFAULT_MAX_BUFFER_SIZE};\n};\n\nstruct CreateIndexOptions {\n  int concurrency_{0};  // default use config.optimize_thread_pool\n};\n\nstruct OptimizeOptions {\n  int concurrency_{0};\n};\n\nstruct AddColumnOptions {\n  int concurrency_{0};\n};\n\nstruct AlterColumnOptions {\n  int concurrency_{0};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/query_params.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <memory>\n#include <zvec/core/interface/constants.h>\n#include <zvec/db/type.h>\n\nnamespace zvec {\n\n/*\n * Query Index params\n */\nclass QueryParams {\n public:\n  using Ptr = std::shared_ptr<QueryParams>;\n\n  QueryParams(IndexType type) : type_(type) {}\n  virtual ~QueryParams() = default;\n\n  IndexType type() const {\n    return type_;\n  }\n\n  void set_type(IndexType type) {\n    type_ = type;\n  }\n\n  float radius() const {\n    return radius_;\n  }\n\n  void set_radius(float radius) {\n    radius_ = radius;\n  }\n\n  bool is_linear() const {\n    return is_linear_;\n  }\n\n  void set_is_linear(bool is_linear) {\n    is_linear_ = is_linear;\n  }\n\n  void set_is_using_refiner(bool is_using_refiner) {\n    is_using_refiner_ = is_using_refiner;\n  }\n  bool is_using_refiner() const {\n    return is_using_refiner_;\n  }\n\n private:\n  IndexType type_;\n  float radius_{0.0f};\n  bool is_linear_{false};\n\n  bool is_using_refiner_{false};\n};\n\nclass HnswQueryParams : public QueryParams {\n public:\n  HnswQueryParams(int ef = core_interface::kDefaultHnswEfSearch,\n                  float radius = 0.0f, bool is_linear = false,\n                  bool is_using_refiner = false)\n      : QueryParams(IndexType::HNSW), ef_(ef) {\n    set_radius(radius);\n    set_is_linear(is_linear);\n    
set_is_using_refiner(is_using_refiner);\n  }\n\n  virtual ~HnswQueryParams() = default;\n\n  int ef() const {\n    return ef_;\n  }\n\n  void set_ef(int ef) {\n    ef_ = ef;\n  }\n\n private:\n  int ef_;\n};\n\nclass IVFQueryParams : public QueryParams {\n public:\n  IVFQueryParams(int nprobe = 10, bool is_using_refiner = false,\n                 float scale_factor = 10)\n      : QueryParams(IndexType::IVF), nprobe_(nprobe) {\n    set_is_using_refiner(is_using_refiner);\n    set_scale_factor(scale_factor);\n  }\n\n  virtual ~IVFQueryParams() = default;\n\n  int nprobe() const {\n    return nprobe_;\n  }\n\n  void set_nprobe(int nprobe) {\n    nprobe_ = nprobe;\n  }\n\n  float scale_factor() const {\n    return scale_factor_;\n  }\n\n  void set_scale_factor(float scale_factor) {\n    scale_factor_ = scale_factor;\n  }\n\n private:\n  int nprobe_;\n  float scale_factor_{10};\n};\n\nclass HnswRabitqQueryParams : public QueryParams {\n public:\n  HnswRabitqQueryParams(int ef = core_interface::kDefaultHnswEfSearch,\n                        float radius = 0.0f, bool is_linear = false,\n                        bool is_using_refiner = false)\n      : QueryParams(IndexType::HNSW_RABITQ), ef_(ef) {\n    set_radius(radius);\n    set_is_linear(is_linear);\n    set_is_using_refiner(is_using_refiner);\n  }\n\n  virtual ~HnswRabitqQueryParams() = default;\n\n  int ef() const {\n    return ef_;\n  }\n\n  void set_ef(int ef) {\n    ef_ = ef;\n  }\n\n private:\n  int ef_;\n};\n\nclass FlatQueryParams : public QueryParams {\n public:\n  FlatQueryParams(bool is_using_refiner = false, float scale_factor = 10)\n      : QueryParams(IndexType::FLAT) {\n    set_is_using_refiner(is_using_refiner);\n    set_scale_factor(scale_factor);\n  }\n\n  virtual ~FlatQueryParams() = default;\n\n  float scale_factor() const {\n    return scale_factor_;\n  }\n\n  void set_scale_factor(float scale_factor) {\n    scale_factor_ = scale_factor;\n  }\n\n private:\n  float scale_factor_{10};\n};\n\n}  // 
namespace zvec"
  },
  {
    "path": "src/include/zvec/db/schema.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <unordered_map>\n#include <zvec/db/index_params.h>\n#include <zvec/db/status.h>\n#include <zvec/db/type.h>\n\nnamespace zvec {\n\nconst uint64_t MAX_DOC_COUNT_PER_SEGMENT = 10000000;\nconst uint64_t MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD = 1000;\n\n/*\n * Field schema\n */\nclass FieldSchema {\n public:\n  using Ptr = std::shared_ptr<FieldSchema>;\n\n public:\n  FieldSchema() = default;\n  FieldSchema(const std::string &name, DataType type)\n      : name_(name),\n        data_type_(type),\n        nullable_(false),\n        dimension_(0),\n        index_params_(nullptr) {}\n  FieldSchema(const std::string &name, DataType type, bool nullable,\n              const IndexParams::Ptr &index_params = nullptr)\n      : name_(name),\n        data_type_(type),\n        nullable_(nullable),\n        dimension_(0),\n        index_params_(index_params ? index_params->clone() : nullptr) {}\n  FieldSchema(const std::string &name, DataType type, uint32_t dimension,\n              bool nullable, const IndexParams::Ptr &index_params = nullptr)\n      : name_(name),\n        data_type_(type),\n        nullable_(nullable),\n        dimension_(dimension),\n        index_params_(index_params ? 
index_params->clone() : nullptr) {}\n  FieldSchema(const FieldSchema &other)\n      : name_(other.name_),\n        data_type_(other.data_type_),\n        nullable_(other.nullable_),\n        dimension_(other.dimension_),\n        index_params_(other.index_params_ ? other.index_params_->clone()\n                                          : nullptr) {}\n  FieldSchema &operator=(const FieldSchema &other) {\n    if (this != &other) {\n      name_ = other.name_;\n      data_type_ = other.data_type_;\n      nullable_ = other.nullable_;\n      dimension_ = other.dimension_;\n      index_params_ =\n          other.index_params_ ? other.index_params_->clone() : nullptr;\n    }\n    return *this;\n  }\n  FieldSchema(FieldSchema &&) = default;\n  FieldSchema &operator=(FieldSchema &&) = default;\n  ;\n  ~FieldSchema() = default;\n\n public:\n  bool operator==(const FieldSchema &other) const {\n    bool index_params_equal = false;\n    if (index_params_ == nullptr && other.index_params_ == nullptr) {\n      index_params_equal = true;\n    } else if (index_params_ != nullptr && other.index_params_ != nullptr) {\n      index_params_equal = (*index_params_ == *(other.index_params_));\n    } else {\n      index_params_equal = false;\n    }\n\n    return name_ == other.name_ && data_type_ == other.data_type_ &&\n           nullable_ == other.nullable_ && dimension_ == other.dimension_ &&\n           index_params_equal;\n  }\n\n  bool operator!=(const FieldSchema &other) const {\n    return !(*this == other);\n  }\n\n  std::string to_string() const;\n\n  std::string to_string_formatted(int indent_level = 0) const;\n\n public:\n  void set_name(const std::string &name) {\n    name_ = name;\n  }\n\n  const std::string &name() const {\n    return name_;\n  }\n\n  void set_data_type(DataType type) {\n    data_type_ = type;\n  }\n\n  DataType data_type() const {\n    return data_type_;\n  }\n\n  DataType element_data_type() const {\n    return get_element_data_type(data_type_);\n  }\n\n  
size_t element_data_size() const {\n    return get_element_data_size(data_type_);\n  }\n\n  bool is_vector_field() const {\n    return is_vector_field(data_type_);\n  }\n\n  bool is_dense_vector() const {\n    return is_dense_vector_field(data_type_);\n  }\n\n  bool is_sparse_vector() const {\n    return is_sparse_vector_field(data_type_);\n  }\n\n  bool nullable() const {\n    return nullable_;\n  }\n\n  void set_nullable(bool nullable) {\n    nullable_ = nullable;\n  }\n\n  bool has_invert_index() const {\n    return !is_vector_field() && index_params_ != nullptr;\n  }\n\n  bool is_array_type() const {\n    return data_type_ >= DataType::ARRAY_BINARY &&\n           data_type_ <= DataType::ARRAY_DOUBLE;\n  }\n\n  void set_dimension(uint32_t dimension) {\n    dimension_ = dimension;\n  }\n\n  uint32_t dimension() const {\n    return dimension_;\n  }\n\n  IndexType index_type() const {\n    if (index_params_) {\n      return index_params_->type();\n    }\n    return IndexType::UNDEFINED;\n  }\n\n  IndexParams::Ptr index_params() const {\n    return index_params_;\n  }\n\n  void set_index_params(const IndexParams::Ptr &index_params) {\n    index_params_ = index_params;\n  }\n\n  void set_index_params(const IndexParams &index_params) {\n    index_params_ = index_params.clone();\n  }\n\n  Status validate() const;\n\n public:\n  static bool is_dense_vector_field(DataType type) {\n    return type >= DataType::VECTOR_BINARY32 && type <= DataType::VECTOR_INT16;\n  }\n\n  static bool is_sparse_vector_field(DataType type) {\n    return type >= DataType::SPARSE_VECTOR_FP16 &&\n           type <= DataType::SPARSE_VECTOR_FP32;\n  }\n\n  static bool is_vector_field(DataType type) {\n    return is_dense_vector_field(type) || is_sparse_vector_field(type);\n  }\n\n  static DataType get_element_data_type(DataType data_type) {\n    switch (data_type) {\n      case DataType::ARRAY_BINARY:\n        return DataType::BINARY;\n      case DataType::ARRAY_STRING:\n        return 
DataType::STRING;\n      case DataType::ARRAY_BOOL:\n        return DataType::BOOL;\n      case DataType::ARRAY_INT32:\n        return DataType::INT32;\n      case DataType::ARRAY_INT64:\n        return DataType::INT64;\n      case DataType::ARRAY_UINT32:\n        return DataType::UINT32;\n      case DataType::ARRAY_UINT64:\n        return DataType::UINT64;\n      case DataType::ARRAY_FLOAT:\n        return DataType::FLOAT;\n      case DataType::ARRAY_DOUBLE:\n        return DataType::DOUBLE;\n      default:\n        return data_type;\n    }\n  }\n\n  static size_t get_element_data_size(DataType data_type) {\n    switch (data_type) {\n      case DataType::ARRAY_BINARY:\n        return 0;\n      case DataType::ARRAY_STRING:\n        return 0;\n      case DataType::ARRAY_BOOL:\n        return sizeof(bool);\n      case DataType::ARRAY_INT32:\n        return sizeof(int32_t);\n      case DataType::ARRAY_INT64:\n        return sizeof(int64_t);\n      case DataType::ARRAY_UINT32:\n        return sizeof(uint32_t);\n      case DataType::ARRAY_UINT64:\n        return sizeof(uint64_t);\n      case DataType::ARRAY_FLOAT:\n        return sizeof(float);\n      case DataType::ARRAY_DOUBLE:\n        return sizeof(double);\n      case DataType::BINARY:\n        return 0;\n      case DataType::STRING:\n        return 0;\n      case DataType::BOOL:\n        return sizeof(bool);\n      case DataType::INT32:\n        return sizeof(int32_t);\n      case DataType::INT64:\n        return sizeof(int64_t);\n      case DataType::UINT32:\n        return sizeof(uint32_t);\n      case DataType::UINT64:\n        return sizeof(uint64_t);\n      case DataType::FLOAT:\n        return sizeof(float);\n      case DataType::DOUBLE:\n        return sizeof(double);\n      default:\n        return 0;\n    }\n  }\n\n\n private:\n  std::string name_;\n  DataType data_type_{DataType::UNDEFINED};\n  bool nullable_{false};\n  uint32_t dimension_{0U};\n  IndexParams::Ptr index_params_;\n};\n\nusing 
FieldSchemaPtrList = std::vector<FieldSchema::Ptr>;\nusing FieldSchemaPtrMap = std::unordered_map<std::string, FieldSchema::Ptr>;\n\n/*\n * Collection schema\n */\nclass CollectionSchema {\n public:\n  using Ptr = std::shared_ptr<CollectionSchema>;\n\n public:\n  CollectionSchema() = default;\n\n  CollectionSchema(const std::string &name) : name_(name) {}\n\n  CollectionSchema(const std::string &name, const FieldSchemaPtrList &fields)\n      : name_(name) {\n    copy_fields(fields);\n  }\n\n  CollectionSchema(const CollectionSchema &other) {\n    name_ = other.name_;\n    copy_fields(other.fields_);\n    max_doc_count_per_segment_ = other.max_doc_count_per_segment_;\n  }\n\n public:\n  std::string to_string() const;\n\n\n  std::string to_string_formatted(int indent_level = 0) const;\n\n  std::string name() const {\n    return name_;\n  }\n\n  void set_name(const std::string &name) {\n    name_ = name;\n  }\n\n  Status add_field(FieldSchema::Ptr column_schema);\n\n  Status alter_field(const std::string &column_name,\n                     const FieldSchema::Ptr &new_column_options);\n\n  Status drop_field(const std::string &column_name);\n\n  bool has_field(const std::string &column) const;\n\n  const FieldSchema *get_field(const std::string &column) const;\n  FieldSchema *get_field(const std::string &column);\n  const FieldSchema *get_forward_field(const std::string &column) const;\n  FieldSchema *get_forward_field(const std::string &column);\n  const FieldSchema *get_vector_field(const std::string &column) const;\n  FieldSchema *get_vector_field(const std::string &column);\n\n  FieldSchemaPtrList fields() const;\n\n  FieldSchemaPtrList forward_fields() const;\n\n  FieldSchemaPtrList forward_fields_with_index() const;\n\n  std::vector<std::string> forward_field_names() const;\n\n  std::vector<std::string> forward_field_names_with_index() const;\n\n  std::vector<std::string> all_field_names() const;\n\n  FieldSchemaPtrList vector_fields() const;\n\n  uint64_t 
max_doc_count_per_segment() const;\n\n  void set_max_doc_count_per_segment(uint64_t max_doc_count_per_segment);\n\n  Status validate() const;\n\n public:\n  Status add_index(const std::string &column,\n                   const IndexParams::Ptr &index_options);\n\n  Status drop_index(const std::string &column);\n\n  bool has_index(const std::string &column) const;\n\n public:\n  bool operator==(const CollectionSchema &other) const {\n    if (name_ != other.name_ || fields_.size() != other.fields_.size()) {\n      return false;\n    }\n\n    for (size_t i = 0; i < fields_.size(); ++i) {\n      if (*fields_[i] != *other.fields_[i]) {\n        return false;\n      }\n    }\n\n    return true;\n  }\n\n  bool operator!=(const CollectionSchema &other) const {\n    return !(*this == other);\n  }\n\n private:\n  void copy_fields(const FieldSchemaPtrList &fields) {\n    for (auto &field : fields) {\n      auto c = std::make_shared<FieldSchema>(*field);\n      fields_.push_back(c);\n      fields_map_[field->name()] = c;\n    }\n  }\n\n private:\n  std::string name_{};\n  FieldSchemaPtrList fields_{};\n  FieldSchemaPtrMap fields_map_{};\n\n  uint64_t max_doc_count_per_segment_{MAX_DOC_COUNT_PER_SEGMENT};\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/stats.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <string>\n#include <unordered_map>\n\nnamespace zvec {\n\n/*\n * Collection stats\n */\nstruct CollectionStats {\n  uint64_t doc_count{0};\n  // column -> completeness\n  std::unordered_map<std::string, float> index_completeness;\n\n  std::string to_string() const;\n\n  std::string to_string_formatted(int indent_level = 0) const;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/status.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n#include <iostream>\n#include <string>\n#include <zvec/ailego/pattern/expected.hpp>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec {\n\nclass Status;\ntemplate <typename T>\nusing Result = tl::expected<T, Status>;\n\nstd::ostream &operator<<(std::ostream &os, const Status &s);\n\n/**\n * @brief Enumeration of common error codes.\n */\nenum class StatusCode {\n  OK = 0,\n  NOT_FOUND,\n  ALREADY_EXISTS,\n  INVALID_ARGUMENT,\n  PERMISSION_DENIED,\n  FAILED_PRECONDITION,\n  RESOURCE_EXHAUSTED,\n  UNAVAILABLE,\n  INTERNAL_ERROR,\n  NOT_SUPPORTED,\n  UNKNOWN\n};\n\n// Helper: get default message for code\nconst char *GetDefaultMessage(StatusCode code);\n\n/**\n * @class Status\n * @brief Represents the result of an operation: success or failure with\n * message.\n *\n * This class is used to return error information from functions without using\n * exceptions. 
It stores a status code and an optional error message.\n *\n * @note This class is thread-compatible: const methods can be called from\n * multiple threads.\n */\nclass Status {\n public:\n  /// @brief Default constructor: OK status\n  Status() noexcept : code_(StatusCode::OK) {}\n\n  /// @brief Construct a failed status with code and message\n  Status(StatusCode code, const std::string &msg) : code_(code), msg_(msg) {\n    ensure_not_ok(code);\n  }\n\n  /// @brief Construct a failed status with code and rvalue message\n  Status(StatusCode code, std::string &&msg)\n      : code_(code), msg_(std::move(msg)) {\n    ensure_not_ok(code);\n  }\n\n  /// @brief Copy constructor\n  Status(const Status &) = default;\n\n  /// @brief Copy assignment\n  Status &operator=(const Status &) = default;\n\n  /// @brief Move constructor\n  Status(Status &&) = default;\n\n  /// @brief Move assignment\n  Status &operator=(Status &&) = default;\n\n  /// @brief Destructor\n  ~Status() = default;\n\n  /// @brief Check if the status is OK (no error)\n  bool ok() const noexcept {\n    return code_ == StatusCode::OK;\n  }\n\n  /// @brief Get the status code\n  StatusCode code() const noexcept {\n    return code_;\n  }\n\n  /// @brief Get the error message (empty if OK)\n  const std::string &message() const noexcept {\n    return msg_;\n  }\n\n  /// @brief Get C-style string (safe because msg_ owns the string)\n  const char *c_str() const noexcept {\n    return msg_.c_str();\n  }\n\n  /// @brief Comparison operators\n  bool operator==(const Status &other) const noexcept;\n  bool operator!=(const Status &other) const noexcept {\n    return !(*this == other);\n  }\n\n  /// @brief Factory: Success\n  static Status OK() noexcept {\n    return Status();\n  }\n\n  /// @brief Factory: Invalid argument\n  template <typename... 
Args>\n  static Status InvalidArgument(Args &&...args) {\n    return Status(StatusCode::INVALID_ARGUMENT,\n                  concat(std::forward<Args>(args)...));\n  }\n\n  /// @brief Factory: Not found\n  template <typename... Args>\n  static Status NotFound(Args &&...args) {\n    return Status(StatusCode::NOT_FOUND, concat(std::forward<Args>(args)...));\n  }\n\n  /// @brief Factory: Already exists\n  template <typename... Args>\n  static Status AlreadyExists(Args &&...args) {\n    return Status(StatusCode::ALREADY_EXISTS,\n                  concat(std::forward<Args>(args)...));\n  }\n\n  /// @brief Factory: Internal error\n  template <typename... Args>\n  static Status InternalError(Args &&...args) {\n    return Status(StatusCode::INTERNAL_ERROR,\n                  concat(std::forward<Args>(args)...));\n  }\n\n  /// @brief Factory: Permission denied\n  template <typename... Args>\n  static Status PermissionDenied(Args &&...args) {\n    return Status(StatusCode::PERMISSION_DENIED,\n                  concat(std::forward<Args>(args)...));\n  }\n\n  /// @brief Factory: Not supported\n  template <typename... Args>\n  static Status NotSupported(Args &&...args) {\n    return Status(StatusCode::NOT_SUPPORTED,\n                  concat(std::forward<Args>(args)...));\n  }\n\n  // Add more factories as needed...\n\n private:\n  /// @brief Ensure non-OK status has non-empty message (optional)\n  static void ensure_not_ok(StatusCode /*code*/) noexcept {\n    // Optional: assert(code == StatusCode::OK || \"non-OK status should have\n    // message\")\n  }\n\n  /// @brief Helper: concatenate any number of arguments into a string\n  template <typename... Args>\n  static std::string concat(Args &&...args) {\n    return ailego::StringHelper::Concat(std::forward<Args>(args)...);\n  }\n\n  StatusCode code_;\n  std::string msg_;\n};\n\n}  // namespace zvec"
  },
  {
    "path": "src/include/zvec/db/type.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n\nnamespace zvec {\n\n/*\n * Column index types\n */\nenum class IndexType : uint32_t {\n  UNDEFINED = 0,\n  HNSW = 1,\n  IVF = 2,\n  FLAT = 3,\n  HNSW_RABITQ = 4,\n  INVERT = 10,\n};\n\n/*\n * Column data types\n */\nenum class DataType : uint32_t {\n  UNDEFINED = 0,\n\n  BINARY = 1,\n  STRING = 2,\n  BOOL = 3,\n  INT32 = 4,\n  INT64 = 5,\n  UINT32 = 6,\n  UINT64 = 7,\n  FLOAT = 8,\n  DOUBLE = 9,\n\n  VECTOR_BINARY32 = 20,\n  VECTOR_BINARY64 = 21,\n  VECTOR_FP16 = 22,\n  VECTOR_FP32 = 23,\n  VECTOR_FP64 = 24,\n  VECTOR_INT4 = 25,\n  VECTOR_INT8 = 26,\n  VECTOR_INT16 = 27,\n\n  SPARSE_VECTOR_FP16 = 30,\n  SPARSE_VECTOR_FP32 = 31,\n\n  ARRAY_BINARY = 40,\n  ARRAY_STRING = 41,\n  ARRAY_BOOL = 42,\n  ARRAY_INT32 = 43,\n  ARRAY_INT64 = 44,\n  ARRAY_UINT32 = 45,\n  ARRAY_UINT64 = 46,\n  ARRAY_FLOAT = 47,\n  ARRAY_DOUBLE = 48,\n};\n\nenum class QuantizeType : uint32_t {\n  UNDEFINED = 0,\n  FP16 = 1,\n  INT8 = 2,\n  INT4 = 3,\n  RABITQ = 4,\n};\n\nenum class MetricType : uint32_t {\n  UNDEFINED = 0,\n  L2 = 1,\n  IP = 2,\n  COSINE = 3,\n  MIPSL2 = 4,\n};\n\nenum class Operator : uint32_t {\n  INSERT = 0,\n  UPSERT = 1,\n  UPDATE = 2,\n  DELETE = 3,\n};\n\nenum class CompareOp : uint32_t {\n  NONE = 0,\n  EQ,\n  NE,\n  LT,\n  LE,\n  GT,\n  GE,\n  LIKE,\n  CONTAIN_ALL,\n  CONTAIN_ANY,\n  NOT_CONTAIN_ALL,\n  
NOT_CONTAIN_ANY,\n  IS_NULL,\n  IS_NOT_NULL,\n  HAS_PREFIX,\n  HAS_SUFFIX,\n};\n\nenum RelationOp : uint32_t {\n  NONE = 0,\n\n  AND = 1,\n  OR = 2\n};\n\nenum BlockType : uint32_t {\n  UNDEFINED = 0,\n  SCALAR = 1,\n  SCALAR_INDEX = 2,\n  VECTOR_INDEX = 3,\n  VECTOR_INDEX_QUANTIZE = 4,\n};\n\n\nenum class FileFormat : uint32_t {\n  UNKNOWN = 0,\n  IPC = 1,\n  PARQUET = 2,\n};\n\nenum class ColumnOp : uint32_t {\n  UNDEFINED = 0,\n  ADD,\n  ALTER,\n  DROP,\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "src/include/zvec/turbo/turbo.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <functional>\n#include <zvec/ailego/math_batch/utils.h>\n\nnamespace zvec::turbo {\n\nusing DistanceFunc =\n    std::function<void(const void *m, const void *q, size_t dim, float *out)>;\nusing BatchDistanceFunc = std::function<void(\n    const void **m, const void *q, size_t num, size_t dim, float *out)>;\nusing QueryPreprocessFunc =\n    zvec::ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc;\n\nenum class MetricType {\n  kSquaredEuclidean,\n  kCosine,\n  kMipsSquaredEuclidean,\n  kUnknown,\n};\n\nenum class DataType {\n  kInt8,\n  kUnknown,\n};\n\nenum class QuantizeType {\n  kDefault,\n};\n\nDistanceFunc get_distance_func(MetricType metric_type, DataType data_type,\n                               QuantizeType quantize_type);\n\nBatchDistanceFunc get_batch_distance_func(MetricType metric_type,\n                                          DataType data_type,\n                                          QuantizeType quantize_type);\n\nQueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,\n                                              DataType data_type,\n                                              QuantizeType quantize_type);\n\n}  // namespace zvec::turbo\n"
  },
  {
    "path": "src/turbo/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(NOT ANDROID AND AUTO_DETECT_ARCH)\n    if(CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|i686|i386|x64\")\n        setup_compiler_march_for_x86(TURBO_MARCH_FLAG_SSE TURBO_MARCH_FLAG_AVX2 TURBO_MARCH_FLAG_AVX512 TURBO_MARCH_FLAG_AVX512FP16)\n    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES \"aarch64|arm64|ARM64\")\n        # ARM64 architecture - no special march flags needed for now\n        # NEON implementations can be added here if needed\n        message(STATUS \"turbo: ARM64 detected, skipping x86-specific optimizations\")\n    endif()\nendif()\n\nfile(GLOB_RECURSE ALL_SRCS *.cc *.c *.h)\n\n# Set per-file compile flags for AVX512-VNNI sources.\n# set_source_files_properties is directory-scoped, so it must be called in the\n# same directory that adds the sources to a target (i.e. here, not in a\n# subdirectory).\nif(NOT ANDROID AND AUTO_DETECT_ARCH)\n    if(CMAKE_SYSTEM_PROCESSOR MATCHES \"x86_64|i686|i386|x64\")\n        file(GLOB_RECURSE AVX512_VNNI_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/avx512_vnni/*.cc)\n        set_source_files_properties(\n            ${AVX512_VNNI_SRCS}\n            PROPERTIES\n            COMPILE_FLAGS \"${TURBO_MARCH_FLAG_AVX512}\"\n        )\n    endif()\nendif()\n\ncc_library(\n    NAME zvec_turbo STATIC STRICT PACKED\n    SRCS ${ALL_SRCS}\n    LIBS zvec_ailego\n    INCS ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_ROOT_DIR}/src/include\n)\n"
  },
  {
    "path": "src/turbo/avx512_vnni/record_quantized_int8/common.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// Shared AVX512-VNNI inner product kernels for record_quantized_int8 distance\n// implementations (cosine, l2, mips_l2, etc.).\n//\n// All functions are marked always_inline so that when this header is included\n// from a per-file-march .cc translation unit, the compiler can fully inline\n// and optimize them under the correct -march flag without any cross-TU call\n// overhead.\n\n#pragma once\n\n#if defined(__AVX512VNNI__)\n#include <immintrin.h>\n#include <array>\n#include <cstdint>\n\nnamespace zvec::turbo::avx512_vnni::internal {\n\nstatic inline int32_t HorizontalAdd_INT32_V256(__m256i v) {\n  __m256i x1 = _mm256_hadd_epi32(v, v);\n  __m256i x2 = _mm256_hadd_epi32(x1, x1);\n  __m128i x3 = _mm256_extractf128_si256(x2, 1);\n  __m128i x4 = _mm_add_epi32(_mm256_castsi256_si128(x2), x3);\n  return _mm_cvtsi128_si32(x4);\n}\n\n#define FMA_INT8_GENERAL(m, q, sum) sum += static_cast<float>(m * q);\n\n// Compute the raw integer inner product of two int8 vectors of length `size`.\n// The result is written to `*distance` as a float.\n// Both `a` and `b` must point to int8_t arrays.\nstatic __attribute__((always_inline)) void ip_int8_avx512_vnni(\n    const void *a, const void *b, size_t size, float *distance) {\n  const __m256i ONES_INT16_AVX = _mm256_set1_epi32(0x00010001);\n  const __m128i ONES_INT16_SSE = _mm_set1_epi32(0x00010001);\n\n  const 
int8_t *lhs = reinterpret_cast<const int8_t *>(a);\n  const int8_t *rhs = reinterpret_cast<const int8_t *>(b);\n\n  const int8_t *last = lhs + size;\n  const int8_t *last_aligned = lhs + ((size >> 6) << 6);\n\n  float result = 0.0f;\n\n  __m256i ymm_sum_0 = _mm256_setzero_si256();\n  __m256i ymm_sum_1 = _mm256_setzero_si256();\n\n  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));\n\n      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);\n      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);\n      ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);\n      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);\n\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),\n                            ONES_INT16_AVX),\n          ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);\n      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);\n      ymm_rhs = _mm256_abs_epi8(ymm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);\n      xmm_lhs = 
_mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(),\n                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),\n                                          ONES_INT16_SSE)),\n          ymm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  } else {\n    for (; lhs != last_aligned; lhs += 64, rhs += 64) {\n      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));\n      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));\n      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));\n      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));\n\n      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);\n      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);\n      ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);\n      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);\n\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      ymm_sum_1 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),\n                            ONES_INT16_AVX),\n          ymm_sum_1);\n    }\n\n    if (last >= last_aligned + 32) {\n      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);\n      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);\n      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);\n      ymm_rhs = _mm256_abs_epi8(ymm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),\n                            ONES_INT16_AVX),\n          ymm_sum_0);\n      lhs += 32;\n      rhs += 32;\n    }\n\n    if (last >= lhs + 16) {\n      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);\n      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);\n      xmm_lhs = 
_mm_sign_epi8(xmm_lhs, xmm_rhs);\n      xmm_rhs = _mm_abs_epi8(xmm_rhs);\n      ymm_sum_0 = _mm256_add_epi32(\n          _mm256_set_m128i(_mm_setzero_si128(),\n                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),\n                                          ONES_INT16_SSE)),\n          ymm_sum_0);\n      lhs += 16;\n      rhs += 16;\n    }\n  }\n  result = static_cast<float>(\n      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));\n\n  switch (last - lhs) {\n    case 15:\n      FMA_INT8_GENERAL(lhs[14], rhs[14], result)\n      /* FALLTHRU */\n    case 14:\n      FMA_INT8_GENERAL(lhs[13], rhs[13], result)\n      /* FALLTHRU */\n    case 13:\n      FMA_INT8_GENERAL(lhs[12], rhs[12], result)\n      /* FALLTHRU */\n    case 12:\n      FMA_INT8_GENERAL(lhs[11], rhs[11], result)\n      /* FALLTHRU */\n    case 11:\n      FMA_INT8_GENERAL(lhs[10], rhs[10], result)\n      /* FALLTHRU */\n    case 10:\n      FMA_INT8_GENERAL(lhs[9], rhs[9], result)\n      /* FALLTHRU */\n    case 9:\n      FMA_INT8_GENERAL(lhs[8], rhs[8], result)\n      /* FALLTHRU */\n    case 8:\n      FMA_INT8_GENERAL(lhs[7], rhs[7], result)\n      /* FALLTHRU */\n    case 7:\n      FMA_INT8_GENERAL(lhs[6], rhs[6], result)\n      /* FALLTHRU */\n    case 6:\n      FMA_INT8_GENERAL(lhs[5], rhs[5], result)\n      /* FALLTHRU */\n    case 5:\n      FMA_INT8_GENERAL(lhs[4], rhs[4], result)\n      /* FALLTHRU */\n    case 4:\n      FMA_INT8_GENERAL(lhs[3], rhs[3], result)\n      /* FALLTHRU */\n    case 3:\n      FMA_INT8_GENERAL(lhs[2], rhs[2], result)\n      /* FALLTHRU */\n    case 2:\n      FMA_INT8_GENERAL(lhs[1], rhs[1], result)\n      /* FALLTHRU */\n    case 1:\n      FMA_INT8_GENERAL(lhs[0], rhs[0], result)\n  }\n  *distance = result;\n}\n\n#undef FMA_INT8_GENERAL\n\n// Shift the first `original_dim` bytes of `query` in-place from int8 to uint8\n// by adding 128 to each element. The metadata tail beyond `original_dim` is\n// left untouched. 
This prepares the query for use with dpbusd (uint8 * int8).\nstatic __attribute__((always_inline)) void shift_int8_to_uint8_avx512(\n    void *query, size_t original_dim) {\n  const int8_t *input = reinterpret_cast<const int8_t *>(query);\n  uint8_t *output = reinterpret_cast<uint8_t *>(query);\n\n  // 128 represented as int8_t wraps to -128, but two's complement addition\n  // produces the correct uint8 result.\n  const __m512i offset = _mm512_set1_epi8(static_cast<int8_t>(128));\n\n  size_t i = 0;\n  for (; i + 64 <= original_dim; i += 64) {\n    __m512i data =\n        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + i));\n    __m512i shifted = _mm512_add_epi8(data, offset);\n    _mm512_storeu_si512(reinterpret_cast<__m512i *>(output + i), shifted);\n  }\n  for (; i < original_dim; ++i) {\n    output[i] = static_cast<uint8_t>(static_cast<int>(input[i]) + 128);\n  }\n}\n\n// Compute raw integer inner products for a batch of int8 vectors against a\n// single query. Uses AVX512-VNNI dpbusd instruction.\n// `query` is treated as uint8 (preprocessed), `vectors[i]` as int8.\ntemplate <size_t batch_size>\n__attribute__((always_inline)) void ip_int8_batch_avx512_vnni_impl(\n    const void *query, const void *const *vectors,\n    const std::array<const void *, batch_size> &prefetch_ptrs,\n    size_t dimensionality, float *distances) {\n  __m512i accs[batch_size];\n  for (size_t i = 0; i < batch_size; ++i) {\n    accs[i] = _mm512_setzero_si512();\n  }\n  size_t dim = 0;\n  for (; dim + 64 <= dimensionality; dim += 64) {\n    __m512i q = _mm512_loadu_si512(reinterpret_cast<const __m512i *>(\n        reinterpret_cast<const int8_t *>(query) + dim));\n    __m512i data_regs[batch_size];\n    for (size_t i = 0; i < batch_size; ++i) {\n      data_regs[i] = _mm512_loadu_si512(reinterpret_cast<const __m512i *>(\n          reinterpret_cast<const int8_t *>(vectors[i]) + dim));\n    }\n    for (size_t i = 0; i < batch_size; ++i) {\n      if (prefetch_ptrs[i]) {\n        
_mm_prefetch(\n            reinterpret_cast<const char *>(\n                reinterpret_cast<const int8_t *>(prefetch_ptrs[i]) + dim),\n            _MM_HINT_T0);\n      }\n      accs[i] = _mm512_dpbusd_epi32(accs[i], q, data_regs[i]);\n    }\n  }\n  std::array<int, batch_size> temp_results{};\n  for (size_t i = 0; i < batch_size; ++i) {\n    temp_results[i] = _mm512_reduce_add_epi32(accs[i]);\n  }\n  for (; dim < dimensionality; ++dim) {\n    int q = static_cast<int>(reinterpret_cast<const uint8_t *>(query)[dim]);\n    for (size_t i = 0; i < batch_size; ++i) {\n      temp_results[i] +=\n          q *\n          static_cast<int>(reinterpret_cast<const int8_t *>(vectors[i])[dim]);\n    }\n  }\n  for (size_t i = 0; i < batch_size; ++i) {\n    distances[i] = static_cast<float>(temp_results[i]);\n  }\n}\n\n// Dispatch batched inner product over all `n` vectors with prefetching.\nstatic __attribute__((always_inline)) void ip_int8_batch_avx512_vnni(\n    const void *const *vectors, const void *query, size_t n, size_t dim,\n    float *distances) {\n  static constexpr size_t batch_size = 2;\n  static constexpr size_t prefetch_step = 2;\n  size_t i = 0;\n  for (; i + batch_size <= n; i += batch_size) {\n    std::array<const void *, batch_size> prefetch_ptrs;\n    for (size_t j = 0; j < batch_size; ++j) {\n      if (i + j + batch_size * prefetch_step < n) {\n        prefetch_ptrs[j] = vectors[i + j + batch_size * prefetch_step];\n      } else {\n        prefetch_ptrs[j] = nullptr;\n      }\n    }\n    ip_int8_batch_avx512_vnni_impl<batch_size>(\n        query, &vectors[i], prefetch_ptrs, dim, distances + i);\n  }\n  for (; i < n; i++) {\n    std::array<const void *, 1> prefetch_ptrs{nullptr};\n    ip_int8_batch_avx512_vnni_impl<1>(query, &vectors[i], prefetch_ptrs, dim,\n                                      distances + i);\n  }\n}\n\n}  // namespace zvec::turbo::avx512_vnni::internal\n\n#endif  // defined(__AVX512VNNI__)\n"
  },
  {
    "path": "src/turbo/avx512_vnni/record_quantized_int8/cosine.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// This file is compiled with per-file -march=avx512vnni (set in CMakeLists.txt)\n// so that all AVX512-VNNI intrinsics and the inlined inner product kernels from\n// common.h are compiled with the correct target ISA.\n\n#include \"avx512_vnni/record_quantized_int8/cosine.h\"\n#include \"avx512_vnni/record_quantized_int8/common.h\"\n#if defined(__AVX512VNNI__)\n#include <immintrin.h>\n#endif\n\n// Tail layout for quantized INT8 cosine vectors:\n//\n//   [ original_dim bytes: int8_t elements ]\n//   [ float scale_a       ]  (ma)\n//   [ float bias_a        ]  (mb)\n//   [ float sum_a         ]  (ms)\n//   [ float square_sum_a  ]  (ms2)\n//   [ int  int8_sum  ]  (sum of raw int8 elements, used when query is\n//                        preprocessed to uint8 via +128 shift)\n//\n// The query tail has the same layout (qa, qb, qs, qs2) without int8_sum.\n\nnamespace zvec::turbo::avx512_vnni {\n\nvoid cosine_int8_distance(const void *a, const void *b, size_t dim,\n                          float *distance) {\n#if defined(__AVX512VNNI__)\n  // `dim` is the full encoded size; the original vector occupies dim-24 bytes.\n  const int original_dim = dim - 24;\n  if (original_dim <= 0) {\n    return;\n  }\n\n  // Compute raw integer inner product over the original_dim bytes.\n  // Note: for the single-vector path there is no query preprocessing, so both\n  
// sides are treated as int8_t (same as the non-preprocessed path in\n  // MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t>).\n  internal::ip_int8_avx512_vnni(a, b, original_dim, distance);\n\n  const float *a_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(a) + original_dim);\n  const float *b_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(b) + original_dim);\n\n  float ma = a_tail[0];\n  float mb = a_tail[1];\n  float ms = a_tail[2];\n\n  float qa = b_tail[0];\n  float qb = b_tail[1];\n  float qs = b_tail[2];\n\n  // Dequantize and compute cosine distance:\n  //   cosine_dist = -(ma * qa * ip + mb * qa * qs + qb * ma * ms\n  //                   + original_dim * qb * mb)\n  *distance = -(ma * qa * *distance + mb * qa * qs + qb * ma * ms +\n                static_cast<float>(original_dim) * qb * mb);\n#else\n  (void)a;\n  (void)b;\n  (void)dim;\n  (void)distance;\n#endif\n}\n\nvoid cosine_int8_batch_distance(const void *const *vectors, const void *query,\n                                size_t n, size_t dim, float *distances) {\n#if defined(__AVX512VNNI__)\n  // `dim` is the full encoded size; the original vector occupies dim-24 bytes.\n  const int original_dim = dim - 24;\n  if (original_dim <= 0) {\n    return;\n  }\n\n  // Compute raw inner products for all vectors. 
The query has been preprocessed\n  // (int8 + 128 -> uint8) so dpbusd can be used via ip_int8_batch_avx512_vnni.\n  internal::ip_int8_batch_avx512_vnni(vectors, query, n, original_dim,\n                                      distances);\n\n  const float *q_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(query) + original_dim);\n  float qa = q_tail[0];\n  float qb = q_tail[1];\n  float qs = q_tail[2];\n\n  for (int i = 0; i < n; ++i) {\n    const float *m_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const int8_t *>(vectors[i]) + original_dim);\n    float ma = m_tail[0];\n    float mb = m_tail[1];\n    float ms = m_tail[2];\n    // Correct for the +128 shift applied to the query during preprocessing:\n    //   dpbusd computes sum(uint8_query[i] * int8_data[i])\n    //         = sum((int8_query[i] + 128) * int8_data[i])\n    //         = true_ip + 128 * sum(int8_data[i])\n    // int8_sum is stored as the 5th int-sized field after the 4 floats.\n    int int8_sum = reinterpret_cast<const int *>(m_tail)[4];\n    float &result = distances[i];\n    result -= 128.0f * static_cast<float>(int8_sum);\n\n    // Dequantize and compute cosine distance:\n    //   cosine_dist = -(ma * qa * ip + mb * qa * qs + qb * ma * ms\n    //                   + original_dim * qb * mb)\n    result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +\n               static_cast<float>(original_dim) * qb * mb);\n  }\n#else\n  (void)vectors;\n  (void)query;\n  (void)n;\n  (void)dim;\n  (void)distances;\n#endif\n}\n\nvoid cosine_int8_query_preprocess(void *query, size_t dim) {\n#if defined(__AVX512VNNI__)\n  // The original vector occupies dim-24 bytes; only those bytes are shifted.\n  const int original_dim = static_cast<int>(dim) - 24;\n  if (original_dim <= 0) {\n    return;\n  }\n  internal::shift_int8_to_uint8_avx512(query, original_dim);\n#else\n  (void)query;\n  (void)dim;\n#endif\n}\n\n}  // namespace zvec::turbo::avx512_vnni\n"
  },
  {
    "path": "src/turbo/avx512_vnni/record_quantized_int8/cosine.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstddef>\n\nnamespace zvec::turbo::avx512_vnni {\n\n// Compute cosine distance (negative inner product after normalization) between\n// a single quantized INT8 vector pair.\n// `dim` includes the original vector bytes plus a 24-byte metadata tail\n// (3 floats: scale_a, bias_a, sum_a).\nvoid cosine_int8_distance(const void *a, const void *b, size_t dim,\n                          float *distance);\n\n// Batch version of cosine_int8_distance.\n// The query must have been preprocessed by cosine_int8_query_preprocess\n// (int8 -> uint8 via +128 shift) before calling this function.\nvoid cosine_int8_batch_distance(const void *const *vectors, const void *query,\n                                size_t n, size_t dim, float *distances);\n\n// Preprocess the query vector in-place (shift int8 -> uint8 by adding 128)\n// so that the AVX512-VNNI dpbusd instruction can be used for inner product.\n// `dim` includes the 24-byte metadata tail.\nvoid cosine_int8_query_preprocess(void *query, size_t dim);\n\n}  // namespace zvec::turbo::avx512_vnni\n"
  },
  {
    "path": "src/turbo/avx512_vnni/record_quantized_int8/squared_euclidean.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// This file is compiled with per-file -march=avx512vnni (set in CMakeLists.txt)\n// so that all AVX512-VNNI intrinsics and the inlined inner product kernels from\n// common.h are compiled with the correct target ISA.\n\n#include \"avx512_vnni/record_quantized_int8/squared_euclidean.h\"\n#include \"avx512_vnni/record_quantized_int8/common.h\"\n#if defined(__AVX512VNNI__)\n#include <immintrin.h>\n#endif\n\n// Tail layout for quantized INT8 squared Euclidean vectors:\n//\n//   [ original_dim bytes: int8_t elements ]\n//   [ float scale_a  ]  (ma)\n//   [ float bias_a   ]  (mb)\n//   [ float sum_a    ]  (ms)\n//   [ float sum2_a   ]  (ms2)\n//   [ int  int8_sum  ]  (sum of raw int8 elements, used for bias correction\n//                        when the query has been shifted to uint8 via +128)\n//\n// Total tail size: 4 floats + 1 int = 20 bytes, so dim = original_dim + 20.\n\nnamespace zvec::turbo::avx512_vnni {\n\nvoid squared_euclidean_int8_distance(const void *a, const void *b, size_t dim,\n                                     float *distance) {\n#if defined(__AVX512VNNI__)\n  const int original_dim = dim - 20;\n  if (original_dim <= 0) {\n    return;\n  }\n  internal::ip_int8_avx512_vnni(a, b, original_dim, distance);\n\n  const float *a_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(a) + original_dim);\n  
const float *b_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(b) + original_dim);\n\n  float ma = a_tail[0];\n  float mb = a_tail[1];\n  float ms = a_tail[2];\n  float ms2 = a_tail[3];\n\n  float qa = b_tail[0];\n  float qb = b_tail[1];\n  float qs = b_tail[2];\n  float qs2 = b_tail[3];\n\n  const float sum = qa * qs;\n  const float sum2 = qa * qa * qs2;\n\n  *distance = ma * ma * ms2 + sum2 - 2 * ma * qa * *distance +\n              (mb - qb) * (mb - qb) * original_dim +\n              2 * (mb - qb) * (ms * ma - sum);\n#else\n  (void)a;\n  (void)b;\n  (void)dim;\n  (void)distance;\n#endif\n}\n\nvoid squared_euclidean_int8_batch_distance(const void *const *vectors,\n                                           const void *query, size_t n,\n                                           size_t dim, float *distances) {\n#if defined(__AVX512VNNI__)\n  const int original_dim = dim - 20;\n  if (original_dim <= 0) {\n    return;\n  }\n\n  internal::ip_int8_batch_avx512_vnni(vectors, query, n, original_dim,\n                                      distances);\n  const float *q_tail = reinterpret_cast<const float *>(\n      reinterpret_cast<const int8_t *>(query) + original_dim);\n  float qa = q_tail[0];\n  float qb = q_tail[1];\n  float qs = q_tail[2];\n  float qs2 = q_tail[3];\n\n  const float sum = qa * qs;\n  const float sum2 = qa * qa * qs2;\n  for (size_t i = 0; i < n; ++i) {\n    const float *m_tail = reinterpret_cast<const float *>(\n        reinterpret_cast<const int8_t *>(vectors[i]) + original_dim);\n    float ma = m_tail[0];\n    float mb = m_tail[1];\n    float ms = m_tail[2];\n    float ms2 = m_tail[3];\n    // Correct for the +128 shift applied to the query during preprocessing:\n    //   dpbusd computes sum(uint8_query[i] * int8_data[i])\n    //         = sum((int8_query[i] + 128) * int8_data[i])\n    //         = true_ip + 128 * sum(int8_data[i])\n    // int8_sum is stored as the 5th int-sized field after the 4 floats.\n    int 
int8_sum = reinterpret_cast<const int *>(m_tail)[4];\n    float &result = distances[i];\n    result -= 128.0f * static_cast<float>(int8_sum);\n    result = ma * ma * ms2 + sum2 - 2 * ma * qa * result +\n             (mb - qb) * (mb - qb) * original_dim +\n             2 * (mb - qb) * (ms * ma - sum);\n  }\n#else\n  (void)vectors;\n  (void)query;\n  (void)n;\n  (void)dim;\n  (void)distances;\n#endif\n}\n\nvoid squared_euclidean_int8_query_preprocess(void *query, size_t dim) {\n#if defined(__AVX512VNNI__)\n  const int original_dim = static_cast<int>(dim) - 20;\n  if (original_dim <= 0) {\n    return;\n  }\n  internal::shift_int8_to_uint8_avx512(query, original_dim);\n#else\n  (void)query;\n  (void)dim;\n#endif\n}\n\n}  // namespace zvec::turbo::avx512_vnni\n"
  },
  {
    "path": "src/turbo/avx512_vnni/record_quantized_int8/squared_euclidean.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstddef>\n\nnamespace zvec::turbo::avx512_vnni {\n\n// Compute squared Euclidean distance between a single quantized INT8\n// vector pair.\n// `dim` includes the original vector bytes plus a 20-byte metadata tail\n// (4 floats: scale_a, bias_a, sum_a, sum2_a).\nvoid squared_euclidean_int8_distance(const void *a, const void *b, size_t dim,\n                                     float *distance);\n\n// Batch version of squared_euclidean_int8_distance.\n// The query must have been preprocessed by\n// squared_euclidean_int8_query_preprocess (int8 -> uint8 via +128 shift)\n// before calling this function.\nvoid squared_euclidean_int8_batch_distance(const void *const *vectors,\n                                           const void *query, size_t n,\n                                           size_t dim, float *distances);\n\n// Preprocess the query vector in-place (shift int8 -> uint8 by adding 128)\n// for the batch path. Only the original_dim bytes are shifted; the metadata\n// tail is left intact. `dim` includes the 20-byte metadata tail.\nvoid squared_euclidean_int8_query_preprocess(void *query, size_t dim);\n\n}  // namespace zvec::turbo::avx512_vnni\n"
  },
  {
    "path": "src/turbo/turbo.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include <zvec/turbo/turbo.h>\n#include \"avx512_vnni/record_quantized_int8/cosine.h\"\n#include \"avx512_vnni/record_quantized_int8/squared_euclidean.h\"\n\nnamespace zvec::turbo {\n\nDistanceFunc get_distance_func(MetricType metric_type, DataType data_type,\n                               QuantizeType quantize_type) {\n  if (data_type == DataType::kInt8) {\n    if (quantize_type == QuantizeType::kDefault) {\n      if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n        if (metric_type == MetricType::kSquaredEuclidean) {\n          return avx512_vnni::squared_euclidean_int8_distance;\n        }\n        if (metric_type == MetricType::kCosine) {\n          return avx512_vnni::cosine_int8_distance;\n        }\n      }\n    }\n  }\n  return nullptr;\n}\n\nBatchDistanceFunc get_batch_distance_func(MetricType metric_type,\n                                          DataType data_type,\n                                          QuantizeType quantize_type) {\n  if (data_type == DataType::kInt8) {\n    if (quantize_type == QuantizeType::kDefault) {\n      if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n        if (metric_type == MetricType::kSquaredEuclidean) {\n          return avx512_vnni::squared_euclidean_int8_batch_distance;\n        }\n        if 
(metric_type == MetricType::kCosine) {\n          return avx512_vnni::cosine_int8_batch_distance;\n        }\n      }\n    }\n  }\n  return nullptr;\n}\n\nQueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,\n                                              DataType data_type,\n                                              QuantizeType quantize_type) {\n  if (data_type == DataType::kInt8) {\n    if (quantize_type == QuantizeType::kDefault) {\n      if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {\n        if (metric_type == MetricType::kSquaredEuclidean) {\n          return avx512_vnni::squared_euclidean_int8_query_preprocess;\n        }\n        if (metric_type == MetricType::kCosine) {\n          return avx512_vnni::cosine_int8_query_preprocess;\n        }\n      }\n    }\n  }\n  return nullptr;\n}\n\n}  // namespace zvec::turbo\n"
  },
  {
    "path": "tests/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_directories(ailego)\ncc_directories(db)\ncc_directories(core)\n"
  },
  {
    "path": "tests/ailego/CMakeLists.txt",
    "content": "\ninclude(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n    NAME ${CC_TARGET} STRICT\n    LIBS zvec_ailego\n    Arrow::arrow_depends\n    Arrow::parquet_static\n    SRCS ${CC_SRCS}\n  )\n  cc_test_suite(zvec_ailego ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/ailego/algorithm/integer_quantizer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <stdlib.h>\n#include <random>\n#include <ailego/algorithm/integer_quantizer.h>\n#include <gtest/gtest.h>\n\nnamespace zvec::ailego {\n\nTEST(IntegerQuantizer, INT8_Uniform_Distribution) {\n  std::vector<size_t> tests = {1, 100, 1000, 10000, 100000};\n  for (auto COUNT : tests) {\n    std::random_device rd;\n    std::mt19937 gen(rd());\n    std::vector<float> data;\n\n    std::uniform_real_distribution<float> dist(1.0, 2.0);\n    float max = -std::numeric_limits<float>::max();\n    float min = std::numeric_limits<float>::max();\n    for (size_t i = 0; i < COUNT; ++i) {\n      auto v = dist(gen);\n      max = std::max(max, v);\n      min = std::min(min, v);\n      data.emplace_back(v);\n    }\n    // data.emplace_back(10);  // deviation point\n    EntropyInt8Quantizer quantizer;\n    quantizer.set_max(max);\n    quantizer.set_min(min);\n    quantizer.feed(data.data(), data.size());\n\n    ASSERT_TRUE(quantizer.train());\n\n    std::vector<int8_t> qdata(data.size(), 0);\n    quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n    std::vector<float> recover_data(data.size(), 0.0f);\n    quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n    float var = 0.0f;\n    for (size_t i = 0; i < data.size(); ++i) {\n      var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    }\n    EXPECT_LT(var / 
COUNT, 0.01);\n  }\n}\n\nTEST(IntegerQuantizer, INT8_Normal_Distribution) {\n  const size_t COUNT = 1000000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::normal_distribution<float> dist(3, 1.5);\n  float max = -std::numeric_limits<float>::max();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    auto v = dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyInt8Quantizer quantizer;\n  bool non_bias = dist(gen) > 5;\n  quantizer.set_non_bias(non_bias);\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);\n\n  std::vector<int8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n         *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 0.001);\n}\n\nTEST(IntegerQuantizer, INT8_Poisson_Distribution) {\n  const size_t COUNT = 100000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::poisson_distribution<int> dist(10000);\n  float max = -std::numeric_limits<float>::min();\n  float min = 
std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    float v = (float)dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyInt8Quantizer quantizer;\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n\n  std::vector<int8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n  }\n  EXPECT_LT(var / COUNT, 100);\n}\n\nTEST(IntegerQuantizer, INT4_Uniform_Distribution) {\n  std::vector<size_t> tests = {2, 1000, 10000, 100000};\n  for (auto COUNT : tests) {\n    std::random_device rd;\n    std::mt19937 gen(rd());\n    std::vector<float> data;\n\n    std::uniform_real_distribution<float> dist(1.0, 2.0);\n    float max = -std::numeric_limits<float>::min();\n    float min = std::numeric_limits<float>::max();\n    for (size_t i = 0; i < COUNT; ++i) {\n      auto v = dist(gen);\n      max = std::max(max, v);\n      min = std::min(min, v);\n      data.emplace_back(v);\n    }\n    // data.emplace_back(10);  // deviation point\n    EntropyInt4Quantizer quantizer;\n    quantizer.set_max(max);\n    quantizer.set_min(min);\n    quantizer.feed(data.data(), data.size());\n\n    ASSERT_TRUE(quantizer.train());\n\n    std::vector<uint8_t> qdata(data.size() / 2, 0);\n    quantizer.encode(data.data(), data.size(), qdata.data());\n\n    std::vector<float> recover_data(data.size(), 0.0f);\n    quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n    float var = 0.0f;\n    for (size_t i = 0; i < data.size(); ++i) {\n      var += (data[i] - recover_data[i]) * 
(data[i] - recover_data[i]);\n      // printf(\"%f %f\\n\", data[i], recover_data[i]);\n    }\n#if 0\n    printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n           *std::min_element(data.begin(), data.end()));\n    printf(\"recover max=%f min=%f\\n\",\n           *std::max_element(recover_data.begin(), recover_data.end()),\n           *std::min_element(recover_data.begin(), recover_data.end()));\n    printf(\"var=%f\\n\", var);\n#endif\n    EXPECT_LT(var / COUNT, 0.1);\n  }\n}\n\nTEST(IntegerQuantizer, INT4_Normal_Distribution) {\n  const size_t COUNT = 10000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::normal_distribution<float> avg(-1, 1);\n  std::normal_distribution<float> dist(avg(gen), 5);\n  float max = -std::numeric_limits<float>::max();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    auto v = dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyInt4Quantizer quantizer;\n  bool non_bias = avg(gen) > 0;\n  quantizer.set_non_bias(non_bias);\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), data.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n  
       *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 1.0f);\n}\n\nTEST(IntegerQuantizer, INT4_Poisson_Distribution) {\n  const size_t COUNT = 100000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::poisson_distribution<int> dist(10000);\n  float max = -std::numeric_limits<float>::min();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    float v = (float)dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyInt4Quantizer quantizer;\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), data.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n         *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 500);\n}\n\nTEST(IntegerQuantizer, UINT8_Uniform_Distribution) {\n  std::vector<size_t> tests = {1, 100, 1000, 10000, 100000};\n  for (auto COUNT : tests) {\n    std::random_device rd;\n    std::mt19937 gen(rd());\n    std::vector<float> data;\n\n    
std::uniform_real_distribution<float> dist(1.0, 2.0);\n    float max = -std::numeric_limits<float>::max();\n    float min = std::numeric_limits<float>::max();\n    for (size_t i = 0; i < COUNT; ++i) {\n      auto v = dist(gen);\n      max = std::max(max, v);\n      min = std::min(min, v);\n      data.emplace_back(v);\n    }\n    // data.emplace_back(10);  // deviation point\n    EntropyUInt8Quantizer quantizer;\n    quantizer.set_max(max);\n    quantizer.set_min(min);\n    quantizer.feed(data.data(), data.size());\n\n    ASSERT_TRUE(quantizer.train());\n\n    std::vector<uint8_t> qdata(data.size(), 0);\n    quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n    std::vector<float> recover_data(data.size(), 0.0f);\n    quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n    float var = 0.0f;\n    for (size_t i = 0; i < data.size(); ++i) {\n      var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    }\n    EXPECT_LT(var / COUNT, 0.01);\n  }\n}\n\nTEST(IntegerQuantizer, UINT8_Normal_Distribution) {\n  const size_t COUNT = 10000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::normal_distribution<float> dist(5.0f, 1.4f);\n  float max = -std::numeric_limits<float>::max();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    auto v = dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyUInt8Quantizer quantizer;\n  bool non_bias = dist(gen) > 5;\n  quantizer.set_non_bias(non_bias);\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  
quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n         *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 0.01);\n}\n\nTEST(IntegerQuantizer, UINT8_Poisson_Distribution) {\n  const size_t COUNT = 100000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::poisson_distribution<int> dist(10000);\n  float max = -std::numeric_limits<float>::min();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    float v = (float)dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyUInt8Quantizer quantizer;\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), qdata.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), qdata.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n  EXPECT_LT(var / COUNT, 100);\n}\n\nTEST(IntegerQuantizer, UINT4_Uniform_Distribution) {\n  std::vector<size_t> tests = {2, 100, 5000, 10000, 100000};\n  for (auto COUNT : tests) {\n   
 std::random_device rd;\n    std::mt19937 gen(rd());\n    std::vector<float> data;\n\n    std::uniform_real_distribution<float> dist(1.0, 2.0);\n    float max = -std::numeric_limits<float>::min();\n    float min = std::numeric_limits<float>::max();\n    for (size_t i = 0; i < COUNT; ++i) {\n      auto v = dist(gen);\n      max = std::max(max, v);\n      min = std::min(min, v);\n      data.emplace_back(v);\n    }\n    // data.emplace_back(10);  // deviation point\n    EntropyUInt4Quantizer quantizer;\n    quantizer.set_max(max);\n    quantizer.set_min(min);\n    quantizer.feed(data.data(), data.size());\n\n    ASSERT_TRUE(quantizer.train());\n\n    std::vector<uint8_t> qdata(data.size() / 2, 0);\n    quantizer.encode(data.data(), data.size(), qdata.data());\n\n    std::vector<float> recover_data(data.size(), 0.0f);\n    quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n    float var = 0.0f;\n    for (size_t i = 0; i < data.size(); ++i) {\n      var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n      // printf(\"%f %f\\n\", data[i], recover_data[i]);\n    }\n#if 0\n    printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n           *std::min_element(data.begin(), data.end()));\n    printf(\"recover max=%f min=%f\\n\",\n           *std::max_element(recover_data.begin(), recover_data.end()),\n           *std::min_element(recover_data.begin(), recover_data.end()));\n    printf(\"var=%f\\n\", var);\n#endif\n    EXPECT_LT(var / COUNT, 0.1);\n  }\n}\n\nTEST(IntegerQuantizer, UINT4_Normal_Distribution) {\n  const size_t COUNT = 100000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::normal_distribution<float> avg(5, 1.0);\n  std::normal_distribution<float> dist(avg(gen), 2);\n  float max = -std::numeric_limits<float>::max();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    auto v = dist(gen);\n    max = std::max(max, v);\n    
min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyUInt4Quantizer quantizer;\n  bool non_bias = avg(gen) > 5;\n  quantizer.set_non_bias(non_bias);\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  quantizer.encode(data.data(), data.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n         *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 2.0f);\n}\n\nTEST(IntegerQuantizer, UINT4_Poisson_Distribution) {\n  const size_t COUNT = 100000u;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::vector<float> data;\n\n  std::poisson_distribution<int> dist(10000);\n  float max = -std::numeric_limits<float>::min();\n  float min = std::numeric_limits<float>::max();\n  for (size_t i = 0; i < COUNT; ++i) {\n    float v = (float)dist(gen);\n    max = std::max(max, v);\n    min = std::min(min, v);\n    data.emplace_back(v);\n  }\n  // data.emplace_back(10);  // deviation point\n  EntropyUInt4Quantizer quantizer;\n\n  quantizer.set_max(max);\n  quantizer.set_min(min);\n  quantizer.feed(data.data(), data.size());\n\n  ASSERT_TRUE(quantizer.train());\n\n  std::vector<uint8_t> qdata(data.size(), 0);\n  
quantizer.encode(data.data(), data.size(), qdata.data());\n\n  std::vector<float> recover_data(data.size(), 0.0f);\n  quantizer.decode(qdata.data(), data.size(), recover_data.data());\n\n  float var = 0.0f;\n  for (size_t i = 0; i < data.size(); ++i) {\n    var += (data[i] - recover_data[i]) * (data[i] - recover_data[i]);\n    // printf(\"%f %f\\n\", data[i], recover_data[i]);\n  }\n#if 0\n  printf(\"max=%f min=%f\\n\", *std::max_element(data.begin(), data.end()),\n         *std::min_element(data.begin(), data.end()));\n  printf(\"recover max=%f min=%f\\n\",\n         *std::max_element(recover_data.begin(), recover_data.end()),\n         *std::min_element(recover_data.begin(), recover_data.end()));\n  printf(\"var=%f\\n\", var);\n#endif\n  EXPECT_LT(var / COUNT, 350);\n}\n}  // namespace zvec::ailego"
  },
  {
    "path": "tests/ailego/algorithm/kmeans_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n\n#define protected public\n#define private public\n#include <ailego/algorithm/kmeans.h>\n\nusing namespace zvec;\n\nTEST(NumericalKmeans, FP32_General) {\n  const size_t DIMENSION = 20;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalKmeans<float, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<float, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, FP16_General) {\n  const size_t DIMENSION = 20;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  
ailego::NumericalKmeans<ailego::Float16, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<ailego::Float16, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, INT8_General) {\n  const size_t DIMENSION = 20 * 4;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalKmeans<int8_t, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<int> dist(-127, 127);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<int8_t, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, BINARY32_General) {\n  const size_t DIMENSION = 16 * 32;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::BinaryKmeans<uint32_t, ailego::ThreadPool> 
kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::BinaryVector<uint32_t> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      if (dist(gen) >= 0.5) {\n        vec.set(j);\n      }\n    }\n    kmeans.append(vec.data(), vec.dimension());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\n#if defined(AILEGO_M64)\nTEST(NumericalKmeans, BINARY64_General) {\n  const size_t DIMENSION = 8 * 64;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::BinaryKmeans<uint64_t, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::BinaryVector<uint64_t> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      if (dist(gen) >= 0.5) {\n        vec.set(j);\n      }\n    }\n    kmeans.append(vec.data(), vec.dimension());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n#endif  // AILEGO_M64\n\nTEST(NibbleKmeans, INT4_General) {\n  const size_t DIMENSION = 32;\n  const size_t K_VALUE = 63;\n  const size_t COUNT = 40000u;\n\n  
ailego::NumericalKmeans<int8_t, ailego::ThreadPool> kmeans1;\n  ailego::NibbleKmeans<int32_t, ailego::ThreadPool> kmeans2;\n  kmeans1.reset(K_VALUE, DIMENSION);\n  kmeans2.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<int> dist(-8, 7);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::NumericalVector<int8_t> vec1(DIMENSION);\n    ailego::NibbleVector<int32_t> vec2(DIMENSION);\n\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      int8_t val = (int8_t)dist(gen);\n      vec1[j] = val;\n      vec2.set(j, val);\n    }\n    kmeans1.append(vec1.data(), vec1.size());\n    kmeans2.append(vec2.data(), vec2.size());\n  }\n\n  ailego::ThreadPool pool;\n  {\n    const ailego::NumericalKmeans<int8_t, ailego::ThreadPool> &kmeans1_ref =\n        kmeans1;\n    ailego::Kmc2CentroidsGenerator<decltype(kmeans1_ref), ailego::ThreadPool> g;\n\n    kmeans1.init_centroids(pool);\n\n    g.set_chain_length(20);\n    kmeans1.init_centroids(pool, g);\n\n    g.set_assumption_free(true);\n    kmeans1.init_centroids(pool, g);\n\n    // Shared centroids\n    auto centroids = kmeans1.centroids();\n    for (size_t i = 0; i < centroids.count(); ++i) {\n      ailego::NibbleVector<int8_t> nvec;\n      nvec.assign(centroids[i], centroids.dimension());\n      kmeans2.mutable_centroids()->append(\n          reinterpret_cast<const uint32_t *>(nvec.data()), nvec.dimension());\n    }\n  }\n\n  double prev_sse1 = 0.0;\n  double prev_sse2 = 0.0;\n  for (size_t i = 0; i < 18; ++i) {\n    double sse1 = 0.0;\n    double sse2 = 0.0;\n    EXPECT_TRUE(kmeans1.cluster_once(pool, &sse1));\n    EXPECT_TRUE(kmeans2.cluster_once(pool, &sse2));\n    printf(\"1: (%zu) SSE: %f -> %f = %f\\n\", i, prev_sse1, sse1,\n           sse1 - prev_sse1);\n    printf(\"2: (%zu) SSE: %f -> %f = %f\\n\", i, prev_sse2, sse2,\n           sse2 - prev_sse2);\n    prev_sse1 = sse1;\n    prev_sse2 = sse2;\n  }\n\n  auto &cluster1 = kmeans1.context().clusters();\n  auto 
&cluster2 = kmeans2.context().clusters();\n  for (size_t i = 0; i < cluster1.size(); ++i) {\n    // printf(\"(%zu) INT8 %f: %zu\\n\", i, cluster1[i].cost(),\n    //        cluster1[i].count());\n    // printf(\"(%zu) INT4 %f: %zu\\n\", i, cluster2[i].cost(),\n    //        cluster2[i].count());\n\n    for (size_t j = 0; j < cluster1[i].accum_.size(); ++j) {\n      EXPECT_DOUBLE_EQ(cluster1[i].accum_[j], cluster2[i].accum_[j]);\n    }\n  }\n}\n\nTEST(NumericalKmeans, FP32_General_InnerProduct) {\n  const size_t DIMENSION = 20;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalInnerProductKmeans<float, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<float, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, FP16_General_InnerProduct) {\n  const size_t DIMENSION = 20;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalInnerProductKmeans<ailego::Float16, ailego::ThreadPool>\n      kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<ailego::Float16, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    
kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, INT8_General_InnerProduct) {\n  const size_t DIMENSION = 20 * 4;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalInnerProductKmeans<int8_t, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<int> dist(-127, 127);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<int8_t, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n\nTEST(NumericalKmeans, FP32_General_InnerProduct_Spherical) {\n  const size_t DIMENSION = 20;\n  const size_t K_VALUE = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalInnerProductKmeans<float, ailego::ThreadPool> kmeans;\n  kmeans.reset(K_VALUE, DIMENSION, true);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<float, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    
kmeans.append(vec.data(), vec.size());\n  }\n\n  ailego::ThreadPool pool;\n  double prev_sse = 0.0;\n  for (size_t i = 0; i < 20; ++i) {\n    double sse = 0.0;\n    EXPECT_TRUE(kmeans.cluster_once(pool, &sse));\n    printf(\"(%zu) SSE: %f -> %f = %f\\n\", i, prev_sse, sse, sse - prev_sse);\n    prev_sse = sse;\n  }\n\n  for (auto &it : kmeans.context().clusters()) {\n    printf(\"%f: %zu\\n\", it.cost(), it.count());\n  }\n}\n"
  },
  {
    "path": "tests/ailego/buffer/buffer_manager_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstdint>\n#include <thread>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/ailego/logger/logger.h>\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec::ailego;\n\n\nconst std::string working_dir{\"./buffer_manager_dir/\"};\nconst std::string file_path_forward{working_dir + \"test.forward_index\"};\nconst std::string file_path_vector{working_dir + \"test.vector_index\"};\n\n\nclass BufferManagerTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n\n    if (!File::MakePath(working_dir)) {\n      LOG_ERROR(\"Failed to create working directory.\");\n      return;\n    }\n\n    File file_vector_index;\n    size_t file_vector_size = 16 * 1024 * 1024;\n    if (!file_vector_index.create(file_path_vector, file_vector_size)) {\n      LOG_ERROR(\"Failed to create vector index file.\");\n      return;\n    }\n    // Populate vector file with number series\n    for (uint32_t i = 0; i < file_vector_size / sizeof(uint32_t); ++i) {\n      file_vector_index.write((void *)&i, sizeof(i));\n    }\n    
file_vector_index.close();\n\n    BufferManager::Instance().init(4 * 1024 * 1024, 1);\n  }\n\n  static void TearDownTestCase() {}\n  /*****  Global initialization and cleanup - End  *****/\n  ;\n};\n\n\nTEST_F(BufferManagerTest, READ_VECTOR_FILE) {\n  uint32_t size_4KB = 4 * 1024;\n\n  auto read_and_verify_numbers = [&](uint32_t offset) {\n    BufferID id = BufferID::VectorID(file_path_vector, offset, size_4KB);\n    auto handle = BufferManager::Instance().acquire(id);\n    uint32_t *vector_data = (uint32_t *)handle.pin_vector_data();\n    uint32_t num_start = offset / sizeof(uint32_t);\n    for (uint32_t i = 0; i < size_4KB / sizeof(uint32_t); i++) {\n      ASSERT_EQ(*(vector_data + i), num_start + i);\n    }\n    handle.unpin_vector_data();\n  };\n\n  std::vector<std::thread> threads;\n\n  // Read the same part concurrently\n  for (int i = 0; i < 10; ++i) {\n    threads.emplace_back(read_and_verify_numbers, 3 * size_4KB);\n  }\n  for (auto &thread : threads) {\n    thread.join();\n  }\n\n  {  // Verify the reference count\n    BufferID id = BufferID::VectorID(file_path_vector, 3 * size_4KB, size_4KB);\n    auto handle = BufferManager::Instance().acquire(id);\n    handle.pin_vector_data();\n    ASSERT_EQ(handle.references(), 1);\n    handle.unpin_vector_data();\n    ASSERT_EQ(handle.references(), 0);\n  }\n\n  threads.clear();\n  // Read different parts concurrently\n  for (int i = 0; i < 30; ++i) {\n    threads.emplace_back(read_and_verify_numbers, i * size_4KB);\n  }\n  for (auto &thread : threads) {\n    thread.join();\n  }\n  ASSERT_EQ(BufferManager::Instance().total_size_in_bytes(), 30 * 4 * 1024);\n\n  {  // Read a large chunk so that the buffer is full\n    BufferID id =\n        BufferID::VectorID(file_path_vector, 4 * 1024 * 1024, 4 * 1024 * 1024);\n    auto handle = BufferManager::Instance().acquire(id);\n    handle.pin_vector_data();\n    handle.unpin_vector_data();\n  }\n\n  {  // Trigger eviction\n    BufferID id =\n        
BufferID::VectorID(file_path_vector, 8 * 1024 * 1024, 4 * 1024 * 1024);\n    auto handle = BufferManager::Instance().acquire(id);\n    handle.pin_vector_data();\n    ASSERT_EQ(BufferManager::Instance().total_size_in_bytes(), 4 * 1024 * 1024);\n    handle.unpin_vector_data();\n    ASSERT_EQ(handle.references(), 0);\n  }\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/ailego/container/bitmap_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <algorithm>\n#include <bitset>\n#include <iostream>\n#include <memory>\n#include <set>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/utility/bitset_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/time_helper.h>\n\n#if defined(__AVX2__)\n#define INTRINSICS_SET \"AVX2\"\n#elif defined(__AVX__)\n#define INTRINSICS_SET \"AVX\"\n#elif defined(__SSE4_2__)\n#define INTRINSICS_SET \"SSE4.2\"\n#elif defined(__SSE4_1__)\n#define INTRINSICS_SET \"SSE4.1\"\n#elif defined(__SSE2__)\n#define INTRINSICS_SET \"SSE2\"\n#else\n#define INTRINSICS_SET \"NONE\"\n#endif\n\nusing namespace zvec::ailego;\n\nTEST(FixedBitset, General) {\n  FixedBitset<0> bitset0;\n  FixedBitset<32> bitset32;\n  FixedBitset<64> bitset64;\n\n  EXPECT_EQ(0u, bitset0.size());\n  EXPECT_EQ(32u, bitset32.size());\n  EXPECT_EQ(64u, bitset64.size());\n\n  EXPECT_TRUE(bitset32.test_none());\n  EXPECT_TRUE(bitset64.test_none());\n\n  bitset32.set(30);\n  bitset64.set(60);\n\n  FixedBitset<32> bitset32_2(bitset32);\n  FixedBitset<64> bitset64_2(bitset64);\n\n  bitset32.set(28);\n  bitset64.set(55);\n\n  EXPECT_TRUE(bitset32_2.test_any());\n  EXPECT_TRUE(bitset64_2.test_any());\n\n  EXPECT_FALSE(bitset32_2.test_all());\n  EXPECT_FALSE(bitset64_2.test_all());\n\n  EXPECT_EQ(1u, bitset32_2.cardinality());\n  EXPECT_EQ(1u, 
bitset64_2.cardinality());\n\n  bitset32_2 = bitset32;\n  bitset64_2 = bitset64;\n\n  EXPECT_EQ(2u, bitset32_2.cardinality());\n  EXPECT_EQ(2u, bitset64_2.cardinality());\n\n  bitset32.reset(28);\n  bitset64.reset(55);\n\n  bitset32_2 = bitset32;\n  bitset64_2 = bitset64;\n\n  EXPECT_EQ(1u, bitset32_2.cardinality());\n  EXPECT_EQ(1u, bitset64_2.cardinality());\n\n  bitset32.flip(30);\n  bitset64.flip(60);\n\n  EXPECT_EQ(0u, bitset32.cardinality());\n  EXPECT_EQ(0u, bitset64.cardinality());\n}\n\nTEST(FixedBitset, And) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<3552> bitset1;\n  FixedBitset<3552> bitset2;\n  FixedBitset<3552> bitset3;\n  std::bitset<3552> stl_bitset1;\n  std::bitset<3552> stl_bitset2;\n  std::bitset<3552> stl_bitset3;\n\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n\n  bitset3 = bitset1;\n  bitset3.bitwise_and(bitset2);\n  stl_bitset3 = stl_bitset1 & stl_bitset2;\n\n  for (uint32_t i = 0; i < bitset3.size(); ++i) {\n    EXPECT_EQ(bitset3.test(i), stl_bitset3.test(i));\n  }\n  EXPECT_EQ(stl_bitset3.count(), bitset3.cardinality());\n\n  FixedBitset<512>::Cast((uint32_t *)bitset3.data() + 1)\n      ->bitwise_and(*(FixedBitset<512>::Cast((uint32_t *)bitset2.data() + 3)));\n}\n\nTEST(FixedBitset, Andnot) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  FixedBitset<2528> bitset3;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n  
std::bitset<2528> stl_bitset3;\n\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n\n  bitset3 = bitset1;\n  bitset3.bitwise_andnot(bitset2);\n  stl_bitset3 = stl_bitset1 & (~stl_bitset2);\n\n  for (uint32_t i = 0; i < bitset3.size(); ++i) {\n    EXPECT_EQ(bitset3.test(i), stl_bitset3.test(i));\n  }\n  EXPECT_EQ(stl_bitset3.count(), bitset3.cardinality());\n\n  FixedBitset<512>::Cast((uint32_t *)bitset3.data() + 1)\n      ->bitwise_andnot(\n          *(FixedBitset<512>::Cast((uint32_t *)bitset2.data() + 3)));\n}\n\nTEST(FixedBitset, Or) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  FixedBitset<2528> bitset3;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n  std::bitset<2528> stl_bitset3;\n\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n\n  bitset3 = bitset1;\n  bitset3.bitwise_or(bitset2);\n  stl_bitset3 = stl_bitset1 | stl_bitset2;\n\n  for (uint32_t i = 0; i < bitset3.size(); ++i) {\n    
EXPECT_EQ(bitset3.test(i), stl_bitset3.test(i));\n  }\n  EXPECT_EQ(stl_bitset3.count(), bitset3.cardinality());\n\n  FixedBitset<512>::Cast((uint32_t *)bitset3.data() + 1)\n      ->bitwise_or(*(FixedBitset<512>::Cast((uint32_t *)bitset2.data() + 3)));\n}\n\nTEST(FixedBitset, Xor) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  FixedBitset<2528> bitset3;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n  std::bitset<2528> stl_bitset3;\n\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 623; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n\n  bitset3 = bitset1;\n  bitset3.bitwise_xor(bitset2);\n  stl_bitset3 = stl_bitset1 ^ stl_bitset2;\n\n  for (uint32_t i = 0; i < bitset3.size(); ++i) {\n    EXPECT_EQ(bitset3.test(i), stl_bitset3.test(i));\n  }\n  EXPECT_EQ(stl_bitset3.count(), bitset3.cardinality());\n\n  FixedBitset<512>::Cast((uint32_t *)bitset3.data() + 1)\n      ->bitwise_xor(*(FixedBitset<512>::Cast((uint32_t *)bitset2.data() + 3)));\n}\n\nTEST(FixedBitset, Not) {\n  FixedBitset<1504> bitset1;\n  EXPECT_FALSE(bitset1.test_all());\n  EXPECT_FALSE(bitset1.test_any());\n  EXPECT_TRUE(bitset1.test_none());\n  EXPECT_EQ(0u, bitset1.cardinality());\n\n  for (uint32_t i = 0; i < bitset1.size(); ++i) {\n    bitset1.set(i);\n  }\n  EXPECT_EQ(bitset1.size(), bitset1.cardinality());\n  EXPECT_TRUE(bitset1.test_all());\n  EXPECT_TRUE(bitset1.test_any());\n  EXPECT_FALSE(bitset1.test_none());\n\n  bitset1.bitwise_not();\n  
EXPECT_FALSE(bitset1.test_all());\n  EXPECT_FALSE(bitset1.test_any());\n  EXPECT_TRUE(bitset1.test_none());\n\n  FixedBitset<512> bitset2;\n  EXPECT_FALSE(bitset2.test_all());\n  EXPECT_FALSE(bitset2.test_any());\n  EXPECT_TRUE(bitset2.test_none());\n\n  for (uint32_t i = 0; i < bitset2.size(); ++i) {\n    bitset2.set(i);\n  }\n  EXPECT_TRUE(bitset2.test_all());\n  EXPECT_TRUE(bitset2.test_any());\n  EXPECT_FALSE(bitset2.test_none());\n\n  bitset2.bitwise_not();\n  EXPECT_FALSE(bitset2.test_all());\n  EXPECT_FALSE(bitset2.test_any());\n  EXPECT_TRUE(bitset2.test_none());\n\n  FixedBitset<512 - 32>::Cast((uint32_t *)bitset2.data() + 1)->bitwise_not();\n}\n\nTEST(FixedBitset, TestAll) {\n  FixedBitset<1504> bitset;\n  EXPECT_FALSE(bitset.test_all());\n\n  for (uint32_t i = 0; i < bitset.size(); ++i) {\n    bitset.set(i);\n  }\n  EXPECT_TRUE(bitset.test_all());\n\n  bitset.reset(999u);\n  EXPECT_FALSE(bitset.test_all());\n  EXPECT_FALSE(\n      FixedBitset<1504 - 32>::Cast((uint32_t *)bitset.data() + 1)->test_all());\n}\n\nTEST(FixedBitset, TestAny) {\n  FixedBitset<1504> bitset;\n  EXPECT_FALSE(bitset.test_any());\n\n  for (uint32_t i = 666; i < 888; ++i) {\n    bitset.set(i);\n  }\n  EXPECT_TRUE(bitset.test_any());\n\n  for (uint32_t i = 666; i < 777; ++i) {\n    bitset.reset(i);\n  }\n  EXPECT_TRUE(bitset.test_any());\n  EXPECT_TRUE(\n      FixedBitset<1504 - 32>::Cast((uint32_t *)bitset.data() + 1)->test_any());\n}\n\nTEST(FixedBitset, TestNone) {\n  FixedBitset<1504> bitset;\n  EXPECT_TRUE(bitset.test_none());\n\n  for (uint32_t i = 1000; i < 1111; ++i) {\n    bitset.set(i);\n  }\n  EXPECT_FALSE(bitset.test_none());\n\n  for (uint32_t i = 1000; i < 1110; ++i) {\n    bitset.flip(i);\n  }\n  EXPECT_FALSE(bitset.test_none());\n  EXPECT_FALSE(\n      FixedBitset<1504 - 32>::Cast((uint32_t *)bitset.data() + 1)->test_none());\n}\n\nTEST(FixedBitset, Extract) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  
std::vector<size_t> vector1;\n\n  for (uint32_t i = 0; i < 1111; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n\n    bitset1.set(val1);\n    vector1.push_back(val1);\n  }\n\n  std::sort(vector1.begin(), vector1.end());\n  vector1.erase(std::unique(vector1.begin(), vector1.end()), vector1.end());\n\n  std::vector<size_t> vector2;\n  bitset1.extract(&vector2);\n\n  EXPECT_EQ(vector1.size(), vector2.size());\n  EXPECT_TRUE(std::equal(vector1.begin(), vector1.end(), vector2.begin()));\n}\n\nTEST(FixedBitset, BitwiseXorCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ(0u, FixedBitset<2528>::BitwiseXorCardinality(bitset1, bitset1));\n  EXPECT_EQ(0u, FixedBitset<2528>::BitwiseXorCardinality(bitset2, bitset2));\n  EXPECT_EQ((stl_bitset1 ^ stl_bitset2).count(),\n            FixedBitset<2528>::BitwiseXorCardinality(bitset1, bitset2));\n\n  EXPECT_EQ(FixedBitset<2528>::BitwiseAndnotCardinality(bitset1, bitset2) +\n                FixedBitset<2528>::BitwiseAndnotCardinality(bitset2, bitset1),\n            FixedBitset<2528>::BitwiseXorCardinality(bitset1, bitset2));\n}\n\nTEST(FixedBitset, BitwiseOrCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  std::bitset<2528> 
stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 | stl_bitset2).count(),\n            FixedBitset<2528>::BitwiseOrCardinality(bitset1, bitset2));\n}\n\nTEST(FixedBitset, BitwiseAndCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & stl_bitset2).count(),\n            FixedBitset<2528>::BitwiseAndCardinality(bitset1, bitset2));\n}\n\nTEST(FixedBitset, BitwiseAndnotCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<2528> bitset1;\n  FixedBitset<2528> bitset2;\n  std::bitset<2528> stl_bitset1;\n  std::bitset<2528> stl_bitset2;\n\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = 
(uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & ~stl_bitset2).count(),\n            FixedBitset<2528>::BitwiseAndnotCardinality(bitset1, bitset2));\n\n  EXPECT_EQ((stl_bitset2 & ~stl_bitset1).count(),\n            FixedBitset<2528>::BitwiseAndnotCardinality(bitset2, bitset1));\n}\n\nTEST(FixedBitset, Benchmark) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  const uint32_t dimension = 2048u;\n  const uint32_t test_count = 100000u;\n\n  std::vector<FixedBitset<dimension>> bucket1_vec;\n  std::vector<FixedBitset<dimension>> bucket2_vec;\n\n  std::unique_ptr<FixedBitset<dimension>> bucket1(new FixedBitset<dimension>);\n  std::unique_ptr<FixedBitset<dimension>> bucket2(new FixedBitset<dimension>);\n\n  for (uint32_t i = 0; i < 2000; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bucket1->size());\n    uint32_t val2 = (uint32_t)(rand() % bucket2->size());\n\n    bucket1->set(val1);\n    bucket2->set(val2);\n  }\n  for (uint32_t i = 0; i < 1000; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bucket1->size());\n    uint32_t val2 = (uint32_t)(rand() % bucket2->size());\n\n    bucket1->flip(val1);\n    bucket2->flip(val2);\n  }\n  for (uint32_t i = 0; i < 500; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bucket1->size());\n    uint32_t val2 = (uint32_t)(rand() % bucket2->size());\n\n    bucket1->reset(val1);\n    bucket2->reset(val2);\n  }\n\n  bucket1_vec.reserve(test_count);\n  bucket2_vec.reserve(test_count);\n  for (uint32_t j = 0; j < test_count; ++j) {\n    bucket1_vec.push_back(*bucket1);\n    bucket2_vec.push_back(*bucket2);\n  }\n\n  {\n    
uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < test_count; ++i) {\n      sum += FixedBitset<dimension>::BitwiseAndCardinality(bucket1_vec[i],\n                                                           bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseAndCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < test_count; ++i) {\n      sum += FixedBitset<dimension>::BitwiseAndnotCardinality(bucket1_vec[i],\n                                                              bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseAndnotCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < test_count; ++i) {\n      sum += FixedBitset<dimension>::BitwiseXorCardinality(bucket1_vec[i],\n                                                           bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseXorCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < test_count; ++i) {\n      sum += FixedBitset<dimension>::BitwiseOrCardinality(bucket1_vec[i],\n                                                          bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseOrCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    std::unique_ptr<FixedBitset<dimension>> bucket3(new FixedBitset<dimension>);\n    *bucket3 = bucket1_vec[0];\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for 
(uint32_t i = 0; i < test_count; ++i) {\n      bucket3->bitwise_and(bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET << \" And: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    std::unique_ptr<FixedBitset<dimension>> bucket3(new FixedBitset<dimension>);\n    *bucket3 = bucket1_vec[0];\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < test_count; ++i) {\n      bucket3->bitwise_andnot(bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET << \" Andnot: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    std::unique_ptr<FixedBitset<dimension>> bucket3(new FixedBitset<dimension>);\n    *bucket3 = bucket1_vec[0];\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < test_count; ++i) {\n      bucket3->bitwise_or(bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET << \" Or: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    std::unique_ptr<FixedBitset<dimension>> bucket3(new FixedBitset<dimension>);\n    *bucket3 = bucket1_vec[0];\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < test_count; ++i) {\n      bucket3->bitwise_xor(bucket2_vec[i]);\n    }\n    std::cout << INTRINSICS_SET << \" Xor: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n}\n\nTEST(Bitset, General) {\n  Bitset bitset32(31);\n  Bitset bitset64(61);\n\n  EXPECT_EQ(32u, bitset32.size());\n  EXPECT_EQ(64u, bitset64.size());\n\n  EXPECT_TRUE(bitset32.test_none());\n  EXPECT_TRUE(bitset64.test_none());\n\n  bitset32.set(30);\n  bitset64.set(60);\n\n  Bitset bitset32_2(bitset32);\n  Bitset bitset64_2(bitset64);\n\n  bitset32.set(28);\n  bitset64.set(55);\n\n  EXPECT_TRUE(bitset32_2.test_any());\n  EXPECT_TRUE(bitset64_2.test_any());\n\n  EXPECT_FALSE(bitset32_2.test_all());\n  EXPECT_FALSE(bitset64_2.test_all());\n\n  EXPECT_EQ(1u, bitset32_2.cardinality());\n  
EXPECT_EQ(1u, bitset64_2.cardinality());\n\n  bitset32_2 = bitset32;\n  bitset64_2 = bitset64;\n\n  EXPECT_EQ(2u, bitset32_2.cardinality());\n  EXPECT_EQ(2u, bitset64_2.cardinality());\n\n  bitset32.reset(28);\n  bitset64.reset(55);\n\n  bitset32_2 = bitset32;\n  bitset64_2 = bitset64;\n\n  EXPECT_EQ(1u, bitset32_2.cardinality());\n  EXPECT_EQ(1u, bitset64_2.cardinality());\n\n  bitset32.flip(30);\n  bitset64.flip(60);\n\n  EXPECT_EQ(0u, bitset32.cardinality());\n  EXPECT_EQ(0u, bitset64.cardinality());\n}\n\nTEST(Bitset, BitwiseXorCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitset bitset1;\n  Bitset bitset2;\n  bitset1.resize(500000);\n  bitset2.resize(630000);\n  std::bitset<638888> stl_bitset1;\n  std::bitset<638888> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 ^ stl_bitset2).count(),\n            Bitset::BitwiseXorCardinality(bitset1, bitset2));\n\n  EXPECT_EQ(Bitset::BitwiseAndnotCardinality(bitset1, bitset2) +\n                Bitset::BitwiseAndnotCardinality(bitset2, bitset1),\n            Bitset::BitwiseXorCardinality(bitset1, bitset2));\n  EXPECT_EQ(Bitset::BitwiseXorCardinality(bitset1, bitset2),\n            Bitset::BitwiseXorCardinality(bitset2, bitset1));\n}\n\nTEST(Bitset, BitwiseOrCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitset bitset1;\n  Bitset bitset2;\n  bitset1.resize(599999);\n  bitset2.resize(500000);\n\n  std::bitset<638888> 
stl_bitset1;\n  std::bitset<638888> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 | stl_bitset2).count(),\n            Bitset::BitwiseOrCardinality(bitset1, bitset2));\n  EXPECT_EQ(Bitset::BitwiseOrCardinality(bitset1, bitset2),\n            Bitset::BitwiseOrCardinality(bitset2, bitset1));\n}\n\nTEST(Bitset, BitwiseAndCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitset bitset1;\n  Bitset bitset2;\n  bitset1.resize(500001);\n  bitset2.resize(599999);\n\n  std::bitset<638888> stl_bitset1;\n  std::bitset<638888> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & stl_bitset2).count(),\n            Bitset::BitwiseAndCardinality(bitset1, bitset2));\n  EXPECT_EQ(Bitset::BitwiseAndCardinality(bitset1, bitset2),\n            Bitset::BitwiseAndCardinality(bitset2, bitset1));\n}\n\nTEST(Bitset, BitwiseAndnotCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  
Bitset bitset1;\n  Bitset bitset2;\n  bitset1.resize(599997);\n  bitset2.resize(500002);\n\n  std::bitset<638888> stl_bitset1;\n  std::bitset<638888> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & ~stl_bitset2).count(),\n            Bitset::BitwiseAndnotCardinality(bitset1, bitset2));\n\n  EXPECT_EQ((stl_bitset2 & ~stl_bitset1).count(),\n            Bitset::BitwiseAndnotCardinality(bitset2, bitset1));\n}\n\nTEST(Bitmap, General) {\n  const uint32_t data1[] = {0,     1,      2,      4,      7,    9,\n                            31,    65,     77,     100,    1000, 1999,\n                            19999, 100000, 188888, 2999999};\n  const uint32_t data2[] = {8,     12,    13,    24,     7777,      9999,\n                            66666, 88888, 99999, 100002, 0x7fffffff};\n  Bitmap bitmap1;\n\n  EXPECT_EQ(0u, bitmap1.cardinality());\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    bitmap1.set(data1[i]);\n  }\n\n  // Test `Set`\n  Bitmap bitmap2(bitmap1);\n\n  EXPECT_NE(0u, bitmap2.cardinality());\n  EXPECT_EQ(sizeof(data1) / sizeof(data1[0]), bitmap2.cardinality());\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    EXPECT_TRUE(bitmap2.test(data1[i]));\n  }\n\n  // Test `Reset`\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    bitmap1.reset(data2[i]);\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    EXPECT_FALSE(bitmap1.test(data2[i]));\n  
}\n\n  EXPECT_EQ(sizeof(data1) / sizeof(data1[0]), bitmap1.cardinality());\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    bitmap1.reset(data1[i]);\n  }\n  EXPECT_EQ(0u, bitmap1.cardinality());\n\n  // Test `Flip`\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    bitmap1.flip(data1[i]);\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    bitmap1.flip(data2[i]);\n  }\n  EXPECT_EQ(sizeof(data1) / sizeof(data1[0]) + sizeof(data2) / sizeof(data2[0]),\n            bitmap1.cardinality());\n\n  bitmap2 = bitmap1;\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    EXPECT_TRUE(bitmap2.test(data1[i]));\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    EXPECT_TRUE(bitmap2.test(data2[i]));\n  }\n\n  // Test `ShrinkToFit`\n  bitmap1.shrink_to_fit();\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    EXPECT_TRUE(bitmap1.test(data1[i]));\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    EXPECT_TRUE(bitmap1.test(data2[i]));\n  }\n\n  // Test `Clear`\n  EXPECT_NE(0u, bitmap1.cardinality());\n  bitmap2 = bitmap1;\n  bitmap1.clear();\n  EXPECT_EQ(0u, bitmap1.cardinality());\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    EXPECT_FALSE(bitmap1.test(data1[i]));\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    EXPECT_FALSE(bitmap1.test(data2[i]));\n  }\n  for (size_t i = 0; i < sizeof(data1) / sizeof(data1[0]); ++i) {\n    EXPECT_TRUE(bitmap2.test(data1[i]));\n  }\n  for (size_t i = 0; i < sizeof(data2) / sizeof(data2[0]); ++i) {\n    EXPECT_TRUE(bitmap2.test(data2[i]));\n  }\n}\n\nTEST(Bitmap, ShrinkToFit) {\n  Bitmap bitmap1;\n  bitmap1.shrink_to_fit();\n\n  EXPECT_EQ(0u, bitmap1.bucket_size());\n  bitmap1.set(2);\n  EXPECT_EQ(1u, bitmap1.bucket_size());\n  bitmap1.reset(2);\n  EXPECT_EQ(1u, bitmap1.bucket_size());\n  bitmap1.shrink_to_fit();\n  EXPECT_EQ(0u, 
bitmap1.bucket_size());\n\n  bitmap1.set(100);\n  bitmap1.set(100000);\n  bitmap1.set(1000000);\n  EXPECT_EQ((1000000u + 0xffff) / 0x10000, bitmap1.bucket_size());\n\n  bitmap1.reset(100);\n  bitmap1.reset(1000000);\n  EXPECT_EQ((1000000u + 0xffff) / 0x10000, bitmap1.bucket_size());\n  bitmap1.shrink_to_fit();\n  EXPECT_EQ((100000u + 0xffff) / 0x10000, bitmap1.bucket_size());\n}\n\nTEST(Bitmap, And) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1, bitmap2;\n  std::set<size_t> set1, set2, set3;\n  std::vector<size_t> vec1, vec3;\n\n  for (uint32_t i = 0; i < 25000; ++i) {\n    uint32_t val1 = rand() % 1000000;\n    bitmap1.set(val1);\n    set1.insert(val1);\n  }\n\n  for (uint32_t i = 0; i < 45000; ++i) {\n    uint32_t val2 = rand() % 1000000;\n    bitmap2.set(val2);\n    set2.insert(val2);\n  }\n  std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                        std::inserter(set3, set3.begin()));\n  bitmap1.bitwise_and(bitmap2);\n  bitmap1.extract(&vec1);\n  ASSERT_EQ(bitmap1.cardinality(), vec1.size());\n  ASSERT_EQ(set3.size(), vec1.size());\n\n  vec3.reserve(set3.size());\n  std::copy(set3.begin(), set3.end(), std::back_inserter(vec3));\n  EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n}\n\nTEST(Bitmap, Andnot) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1, bitmap2;\n  std::set<size_t> set1, set2, set3;\n  std::vector<size_t> vec1, vec3;\n\n  for (uint32_t i = 0; i < 20000; ++i) {\n    uint32_t val1 = rand() % 1000000;\n    bitmap1.set(val1);\n    set1.insert(val1);\n  }\n\n  for (uint32_t i = 0; i < 20000; ++i) {\n    uint32_t val2 = rand() % 1000000;\n    bitmap2.set(val2);\n    set2.insert(val2);\n  }\n  std::set_difference(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                      std::inserter(set3, set3.begin()));\n  bitmap1.bitwise_andnot(bitmap2);\n  bitmap1.extract(&vec1);\n  ASSERT_EQ(bitmap1.cardinality(), 
vec1.size());\n  ASSERT_EQ(set3.size(), vec1.size());\n\n  vec3.reserve(set3.size());\n  std::copy(set3.begin(), set3.end(), std::back_inserter(vec3));\n  EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n}\n\nTEST(Bitmap, Or) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1, bitmap2;\n  std::set<size_t> set1, set2, set3;\n  std::vector<size_t> vec1, vec3;\n\n  for (uint32_t i = 0; i < 3000; ++i) {\n    uint32_t val1 = rand() % 2000000;\n    bitmap1.set(val1);\n    set1.insert(val1);\n  }\n\n  for (uint32_t i = 0; i < 2000; ++i) {\n    uint32_t val2 = rand() % 2000000;\n    bitmap2.set(val2);\n    set2.insert(val2);\n  }\n  std::set_union(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                 std::inserter(set3, set3.begin()));\n  bitmap1.bitwise_or(bitmap2);\n  bitmap1.extract(&vec1);\n  ASSERT_EQ(bitmap1.cardinality(), vec1.size());\n  ASSERT_EQ(set3.size(), vec1.size());\n\n  vec3.reserve(set3.size());\n  std::copy(set3.begin(), set3.end(), std::back_inserter(vec3));\n  EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n}\n\nTEST(Bitmap, Xor) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1, bitmap2;\n  std::set<size_t> set1, set2, set3;\n  std::vector<size_t> vec1, vec3;\n\n  for (uint32_t i = 0; i < 3000; ++i) {\n    uint32_t val1 = rand() % 2000000;\n    bitmap1.set(val1);\n    set1.insert(val1);\n  }\n\n  for (uint32_t i = 0; i < 2000; ++i) {\n    uint32_t val2 = rand() % 2000000;\n    bitmap2.set(val2);\n    set2.insert(val2);\n  }\n  std::set_symmetric_difference(set1.begin(), set1.end(), set2.begin(),\n                                set2.end(), std::inserter(set3, set3.begin()));\n  bitmap1.bitwise_xor(bitmap2);\n  bitmap1.extract(&vec1);\n  ASSERT_EQ(bitmap1.cardinality(), vec1.size());\n  ASSERT_EQ(set3.size(), vec1.size());\n\n  vec3.reserve(set3.size());\n  std::copy(set3.begin(), set3.end(), std::back_inserter(vec3));\n  
EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n}\n\nTEST(Bitmap, Not) {\n  Bitmap bitmap1, bitmap2, bitmap3;\n  std::set<size_t> set1, set2, set3;\n  std::vector<size_t> vec1;\n\n  for (uint32_t i = 0; i < 20000; ++i) {\n    uint32_t val1 = rand() % 1000000;\n    bitmap1.set(val1);\n    set1.insert(val1);\n  }\n\n  for (uint32_t i = 0; i < 20000; ++i) {\n    uint32_t val2 = rand() % 1000000;\n    bitmap2.set(val2);\n    set2.insert(val2);\n  }\n\n  bitmap2.bitwise_not();\n  bitmap2.bitwise_not();\n\n  {\n    set3.clear();\n    vec1.clear();\n    std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                          std::inserter(set3, set3.begin()));\n\n    bitmap3 = bitmap1;\n    bitmap3.bitwise_and(bitmap2);\n    bitmap3.extract(&vec1);\n    ASSERT_EQ(bitmap3.cardinality(), vec1.size());\n    ASSERT_EQ(set3.size(), vec1.size());\n\n    std::vector<size_t> vec3(set3.begin(), set3.end());\n    EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n  }\n\n  {\n    set3.clear();\n    vec1.clear();\n    std::set_difference(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                        std::inserter(set3, set3.begin()));\n\n    bitmap3 = bitmap1;\n    bitmap3.bitwise_andnot(bitmap2);\n    bitmap3.extract(&vec1);\n    ASSERT_EQ(bitmap3.cardinality(), vec1.size());\n    ASSERT_EQ(set3.size(), vec1.size());\n\n    std::vector<size_t> vec3(set3.begin(), set3.end());\n    EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n  }\n\n  {\n    set3.clear();\n    vec1.clear();\n    std::set_union(set1.begin(), set1.end(), set2.begin(), set2.end(),\n                   std::inserter(set3, set3.begin()));\n\n    bitmap3 = bitmap1;\n    bitmap3.bitwise_or(bitmap2);\n    bitmap3.extract(&vec1);\n    ASSERT_EQ(bitmap3.cardinality(), vec1.size());\n    ASSERT_EQ(set3.size(), vec1.size());\n\n    std::vector<size_t> vec3(set3.begin(), set3.end());\n    EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), 
vec3.begin()));\n  }\n\n  {\n    set3.clear();\n    vec1.clear();\n    std::set_symmetric_difference(set1.begin(), set1.end(), set2.begin(),\n                                  set2.end(),\n                                  std::inserter(set3, set3.begin()));\n\n    bitmap3 = bitmap1;\n    bitmap3.bitwise_xor(bitmap2);\n    bitmap3.extract(&vec1);\n    ASSERT_EQ(bitmap3.cardinality(), vec1.size());\n    ASSERT_EQ(set3.size(), vec1.size());\n\n    std::vector<size_t> vec3(set3.begin(), set3.end());\n    EXPECT_TRUE(std::equal(vec1.begin(), vec1.end(), vec3.begin()));\n  }\n}\n\nTEST(Bitmap, TestAll) {\n  Bitmap bitmap;\n  EXPECT_FALSE(bitmap.test_all());\n\n  for (uint32_t i = 0; i < Bitmap::Bucket::MAX_SIZE * 2; ++i) {\n    bitmap.set(i);\n  }\n  EXPECT_TRUE(bitmap.test_all());\n\n  bitmap.reset(Bitmap::Bucket::MAX_SIZE + 2);\n  EXPECT_FALSE(bitmap.test_all());\n}\n\nTEST(Bitmap, TestAny) {\n  Bitmap bitmap;\n  EXPECT_FALSE(bitmap.test_any());\n\n  for (uint32_t i = 69000; i < 70000; ++i) {\n    bitmap.set(i);\n  }\n  EXPECT_TRUE(bitmap.test_any());\n\n  for (uint32_t i = 69888; i < 70111; ++i) {\n    bitmap.reset(i);\n  }\n  EXPECT_TRUE(bitmap.test_any());\n}\n\nTEST(Bitmap, TestNone) {\n  Bitmap bitmap;\n  EXPECT_TRUE(bitmap.test_none());\n\n  for (uint32_t i = 65000; i < 70000; ++i) {\n    bitmap.set(i);\n  }\n  EXPECT_FALSE(bitmap.test_none());\n\n  for (uint32_t i = 65555; i < 70022; ++i) {\n    bitmap.flip(i);\n  }\n  EXPECT_FALSE(bitmap.test_none());\n}\n\nTEST(Bitmap, Extract) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1;\n  std::vector<size_t> vector1;\n\n  for (uint32_t i = 0; i < 1111; ++i) {\n    uint32_t val1 = rand();\n\n    bitmap1.set(val1);\n    vector1.push_back(val1);\n  }\n\n  std::sort(vector1.begin(), vector1.end());\n  vector1.erase(std::unique(vector1.begin(), vector1.end()), vector1.end());\n\n  std::vector<size_t> vector2;\n  bitmap1.extract(&vector2);\n\n  ASSERT_EQ(vector1.size(), vector2.size());\n  
EXPECT_TRUE(std::equal(vector1.begin(), vector1.end(), vector2.begin()));\n}\n\nTEST(Bitmap, BitwiseXorCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1;\n  Bitmap bitmap2;\n  std::bitset<500000> stl_bitset1;\n  std::bitset<500000> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitmap2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitmap2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 ^ stl_bitset2).count(),\n            Bitmap::BitwiseXorCardinality(bitmap1, bitmap2));\n\n  EXPECT_EQ(Bitmap::BitwiseAndnotCardinality(bitmap1, bitmap2) +\n                Bitmap::BitwiseAndnotCardinality(bitmap2, bitmap1),\n            Bitmap::BitwiseXorCardinality(bitmap1, bitmap2));\n  EXPECT_EQ(Bitmap::BitwiseXorCardinality(bitmap2, bitmap1),\n            Bitmap::BitwiseXorCardinality(bitmap1, bitmap2));\n}\n\nTEST(Bitmap, BitwiseOrCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1;\n  Bitmap bitmap2;\n  std::bitset<500000> stl_bitset1;\n  std::bitset<500000> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitmap2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitmap2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 | stl_bitset2).count(),\n            Bitmap::BitwiseOrCardinality(bitmap1, bitmap2));\n  
EXPECT_EQ(Bitmap::BitwiseOrCardinality(bitmap2, bitmap1),\n            Bitmap::BitwiseOrCardinality(bitmap1, bitmap2));\n}\n\nTEST(Bitmap, BitwiseAndCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1;\n  Bitmap bitmap2;\n  std::bitset<500000> stl_bitset1;\n  std::bitset<500000> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitmap2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitmap2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & stl_bitset2).count(),\n            Bitmap::BitwiseAndCardinality(bitmap1, bitmap2));\n  EXPECT_EQ(Bitmap::BitwiseAndCardinality(bitmap2, bitmap1),\n            Bitmap::BitwiseAndCardinality(bitmap1, bitmap2));\n}\n\nTEST(Bitmap, BitwiseAndnotCardinality) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1;\n  Bitmap bitmap2;\n  std::bitset<500000> stl_bitset1;\n  std::bitset<500000> stl_bitset2;\n\n  for (uint32_t i = 0; i < 800; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitmap2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 600; ++i) {\n    uint32_t val1 = rand() % 500000;\n    uint32_t val2 = rand() % 500000;\n\n    bitmap1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitmap2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n  EXPECT_EQ((stl_bitset1 & ~stl_bitset2).count(),\n            Bitmap::BitwiseAndnotCardinality(bitmap1, bitmap2));\n\n  EXPECT_EQ((stl_bitset2 & ~stl_bitset1).count(),\n            Bitmap::BitwiseAndnotCardinality(bitmap2, bitmap1));\n}\n\nTEST(Bitmap, Benchmark) {\n  
srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  Bitmap bitmap1, bitmap2;\n\n  for (uint32_t i = 0; i < 2000; ++i) {\n    uint32_t val1 = rand() % 200000000u;\n    uint32_t val2 = rand() % 200000000u;\n\n    bitmap1.set(val1);\n    bitmap2.set(val2);\n  }\n  for (uint32_t i = 0; i < 1000; ++i) {\n    uint32_t val1 = rand() % 200000000u;\n    uint32_t val2 = rand() % 200000000u;\n\n    bitmap1.flip(val1);\n    bitmap2.flip(val2);\n  }\n  for (uint32_t i = 0; i < 500; ++i) {\n    uint32_t val1 = rand() % 200000000u;\n    uint32_t val2 = rand() % 200000000u;\n\n    bitmap1.reset(val1);\n    bitmap2.reset(val2);\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < 3; ++i) {\n      sum += Bitmap::BitwiseAndCardinality(bitmap1, bitmap2);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseAndCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < 3; ++i) {\n      sum += Bitmap::BitwiseAndnotCardinality(bitmap1, bitmap2);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseAndnotCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < 3; ++i) {\n      sum += Bitmap::BitwiseXorCardinality(bitmap1, bitmap2);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseXorCardinality: \" << Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    uint64_t t1 = Monotime::MicroSeconds();\n    uint64_t sum = 0;\n    for (uint32_t i = 0; i < 3; ++i) {\n      sum += Bitmap::BitwiseOrCardinality(bitmap1, bitmap2);\n    }\n    std::cout << INTRINSICS_SET\n              << \" BitwiseOrCardinality: \" << 
Monotime::MicroSeconds() - t1\n              << \" us, sum: \" << sum << std::endl;\n  }\n\n  {\n    Bitmap bitmap3;\n    bitmap3 = bitmap1;\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < 3; ++i) {\n      bitmap3.bitwise_and(bitmap2);\n    }\n    std::cout << INTRINSICS_SET << \" And: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    Bitmap bitmap3;\n    bitmap3 = bitmap1;\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < 3; ++i) {\n      bitmap3.bitwise_andnot(bitmap2);\n    }\n    std::cout << INTRINSICS_SET << \" Andnot: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    Bitmap bitmap3;\n    bitmap3 = bitmap1;\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < 3; ++i) {\n      bitmap3.bitwise_or(bitmap2);\n    }\n    std::cout << INTRINSICS_SET << \" Or: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n\n  {\n    Bitmap bitmap3;\n    bitmap3 = bitmap1;\n\n    uint64_t t1 = Monotime::MicroSeconds();\n    for (uint32_t i = 0; i < 3; ++i) {\n      bitmap3.bitwise_xor(bitmap2);\n    }\n    std::cout << INTRINSICS_SET << \" Xor: \" << Monotime::MicroSeconds() - t1\n              << \" us\" << std::endl;\n  }\n}\n"
  },
  {
    "path": "tests/ailego/container/blob_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <type_traits>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/blob.h>\n\nusing namespace zvec;\n\nTEST(BlobWrap, Constructor) {\n  ailego::BlobWrap blob1;\n  EXPECT_EQ(0u, blob1.size());\n  EXPECT_FALSE(blob1.buffer());\n  EXPECT_FALSE(blob1.is_valid());\n\n  std::string buf2;\n  ailego::BlobWrap blob2(buf2);\n  EXPECT_EQ(0u, blob2.size());\n  EXPECT_TRUE(blob2.buffer());\n  EXPECT_FALSE(blob2.is_valid());\n\n  buf2.append(\"good...\");\n  EXPECT_EQ(0u, blob2.size());\n  EXPECT_TRUE(blob2.buffer());\n\n  ailego::BlobWrap blob3(blob2);\n  EXPECT_EQ(0u, blob3.size());\n  EXPECT_TRUE(blob3.buffer());\n\n  std::string buf4(\"........\");\n  ailego::BlobWrap blob4(buf4);\n  EXPECT_NE(0u, blob4.size());\n  EXPECT_TRUE(blob4.buffer());\n  EXPECT_TRUE(blob4.is_valid());\n\n  ailego::BlobWrap blob5(std::move(blob4));\n  EXPECT_EQ(0u, blob4.size());\n  EXPECT_FALSE(blob4.buffer());\n  EXPECT_NE(0u, blob5.size());\n  EXPECT_TRUE(blob5.buffer());\n\n  blob4 = blob5;\n  EXPECT_NE(0u, blob4.size());\n  EXPECT_TRUE(blob4.buffer());\n  EXPECT_NE(0u, blob5.size());\n  EXPECT_TRUE(blob5.buffer());\n\n  blob1 = std::move(blob5);\n  EXPECT_NE(0u, blob1.size());\n  EXPECT_TRUE(blob1.buffer());\n  EXPECT_EQ(0u, blob5.size());\n  EXPECT_FALSE(blob5.buffer());\n}\n\nTEST(BlobWrap, General) {\n  ailego::BlobWrap blob1;\n  std::string 
buf1(\"11111111111\");\n\n  blob1.mount(buf1);\n  EXPECT_TRUE(blob1.buffer());\n\n  blob1.umount();\n  EXPECT_FALSE(blob1.buffer());\n\n  std::string buf2(\"22222222222222222\");\n  const ailego::BlobWrap blob2(buf2);\n  EXPECT_TRUE(\n      std::is_const<\n          typename std::remove_pointer<decltype(blob2.buffer())>::type>::value);\n\n  ailego::BlobWrap blob3;\n  std::string buf3(\"3333\");\n  blob3.mount(const_cast<char *>(buf3.data()), buf3.size());\n  blob3.copy(blob2);\n  EXPECT_FALSE(\n      std::is_const<\n          typename std::remove_pointer<decltype(blob3.buffer())>::type>::value);\n\n  std::string buf4(\"444444444444444444444\");\n  ailego::BlobWrap blob4;\n  blob4.mount(buf4);\n  blob4.copy(buf1.data(), buf1.size());\n\n  std::string buf5(\"55555\");\n  ailego::BlobWrap blob5(buf5);\n  blob5.zero();\n  blob4.copy(buf4);\n}\n"
  },
  {
    "path": "tests/ailego/container/bloom_filter_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <ailego/container/bloom_filter.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec::ailego;\n\nTEST(BloomFilterCalculator, General) {\n  EXPECT_EQ(13487125u,\n            BloomFilterCalculator::NumberOfItems(536454615, 5, 0.000023));\n  EXPECT_EQ(295835133u,\n            BloomFilterCalculator::NumberOfBytes(123456789, 0.0001));\n  EXPECT_EQ(11924878998u,\n            BloomFilterCalculator::NumberOfBits(536454615, 0.000023));\n  EXPECT_FLOAT_EQ(0.00032803119f, (float)BloomFilterCalculator::Probability(\n                                      400204, 7005007, 8));\n  EXPECT_FLOAT_EQ(0.747645072f,\n                  (float)BloomFilterCalculator::Probability(10000, 10000, 2));\n  EXPECT_EQ(12u, BloomFilterCalculator::NumberOfHash(400204, 7005007));\n  EXPECT_EQ(24120650u,\n            BloomFilterCalculator::NumberOfBits(1000000, 5, 0.00023));\n\n  double p = 0.000023;\n  size_t n = 536454615;\n  size_t m = BloomFilterCalculator::NumberOfBits(n, p);\n  size_t k = BloomFilterCalculator::NumberOfHash(n, m);\n  double p2 = BloomFilterCalculator::Probability(n, m, k);\n  std::cout << \"Probability: \" << p << std::endl;\n  std::cout << \"Probability2: \" << p2 << std::endl;\n}\n\nTEST(BloomFilter, General) {\n  BloomFilter<5> filter(10000, 0.00023);\n  EXPECT_TRUE(filter.insert(19009, 134, 1234, 54511, 43423));\n  
EXPECT_EQ(1u, filter.count());\n  EXPECT_TRUE(filter.has(19009, 134, 1234, 54511, 43423));\n  EXPECT_FALSE(filter.has(19009, 135, 1234, 54511, 43423));\n\n  filter.force_insert(19009, 135, 1234, 54511, 43423);\n  EXPECT_TRUE(filter.has(19009, 135, 1234, 54511, 43423));\n\n  filter.clear();\n  EXPECT_EQ(0u, filter.count());\n  EXPECT_FALSE(filter.has(19009, 134, 1234, 54511, 43423));\n\n  BloomFilter<0> filter0;\n  (void)filter0;\n\n  BloomFilter<6> filter6;\n  EXPECT_FALSE(filter6.reset(0, 23.1));\n  EXPECT_TRUE(filter6.reset(100000, 0.00023));\n  std::cout << \"bits_count: \" << filter6.bits_count() << std::endl;\n  std::cout << \"capacity: \" << filter6.capacity() << std::endl;\n  std::cout << \"count: \" << filter6.count() << std::endl;\n  std::cout << \"probability: \" << filter6.probability() << std::endl;\n}\n"
  },
  {
    "path": "tests/ailego/container/cube_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <string>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/cube.h>\n\nusing namespace zvec::ailego;\n\nTEST(Cube, TypeInfo) {\n  std::cout << \"* bool:                \" << typeid(bool).name() << std::endl;\n\n  std::cout << \"* int8_t:              \" << typeid(int8_t).name() << std::endl;\n  std::cout << \"* char:                \" << typeid(char).name() << std::endl;\n  std::cout << \"* signed char:         \" << typeid(signed char).name()\n            << std::endl;\n  std::cout << \"* uint8_t:             \" << typeid(uint8_t).name() << std::endl;\n  std::cout << \"* unsigned char:       \" << typeid(unsigned char).name()\n            << std::endl;\n\n  std::cout << \"* int16_t:             \" << typeid(int16_t).name() << std::endl;\n  std::cout << \"* short:               \" << typeid(short).name() << std::endl;\n  std::cout << \"* signed short:        \" << typeid(signed short).name()\n            << std::endl;\n  std::cout << \"* uint16_t:            \" << typeid(uint16_t).name()\n            << std::endl;\n  std::cout << \"* unsigned short:      \" << typeid(unsigned short).name()\n            << std::endl;\n\n  std::cout << \"* int32_t:             \" << typeid(int32_t).name() << std::endl;\n  std::cout << \"* int:                 \" << typeid(int).name() << std::endl;\n  std::cout << \"* signed 
int:          \" << typeid(signed int).name()\n            << std::endl;\n  std::cout << \"* uint32_t:            \" << typeid(uint32_t).name()\n            << std::endl;\n  std::cout << \"* unsigned int:        \" << typeid(unsigned int).name()\n            << std::endl;\n\n  std::cout << \"* int64_t:             \" << typeid(int64_t).name() << std::endl;\n  std::cout << \"* long:                \" << typeid(long).name() << std::endl;\n  std::cout << \"* signed long:         \" << typeid(signed long).name()\n            << std::endl;\n  std::cout << \"* uint64_t:            \" << typeid(uint64_t).name()\n            << std::endl;\n  std::cout << \"* unsigned long:       \" << typeid(unsigned long).name()\n            << std::endl;\n\n  std::cout << \"* long long:           \" << typeid(long).name() << std::endl;\n  std::cout << \"* signed long long:    \" << typeid(signed long).name()\n            << std::endl;\n  std::cout << \"* unsigned long long:  \" << typeid(unsigned long).name()\n            << std::endl;\n}\n\nTEST(Cube, General) {\n  Cube cube1 = 11111;\n  EXPECT_EQ(11111, cube1.unsafe_cast<int>());\n  EXPECT_EQ(11111, cube1.cast<int>());\n  int int1 = cube1;\n  EXPECT_EQ(11111, int1);\n  EXPECT_TRUE(!cube1.empty());\n  EXPECT_EQ(sizeof(11111), cube1.size());\n\n  Cube cube2 = 22222;\n  EXPECT_EQ(22222, cube2.unsafe_cast<int>());\n  EXPECT_EQ(22222, cube2.cast<int>());\n  int int2 = (const int &)cube2;\n  EXPECT_EQ(22222, int2);\n  EXPECT_TRUE(!cube2.empty());\n  EXPECT_EQ(sizeof(22222), cube2.size());\n\n  Cube cube3 = std::vector<int>();\n  cube3.unsafe_cast<std::vector<int>>().push_back(1);\n  cube3.unsafe_cast<std::vector<int>>().push_back(2);\n  cube3.unsafe_cast<std::vector<int>>().push_back(3);\n  EXPECT_EQ(3u, cube3.unsafe_cast<std::vector<int>>().size());\n  EXPECT_EQ(3u, cube3.cast<std::vector<int>>().size());\n  std::vector<int> &vec3 = cube3;\n  EXPECT_EQ(3u, vec3.size());\n  EXPECT_TRUE(!cube3.empty());\n  EXPECT_EQ(sizeof(std::vector<int>), 
cube3.size());\n\n  std::vector<long> vec4;\n  vec4.push_back(1);\n  vec4.push_back(2);\n  vec4.push_back(3);\n  vec4.push_back(4);\n  Cube cube4 = vec4;\n  EXPECT_EQ(4u, cube4.unsafe_cast<std::vector<long>>().size());\n  EXPECT_EQ(4u, cube4.cast<std::vector<long>>().size());\n  const std::vector<long> &vec44 = cube4;\n  EXPECT_EQ(4u, vec44.size());\n  EXPECT_TRUE(!cube4.empty());\n  EXPECT_EQ(sizeof(std::vector<long>), cube4.size());\n\n  Cube cube5, cube6;\n  EXPECT_TRUE(cube5.empty());\n  EXPECT_TRUE(cube6.empty());\n  EXPECT_EQ(cube5.type(), cube6.type());\n  EXPECT_EQ(0u, cube5.size());\n  EXPECT_EQ(0u, cube6.size());\n\n  EXPECT_EQ(cube1.type(), cube2.type());\n  EXPECT_NE(cube3.type(), cube4.type());\n  EXPECT_NE(cube1.type(), cube3.type());\n  EXPECT_NE(cube2.type(), cube4.type());\n  EXPECT_NE(cube1.type(), cube5.type());\n  EXPECT_NE(cube2.type(), cube5.type());\n  EXPECT_NE(cube3.type(), cube5.type());\n  EXPECT_NE(cube4.type(), cube5.type());\n  EXPECT_TRUE(cube1.compatible(cube2));\n  EXPECT_TRUE(cube5.compatible(cube6));\n  EXPECT_FALSE(cube1.compatible(cube3));\n  EXPECT_FALSE(cube3.compatible(cube5));\n\n  cube1.reset();\n  cube3.reset();\n  cube5.reset();\n  cube6.reset();\n  EXPECT_TRUE(cube1.empty());\n  EXPECT_TRUE(cube3.empty());\n  EXPECT_TRUE(cube5.empty());\n  EXPECT_TRUE(cube6.empty());\n}\n\nTEST(Cube, LargeObject) {\n  std::string str1(\"1111111\");\n  std::string str2(\"2222222\");\n  std::string str3(\"3333333\");\n  std::string str4(\"4444444\");\n  std::string str5(\"5555555\");\n  std::string str6(\"6666666\");\n  std::string str7(\"7777777\");\n\n  Cube cube1(str1);\n  Cube cube2;\n  cube2 = str2;\n  Cube cube3 = str3;\n\n  EXPECT_EQ(str1, cube1.cast<std::string>());\n  EXPECT_EQ(str2, cube2.cast<std::string>());\n  EXPECT_TRUE(cube1.compatible(cube2));\n\n  cube1 = std::move(cube2);\n  EXPECT_EQ(str2, cube1.cast<std::string>());\n  EXPECT_TRUE(cube2.empty());\n  EXPECT_FALSE(cube1.compatible(cube2));\n\n  EXPECT_EQ(str3, 
cube3.cast<std::string>());\n  cube3 = cube1;\n  EXPECT_EQ(str2, cube3.cast<std::string>());\n  EXPECT_EQ(str2, cube1.cast<std::string>());\n\n  // Test Constructor Cube(T &&rhs) / Cube(const T &rhs)\n  Cube cube41(std::string(\"444444\"));\n  Cube cube42(str4);\n  EXPECT_NE(std::string(\"\"), str4);\n  Cube cube43(std::move(str4));\n  EXPECT_EQ(std::string(\"\"), str4);\n\n  const std::string str41 = str4;\n  Cube cube44(str41);\n  EXPECT_EQ(str41, str4);\n  EXPECT_EQ(str4, cube44.cast<std::string>());\n\n  // Test Assignment operator=(T &&rhs) / operator=(const T &rhs)\n  Cube cube51, cube52, cube53, cube54;\n  cube51 = std::string(\"55555\");\n  cube52 = str5;\n  EXPECT_NE(std::string(\"\"), str5);\n  cube53 = std::move(str5);\n  EXPECT_EQ(std::string(\"\"), str5);\n\n  const std::string str51 = str5;\n  cube54 = str51;\n  EXPECT_EQ(str51, str5);\n  EXPECT_EQ(str5, cube54.cast<std::string>());\n\n  // Test Constructor Cube(Cube &&rhs) / Cube(const Cube &rhs)\n  Cube cube6(str6);\n  Cube cube61(cube6);\n  EXPECT_EQ(str6, cube61.cast<std::string>());\n  EXPECT_FALSE(cube6.empty());\n  Cube cube62(std::move(cube6));\n  EXPECT_EQ(str6, cube62.cast<std::string>());\n  EXPECT_TRUE(cube6.empty());\n\n  const Cube cube63 = cube62;\n  Cube cube64(cube63);\n  EXPECT_EQ(str6, cube64.cast<std::string>());\n  EXPECT_FALSE(cube63.empty());\n\n  // Test Assignment operator=(Cube &&rhs) / operator=(const Cube &rhs)\n  Cube cube7(str7), cube71, cube72;\n  cube71 = cube7;\n  EXPECT_EQ(str7, cube71.cast<std::string>());\n  EXPECT_FALSE(cube7.empty());\n  cube72 = std::move(cube7);\n  EXPECT_EQ(str7, cube72.cast<std::string>());\n  EXPECT_TRUE(cube7.empty());\n\n  const Cube cube73(cube72);\n  Cube cube74;\n  cube74 = cube73;\n  EXPECT_EQ(str7, cube74.cast<std::string>());\n  EXPECT_EQ(str7, cube73.cast<std::string>());\n  EXPECT_FALSE(cube74.empty());\n}\n\nstruct SmallObject {\n  SmallObject() {\n    ++assign_count;\n  }\n\n  SmallObject(const SmallObject &) {\n    
++clone_count;\n  }\n\n  SmallObject(SmallObject &&) {\n    ++move_count;\n  }\n\n  ~SmallObject() {\n    ++cleanup_count;\n  }\n\n  int val{0};\n  static int assign_count;\n  static int clone_count;\n  static int move_count;\n  static int cleanup_count;\n};\n\nint SmallObject::assign_count = 0;\nint SmallObject::clone_count = 0;\nint SmallObject::move_count = 0;\nint SmallObject::cleanup_count = 0;\n\nTEST(Cube, CubePolicy) {\n  EXPECT_EQ(0, SmallObject::assign_count);\n  EXPECT_EQ(0, SmallObject::clone_count);\n  EXPECT_EQ(0, SmallObject::move_count);\n  EXPECT_EQ(0, SmallObject::cleanup_count);\n\n  SmallObject obj1, obj2, obj3, obj4, obj5;\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(0, SmallObject::clone_count);\n  EXPECT_EQ(0, SmallObject::move_count);\n  EXPECT_EQ(0, SmallObject::cleanup_count);\n\n  Cube cube1(obj1);\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(1, SmallObject::clone_count);\n  EXPECT_EQ(0, SmallObject::move_count);\n  EXPECT_EQ(0, SmallObject::cleanup_count);\n\n  Cube cube2(std::move(obj2));\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(1, SmallObject::clone_count);\n  EXPECT_EQ(1, SmallObject::move_count);\n  EXPECT_EQ(0, SmallObject::cleanup_count);\n\n  {\n    Cube cube3(std::move(obj3));\n    EXPECT_EQ(5, SmallObject::assign_count);\n    EXPECT_EQ(1, SmallObject::clone_count);\n    EXPECT_EQ(2, SmallObject::move_count);\n    EXPECT_EQ(0, SmallObject::cleanup_count);\n  }\n\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(1, SmallObject::clone_count);\n  EXPECT_EQ(2, SmallObject::move_count);\n  EXPECT_EQ(1, SmallObject::cleanup_count);\n\n  {\n    Cube cube4(obj4);\n    EXPECT_EQ(5, SmallObject::assign_count);\n    EXPECT_EQ(2, SmallObject::clone_count);\n    EXPECT_EQ(2, SmallObject::move_count);\n    EXPECT_EQ(1, SmallObject::cleanup_count);\n  }\n\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(2, SmallObject::clone_count);\n  EXPECT_EQ(2, SmallObject::move_count);\n  EXPECT_EQ(2, 
SmallObject::cleanup_count);\n\n  {\n    Cube cube5(obj5);\n    EXPECT_EQ(5, SmallObject::assign_count);\n    EXPECT_EQ(3, SmallObject::clone_count);\n    EXPECT_EQ(2, SmallObject::move_count);\n    EXPECT_EQ(2, SmallObject::cleanup_count);\n  }\n\n  EXPECT_EQ(5, SmallObject::assign_count);\n  EXPECT_EQ(3, SmallObject::clone_count);\n  EXPECT_EQ(2, SmallObject::move_count);\n  EXPECT_EQ(3, SmallObject::cleanup_count);\n}\n\nTEST(Cube, SmallObject) {\n  uint64_t uint1 = 1111111;\n  uint64_t uint2 = 2222222;\n  uint64_t uint3 = 3333333;\n  uint64_t uint4 = 4444444;\n  uint64_t uint5 = 5555555;\n  uint64_t uint6 = 6666666;\n  uint64_t uint7 = 7777777;\n\n  Cube cube1(uint1);\n  Cube cube2;\n  cube2 = uint2;\n  Cube cube3 = uint3;\n\n  EXPECT_EQ(uint1, cube1.cast<uint64_t>());\n  EXPECT_EQ(uint2, cube2.cast<uint64_t>());\n  EXPECT_TRUE(cube1.compatible(cube2));\n\n  cube1 = std::move(cube2);\n  EXPECT_EQ(uint2, cube1.cast<uint64_t>());\n  EXPECT_TRUE(cube2.empty());\n  EXPECT_FALSE(cube1.compatible(cube2));\n\n  EXPECT_EQ(uint3, cube3.cast<uint64_t>());\n  cube3 = cube1;\n  EXPECT_EQ(uint2, cube3.cast<uint64_t>());\n  EXPECT_EQ(uint2, cube1.cast<uint64_t>());\n\n  // Test Constructor Cube(T &&rhs) / Cube(const T &rhs)\n  Cube cube41(uint64_t(444444));\n  Cube cube42(uint4);\n  EXPECT_NE(uint64_t(0), uint4);\n  Cube cube43(std::move(uint4));\n  EXPECT_NE(uint64_t(0), uint4);\n\n  const uint64_t uint41 = uint4;\n  Cube cube44(uint41);\n  EXPECT_EQ(uint41, uint4);\n  EXPECT_EQ(uint4, cube44.cast<uint64_t>());\n\n  // Test Assignment operator=(T &&rhs) / operator=(const T &rhs)\n  Cube cube51, cube52, cube53, cube54;\n  cube51 = uint64_t(55555);\n  cube52 = uint5;\n  EXPECT_NE(uint64_t(0), uint5);\n  cube53 = std::move(uint5);\n  EXPECT_NE(uint64_t(0), uint5);\n\n  const uint64_t uint51 = uint5;\n  cube54 = uint51;\n  EXPECT_EQ(uint51, uint5);\n  EXPECT_EQ(uint5, cube54.cast<uint64_t>());\n\n  // Test Constructor Cube(Cube &&rhs) / Cube(const Cube &rhs)\n  Cube 
cube6(uint6);\n  Cube cube61(cube6);\n  EXPECT_EQ(uint6, cube61.cast<uint64_t>());\n  EXPECT_FALSE(cube6.empty());\n  Cube cube62(std::move(cube6));\n  EXPECT_EQ(uint6, cube62.cast<uint64_t>());\n  EXPECT_TRUE(cube6.empty());\n\n  const Cube cube63 = cube62;\n  Cube cube64(cube63);\n  EXPECT_EQ(uint6, cube64.cast<uint64_t>());\n  EXPECT_FALSE(cube63.empty());\n\n  // Test Assignment operator=(Cube &&rhs) / operator=(const Cube &rhs)\n  Cube cube7(uint7), cube71, cube72;\n  cube71 = cube7;\n  EXPECT_EQ(uint7, cube71.cast<uint64_t>());\n  EXPECT_FALSE(cube7.empty());\n  cube72 = std::move(cube7);\n  EXPECT_EQ(uint7, cube72.cast<uint64_t>());\n  EXPECT_TRUE(cube7.empty());\n\n  const Cube cube73(cube72);\n  Cube cube74;\n  cube74 = cube73;\n  EXPECT_EQ(uint7, cube74.cast<uint64_t>());\n  EXPECT_EQ(uint7, cube73.cast<uint64_t>());\n  EXPECT_FALSE(cube74.empty());\n}\n\nenum EnumValueType { Unknown, Binary, Float, Double };\nenum class EnumClassType { Unknown, RED, GREEN, BLUE };\n\nTEST(Cube, EnumObject) {\n  std::cout << \"* uint32_t: \" << typeid(uint32_t).name() << std::endl;\n  std::cout << \"* int32_t: \" << typeid(int32_t).name() << std::endl;\n  std::cout << \"* EnumValueType: \" << typeid(EnumValueType).name() << std::endl;\n  std::cout << \"* EnumValueType (underlying_type): \"\n            << typeid(typename std::underlying_type<EnumValueType>::type).name()\n            << std::endl;\n\n  std::cout << \"* EnumClassType: \" << typeid(EnumClassType).name() << std::endl;\n  std::cout << \"* EnumClassType (underlying_type): \"\n            << typeid(typename std::underlying_type<EnumClassType>::type).name()\n            << std::endl;\n\n  EnumValueType a(EnumValueType::Binary), c(EnumValueType::Unknown);\n  EnumClassType b(EnumClassType::RED), d(EnumClassType::Unknown);\n\n  Cube cubeA(a);\n  Cube cubeB(b);\n\n  EXPECT_EQ(a, cubeA.cast<EnumValueType>());\n  EXPECT_NE(c, cubeA.cast<EnumValueType>());\n  c = cubeA.cast<EnumValueType>();\n  EXPECT_EQ(a, c);\n\n  
EXPECT_EQ(b, cubeB.cast<EnumClassType>());\n  EXPECT_NE(d, cubeB.cast<EnumClassType>());\n  d = cubeB.cast<EnumClassType>();\n  EXPECT_EQ(b, d);\n\n  Cube cubeC((std::underlying_type<EnumValueType>::type)1);\n  Cube cubeD((std::underlying_type<EnumClassType>::type)1);\n\n  std::cout << \"* cubeA: \" << cubeA.type().name() << std::endl;\n  std::cout << \"* cubeB: \" << cubeB.type().name() << std::endl;\n  std::cout << \"* cubeC: \" << cubeC.type().name() << std::endl;\n  std::cout << \"* cubeD: \" << cubeD.type().name() << std::endl;\n\n  // EXPECT_TRUE(typeid(std::underlying_type<EnumValueType>::type) ==\n  //             typeid(uint32_t));\n  // EXPECT_TRUE(typeid(std::underlying_type<EnumClassType>::type) ==\n  //             typeid(int32_t));\n\n  EXPECT_TRUE(cubeA.compatible<EnumValueType>());\n  EXPECT_TRUE(cubeB.compatible<EnumClassType>());\n  EXPECT_TRUE(cubeA.compatible<std::underlying_type<EnumValueType>::type>());\n  EXPECT_TRUE(cubeB.compatible<std::underlying_type<EnumClassType>::type>());\n  EXPECT_TRUE(cubeC.compatible<std::underlying_type<EnumValueType>::type>());\n  EXPECT_TRUE(cubeD.compatible<std::underlying_type<EnumClassType>::type>());\n\n  EnumValueType e =\n      (EnumValueType)cubeA.cast<std::underlying_type<EnumValueType>::type>();\n  EnumClassType f =\n      (EnumClassType)cubeB.cast<std::underlying_type<EnumClassType>::type>();\n  EXPECT_EQ(a, e);\n  EXPECT_EQ(b, f);\n}\n"
  },
  {
    "path": "tests/ailego/container/heap_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/heap.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(Heap, General) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 100);\n\n  {\n    ailego::Heap<float> heap;\n\n    for (size_t i = 0; i < 12; ++i) {\n      heap.emplace(dist(gen));\n    }\n    EXPECT_EQ(12u, heap.size());\n    EXPECT_FALSE(heap.full());\n\n    for (auto it : heap) {\n      std::cout << it << \" \";\n    }\n    std::cout << std::endl;\n\n    ailego::Heap<float> heap1(std::move(heap));\n    EXPECT_TRUE(heap.empty());\n    EXPECT_FALSE(heap1.empty());\n    for (size_t i = 0; i < 12; ++i) {\n      heap1.pop();\n    }\n    EXPECT_TRUE(heap1.empty());\n  }\n\n  {\n    ailego::Heap<float> heap(12);\n\n    for (size_t i = 0; i < 200; ++i) {\n      heap.push(dist(gen));\n    }\n    EXPECT_EQ(12u, heap.size());\n    EXPECT_TRUE(std::is_heap(heap.begin(), heap.end()));\n    EXPECT_TRUE(heap.full());\n\n    ailego::Heap<float> heap2(heap);\n    for (auto it : heap2) {\n      std::cout << it << \" \";\n    }\n    std::cout << std::endl;\n\n    for (size_t i = 0; i < 12; ++i) {\n      heap2.pop();\n    }\n    EXPECT_TRUE(heap2.empty());\n    EXPECT_FALSE(heap.empty());\n  }\n\n  {\n    ailego::Heap<float> heap(12);\n    
ailego::Heap<float> heap1;\n    ailego::Heap<float> heap2;\n\n    for (size_t i = 0; i < 50; ++i) {\n      heap.emplace(dist(gen));\n    }\n\n    EXPECT_NE(heap1.limit(), heap.limit());\n    EXPECT_FALSE(heap.empty());\n    EXPECT_TRUE(heap1.empty());\n    heap1 = heap;\n\n    EXPECT_FALSE(heap.empty());\n    EXPECT_FALSE(heap1.empty());\n    EXPECT_EQ(heap1.limit(), heap.limit());\n\n    heap2 = std::move(heap);\n    EXPECT_TRUE(heap.empty());\n    EXPECT_FALSE(heap2.empty());\n    EXPECT_EQ(heap2.limit(), heap.limit());\n  }\n\n  {\n    ailego::Heap<float> heap(12);\n    ailego::Heap<float> heap1;\n\n    for (size_t i = 0; i < 50; ++i) {\n      heap.emplace(dist(gen));\n    }\n\n    heap.swap(heap1);\n    EXPECT_FALSE(heap1.empty());\n    EXPECT_TRUE(heap.empty());\n  }\n\n  {\n    ailego::Heap<float> heap(32);\n\n    for (size_t i = 0; i < 200; ++i) {\n      heap.emplace(dist(gen));\n    }\n    EXPECT_EQ(32u, heap.size());\n    EXPECT_TRUE(std::is_heap(heap.begin(), heap.end()));\n\n    heap.limit(55);\n    for (size_t i = 0; i < 100; ++i) {\n      heap.emplace(dist(gen));\n    }\n    EXPECT_TRUE(std::is_heap(heap.begin(), heap.end()));\n    EXPECT_EQ(55u, heap.size());\n    EXPECT_TRUE(heap.full());\n  }\n}\n\nTEST(Heap, Make) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 100);\n\n  std::vector<float> raw_data;\n  for (size_t i = 0; i < 200; ++i) {\n    raw_data.push_back(dist(gen));\n  }\n\n  ailego::Heap<float> heap(raw_data);\n  EXPECT_FALSE(raw_data.empty());\n  EXPECT_EQ(heap.front(), *std::max_element(raw_data.begin(), raw_data.end()));\n\n  ailego::Heap<float> heap1(std::move(raw_data));\n  EXPECT_TRUE(raw_data.empty());\n  EXPECT_EQ(heap1.front(), *std::max_element(heap.begin(), heap.end()));\n\n  raw_data = std::move(heap);\n  EXPECT_FALSE(raw_data.empty());\n  EXPECT_TRUE(heap.empty());\n}\n\nTEST(Heap, Sort) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  
std::uniform_real_distribution<float> dist(0.0, 100);\n\n  std::vector<float> raw_data;\n  for (size_t i = 0; i < 200; ++i) {\n    raw_data.push_back(dist(gen));\n  }\n\n  ailego::Heap<float> heap(raw_data);\n  EXPECT_EQ(heap.front(), *std::max_element(raw_data.begin(), raw_data.end()));\n\n  heap.sort();\n  EXPECT_EQ(heap.front(), *std::min_element(raw_data.begin(), raw_data.end()));\n\n  heap.limit(50);\n  EXPECT_EQ(200u, heap.size());\n  heap.update();\n  EXPECT_EQ(50u, heap.size());\n  EXPECT_EQ(heap.front(), *std::max_element(heap.begin(), heap.end()));\n\n  heap.sort();\n  EXPECT_EQ(heap.front(), *std::min_element(raw_data.begin(), raw_data.end()));\n}\n\nstruct HeapValue {\n  HeapValue(void) : score(0.0f) {\n    std::cout << \"HeapValue(void)\" << std::endl;\n  }\n\n  HeapValue(float val) : score(val) {\n    std::cout << \"HeapValue(float)\" << std::endl;\n  }\n\n  HeapValue(const HeapValue &rhs) : score(rhs.score) {\n    std::cout << \"HeapValue(const HeapValue &)\" << std::endl;\n  }\n\n  HeapValue(HeapValue &&rhs) : score(rhs.score) {\n    std::cout << \"HeapValue(HeapValue &&)\" << std::endl;\n  }\n\n  //! Less than\n  bool operator<(const HeapValue &rhs) const {\n    return (this->score < rhs.score);\n  }\n\n  //! Greater than\n  bool operator>(const HeapValue &rhs) const {\n    return (this->score > rhs.score);\n  }\n\n  //! Assignment\n  HeapValue &operator=(const HeapValue &rhs) {\n    std::cout << \"operator=(const HeapValue &)\" << std::endl;\n    score = rhs.score;\n    return *this;\n  }\n\n  //! 
Assignment\n  HeapValue &operator=(HeapValue &&rhs) {\n    std::cout << \"operator=(HeapValue &&)\" << std::endl;\n    score = rhs.score;\n    return *this;\n  }\n\n  float score;\n};\n\nTEST(Heap, Constructor) {\n  ailego::Heap<HeapValue> heap(2);\n  heap.push(HeapValue(2.0f));\n  heap.emplace(1.0f);\n\n  HeapValue val;\n  heap.push(val);\n\n  heap.pop();\n  EXPECT_EQ(1u, heap.size());\n  heap.pop();\n  EXPECT_EQ(0u, heap.size());\n  // heap.pop(); // disallowed\n}\n\ntemplate <typename T, class TAllocator = std::allocator<T>>\nclass HeapVector {\n public:\n  typedef size_t size_type;\n  typedef typename std::remove_reference<T>::type value_type;\n  typedef TAllocator allocator_type;\n\n  //! Constructor\n  HeapVector(void) : begin_(nullptr), end_(nullptr), capacity_(0u), alloc_() {}\n\n  //! Constructor\n  HeapVector(const HeapVector &rhs)\n      : begin_(nullptr), end_(nullptr), capacity_(0u), alloc_() {\n    size_type count = rhs.size();\n    if (count) {\n      this->expand(count);\n\n      end_ = begin_ + count;\n      for (value_type *iter = begin_, *src = rhs.begin_; iter != end_;\n           ++iter, ++src) {\n        iter->value_type(*src);\n      }\n    }\n  }\n\n  //! Constructor\n  HeapVector(HeapVector &&rhs)\n      : begin_(rhs.begin_), end_(rhs.end_), capacity_(rhs.capacity_), alloc_() {\n    rhs.begin_ = nullptr;\n    rhs.end_ = nullptr;\n    rhs.capacity_ = 0u;\n  }\n\n  //! Destructor\n  ~HeapVector(void) {\n    if (capacity_) {\n      for (value_type *iter = begin_; iter != end_; ++iter) {\n        iter->~value_type();\n      }\n      alloc_.deallocate(begin_, capacity_);\n    }\n  }\n\n  //! 
Assignment\n  HeapVector &operator=(const HeapVector &rhs) {\n    this->clear();\n\n    size_type count = rhs.size();\n    if (capacity_ < count) {\n      this->expand(count);\n    }\n\n    if (count) {\n      end_ = begin_ + count;\n      for (value_type *iter = begin_, *src = rhs.begin_; iter != end_;\n           ++iter, ++src) {\n        iter->value_type(*src);\n      }\n    }\n    return *this;\n  }\n\n  //! Assignment\n  HeapVector &operator=(HeapVector &&rhs) {\n    this->clear();\n    begin_ = rhs.begin_;\n    end_ = rhs.end_;\n    capacity_ = rhs.capacity_;\n    rhs.begin_ = nullptr;\n    rhs.end_ = nullptr;\n    rhs.capacity_ = 0u;\n    return *this;\n  }\n\n  //! Clear the vector\n  void clear(void) {\n    for (value_type *iter = begin_; iter != end_; ++iter) {\n      iter->~value_type();\n    }\n    end_ = begin_;\n  }\n\n  //! Retrieve the begin iterator\n  value_type *begin(void) {\n    return begin_;\n  }\n\n  //! Retrieve the begin iterator\n  const value_type *begin(void) const {\n    return begin_;\n  }\n\n  //! Retrieve the end iterator\n  value_type *end(void) {\n    return end_;\n  }\n\n  //! Retrieve the end iterator\n  const value_type *end(void) const {\n    return end_;\n  }\n\n  //! Retrieve the front element\n  value_type &front(void) {\n    return *begin_;\n  }\n\n  //! Retrieve the front element\n  const value_type &front(void) const {\n    return *begin_;\n  }\n\n  //! Retrieve the back element\n  value_type &back(void) {\n    return *(end_ - 1);\n  }\n\n  //! Retrieve the back element\n  const value_type &back(void) const {\n    return *(end_ - 1);\n  }\n\n  //! Retrieve count of elements in vector\n  size_type size(void) const {\n    return (end_ - begin_);\n  }\n\n  //! Retrieve capacity of vector\n  size_type capacity(void) const {\n    return capacity_;\n  }\n\n  //! Check whether the heap is empty\n  bool empty(void) const {\n    return (begin_ == end_);\n  }\n\n  //! 
Request a change in capacity\n  void reserve(size_type n) {\n    if (capacity_ < n) {\n      this->expand(n);\n    }\n  }\n\n  void push_back(const value_type &val) {\n    size_type count = this->size();\n\n    if (count == capacity_) {\n      this->expand(count + 1);\n    }\n    // (end_++)->value_type(val);\n    *(end_++) = val;\n  }\n\n  void push_back(value_type &&val) {\n    size_type count = this->size();\n\n    if (count == capacity_) {\n      this->expand(count + 1);\n    }\n    // (end_++)->value_type(std::move(val));\n    *(end_++) = std::move(val);\n  }\n\n  void pop_back(void) {\n    (--end_)->~value_type();\n  }\n\n protected:\n  //! Round up to the nearest power of 2\n  static inline size_type clp2(size_type n) {\n    n = n - 1;\n    n = n | (n >> 1);\n    n = n | (n >> 2);\n    n = n | (n >> 4);\n    n = n | (n >> 8);\n    n = n | (n >> 16);\n    // n = n | (n >> 32);\n    return (n + 1);\n  }\n\n  //! Expand the buffer\n  void expand(size_type need) {\n    need = clp2(need);\n    value_type *buf = alloc_.allocate(need);\n    size_type count = this->size();\n\n    if (count) {\n      memcpy(buf, begin_, sizeof(value_type) * count);\n    }\n    alloc_.deallocate(begin_, capacity_);\n    begin_ = buf;\n    end_ = buf + count;\n    capacity_ = need;\n  }\n\n private:\n  //! 
Members\n  value_type *begin_;\n  value_type *end_;\n  size_type capacity_;\n  allocator_type alloc_;\n};\n\nTEST(Heap, Becnhmark) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 100);\n\n  std::vector<float> raw_data;\n  for (size_t i = 0; i < 1000000; ++i) {\n    raw_data.push_back(dist(gen));\n  }\n\n  ailego::Heap<float> heap1(100);\n  ailego::Heap<float, std::less<float>, HeapVector<float>> heap2(100);\n\n  ailego::ElapsedTime stamp;\n  stamp.reset();\n  for (uint32_t i = 0; i < raw_data.size(); ++i) {\n    heap1.emplace(raw_data[i]);\n  }\n  std::cout << \"Heap 1: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  EXPECT_EQ(100u, heap1.size());\n\n  stamp.reset();\n  for (uint32_t i = 0; i < raw_data.size(); ++i) {\n    heap2.push(raw_data[i]);\n  }\n  std::cout << \"Heap 2: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  EXPECT_EQ(100u, heap2.size());\n}\n"
  },
  {
    "path": "tests/ailego/container/hypercube_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/hypercube.h>\n\nusing namespace zvec::ailego;\n\nTEST(Hypercube, General) {\n  Hypercube hyper;\n  hyper.insert(\"1\", 1);\n  hyper.insert(\"2\", 2);\n  hyper.insert(\"3\", 3);\n\n  EXPECT_EQ(1, hyper[\"1\"].cast<int>());\n  EXPECT_EQ(2, hyper[\"2\"].cast<int>());\n  EXPECT_EQ(3, hyper[\"3\"].cast<int>());\n\n  hyper.insert_or_assign(\"1\", 11);\n  hyper.insert_or_assign(\"2\", 22);\n  hyper.insert_or_assign(\"3\", 33);\n  hyper.insert_or_assign(\"4\", 44);\n  hyper.insert_or_assign(\"5\", 55);\n  EXPECT_EQ(11, hyper[\"1\"].cast<int>());\n  EXPECT_EQ(22, hyper[\"2\"].cast<int>());\n  EXPECT_EQ(33, hyper[\"3\"].cast<int>());\n  EXPECT_EQ(44, hyper[\"4\"].cast<int>());\n  EXPECT_EQ(55, hyper[\"5\"].cast<int>());\n\n  std::string key1(\"111\"), key2(\"222\");\n  Cube val1(11);\n  hyper.insert_or_assign(key1, val1);\n  hyper.insert_or_assign(std::move(key2), val1);\n  hyper.insert_or_assign(\"345464\", 435465);\n  EXPECT_FALSE(key1.empty());\n  EXPECT_TRUE(key2.empty());\n}\n"
  },
  {
    "path": "tests/ailego/container/params_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/params.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nTEST(Params, General) {\n  Params params;\n  Params params1;\n\n  EXPECT_TRUE(params.empty());\n  EXPECT_TRUE(params1.empty());\n  params1.merge(params);\n\n  char test_string1[] = \"test1\";\n  params.set(test_string1, test_string1);\n\n  const char *test_string2 = \"test2\";\n  params.set(test_string2, test_string2);\n\n  params.set(\"11111\", \"11111\");\n  params.set(\"22222\", params1);\n  params.set(\"true\", true);\n  params.set(\"-8\", int8_t(-8));\n  params.set(\"-16\", int16_t(-16));\n  params.set(\"-32\", int32_t(-32));\n  params.set(\"-64\", int64_t(-64));\n  params.set(\"8\", uint8_t(8));\n  params.set(\"16\", uint16_t(16));\n  params.set(\"32\", uint32_t(32));\n  params.set(\"64\", uint64_t(64));\n  params.set(\"100.0\", float(100.0f));\n  params.set(\"1000.0\", double(1000.0f));\n  params.set(std::string(\"string\"), \"string\");\n\n  EXPECT_FALSE(params.empty());\n\n  EXPECT_TRUE(params.has(\"64\"));\n  EXPECT_TRUE(params.has(\"32\"));\n  EXPECT_TRUE(params.has(\"16\"));\n  EXPECT_TRUE(params.has(\"8\"));\n  EXPECT_TRUE(params.has(\"-64\"));\n  EXPECT_TRUE(params.has(\"-32\"));\n  EXPECT_TRUE(params.has(\"-16\"));\n  EXPECT_TRUE(params.has(\"-8\"));\n  EXPECT_TRUE(params.has(\"true\"));\n  
EXPECT_FALSE(params.has(\"false\"));\n  EXPECT_FALSE(params.has(\"\"));\n  EXPECT_TRUE(params.has(\"100.0\"));\n  EXPECT_TRUE(params.has(\"1000.0\"));\n  EXPECT_FALSE(params.has(\"10000.0\"));\n  EXPECT_TRUE(params.has(\"string\"));\n\n  EXPECT_EQ(\"1\", params.get_as_string(\"true\"));\n  EXPECT_EQ(\"-8\", params.get_as_string(\"-8\"));\n  EXPECT_EQ(\"-16\", params.get_as_string(\"-16\"));\n  EXPECT_EQ(\"-32\", params.get_as_string(\"-32\"));\n  EXPECT_EQ(\"-64\", params.get_as_string(\"-64\"));\n  EXPECT_EQ(\"8\", params.get_as_string(\"8\"));\n  EXPECT_EQ(\"16\", params.get_as_string(\"16\"));\n  EXPECT_EQ(\"32\", params.get_as_string(\"32\"));\n  EXPECT_EQ(\"64\", params.get_as_string(\"64\"));\n  EXPECT_EQ(\"100.000000\", params.get_as_string(\"100.0\"));\n  EXPECT_EQ(\"1000.000000\", params.get_as_string(\"1000.0\"));\n  EXPECT_EQ(\"\", params.get_as_string(\"10000.0\"));\n\n  EXPECT_EQ(64u, params.get_as_uint64(\"64\"));\n  EXPECT_EQ(32u, params.get_as_uint64(\"32\"));\n  EXPECT_EQ(16u, params.get_as_uint64(\"16\"));\n  EXPECT_EQ(8u, params.get_as_uint64(\"8\"));\n  EXPECT_EQ((uint64_t)(-64), params.get_as_uint64(\"-64\"));\n  EXPECT_EQ((uint64_t)(-32), params.get_as_uint64(\"-32\"));\n  EXPECT_EQ((uint64_t)(-16), params.get_as_uint64(\"-16\"));\n  EXPECT_EQ((uint64_t)(-8), params.get_as_uint64(\"-8\"));\n  EXPECT_EQ(1u, params.get_as_uint64(\"true\"));\n  EXPECT_EQ(100u, params.get_as_uint64(\"100.0\"));\n  EXPECT_EQ(1000u, params.get_as_uint64(\"1000.0\"));\n  EXPECT_EQ(0u, params.get_as_uint64(\"10000.0\"));\n\n  EXPECT_EQ(64u, params.get_as_uint32(\"64\"));\n  EXPECT_EQ(32u, params.get_as_uint32(\"32\"));\n  EXPECT_EQ(16u, params.get_as_uint32(\"16\"));\n  EXPECT_EQ(8u, params.get_as_uint32(\"8\"));\n  EXPECT_EQ(4294967232u, params.get_as_uint32(\"-64\"));\n  EXPECT_EQ((uint32_t)(-32), params.get_as_uint32(\"-32\"));\n  EXPECT_EQ((uint32_t)(-16), params.get_as_uint32(\"-16\"));\n  EXPECT_EQ((uint32_t)(-8), params.get_as_uint32(\"-8\"));\n  EXPECT_EQ(1u, 
params.get_as_uint32(\"true\"));\n  EXPECT_EQ(100u, params.get_as_uint32(\"100.0\"));\n  EXPECT_EQ(1000u, params.get_as_uint32(\"1000.0\"));\n  EXPECT_EQ(0u, params.get_as_uint32(\"10000.0\"));\n\n  EXPECT_EQ(64u, params.get_as_uint16(\"64\"));\n  EXPECT_EQ(32u, params.get_as_uint16(\"32\"));\n  EXPECT_EQ(16u, params.get_as_uint16(\"16\"));\n  EXPECT_EQ(8u, params.get_as_uint16(\"8\"));\n  EXPECT_EQ(65472u, params.get_as_uint16(\"-64\"));\n  EXPECT_EQ(65504u, params.get_as_uint16(\"-32\"));\n  EXPECT_EQ((uint16_t)(-16), params.get_as_uint16(\"-16\"));\n  EXPECT_EQ((uint16_t)(-8), params.get_as_uint16(\"-8\"));\n  EXPECT_EQ(1u, params.get_as_uint16(\"true\"));\n  EXPECT_EQ(100u, params.get_as_uint16(\"100.0\"));\n  EXPECT_EQ(1000u, params.get_as_uint16(\"1000.0\"));\n  EXPECT_EQ(0u, params.get_as_uint16(\"10000.0\"));\n\n  EXPECT_EQ(64u, params.get_as_uint8(\"64\"));\n  EXPECT_EQ(32u, params.get_as_uint8(\"32\"));\n  EXPECT_EQ(32u, params.get_as_uint8(\"32\"));\n  EXPECT_EQ(8u, params.get_as_uint8(\"8\"));\n  EXPECT_EQ(192u, params.get_as_uint8(\"-64\"));\n  EXPECT_EQ(224u, params.get_as_uint8(\"-32\"));\n  EXPECT_EQ(240u, params.get_as_uint8(\"-16\"));\n  EXPECT_EQ((uint8_t)(-8), params.get_as_uint8(\"-8\"));\n  EXPECT_EQ(1u, params.get_as_uint8(\"true\"));\n  EXPECT_EQ(100u, params.get_as_uint8(\"100.0\"));\n  EXPECT_EQ(232u, params.get_as_uint8(\"1000.0\"));\n  EXPECT_EQ(0u, params.get_as_uint8(\"10000.0\"));\n\n  EXPECT_TRUE(params.get_as_bool(\"64\"));\n  EXPECT_TRUE(params.get_as_bool(\"32\"));\n  EXPECT_TRUE(params.get_as_bool(\"16\"));\n  EXPECT_TRUE(params.get_as_bool(\"8\"));\n  EXPECT_TRUE(params.get_as_bool(\"-64\"));\n  EXPECT_TRUE(params.get_as_bool(\"-32\"));\n  EXPECT_TRUE(params.get_as_bool(\"-16\"));\n  EXPECT_TRUE(params.get_as_bool(\"-8\"));\n  EXPECT_TRUE(params.get_as_bool(\"true\"));\n  EXPECT_FALSE(params.get_as_bool(\"false\"));\n  EXPECT_FALSE(params.get_as_bool(\"\"));\n  EXPECT_TRUE(params.get_as_bool(\"100.0\"));\n  
EXPECT_TRUE(params.get_as_bool(\"1000.0\"));\n  EXPECT_FALSE(params.get_as_bool(\"10000.0\"));\n  EXPECT_FALSE(params.get_as_bool(\"string\"));\n\n  EXPECT_EQ(64, params.get_as_int64(\"64\"));\n  EXPECT_EQ(32, params.get_as_int64(\"32\"));\n  EXPECT_EQ(16, params.get_as_int64(\"16\"));\n  EXPECT_EQ(8, params.get_as_int64(\"8\"));\n  EXPECT_EQ(-64, params.get_as_int64(\"-64\"));\n  EXPECT_EQ(-32, params.get_as_int64(\"-32\"));\n  EXPECT_EQ(-16, params.get_as_int64(\"-16\"));\n  EXPECT_EQ(-8, params.get_as_int64(\"-8\"));\n  EXPECT_EQ(1, params.get_as_int64(\"true\"));\n  EXPECT_EQ(100, params.get_as_int64(\"100.0\"));\n  EXPECT_EQ(1000, params.get_as_int64(\"1000.0\"));\n  EXPECT_EQ(0, params.get_as_int64(\"10000.0\"));\n\n  EXPECT_EQ(64, params.get_as_int32(\"64\"));\n  EXPECT_EQ(32, params.get_as_int32(\"32\"));\n  EXPECT_EQ(16, params.get_as_int32(\"16\"));\n  EXPECT_EQ(8, params.get_as_int32(\"8\"));\n  EXPECT_EQ(-64, params.get_as_int32(\"-64\"));\n  EXPECT_EQ(-32, params.get_as_int32(\"-32\"));\n  EXPECT_EQ(-16, params.get_as_int32(\"-16\"));\n  EXPECT_EQ(-8, params.get_as_int32(\"-8\"));\n  EXPECT_EQ(1, params.get_as_int32(\"true\"));\n  EXPECT_EQ(100, params.get_as_int32(\"100.0\"));\n  EXPECT_EQ(1000, params.get_as_int32(\"1000.0\"));\n  EXPECT_EQ(0, params.get_as_int32(\"10000.0\"));\n  params1.merge(params);\n\n  EXPECT_EQ(64, params.get_as_int16(\"64\"));\n  EXPECT_EQ(32, params.get_as_int16(\"32\"));\n  EXPECT_EQ(16, params.get_as_int16(\"16\"));\n  EXPECT_EQ(8, params.get_as_int16(\"8\"));\n  EXPECT_EQ(-64, params.get_as_int16(\"-64\"));\n  EXPECT_EQ(-32, params.get_as_int16(\"-32\"));\n  EXPECT_EQ(-16, params.get_as_int16(\"-16\"));\n  EXPECT_EQ(-8, params.get_as_int16(\"-8\"));\n  EXPECT_EQ(1, params.get_as_int16(\"true\"));\n  EXPECT_EQ(100, params.get_as_int16(\"100.0\"));\n  EXPECT_EQ(1000, params.get_as_int16(\"1000.0\"));\n  EXPECT_EQ(0, params.get_as_int16(\"10000.0\"));\n  params1.merge(params);\n\n  EXPECT_EQ(64, 
params.get_as_int8(\"64\"));\n  EXPECT_EQ(32, params.get_as_int8(\"32\"));\n  EXPECT_EQ(16, params.get_as_int8(\"16\"));\n  EXPECT_EQ(8, params.get_as_int8(\"8\"));\n  EXPECT_EQ(-64, params.get_as_int8(\"-64\"));\n  EXPECT_EQ(-32, params.get_as_int8(\"-32\"));\n  EXPECT_EQ(-16, params.get_as_int8(\"-16\"));\n  EXPECT_EQ(-8, params.get_as_int8(\"-8\"));\n  EXPECT_EQ(1, params.get_as_int8(\"true\"));\n  EXPECT_EQ(100, params.get_as_int8(\"100.0\"));\n  EXPECT_EQ(-24, params.get_as_int8(\"1000.0\"));\n  EXPECT_EQ(0, params.get_as_int8(\"10000.0\"));\n  params1.merge(params);\n\n  params.erase(\"64\");\n  params.erase(\"32\");\n  params.erase(\"16\");\n  params.erase(\"8\");\n  params.erase(\"-64\");\n  params.erase(\"-32\");\n  params.erase(\"-16\");\n  params.erase(\"-8\");\n  params.erase(\"true\");\n  params.erase(\"false\");\n  params.erase(\"\");\n  params.erase(\"100.0\");\n  params.erase(\"1000.0\");\n  params.erase(\"10000.0\");\n  params.erase(\"string\");\n  params1.merge(params);\n  params.clear();\n}\n\nTEST(Params, OverloadedOperator) {\n  Params params;\n  Params params1;\n\n  char test_string1[] = \"test1\";\n  params[test_string1] = test_string1;\n\n  const char *test_string2 = \"test2\";\n  params[test_string2] = test_string2;\n\n  params[\"11111\"] = \"11111\";\n  params[\"22222\"] = params1;\n  params[\"true\"] = true;\n  params[\"-8\"] = int8_t(-8);\n  params[\"-16\"] = int16_t(-16);\n  params[\"-32\"] = int32_t(-32);\n  params[\"-64\"] = int64_t(-64);\n  params[\"8\"] = uint8_t(8);\n  params[\"16\"] = uint16_t(16);\n  params[\"32\"] = uint32_t(32);\n  params[\"64\"] = uint64_t(64);\n  params[\"100.0\"] = float(100.0f);\n  params[\"1000.0\"] = double(1000.0f);\n  params[\"size_t\"] = size_t(1234);\n  params[std::string(\"string\")] = std::string(\"string\");\n\n  EXPECT_EQ(64u, params.get_as_uint64(\"64\"));\n  EXPECT_EQ(32u, params.get_as_uint64(\"32\"));\n  EXPECT_EQ(16u, params.get_as_uint64(\"16\"));\n  EXPECT_EQ(8u, 
params.get_as_uint64(\"8\"));\n  EXPECT_EQ((uint64_t)(-64), params.get_as_uint64(\"-64\"));\n  EXPECT_EQ((uint64_t)(-32), params.get_as_uint64(\"-32\"));\n  EXPECT_EQ((uint64_t)(-16), params.get_as_uint64(\"-16\"));\n  EXPECT_EQ((uint64_t)(-8), params.get_as_uint64(\"-8\"));\n  EXPECT_EQ(1u, params.get_as_uint64(\"true\"));\n  EXPECT_EQ(100u, params.get_as_uint64(\"100.0\"));\n  EXPECT_EQ(1000u, params.get_as_uint64(\"1000.0\"));\n  EXPECT_EQ(0u, params.get_as_uint64(\"10000.0\"));\n  EXPECT_EQ(1234u, params.get_as_uint64(\"size_t\"));\n  EXPECT_EQ(1234u, params.get_as_uint32(\"size_t\"));\n\n  std::cout << \"float: \" << typeid(float).name() << std::endl;\n  std::cout << \"double: \" << typeid(double).name() << std::endl;\n  std::cout << \"long double: \" << typeid(long double).name() << std::endl;\n  std::cout << \"char: \" << typeid(char).name() << std::endl;\n  std::cout << \"signed char: \" << typeid(signed char).name() << std::endl;\n  std::cout << \"unsigned char: \" << typeid(unsigned char).name() << std::endl;\n  std::cout << \"short int: \" << typeid(short int).name() << std::endl;\n  std::cout << \"int: \" << typeid(int).name() << std::endl;\n  std::cout << \"long int: \" << typeid(long int).name() << std::endl;\n  std::cout << \"long long int: \" << typeid(long long int).name() << std::endl;\n  std::cout << \"unsigned short int: \" << typeid(unsigned short int).name()\n            << std::endl;\n  std::cout << \"unsigned int: \" << typeid(unsigned int).name() << std::endl;\n  std::cout << \"unsigned long int: \" << typeid(unsigned long int).name()\n            << std::endl;\n  std::cout << \"unsigned long long int: \"\n            << typeid(unsigned long long int).name() << std::endl;\n\n  size_t size;\n  EXPECT_TRUE(params.get(\"8\", &size));\n  EXPECT_TRUE(params.get(\"16\", &size));\n  EXPECT_TRUE(params.get(\"32\", &size));\n  EXPECT_TRUE(params.get(\"64\", &size));\n  EXPECT_TRUE(params.get(\"-8\", &size));\n  EXPECT_TRUE(params.get(\"-16\", 
&size));\n  EXPECT_TRUE(params.get(\"-32\", &size));\n  EXPECT_TRUE(params.get(\"-64\", &size));\n  EXPECT_TRUE(params.get(\"size_t\", &size));\n}\n\nTEST(Params, GeneralString) {\n  Params params;\n  EXPECT_TRUE(params.empty());\n\n  params.set(\"11111\", \"11111\");\n  params.set(\"22222\", \"22222\");\n  params.set(\"yes\", \"yes\");\n  params.set(\"no\", \"no\");\n  params.set(\"No\", \"No\");\n  params.set(\"Yes\", \"Yes\");\n  params.set(\"true\", \"true\");\n  params.set(\"True\", \"True\");\n  params.set(\"False\", \"False\");\n  params.set(\"false\", \"false\");\n  params.set(\"string\", \"string\");\n\n  EXPECT_TRUE(params.get_as_bool(\"yes\"));\n  EXPECT_TRUE(params.get_as_bool(\"Yes\"));\n  EXPECT_TRUE(params.get_as_bool(\"True\"));\n  EXPECT_TRUE(params.get_as_bool(\"true\"));\n  EXPECT_FALSE(params.get_as_bool(\"No\"));\n  EXPECT_FALSE(params.get_as_bool(\"no\"));\n  EXPECT_FALSE(params.get_as_bool(\"False\"));\n  EXPECT_FALSE(params.get_as_bool(\"false\"));\n  EXPECT_FALSE(params.get_as_bool(\"string\"));\n\n  EXPECT_TRUE(params.get_as_bool(\"11111\"));\n  EXPECT_EQ(103, params.get_as_int8(\"11111\"));\n  EXPECT_EQ(11111, params.get_as_int16(\"11111\"));\n  EXPECT_EQ(11111, params.get_as_int32(\"11111\"));\n  EXPECT_EQ(11111, params.get_as_int64(\"11111\"));\n  EXPECT_EQ(103u, params.get_as_uint8(\"11111\"));\n  EXPECT_EQ(11111u, params.get_as_uint16(\"11111\"));\n  EXPECT_EQ(11111u, params.get_as_uint32(\"11111\"));\n  EXPECT_EQ(11111u, params.get_as_uint64(\"11111\"));\n  EXPECT_FLOAT_EQ(11111.0, params.get_as_float(\"11111\"));\n  EXPECT_FLOAT_EQ(11111.0, params.get_as_double(\"11111\"));\n\n  EXPECT_TRUE(params.get_as_bool(\"22222\"));\n  EXPECT_EQ(-50, params.get_as_int8(\"22222\"));\n  EXPECT_EQ(22222, params.get_as_int16(\"22222\"));\n  EXPECT_EQ(22222, params.get_as_int32(\"22222\"));\n  EXPECT_EQ(22222, params.get_as_int64(\"22222\"));\n  EXPECT_EQ(206u, params.get_as_uint8(\"22222\"));\n  EXPECT_EQ(22222u, 
params.get_as_uint16(\"22222\"));\n  EXPECT_EQ(22222u, params.get_as_uint32(\"22222\"));\n  EXPECT_EQ(22222u, params.get_as_uint64(\"22222\"));\n  EXPECT_FLOAT_EQ(22222.0, params.get_as_float(\"22222\"));\n  EXPECT_FLOAT_EQ(22222.0, params.get_as_double(\"22222\"));\n}\n\nTEST(Params, ParseFromEnvironment) {\n  Params params;\n  Params::ParseFromEnvironment(&params);\n  std::cout << params.get_as_string(\"PATH\") << std::endl;\n}\n\nTEST(Params, ParseFromBuffer) {\n  std::string str =\n      \"{ -1111: -1111.11, -2222: -2222,  1111: 1111, 2222: \"\n      \"\\\"2222\\\", 1: true, \\'object\\' : {  } }\";\n  Params params;\n  Params::ParseFromBuffer(str, &params);\n\n  ASSERT_FLOAT_EQ(-1111.11, params.get_as_float(\"-1111\"));\n  ASSERT_EQ(-2222, params.get_as_int32(\"-2222\"));\n  ASSERT_EQ(1111, params.get_as_int32(\"1111\"));\n  ASSERT_EQ(true, params.get_as_bool(\"1\"));\n  ASSERT_EQ(std::string(\"2222\"), params.get_as_string(\"2222\"));\n\n  ASSERT_TRUE(params.has(\"object\"));\n\n  std::string str1 = \"{proxima.general.cluster.count: 4000 }\";\n  Params::ParseFromBuffer(str1, &params);\n  ASSERT_TRUE(params.has(\"proxima.general.cluster.count\"));\n\n  uint32_t count = 0;\n  params.get(\"proxima.general.cluster.count\", &count);\n  ASSERT_EQ(4000u, count);\n}\n\nTEST(Params, SerializeToBuffer) {\n  std::string str =\n      \"{ -1111: -1111.11, -2222: -2222,  1111: 1111, 2222: \"\n      \"\\\"2222\\\", 1: true, \\'object\\' : \"\n      \"{ \\\"eeee\\\": false, \\'null\\':null } }\";\n  Params params;\n  Params::ParseFromBuffer(str, &params);\n  params.set(\"unsupported_string_pointer\", &str);\n  params.set(\"supported_string\", str);\n\n  std::string str1 = params.debug_string();\n  printf(\"%s\\n\", str1.c_str());\n\n  Params params1;\n  EXPECT_TRUE(Params::ParseFromBuffer(str1, &params1));\n  EXPECT_EQ(str1, params1.debug_string());\n}\n"
  },
  {
    "path": "tests/ailego/container/reservoir_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <ailego/container/reservoir.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(Reservoir, General) {\n  ailego::Reservoir<size_t> sampler(20);\n  EXPECT_EQ(0u, sampler.pool().size());\n  EXPECT_EQ(0u, sampler.total());\n  EXPECT_EQ(20u, sampler.samples());\n\n  for (size_t i = 0; i < sampler.samples(); ++i) {\n    sampler.fill(i);\n  }\n  EXPECT_EQ(sampler.samples(), sampler.pool().size());\n  EXPECT_EQ(sampler.samples(), sampler.total());\n\n  for (size_t i = 0; i < sampler.pool().size(); ++i) {\n    EXPECT_EQ(i, (sampler.pool())[i]);\n  }\n\n  for (size_t i = 0; i < 10000; ++i) {\n    sampler.fill(i);\n  }\n  EXPECT_EQ(sampler.samples(), sampler.pool().size());\n  EXPECT_EQ(10020u, sampler.total());\n\n  for (auto it : sampler.pool()) {\n    std::cout << it << ' ';\n  }\n  std::cout << std::endl;\n\n  sampler.reset();\n  EXPECT_EQ(0u, sampler.pool().size());\n  EXPECT_EQ(0u, sampler.total());\n  EXPECT_EQ(20u, sampler.samples());\n}\n"
  },
  {
    "path": "tests/ailego/container/vector_array_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/vector_array.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(NumericalVectorArray, General) {\n  ailego::NumericalVectorArray<float> arr;\n  ailego::NumericalVectorArray<float> &const_arr = arr;\n  EXPECT_TRUE(arr.empty());\n  EXPECT_EQ(0u, arr.dimension());\n  EXPECT_EQ(0u, arr.count());\n  EXPECT_EQ(0u, arr.bytes());\n  EXPECT_NE(nullptr, arr.data());\n  EXPECT_NE(nullptr, const_arr.data());\n  arr.shrink_to_fit();\n  arr.clear();\n  EXPECT_EQ(0u, arr.dimension());\n  EXPECT_EQ(0u, arr.count());\n\n  try {\n    arr.at(0);\n  } catch (const std::out_of_range &oor) {\n    std::cerr << \"Out of Range error: \" << oor.what() << '\\n';\n  }\n  try {\n    const_arr.at(0);\n  } catch (const std::out_of_range &oor) {\n    std::cerr << \"Out of Range error: \" << oor.what() << '\\n';\n  }\n\n  ailego::NumericalVector<float> vec1 = {10.0f, 11.0f, 12.0f, 13.0f, 14.0f,\n                                         15.0f, 16.0f, 17.0f, 18.0f, 19.0f};\n  ailego::NumericalVector<float> vec2 = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,\n                                         6.0f, 7.0f, 8.0f, 9.0f, 0.0f};\n  arr.reset(10);\n  arr.append(vec1);\n  arr.append(vec2);\n  arr.append(vec1);\n  EXPECT_EQ(3u, arr.count());\n  EXPECT_EQ(10u, arr.dimension());\n\n  
arr.reserve(10);\n  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr.at(0)),\n                              arr.dimension() * sizeof(float)));\n  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr[2]),\n                              arr.dimension() * sizeof(float)));\n  arr.replace(2, vec2);\n  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr.at(1)),\n                              arr.dimension() * sizeof(float)));\n  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr[2]),\n                              arr.dimension() * sizeof(float)));\n\n  arr.clear();\n  EXPECT_EQ(10u, arr.dimension());\n  EXPECT_EQ(0u, arr.count());\n\n  arr.reset(2);\n  arr.append(vec1.data(), 2, 5);\n  arr.append(vec2.data(), 2, 5);\n  EXPECT_EQ(2u, arr.dimension());\n  EXPECT_EQ(10u, arr.count());\n\n  ailego::NumericalVectorArray<float> arr1 = std::move(arr);\n  EXPECT_TRUE(arr.empty());\n  EXPECT_EQ(2u, arr.dimension());\n  EXPECT_EQ(0u, arr.count());\n  EXPECT_EQ(2u, arr1.dimension());\n  EXPECT_EQ(10u, arr1.count());\n\n  arr1.resize(8u);\n  EXPECT_EQ(8u, arr1.count());\n\n  arr1.resize(15u);\n  EXPECT_EQ(15u, arr1.count());\n}\n\nTEST(NumericalVectorArray, Batch) {\n  const size_t DIMENSION = 20;\n  const size_t COUNT = 20000u;\n\n  ailego::NumericalVectorArray<float> arr(DIMENSION);\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  std::string buffer;\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    ailego::FixedVector<float, DIMENSION> vec;\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    arr.append(vec.data(), vec.size());\n    buffer.append((const char *)vec.data(), sizeof(vec));\n  }\n  EXPECT_EQ(COUNT, arr.count());\n  EXPECT_EQ(buffer, std::string((const char *)arr.data(), arr.bytes()));\n}\n\nTEST(BinaryVectorArray, General) {\n  ailego::BinaryVectorArray<uint64_t> arr64;\n  ailego::BinaryVectorArray<uint64_t> 
&const_arr64 = arr64;\n  EXPECT_TRUE(arr64.empty());\n  EXPECT_EQ(0u, arr64.dimension());\n  EXPECT_EQ(0u, arr64.count());\n  EXPECT_EQ(0u, arr64.bytes());\n  EXPECT_NE(nullptr, arr64.data());\n  EXPECT_NE(nullptr, const_arr64.data());\n  arr64.shrink_to_fit();\n  arr64.clear();\n  EXPECT_EQ(0u, arr64.dimension());\n  EXPECT_EQ(0u, arr64.count());\n\n  try {\n    arr64.at(0);\n  } catch (const std::out_of_range &oor) {\n    std::cerr << \"Out of Range error: \" << oor.what() << '\\n';\n  }\n  try {\n    const_arr64.at(0);\n  } catch (const std::out_of_range &oor) {\n    std::cerr << \"Out of Range error: \" << oor.what() << '\\n';\n  }\n\n  ailego::BinaryVector<uint64_t> vec1 = {true, false, true,  true, false,\n                                         true, false, false, true, false};\n  ailego::BinaryVector<uint64_t> vec2 = {true,  true,  true,  true,\n                                         false, false, false, true,\n                                         false, false, true,  false};\n  EXPECT_EQ(64u, vec1.dimension());\n  EXPECT_EQ(64u, vec2.dimension());\n  arr64.reset(10);\n  arr64.append(vec1);\n  arr64.append(vec2);\n  arr64.append(vec1);\n  EXPECT_EQ(3u, arr64.count());\n  EXPECT_EQ(64u, arr64.dimension());\n  EXPECT_EQ(0u, arr64.bytes() % sizeof(uint64_t));\n\n  arr64.reserve(10);\n  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr64.at(0)),\n                              arr64.dimension() >> 3));\n  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr64[2]),\n                              arr64.dimension() >> 3));\n  arr64.replace(2, vec2);\n  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr64.at(1)),\n                              arr64.dimension() >> 3));\n  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr64[2]),\n                              arr64.dimension() >> 3));\n\n  arr64.clear();\n  EXPECT_EQ(64u, arr64.dimension());\n  EXPECT_EQ(0u, arr64.count());\n\n  
ailego::BinaryVectorArray<uint32_t> arr32(1);\n  EXPECT_EQ(32u, arr32.dimension());\n  arr32.append((const uint32_t *)vec1.data(), 32, 2);\n  arr32.append((const uint32_t *)vec2.data(), 32, 2);\n  EXPECT_EQ(32u, arr32.dimension());\n  EXPECT_EQ(4u, arr32.count());\n  EXPECT_EQ(0u, arr64.bytes() % sizeof(uint32_t));\n\n  ailego::BinaryVectorArray<uint32_t> arr1 = std::move(arr32);\n  EXPECT_TRUE(arr32.empty());\n  EXPECT_EQ(32u, arr32.dimension());\n  EXPECT_EQ(0u, arr32.count());\n  EXPECT_EQ(32u, arr1.dimension());\n  EXPECT_EQ(4u, arr1.count());\n\n  arr1.resize(8u);\n  EXPECT_EQ(8u, arr1.count());\n\n  arr1.resize(1u);\n  EXPECT_EQ(1u, arr1.count());\n}\n"
  },
  {
    "path": "tests/ailego/container/vector_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(FixedVector, General) {\n  int aaa[512];\n  ailego::FixedVector<int, 512> *v = ailego::FixedVector<int, 512>::Cast(aaa);\n  ASSERT_EQ(aaa, v->data());\n  EXPECT_EQ(512u, v->size());\n\n  ailego::FixedVector<int, 128> bbb{11, 22, 33};\n  EXPECT_EQ(11, bbb[0]);\n  EXPECT_EQ(22, bbb[1]);\n  EXPECT_EQ(33, bbb[2]);\n  EXPECT_EQ(128u, bbb.size());\n\n  bbb = {55, 66, 77};\n  EXPECT_EQ(55, bbb[0]);\n  EXPECT_EQ(66, bbb[1]);\n  EXPECT_EQ(77, bbb[2]);\n  EXPECT_EQ(128u, bbb.size());\n}\n\nTEST(NumericalVector, General) {\n  ailego::NumericalVector<float> vec(10);\n  for (size_t i = 0; i < vec.size(); ++i) {\n    vec[i] = (float)i;\n  }\n\n  {\n    size_t index = 0;\n    for (auto v : vec) {\n      EXPECT_FLOAT_EQ(v, (float)(index++));\n    }\n  }\n\n  vec.reserve(20);\n  EXPECT_EQ(10u, vec.size());\n\n  vec.append(\n      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f});\n  {\n    size_t index = 0;\n    for (auto v : vec) {\n      EXPECT_FLOAT_EQ(v, (float)(index++));\n    }\n  }\n  EXPECT_EQ(20u, vec.size());\n\n  EXPECT_FALSE(vec.empty());\n  vec.clear();\n  EXPECT_EQ(0u, vec.size());\n  EXPECT_TRUE(vec.empty());\n\n  ailego::NumericalVector<float> vec1(10, 
1.0f);\n  for (auto v : vec1) {\n    EXPECT_FLOAT_EQ(1.0f, v);\n  }\n\n  vec.swap(vec1);\n  for (auto v : vec) {\n    EXPECT_FLOAT_EQ(1.0f, v);\n  }\n  {\n    size_t index = 0;\n    for (auto v : vec1) {\n      EXPECT_FLOAT_EQ(v, (float)(index++));\n    }\n  }\n\n  ailego::NumericalVector<float> vec2(\n      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f});\n  {\n    size_t index = 10;\n    for (auto v : vec1) {\n      EXPECT_FLOAT_EQ(v, (float)(index++));\n    }\n  }\n}\n\nTEST(NumericalVector, Assign) {\n  ailego::NumericalVector<float> vec1;\n  EXPECT_TRUE(vec1.data());\n  EXPECT_EQ(0u, vec1.size());\n\n  ailego::NumericalVector<size_t> vec2(222u);\n  EXPECT_TRUE(!!vec2.data());\n  EXPECT_EQ(222u, vec2.size());\n\n  ailego::NumericalVector<size_t> vec3(vec2);\n  EXPECT_TRUE(!!vec2.data());\n  EXPECT_EQ(222u, vec2.size());\n  EXPECT_TRUE(!!vec3.data());\n  EXPECT_EQ(222u, vec3.size());\n\n  ailego::NumericalVector<size_t> vec4;\n  vec4 = vec3;\n  EXPECT_TRUE(!!vec3.data());\n  EXPECT_EQ(222u, vec3.size());\n  EXPECT_TRUE(!!vec4.data());\n  EXPECT_EQ(222u, vec4.size());\n\n  ailego::NumericalVector<size_t> vec5;\n  vec5 = std::move(vec4);\n  EXPECT_TRUE(vec4.data());\n  EXPECT_EQ(0u, vec4.size());\n  EXPECT_TRUE(!!vec5.data());\n  EXPECT_EQ(222u, vec5.size());\n\n  ailego::NumericalVector<size_t> vec6(std::move(vec5));\n  EXPECT_TRUE(vec5.data());\n  EXPECT_EQ(0u, vec5.size());\n  EXPECT_TRUE(!!vec6.data());\n  EXPECT_EQ(222u, vec6.size());\n\n  ailego::NumericalVector<int> vec7 = {1, 2, 3, 4, 5, 6, 7};\n  EXPECT_TRUE(!!vec7.data());\n  EXPECT_EQ(7u, vec7.size());\n}\n\nTEST(BinaryVector, General) {\n  ailego::BinaryVector<char> a8({true, false, true, false, true, true});\n  EXPECT_EQ(8u, a8.size());\n  EXPECT_FALSE(a8.empty());\n  EXPECT_FALSE(a8.at(1));\n  EXPECT_TRUE(a8[0]);\n\n  for (auto val : a8) {\n    std::cout << val << ' ';\n  }\n  std::cout << std::endl;\n\n  ailego::BinaryVector<int16_t> a16({true, false, true, false, true, 
true});\n  EXPECT_EQ(16u, a16.size());\n  EXPECT_FALSE(a16.at(1));\n  EXPECT_TRUE(a16[0]);\n\n  for (auto val : a16) {\n    std::cout << val << ' ';\n  }\n  std::cout << std::endl;\n\n  ailego::BinaryVector<uint32_t> a32({true, false, true, false, true, true});\n  EXPECT_EQ(32u, a32.size());\n  EXPECT_FALSE(a32.at(1));\n  EXPECT_TRUE(a32[2]);\n\n  for (auto val : a32) {\n    std::cout << val << ' ';\n  }\n  std::cout << std::endl;\n\n  ailego::BinaryVector<int64_t> a64({true, false, true, false, true, true});\n  EXPECT_EQ(64u, a64.size());\n  EXPECT_FALSE(a64.at(1));\n  EXPECT_TRUE(a64[2]);\n\n  for (auto val : a64) {\n    std::cout << val << ' ';\n  }\n  std::cout << std::endl;\n\n  ailego::BinaryVector<uint64_t> aaa(21, true);\n  EXPECT_EQ(64u, aaa.size());\n  for (auto val : aaa) {\n    EXPECT_TRUE(val);\n  }\n  for (size_t i = 0; i < aaa.size(); ++i) {\n    EXPECT_TRUE(aaa[i]);\n    aaa.reset(i);\n    EXPECT_FALSE(aaa.at(i));\n  }\n\n  ailego::BinaryVector<int32_t> bbb(100);\n  EXPECT_EQ(128u, bbb.size());\n  for (auto val : bbb) {\n    EXPECT_FALSE(val);\n  }\n  for (size_t i = 0; i < bbb.size(); ++i) {\n    EXPECT_FALSE(bbb[i]);\n    bbb.set(i);\n    EXPECT_TRUE(bbb.at(i));\n  }\n\n  ailego::BinaryVector<bool> ccc(100);\n  EXPECT_EQ(\n      (100u + sizeof(bool) * 8 - 1) / (sizeof(bool) * 8) * (sizeof(bool) * 8),\n      ccc.size());\n  for (auto val : ccc) {\n    EXPECT_FALSE(val);\n  }\n  for (size_t i = 0; i < ccc.size(); ++i) {\n    EXPECT_FALSE(ccc[i]);\n    ccc.flip(i);\n    EXPECT_TRUE(ccc.at(i));\n  }\n\n  ailego::BinaryVector<int32_t> ddd;\n  EXPECT_TRUE(ddd.empty());\n  EXPECT_FALSE(bbb.empty());\n  ddd = std::move(bbb);\n  EXPECT_FALSE(ddd.empty());\n  EXPECT_TRUE(bbb.empty());\n\n  ailego::BinaryVector<int32_t> eee;\n  EXPECT_TRUE(eee.empty());\n  eee = ddd;\n  EXPECT_FALSE(ddd.empty());\n  EXPECT_FALSE(eee.empty());\n  ddd.clear();\n  bbb.clear();\n  EXPECT_TRUE(ddd.empty());\n  EXPECT_TRUE(bbb.empty());\n\n  ailego::BinaryVector<int32_t> fff;\n  
for (auto val : fff) {\n    (void)val;\n    EXPECT_TRUE(0);\n  }\n\n  std::string str;\n  ailego::BinaryVector<int32_t> ggg(str);\n  ailego::BinaryVector<char> hhh(str);\n\n  str.resize(128);\n  ailego::BinaryVector<char> iii(str);\n  ailego::BinaryVector<int64_t> jjj(std::move(str));\n\n  jjj.assign({true, true, true, false, true, true, false, true, true, false,\n              true, true});\n  EXPECT_NE(0u, jjj.capacity());\n  EXPECT_TRUE(jjj.front());\n  EXPECT_FALSE(jjj.back());\n\n  ailego::BinaryVector<int64_t> mmm;\n  EXPECT_TRUE(mmm.data());\n  ailego::BinaryVector<int64_t> &nnn = mmm;\n  EXPECT_TRUE(nnn.data());\n\n  ailego::BinaryVector<int64_t> ooo;\n  ooo.reserve(1111);\n  EXPECT_NE(0u, ooo.capacity());\n  EXPECT_EQ(0u, ooo.size());\n  EXPECT_TRUE(ooo.empty());\n  ooo.assign({true});\n  EXPECT_EQ(64u, ooo.size());\n\n  ooo.swap(mmm);\n  EXPECT_EQ(0u, ooo.size());\n}\n\nTEST(BinaryVector, Iterator) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<size_t>(1, 129);\n  size_t dimension = dist(gen) * 32;\n\n  ailego::BinaryVector<uint32_t> bt(dimension);\n  std::vector<bool> vec(dimension);\n\n  for (size_t i = 0; i != vec.size(); ++i) {\n    bool val = (dist(gen) % 7 == 0);\n    vec[i] = val;\n    if (val) {\n      bt.set(i);\n    }\n  }\n\n  size_t index = 0;\n  for (auto iter = bt.begin(); iter != bt.end(); ++iter) {\n    EXPECT_EQ(vec[index], *iter);\n    ++index;\n  }\n}\n\nTEST(BinaryVector, LittleEndian) {\n  ailego::BinaryVector<uint8_t> bs8(128 * 4);\n  ailego::BinaryVector<uint16_t> bs16(128 * 4);\n  ailego::BinaryVector<uint32_t> bs32(128 * 4);\n  ailego::BinaryVector<uint64_t> bs64(128 * 4);\n\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<size_t>(0, 128 * 4);\n\n  for (int i = 0; i < 18; ++i) {\n    size_t val = dist(gen);\n    bs8.set(val);\n    bs16.set(val);\n    bs32.set(val);\n    bs64.set(val);\n    EXPECT_TRUE(bs8.at(val));\n    
EXPECT_TRUE(bs16.at(val));\n    EXPECT_TRUE(bs32.at(val));\n    EXPECT_TRUE(bs64.at(val));\n  }\n\n  EXPECT_TRUE(memcmp(bs8.data(), bs16.data(), bs8.bytes()) == 0);\n  EXPECT_TRUE(memcmp(bs8.data(), bs32.data(), bs8.bytes()) == 0);\n  EXPECT_TRUE(memcmp(bs8.data(), bs64.data(), bs8.bytes()) == 0);\n}\n\nTEST(NibbleVector, General) {\n  ailego::NibbleVector<int> nv1(\n      {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7});\n\n  EXPECT_FALSE(nv1.empty());\n  EXPECT_EQ(16, nv1.size());\n  EXPECT_EQ(16, nv1.dimension());\n  EXPECT_EQ(8, nv1.bytes());\n  for (int i = -8; i != 8; ++i) {\n    EXPECT_EQ(i, nv1.at(i + 8));\n  }\n\n  ailego::NibbleVector<uint32_t> nv2(31, 5);\n  for (size_t i = 0; i != nv2.size(); ++i) {\n    EXPECT_EQ(5u, nv2.at(i));\n  }\n\n  ailego::NibbleVector<int32_t> nv3(56);\n  nv3.assign({-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7});\n  EXPECT_EQ(16u, nv3.size());\n\n  ailego::NibbleVector<uint32_t> nv4(25);\n  nv4.assign(88, 6);\n  for (size_t i = 0; i != nv4.size(); ++i) {\n    EXPECT_EQ(6u, nv4.at(i));\n  }\n}\n\nTEST(NibbleVector, UnsignedIterator) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<uint32_t>(0, 15);\n  size_t dimension = ((std::uniform_int_distribution<size_t>(1, 63))(gen)) * 2;\n\n  ailego::NibbleVector<uint32_t> nv;\n  std::vector<uint32_t> vec;\n\n  for (size_t i = 0; i != dimension; i += 2) {\n    uint32_t lo = dist(gen);\n    uint32_t hi = dist(gen);\n    vec.push_back(lo);\n    vec.push_back(hi);\n    nv.append(lo, hi);\n  }\n\n  size_t index = 0;\n  for (auto iter = nv.begin(); iter != nv.end(); ++iter) {\n    EXPECT_EQ(vec[index], *iter);\n    ++index;\n  }\n}\n\nTEST(NibbleVector, SignedIterator) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<int32_t>(-8, 7);\n  size_t dimension = ((std::uniform_int_distribution<size_t>(1, 63))(gen)) * 2;\n\n  ailego::NibbleVector<int32_t> nv;\n  std::vector<int32_t> vec;\n  
EXPECT_TRUE(nv.empty());\n\n  for (size_t i = 0; i != dimension; i += 2) {\n    int32_t lo = dist(gen);\n    int32_t hi = dist(gen);\n    vec.push_back(lo);\n    vec.push_back(hi);\n    nv.append(lo, hi);\n  }\n  EXPECT_FALSE(nv.empty());\n  EXPECT_EQ(vec.size(), nv.size());\n  EXPECT_EQ(vec.size(), nv.dimension());\n  EXPECT_EQ(vec.size() / 2, nv.bytes());\n\n  size_t index = 0;\n  for (auto iter = nv.begin(); iter != nv.end(); ++iter) {\n    EXPECT_EQ(vec[index], *iter);\n    ++index;\n  }\n\n  // Test again\n  for (size_t i = 0; i != dimension; i += 2) {\n    int32_t lo = dist(gen);\n    int32_t hi = dist(gen);\n    vec[i + 0] = lo;\n    vec[i + 1] = hi;\n    nv.set(i + 0, lo);\n    nv.set(i + 1, hi);\n  }\n  index = 0;\n  for (auto iter = nv.begin(); iter != nv.end(); ++iter) {\n    EXPECT_EQ(vec[index], *iter);\n    ++index;\n  }\n}\n"
  },
  {
    "path": "tests/ailego/encoding/json_parse_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/encoding/json.h>\n\nusing namespace zvec::ailego;\n\nTEST(Json, JsonParser) {\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    JsonString str =\n        \"{first: {int: 123, float: 1.0, \"\n        \"true:[true, true, true, true], false:[false],  zero:[0,0,0]}, \"\n        \"true:true, false:[false, false, false, false], zero:[0,0]}\";\n\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val.refer() == 1);\n\n    const JsonValue &val2 = val;\n    const JsonObject &obj2 = val2.as_object();\n\n    EXPECT_TRUE(val2.as_object().refer() == 1);\n    JsonValue val_result;\n    EXPECT_TRUE(obj2.get(\"first\", &val_result));\n    EXPECT_TRUE(val_result.refer() == 2);\n    EXPECT_TRUE(val_result.as_object().refer() == 2);\n\n    EXPECT_TRUE(obj2.get(\"true\", &val_result));\n    EXPECT_TRUE(obj2.get(\"false\", &val_result));\n    EXPECT_TRUE(obj2.get(\"zero\", &val_result));\n\n    const JsonValue val3 = val;\n    EXPECT_TRUE(val3.refer() == 2);\n    EXPECT_TRUE(val3.as_object().refer() == 1);\n\n    JsonValue val4 = val;\n    EXPECT_TRUE(val4.refer() == 3);\n    EXPECT_TRUE(val3.refer() == 3);\n    EXPECT_TRUE(val2.refer() == 3);\n\n    JsonObject &obj4 = val4.as_object();\n    
EXPECT_TRUE(obj4.refer() == 2);\n    EXPECT_TRUE(val4.refer() == 0);\n    EXPECT_TRUE(val3.refer() == 2);\n    EXPECT_TRUE(val3.as_object().refer() == 2);\n    EXPECT_TRUE(val2.refer() == 2);\n  }\n\n  {\n    JsonString str =\n        \"[ true,,\\'\\\\u9701abcd \\\\u38981515\\\\u89454845\\\\uabcd\\\\uef12\\'\";\n    JsonParser parser;\n    JsonValue val = JsonValue();\n\n    JsonValue tmp;\n    EXPECT_FALSE(tmp.parse(str));\n\n    parser.set_squote();\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonParser parser;\n    JsonString str = \"{ 0:0, 1: 1, 2:2, 3:3, 4: 4, 5:5}\";\n    JsonValue val;\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val.refer() == 1);\n\n    JsonValue val2 = val;\n    EXPECT_TRUE(val2.refer() == 2);\n\n    JsonObject &obj2 = val2.as_object();\n    EXPECT_TRUE(val2.refer() == 0);\n    EXPECT_TRUE(obj2.refer() == 2);\n    EXPECT_TRUE(obj2[\"0\"].refer() == 2);\n    EXPECT_TRUE(obj2[\"1\"].refer() == 2);\n    EXPECT_TRUE(obj2[\"2\"].refer() == 2);\n    EXPECT_TRUE(obj2[\"3\"].refer() == 2);\n    EXPECT_TRUE(obj2[\"4\"].refer() == 2);\n    EXPECT_TRUE(obj2[\"5\"].refer() == 2);\n    EXPECT_TRUE(obj2.refer() == 0);\n\n    JsonValue val3 = val;\n    EXPECT_TRUE(val3.refer() == 2);\n\n    JsonObject::const_iterator iter = obj2.begin();\n    EXPECT_TRUE(iter->key().refer() == 2);\n    EXPECT_TRUE(iter->value().refer() == 2);\n  }\n\n  {\n    JsonParser parser;\n    JsonString str = \"[0, 1, 2, 3, 4, 5]\";\n    JsonValue val;\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val.refer() == 1);\n\n    JsonValue val2 = val;\n    EXPECT_TRUE(val2.refer() == 2);\n\n    JsonArray &arr2 = val2.as_array();\n    EXPECT_TRUE(val2.refer() == 0);\n    EXPECT_TRUE(arr2.refer() == 2);\n    EXPECT_TRUE(arr2[0].refer() == 2);\n    EXPECT_TRUE(arr2[1].refer() == 2);\n    EXPECT_TRUE(arr2[2].refer() == 2);\n    EXPECT_TRUE(arr2[3].refer() == 
2);\n    EXPECT_TRUE(arr2[4].refer() == 2);\n    EXPECT_TRUE(arr2[5].refer() == 2);\n    EXPECT_TRUE(arr2.refer() == 0);\n\n    JsonValue val3 = val;\n    EXPECT_TRUE(val3.refer() == 2);\n\n    JsonArray::const_iterator iter = arr2.begin();\n    EXPECT_TRUE(iter->refer() == 2);\n  }\n\n  {\n    JsonString str =\n        \"[ 15, true, null,\\'\\\\u9701abcd \"\n        \"\\\\u38981515\\\\u89454845\\\\uabcd\\\\uef12\\',]\";\n    JsonParser parser;\n    JsonValue val(true);\n\n    parser.set_squote();\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[1].as_bool());\n    val[1] = val[2];\n    EXPECT_FALSE(val[1].as_bool());\n  }\n\n  {\n    JsonParser parser;\n    JsonValue val1, val2, val3, val4;\n\n    EXPECT_TRUE(parser.parse(\n        \"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, \"\n        \"6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\"\n        \"17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\"\n        \"\\\"32\\\",\\\"33\\\",\\\"34\\\",\\\"35\\\",\\\"36\\\",\"\n        \"{\\\"5\\\":5,\\\"4\\\":4,\\\"3\\\":3,\\\"2\\\":2,\\\"1\\\":1,\\\"0\\\":0,\\\"-1\\\":-1}]\",\n        &val1));\n    EXPECT_TRUE(\n        parser.parse(\"[\\\"0\\\",\\\"1\\\",\\\"2\\\",\\\"3\\\",\\\"4\\\",\\\"5\\\",\\\"6\\\",\\\"7\\\",\\\"8\\\",\"\n                     \"\\\"9\\\",\\\"10\\\",\\\"11\\\",\\\"12\\\",\\\"13\\\",\\\"14\\\",\\\"15\\\",\\\"16\\\",\"\n                     \"\\\"17\\\",\\\"18\\\",\\\"19\\\",\\\"20\\\",\\\"21\\\",\\\"22\\\",\\\"23\\\",\\\"24\\\",\"\n                     \"\\\"25\\\",\\\"26\\\",\\\"27\\\",\\\"28\\\",\\\"29\\\",\\\"30\\\",\\\"31\\\",\"\n                     \"\\\"32\\\",\\\"33\\\",\\\"34\\\",\\\"35\\\",\\\"36\\\",\"\n                     \"{\\\"-2\\\":\\\"-2\\\",\\\"-1\\\":\\\"-1\\\",\\\"1\\\":\\\"1\\\",\\\"2\\\":\\\"2\\\",\"\n                     \"\\\"3\\\":\\\"3\\\",\\\"4\\\":\\\"4\\\",\\\"5\\\":\\\"5\\\",\\\"6\\\":\\\"6\\\"},\"\n                     \"[],null,true,false,0.0,1.0,9.999,-1]\",\n                     &val2));\n    EXPECT_TRUE(\n        
parser.parse(\"[\\\"0\\\",\\\"1\\\",\\\"2\\\",\\\"3\\\",\\\"4\\\",\\\"5\\\",\\\"6\\\",\\\"7\\\",\\\"8\\\",\"\n                     \"\\\"9\\\",\\\"10\\\",\\\"11\\\",\\\"12\\\",\\\"13\\\",\\\"14\\\",\\\"15\\\",\\\"16\\\",\"\n                     \"\\\"17\\\",\\\"18\\\",\\\"19\\\",\\\"20\\\",\\\"21\\\",\\\"22\\\",\\\"23\\\",\\\"24\\\",\"\n                     \"\\\"25\\\",\\\"26\\\",\\\"27\\\",\\\"28\\\",\\\"29\\\",\\\"30\\\",\\\"31\\\",\"\n                     \"\\\"32\\\",\\\"33\\\",\\\"34\\\",\\\"35\\\",\\\"36\\\",\"\n                     \"{\\\"5\\\":\\\"5\\\",\\\"4\\\":\\\"4\\\",\\\"3\\\":\\\"3\\\",\"\n                     \"\\\"2\\\":\\\"2\\\",\\\"1\\\":\\\"1\\\",\\\"0\\\":0,\"\n                     \"\\\"-1\\\":\\\"-1\\\",\\\"-2\\\":\\\"-2\\\",\\\"6\\\":\\\"6\\\"},\"\n                     \"[],null,true,false,0.0,1.0,9.999,-1]\",\n                     &val3));\n    EXPECT_TRUE(\n        parser.parse(\"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, \"\n                     \"6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\"\n                     \"17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,\"\n                     \"\\\"32\\\",\\\"33\\\",\\\"34\\\",\\\"35\\\",\\\"36\\\",\"\n                     \"{\\\"-2\\\":\\\"-2\\\",\\\"-1\\\":-1,\\\"1\\\":1,\\\"2\\\":2,\"\n                     \"\\\"3\\\":3,\\\"4\\\":4,\\\"5\\\":5,\\\"6\\\":\\\"6\\\",\\\"0\\\":0},\"\n                     \"[],null,true,false,0.0,1.0,9.999,-1]\",\n                     &val4));\n\n    JsonValue tmp1 = val1;\n    tmp1.merge(val2);\n\n    JsonValue tmp2 = val2;\n    tmp2.merge(val1);\n\n    JsonDumper dumper;\n    EXPECT_TRUE(dumper.dump(val1));\n    EXPECT_TRUE(dumper.dump(val2));\n    EXPECT_TRUE(dumper.dump(val3));\n    EXPECT_TRUE(dumper.dump(val4));\n    EXPECT_TRUE(dumper.dump(tmp1));\n\n    EXPECT_TRUE(tmp1.as_json_string() == val3.as_json_string());\n    EXPECT_TRUE(tmp2.as_json_string() == val4.as_json_string());\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str =\n        
\"{\\\"req\\\": {\\\"aid\\\": \\\"\\\", \\\"friend\\\": \"\n        \"\\\"1234567890\\\", \\\"uintype\\\": \"\n        \"0}}\";\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_FALSE(parser.parse(\"\", &val));\n\n    JsonValue tmp;\n    EXPECT_TRUE(tmp.parse(str));\n    EXPECT_TRUE(tmp == val);\n    EXPECT_FALSE(tmp != val);\n\n    const JsonValue &req = val[\"req\"];\n    EXPECT_TRUE(req.is_object());\n    EXPECT_TRUE(req[\"show\"].as_integer() == 0);\n    EXPECT_TRUE(req[\"friend\"].as_integer() == 1234567890);\n    EXPECT_TRUE(req[1].is_null());\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"[true, false, 0, 1, 2, \\\"3\\\"]\";\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[0u].as_bool());\n    EXPECT_FALSE(val[1].as_bool());\n    EXPECT_TRUE(val[2].as_integer() == 0);\n    EXPECT_TRUE(val[3].as_integer() == 1);\n    EXPECT_TRUE(val[4].as_integer() == 2);\n    EXPECT_TRUE(val[5].as_integer() == 3);\n\n    JsonValue tmp;\n    EXPECT_TRUE(tmp.parse(str));\n    EXPECT_TRUE(tmp == val);\n    EXPECT_FALSE(tmp != val);\n\n    const JsonValue val2 = val;\n    EXPECT_TRUE(val2[0u].as_bool());\n    EXPECT_FALSE(val2[1].as_bool());\n    EXPECT_TRUE(val2[2].as_integer() == 0);\n    EXPECT_TRUE(val2[3].as_integer() == 1);\n    EXPECT_TRUE(val2[4].as_integer() == 2);\n    EXPECT_TRUE(val2[5].as_integer() == 3);\n    EXPECT_TRUE(val2[6].is_null());\n    EXPECT_TRUE(val2[(JsonValue::size_type)-1].as_integer() == 0);\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{abcd:\\\"1234\\\"}\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_unstrict(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    
EXPECT_TRUE(val[\"abcd\"].as_integer() == 1234);\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"/*comments*/ { abcd\\t  :  /* //comments */\\\"1234\\\" }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    parser.set_comment(false);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    parser.set_comment(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_unstrict(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_integer() == 1234);\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ abcd/*  fff*/  :  /* //comments */\\\"1234\\\" }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    parser.set_comment(false);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    parser.set_comment(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_unstrict(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_integer() == 1234);\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str =\n        \"{ \\\"abcd\\\\\\\"/*  fff*/  :  /* //comments */\\\"1234\\\" , {, [,  ]}}\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    
JsonParser parser;\n\n    std::string str = \"{ abcd///comments */\\\"1234\\\", [] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ abcd/*//*/ : \\t  \\\"1234\\\" }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == NULL);\n    EXPECT_TRUE(val[\"abcd/*//*/\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_comment(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == std::string(\"1234\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ ,{}, \\\"abcd/*//*/ : \\t  \\\"1234\\\", }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ ccdd: [], abcd\\\" /*//*/ \\n: \\t  \\\"1234\\\" }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_comment(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == NULL);\n    EXPECT_TRUE(val[\"abcd\\\"\"].as_c_string() == std::string(\"1234\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{,, \\\"\\\" \\n: \\t  \\\"1234\\\" }\";\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"\"].as_c_string() == 
std::string(\"1234\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ ,  \\n: \\t  \\\"1234\\\" }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"\"].as_c_string() == NULL);\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ \\'ccdd\\': [], \\'abcd\\' /*//*/ \\n: \\t  \\\"1234\\\" }\";\n\n    parser.set_comment(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"\\'abcd\\'\"].as_c_string() == std::string(\"1234\"));\n\n    parser.set_squote(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"abcd\"].as_c_string() == std::string(\"1234\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ 1234 : \\'abcd\\', \\'5678\\' : [5, \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"1234\"].as_c_string() == std::string(\"abcd\"));\n    EXPECT_TRUE(val[\"5678\"].as_array().at(1).as_c_string() ==\n                std::string(\"5678\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ 1234 : \\'ab\\\"cd\\', \\'5678\\' : [\\\"5\\\", \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"1234\"].as_c_string() == std::string(\"ab\\\"cd\"));\n    EXPECT_TRUE(val[\"5678\"].as_array().at(1).as_c_string() ==\n                std::string(\"5678\"));\n  }\n\n  {\n    JsonValue val;\n    
JsonParser parser;\n\n    std::string str = \"{ 1234 : \\'ab\\\\\\'cd\\', \\'5678\\' : [\\\"5\\\", \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    EXPECT_TRUE(parser.parse(str.c_str(), &val));\n    EXPECT_TRUE(val[\"1234\"].as_c_string() == std::string(\"ab\\\\\\'cd\"));\n    EXPECT_TRUE(val[\"5678\"].as_array().at(1).as_c_string() ==\n                std::string(\"5678\"));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str = \"{ \\'1234\\'\\' : \\'abcd\\', \\'5678\\' : [\\\"5\\\", \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str =\n        \"{ \\'1234\\' : \\'abcd\\' \\\", \\'5678\\' : [\\\"5\\\", \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str =\n        \"{ \\'1234\\' : \\'abcd\\' , \\'5678\\' : [\\\"5\\\" \\\", \\'5678\\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    
EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val;\n    JsonParser parser;\n\n    std::string str =\n        \"{ \\'1234\\' : \\'abcd\\' , \\'5678\\' : [\\\"5\\\" , \\'5678\\' \\'] }\";\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_squote(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_simple(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n\n    parser.set_unstrict(true);\n    EXPECT_FALSE(parser.parse(str.c_str(), &val));\n  }\n\n  {\n    JsonValue val1;\n    JsonValue val2;\n    JsonString str =\n        \"{\\\"a\\\":1, \\\"b\\\":2, \\\"c\\\":3, \\\"string\\\":  \\\"string\\\", \"\n        \"\\\"array\\\": [null, true, false, \"\n        \", 0], \\\"object\\\": {\\\"a\\\":1.0, \\\"b\\\":2.0, \\\"c\\\":3.0}, \"\n        \"\\\"true\\\": true, \\\"null\\\": null}\";\n\n    EXPECT_TRUE(val1.parse(str));\n    EXPECT_TRUE(val2.parse(str));\n    EXPECT_TRUE(val2 == val1);\n    EXPECT_FALSE((val2 != val1));\n    EXPECT_TRUE(val2.as_object() == val1);\n    EXPECT_TRUE(val2 == val1.as_object());\n    EXPECT_TRUE(val2.as_object() == val1.as_object());\n    EXPECT_TRUE(val1[\"string\"] == val2[\"string\"]);\n  }\n}\n\nTEST(Json, JsonObject) {\n  {\n    JsonObject jobj;\n\n    for (int i = 0; i < 1000; ++i) {\n      JsonValue key(i);\n      EXPECT_TRUE(jobj.set(key.as_json_string().c_str(), JsonValue((float)i)));\n\n      JsonValue::integer_type val;\n      EXPECT_TRUE(jobj.get(key.as_json_string(), &val));\n      EXPECT_EQ(val, i);\n    }\n\n    for (int i = 0; i < 1000; ++i) {\n      JsonValue key(i);\n      jobj.unset(key.as_json_string().c_str());\n\n      JsonValue::integer_type val = 0;\n      EXPECT_FALSE(jobj.get(key.as_stl_string(), &val));\n      EXPECT_EQ(val, 0);\n    }\n  }\n\n  {\n    JsonObject obj;\n\n    obj.set(\"0\", JsonValue(0));\n    obj.set(\"1\", JsonValue(1));\n    obj.set(\"2\", JsonValue(2));\n    obj.set(\"3\", JsonValue(3));\n    obj.set(\"4\", 
JsonValue(4));\n    obj.set(\"5\", JsonValue(5));\n    obj.set(\"6\", JsonValue(6));\n    obj.set(\"7\", JsonValue(7));\n    obj.set(\"8\", JsonValue(8));\n    obj.set(\"9\", JsonValue(9));\n\n    EXPECT_EQ(obj.size(), 10u);\n    EXPECT_EQ(obj[\"0\"].as_integer(), 0);\n    EXPECT_EQ(obj[\"1\"].as_integer(), 1);\n    EXPECT_EQ(obj[\"2\"].as_integer(), 2);\n    EXPECT_EQ(obj[\"3\"].as_integer(), 3);\n    EXPECT_EQ(obj[\"4\"].as_integer(), 4);\n    EXPECT_EQ(obj[\"5\"].as_integer(), 5);\n    EXPECT_EQ(obj[\"6\"].as_integer(), 6);\n    EXPECT_EQ(obj[\"7\"].as_integer(), 7);\n    EXPECT_EQ(obj[\"8\"].as_integer(), 8);\n    EXPECT_EQ(obj[\"9\"].as_integer(), 9);\n    EXPECT_EQ(obj.size(), 10u);\n\n    int index_id = 0;\n    for (JsonObject::const_iterator it = obj.cbegin(); it != obj.cend();\n         ++it, ++index_id) {\n      EXPECT_EQ(it->value().as_integer(), index_id);\n    }\n\n    int index_id_r = 9;\n    for (JsonObject::const_reverse_iterator it = obj.crbegin();\n         it != obj.crend(); ++it, --index_id_r) {\n      EXPECT_EQ(it->value().as_integer(), index_id_r);\n    }\n\n    obj.unset(\"1\");\n    EXPECT_EQ(obj.size(), 9u);\n    obj.unset(\"3\");\n    EXPECT_EQ(obj.size(), 8u);\n    obj.unset(\"5\");\n    EXPECT_EQ(obj.size(), 7u);\n    obj.unset(\"7\");\n    EXPECT_EQ(obj.size(), 6u);\n    obj.unset(\"9\");\n    EXPECT_EQ(obj.size(), 5u);\n\n    obj.clear();\n    EXPECT_EQ(obj.size(), 0u);\n  }\n\n  {\n    JsonObject obj;\n\n    // 0\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_FALSE(obj.has(\"44444\"));\n    EXPECT_FALSE(obj.has(\"55555\"));\n    EXPECT_FALSE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_FALSE(obj.has(\"88888\"));\n    EXPECT_FALSE(obj.has(\"99999\"));\n\n    // 1\n    EXPECT_TRUE(obj.set(\"55555\", \"55555\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n  
  EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_FALSE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_FALSE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_FALSE(obj.has(\"88888\"));\n    EXPECT_FALSE(obj.has(\"99999\"));\n\n    // 2\n    EXPECT_TRUE(obj.set(\"88888\", \"88888\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_FALSE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_FALSE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_FALSE(obj.has(\"99999\"));\n\n    // 2\n    EXPECT_TRUE(obj.set(\"66666\", \"66666\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_FALSE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_FALSE(obj.has(\"99999\"));\n\n    // 3\n    EXPECT_TRUE(obj.set(\"44444\", \"44444\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_FALSE(obj.has(\"99999\"));\n\n    // 4\n    EXPECT_TRUE(obj.set(\"99999\", \"99999\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_FALSE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    
EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    // 5\n    EXPECT_TRUE(obj.set(\"22222\", \"22222\"));\n    EXPECT_FALSE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_TRUE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    // 6\n    EXPECT_TRUE(obj.set(\"00000\", \"00000\"));\n    EXPECT_TRUE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_TRUE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_FALSE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    // 7\n    EXPECT_TRUE(obj.set(\"77777\", \"77777\"));\n    EXPECT_TRUE(obj.has(\"00000\"));\n    EXPECT_FALSE(obj.has(\"11111\"));\n    EXPECT_TRUE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_TRUE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    // 8\n    EXPECT_TRUE(obj.set(\"11111\", \"11111\"));\n    EXPECT_TRUE(obj.has(\"00000\"));\n    EXPECT_TRUE(obj.has(\"11111\"));\n    EXPECT_TRUE(obj.has(\"22222\"));\n    EXPECT_FALSE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_TRUE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    // 9\n    EXPECT_TRUE(obj.set(\"33333\", \"33333\"));\n    EXPECT_TRUE(obj.has(\"00000\"));\n    
EXPECT_TRUE(obj.has(\"11111\"));\n    EXPECT_TRUE(obj.has(\"22222\"));\n    EXPECT_TRUE(obj.has(\"33333\"));\n    EXPECT_TRUE(obj.has(\"44444\"));\n    EXPECT_TRUE(obj.has(\"55555\"));\n    EXPECT_TRUE(obj.has(\"66666\"));\n    EXPECT_TRUE(obj.has(\"77777\"));\n    EXPECT_TRUE(obj.has(\"88888\"));\n    EXPECT_TRUE(obj.has(\"99999\"));\n\n    EXPECT_EQ(10u, obj.size());\n\n    int index_ids[] = {0,     11111, 22222, 33333, 44444,\n                       55555, 66666, 77777, 88888, 99999};\n    int i = 0;\n    for (JsonObject::iterator it = obj.begin(); it != obj.end(); ++it, ++i) {\n      EXPECT_EQ(it->value().as_integer(), index_ids[i]);\n    }\n    EXPECT_EQ(i, (int)obj.size());\n  }\n\n  {\n    JsonObject obj;\n\n    EXPECT_TRUE(obj.set(\"key0\", \"key0\"));\n    EXPECT_TRUE(obj.set(\"key1\", \"key1\"));\n    EXPECT_TRUE(obj.set(\"key2\", \"key2\"));\n    EXPECT_TRUE(obj.set(\"key3\", \"key3\"));\n    EXPECT_TRUE(obj.set(\"key4\", \"key4\"));\n    EXPECT_TRUE(obj.set(\"key5\", \"key5\"));\n    EXPECT_FALSE(obj.set(\"key0\", \"000000\"));\n    EXPECT_FALSE(obj.set(\"key1\", \"000000\"));\n    EXPECT_FALSE(obj.set(\"key5\", \"000000\"));\n    EXPECT_EQ(obj.size(), 6u);\n\n    JsonString str;\n    EXPECT_TRUE(obj.get(\"key0\", &str));\n    EXPECT_TRUE(str == JsonString(\"key0\"));\n    EXPECT_TRUE(obj.get(\"key3\", &str));\n    EXPECT_TRUE(str == JsonString(\"key3\"));\n    EXPECT_TRUE(obj.get(\"key5\", &str));\n    EXPECT_TRUE(str == JsonString(\"key5\"));\n    EXPECT_EQ(obj.size(), 6u);\n\n    obj[\"key0\"] = 0;\n    obj[\"key1\"] = 1;\n    obj[\"key2\"] = 2;\n    obj[\"key3\"] = 3;\n    obj[\"key4\"] = 4;\n    obj[\"key5\"] = 5;\n\n    EXPECT_EQ(obj.size(), 6u);\n\n    int index_id = 0;\n    for (JsonObject::iterator it = obj.begin(); it != obj.end();\n         ++it, ++index_id) {\n      EXPECT_EQ(it->value().as_integer(), index_id);\n    }\n    EXPECT_EQ(index_id, 6);\n\n    int index_id_r = 5;\n    for (JsonObject::reverse_iterator it = obj.rbegin(); it != 
obj.rend();\n         ++it, --index_id_r) {\n      EXPECT_EQ(it->value().as_integer(), index_id_r);\n    }\n    EXPECT_EQ(index_id_r, -1);\n  }\n\n  {\n    JsonObject::reverse_iterator it1 = JsonObject::iterator();\n    JsonObject::reverse_iterator it2 = JsonObject::reverse_iterator();\n    EXPECT_TRUE(it1 == it2);\n\n    JsonObject::iterator it3 = JsonObject::reverse_iterator();\n    JsonObject::iterator it4 = JsonObject::iterator();\n    EXPECT_TRUE(it3 == it4);\n\n    JsonObject::const_iterator it5 = JsonObject::const_iterator();\n    JsonObject::const_iterator it6 = JsonObject::iterator();\n    EXPECT_TRUE(it5 == it6);\n\n    JsonObject::const_iterator it7 = JsonObject::reverse_iterator();\n    JsonObject::const_iterator it8 = JsonObject::const_reverse_iterator();\n    EXPECT_TRUE(it7 == it8);\n\n    JsonObject::const_reverse_iterator it9 = JsonObject::const_iterator();\n    JsonObject::const_reverse_iterator it10 = JsonObject::iterator();\n    EXPECT_TRUE(it9 == it10);\n\n    JsonObject::const_reverse_iterator it11 = JsonObject::reverse_iterator();\n    JsonObject::const_reverse_iterator it12 =\n        JsonObject::const_reverse_iterator();\n    EXPECT_TRUE(it11 == it12);\n  }\n\n  {\n    JsonObject obj1;\n    JsonObject obj2;\n    JsonObject obj3;\n    JsonObject::iterator iter1;\n\n    EXPECT_TRUE(obj1.set(\"aaa\", \"123456\"));\n    obj2 = obj1;\n    iter1 = obj1.begin();\n    obj3 = obj1;\n    iter1->value() = \"abcdefg\";\n    EXPECT_TRUE(obj1[\"aaa\"].as_string() == \"abcdefg\");\n    EXPECT_TRUE(obj2[\"aaa\"].as_string() == \"123456\");\n    EXPECT_TRUE(obj3[\"aaa\"].as_string() == \"123456\");\n  }\n\n  {\n    JsonObject obj1;\n\n    obj1.set(\"FTitle\", \"123456789\");\n    obj1.set(\"FDesc\", \"abcdef\");\n\n    const JsonObject &obj2 = obj1;\n    EXPECT_TRUE(obj1[\"FTitle\"].as_stl_string() == \"123456789\");\n    EXPECT_TRUE(obj1[\"FDesc\"].as_stl_string() == \"abcdef\");\n    EXPECT_TRUE(obj2[\"FTitle\"].as_stl_string() == \"123456789\");\n    
EXPECT_TRUE(obj2[\"FDesc\"].as_stl_string() == \"abcdef\");\n  }\n}\n\nTEST(Json, JsonArray) {\n  {\n    JsonArray arr1;\n    arr1.push(JsonValue(0.0));\n    arr1.push(JsonValue(2));\n    arr1.push(\"2\");\n    arr1.push(JsonValue(true));\n    arr1.push(JsonArray());\n    arr1.push(JsonObject());\n    arr1.push(JsonValue());\n    arr1.push(JsonString());\n\n    JsonArray arr2 = arr1;\n    EXPECT_TRUE(arr2 == arr1);\n\n    JsonArray arr3;\n    arr3.push(JsonValue(0.0));\n    arr3.push(JsonValue(2));\n    arr3.push(\"2\");\n    arr3.push(JsonValue(true));\n    arr3.push(JsonArray());\n    arr3.push(JsonObject());\n    arr3.push(JsonValue());\n    arr3.push(JsonString());\n    EXPECT_TRUE(arr2 == arr3);\n    EXPECT_TRUE(arr1 == arr3);\n\n    arr2.push(JsonObject());\n    EXPECT_TRUE(arr2 != arr3);\n    EXPECT_TRUE(arr2 != arr1);\n    EXPECT_TRUE(arr1 == arr3);\n  }\n\n  {\n    JsonArray jarr;\n\n    EXPECT_TRUE(jarr.capacity() == 0);\n    EXPECT_TRUE(jarr.size() == 0);\n    jarr.reserve(21);\n    EXPECT_TRUE(jarr.capacity() == 32);\n    EXPECT_TRUE(jarr.size() == 0);\n    jarr.reserve(2);\n    EXPECT_TRUE(jarr.capacity() == 32);\n    EXPECT_TRUE(jarr.size() == 0);\n    jarr.reserve(33);\n    EXPECT_TRUE(jarr.capacity() == 64);\n    EXPECT_TRUE(jarr.size() == 0);\n  }\n\n  {\n    JsonArray arr1;\n    JsonArray arr2;\n    JsonArray arr3;\n    JsonArray::iterator iter1;\n\n    arr1.push(\"123456\");\n    arr2 = arr1;\n    iter1 = arr1.begin();\n    arr3 = arr1;\n    *iter1 = \"abcdefg\";\n    EXPECT_TRUE(arr1[0].as_string() == \"abcdefg\");\n    EXPECT_TRUE(arr2[0].as_string() == \"123456\");\n    EXPECT_TRUE(arr3[0].as_string() == \"123456\");\n  }\n\n  {\n    JsonArray arr1;\n    JsonArray arr2;\n    JsonArray arr3;\n\n    arr1.push(\"123456\");\n    arr2 = arr1;\n\n    JsonValue &val1 = arr1.front();\n    arr3 = arr1;\n    val1 = \"abcdefg\";\n    EXPECT_TRUE(arr1[0].as_string() == \"abcdefg\");\n    EXPECT_TRUE(arr2[0].as_string() == \"123456\");\n    
EXPECT_TRUE(arr3[0].as_string() == \"123456\");\n  }\n\n  {\n    JsonArray arr;\n    JsonValue val(666);\n\n    arr.push(\"0\");\n    arr.push(JsonValue(1));\n    arr.push(JsonValue(2));\n    arr.push(\"3\");\n    arr.push(\"4\");\n    arr.push(\"5\");\n    arr.push(\"6\");\n    arr.push(JsonValue(7.0));\n    EXPECT_TRUE(arr.size() == 8);\n    EXPECT_TRUE(arr.capacity() == 32);\n    EXPECT_TRUE(arr[0].as_string() == \"0\");\n    EXPECT_TRUE(arr[1].as_integer() == 1);\n    EXPECT_TRUE(arr[2].as_integer() == 2);\n    EXPECT_TRUE(arr[3].as_integer() == 3);\n    EXPECT_TRUE(arr[4].as_integer() == 4);\n    EXPECT_TRUE(arr[5].as_integer() == 5);\n    EXPECT_TRUE(arr[6].as_integer() == 6);\n    EXPECT_TRUE(arr[7].as_integer() == 7);\n    arr.resize(20, val);\n    EXPECT_TRUE(arr.size() == 20);\n    arr.resize(5, val);\n    EXPECT_TRUE(arr.size() == 5);\n    EXPECT_TRUE(arr[0].as_string() == \"0\");\n    EXPECT_TRUE(arr[1].as_integer() == 1);\n    EXPECT_TRUE(arr[2].as_integer() == 2);\n    EXPECT_TRUE(arr[3].as_string() == \"3\");\n    EXPECT_TRUE(arr[4].as_string() == \"4\");\n    EXPECT_TRUE(val.as_integer() == 666);\n\n    arr.reverse();\n    EXPECT_TRUE(arr.size() == 5);\n    EXPECT_TRUE(arr[4].as_string() == \"0\");\n    EXPECT_TRUE(arr[3].as_integer() == 1);\n    EXPECT_TRUE(arr[2].as_integer() == 2);\n    EXPECT_TRUE(arr[1].as_string() == \"3\");\n    EXPECT_TRUE(arr[0].as_string() == \"4\");\n\n    arr.shift();\n    arr.reverse();\n    EXPECT_TRUE(arr.size() == 4);\n    EXPECT_TRUE(arr[0].as_string() == \"0\");\n    EXPECT_TRUE(arr[1].as_integer() == 1);\n    EXPECT_TRUE(arr[2].as_integer() == 2);\n    EXPECT_TRUE(arr[3].as_string() == \"3\");\n  }\n\n  {\n    JsonArray::reverse_iterator it1 = JsonArray::iterator();\n    JsonArray::reverse_iterator it2 = JsonArray::reverse_iterator();\n    EXPECT_TRUE(it1 == it2);\n\n    JsonArray::iterator it3 = JsonArray::reverse_iterator();\n    JsonArray::iterator it4 = JsonArray::iterator();\n    EXPECT_TRUE(it3 == it4);\n\n  
  JsonArray::const_iterator it5 = JsonArray::const_iterator();\n    JsonArray::const_iterator it6 = JsonArray::iterator();\n    EXPECT_TRUE(it5 == it6);\n\n    JsonArray::const_iterator it7 = JsonArray::reverse_iterator();\n    JsonArray::const_iterator it8 = JsonArray::const_reverse_iterator();\n    EXPECT_TRUE(it7 == it8);\n\n    JsonArray::const_reverse_iterator it9 = JsonArray::const_iterator();\n    JsonArray::const_reverse_iterator it10 = JsonArray::iterator();\n    EXPECT_TRUE(it9 == it10);\n\n    JsonArray::const_reverse_iterator it11 = JsonArray::reverse_iterator();\n    JsonArray::const_reverse_iterator it12 =\n        JsonArray::const_reverse_iterator();\n    EXPECT_TRUE(it11 == it12);\n  }\n\n  {\n    JsonArray arr;\n    arr.resize(1023);\n    EXPECT_TRUE(arr.size() == 1023);\n    EXPECT_TRUE(arr.capacity() == 1024);\n    EXPECT_TRUE(arr[0].is_null());\n    EXPECT_TRUE(arr[1022].is_null());\n  }\n\n  {\n    JsonArray arr;\n    EXPECT_TRUE(arr.capacity() == 0);\n    arr.resize(0);\n    EXPECT_TRUE(arr.capacity() == 32);\n    arr.push(0);\n    EXPECT_TRUE(arr.capacity() == 32);\n    EXPECT_TRUE(arr.size() == 1);\n    arr.resize(0);\n    EXPECT_TRUE(arr.size() == 0);\n    arr.resize(1);\n    EXPECT_TRUE(arr.capacity() == 32);\n    EXPECT_TRUE(arr.size() == 1);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[0, 1]\"));\n    EXPECT_TRUE(val.as_array().front() == 0);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 0);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[-1]\"));\n    EXPECT_TRUE(val.as_array().front() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -1);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[4294967295]\"));\n    EXPECT_TRUE(val.parse(\"[+4294967295]\"));\n    EXPECT_TRUE(val.as_array().front() == 4294967295);\n    
EXPECT_TRUE((int32_t)val.as_array().front().as_integer() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 4294967295);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ 2147483647 ]\"));\n    EXPECT_TRUE(val.parse(\"[ +2147483647 ]\"));\n    EXPECT_TRUE(val.as_array().front() == 2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ -2147483647 ]\"));\n    EXPECT_TRUE(val.as_array().front() == -2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[9223372036854775807]\"));\n    EXPECT_TRUE(val.parse(\"[+9223372036854775807]\"));\n    EXPECT_TRUE(val.as_array().front() == 9223372036854775807uLL);\n    EXPECT_TRUE((int32_t)val.as_array().front().as_integer() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 9223372036854775807uLL);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[-9223372036854775807]\"));\n    EXPECT_TRUE(val.as_array().front() == -9223372036854775807LL);\n    EXPECT_TRUE(val.as_array().front().as_integer() == -9223372036854775807LL);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ 1844674407370955161 ]\"));\n    EXPECT_TRUE(val.parse(\"[ +1844674407370955161 ]\"));\n    EXPECT_TRUE(val.as_array().front() == 1844674407370955161uLL);\n    EXPECT_TRUE(val.as_array().front().as_integer() == 1844674407370955161uLL);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ 18446744073709551615 ]\"));\n    EXPECT_TRUE(val.parse(\"[ +18446744073709551615 ]\"));\n    EXPECT_TRUE(val.as_array().front() == 18446744073709551615uLL);\n  
  EXPECT_TRUE(val.as_array().front().as_integer() == -1);\n    EXPECT_TRUE(val.as_array().front().as_integer() ==\n                JsonValue::integer_type(18446744073709551615uLL));\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ 18446744073709551616 ]\"));\n    EXPECT_FALSE(val.as_array().front().is_integer());\n    EXPECT_TRUE(val.as_array().front() == 18446744073709551616.0);\n    EXPECT_TRUE(val.as_array().front().as_float() == 18446744073709551616.0);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[ 1e+30, 1.3e12 ]\"));\n    EXPECT_TRUE(val.as_array().front() == 1e+30);\n    EXPECT_TRUE(val.as_array().back() == 1.3e12);\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[0,[0]]\"));\n    EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_array().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    val.as_array().push(val);\n    EXPECT_TRUE(val.as_json_string() == \"[0,[0],[0,[0]]]\");\n\n    val.as_array().pop();\n    val.as_array().push(val);\n    EXPECT_TRUE(val.as_json_string() == \"[0,[0],[0,[0]]]\");\n\n    val.as_array().pop();\n    val.as_array().pop();\n    val.as_array().pop();\n    val.as_array().push(val);\n    EXPECT_TRUE(val.as_json_string() == \"[[]]\");\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"[0,[0]]\"));\n    EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_array().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    val.as_array()[0] = val;\n    EXPECT_TRUE(val.as_json_string() == \"[[0,[0]],[0]]\");\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"{\\\"0\\\":[0]}\"));\n    EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_object().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    JsonObject obj = val.as_object();\n    val.as_object()[\"1\"].assign(obj);\n    EXPECT_TRUE(val.as_json_string() == \"{\\\"0\\\":[0],\\\"1\\\":{\\\"0\\\":[0]}}\");\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"{\\\"0\\\":[0]}\"));\n    
EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_object().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    EXPECT_TRUE(val.as_object().set(\"1\", val));\n    EXPECT_TRUE(val.as_json_string() == \"{\\\"0\\\":[0],\\\"1\\\":{\\\"0\\\":[0]}}\");\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"{\\\"0\\\":[0]}\"));\n    EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_object().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    JsonValue val2 = val;\n    val.as_object()[\"1\"] = val2;\n    EXPECT_TRUE(val.as_json_string() == \"{\\\"0\\\":[0],\\\"1\\\":{\\\"0\\\":[0]}}\");\n  }\n\n  {\n    JsonValue val;\n    EXPECT_TRUE(val.parse(\"{\\\"0\\\":[0]}\"));\n    EXPECT_TRUE(val.refer() == 1);\n    EXPECT_TRUE(val.as_object().refer() == 1);\n    EXPECT_TRUE(val.refer() == 0);\n\n    JsonObject obj = val.as_object();\n    val.as_object()[\"1\"] = obj;\n    EXPECT_TRUE(val.as_json_string() == \"{\\\"0\\\":[0],\\\"1\\\":{\\\"0\\\":[0]}}\");\n  }\n}\n\nTEST(Json, JsonString) {\n  {\n    JsonString str1(\"1234567890abcdefghijklmn\");\n    EXPECT_TRUE(str1 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\\\\"1234\\\\\\\\567890abcdefghijklmn\\\\t\");\n    JsonString str2 = \"\\\"1234\\\\567890abcdefghijklmn\\t\";\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\" \\\\/ \\\\\\\\ \\\\\\\" \\\\b \\\\f \\\\n \\\\r \\\\t \");\n    JsonString str2 = \" / \\\\ \\\" \\b \\f \\n \\r \\t \";\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\n\\\\r \\\\u8096 \\\\u5141 \\\\u950B \\\\u000a \\\\u000d\");\n    JsonString str2(\"\\n\\r \\xE8\\x82\\x96 \\xE5\\x85\\x81 \\xE9\\x94\\x8B \\n \\r\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\u007f\");\n    JsonString str2(\"\\x7F\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\u0080\");\n    JsonString str2(\"\\xC2\\x80\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n   
 JsonString str1(\"\\\\u07FF\");\n    JsonString str2(\"\\xDF\\xBF\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\u0800\");\n    JsonString str2(\"\\xE0\\xA0\\x80\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString str1(\"\\\\uFFFF\");\n    JsonString str2(\"\\xEF\\xBF\\xBF\");\n    EXPECT_TRUE(str2 == str1.decode());\n  }\n\n  {\n    JsonString a(\"abcdefg\");\n    JsonString b(\"abcdefl\");\n    JsonString c(\"abcdefg\");\n    EXPECT_TRUE(a == c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b != a);\n\n    EXPECT_TRUE(a.compare(c) == 0);\n    EXPECT_TRUE(b.compare(c) != 0);\n    EXPECT_TRUE(b.compare(c) != 0);\n  }\n\n  {\n    JsonString a(\"abcdefg\\\"\");\n    JsonString b(\"abcd\");\n    JsonString c(\"abcdefg\");\n    EXPECT_TRUE(a != c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b != a);\n  }\n\n  {\n    JsonString a(\"abcd\\0efg\");\n    JsonString b(\"abcd\");\n    JsonString c(\"abcdefg\\0\");\n    EXPECT_TRUE(a != c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b == a);\n    EXPECT_TRUE(a.compare(b) == 0);\n  }\n\n  {\n    JsonString a(\"abcd\\0efg\", 8);\n    JsonString b(\"abcd\");\n    JsonString c(\"abcdefg\\0\");\n    EXPECT_TRUE(a != c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b != a);\n\n    EXPECT_TRUE(a.compare(\"abcd\") == 0);\n    EXPECT_TRUE(b.compare(\"abcd\\0\") == 0);\n    EXPECT_TRUE(c.compare(\"abcdefg\") == 0);\n  }\n\n  {\n    JsonString a(\"abcd\\0efg\", 8);\n    JsonString b(\"abcd\");\n    JsonString c(\"abcd\\0efg\", 8);\n    EXPECT_TRUE(a == c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b != a);\n  }\n\n  {\n    JsonString a;\n    JsonString b(\"\\0\");\n    JsonString c(NULL);\n    EXPECT_TRUE(a == c);\n    EXPECT_TRUE(b == c);\n    EXPECT_TRUE(b == a);\n  }\n\n  {\n    JsonString a;\n    JsonString b(\"\\0\", 1);\n    JsonString c(NULL);\n    EXPECT_TRUE(a == c);\n    EXPECT_TRUE(b != c);\n    EXPECT_TRUE(b != a);\n  }\n\n  {\n    JsonString str1(\n        
\"author:\\\\u8096\\\\u5141\\\\u950b;\\\\r\\\\ntest:\\\\u007f \\\\u0080 \\\\u07ff \"\n        \"\\\\u0800 \\\\uffff\");\n    JsonString str2(\n        \"author:\\xE8\\x82\\x96\\xE5\\x85\\x81\\xE9\\x94\\x8B;\\r\\ntest:\"\n        \"\\x7F \\xC2\\x80 \"\n        \"\\xDF\\xBF \\xE0\\xA0\\x80 \\xEF\\xBF\\xBF\");\n    JsonString str3(\n        \"author:\\xE8\\x82\\x96\\xE5\\x85\\x81\\xE9\\x94\\x8B;\"\n        \"\\\\r\\\\ntest:\\x7F \\xC2\\x80 \"\n        \"\\xDF\\xBF \\xE0\\xA0\\x80 \\xEF\\xBF\\xBF\");\n    EXPECT_TRUE(str2 == str1.decode());\n    EXPECT_TRUE(str2.encode() == str3);\n  }\n\n  {\n    JsonString str1(\"\\\\007f \\\\0080 \\\\u07ff \\\\u0800 \\\\uffff\");\n    JsonString str2(\"\\\\u008\\\\u07ff \\\\u0800 \\\\uffff\");\n    EXPECT_FALSE(str1.decode().is_valid());\n    EXPECT_FALSE(str2.decode().is_valid());\n  }\n\n  {\n    JsonString str1(\" \\x1f \\x0e \\x01 \\x1e / \\\\ AAA\\\" AAA\\b \\f \\n \\r \\t \");\n    JsonString str2(\n        \" \\\\u001f \\\\u000e \\\\u0001 \\\\u001e / \\\\\\\\ AAA\\\\\\\" \"\n        \"AAA\\\\b \\\\f \\\\n \\\\r \"\n        \"\\\\t \");\n    EXPECT_TRUE(str1.encode() == str2);\n    EXPECT_TRUE(str1 == str2.decode());\n  }\n\n  {\n    JsonString jstr;\n\n    EXPECT_TRUE(jstr.capacity() == 0);\n    EXPECT_TRUE(jstr.size() == 0);\n    jstr.reserve(21);\n    EXPECT_TRUE(jstr.capacity() == 32 - 1);\n    EXPECT_TRUE(jstr.size() == 0);\n    jstr.reserve(2);\n    EXPECT_TRUE(jstr.capacity() == 32 - 1);\n    EXPECT_TRUE(jstr.size() == 0);\n    jstr.reserve(32);\n    EXPECT_TRUE(jstr.capacity() == 64 - 1);\n    EXPECT_TRUE(jstr.size() == 0);\n\n    char buf[1000];\n    buf[0] = '\\0';\n    jstr.assign(buf, sizeof(buf));\n    EXPECT_TRUE(jstr.capacity() == 1024 - 1);\n    EXPECT_TRUE(jstr.length() == 1000);\n    EXPECT_TRUE(JsonString(jstr.c_str()) == \"\");\n\n    memcpy(buf, \"abcdef\", 7);\n    jstr.assign(buf, 200);\n    EXPECT_TRUE(jstr.capacity() == 1024 - 1);\n    EXPECT_TRUE(jstr.length() == 200);\n    
EXPECT_TRUE(JsonString(jstr.c_str()) == \"abcdef\");\n  }\n}\n\nTEST(Json, JsonValue) {\n  {\n    EXPECT_TRUE(JsonValue(true) == JsonValue(true));\n    EXPECT_TRUE(JsonValue(false) == JsonValue(false));\n    EXPECT_TRUE(JsonValue((char)'\\r') == JsonValue(0xd));\n    EXPECT_TRUE(JsonValue((char)'\\r') == JsonValue('\\r'));\n    EXPECT_TRUE(JsonValue(10000) == JsonValue(10000));\n    EXPECT_TRUE(JsonValue(0xffff) == JsonValue(0xffff));\n    EXPECT_TRUE(JsonValue(0x10000) == JsonValue(0x10000));\n    EXPECT_TRUE(JsonValue(0xffffffff) == JsonValue(0xffffffff));\n    EXPECT_TRUE(JsonValue(0x100000000) == JsonValue(0x100000000));\n    EXPECT_TRUE(JsonValue(0xffffffffffffffff) == JsonValue(0xffffffffffffffff));\n    EXPECT_TRUE(JsonValue(0.999999) == JsonValue(0.999999));\n    EXPECT_TRUE(JsonValue(false) != JsonValue(0.0));\n    EXPECT_TRUE(JsonValue(0.0) != JsonValue(0));\n    EXPECT_TRUE(JsonValue(\"0.0\") != JsonValue(0));\n    EXPECT_TRUE(JsonValue(\"0.0\") == JsonValue(\"0.0\"));\n    EXPECT_TRUE(JsonValue(std::string(\"0.0001\")) == JsonValue(\"0.0001\"));\n  }\n\n  {\n    EXPECT_EQ(JsonValue(0).as_json_string().as_stl_string(), \"0\");\n    EXPECT_EQ(JsonValue(1).as_json_string().as_stl_string(), \"1\");\n    EXPECT_EQ(JsonValue(-1).as_json_string().as_stl_string(), \"-1\");\n    EXPECT_EQ(JsonValue(99).as_json_string().as_stl_string(), \"99\");\n    EXPECT_EQ(JsonValue(-99).as_json_string().as_stl_string(), \"-99\");\n    EXPECT_EQ(JsonValue(188).as_json_string().as_stl_string(), \"188\");\n    EXPECT_EQ(JsonValue(-188).as_json_string().as_stl_string(), \"-188\");\n    EXPECT_EQ(JsonValue(1520).as_json_string().as_stl_string(), \"1520\");\n    EXPECT_EQ(JsonValue(-1520).as_json_string().as_stl_string(), \"-1520\");\n\n    EXPECT_EQ(JsonValue(12345).as_json_string().as_stl_string(), \"12345\");\n    EXPECT_EQ(JsonValue(-12345).as_json_string().as_stl_string(), \"-12345\");\n\n    EXPECT_EQ(JsonValue(65535).as_json_string().as_stl_string(), \"65535\");\n    
EXPECT_EQ(JsonValue(-65535).as_json_string().as_stl_string(), \"-65535\");\n\n    EXPECT_EQ(JsonValue(65536).as_json_string().as_stl_string(), \"65536\");\n    EXPECT_EQ(JsonValue(-65536).as_json_string().as_stl_string(), \"-65536\");\n\n    EXPECT_EQ(JsonValue(234567).as_json_string().as_stl_string(), \"234567\");\n    EXPECT_EQ(JsonValue(-234567).as_json_string().as_stl_string(), \"-234567\");\n\n    EXPECT_EQ(JsonValue(1234567890).as_json_string().as_stl_string(),\n              \"1234567890\");\n    EXPECT_EQ(JsonValue(-1234567890).as_json_string().as_stl_string(),\n              \"-1234567890\");\n\n    EXPECT_EQ(JsonValue(9999999999).as_json_string().as_stl_string(),\n              \"9999999999\");\n    EXPECT_EQ(JsonValue(-9999999999).as_json_string().as_stl_string(),\n              \"-9999999999\");\n\n    EXPECT_EQ(JsonValue(4294967295).as_json_string().as_stl_string(),\n              \"4294967295\");\n    // EXPECT_EQ(JsonValue(-4294967295).as_json_string().as_stl_string(),\n    //           \"-4294967295LL\");\n\n    EXPECT_EQ(JsonValue(4294967296).as_json_string().as_stl_string(),\n              \"4294967296\");\n    EXPECT_EQ(JsonValue(-4294967296).as_json_string().as_stl_string(),\n              \"-4294967296\");\n\n    EXPECT_EQ(JsonValue(281474976710655).as_json_string().as_stl_string(),\n              \"281474976710655\");\n    EXPECT_EQ(JsonValue(-281474976710655).as_json_string().as_stl_string(),\n              \"-281474976710655\");\n\n    EXPECT_EQ(JsonValue(281474976710656).as_json_string().as_stl_string(),\n              \"281474976710656\");\n    EXPECT_EQ(JsonValue(-281474976710656).as_json_string().as_stl_string(),\n              \"-281474976710656\");\n\n    EXPECT_EQ(JsonValue(9223372036854775807ll).as_json_string().as_stl_string(),\n              \"9223372036854775807\");\n    EXPECT_EQ(\n        JsonValue(-9223372036854775807ll).as_json_string().as_stl_string(),\n        \"-9223372036854775807\");\n  }\n\n  {\n    JsonValue jval;\n\n   
 jval.assign(\"aaaaaaaaaaaa\");\n    jval.assign(\"122326263\", 10);\n    jval.assign(200);\n    jval.assign(0xffffffffffff);\n  }\n\n  {\n    JsonValue val1;\n    JsonValue val2;\n    JsonValue val3;\n    JsonValue val4;\n\n    val1 = \"abcdef\";\n    val2 = val1;\n    val3 = val1;\n\n    EXPECT_TRUE(val1.refer() == 3);\n    EXPECT_TRUE(val2.refer() == 3);\n    EXPECT_TRUE(val3.refer() == 3);\n    EXPECT_TRUE(val3.as_stl_string() == \"abcdef\");\n\n    JsonString &str = val1.as_string();\n    EXPECT_TRUE(str.refer() == 2);\n    val4 = val1;\n    str = \"123456\";\n\n    EXPECT_TRUE(val1.refer() == 0);\n    EXPECT_TRUE(val2.refer() == 2);\n    EXPECT_TRUE(val3.refer() == 2);\n    EXPECT_TRUE(val4.refer() == 1);\n    EXPECT_TRUE(val1.as_stl_string() == \"123456\");\n    EXPECT_TRUE(val2.as_stl_string() == \"abcdef\");\n    EXPECT_TRUE(val3.as_stl_string() == \"abcdef\");\n    EXPECT_TRUE(val4.as_stl_string() == \"abcdef\");\n  }\n\n  {\n    JsonValue val1;\n    JsonValue val2;\n    JsonValue val3;\n\n    val1[\"abcd\"] = \"1234\";\n    val2 = val1.as_object();\n    val3 = val2;\n\n    EXPECT_TRUE(val1.refer() == 0);\n    EXPECT_TRUE(val2.refer() == 2);\n    EXPECT_TRUE(val3.refer() == 2);\n    EXPECT_TRUE(val1.as_object().refer() == 0);\n    EXPECT_TRUE(val2.as_object().refer() == 2);\n    EXPECT_TRUE(val3.as_object().refer() == 2);\n  }\n}\n\nTEST(Json, General) {\n  {\n    JsonObject obj;\n    JsonArray arr;\n    JsonValue val;\n    JsonString str;\n\n    EXPECT_TRUE(obj.refer() == -1);\n    EXPECT_TRUE(arr.refer() == -1);\n    EXPECT_TRUE(val.refer() == -1);\n    EXPECT_TRUE(str.refer() == -1);\n\n    val = str;\n    EXPECT_TRUE(val.refer() == 1);\n\n    val = obj;\n    EXPECT_TRUE(val.refer() == 1);\n\n    val = arr;\n    EXPECT_TRUE(val.refer() == 1);\n\n    arr.push(\"acdef\");\n    EXPECT_TRUE(arr.refer() == 1);\n    arr.begin();\n    EXPECT_TRUE(arr.refer() == 0);\n    arr.end();\n    EXPECT_TRUE(arr.refer() == 0);\n\n    JsonArray arr1 = arr;\n    JsonArray 
arr2 = arr1;\n    JsonArray arr3 = arr;\n    EXPECT_TRUE(arr1.refer() == 2);\n    EXPECT_TRUE(arr2.refer() == 2);\n    EXPECT_TRUE(arr3.refer() == 1);\n    EXPECT_TRUE(arr.refer() == 0);\n\n    obj.set(\"1111\", \"null\");\n    EXPECT_TRUE(obj.refer() == 1);\n    obj.rbegin();\n    EXPECT_TRUE(obj.refer() == 0);\n    obj.rend();\n    EXPECT_TRUE(obj.refer() == 0);\n\n    JsonObject obj1 = obj;\n    JsonObject obj2 = obj1;\n    JsonObject obj3 = obj;\n    EXPECT_TRUE(obj1.refer() == 2);\n    EXPECT_TRUE(obj2.refer() == 2);\n    EXPECT_TRUE(obj3.refer() == 1);\n    EXPECT_TRUE(obj.refer() == 0);\n  }\n\n  {\n    short a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    unsigned short a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    int a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    unsigned int a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    long a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    unsigned long a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    long long a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    unsigned long long a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    float a = 1;\n    enum { A = 1, B = 2 };\n    
EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    double a = 1;\n    enum { A = 1, B = 2 };\n    EXPECT_TRUE(a == A);\n    EXPECT_TRUE(A == a);\n    EXPECT_TRUE(a != B);\n    EXPECT_TRUE(B != a);\n  }\n\n  {\n    EXPECT_TRUE(JsonString() == JsonString());\n    EXPECT_TRUE(JsonValue() == JsonValue());\n    EXPECT_TRUE(JsonObject() == JsonObject());\n    EXPECT_TRUE(JsonArray() == JsonArray());\n\n    EXPECT_FALSE((JsonString() != JsonString()));\n    EXPECT_FALSE((JsonValue() != JsonValue()));\n    EXPECT_FALSE((JsonObject() != JsonObject()));\n    EXPECT_FALSE((JsonArray() != JsonArray()));\n\n    EXPECT_TRUE(JsonString() != JsonValue());\n    EXPECT_TRUE(JsonObject() != JsonValue());\n    EXPECT_TRUE(JsonArray() != JsonValue());\n    EXPECT_TRUE(JsonValue() != JsonString());\n    EXPECT_TRUE(JsonValue() != JsonObject());\n    EXPECT_TRUE(JsonValue() != JsonArray());\n\n    EXPECT_FALSE((JsonString() == JsonValue()));\n    EXPECT_FALSE((JsonObject() == JsonValue()));\n    EXPECT_FALSE((JsonArray() == JsonValue()));\n    EXPECT_FALSE((JsonValue() == JsonString()));\n    EXPECT_FALSE((JsonValue() == JsonObject()));\n    EXPECT_FALSE((JsonValue() == JsonArray()));\n\n    EXPECT_TRUE(JsonString() == std::string());\n    EXPECT_TRUE(std::string() == JsonString());\n    EXPECT_FALSE((JsonString() != std::string()));\n    EXPECT_FALSE((std::string() != JsonString()));\n\n    EXPECT_TRUE(JsonString() == std::string(\"\"));\n    EXPECT_TRUE(std::string(\"\") == JsonString());\n    EXPECT_FALSE((JsonString() != std::string(\"\")));\n    EXPECT_FALSE((std::string(\"\") != JsonString()));\n\n    EXPECT_TRUE(JsonString(\"\") == std::string());\n    EXPECT_TRUE(std::string() == JsonString(\"\"));\n    EXPECT_FALSE((JsonString(\"\") != std::string()));\n    EXPECT_FALSE((std::string() != JsonString(\"\")));\n  }\n}\n"
  },
  {
    "path": "tests/ailego/hash/crc32c_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <string>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(Crc32c, Crc32c) {\n  {\n    char data[] = \"\";\n    EXPECT_EQ(0u, ailego::Crc32c::Hash(data, strlen(data), 0u));\n  }\n  {\n    char data[] = \"123456789\";\n    EXPECT_EQ(0x58E3FA20u, ailego::Crc32c::Hash(data, strlen(data), 0));\n  }\n  {\n    char data[] = \"whiz bang boom\";\n    EXPECT_EQ(0x8CAE40C8u, ailego::Crc32c::Hash(data, strlen(data), 0u));\n    EXPECT_EQ(0xDF19F0C8u, ailego::Crc32c::Hash(data, strlen(data), 5678));\n  }\n  {\n    char data[] = \"foo bar baz\";\n    EXPECT_EQ(0xF58C78ACu, ailego::Crc32c::Hash(data, strlen(data), 0u));\n    EXPECT_EQ(0x348DACCEu, ailego::Crc32c::Hash(data, strlen(data), 1234u));\n  }\n  {\n    uint32_t result[10] = {3263744690, 2184491954, 1881115848, 3193814825,\n                           1570985216, 371133708,  2843540871, 3970904592,\n                           1491335712, 551906596};\n    char data[] = \"123456789\";\n    for (size_t i = 0; i < 10; ++i) {\n      EXPECT_EQ(result[i], ailego::Crc32c::Hash(data, i + 1, 0u));\n    }\n  }\n  {\n    uint8_t data = 0;\n    EXPECT_EQ(0u, ailego::Crc32c::Hash(&data, sizeof(data), 0u));\n    EXPECT_NE(0u, ailego::Crc32c::Hash(&data, sizeof(data), 55u));\n  }\n\n  {\n    char 
test1[] = \"Hello world\";\n    std::string test2(\"Hello world\");\n\n    EXPECT_EQ(ailego::Crc32c::Hash(test1, strlen(test1), 0u),\n              ailego::Crc32c::Hash(test2.data(), test2.size(), 0u));\n    EXPECT_EQ(ailego::Crc32c::Hash(test1, sizeof(test1) - 1, 0u),\n              ailego::Crc32c::Hash(test2.data(), test2.size(), 0u));\n\n    EXPECT_EQ(ailego::Crc32c::Hash(test1, strlen(test1), 1),\n              ailego::Crc32c::Hash(test2.data(), test2.size(), 1));\n    EXPECT_EQ(ailego::Crc32c::Hash(test1, sizeof(test1) - 1, 1),\n              ailego::Crc32c::Hash(test2.data(), test2.size(), 1));\n\n    EXPECT_NE(ailego::Crc32c::Hash(test1, 0u),\n              ailego::Crc32c::Hash(test1, 1, 0u));\n    EXPECT_NE(ailego::Crc32c::Hash(test1, sizeof(test1) - 1, 0u),\n              ailego::Crc32c::Hash(test1, sizeof(test1) - 1, 1));\n    EXPECT_NE(ailego::Crc32c::Hash(test2.data(), test2.size(), 0u),\n              ailego::Crc32c::Hash(test2.data(), test2.size(), 1));\n  }\n}\n\nTEST(Crc32c, Crc32cChecksum) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  {\n    size_t len = 10000;\n    std::string str;\n\n    for (size_t i = 0; i < len; i++) {\n      str.push_back((char)rand());\n    }\n\n    *((uint32_t *)str.data()) = 0u;\n    uint32_t crc = ailego::Crc32c::Hash(str.data(), str.size(), 0u);\n\n    *((uint32_t *)str.data()) = crc;\n    EXPECT_EQ(crc, ailego::Crc32c::Hash(str.data(), str.size(), crc));\n\n    uint32_t crc2 = ailego::Crc32c::Hash(str.data() + 4, str.size() - 4, 0);\n    EXPECT_EQ(crc2, ailego::Crc32c::Hash(&crc, 0, crc2));\n  }\n  {\n    size_t len = 20000;\n    std::string str;\n\n    for (size_t i = 0; i < len; i++) {\n      str.push_back((char)rand());\n    }\n\n    *((uint32_t *)str.data()) = 0xffffffffu;\n    uint32_t crc = ailego::Crc32c::Hash(str.data(), str.size(), 0xffffffffu);\n\n    *((uint32_t *)str.data()) = crc;\n    EXPECT_EQ(crc, ailego::Crc32c::Hash(str.data(), str.size(), crc));\n\n    uint32_t crc2 = 
ailego::Crc32c::Hash(str.data() + 4, str.size() - 4, 0);\n    EXPECT_EQ(crc2, ailego::Crc32c::Hash(&crc, 0, crc2));\n  }\n}\n\nTEST(Crc32c, Crc32cBenchmark) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  size_t len = 100000;\n  std::vector<uint32_t> data;\n  for (size_t i = 0; i < len; ++i) {\n    data.push_back((uint32_t)rand());\n  }\n\n  {\n    uint64_t t1 = ailego::Monotime::MicroSeconds();\n    uint32_t hash =\n        ailego::Crc32c::Hash(&data[0], data.size() * sizeof(uint32_t), 0u);\n    for (int i = 0; i < 100; ++i) {\n      hash =\n          ailego::Crc32c::Hash(&data[0], data.size() * sizeof(uint32_t), hash);\n    }\n    uint64_t t2 = ailego::Monotime::MicroSeconds();\n    printf(\"ailego::Crc32c::Hash = %u: %u us\\n\", hash, (uint32_t)(t2 - t1));\n  }\n}\n"
  },
  {
    "path": "tests/ailego/hash/jump_hash_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <set>\n#include <string>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/hash/jump_hash.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nTEST(JumpHash, JumpHash) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<uint32_t> dist1(25353195, 25358555);\n  std::uniform_int_distribution<uint32_t> dist2(1, 10000);\n  std::set<uint32_t> result1;\n  std::set<uint32_t> result2;\n\n  const int total = 10000;\n  for (int i = 0; i < total; ++i) {\n    uint32_t ticket = dist1(gen);\n    uint32_t signal = dist2(gen);\n\n    uint64_t key = ((uint64_t)signal << 32) | ticket;\n    uint32_t hash1 = (JumpHash(key, 32) << 27) | (ticket & 0x7ffffff);\n\n    uint32_t hash2 = (signal << 27) | (ticket & 0x7ffffff);\n    result1.insert(hash1);\n    result2.insert(hash2);\n  }\n  printf(\"Conflict 1: %f\\n\", (double)(total - result1.size()) / (double)total);\n  printf(\"Conflict 2: %f\\n\", (double)(total - result2.size()) / (double)total);\n}\n"
  },
  {
    "path": "tests/ailego/internal/cpu_features_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/internal/cpu_features.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec::ailego::internal;\n\nTEST(CpuFeatures, General) {\n  std::cout << \"* Intrinsics:       \" << CpuFeatures::Intrinsics() << std::endl;\n  std::cout << \"* F16C:             \" << CpuFeatures::F16C() << std::endl;\n  std::cout << \"* SSE:              \" << CpuFeatures::SSE() << std::endl;\n  std::cout << \"* SSE2:             \" << CpuFeatures::SSE2() << std::endl;\n  std::cout << \"* SSE3:             \" << CpuFeatures::SSE3() << std::endl;\n  std::cout << \"* SSSE3:            \" << CpuFeatures::SSSE3() << std::endl;\n  std::cout << \"* SSE4_1:           \" << CpuFeatures::SSE4_1() << std::endl;\n  std::cout << \"* SSE4_2:           \" << CpuFeatures::SSE4_2() << std::endl;\n  std::cout << \"* AVX:              \" << CpuFeatures::AVX() << std::endl;\n  std::cout << \"* AVX2:             \" << CpuFeatures::AVX2() << std::endl;\n  std::cout << \"* AVX512F:          \" << CpuFeatures::AVX512F() << std::endl;\n  std::cout << \"* AVX512DQ:         \" << CpuFeatures::AVX512DQ() << std::endl;\n  std::cout << \"* AVX512PF:         \" << CpuFeatures::AVX512PF() << std::endl;\n  std::cout << \"* AVX512ER:         \" << CpuFeatures::AVX512ER() << std::endl;\n  std::cout << \"* AVX512CD:         \" << CpuFeatures::AVX512CD() << std::endl;\n  std::cout << \"* 
AVX512BW:         \" << CpuFeatures::AVX512BW() << std::endl;\n  std::cout << \"* AVX512VL:         \" << CpuFeatures::AVX512VL() << std::endl;\n  std::cout << \"* AVX512_IFMA:      \" << CpuFeatures::AVX512_IFMA()\n            << std::endl;\n  std::cout << \"* AVX512_VBMI:      \" << CpuFeatures::AVX512_VBMI()\n            << std::endl;\n  std::cout << \"* AVX512_VBMI2:     \" << CpuFeatures::AVX512_VBMI2()\n            << std::endl;\n  std::cout << \"* AVX512_VNNI:      \" << CpuFeatures::AVX512_VNNI()\n            << std::endl;\n  std::cout << \"* AVX512_BITALG:    \" << CpuFeatures::AVX512_BITALG()\n            << std::endl;\n  std::cout << \"* AVX512_VPOPCNTDQ: \" << CpuFeatures::AVX512_VPOPCNTDQ()\n            << std::endl;\n  std::cout << \"* AVX512_4VNNIW:    \" << CpuFeatures::AVX512_4VNNIW()\n            << std::endl;\n  std::cout << \"* AVX512_4FMAPS:    \" << CpuFeatures::AVX512_4FMAPS()\n            << std::endl;\n  std::cout << \"* AVX512_FP16:      \" << CpuFeatures::AVX512_FP16()\n            << std::endl;\n  std::cout << \"* CX8:              \" << CpuFeatures::CX8() << std::endl;\n  std::cout << \"* CX16:             \" << CpuFeatures::CX16() << std::endl;\n  std::cout << \"* PCLMULQDQ:        \" << CpuFeatures::PCLMULQDQ() << std::endl;\n  std::cout << \"* VPCLMULQDQ:       \" << CpuFeatures::VPCLMULQDQ() << std::endl;\n  std::cout << \"* CMOV:             \" << CpuFeatures::CMOV() << std::endl;\n  std::cout << \"* MOVBE:            \" << CpuFeatures::MOVBE() << std::endl;\n  std::cout << \"* ERMS:             \" << CpuFeatures::ERMS() << std::endl;\n  std::cout << \"* POPCNT:           \" << CpuFeatures::POPCNT() << std::endl;\n  std::cout << \"* XSAVE:            \" << CpuFeatures::XSAVE() << std::endl;\n  std::cout << \"* FMA:              \" << CpuFeatures::FMA() << std::endl;\n  std::cout << \"* ADX:              \" << CpuFeatures::ADX() << std::endl;\n  std::cout << \"* GFNI:             \" << CpuFeatures::GFNI() << std::endl;\n  std::cout 
<< \"* AES:              \" << CpuFeatures::AES() << std::endl;\n  std::cout << \"* VAES:             \" << CpuFeatures::VAES() << std::endl;\n  std::cout << \"* RDSEED:           \" << CpuFeatures::RDSEED() << std::endl;\n  std::cout << \"* RDRAND:           \" << CpuFeatures::RDRAND() << std::endl;\n  std::cout << \"* SHA:              \" << CpuFeatures::SHA() << std::endl;\n  std::cout << \"* BMI1:             \" << CpuFeatures::BMI1() << std::endl;\n  std::cout << \"* BMI2:             \" << CpuFeatures::BMI2() << std::endl;\n  std::cout << \"* CLFLUSH:          \" << CpuFeatures::CLFLUSH() << std::endl;\n  std::cout << \"* CLFLUSHOPT:       \" << CpuFeatures::CLFLUSHOPT() << std::endl;\n  std::cout << \"* CLWB:             \" << CpuFeatures::CLWB() << std::endl;\n  std::cout << \"* RDPID:            \" << CpuFeatures::RDPID() << std::endl;\n  std::cout << \"* FPU:              \" << CpuFeatures::FPU() << std::endl;\n  std::cout << \"* HT:               \" << CpuFeatures::HT() << std::endl;\n  std::cout << \"* VMX:              \" << CpuFeatures::VMX() << std::endl;\n  std::cout << \"* HYPERVISOR:       \" << CpuFeatures::HYPERVISOR() << std::endl;\n\n// #if defined(__AVX512VBMI2__)\n//     EXPECT_TRUE(CpuFeatures::AVX512VBMI2());\n// #endif\n// #if defined(__AVX512VBMI__)\n//     EXPECT_TRUE(CpuFeatures::AVX512VBMI());\n// #endif\n// #if defined(__AVX512VL__)\n//     EXPECT_TRUE(CpuFeatures::AVX512VL());\n// #endif\n// #if defined(__AVX512BW__)\n//     EXPECT_TRUE(CpuFeatures::AVX512BW());\n// #endif\n// #if defined(__AVX512CD__)\n//     EXPECT_TRUE(CpuFeatures::AVX512CD());\n// #endif\n// #if defined(__AVX512ER__)\n//     EXPECT_TRUE(CpuFeatures::AVX512ER());\n// #endif\n// #if defined(__AVX512PF__)\n//     EXPECT_TRUE(CpuFeatures::AVX512PF());\n// #endif\n// #if defined(__AVX512IFMA__)\n//     EXPECT_TRUE(CpuFeatures::AVX512IFMA());\n// #endif\n// #if defined(__AVX512DQ__)\n//     EXPECT_TRUE(CpuFeatures::AVX512DQ());\n// #endif\n// #if 
defined(__AVX512F__)\n//     EXPECT_TRUE(CpuFeatures::AVX512F());\n// #endif\n#if defined(__AVX2__)\n  EXPECT_TRUE(CpuFeatures::AVX2());\n  EXPECT_TRUE(CpuFeatures::AVX());\n  EXPECT_TRUE(CpuFeatures::SSE4_2());\n  EXPECT_TRUE(CpuFeatures::SSE4_1());\n  EXPECT_TRUE(CpuFeatures::SSSE3());\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__AVX__)\n  EXPECT_TRUE(CpuFeatures::AVX());\n  EXPECT_TRUE(CpuFeatures::SSE4_2());\n  EXPECT_TRUE(CpuFeatures::SSE4_1());\n  EXPECT_TRUE(CpuFeatures::SSSE3());\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__SSE4_2__)\n  EXPECT_TRUE(CpuFeatures::SSE4_2());\n  EXPECT_TRUE(CpuFeatures::SSE4_1());\n  EXPECT_TRUE(CpuFeatures::SSSE3());\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n  EXPECT_TRUE(CpuFeatures::POPCNT());\n#endif\n#if defined(__SSE4_1__)\n  EXPECT_TRUE(CpuFeatures::SSE4_1());\n  EXPECT_TRUE(CpuFeatures::SSSE3());\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__SSSE3__)\n  EXPECT_TRUE(CpuFeatures::SSSE3());\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__SSE3__)\n  EXPECT_TRUE(CpuFeatures::SSE3());\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__SSE2__)\n  EXPECT_TRUE(CpuFeatures::SSE2());\n  EXPECT_TRUE(CpuFeatures::SSE());\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__SSE__)\n  EXPECT_TRUE(CpuFeatures::SSE());\n  
EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n#if defined(__MMX__)\n  EXPECT_TRUE(CpuFeatures::MMX());\n#endif\n}\n\n\nTEST(CpuFeatures, Static) {\n  std::cout << \"* F16C:             \" << CpuFeatures::static_flags_.F16C\n            << std::endl;\n  std::cout << \"* SSE:              \" << CpuFeatures::static_flags_.SSE\n            << std::endl;\n  std::cout << \"* SSE2:             \" << CpuFeatures::static_flags_.SSE2\n            << std::endl;\n  std::cout << \"* SSE3:             \" << CpuFeatures::static_flags_.SSE3\n            << std::endl;\n  std::cout << \"* SSSE3:            \" << CpuFeatures::static_flags_.SSSE3\n            << std::endl;\n  std::cout << \"* SSE4_1:           \" << CpuFeatures::static_flags_.SSE4_1\n            << std::endl;\n  std::cout << \"* SSE4_2:           \" << CpuFeatures::static_flags_.SSE4_2\n            << std::endl;\n  std::cout << \"* AVX:              \" << CpuFeatures::static_flags_.AVX\n            << std::endl;\n  std::cout << \"* AVX2:             \" << CpuFeatures::static_flags_.AVX2\n            << std::endl;\n  std::cout << \"* AVX512F:          \" << CpuFeatures::static_flags_.AVX512F\n            << std::endl;\n  std::cout << \"* AVX512DQ:         \" << CpuFeatures::static_flags_.AVX512DQ\n            << std::endl;\n  std::cout << \"* AVX512PF:         \" << CpuFeatures::static_flags_.AVX512PF\n            << std::endl;\n  std::cout << \"* AVX512ER:         \" << CpuFeatures::static_flags_.AVX512ER\n            << std::endl;\n  std::cout << \"* AVX512CD:         \" << CpuFeatures::static_flags_.AVX512CD\n            << std::endl;\n  std::cout << \"* AVX512BW:         \" << CpuFeatures::static_flags_.AVX512BW\n            << std::endl;\n  std::cout << \"* AVX512VL:         \" << CpuFeatures::static_flags_.AVX512VL\n            << std::endl;\n  std::cout << \"* AVX512_IFMA:      \" << CpuFeatures::static_flags_.AVX512_IFMA\n            << std::endl;\n  std::cout << \"* AVX512_VBMI:      \" << 
CpuFeatures::static_flags_.AVX512_VBMI\n            << std::endl;\n  std::cout << \"* AVX512_VBMI2:     \" << CpuFeatures::static_flags_.AVX512_VBMI2\n            << std::endl;\n  std::cout << \"* AVX512_VNNI:      \" << CpuFeatures::static_flags_.AVX512_VNNI\n            << std::endl;\n  std::cout << \"* AVX512_BITALG:    \"\n            << CpuFeatures::static_flags_.AVX512_BITALG << std::endl;\n  std::cout << \"* AVX512_VPOPCNTDQ: \"\n            << CpuFeatures::static_flags_.AVX512_VPOPCNTDQ << std::endl;\n  std::cout << \"* AVX512_4VNNIW:    \"\n            << CpuFeatures::static_flags_.AVX512_4VNNIW << std::endl;\n  std::cout << \"* AVX512_4FMAPS:    \"\n            << CpuFeatures::static_flags_.AVX512_4FMAPS << std::endl;\n  std::cout << \"* AVX512_FP16:      \" << CpuFeatures::static_flags_.AVX512_FP16\n            << std::endl;\n  std::cout << \"* CX8:              \" << CpuFeatures::static_flags_.CX8\n            << std::endl;\n  std::cout << \"* CX16:             \" << CpuFeatures::static_flags_.CX16\n            << std::endl;\n  std::cout << \"* PCLMULQDQ:        \" << CpuFeatures::static_flags_.PCLMULQDQ\n            << std::endl;\n  std::cout << \"* VPCLMULQDQ:       \" << CpuFeatures::static_flags_.VPCLMULQDQ\n            << std::endl;\n  std::cout << \"* CMOV:             \" << CpuFeatures::static_flags_.CMOV\n            << std::endl;\n  std::cout << \"* MOVBE:            \" << CpuFeatures::static_flags_.MOVBE\n            << std::endl;\n  std::cout << \"* ERMS:             \" << CpuFeatures::static_flags_.ERMS\n            << std::endl;\n  std::cout << \"* POPCNT:           \" << CpuFeatures::static_flags_.POPCNT\n            << std::endl;\n  std::cout << \"* XSAVE:            \" << CpuFeatures::static_flags_.XSAVE\n            << std::endl;\n  std::cout << \"* FMA:              \" << CpuFeatures::static_flags_.FMA\n            << std::endl;\n  std::cout << \"* ADX:              \" << CpuFeatures::static_flags_.ADX\n            << std::endl;\n  
std::cout << \"* GFNI:             \" << CpuFeatures::static_flags_.GFNI\n            << std::endl;\n  std::cout << \"* AES:              \" << CpuFeatures::static_flags_.AES\n            << std::endl;\n  std::cout << \"* VAES:             \" << CpuFeatures::static_flags_.VAES\n            << std::endl;\n  std::cout << \"* RDSEED:           \" << CpuFeatures::static_flags_.RDSEED\n            << std::endl;\n  std::cout << \"* RDRAND:           \" << CpuFeatures::static_flags_.RDRAND\n            << std::endl;\n  std::cout << \"* SHA:              \" << CpuFeatures::static_flags_.SHA\n            << std::endl;\n  std::cout << \"* BMI1:             \" << CpuFeatures::static_flags_.BMI1\n            << std::endl;\n  std::cout << \"* BMI2:             \" << CpuFeatures::static_flags_.BMI2\n            << std::endl;\n  std::cout << \"* CLFLUSH:          \" << CpuFeatures::static_flags_.CLFLUSH\n            << std::endl;\n  std::cout << \"* CLFLUSHOPT:       \" << CpuFeatures::static_flags_.CLFLUSHOPT\n            << std::endl;\n  std::cout << \"* CLWB:             \" << CpuFeatures::static_flags_.CLWB\n            << std::endl;\n  std::cout << \"* RDPID:            \" << CpuFeatures::static_flags_.RDPID\n            << std::endl;\n  std::cout << \"* FPU:              \" << CpuFeatures::static_flags_.FPU\n            << std::endl;\n  std::cout << \"* HT:               \" << CpuFeatures::static_flags_.HT\n            << std::endl;\n  std::cout << \"* VMX:              \" << CpuFeatures::static_flags_.VMX\n            << std::endl;\n  std::cout << \"* HYPERVISOR:       \" << CpuFeatures::static_flags_.HYPERVISOR\n            << std::endl;\n}"
  },
  {
    "path": "tests/ailego/io/file_lock_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/io/file_lock.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec::ailego;\n\nTEST(FileLock, General) {\n  File file;\n  const char *path = \"file_lock_test.dat\";\n\n  if (!File::IsExist(path)) {\n    ASSERT_TRUE(file.create(path, 128));\n  } else {\n    ASSERT_TRUE(file.open(path, false));\n  }\n\n  FileLock file_lock(file);\n  ASSERT_TRUE(file_lock.lock());\n  ASSERT_TRUE(file_lock.unlock());\n\n  ASSERT_TRUE(file_lock.try_lock_shared());\n  ASSERT_TRUE(file_lock.unlock());\n\n  ASSERT_TRUE(file_lock.lock_shared());\n  ASSERT_TRUE(file_lock.unlock());\n\n  ASSERT_TRUE(file_lock.try_lock());\n  ASSERT_TRUE(file_lock.unlock());\n  file.close();\n}\n"
  },
  {
    "path": "tests/ailego/io/file_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/utility/memory_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/io/file.h>\n\nusing namespace zvec::ailego;\n\nTEST(File, General) {\n  EXPECT_TRUE(File::IsDirectory(\".\"));\n  EXPECT_TRUE(File::IsDirectory(\"..\"));\n  EXPECT_TRUE(File::IsDirectory(\"../\"));\n  EXPECT_TRUE(File::IsDirectory(\"..//\"));\n  EXPECT_TRUE(File::IsDirectory(\"..//\"));\n\n  EXPECT_FALSE(File::IsSymbolicLink(\".\"));\n  EXPECT_FALSE(File::IsSymbolicLink(\"..\"));\n  EXPECT_FALSE(File::IsSymbolicLink(\"../\"));\n  EXPECT_FALSE(File::IsSymbolicLink(\"..//\"));\n  EXPECT_FALSE(File::IsSymbolicLink(\"..//\"));\n\n  EXPECT_FALSE(File::IsRegular(\".\"));\n  EXPECT_FALSE(File::IsRegular(\"..\"));\n  EXPECT_FALSE(File::IsRegular(\"../\"));\n  EXPECT_FALSE(File::IsRegular(\"..//\"));\n  EXPECT_FALSE(File::IsRegular(\"..//\"));\n\n  EXPECT_TRUE(File::IsExist(\".\"));\n  EXPECT_TRUE(File::IsExist(\"..\"));\n  EXPECT_TRUE(File::IsExist(\"../\"));\n  EXPECT_TRUE(File::IsExist(\"..//\"));\n  EXPECT_TRUE(File::IsExist(\"..//\"));\n}\n\nTEST(File, MakePath) {\n  EXPECT_TRUE(File::MakePath(\"\"));\n  EXPECT_TRUE(File::MakePath(\".\"));\n  EXPECT_TRUE(File::MakePath(\"..\"));\n  EXPECT_TRUE(File::MakePath(\"../\"));\n  EXPECT_TRUE(File::MakePath(\"..//\"));\n  EXPECT_TRUE(File::MakePath(\"..//\"));\n  EXPECT_TRUE(File::MakePath(\"/\"));\n\n  
EXPECT_TRUE(File::MakePath(\"./file_test_makepath\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_makepath\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_makepath/1/2/3/\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_makepath/1/2/3\"));\n}\n\nbool TouchFile(const char *path) {\n  std::string buf(path);\n  char *sp = (char *)strrchr(buf.data(), '/');\n  *sp = '\\0';\n  File::MakePath(buf.data());\n  *sp = '/';\n  File file;\n  return file.create(path, 0);\n}\n\nTEST(File, RemoveDirectory) {\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/1/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/1/2/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/1/2/3/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/a/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/a/b/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmdir/a/b/c/1/2/3\"));\n\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/a/b/c/1/2/3/A\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/a/b/c/1/2/3/B\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/C\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/D\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/2/3/E\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/a/b/c/d/F\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/a/b/c/d/G\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/2/a/b/c/d/H\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/2/3/a/b/c/d/I\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/a/1/2/3/J\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/a/b/1/2/3/K\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/2/3/M\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmdir/1/2/a/b/c/d/N\"));\n\n  EXPECT_FALSE(File::RemoveDirectory(\"file_test_rmdir/1/2/a/b/c/d/N\"));\n  EXPECT_FALSE(File::RemoveDirectory(\"file_test_rmdir/1/2/3/a/b/c/d/I\"));\n  EXPECT_FALSE(File::RemoveDirectory(\"file_test_rmdir/C\"));\n  
EXPECT_FALSE(File::RemoveDirectory(\"file_test_rmdir/D\"));\n\n  EXPECT_TRUE(File::IsDirectory(\"file_test_rmdir/\"));\n  EXPECT_TRUE(File::IsDirectory(\"file_test_makepath/\"));\n  EXPECT_TRUE(File::RemoveDirectory(\"file_test_rmdir/\"));\n  EXPECT_TRUE(File::RemoveDirectory(\"file_test_makepath\"));\n}\n\nTEST(File, RemovePath) {\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/1/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/1/2/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/1/2/3/a/b/c/d\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/a/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/a/b/1/2/3\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/a/b/c/1/2/3\"));\n\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/a/b/c/1/2/3/A\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/a/b/c/1/2/3/B\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/C\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/D\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/2/3/E\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/a/b/c/d/F\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/a/b/c/d/G\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/2/a/b/c/d/H\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/2/3/a/b/c/d/I\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/a/1/2/3/J\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/a/b/1/2/3/K\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/2/3/M\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/1/2/a/b/c/d/N\"));\n  EXPECT_TRUE(File::IsExist(\"file_test_rmpath/1/2/a/b/c/d/N\"));\n\n  EXPECT_TRUE(File::IsDirectory(\"file_test_rmpath/\"));\n  EXPECT_TRUE(File::RemovePath(\"file_test_rmpath/\"));\n\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/AAA\"));\n  EXPECT_TRUE(File::MakePath(\"file_test_rmpath/BBB\"));\n  EXPECT_TRUE(TouchFile(\"file_test_rmpath/CCC\"));\n  
EXPECT_TRUE(TouchFile(\"file_test_rmpath/DDD\"));\n  EXPECT_TRUE(File::IsExist(\"file_test_rmpath/BBB\"));\n\n  EXPECT_FALSE(File::RemovePath(\"file_test_rmpath/CCC/\"));\n  EXPECT_FALSE(File::RemovePath(\"file_test_rmpath/DDD/\"));\n  EXPECT_TRUE(File::RemovePath(\"file_test_rmpath/CCC\"));\n  EXPECT_TRUE(File::RemovePath(\"file_test_rmpath/DDD\"));\n  EXPECT_TRUE(File::RemovePath(\"file_test_rmpath\"));\n}\n\nTEST(File, CreateAndOpen) {\n  const char *file_path = \"file_create_testing.tmp\";\n  size_t file_size = 12 * 1022 * 1021;\n\n  File::Delete(file_path);\n  EXPECT_FALSE(File::IsRegular(file_path));\n\n  {\n    File file;\n    EXPECT_FALSE(file.is_valid());\n    EXPECT_TRUE(file.create(file_path, file_size, true));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_TRUE(File::IsRegular(file_path));\n    EXPECT_EQ(file_size, file.size());\n  }\n  // create again with exist file\n  {\n    File file;\n    EXPECT_FALSE(file.is_valid());\n    EXPECT_TRUE(file.create(file_path, file_size / 10));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_FALSE(file.read_only());\n    EXPECT_EQ(file_size / 10, file.size());\n  }\n\n  {\n    File file;\n    EXPECT_FALSE(file.is_valid());\n    EXPECT_TRUE(file.create(file_path, file_size * 3, true));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_FALSE(file.read_only());\n    EXPECT_EQ(file_size * 3, file.size());\n  }\n\n  {\n    File file;\n    EXPECT_TRUE(file.open(file_path, true, true));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_TRUE(file.read_only());\n    EXPECT_EQ(file_size * 3, file.size());\n  }\n\n  {\n    File file;\n    EXPECT_TRUE(file.open(file_path, false, true));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_FALSE(file.read_only());\n    EXPECT_EQ(file_size * 3, file.size());\n  }\n  File::Delete(file_path);\n}\n\nTEST(File, ReadAndWrite) {\n  const char *file_path = \"file_read_testing.tmp\";\n  size_t file_size = 2u * 1024u * 1024u + 12u * 1024;\n\n  File::Delete(file_path);\n  
EXPECT_FALSE(File::IsRegular(file_path));\n\n  File file;\n  EXPECT_FALSE(file.is_valid());\n  EXPECT_TRUE(file.create(file_path, file_size));\n  EXPECT_TRUE(File::IsRegular(file_path));\n\n  EXPECT_TRUE(file.is_valid());\n  EXPECT_EQ(0, file.offset());\n  EXPECT_EQ(file_size, file.size());\n\n  std::string buf;\n  buf.resize(file_size, 0x55);\n  ASSERT_EQ(file_size, buf.size());\n  EXPECT_EQ(file_size, file.write(buf.data(), buf.size()));\n  EXPECT_EQ(file_size, file.size());\n  EXPECT_EQ((ssize_t)buf.size(), file.offset());\n  EXPECT_TRUE(file.flush());\n\n  buf.clear();\n  buf.resize(file_size);\n  file.reset();\n  EXPECT_EQ(file_size, file.read((void *)buf.data(), buf.size()));\n\n  File::Delete(file_path);\n}\n\nTEST(File, MemoryMap) {\n  const char *file_path = \"file_map_testing.tmp\";\n  size_t file_size = 2u * 1024u * 1024u + 12u * 1024;\n  size_t map_offset = MemoryHelper::PageSize() * 16;\n  size_t map_size = file_size - MemoryHelper::PageSize();\n\n  File::Delete(file_path);\n  EXPECT_FALSE(File::IsRegular(file_path));\n\n  File file;\n  EXPECT_FALSE(file.is_valid());\n  EXPECT_TRUE(file.create(file_path, file_size));\n  EXPECT_TRUE(File::IsRegular(file_path));\n  EXPECT_EQ(file_size, file.size());\n\n  void *addr = file.map(map_offset, map_size, 0);\n  EXPECT_TRUE(addr != nullptr);\n  EXPECT_TRUE(File::MemoryFlush(addr, map_size));\n  File::MemoryUnmap(addr, map_size);\n  file.close();\n\n  EXPECT_TRUE(file.open(file_path, true));\n  EXPECT_EQ(file_size, file.size());\n  addr = file.map(map_offset, map_size, 0);\n  EXPECT_TRUE(addr != nullptr);\n  EXPECT_TRUE(File::MemoryFlush(addr, map_size));\n  File::MemoryUnmap(addr, map_size);\n\n  // void *addr1 = file.map(map_offset, map_size, 0);\n  // void *addr2 = file.map(map_offset, map_size, 0);\n  // EXPECT_EQ(addr1, addr2);\n  file.close();\n\n  EXPECT_TRUE(file.open(file_path, true));\n  EXPECT_EQ(file_size, file.size());\n  addr = file.map(map_offset, map_size, File::MMAP_SHARED);\n  EXPECT_TRUE(addr 
!= nullptr);\n  EXPECT_TRUE(File::MemoryFlush(addr, map_size));\n\n#if defined(__linux) || defined(__linux__) || defined(__NetBSD__)\n  EXPECT_TRUE(File::MemoryRemap(addr, map_size, addr, map_size * 2));\n  addr = File::MemoryRemap(addr, map_size, nullptr, map_size * 3);\n  EXPECT_TRUE(addr);\n#endif\n\n  File::MemoryUnmap(addr, map_size);\n  file.close();\n\n#if !defined(_WIN32)\n  addr = File::MemoryMap(map_size, 0);\n  EXPECT_TRUE(addr != nullptr);\n  File::MemoryUnmap(addr, map_size);\n\n  addr = File::MemoryMap(map_size, File::MMAP_SHARED);\n  EXPECT_TRUE(addr != nullptr);\n  File::MemoryUnmap(addr, map_size);\n#endif\n}\n\nTEST(File, Append) {\n  const char *file_path = \"file_append_testing.tmp\";\n  File file;\n  EXPECT_FALSE(file.is_valid());\n  EXPECT_TRUE(file.create(file_path, MemoryHelper::PageSize()));\n  EXPECT_TRUE(File::IsRegular(file_path));\n\n  std::string padding;\n  padding.resize(MemoryHelper::PageSize());\n  for (size_t i = 0; i < 10; ++i) {\n    EXPECT_EQ(padding.size(),\n              file.write(file.size(), padding.data(), padding.size()));\n  }\n  EXPECT_EQ(padding.size() * 11, file.size());\n\n  file.truncate(padding.size() * 7);\n  EXPECT_EQ(padding.size() * 7, file.size());\n\n  file.truncate(padding.size() * 16);\n  EXPECT_EQ(padding.size() * 16, file.size());\n  file.close();\n}\n\nTEST(File, Seek) {\n  const char *file_path = \"file_seek_testing.tmp\";\n  File file;\n  EXPECT_FALSE(file.is_valid());\n  EXPECT_TRUE(file.create(file_path, 0));\n  EXPECT_TRUE(File::IsRegular(file_path));\n\n  std::string padding;\n  padding.resize(MemoryHelper::PageSize());\n  for (size_t i = 0; i < 10; ++i) {\n    EXPECT_EQ(padding.size(), file.write(padding.data(), padding.size()));\n  }\n  EXPECT_EQ(padding.size() * 10, (size_t)file.size());\n  EXPECT_EQ(padding.size() * 10, (size_t)file.offset());\n\n  EXPECT_TRUE(file.seek(0, File::Origin::Begin));\n  EXPECT_EQ(0, file.offset());\n\n  EXPECT_TRUE(file.seek(-20, File::Origin::End));\n  
EXPECT_EQ((ssize_t)file.size() - 20, file.offset());\n\n  EXPECT_TRUE(file.seek(20, File::Origin::Current));\n  EXPECT_EQ((ssize_t)file.size(), file.offset());\n  file.close();\n}\n"
  },
  {
    "path": "tests/ailego/io/mmap_file_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/io/mmap_file.h>\n\nusing namespace zvec::ailego;\n\nTEST(MMapFile, Create) {\n  const char *file_path = \"mmap_file_create_testing.tmp\";\n  size_t file_size = 12 * 1022 * 1021;\n\n  File::Delete(file_path);\n  EXPECT_FALSE(File::IsRegular(file_path));\n\n  {\n    MMapFile file;\n    EXPECT_EQ(0u, file.size());\n    EXPECT_EQ(0u, file.offset());\n    EXPECT_FALSE(file.is_valid());\n    EXPECT_TRUE(file.create(file_path, file_size));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_TRUE(File::IsRegular(file_path));\n\n    memset(file.region(), 0xff, file.size());\n    file.close();\n    file.warmup();\n    EXPECT_FALSE(file.lock());\n    EXPECT_FALSE(file.unlock());\n  }\n  // create again with exist file\n  {\n    MMapFile file;\n    EXPECT_FALSE(file.is_valid());\n    EXPECT_TRUE(file.create(file_path, file_size));\n    EXPECT_TRUE(file.is_valid());\n    EXPECT_FALSE(file.read_only());\n    memset(file.region(), 0xff, file.size());\n  }\n  File::Delete(file_path);\n}\n\nTEST(MMapFile, Open) {\n  const char *file_path = \"mmap_file_open_testing.tmp\";\n  const char *file_path2 = \"mmap_file_open_testing2.tmp\";\n  size_t file_size = 23 * 1022 * 1021;\n  std::string raw_data;\n\n  File::Delete(file_path);\n  raw_data.resize(file_size, 0x74);\n\n  // create a 
file\n  {\n    MMapFile file;\n    EXPECT_TRUE(file.create(file_path, file_size));\n    EXPECT_EQ(file_size, file.size());\n    EXPECT_EQ(0u, file.offset());\n    EXPECT_TRUE(File::IsRegular(file_path));\n    file.warmup();\n    file.lock();\n\n    MMapFile file2 = std::move(file);\n    memset(file2.region(), 0x74, file2.size());\n    EXPECT_EQ(0, memcmp(file2.region(), raw_data.data(), raw_data.size()));\n    file.flush();\n    file2.lock();\n  }\n\n  File::Delete(file_path2);\n  ASSERT_TRUE(File::Rename(file_path, file_path2));\n\n  // open a file\n  {\n    MMapFile file;\n    EXPECT_TRUE(File::IsRegular(file_path2));\n    EXPECT_TRUE(file.open(file_path2, true));\n    EXPECT_TRUE(file.read_only());\n    EXPECT_EQ(0, memcmp(file.region(), raw_data.data(), raw_data.size()));\n    file.lock();\n  }\n  {\n    MMapFile file;\n    MMapFile file2 = std::move(file);\n    EXPECT_TRUE(file2.open(file_path2, false));\n\n    EXPECT_FALSE(file.lock());\n    EXPECT_FALSE(file.unlock());\n    file2.warmup();\n    file2.lock();\n    file2.unlock();\n  }\n  // clean up\n  File::Delete(file_path2);\n}\n\nTEST(MMapFile, ReadAndWrite) {\n  const char *file_path = \"mmap_file_read_testing.tmp\";\n  size_t file_size = 11 * 1022 * 1021;\n\n  File::Delete(file_path);\n  EXPECT_FALSE(File::IsRegular(file_path));\n\n  MMapFile file;\n  EXPECT_EQ(0u, file.size());\n  EXPECT_EQ(0u, file.offset());\n  EXPECT_FALSE(file.is_valid());\n  EXPECT_TRUE(file.create(file_path, file_size));\n  EXPECT_EQ(file_size, file.size());\n  EXPECT_TRUE(file.is_valid());\n  EXPECT_TRUE(File::IsRegular(file_path));\n\n  char buf[] = \"abcdefghijklmnopqrstuvwxyz\";\n  EXPECT_LT(sizeof(buf), file.size());\n  EXPECT_EQ(sizeof(buf), file.write(buf, sizeof(buf)));\n  EXPECT_EQ(0u, file.write(file_size + 2, buf, sizeof(buf)));\n\n  std::string str;\n  str.resize(sizeof(buf) - 1);\n  EXPECT_EQ(str.size(), file.read(0, (uint8_t *)str.data(), str.size()));\n  EXPECT_EQ(str, std::string(buf));\n\n  EXPECT_EQ(11u, 
file.write(file_size - 11u, buf, sizeof(buf)));\n  const void *p1 = nullptr;\n  EXPECT_EQ(11u, file.read(file_size - 11u, &p1, sizeof(buf)));\n  EXPECT_TRUE(!!p1);\n  EXPECT_EQ(std::string((char *)p1, 11u), std::string(buf, 11u));\n\n  EXPECT_EQ(sizeof(buf), file.offset());\n  file.reset();\n  EXPECT_EQ(0u, file.offset());\n\n  std::string str2;\n  str2.resize(sizeof(buf) - 1);\n  EXPECT_EQ(str2.size(), file.read((uint8_t *)str2.data(), str2.size()));\n  EXPECT_EQ(str2, std::string(buf));\n\n  const void *p2 = nullptr;\n  file.reset();\n  EXPECT_EQ(0u, file.read(file_size + 11u, &p2, sizeof(buf)));\n  const void *p3 = nullptr;\n  EXPECT_EQ(sizeof(buf), file.read(&p3, sizeof(buf)));\n  EXPECT_EQ(std::string((char *)p3), std::string(buf));\n\n  char dest[64];\n  EXPECT_EQ(11u, file.read(file_size - 11u, dest, sizeof(dest)));\n  EXPECT_EQ(std::string(dest, 11u), std::string(buf, 11u));\n\n  File::Delete(file_path);\n}\n"
  },
  {
    "path": "tests/ailego/logger/logger_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic void DoLogging() {\n  static int log_count = 0;\n  LOG_INFO(\"DoLogging: %d\", ++log_count);\n}\n\nstatic void DoErrLogging() {\n  static int err_log_count = 0;\n  LOG_ERROR(\"DoErrLogging: %d\", ++err_log_count);\n}\n\nTEST(IndexLogger, General) {\n  ASSERT_TRUE(ailego::Factory<Logger>::Has(\"ConsoleLogger\"));\n\n  for (int i = 0; i < 10; ++i) {\n    LoggerBroker::SetLevel(i);\n    LOG_DEBUG(\"level: %d, %s\", i, \"LOG_DEBUG\");\n    LOG_INFO(\"level: %d, %s\", i, \"LOG_INFO\");\n    LOG_WARN(\"level: %d, %s\", i, \"LOG_WARN\");\n    LOG_ERROR(\"level: %d, %s\", i, \"LOG_ERROR\");\n    LOG_FATAL(\"level: %d, %s\", i, \"LOG_FATAL\");\n  }\n\n  LoggerBroker::SetLevel(0);\n  LOG_DEBUG(\"%s\", std::string(\"LOG_DEBUG\").c_str());\n  LOG_INFO(\"%s\", std::string(\"LOG_INFO\").c_str());\n  LOG_WARN(\"%s\", std::string(\"LOG_WARN\").c_str());\n  LOG_ERROR(\"%s\", std::string(\"LOG_ERROR\").c_str());\n  LOG_FATAL(\"%s\", std::string(\"LOG_FATAL\").c_str());\n\n  ThreadPool pool;\n  for (uint32_t i = 0; i < 20; ++i) {\n    pool.enqueue(Closure::New(DoLogging));\n  }\n  for (uint32_t i = 0; i < 20; ++i) {\n    pool.enqueue(Closure::New(DoErrLogging));\n  }\n  
pool.wake_all();\n  pool.wait_finish();\n\n  LoggerBroker::Unregister();\n  LOG_DEBUG(\"%s\", \"LOG_DEBUG\");\n  LOG_INFO(\"%s\", \"LOG_INFO\");\n  LOG_WARN(\"%s\", \"LOG_WARN\");\n  LOG_ERROR(\"%s\", \"LOG_ERROR\");\n  LOG_FATAL(\"%s\", \"LOG_FATAL\");\n}"
  },
  {
    "path": "tests/ailego/math/cosine_distance_matrix_fp16_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\ninline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\ninline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,\n                            size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float CosineDistance(const FixedVector<Float16, N> &lhs,\n                            const FixedVector<Float16, N> &rhs) {\n  size_t dimension = lhs.size() + 2;\n\n  float l_norm = 0.0f;\n  Norm2Matrix<Float16, 1>::Compute(lhs.data(), N, &l_norm);\n\n  float r_norm = 0.0f;\n  Norm2Matrix<Float16, 1>::Compute(rhs.data(), N, &r_norm);\n\n  std::string lhs_normed;\n\n  lhs_normed.resize(dimension * sizeof(uint16_t));\n\n  Float16 *lhs_buf = reinterpret_cast<Float16 *>(&(lhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    lhs_buf[i] = lhs[i] / l_norm;\n  }\n  
::memcpy(reinterpret_cast<uint16_t *>(&(lhs_normed[0])) + N, &l_norm,\n           sizeof(float));\n\n  std::string rhs_normed;\n\n  rhs_normed.resize(dimension * sizeof(uint16_t));\n\n  Float16 *rhs_buf = reinterpret_cast<Float16 *>(&(rhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    rhs_buf[i] = rhs[i] / r_norm;\n  }\n  ::memcpy(reinterpret_cast<uint16_t *>(&(rhs_normed[0])) + N, &r_norm,\n           sizeof(float));\n\n  return Distance::Cosine(reinterpret_cast<const Float16 *>(lhs_normed.data()),\n                          reinterpret_cast<const Float16 *>(rhs_normed.data()),\n                          dimension);\n}\n\nTEST(DistanceMatrix, Cosine_General) {\n  const float epsilon = 1e-3;\n\n  FixedVector<Float16, 2> a{1.0f, 1.0f}, b{1.0f, 1.0f};\n  EXPECT_NEAR(0.0f, CosineDistance(a, b), epsilon);\n\n  FixedVector<Float16, 3> c{0.2f, 0.9f, 0.6f}, d{0.3f, 0.5f, 0.7f};\n  EXPECT_NEAR(0.072000861f, CosineDistance(c, d), epsilon);\n\n  FixedVector<Float16, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                             5.2f, 2.1f, 7.1f, 6.8f, 1.2f},\n      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n  EXPECT_NEAR(0.28025103f, CosineDistance(e, f), epsilon);\n\n  // FixedVector<Float16, 1> a{0.0f}, b{0.0f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(a, b));\n\n  // FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(c, d));\n\n  // FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(e, f));\n\n  // FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f,\n  // 0.3f}; EXPECT_FLOAT_EQ(0.0f, CosineDistance(g, h));\n\n  // FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n  //     j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(i, j));\n\n  // FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n  //     k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  // 
EXPECT_FLOAT_EQ(0.0f, CosineDistance(l, k));\n\n  // FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n  //     n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(m, n));\n\n  // FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n  //     p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(o, p));\n\n  // FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n  //                           0.5f, 0.6f, 0.7f, 0.8f},\n  //     r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(q, r));\n\n  // FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n  //                            0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n  //     t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(s, t));\n\n  // FixedVector<Float16, 11> u{0.0f},\n  //     v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  // EXPECT_TRUE(MathHelper::IsAlmostEqual(3.84983f, CosineDistance(u, v),\n  // 1000));\n\n  // FixedVector<Float16, 12> w{0.0f},\n  //     x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f,\n  //     0.9f, 1.0f, 1.1f};\n  // EXPECT_TRUE(MathHelper::IsAlmostEqual(5.05897f, CosineDistance(w, x),\n  // 1000));\n\n  // FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f,\n  // 0.6f,\n  //                                     0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  // EXPECT_TRUE(MathHelper::IsAlmostEqual(6.499438f, CosineDistance(y, z),\n  // 1000));\n\n  // FixedVector<Float16, 14> x14{0.0f},\n  //     y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n  //         0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  // EXPECT_TRUE(\n  //     MathHelper::IsAlmostEqual(10.49944f, CosineDistance(x14, y14), 1000));\n\n  // FixedVector<Float16, 15> x15{0.0f},\n  //     y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n  //         0.8f, 
0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  // EXPECT_TRUE(\n  //     MathHelper::IsAlmostEqual(19.49944f, CosineDistance(x15, y15), 1000));\n}\n\n#if 0\ntemplate <size_t M, size_t N>\nvoid TestCosineMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      CosineDistanceMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  CosineDistanceMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));\n  }\n}\n\nTEST(DistanceMatrix, Cosine_1x1) {\n  TestCosineMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x1) {\n  TestCosineMatrix<2, 
1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x2) {\n  TestCosineMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_3x3) {\n  TestCosineMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Cosine_4x1) {\n  TestCosineMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_4x2) {\n  TestCosineMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_4x4) {\n  TestCosineMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x1) {\n  TestCosineMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_8x2) {\n  TestCosineMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_8x4) {\n  TestCosineMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x8) {\n  TestCosineMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x1) {\n  TestCosineMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_16x2) {\n  TestCosineMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_16x4) {\n  TestCosineMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_16x8) {\n  TestCosineMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x16) {\n  TestCosineMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x1) {\n  TestCosineMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_32x2) {\n  TestCosineMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_32x4) {\n  TestCosineMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_32x8) {\n  TestCosineMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_32x16) {\n  TestCosineMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x32) {\n  TestCosineMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x1) {\n  TestCosineMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_64x2) {\n  TestCosineMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_64x4) {\n  TestCosineMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_64x8) {\n  TestCosineMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_64x16) {\n  TestCosineMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_64x32) {\n  TestCosineMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x64) {\n  TestCosineMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Cosine_128x1) {\n  
TestCosineMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_128x2) {\n  TestCosineMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_128x4) {\n  TestCosineMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_128x8) {\n  TestCosineMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_128x16) {\n  TestCosineMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_128x32) {\n  TestCosineMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_128x64) {\n  TestCosineMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Cosine_128x128) {\n  TestCosineMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid CosineBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; 
i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      CosineDistanceMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    CosineDistanceMatrix<Float16, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        CosineDistanceMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {\n  CosineBenchmark<2, 1, 512, 64>();\n  CosineBenchmark<2, 2, 512, 64>();\n  CosineBenchmark<4, 1, 512, 64>();\n  CosineBenchmark<4, 2, 512, 64>();\n  CosineBenchmark<4, 4, 512, 64>();\n  CosineBenchmark<8, 1, 512, 64>();\n  CosineBenchmark<8, 2, 512, 64>();\n  CosineBenchmark<8, 4, 512, 64>();\n  CosineBenchmark<8, 8, 512, 
64>();\n  CosineBenchmark<16, 1, 512, 64>();\n  CosineBenchmark<16, 2, 512, 64>();\n  CosineBenchmark<16, 4, 512, 64>();\n  CosineBenchmark<16, 8, 512, 64>();\n  CosineBenchmark<16, 16, 512, 64>();\n  CosineBenchmark<32, 1, 512, 64>();\n  CosineBenchmark<32, 2, 512, 64>();\n  CosineBenchmark<32, 4, 512, 64>();\n  CosineBenchmark<32, 8, 512, 64>();\n  CosineBenchmark<32, 16, 512, 64>();\n  CosineBenchmark<32, 32, 512, 64>();\n  CosineBenchmark<64, 1, 512, 64>();\n  CosineBenchmark<64, 2, 512, 64>();\n  CosineBenchmark<64, 4, 512, 64>();\n  CosineBenchmark<64, 8, 512, 64>();\n  CosineBenchmark<128, 1, 512, 64>();\n  CosineBenchmark<1, 1, 1024, 256>();\n}\n\n#endif\n"
  },
  {
    "path": "tests/ailego/math/cosine_distance_matrix_fp32_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\ninline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\ninline void MatrixTranspose(float *dst, const float *src, size_t M, size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\ntemplate <size_t N>\nstatic float CosineDistance(const FixedVector<float, N> &lhs,\n                            const FixedVector<float, N> &rhs) {\n  size_t dimension = lhs.size() + 1;\n\n  float l_norm = 0.0f;\n  Norm2Matrix<float, 1>::Compute(lhs.data(), N, &l_norm);\n\n  float r_norm = 0.0f;\n  Norm2Matrix<float, 1>::Compute(rhs.data(), N, &r_norm);\n\n  std::string lhs_normed;\n\n  lhs_normed.resize(dimension * sizeof(float));\n\n  float *lhs_buf = reinterpret_cast<float *>(&(lhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    lhs_buf[i] = lhs[i] / l_norm;\n  }\n  lhs_buf[N] = l_norm;\n\n  std::string rhs_normed;\n\n  
rhs_normed.resize(dimension * sizeof(float));\n\n  float *rhs_buf = reinterpret_cast<float *>(&(rhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    rhs_buf[i] = rhs[i] / r_norm;\n  }\n  rhs_buf[N] = r_norm;\n\n  return Distance::Cosine(reinterpret_cast<const float *>(lhs_normed.data()),\n                          reinterpret_cast<const float *>(rhs_normed.data()),\n                          dimension);\n}\n\nTEST(DistanceMatrix, Cosine_General) {\n  const float epsilon = 1e-3;\n\n  FixedVector<float, 2> a{0.2f, 0.9f}, b{0.3f, 0.5f};\n\n  EXPECT_NEAR(0.05131668f, CosineDistance(a, b), epsilon);\n\n  FixedVector<float, 3> c{0.2f, 0.9f, 0.6f}, d{0.3f, 0.5f, 0.7f};\n\n  EXPECT_NEAR(0.07199293f, CosineDistance(c, d), epsilon);\n\n  FixedVector<float, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                           5.2f, 2.1f, 7.1f, 6.8f, 1.2f},\n      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n\n  EXPECT_NEAR(0.2803060f, CosineDistance(e, f), epsilon);\n}\n\n#if 0\ntemplate <size_t M, size_t N>\nvoid TestCosineMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  // size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t dimension = 4;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  
MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      CosineDistanceMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n\n  CosineDistanceMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));\n  }\n}\n\nTEST(DistanceMatrix, Cosine_1x1) {\n  TestCosineMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x1) {\n  TestCosineMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x2) {\n  TestCosineMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_3x3) {\n  TestCosineMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Cosine_4x1) {\n  TestCosineMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_4x2) {\n  TestCosineMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_4x4) {\n  TestCosineMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x1) {\n  TestCosineMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_8x2) {\n  TestCosineMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_8x4) {\n  TestCosineMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x8) {\n  TestCosineMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x1) {\n  TestCosineMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_16x2) {\n  TestCosineMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_16x4) {\n  TestCosineMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_16x8) {\n  TestCosineMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x16) {\n  TestCosineMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x1) {\n  TestCosineMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_32x2) {\n  TestCosineMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_32x4) {\n  
TestCosineMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_32x8) {\n  TestCosineMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_32x16) {\n  TestCosineMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x32) {\n  TestCosineMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x1) {\n  TestCosineMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_64x2) {\n  TestCosineMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_64x4) {\n  TestCosineMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_64x8) {\n  TestCosineMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_64x16) {\n  TestCosineMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_64x32) {\n  TestCosineMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x64) {\n  TestCosineMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Cosine_128x1) {\n  TestCosineMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_128x2) {\n  TestCosineMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_128x4) {\n  TestCosineMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_128x8) {\n  TestCosineMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_128x16) {\n  TestCosineMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_128x32) {\n  TestCosineMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_128x64) {\n  TestCosineMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Cosine_128x128) {\n  TestCosineMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid CosineBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  
for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      CosineDistanceMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    CosineDistanceMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      
float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        CosineDistanceMatrix<float, 1, 1>::Compute(&matrix_batch[k * dimension],\n                                                   current_query, dimension,\n                                                   &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {\n  CosineBenchmark<2, 1, 512, 64>();\n  CosineBenchmark<2, 2, 512, 64>();\n  CosineBenchmark<4, 1, 512, 64>();\n  CosineBenchmark<4, 2, 512, 64>();\n  CosineBenchmark<4, 4, 512, 64>();\n  CosineBenchmark<8, 1, 512, 64>();\n  CosineBenchmark<8, 2, 512, 64>();\n  CosineBenchmark<8, 4, 512, 64>();\n  CosineBenchmark<8, 8, 512, 64>();\n  CosineBenchmark<16, 1, 512, 64>();\n  CosineBenchmark<16, 2, 512, 64>();\n  CosineBenchmark<16, 4, 512, 64>();\n  CosineBenchmark<16, 8, 512, 64>();\n  CosineBenchmark<16, 16, 512, 64>();\n  CosineBenchmark<32, 1, 512, 64>();\n  CosineBenchmark<32, 2, 512, 64>();\n  CosineBenchmark<32, 4, 512, 64>();\n  CosineBenchmark<32, 8, 512, 64>();\n  CosineBenchmark<32, 16, 512, 64>();\n  CosineBenchmark<32, 32, 512, 64>();\n  CosineBenchmark<64, 1, 512, 64>();\n  CosineBenchmark<64, 2, 512, 64>();\n  CosineBenchmark<64, 4, 512, 64>();\n  CosineBenchmark<64, 8, 512, 64>();\n  CosineBenchmark<128, 1, 512, 64>();\n  CosineBenchmark<1, 1, 1024, 256>();\n}\n\n#endif"
  },
  {
    "path": "tests/ailego/math/cosine_distance_matrix_int8_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <string>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\ninline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\ninline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                            size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float CosineDistance(const FixedVector<int8_t, N> &lhs,\n                            const FixedVector<int8_t, N> &rhs) {\n  size_t dimension = lhs.size() + 4;\n\n  float l_norm = 0.0f;\n  Norm2Matrix<int8_t, 1>::Compute(lhs.data(), N, &l_norm);\n\n  float r_norm = 0.0f;\n  Norm2Matrix<int8_t, 1>::Compute(rhs.data(), N, &r_norm);\n\n  std::string lhs_normed;\n\n  lhs_normed.resize(dimension * sizeof(int8_t));\n\n  int8_t *lhs_buf = reinterpret_cast<int8_t *>(&(lhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    lhs_buf[i] = lhs[i] / l_norm;\n  }\n  ::memcpy(reinterpret_cast<int8_t *>(&(lhs_normed[0])) + N, &l_norm,\n           sizeof(float));\n\n  
std::string rhs_normed;\n\n  rhs_normed.resize(dimension * sizeof(int8_t));\n\n  int8_t *rhs_buf = reinterpret_cast<int8_t *>(&(rhs_normed[0]));\n\n  for (size_t i = 0; i < N; ++i) {\n    rhs_buf[i] = rhs[i] / r_norm;\n  }\n  ::memcpy(reinterpret_cast<int8_t *>(&(rhs_normed[0])) + N, &r_norm,\n           sizeof(float));\n\n  return Distance::Cosine(reinterpret_cast<const int8_t *>(lhs_normed.data()),\n                          reinterpret_cast<const int8_t *>(rhs_normed.data()),\n                          dimension);\n}\n\n#if 0\n\nTEST(DistanceMatrix, Cosine_General) {\n  int8_t a8[] = {127, 0, 1, 2, -127, -127, -127, -127};\n  int8_t b8[] = {-127, -127, -127, -127, 1, 2, 1, 127};\n  int8_t a16[] = {127, 127, 16,   3,   100,  -127, 1,    2,\n                  3,   4,   -127, 100, -127, -127, -127, -127};\n  int8_t b16[] = {-127, 123, -127, -127, -127, -127, 127, 127,\n                  1,    2,   3,    4,    127,  127,  121, 16};\n  int8_t a32[] = {127, 127,  0,    0,   -127, -127, 0,    0,    0,    0, 0,\n                  0,   -127, -127, 127, 127,  0,    0,    -127, -127, 0, 0,\n                  127, 127,  127,  127, 0,    0,    -127, -127, 0,    0};\n  int8_t b32[] = {-127, -127, 0,    0,    127,  127, 0,   0,   0,   0, 0,\n                  0,    127,  127,  -127, -127, 0,   0,   127, 127, 0, 0,\n                  -127, -127, -127, -127, 0,    0,   127, 127, 0,   0};\n\n  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,\n                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,\n                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,\n                  0,   0, -127, 112,  -127, -127, -127, -127, 127,  127,\n                  1,   2, 3,    4,    127,  127,  120};\n  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,\n                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,\n                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,\n                  0,    0, 127,  
127, 80,   111,  122, -127, 1,   2,\n                  3,    4, -127, 112, -127, -127, -127};\n\n  EXPECT_FLOAT_EQ(1.4109956f,\n                  CosineDistance(*FixedVector<int8_t, 8>::Cast(a8),\n                                 *FixedVector<int8_t, 8>::Cast(b8)));\n  EXPECT_FLOAT_EQ(1.3013078f,\n                  CosineDistance(*FixedVector<int8_t, 16>::Cast(a16),\n                                 *FixedVector<int8_t, 16>::Cast(b16)));\n  EXPECT_FLOAT_EQ(2.0f, CosineDistance(*FixedVector<int8_t, 32>::Cast(a32),\n                                       *FixedVector<int8_t, 32>::Cast(b32)));\n  EXPECT_FLOAT_EQ(1.7623165f,\n                  CosineDistance(*FixedVector<int8_t, 47>::Cast(a47),\n                                 *FixedVector<int8_t, 47>::Cast(b47)));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestCosineMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t 
*cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      CosineDistanceMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  CosineDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, Cosine_1x1) {\n  TestCosineMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x1) {\n  TestCosineMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_2x2) {\n  TestCosineMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_3x3) {\n  TestCosineMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Cosine_4x1) {\n  TestCosineMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_4x2) {\n  TestCosineMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_4x4) {\n  TestCosineMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x1) {\n  TestCosineMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_8x2) {\n  TestCosineMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_8x4) {\n  TestCosineMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_8x8) {\n  TestCosineMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x1) {\n  TestCosineMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_16x2) {\n  TestCosineMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_16x4) {\n  TestCosineMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_16x8) {\n  TestCosineMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_16x16) {\n  TestCosineMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x1) {\n  TestCosineMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_32x2) {\n  TestCosineMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_32x4) {\n  TestCosineMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_32x8) {\n  TestCosineMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_32x16) {\n  
TestCosineMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_32x32) {\n  TestCosineMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x1) {\n  TestCosineMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_64x2) {\n  TestCosineMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_64x4) {\n  TestCosineMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_64x8) {\n  TestCosineMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_64x16) {\n  TestCosineMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_64x32) {\n  TestCosineMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_64x64) {\n  TestCosineMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, Cosine_128x1) {\n  TestCosineMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Cosine_128x2) {\n  TestCosineMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Cosine_128x4) {\n  TestCosineMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Cosine_128x8) {\n  TestCosineMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Cosine_128x16) {\n  TestCosineMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Cosine_128x32) {\n  TestCosineMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Cosine_128x64) {\n  TestCosineMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, Cosine_128x128) {\n  TestCosineMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid CosineBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    
query1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      CosineDistanceMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    CosineDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Cosine\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float 
*current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        CosineDistanceMatrix<int8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Cosine (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {\n  CosineBenchmark<2, 1, 512, 128>();\n  CosineBenchmark<2, 2, 512, 128>();\n  CosineBenchmark<4, 1, 512, 128>();\n  CosineBenchmark<4, 2, 512, 128>();\n  CosineBenchmark<4, 4, 512, 128>();\n  CosineBenchmark<8, 1, 512, 128>();\n  CosineBenchmark<8, 2, 512, 128>();\n  CosineBenchmark<8, 4, 512, 128>();\n  CosineBenchmark<8, 8, 512, 128>();\n  CosineBenchmark<16, 1, 512, 128>();\n  CosineBenchmark<16, 2, 512, 128>();\n  CosineBenchmark<16, 4, 512, 128>();\n  CosineBenchmark<16, 8, 512, 128>();\n  CosineBenchmark<16, 16, 512, 128>();\n  CosineBenchmark<32, 1, 512, 128>();\n  CosineBenchmark<32, 2, 512, 128>();\n  CosineBenchmark<32, 4, 512, 128>();\n  CosineBenchmark<32, 8, 512, 128>();\n  CosineBenchmark<32, 16, 512, 128>();\n  CosineBenchmark<32, 32, 512, 128>();\n  CosineBenchmark<64, 1, 512, 128>();\n  CosineBenchmark<64, 2, 512, 128>();\n  CosineBenchmark<64, 4, 512, 128>();\n  CosineBenchmark<64, 8, 512, 128>();\n  CosineBenchmark<128, 1, 512, 128>();\n  CosineBenchmark<1, 1, 1024, 256>();\n}\n\n#endif"
  },
  {
    "path": "tests/ailego/math/euclidean_distance_matrix_fp16_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float EuclideanDistance(const FixedVector<Float16, N> &lhs,\n                               const FixedVector<Float16, N> &rhs) {\n  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float SquaredEuclideanDistance(const FixedVector<Float16, N> &lhs,\n                                      const FixedVector<Float16, N> &rhs) {\n  return Distance::SquaredEuclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, Euclidean_General) {\n  FixedVector<Float16, 1> a{0.0f}, b{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, 
EuclideanDistance(a, b));\n\n  FixedVector<Float16, 3> c{1.0f, 2.0f, 3.0f}, d{2.0f, 4.0f, 6.0f};\n  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(c, d));\n\n  FixedVector<Float16, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                             5.2f, 2.1f, 7.1f, 6.8f, 1.2f},\n      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n  EXPECT_TRUE(\n      MathHelper::IsAlmostEqual(8.86837f, EuclideanDistance(e, f), 1000));\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_General) {\n  FixedVector<Float16, 1> a{0.0f}, b{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(a, b));\n\n  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(c, d));\n\n  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(e, f));\n\n  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(g, h));\n\n  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(i, j));\n\n  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(l, k));\n\n  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(m, n));\n\n  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(o, p));\n\n  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                            0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(q, r));\n\n  FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 
0.4f,\n                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(s, t));\n\n  FixedVector<Float16, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(3.84983f,\n                                        SquaredEuclideanDistance(u, v), 1000));\n\n  FixedVector<Float16, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(5.05897f,\n                                        SquaredEuclideanDistance(w, x), 1000));\n\n  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(6.499438f,\n                                        SquaredEuclideanDistance(y, z), 1000));\n\n  FixedVector<Float16, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      10.49944f, SquaredEuclideanDistance(x14, y14), 1000));\n\n  FixedVector<Float16, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      19.49944f, SquaredEuclideanDistance(x15, y15), 1000));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(32, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> 
query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      EuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  EuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(32, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; 
++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  SquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));\n  }\n}\n\nTEST(DistanceMatrix, Euclidean_1x1) {\n  TestEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x1) {\n  TestEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x2) {\n  TestEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_3x3) {\n  TestEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x1) {\n  TestEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x2) {\n  TestEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x4) {\n  TestEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x1) {\n  TestEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x2) {\n  TestEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x4) {\n  TestEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x8) {\n  TestEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x1) {\n  TestEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x2) {\n  TestEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x4) {\n  
TestEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x8) {\n  TestEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x16) {\n  TestEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x1) {\n  TestEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x2) {\n  TestEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x4) {\n  TestEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x8) {\n  TestEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x16) {\n  TestEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x32) {\n  TestEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x1) {\n  TestEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x2) {\n  TestEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x4) {\n  TestEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x8) {\n  TestEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x16) {\n  TestEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x32) {\n  TestEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x64) {\n  TestEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x1) {\n  TestEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x2) {\n  TestEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x4) {\n  TestEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x8) {\n  TestEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x16) {\n  TestEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x32) {\n  TestEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x64) {\n  TestEuclideanMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x128) {\n  TestEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_1x1) {\n  TestSquaredEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, 
SquaredEuclidean_2x1) {\n  TestSquaredEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x2) {\n  TestSquaredEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_3x3) {\n  TestSquaredEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x1) {\n  TestSquaredEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x2) {\n  TestSquaredEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x4) {\n  TestSquaredEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x1) {\n  TestSquaredEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x2) {\n  TestSquaredEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x4) {\n  TestSquaredEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x8) {\n  TestSquaredEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x1) {\n  TestSquaredEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x2) {\n  TestSquaredEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x4) {\n  TestSquaredEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x8) {\n  TestSquaredEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x16) {\n  TestSquaredEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x1) {\n  TestSquaredEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x2) {\n  TestSquaredEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x4) {\n  TestSquaredEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x8) {\n  TestSquaredEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x16) {\n  TestSquaredEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x32) {\n  TestSquaredEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x1) {\n  TestSquaredEuclideanMatrix<64, 
1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x2) {\n  TestSquaredEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x4) {\n  TestSquaredEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x8) {\n  TestSquaredEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x16) {\n  TestSquaredEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x32) {\n  TestSquaredEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x64) {\n  TestSquaredEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x1) {\n  TestSquaredEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x2) {\n  TestSquaredEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x4) {\n  TestSquaredEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x8) {\n  TestSquaredEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x16) {\n  TestSquaredEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x32) {\n  TestSquaredEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x64) {\n  TestSquaredEuclideanMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x128) {\n  TestSquaredEuclideanMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid EuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < 
matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      EuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    EuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * 
dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        EuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid SquaredEuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < 
query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      SquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    SquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {\n  EuclideanBenchmark<2, 1, 512, 64>();\n  EuclideanBenchmark<2, 2, 512, 64>();\n  EuclideanBenchmark<4, 1, 512, 64>();\n  EuclideanBenchmark<4, 2, 512, 64>();\n  EuclideanBenchmark<4, 4, 512, 64>();\n  EuclideanBenchmark<8, 1, 512, 64>();\n  EuclideanBenchmark<8, 2, 512, 64>();\n  EuclideanBenchmark<8, 4, 512, 64>();\n  EuclideanBenchmark<8, 8, 512, 64>();\n  EuclideanBenchmark<16, 1, 512, 
64>();\n  EuclideanBenchmark<16, 2, 512, 64>();\n  EuclideanBenchmark<16, 4, 512, 64>();\n  EuclideanBenchmark<16, 8, 512, 64>();\n  EuclideanBenchmark<16, 16, 512, 64>();\n  EuclideanBenchmark<32, 1, 512, 64>();\n  EuclideanBenchmark<32, 2, 512, 64>();\n  EuclideanBenchmark<32, 4, 512, 64>();\n  EuclideanBenchmark<32, 8, 512, 64>();\n  EuclideanBenchmark<32, 16, 512, 64>();\n  EuclideanBenchmark<32, 32, 512, 64>();\n  EuclideanBenchmark<64, 1, 512, 64>();\n  EuclideanBenchmark<64, 2, 512, 64>();\n  EuclideanBenchmark<64, 4, 512, 64>();\n  EuclideanBenchmark<64, 8, 512, 64>();\n  EuclideanBenchmark<128, 1, 512, 64>();\n  EuclideanBenchmark<1, 1, 1024, 256>();\n}\n\nTEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {\n  SquaredEuclideanBenchmark<2, 1, 512, 64>();\n  SquaredEuclideanBenchmark<2, 2, 512, 64>();\n  SquaredEuclideanBenchmark<4, 1, 512, 64>();\n  SquaredEuclideanBenchmark<4, 2, 512, 64>();\n  SquaredEuclideanBenchmark<4, 4, 512, 64>();\n  SquaredEuclideanBenchmark<8, 1, 512, 64>();\n  SquaredEuclideanBenchmark<8, 2, 512, 64>();\n  SquaredEuclideanBenchmark<8, 4, 512, 64>();\n  SquaredEuclideanBenchmark<8, 8, 512, 64>();\n  SquaredEuclideanBenchmark<16, 1, 512, 64>();\n  SquaredEuclideanBenchmark<16, 2, 512, 64>();\n  SquaredEuclideanBenchmark<16, 4, 512, 64>();\n  SquaredEuclideanBenchmark<16, 8, 512, 64>();\n  SquaredEuclideanBenchmark<16, 16, 512, 64>();\n  SquaredEuclideanBenchmark<32, 1, 512, 64>();\n  SquaredEuclideanBenchmark<32, 2, 512, 64>();\n  SquaredEuclideanBenchmark<32, 4, 512, 64>();\n  SquaredEuclideanBenchmark<32, 8, 512, 64>();\n  SquaredEuclideanBenchmark<32, 16, 512, 64>();\n  SquaredEuclideanBenchmark<32, 32, 512, 64>();\n  SquaredEuclideanBenchmark<64, 1, 512, 64>();\n  SquaredEuclideanBenchmark<64, 2, 512, 64>();\n  SquaredEuclideanBenchmark<64, 4, 512, 64>();\n  SquaredEuclideanBenchmark<64, 8, 512, 64>();\n  SquaredEuclideanBenchmark<128, 1, 512, 64>();\n  SquaredEuclideanBenchmark<1, 1, 1024, 
256>();\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 10000LLU;\n\n  std::vector<Float16> data(dimension);\n  std::vector<Float16> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    EuclideanDistanceMatrix<Float16, 1, 1>::Compute(&data[0], &query[0],\n                                                    dimension, &result);\n  }\n}\n\nTEST(DistanceMatrix, DISABLED_SquaredEuclidean_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 10000LLU;\n\n  std::vector<Float16> data(dimension);\n  std::vector<Float16> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(&data[0], &query[0],\n                                                           dimension, &result);\n  }\n}\n"
  },
  {
    "path": "tests/ailego/math/euclidean_distance_matrix_fp32_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(float *dst, const float *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\ntemplate <size_t N>\nstatic float EuclideanDistance(const FixedVector<float, N> &lhs,\n                               const FixedVector<float, N> &rhs) {\n  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float SquaredEuclideanDistance(const FixedVector<float, N> &lhs,\n                                      const FixedVector<float, N> &rhs) {\n  return Distance::SquaredEuclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, Euclidean_General) {\n  FixedVector<float, 1> a{0.0f}, b{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, EuclideanDistance(a, b));\n\n  FixedVector<float, 3> c{1.0f, 
2.0f, 3.0f}, d{2.0f, 4.0f, 6.0f};\n  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(c, d));\n\n  FixedVector<float, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                           5.2f, 2.1f, 7.1f, 6.8f, 1.2f},\n      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n  EXPECT_FLOAT_EQ(8.86905f, EuclideanDistance(e, f));\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_General) {\n  FixedVector<float, 1> a{0.0f}, b{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(a, b));\n\n  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(c, d));\n\n  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(e, f));\n\n  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(g, h));\n\n  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(i, j));\n\n  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(l, k));\n\n  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(m, n));\n\n  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(o, p));\n\n  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(q, r));\n\n  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  
EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(s, t));\n\n  FixedVector<float, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_FLOAT_EQ(3.85f, SquaredEuclideanDistance(u, v));\n\n  FixedVector<float, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_FLOAT_EQ(5.06f, SquaredEuclideanDistance(w, x));\n\n  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_FLOAT_EQ(6.5f, SquaredEuclideanDistance(y, z));\n\n  FixedVector<float, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_FLOAT_EQ(10.5f, SquaredEuclideanDistance(x14, y14));\n\n  FixedVector<float, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_FLOAT_EQ(19.5f, SquaredEuclideanDistance(x15, y15));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n 
 MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      EuclideanDistanceMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  EuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], 
cur_query, dimension, &query_result[j]);\n    }\n  }\n  SquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));\n  }\n}\n\nTEST(DistanceMatrix, Euclidean_1x1) {\n  TestEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x1) {\n  TestEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x2) {\n  TestEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_3x3) {\n  TestEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x1) {\n  TestEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x2) {\n  TestEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x4) {\n  TestEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x1) {\n  TestEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x2) {\n  TestEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x4) {\n  TestEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x8) {\n  TestEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x1) {\n  TestEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x2) {\n  TestEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x4) {\n  TestEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x8) {\n  TestEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x16) {\n  TestEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x1) {\n  TestEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x2) {\n  TestEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x4) {\n  TestEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x8) {\n  TestEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x16) {\n  TestEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x32) {\n  
TestEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x1) {\n  TestEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x2) {\n  TestEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x4) {\n  TestEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x8) {\n  TestEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x16) {\n  TestEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x32) {\n  TestEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x64) {\n  TestEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x1) {\n  TestEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x2) {\n  TestEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x4) {\n  TestEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x8) {\n  TestEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x16) {\n  TestEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x32) {\n  TestEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x64) {\n  TestEuclideanMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x128) {\n  TestEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_1x1) {\n  TestSquaredEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x1) {\n  TestSquaredEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x2) {\n  TestSquaredEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_3x3) {\n  TestSquaredEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x1) {\n  TestSquaredEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x2) {\n  TestSquaredEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x4) {\n  TestSquaredEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x1) {\n  TestSquaredEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, 
SquaredEuclidean_8x2) {\n  TestSquaredEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x4) {\n  TestSquaredEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x8) {\n  TestSquaredEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x1) {\n  TestSquaredEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x2) {\n  TestSquaredEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x4) {\n  TestSquaredEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x8) {\n  TestSquaredEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x16) {\n  TestSquaredEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x1) {\n  TestSquaredEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x2) {\n  TestSquaredEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x4) {\n  TestSquaredEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x8) {\n  TestSquaredEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x16) {\n  TestSquaredEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x32) {\n  TestSquaredEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x1) {\n  TestSquaredEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x2) {\n  TestSquaredEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x4) {\n  TestSquaredEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x8) {\n  TestSquaredEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x16) {\n  TestSquaredEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x32) {\n  TestSquaredEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x64) {\n  TestSquaredEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x1) {\n  TestSquaredEuclideanMatrix<128, 
1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x2) {\n  TestSquaredEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x4) {\n  TestSquaredEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x8) {\n  TestSquaredEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x16) {\n  TestSquaredEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x32) {\n  TestSquaredEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x64) {\n  TestSquaredEuclideanMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x128) {\n  TestSquaredEuclideanMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid EuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << 
block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      EuclideanDistanceMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    EuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        EuclideanDistanceMatrix<float, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid SquaredEuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * 
batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      SquaredEuclideanDistanceMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    SquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], 
dimension, results.data());\n  }\n  std::cout << \"* N Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {\n  EuclideanBenchmark<2, 1, 512, 64>();\n  EuclideanBenchmark<2, 2, 512, 64>();\n  EuclideanBenchmark<4, 1, 512, 64>();\n  EuclideanBenchmark<4, 2, 512, 64>();\n  EuclideanBenchmark<4, 4, 512, 64>();\n  EuclideanBenchmark<8, 1, 512, 64>();\n  EuclideanBenchmark<8, 2, 512, 64>();\n  EuclideanBenchmark<8, 4, 512, 64>();\n  EuclideanBenchmark<8, 8, 512, 64>();\n  EuclideanBenchmark<16, 1, 512, 64>();\n  EuclideanBenchmark<16, 2, 512, 64>();\n  EuclideanBenchmark<16, 4, 512, 64>();\n  EuclideanBenchmark<16, 8, 512, 64>();\n  EuclideanBenchmark<16, 16, 512, 64>();\n  EuclideanBenchmark<32, 1, 512, 64>();\n  EuclideanBenchmark<32, 2, 512, 64>();\n  EuclideanBenchmark<32, 4, 512, 64>();\n  EuclideanBenchmark<32, 8, 512, 64>();\n  EuclideanBenchmark<32, 16, 512, 64>();\n  EuclideanBenchmark<32, 32, 512, 64>();\n  EuclideanBenchmark<64, 1, 512, 64>();\n  EuclideanBenchmark<64, 2, 512, 64>();\n  EuclideanBenchmark<64, 4, 512, 64>();\n  EuclideanBenchmark<64, 8, 512, 64>();\n  EuclideanBenchmark<128, 1, 512, 64>();\n  EuclideanBenchmark<1, 1, 1024, 256>();\n}\n\nTEST(DistanceMatrix, 
DISABLED_SquaredEuclidean_Benchmark) {\n  SquaredEuclideanBenchmark<2, 1, 512, 64>();\n  SquaredEuclideanBenchmark<2, 2, 512, 64>();\n  SquaredEuclideanBenchmark<4, 1, 512, 64>();\n  SquaredEuclideanBenchmark<4, 2, 512, 64>();\n  SquaredEuclideanBenchmark<4, 4, 512, 64>();\n  SquaredEuclideanBenchmark<8, 1, 512, 64>();\n  SquaredEuclideanBenchmark<8, 2, 512, 64>();\n  SquaredEuclideanBenchmark<8, 4, 512, 64>();\n  SquaredEuclideanBenchmark<8, 8, 512, 64>();\n  SquaredEuclideanBenchmark<16, 1, 512, 64>();\n  SquaredEuclideanBenchmark<16, 2, 512, 64>();\n  SquaredEuclideanBenchmark<16, 4, 512, 64>();\n  SquaredEuclideanBenchmark<16, 8, 512, 64>();\n  SquaredEuclideanBenchmark<16, 16, 512, 64>();\n  SquaredEuclideanBenchmark<32, 1, 512, 64>();\n  SquaredEuclideanBenchmark<32, 2, 512, 64>();\n  SquaredEuclideanBenchmark<32, 4, 512, 64>();\n  SquaredEuclideanBenchmark<32, 8, 512, 64>();\n  SquaredEuclideanBenchmark<32, 16, 512, 64>();\n  SquaredEuclideanBenchmark<32, 32, 512, 64>();\n  SquaredEuclideanBenchmark<64, 1, 512, 64>();\n  SquaredEuclideanBenchmark<64, 2, 512, 64>();\n  SquaredEuclideanBenchmark<64, 4, 512, 64>();\n  SquaredEuclideanBenchmark<64, 8, 512, 64>();\n  SquaredEuclideanBenchmark<128, 1, 512, 64>();\n  SquaredEuclideanBenchmark<1, 1, 1024, 256>();\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 10000LLU;\n\n  std::vector<float> data(dimension);\n  std::vector<float> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    EuclideanDistanceMatrix<float, 1, 1>::Compute(&data[0], &query[0],\n                                                  dimension, &result);\n  }\n}\n\nTEST(DistanceMatrix, 
DISABLED_SquaredEuclidean_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 10000LLU;\n\n  std::vector<float> data(dimension);\n  std::vector<float> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(&data[0], &query[0],\n                                                         dimension, &result);\n  }\n}\n"
  },
  {
    "path": "tests/ailego/math/euclidean_distance_matrix_int4_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/matrix_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nTEST(DistanceMatrix, Euclidean_General) {\n  std::mt19937 gen((std::random_device())());\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 1;\n\n  std::vector<int8_t> vec1(dimension), query1(dimension);\n  std::vector<uint8_t> vec2(dimension >> 1), query2(dimension >> 1);\n\n  std::uniform_int_distribution<int> dist(-8, 7);\n\n  for (size_t k = 0; k < 100; ++k) {\n    for (size_t i = 0; i < dimension; i += 2) {\n      vec1[i + 0] = (int8_t)dist(gen);\n      vec1[i + 1] = (int8_t)dist(gen);\n      vec2[i >> 1] =\n          ((uint8_t)(vec1[i + 0]) << 4) | ((uint8_t)(vec1[i + 1]) & 0xf);\n      EXPECT_EQ(vec1[i + 0] * vec1[i + 1], Int4MulTable[vec2[i >> 1]]);\n\n      query1[i + 0] = (int8_t)dist(gen);\n      query1[i + 1] = (int8_t)dist(gen);\n      query2[i >> 1] =\n          
((uint8_t)(query1[i + 0]) << 4) | ((uint8_t)(query1[i + 1]) & 0xf);\n      EXPECT_EQ(query1[i + 0] * query1[i + 1], Int4MulTable[query2[i >> 1]]);\n    }\n\n    EXPECT_FLOAT_EQ(\n        Distance::SquaredEuclidean(vec1.data(), query1.data(), dimension),\n        Distance::SquaredEuclidean(vec2.data(), query2.data(), dimension));\n    EXPECT_FLOAT_EQ(Distance::Euclidean(vec1.data(), query1.data(), dimension),\n                    Distance::Euclidean(vec2.data(), query2.data(), dimension));\n    EXPECT_FLOAT_EQ(std::sqrt(Distance::SquaredEuclidean(\n                        vec1.data(), query1.data(), dimension)),\n                    Distance::Euclidean(vec2.data(), query2.data(), dimension));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;\n  size_t matrix_size = batch_size * (dimension / 2);\n  size_t query_matrix_size = query_size * (dimension / 2);\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n      matrix1.data(), dimension / 8, &matrix2[0]);\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * 
batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, &query_result[j]);\n    }\n  }\n  EuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;\n  size_t matrix_size = batch_size * (dimension / 2);\n  size_t query_matrix_size = query_size * (dimension / 2);\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n      matrix1.data(), dimension / 8, &matrix2[0]);\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, &query_result[j]);\n    }\n  }\n  
SquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, Euclidean_1x1) {\n  TestEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x1) {\n  TestEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x2) {\n  TestEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_3x3) {\n  TestEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x1) {\n  TestEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x2) {\n  TestEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x4) {\n  TestEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x1) {\n  TestEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x2) {\n  TestEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x4) {\n  TestEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x8) {\n  TestEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x1) {\n  TestEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x2) {\n  TestEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x4) {\n  TestEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x8) {\n  TestEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x16) {\n  TestEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x1) {\n  TestEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x2) {\n  TestEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x4) {\n  TestEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x8) {\n  TestEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x16) {\n  TestEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x32) {\n  TestEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x1) {\n  
TestEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x2) {\n  TestEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x4) {\n  TestEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x8) {\n  TestEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x16) {\n  TestEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x32) {\n  TestEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x64) {\n  TestEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x1) {\n  TestEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x2) {\n  TestEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x4) {\n  TestEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x8) {\n  TestEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x16) {\n  TestEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x32) {\n  TestEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x64) {\n  TestEuclideanMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x128) {\n  TestEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_1x1) {\n  TestSquaredEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x1) {\n  TestSquaredEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x2) {\n  TestSquaredEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_3x3) {\n  TestSquaredEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x1) {\n  TestSquaredEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x2) {\n  TestSquaredEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x4) {\n  TestSquaredEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x1) {\n  TestSquaredEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x2) {\n  TestSquaredEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, 
SquaredEuclidean_8x4) {\n  TestSquaredEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x8) {\n  TestSquaredEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x1) {\n  TestSquaredEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x2) {\n  TestSquaredEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x4) {\n  TestSquaredEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x8) {\n  TestSquaredEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x16) {\n  TestSquaredEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x1) {\n  TestSquaredEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x2) {\n  TestSquaredEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x4) {\n  TestSquaredEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x8) {\n  TestSquaredEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x16) {\n  TestSquaredEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x32) {\n  TestSquaredEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x1) {\n  TestSquaredEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x2) {\n  TestSquaredEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x4) {\n  TestSquaredEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x8) {\n  TestSquaredEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x16) {\n  TestSquaredEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x32) {\n  TestSquaredEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x64) {\n  TestSquaredEuclideanMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x1) {\n  TestSquaredEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x2) {\n  
TestSquaredEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x4) {\n  TestSquaredEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x8) {\n  TestSquaredEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x16) {\n  TestSquaredEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x32) {\n  TestSquaredEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x64) {\n  TestSquaredEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x128) {\n  TestSquaredEuclideanMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid EuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension / 2;\n  const size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" 
<< query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      EuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    EuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension / 2], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid SquaredEuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const 
size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension / 2;\n  const size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      SquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    
const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    SquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension / 2], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {\n  EuclideanBenchmark<2, 1, 512, 128>();\n  EuclideanBenchmark<2, 2, 512, 128>();\n  EuclideanBenchmark<4, 1, 512, 128>();\n  EuclideanBenchmark<4, 2, 512, 128>();\n  EuclideanBenchmark<4, 4, 512, 128>();\n  EuclideanBenchmark<8, 1, 512, 128>();\n  EuclideanBenchmark<8, 2, 512, 128>();\n  EuclideanBenchmark<8, 4, 512, 128>();\n  EuclideanBenchmark<8, 8, 512, 128>();\n  EuclideanBenchmark<16, 1, 512, 128>();\n  EuclideanBenchmark<16, 2, 512, 128>();\n  EuclideanBenchmark<16, 4, 512, 128>();\n  EuclideanBenchmark<16, 8, 512, 128>();\n  EuclideanBenchmark<16, 16, 512, 128>();\n  EuclideanBenchmark<32, 1, 512, 128>();\n  EuclideanBenchmark<32, 2, 512, 128>();\n  EuclideanBenchmark<32, 4, 512, 128>();\n  EuclideanBenchmark<32, 8, 512, 128>();\n  EuclideanBenchmark<32, 16, 512, 128>();\n  EuclideanBenchmark<32, 32, 512, 128>();\n  EuclideanBenchmark<64, 1, 512, 128>();\n  
EuclideanBenchmark<64, 2, 512, 128>();\n  EuclideanBenchmark<64, 4, 512, 128>();\n  EuclideanBenchmark<64, 8, 512, 128>();\n  EuclideanBenchmark<128, 1, 512, 128>();\n}\n\nTEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {\n  SquaredEuclideanBenchmark<2, 1, 512, 128>();\n  SquaredEuclideanBenchmark<2, 2, 512, 128>();\n  SquaredEuclideanBenchmark<4, 1, 512, 128>();\n  SquaredEuclideanBenchmark<4, 2, 512, 128>();\n  SquaredEuclideanBenchmark<4, 4, 512, 128>();\n  SquaredEuclideanBenchmark<8, 1, 512, 128>();\n  SquaredEuclideanBenchmark<8, 2, 512, 128>();\n  SquaredEuclideanBenchmark<8, 4, 512, 128>();\n  SquaredEuclideanBenchmark<8, 8, 512, 128>();\n  SquaredEuclideanBenchmark<16, 1, 512, 128>();\n  SquaredEuclideanBenchmark<16, 2, 512, 128>();\n  SquaredEuclideanBenchmark<16, 4, 512, 128>();\n  SquaredEuclideanBenchmark<16, 8, 512, 128>();\n  SquaredEuclideanBenchmark<16, 16, 512, 128>();\n  SquaredEuclideanBenchmark<32, 1, 512, 128>();\n  SquaredEuclideanBenchmark<32, 2, 512, 128>();\n  SquaredEuclideanBenchmark<32, 4, 512, 128>();\n  SquaredEuclideanBenchmark<32, 8, 512, 128>();\n  SquaredEuclideanBenchmark<32, 16, 512, 128>();\n  SquaredEuclideanBenchmark<32, 32, 512, 128>();\n  SquaredEuclideanBenchmark<64, 1, 512, 128>();\n  SquaredEuclideanBenchmark<64, 2, 512, 128>();\n  SquaredEuclideanBenchmark<64, 4, 512, 128>();\n  SquaredEuclideanBenchmark<64, 8, 512, 128>();\n  SquaredEuclideanBenchmark<128, 1, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/euclidean_distance_matrix_int8_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float EuclideanDistance(const FixedVector<int8_t, N> &lhs,\n                               const FixedVector<int8_t, N> &rhs) {\n  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float SquaredEuclideanDistance(const FixedVector<int8_t, N> &lhs,\n                                      const FixedVector<int8_t, N> &rhs) {\n  return Distance::SquaredEuclidean(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, Euclidean_General) {\n  FixedVector<int8_t, 1> a{(int8_t)0}, b{(int8_t)0};\n  EXPECT_FLOAT_EQ(0.0f, EuclideanDistance(a, b));\n\n  
FixedVector<int8_t, 3> c{(int8_t)1, (int8_t)2, (int8_t)3},\n      d{(int8_t)2, (int8_t)4, (int8_t)6};\n  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(c, d));\n\n  FixedVector<int8_t, 4> e{(int8_t)0, (int8_t)0, (int8_t)127, (int8_t)127},\n      f{(int8_t)127, (int8_t)127, (int8_t)0, (int8_t)0};\n  EXPECT_FLOAT_EQ(254.0f, EuclideanDistance(e, f));\n\n  FixedVector<int8_t, 5> g{(int8_t)0, (int8_t)0, (int8_t)127, (int8_t)127,\n                           (int8_t)-127},\n      h{(int8_t)127, (int8_t)127, (int8_t)0, (int8_t)0, (int8_t)127};\n  EXPECT_FLOAT_EQ(359.21024f, EuclideanDistance(g, h));\n\n  int8_t a2048[] = {\n      59, 46, 36, 99, 49, 61, 45, 68, 79, 86, 2,  8,  73, 14, 45, 85, 5,  63,\n      71, 45, 36, 54, 72, 18, 79, 15, 78, 29, 9,  12, 96, 27, 3,  45, 81, 37,\n      51, 19, 43, 5,  55, 93, 85, 61, 86, 54, 2,  33, 39, 74, 95, 7,  98, 32,\n      25, 30, 83, 45, 2,  7,  76, 95, 58, 52, 80, 85, 71, 56, 92, 41, 67, 98,\n      32, 97, 3,  71, 59, 58, 87, 84, 44, 54, 81, 74, 0,  51, 7,  28, 81, 83,\n      45, 88, 95, 87, 31, 65, 61, 84, 37, 13, 98, 59, 35, 41, 40, 12, 36, 87,\n      4,  84, 15, 96, 97, 15, 19, 7,  67, 87, 13, 40, 56, 80, 86, 3,  85, 99,\n      56, 94, 8,  63, 69, 24, 72, 44, 75, 58, 93, 19, 63, 81, 59, 90, 54, 99,\n      2,  37, 20, 72, 57, 56, 25, 78, 27, 83, 77, 9,  66, 66, 62, 21, 81, 69,\n      8,  13, 29, 95, 8,  75, 20, 48, 76, 53, 5,  97, 8,  26, 93, 76, 63, 48,\n      26, 51, 69, 46, 6,  42, 76, 44, 84, 40, 85, 79, 44, 62, 78, 52, 18, 70,\n      95, 9,  13, 71, 15, 2,  28, 98, 98, 44, 98, 64, 44, 9,  17, 71, 27, 73,\n      24, 54, 24, 64, 68, 38, 90, 20, 89, 4,  79, 20, 56, 33, 92, 65, 64, 83,\n      33, 92, 24, 4,  51, 16, 76, 14, 72, 36, 95, 22, 32, 27, 42, 58, 15, 87,\n      23, 19, 76, 2,  35, 41, 1,  18, 77, 48, 51, 50, 14, 14, 22, 80, 23, 39,\n      53, 69, 61, 63, 45, 91, 67, 75, 51, 9,  40, 42, 16, 3,  61, 18, 28, 58,\n      28, 13, 79, 23, 43, 40, 99, 87, 63, 63, 72, 74, 74, 93, 10, 61, 86, 84,\n      8,  63, 31, 98, 18, 79, 54, 
25, 47, 61, 98, 15, 42, 53, 26, 40, 59, 77,\n      80, 62, 73, 53, 22, 21, 6,  38, 31, 3,  80, 91, 53, 77, 36, 25, 28, 64,\n      60, 31, 49, 91, 6,  50, 70, 94, 36, 66, 29, 32, 66, 64, 92, 78, 30, 87,\n      29, 26, 16, 87, 29, 64, 13, 60, 1,  63, 2,  75, 31, 44, 8,  3,  65, 50,\n      48, 26, 94, 44, 7,  45, 9,  94, 56, 57, 25, 95, 5,  8,  92, 71, 10, 83,\n      62, 57, 74, 3,  95, 38, 90, 99, 53, 40, 37, 12, 88, 60, 78, 61, 54, 60,\n      73, 73, 18, 48, 75, 42, 30, 19, 18, 96, 44, 62, 80, 46, 4,  63, 98, 12,\n      44, 34, 83, 42, 36, 95, 84, 2,  48, 7,  68, 68, 47, 71, 74, 93, 78, 5,\n      83, 99, 33, 17, 81, 49, 8,  75, 24, 67, 26, 44, 89, 38, 99, 22, 45, 52,\n      89, 14, 94, 74, 35, 24, 46, 63, 6,  55, 23, 3,  91, 97, 6,  33, 53, 76,\n      13, 68, 35, 9,  24, 43, 86, 5,  24, 4,  64, 86, 85, 12, 24, 60, 2,  9,\n      23, 48, 12, 88, 71, 28, 1,  79, 31, 31, 82, 21, 49, 8,  31, 3,  99, 91,\n      93, 29, 15, 67, 9,  50, 94, 46, 72, 77, 33, 30, 4,  16, 56, 86, 62, 76,\n      78, 77, 77, 25, 30, 88, 29, 17, 38, 2,  80, 86, 73, 8,  8,  38, 3,  50,\n      67, 49, 42, 75, 49, 96, 36, 77, 55, 27, 52, 77, 82, 92, 42, 52, 35, 55,\n      60, 16, 14, 96, 90, 39, 49, 68, 47, 36, 53, 1,  78, 43, 93, 58, 66, 45,\n      80, 56, 51, 23, 64, 49, 56, 28, 77, 99, 18, 21, 98, 46, 88, 97, 98, 21,\n      58, 31, 33, 77, 61, 71, 37, 80, 80, 18, 2,  92, 70, 83, 3,  41, 93, 38,\n      24, 92, 98, 69, 80, 84, 81, 74, 80, 89, 74, 90, 92, 81, 29, 54, 3,  49,\n      52, 24, 78, 99, 33, 35, 54, 98, 36, 90, 66, 71, 67, 39, 79, 55, 68, 68,\n      94, 58, 60, 74, 81, 26, 43, 50, 67, 21, 27, 41, 44, 85, 60, 39, 48, 6,\n      49, 75, 17, 1,  94, 69, 98, 38, 77, 96, 61, 99, 52, 89, 81, 0,  73, 30,\n      68, 90, 4,  77, 93, 16, 56, 92, 58, 49, 22, 49, 60, 4,  66, 33, 54, 49,\n      4,  3,  75, 81, 36, 99, 7,  87, 60, 15, 90, 93, 33, 42, 21, 42, 1,  29,\n      72, 20, 94, 3,  83, 56, 48, 94, 41, 78, 84, 98, 61, 70, 71, 10, 15, 75,\n      86, 6,  57, 53, 7,  38, 94, 2,  94, 38, 79, 21, 53, 69, 
89, 84, 53, 59,\n      99, 69, 81, 84, 60, 27, 1,  10, 93, 0,  88, 4,  71, 44, 12, 35, 63, 60,\n      65, 15, 5,  20, 60, 30, 86, 46, 50, 28, 67, 39, 1,  13, 26, 40, 57, 59,\n      87, 46, 5,  51, 14, 62, 78, 92, 11, 42, 61, 59, 4,  70, 24, 54, 34, 54,\n      30, 83, 58, 61, 23, 13, 17, 96, 32, 39, 85, 81, 70, 53, 53, 23, 96, 36,\n      18, 84, 22, 96, 5,  90, 17, 62, 25, 91, 71, 70, 91, 88, 93, 86, 30, 90,\n      25, 79, 86, 12, 28, 87, 46, 64, 82, 19, 82, 91, 79, 40, 83, 38, 83, 86,\n      50, 84, 67, 0,  41, 80, 70, 36, 36, 41, 81, 68, 71, 40, 36, 10, 60, 7,\n      87, 67, 41, 10, 26, 98, 45, 66, 92, 22, 63, 70, 91, 90, 34, 98, 31, 45,\n      70, 34, 69, 55, 55, 32, 0,  81, 78, 7,  13, 50, 95, 69, 13, 76, 90, 85,\n      28, 62, 22, 66, 44, 92, 95, 54, 2,  12, 43, 74, 51, 54, 97, 14, 34, 45,\n      17, 13, 57, 29, 87, 49, 79, 20, 78, 92, 20, 82, 21, 71, 93, 51, 18, 58,\n      12, 55, 70, 97, 60, 51, 94, 65, 64, 76, 27, 57, 76, 2,  32, 64, 9,  56,\n      8,  37, 17, 53, 26, 45, 93, 61, 56, 9,  74, 32, 39, 82, 29, 1,  8,  95,\n      2,  93, 93, 66, 56, 16, 60, 42, 28, 11, 47, 58, 98, 34, 93, 25, 49, 22,\n      95, 6,  1,  78, 78, 11, 19, 13, 6,  80, 90, 20, 82, 26, 48, 51, 16, 84,\n      51, 54, 94, 67, 59, 9,  29, 59, 53, 46, 13, 55, 92, 87, 48, 17, 45, 71,\n      52, 86, 96, 4,  18, 32, 87, 40, 93, 98, 8,  85, 76, 88, 82, 57, 7,  61,\n      5,  72, 99, 37, 45, 42, 15, 70, 8,  5,  41, 14, 28, 50, 20, 2,  77, 48,\n      53, 16, 95, 78, 88, 78, 54, 19, 30, 80, 78, 97, 69, 23, 93, 48, 72, 92,\n      88, 82, 17, 58, 98, 99, 70, 97, 52, 46, 66, 97, 95, 65, 38, 47, 1,  4,\n      18, 31, 99, 16, 64, 84, 44, 40, 46, 2,  46, 32, 8,  47, 64, 28, 87, 70,\n      80, 25, 85, 17, 43, 56, 97, 91, 20, 7,  70, 82, 32, 58, 46, 43, 25, 81,\n      12, 97, 40, 73, 52, 27, 13, 30, 58, 1,  89, 68, 75, 17, 91, 22, 12, 48,\n      41, 98, 81, 44, 60, 93, 54, 81, 3,  8,  43, 16, 11, 62, 33, 81, 1,  49,\n      51, 67, 83, 83, 93, 7,  63, 71, 41, 39, 63, 52, 77, 77, 47, 20, 32, 26,\n      20, 
66, 64, 62, 94, 55, 37, 39, 28, 45, 67, 76, 6,  43, 10, 18, 55, 44,\n      35, 41, 29, 33, 96, 90, 72, 70, 87, 75, 97, 43, 36, 14, 79, 8,  10, 83,\n      33, 29, 83, 74, 72, 83, 96, 77, 72, 91, 41, 9,  85, 34, 7,  51, 13, 88,\n      69, 47, 23, 22, 64, 2,  7,  38, 66, 58, 7,  8,  35, 92, 53, 65, 4,  94,\n      79, 29, 88, 23, 81, 72, 55, 22, 44, 78, 75, 80, 74, 28, 54, 16, 8,  16,\n      73, 92, 31, 17, 44, 6,  32, 80, 5,  61, 2,  58, 7,  80, 89, 51, 59, 63,\n      65, 42, 93, 14, 44, 16, 36, 79, 41, 45, 33, 36, 13, 92, 85, 75, 7,  47,\n      31, 62, 98, 66, 5,  20, 55, 26, 21, 93, 50, 62, 44, 3,  66, 43, 11, 15,\n      35, 78, 73, 26, 55, 90, 90, 8,  40, 74, 17, 8,  61, 47, 76, 41, 43, 50,\n      94, 62, 85, 44, 47, 91, 72, 86, 10, 86, 62, 18, 51, 23, 83, 0,  61, 41,\n      99, 24, 15, 72, 42, 56, 19, 34, 54, 63, 5,  14, 3,  64, 26, 6,  1,  21,\n      25, 64, 19, 84, 49, 55, 32, 85, 76, 62, 1,  52, 15, 86, 21, 49, 92, 22,\n      79, 20, 90, 27, 32, 46, 76, 55, 23, 69, 56, 80, 35, 35, 30, 43, 70, 79,\n      73, 12, 60, 20, 22, 80, 83, 72, 66, 56, 41, 68, 4,  8,  94, 97, 41, 76,\n      96, 3,  53, 61, 15, 89, 65, 45, 65, 15, 6,  83, 82, 69, 76, 68, 95, 81,\n      55, 55, 85, 26, 75, 34, 67, 75, 28, 95, 58, 11, 73, 96, 44, 70, 82, 89,\n      72, 40, 17, 89, 51, 87, 69, 85, 45, 59, 2,  53, 82, 87, 24, 33, 41, 53,\n      97, 35, 0,  54, 7,  94, 71, 42, 68, 88, 53, 15, 41, 79, 1,  24, 49, 54,\n      26, 88, 23, 89, 14, 41, 52, 8,  12, 92, 98, 54, 56, 27, 17, 11, 89, 82,\n      34, 81, 78, 15, 63, 18, 17, 18, 40, 85, 41, 57, 68, 21, 7,  34, 44, 97,\n      20, 5,  67, 14, 32, 86, 8,  48, 8,  6,  28, 50, 74, 91, 82, 18, 26, 51,\n      38, 21, 90, 54, 64, 91, 65, 32, 6,  67, 6,  97, 32, 70, 88, 39, 80, 39,\n      86, 13, 72, 81, 6,  93, 10, 67, 41, 32, 32, 8,  60, 95, 94, 11, 63, 45,\n      25, 25, 46, 28, 10, 91, 16, 82, 23, 88, 10, 21, 32, 31, 90, 26, 55, 59,\n      74, 36, 49, 78, 86, 68, 6,  22, 25, 59, 51, 96, 77, 60, 20, 32, 36, 91,\n      56, 52, 85, 42, 26, 30, 17, 
31, 5,  18, 74, 42, 75, 45, 31, 40, 81, 65,\n      20, 29, 94, 10, 71, 40, 69, 83, 83, 24, 76, 25, 73, 40, 47, 75, 44, 66,\n      11, 52, 90, 6,  30, 85, 18, 56, 22, 18, 51, 54, 18, 18, 99, 80, 37, 89,\n      83, 8,  83, 74, 18, 48, 39, 3,  45, 47, 70, 59, 14, 15, 94, 84, 39, 62,\n      42, 79, 84, 88, 26, 52, 34, 48, 92, 28, 20, 59, 53, 81, 34, 5,  98, 36,\n      18, 80, 36, 8,  83, 28, 98, 67, 92, 44, 9,  47, 65, 59, 11, 31, 33, 88,\n      77, 2,  20, 22, 0,  24, 12, 45, 88, 11, 38, 75, 43, 99, 30, 71, 66, 47,\n      67, 14, 22, 57, 40, 88, 48, 12, 89, 6,  93, 28, 96, 37, 99, 38, 75, 72,\n      68, 42, 11, 76, 53, 4,  9,  38, 7,  77, 47, 46, 66, 73, 27, 93, 17, 87,\n      9,  72, 77, 78, 1,  74, 97, 54, 87, 44, 43, 64, 70, 34, 62, 82, 74, 48,\n      41, 54, 41, 78, 75, 4,  21, 30, 80, 41, 17, 13, 76, 87, 47, 68, 37, 17,\n      42, 32, 23, 15, 70, 56, 40, 31, 33, 79, 77, 73, 21, 4,  54, 41, 25, 67,\n      18, 6,  26, 42, 36, 44, 33, 87, 94, 22, 41, 79, 15, 16, 5,  84, 29, 30,\n      25, 67, 3,  55, 96, 36, 36, 89, 2,  47, 92, 94, 23, 63, 54, 45, 14, 41,\n      18, 48, 61, 91, 33, 99, 9,  52, 59, 71, 20, 62, 99, 94, 6,  79, 59, 99,\n      94, 3,  9,  16, 53, 74, 55, 43, 44, 62, 89, 2,  17, 97, 47, 99, 87, 31,\n      90, 82, 26, 33, 7,  92, 0,  98, 78, 94, 44, 89, 5,  97, 18, 43, 19, 6,\n      74, 57, 33, 0,  14, 50, 43, 8,  19, 21, 96, 95, 28, 60, 11, 81, 65, 10,\n      20, 51, 45, 45, 54, 16, 22, 26, 35, 30, 79, 51, 16, 91, 25, 40, 25, 75,\n      85, 43, 72, 3,  23, 5,  59, 90, 12, 89, 81, 86, 28, 75, 5,  79, 45, 28,\n      33, 65, 22, 15, 14, 76, 29, 85, 89, 37, 19, 84, 5,  51};\n  int8_t b2048[] = {\n      43, 84, 90, 44, 54, 43, 49, 42, 24, 10, 61, 8,  68, 2,  75, 9,  25, 25,\n      80, 6,  9,  62, 33, 22, 84, 43, 20, 34, 33, 53, 47, 8,  16, 15, 4,  96,\n      3,  73, 75, 61, 75, 68, 37, 6,  25, 48, 40, 0,  67, 89, 98, 92, 37, 72,\n      44, 94, 88, 42, 97, 24, 11, 24, 39, 13, 34, 30, 58, 22, 29, 28, 22, 82,\n      15, 16, 57, 99, 9,  7,  76, 57, 39, 31, 21, 7, 
 44, 73, 88, 8,  62, 47,\n      45, 65, 11, 78, 82, 89, 72, 18, 9,  24, 59, 75, 17, 0,  70, 1,  62, 52,\n      51, 67, 5,  99, 83, 80, 82, 16, 43, 43, 94, 8,  52, 58, 68, 60, 72, 26,\n      57, 22, 72, 95, 70, 12, 51, 43, 28, 53, 72, 0,  12, 67, 96, 89, 34, 28,\n      9,  96, 5,  82, 19, 52, 28, 8,  8,  45, 60, 34, 66, 60, 54, 41, 87, 13,\n      15, 23, 96, 29, 70, 50, 72, 10, 87, 98, 81, 11, 43, 27, 96, 9,  17, 16,\n      6,  14, 31, 12, 89, 55, 37, 91, 50, 74, 12, 63, 10, 77, 81, 5,  98, 96,\n      22, 9,  3,  48, 96, 1,  36, 87, 54, 40, 91, 51, 35, 38, 56, 78, 84, 4,\n      95, 2,  20, 18, 87, 60, 73, 28, 69, 55, 8,  12, 86, 2,  31, 55, 46, 57,\n      77, 25, 54, 50, 58, 13, 93, 6,  79, 80, 83, 78, 27, 1,  14, 52, 70, 82,\n      87, 81, 82, 63, 86, 24, 37, 12, 66, 22, 63, 93, 21, 11, 86, 92, 22, 47,\n      33, 84, 28, 69, 69, 31, 39, 43, 2,  29, 14, 14, 62, 42, 75, 37, 36, 88,\n      98, 53, 18, 81, 40, 3,  49, 85, 99, 65, 15, 21, 23, 88, 42, 80, 79, 94,\n      46, 2,  46, 91, 80, 4,  13, 90, 3,  52, 23, 65, 30, 1,  37, 86, 71, 64,\n      63, 56, 44, 10, 49, 6,  31, 10, 85, 75, 50, 27, 65, 58, 96, 0,  26, 0,\n      69, 70, 3,  69, 91, 96, 59, 44, 29, 20, 22, 54, 16, 69, 0,  16, 3,  69,\n      64, 68, 55, 9,  71, 62, 38, 84, 6,  27, 21, 50, 42, 1,  27, 14, 49, 16,\n      74, 10, 45, 31, 37, 61, 72, 8,  94, 93, 25, 81, 62, 9,  35, 15, 21, 48,\n      64, 62, 18, 72, 38, 85, 55, 27, 20, 86, 56, 84, 72, 12, 59, 54, 94, 83,\n      21, 25, 34, 11, 82, 32, 59, 90, 97, 81, 29, 18, 38, 16, 5,  53, 96, 85,\n      19, 88, 37, 72, 32, 38, 41, 74, 70, 12, 60, 3,  67, 29, 2,  60, 38, 6,\n      82, 34, 53, 24, 31, 18, 14, 40, 39, 61, 10, 6,  69, 40, 76, 32, 9,  4,\n      47, 65, 13, 45, 60, 35, 59, 53, 67, 88, 74, 71, 3,  32, 97, 4,  77, 55,\n      25, 27, 38, 18, 91, 48, 86, 18, 30, 66, 22, 3,  24, 8,  43, 72, 75, 22,\n      7,  46, 5,  58, 67, 10, 95, 55, 99, 12, 59, 40, 57, 89, 50, 80, 41, 41,\n      36, 28, 35, 87, 66, 94, 9,  11, 24, 19, 94, 51, 3,  34, 21, 44, 33, 71,\n   
   12, 1,  58, 84, 78, 85, 55, 41, 63, 25, 13, 15, 69, 7,  43, 55, 52, 15,\n      16, 19, 85, 63, 71, 66, 29, 55, 64, 27, 79, 74, 15, 62, 54, 83, 50, 38,\n      54, 2,  40, 29, 94, 65, 32, 50, 41, 72, 5,  68, 15, 8,  4,  50, 74, 37,\n      76, 61, 53, 71, 9,  70, 1,  1,  44, 38, 7,  6,  49, 53, 44, 57, 80, 45,\n      79, 97, 85, 2,  81, 3,  67, 72, 31, 52, 41, 42, 83, 97, 30, 32, 39, 38,\n      71, 32, 17, 96, 12, 34, 52, 64, 25, 20, 60, 2,  53, 66, 1,  38, 10, 75,\n      98, 44, 11, 16, 15, 53, 12, 29, 18, 46, 91, 13, 26, 36, 74, 32, 3,  97,\n      76, 97, 80, 11, 27, 54, 57, 9,  0,  10, 28, 8,  55, 83, 56, 57, 82, 2,\n      70, 42, 2,  64, 84, 97, 1,  34, 2,  7,  42, 54, 20, 55, 39, 77, 79, 58,\n      59, 16, 98, 95, 31, 22, 80, 77, 15, 12, 39, 29, 86, 8,  4,  13, 72, 95,\n      67, 45, 2,  53, 61, 3,  87, 94, 33, 60, 63, 33, 42, 33, 44, 35, 69, 22,\n      96, 69, 73, 33, 28, 0,  79, 23, 54, 23, 80, 87, 99, 32, 56, 0,  51, 40,\n      12, 28, 68, 74, 6,  71, 68, 18, 72, 99, 58, 48, 44, 12, 55, 98, 46, 19,\n      93, 62, 65, 36, 43, 38, 10, 23, 3,  48, 27, 51, 5,  48, 97, 28, 73, 64,\n      43, 77, 10, 52, 36, 5,  1,  44, 18, 20, 58, 21, 30, 14, 12, 35, 66, 90,\n      31, 69, 93, 30, 51, 17, 43, 10, 53, 83, 91, 65, 44, 72, 32, 41, 41, 3,\n      48, 67, 98, 86, 65, 67, 82, 25, 73, 53, 23, 99, 86, 95, 43, 52, 53, 82,\n      65, 79, 59, 64, 69, 89, 71, 13, 60, 28, 61, 97, 88, 39, 31, 65, 90, 40,\n      20, 51, 2,  6,  74, 2,  62, 97, 21, 6,  25, 23, 42, 72, 24, 96, 72, 84,\n      55, 29, 32, 55, 98, 79, 16, 52, 69, 85, 74, 19, 26, 25, 6,  47, 88, 90,\n      40, 63, 58, 45, 64, 59, 65, 83, 27, 62, 15, 65, 23, 68, 23, 95, 13, 35,\n      6,  93, 97, 91, 37, 37, 7,  86, 98, 81, 34, 61, 44, 4,  85, 87, 74, 54,\n      80, 45, 68, 19, 48, 27, 73, 78, 76, 90, 75, 93, 4,  32, 36, 87, 19, 71,\n      47, 37, 83, 83, 99, 58, 83, 2,  34, 25, 18, 25, 74, 8,  12, 96, 83, 93,\n      36, 96, 4,  82, 9,  57, 70, 36, 96, 73, 88, 72, 69, 80, 10, 12, 20, 11,\n      33, 97, 79, 52, 83, 56, 
71, 59, 20, 70, 50, 63, 79, 60, 15, 97, 72, 47,\n      53, 60, 89, 53, 98, 24, 86, 40, 74, 9,  39, 27, 15, 59, 11, 84, 41, 68,\n      91, 13, 27, 40, 52, 89, 29, 52, 32, 37, 33, 48, 44, 10, 62, 18, 87, 53,\n      56, 84, 95, 57, 38, 73, 75, 58, 66, 93, 65, 81, 45, 66, 54, 73, 27, 72,\n      46, 46, 19, 46, 53, 53, 5,  77, 88, 3,  19, 99, 67, 16, 89, 93, 68, 37,\n      29, 94, 69, 3,  29, 8,  76, 25, 0,  28, 24, 71, 25, 90, 87, 97, 32, 80,\n      23, 90, 86, 30, 80, 40, 80, 46, 17, 66, 97, 4,  36, 2,  31, 14, 75, 15,\n      34, 84, 56, 76, 61, 15, 93, 87, 52, 69, 26, 2,  18, 39, 60, 37, 31, 79,\n      27, 84, 36, 53, 76, 62, 71, 62, 74, 51, 59, 26, 70, 94, 56, 89, 72, 3,\n      26, 27, 66, 49, 16, 13, 81, 44, 85, 7,  54, 6,  14, 35, 60, 84, 48, 24,\n      11, 29, 57, 15, 0,  76, 23, 72, 11, 50, 69, 90, 20, 5,  32, 64, 4,  23,\n      82, 33, 94, 69, 28, 99, 80, 85, 27, 89, 8,  45, 37, 34, 57, 87, 37, 57,\n      73, 17, 56, 45, 25, 1,  67, 67, 67, 56, 81, 20, 23, 25, 37, 93, 93, 13,\n      5,  58, 62, 93, 16, 61, 69, 43, 52, 66, 59, 20, 65, 89, 84, 67, 98, 98,\n      10, 21, 10, 27, 83, 39, 69, 6,  49, 88, 95, 83, 61, 87, 78, 38, 67, 43,\n      45, 61, 69, 71, 4,  45, 49, 78, 51, 30, 84, 4,  47, 18, 71, 73, 32, 73,\n      24, 56, 76, 82, 99, 40, 39, 42, 71, 24, 57, 83, 31, 68, 6,  38, 38, 2,\n      46, 46, 90, 61, 89, 30, 15, 5,  76, 24, 70, 35, 90, 45, 45, 91, 47, 73,\n      34, 30, 53, 64, 61, 94, 96, 58, 84, 37, 32, 19, 34, 12, 96, 75, 28, 86,\n      66, 91, 55, 93, 93, 6,  69, 51, 44, 92, 40, 85, 22, 1,  42, 10, 38, 86,\n      52, 28, 19, 7,  75, 5,  47, 28, 52, 76, 50, 27, 56, 59, 95, 85, 89, 63,\n      62, 73, 56, 52, 89, 5,  8,  70, 28, 62, 36, 21, 15, 6,  19, 10, 19, 38,\n      4,  61, 27, 87, 71, 54, 34, 5,  27, 48, 8,  26, 48, 29, 4,  76, 52, 29,\n      21, 36, 34, 87, 11, 97, 78, 0,  34, 46, 93, 51, 77, 14, 47, 86, 2,  92,\n      84, 92, 15, 57, 67, 12, 37, 5,  74, 49, 59, 13, 88, 0,  59, 29, 86, 91,\n      19, 20, 2,  19, 4,  51, 68, 5,  77, 26, 36, 88, 
73, 13, 68, 5,  77, 18,\n      25, 13, 25, 47, 66, 69, 75, 45, 51, 35, 2,  61, 95, 60, 86, 97, 17, 74,\n      19, 52, 43, 76, 26, 51, 38, 27, 13, 81, 53, 3,  87, 2,  99, 36, 7,  72,\n      44, 42, 10, 8,  78, 87, 20, 75, 9,  36, 25, 0,  56, 37, 20, 13, 41, 80,\n      69, 76, 39, 47, 61, 28, 87, 81, 30, 11, 4,  62, 66, 3,  77, 7,  0,  95,\n      52, 81, 42, 84, 47, 78, 55, 25, 55, 13, 63, 32, 16, 68, 8,  35, 1,  30,\n      66, 75, 79, 63, 71, 63, 65, 70, 92, 74, 68, 92, 61, 97, 36, 86, 61, 3,\n      85, 13, 97, 69, 56, 58, 22, 71, 70, 86, 61, 33, 79, 91, 21, 72, 80, 65,\n      27, 14, 82, 82, 20, 87, 47, 4,  38, 49, 89, 63, 10, 45, 48, 96, 8,  78,\n      95, 67, 3,  6,  2,  64, 44, 89, 13, 31, 18, 83, 95, 92, 11, 80, 35, 87,\n      14, 14, 58, 22, 86, 16, 98, 7,  26, 67, 27, 91, 96, 56, 28, 19, 17, 81,\n      4,  56, 23, 19, 17, 77, 54, 93, 64, 27, 21, 40, 31, 24, 24, 55, 28, 73,\n      13, 33, 76, 47, 38, 48, 66, 95, 72, 84, 23, 77, 65, 5,  28, 55, 32, 0,\n      14, 47, 57, 33, 36, 26, 59, 98, 85, 2,  49, 29, 40, 44, 84, 24, 23, 88,\n      66, 91, 4,  0,  4,  99, 40, 94, 55, 19, 13, 22, 96, 37, 89, 94, 78, 50,\n      0,  37, 48, 79, 69, 16, 15, 57, 91, 52, 85, 92, 18, 38, 56, 55, 11, 10,\n      27, 48, 98, 53, 83, 27, 14, 25, 53, 64, 71, 67, 26, 47, 53, 30, 76, 76,\n      67, 83, 9,  20, 4,  61, 69, 10, 93, 63, 37, 22, 26, 64, 10, 75, 39, 86,\n      34, 44, 42, 44, 4,  42, 37, 85, 3,  95, 49, 43, 84, 44, 73, 7,  59, 33,\n      21, 46, 86, 88, 17, 88, 83, 32, 53, 6,  83, 85, 54, 32, 92, 45, 13, 20,\n      49, 42, 7,  54, 76, 62, 58, 13, 99, 43, 94, 60, 43, 94, 58, 35, 69, 84,\n      23, 57, 22, 81, 97, 97, 49, 91, 76, 65, 71, 82, 72, 39, 53, 92, 58, 77,\n      20, 39, 20, 48, 46, 52, 20, 9,  85, 9,  48, 89, 24, 65, 73, 81, 73, 10,\n      1,  25, 89, 83, 48, 38, 56, 82, 68, 27, 35, 87, 68, 32, 89, 23, 90, 5,\n      99, 19, 55, 97, 83, 41, 34, 29, 69, 58, 8,  2,  90, 54, 66, 66, 37, 27,\n      86, 46, 48, 50, 63, 76, 96, 41, 36, 9,  38, 31, 46, 58, 17, 53, 53, 81,\n   
   79, 94, 95, 98, 96, 40, 43, 63, 2,  5,  26, 22, 10, 21, 43, 30, 30, 29,\n      80, 49, 51, 74, 41, 64, 86, 50, 23, 81, 48, 41, 48, 98, 55, 38, 61, 40,\n      52, 79, 99, 17, 71, 78, 62, 40, 5,  15, 26, 47, 75, 67, 17, 46, 93, 90,\n      2,  81, 78, 22, 12, 74, 7,  7,  36, 48, 13, 41, 30, 68, 86, 50, 28, 72,\n      40, 45, 82, 92, 38, 95, 68, 48, 42, 23, 4,  40, 82, 9,  59, 81, 58, 33,\n      68, 12, 60, 71, 91, 47, 49, 21, 55, 1,  77, 57, 53, 4,  67, 4,  13, 29,\n      76, 28, 70, 29, 20, 25, 81, 1,  57, 26, 74, 79, 95, 63, 83, 3,  28, 31,\n      49, 30, 87, 84, 29, 60, 47, 49, 45, 16, 37, 68, 13, 19};\n\n  EXPECT_FLOAT_EQ(1844.638672f,\n                  EuclideanDistance(*FixedVector<int8_t, 2048>::Cast(a2048),\n                                    *FixedVector<int8_t, 2048>::Cast(b2048)));\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_General) {\n  int8_t a8[] = {127, 0, 1, 2, -127, -127, -127, -127};\n  int8_t b8[] = {-127, -127, -127, -127, 1, 2, 1, 127};\n  int8_t a16[] = {127, 127, 16,   3,   100,  -127, 1,    2,\n                  3,   4,   -127, 100, -127, -127, -127, -127};\n  int8_t b16[] = {-127, 123, -127, -127, -127, -127, 127, 127,\n                  1,    2,   3,    4,    127,  127,  121, 16};\n  int8_t a32[] = {127, 127,  0,    0,   -127, -127, 0,    0,    0,    0, 0,\n                  0,   -127, -127, 127, 127,  0,    0,    -127, -127, 0, 0,\n                  127, 127,  127,  127, 0,    0,    -127, -127, 0,    0};\n  int8_t b32[] = {-127, -127, 0,    0,    127,  127, 0,   0,   0,   0, 0,\n                  0,    127,  127,  -127, -127, 0,   0,   127, 127, 0, 0,\n                  -127, -127, -127, -127, 0,    0,   127, 127, 0,   0};\n\n  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,\n                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,\n                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,\n                  0,   0, -127, 112,  -127, -127, -127, -127, 127,  127,\n                  
1,   2, 3,    4,    127,  127,  120};\n  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,\n                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,\n                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,\n                  0,    0, 127,  127, 80,   111,  122, -127, 1,   2,\n                  3,    4, -127, 112, -127, -127, -127};\n\n  EXPECT_FLOAT_EQ(227595.0f,\n                  SquaredEuclideanDistance(*FixedVector<int8_t, 8>::Cast(a8),\n                                           *FixedVector<int8_t, 8>::Cast(b8)));\n  EXPECT_FLOAT_EQ(\n      422020.0f, SquaredEuclideanDistance(*FixedVector<int8_t, 16>::Cast(a16),\n                                          *FixedVector<int8_t, 16>::Cast(b16)));\n  EXPECT_FLOAT_EQ(1032256.0f, SquaredEuclideanDistance(\n                                  *FixedVector<int8_t, 32>::Cast(a32),\n                                  *FixedVector<int8_t, 32>::Cast(b32)));\n  EXPECT_FLOAT_EQ(1379578.0f, SquaredEuclideanDistance(\n                                  *FixedVector<int8_t, 47>::Cast(a47),\n                                  *FixedVector<int8_t, 47>::Cast(b47)));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 
0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  SquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestEuclideanMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t 
*)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  EuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_1x1) {\n  TestSquaredEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x1) {\n  TestSquaredEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_2x2) {\n  TestSquaredEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_3x3) {\n  TestSquaredEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x1) {\n  TestSquaredEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x2) {\n  TestSquaredEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_4x4) {\n  TestSquaredEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x1) {\n  TestSquaredEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x2) {\n  TestSquaredEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x4) {\n  TestSquaredEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_8x8) {\n  TestSquaredEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x1) {\n  TestSquaredEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x2) {\n  TestSquaredEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x4) {\n  TestSquaredEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x8) 
{\n  TestSquaredEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_16x16) {\n  TestSquaredEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x1) {\n  TestSquaredEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x2) {\n  TestSquaredEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x4) {\n  TestSquaredEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x8) {\n  TestSquaredEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x16) {\n  TestSquaredEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_32x32) {\n  TestSquaredEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x1) {\n  TestSquaredEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x2) {\n  TestSquaredEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x4) {\n  TestSquaredEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x8) {\n  TestSquaredEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x16) {\n  TestSquaredEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x32) {\n  TestSquaredEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_64x64) {\n  TestSquaredEuclideanMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x1) {\n  TestSquaredEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x2) {\n  TestSquaredEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x4) {\n  TestSquaredEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x8) {\n  TestSquaredEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x16) {\n  TestSquaredEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x32) {\n  TestSquaredEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x64) {\n  TestSquaredEuclideanMatrix<128, 
128>();\n}\n\nTEST(DistanceMatrix, SquaredEuclidean_128x128) {\n  TestSquaredEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, Euclidean_1x1) {\n  TestEuclideanMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x1) {\n  TestEuclideanMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_2x2) {\n  TestEuclideanMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_3x3) {\n  TestEuclideanMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x1) {\n  TestEuclideanMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x2) {\n  TestEuclideanMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_4x4) {\n  TestEuclideanMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x1) {\n  TestEuclideanMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x2) {\n  TestEuclideanMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x4) {\n  TestEuclideanMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_8x8) {\n  TestEuclideanMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x1) {\n  TestEuclideanMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x2) {\n  TestEuclideanMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x4) {\n  TestEuclideanMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x8) {\n  TestEuclideanMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_16x16) {\n  TestEuclideanMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x1) {\n  TestEuclideanMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x2) {\n  TestEuclideanMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x4) {\n  TestEuclideanMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x8) {\n  TestEuclideanMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x16) {\n  TestEuclideanMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_32x32) {\n  TestEuclideanMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x1) {\n  TestEuclideanMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x2) {\n  TestEuclideanMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, 
Euclidean_64x4) {\n  TestEuclideanMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x8) {\n  TestEuclideanMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x16) {\n  TestEuclideanMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x32) {\n  TestEuclideanMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_64x64) {\n  TestEuclideanMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x1) {\n  TestEuclideanMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x2) {\n  TestEuclideanMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x4) {\n  TestEuclideanMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x8) {\n  TestEuclideanMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x16) {\n  TestEuclideanMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x32) {\n  TestEuclideanMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x64) {\n  TestEuclideanMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, Euclidean_128x128) {\n  TestEuclideanMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid EuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n   
                 (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      EuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    EuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n            
&matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Euclidean (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid SquaredEuclideanBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * 
dimension];\n      float *current_results = &results[j * batch_size];\n\n      SquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    SquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched SquaredEuclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched SquaredEuclidean (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {\n  EuclideanBenchmark<2, 1, 512, 128>();\n  EuclideanBenchmark<2, 2, 512, 128>();\n  EuclideanBenchmark<4, 1, 512, 128>();\n  EuclideanBenchmark<4, 2, 512, 128>();\n  EuclideanBenchmark<4, 4, 512, 128>();\n  EuclideanBenchmark<8, 1, 512, 128>();\n  EuclideanBenchmark<8, 2, 512, 128>();\n  EuclideanBenchmark<8, 4, 512, 128>();\n  EuclideanBenchmark<8, 8, 512, 128>();\n  EuclideanBenchmark<16, 1, 512, 128>();\n  EuclideanBenchmark<16, 2, 512, 128>();\n  
EuclideanBenchmark<16, 4, 512, 128>();\n  EuclideanBenchmark<16, 8, 512, 128>();\n  EuclideanBenchmark<16, 16, 512, 128>();\n  EuclideanBenchmark<32, 1, 512, 128>();\n  EuclideanBenchmark<32, 2, 512, 128>();\n  EuclideanBenchmark<32, 4, 512, 128>();\n  EuclideanBenchmark<32, 8, 512, 128>();\n  EuclideanBenchmark<32, 16, 512, 128>();\n  EuclideanBenchmark<32, 32, 512, 128>();\n  EuclideanBenchmark<64, 1, 512, 128>();\n  EuclideanBenchmark<64, 2, 512, 128>();\n  EuclideanBenchmark<64, 4, 512, 128>();\n  EuclideanBenchmark<64, 8, 512, 128>();\n  EuclideanBenchmark<128, 1, 512, 128>();\n}\n\nTEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {\n  SquaredEuclideanBenchmark<2, 1, 512, 128>();\n  SquaredEuclideanBenchmark<2, 2, 512, 128>();\n  SquaredEuclideanBenchmark<4, 1, 512, 128>();\n  SquaredEuclideanBenchmark<4, 2, 512, 128>();\n  SquaredEuclideanBenchmark<4, 4, 512, 128>();\n  SquaredEuclideanBenchmark<8, 1, 512, 128>();\n  SquaredEuclideanBenchmark<8, 2, 512, 128>();\n  SquaredEuclideanBenchmark<8, 4, 512, 128>();\n  SquaredEuclideanBenchmark<8, 8, 512, 128>();\n  SquaredEuclideanBenchmark<16, 1, 512, 128>();\n  SquaredEuclideanBenchmark<16, 2, 512, 128>();\n  SquaredEuclideanBenchmark<16, 4, 512, 128>();\n  SquaredEuclideanBenchmark<16, 8, 512, 128>();\n  SquaredEuclideanBenchmark<16, 16, 512, 128>();\n  SquaredEuclideanBenchmark<32, 1, 512, 128>();\n  SquaredEuclideanBenchmark<32, 2, 512, 128>();\n  SquaredEuclideanBenchmark<32, 4, 512, 128>();\n  SquaredEuclideanBenchmark<32, 8, 512, 128>();\n  SquaredEuclideanBenchmark<32, 16, 512, 128>();\n  SquaredEuclideanBenchmark<32, 32, 512, 128>();\n  SquaredEuclideanBenchmark<64, 1, 512, 128>();\n  SquaredEuclideanBenchmark<64, 2, 512, 128>();\n  SquaredEuclideanBenchmark<64, 4, 512, 128>();\n  SquaredEuclideanBenchmark<64, 8, 512, 128>();\n  SquaredEuclideanBenchmark<128, 1, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/hamming_distance_matrix_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <bitset>\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\nstatic inline void MatrixTranspose(uint64_t *dst, const uint64_t *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\nTEST(DistanceMatrix, Hamming_General) {\n  srand((uint32_t)time(NULL));\n  srand((uint32_t)rand());\n\n  FixedBitset<63936> bitset1;\n  FixedBitset<63936> bitset2;\n  std::bitset<63936> stl_bitset1;\n  std::bitset<63936> stl_bitset2;\n\n  for (uint32_t i = 0; i < 1333; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 
= (uint32_t)(rand() % bitset2.size());\n\n    bitset1.set(val1);\n    stl_bitset1.set(val1);\n\n    bitset2.set(val2);\n    stl_bitset2.set(val2);\n  }\n  for (uint32_t i = 0; i < 1666; ++i) {\n    uint32_t val1 = (uint32_t)(rand() % bitset1.size());\n    uint32_t val2 = (uint32_t)(rand() % bitset2.size());\n\n    bitset1.flip(val1);\n    stl_bitset1.flip(val1);\n\n    bitset2.flip(val2);\n    stl_bitset2.flip(val2);\n  }\n\n  float result0 = (float)(stl_bitset1 ^ stl_bitset2).count();\n  float result1 = Distance::Hamming(bitset1.data(), bitset2.data(),\n                                    bitset1.size() / 32 * 32);\n  float result2 = Distance::Hamming((const uint64_t *)bitset1.data(),\n                                    (const uint64_t *)bitset2.data(),\n                                    bitset1.size() / 64 * 64);\n  EXPECT_FLOAT_EQ(result0, result1);\n  EXPECT_FLOAT_EQ(result0, result2);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestHamming32Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t count = (std::uniform_int_distribution<size_t>(1, 8192))(gen);\n  size_t matrix_size = batch_size * count;\n  size_t query_matrix_size = query_size * count;\n\n  std::vector<uint32_t> matrix1(matrix_size);\n  std::vector<uint32_t> matrix2(matrix_size);\n  std::vector<uint32_t> query1(query_matrix_size);\n  std::vector<uint32_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint32_t> dist(0, 0xfffffffful);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), count, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const 
uint32_t *cur_query = &query1[i * count];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      HammingDistanceMatrix<uint32_t, 1, 1>::Compute(\n          &matrix1[j * count], cur_query, count * 32, &query_result[j]);\n    }\n  }\n  HammingDistanceMatrix<uint32_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], count * 32, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestHammingSquareRoot32Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t count = (std::uniform_int_distribution<size_t>(1, 8192))(gen);\n  size_t matrix_size = batch_size * count;\n  size_t query_matrix_size = query_size * count;\n\n  std::vector<uint32_t> matrix1(matrix_size);\n  std::vector<uint32_t> matrix2(matrix_size);\n  std::vector<uint32_t> query1(query_matrix_size);\n  std::vector<uint32_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint32_t> dist(0, 0xfffffffful);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), count, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint32_t *cur_query = &query1[i * count];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute(\n          &matrix1[j * count], cur_query, count * 32, &query_result[j]);\n    }\n  }\n  HammingSquareRootDistanceMatrix<uint32_t, batch_size, query_size>::Compute(\n     
 &matrix2[0], &query2[0], count * 32, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, Hamming32_1x1) {\n  TestHamming32Matrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_2x1) {\n  TestHamming32Matrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_2x2) {\n  TestHamming32Matrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_3x3) {\n  TestHamming32Matrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Hamming32_4x1) {\n  TestHamming32Matrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_4x2) {\n  TestHamming32Matrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_4x4) {\n  TestHamming32Matrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_8x1) {\n  TestHamming32Matrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_8x2) {\n  TestHamming32Matrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_8x4) {\n  TestHamming32Matrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_8x8) {\n  TestHamming32Matrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Hamming32_16x1) {\n  TestHamming32Matrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_16x2) {\n  TestHamming32Matrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_16x4) {\n  TestHamming32Matrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_16x8) {\n  TestHamming32Matrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Hamming32_16x16) {\n  TestHamming32Matrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x1) {\n  TestHamming32Matrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x2) {\n  TestHamming32Matrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x4) {\n  TestHamming32Matrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x8) {\n  TestHamming32Matrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x16) {\n  TestHamming32Matrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Hamming32_32x32) {\n  TestHamming32Matrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x1) {\n  TestHamming32Matrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x2) {\n  
TestHamming32Matrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x4) {\n  TestHamming32Matrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x8) {\n  TestHamming32Matrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x16) {\n  TestHamming32Matrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x32) {\n  TestHamming32Matrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Hamming32_64x64) {\n  TestHamming32Matrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x1) {\n  TestHamming32Matrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x2) {\n  TestHamming32Matrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x4) {\n  TestHamming32Matrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x8) {\n  TestHamming32Matrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x16) {\n  TestHamming32Matrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x32) {\n  TestHamming32Matrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x64) {\n  TestHamming32Matrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Hamming32_128x128) {\n  TestHamming32Matrix<128, 128>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_1x1) {\n  TestHammingSquareRoot32Matrix<1, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_2x1) {\n  TestHammingSquareRoot32Matrix<2, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_2x2) {\n  TestHammingSquareRoot32Matrix<2, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_3x3) {\n  TestHammingSquareRoot32Matrix<3, 3>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_4x1) {\n  TestHammingSquareRoot32Matrix<4, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_4x2) {\n  TestHammingSquareRoot32Matrix<4, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_4x4) {\n  TestHammingSquareRoot32Matrix<4, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_8x1) {\n  TestHammingSquareRoot32Matrix<8, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_8x2) {\n  TestHammingSquareRoot32Matrix<8, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_8x4) {\n  
TestHammingSquareRoot32Matrix<8, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_8x8) {\n  TestHammingSquareRoot32Matrix<8, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_16x1) {\n  TestHammingSquareRoot32Matrix<16, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_16x2) {\n  TestHammingSquareRoot32Matrix<16, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_16x4) {\n  TestHammingSquareRoot32Matrix<16, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_16x8) {\n  TestHammingSquareRoot32Matrix<16, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_16x16) {\n  TestHammingSquareRoot32Matrix<16, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x1) {\n  TestHammingSquareRoot32Matrix<32, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x2) {\n  TestHammingSquareRoot32Matrix<32, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x4) {\n  TestHammingSquareRoot32Matrix<32, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x8) {\n  TestHammingSquareRoot32Matrix<32, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x16) {\n  TestHammingSquareRoot32Matrix<32, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_32x32) {\n  TestHammingSquareRoot32Matrix<32, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x1) {\n  TestHammingSquareRoot32Matrix<64, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x2) {\n  TestHammingSquareRoot32Matrix<64, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x4) {\n  TestHammingSquareRoot32Matrix<64, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x8) {\n  TestHammingSquareRoot32Matrix<64, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x16) {\n  TestHammingSquareRoot32Matrix<64, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x32) {\n  TestHammingSquareRoot32Matrix<64, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_64x64) {\n  TestHammingSquareRoot32Matrix<64, 64>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x1) {\n  
TestHammingSquareRoot32Matrix<128, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x2) {\n  TestHammingSquareRoot32Matrix<128, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x4) {\n  TestHammingSquareRoot32Matrix<128, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x8) {\n  TestHammingSquareRoot32Matrix<128, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x16) {\n  TestHammingSquareRoot32Matrix<128, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x32) {\n  TestHammingSquareRoot32Matrix<128, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x64) {\n  TestHammingSquareRoot32Matrix<128, 64>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot32_128x128) {\n  TestHammingSquareRoot32Matrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid Hamming32Benchmark(void) {\n  const size_t count = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * count;\n  const size_t query_matrix_size = count * query_size;\n\n  std::vector<uint32_t> matrix1(matrix_size);\n  std::vector<uint32_t> matrix2(matrix_size);\n  std::vector<uint32_t> query1(query_matrix_size);\n  std::vector<uint32_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint32_t> dist(0, 0xfffffffful);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * count;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], count,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") UINT32 \" << count << \"d, 
\"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint32_t *matrix_batch = &matrix2[i * batch_size * count];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint32_t *current_query = &query1[j * count];\n      float *current_results = &results[j * batch_size];\n\n      HammingDistanceMatrix<uint32_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, count * 32, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint32_t *matrix_batch = &matrix2[i * batch_size * count];\n\n    HammingDistanceMatrix<uint32_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], count * 32, results.data());\n  }\n  std::cout << \"* N Batched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint32_t *matrix_batch = &matrix1[i * batch_size * count];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint32_t *current_query = &query1[j * count];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        HammingDistanceMatrix<uint32_t, 1, 1>::Compute(\n            &matrix_batch[k * count], current_query, count * 32,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Hamming32_Benchmark) {\n  Hamming32Benchmark<2, 1, 512, 64>();\n  Hamming32Benchmark<2, 2, 512, 64>();\n  Hamming32Benchmark<4, 1, 2048, 16>();\n  Hamming32Benchmark<4, 2, 2048, 
16>();\n  Hamming32Benchmark<4, 4, 2048, 16>();\n  Hamming32Benchmark<8, 1, 512, 64>();\n  Hamming32Benchmark<8, 2, 512, 64>();\n  Hamming32Benchmark<8, 4, 512, 64>();\n  Hamming32Benchmark<8, 8, 512, 64>();\n  Hamming32Benchmark<16, 1, 512, 64>();\n  Hamming32Benchmark<16, 2, 512, 64>();\n  Hamming32Benchmark<16, 4, 512, 64>();\n  Hamming32Benchmark<16, 8, 512, 64>();\n  Hamming32Benchmark<16, 16, 512, 64>();\n  Hamming32Benchmark<32, 1, 512, 64>();\n  Hamming32Benchmark<32, 2, 512, 64>();\n  Hamming32Benchmark<32, 4, 512, 64>();\n  Hamming32Benchmark<32, 8, 512, 64>();\n  Hamming32Benchmark<32, 16, 512, 64>();\n  Hamming32Benchmark<32, 32, 512, 64>();\n  Hamming32Benchmark<64, 1, 512, 64>();\n  Hamming32Benchmark<64, 2, 512, 64>();\n  Hamming32Benchmark<64, 4, 512, 64>();\n  Hamming32Benchmark<64, 8, 512, 64>();\n  Hamming32Benchmark<128, 1, 512, 64>();\n}\n\n#if defined(AILEGO_M64)\ntemplate <size_t M, size_t N>\nvoid TestHamming64Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t count = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n  size_t matrix_size = batch_size * count;\n  size_t query_matrix_size = query_size * count;\n\n  std::vector<uint64_t> matrix1(matrix_size);\n  std::vector<uint64_t> matrix2(matrix_size);\n  std::vector<uint64_t> query1(query_matrix_size);\n  std::vector<uint64_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint64_t> dist(0, 0x7fffffffffffffffull);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), count, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint64_t 
*cur_query = &query1[i * count];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      HammingDistanceMatrix<uint64_t, 1, 1>::Compute(\n          &matrix1[j * count], cur_query, count * 64, &query_result[j]);\n    }\n  }\n  HammingDistanceMatrix<uint64_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], count * 64, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestHammingSquareRoot64Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t count = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n  size_t matrix_size = batch_size * count;\n  size_t query_matrix_size = query_size * count;\n\n  std::vector<uint64_t> matrix1(matrix_size);\n  std::vector<uint64_t> matrix2(matrix_size);\n  std::vector<uint64_t> query1(query_matrix_size);\n  std::vector<uint64_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint64_t> dist(0, 0x7fffffffffffffffull);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), count, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint64_t *cur_query = &query1[i * count];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute(\n          &matrix1[j * count], cur_query, count * 64, &query_result[j]);\n    }\n  }\n  HammingSquareRootDistanceMatrix<uint64_t, batch_size, query_size>::Compute(\n      
&matrix2[0], &query2[0], count * 64, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, Hamming64_1x1) {\n  TestHamming64Matrix<1, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_2x1) {\n  TestHamming64Matrix<2, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_2x2) {\n  TestHamming64Matrix<2, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_3x3) {\n  TestHamming64Matrix<3, 3>();\n}\n\nTEST(DistanceMatrix, Hamming64_4x1) {\n  TestHamming64Matrix<4, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_4x2) {\n  TestHamming64Matrix<4, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_4x4) {\n  TestHamming64Matrix<4, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_8x1) {\n  TestHamming64Matrix<8, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_8x2) {\n  TestHamming64Matrix<8, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_8x4) {\n  TestHamming64Matrix<8, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_8x8) {\n  TestHamming64Matrix<8, 8>();\n}\n\nTEST(DistanceMatrix, Hamming64_16x1) {\n  TestHamming64Matrix<16, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_16x2) {\n  TestHamming64Matrix<16, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_16x4) {\n  TestHamming64Matrix<16, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_16x8) {\n  TestHamming64Matrix<16, 8>();\n}\n\nTEST(DistanceMatrix, Hamming64_16x16) {\n  TestHamming64Matrix<16, 16>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x1) {\n  TestHamming64Matrix<32, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x2) {\n  TestHamming64Matrix<32, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x4) {\n  TestHamming64Matrix<32, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x8) {\n  TestHamming64Matrix<32, 8>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x16) {\n  TestHamming64Matrix<32, 16>();\n}\n\nTEST(DistanceMatrix, Hamming64_32x32) {\n  TestHamming64Matrix<32, 32>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x1) {\n  TestHamming64Matrix<64, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x2) {\n  
TestHamming64Matrix<64, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x4) {\n  TestHamming64Matrix<64, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x8) {\n  TestHamming64Matrix<64, 8>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x16) {\n  TestHamming64Matrix<64, 16>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x32) {\n  TestHamming64Matrix<64, 32>();\n}\n\nTEST(DistanceMatrix, Hamming64_64x64) {\n  TestHamming64Matrix<64, 64>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x1) {\n  TestHamming64Matrix<128, 1>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x2) {\n  TestHamming64Matrix<128, 2>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x4) {\n  TestHamming64Matrix<128, 4>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x8) {\n  TestHamming64Matrix<128, 8>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x16) {\n  TestHamming64Matrix<128, 16>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x32) {\n  TestHamming64Matrix<128, 32>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x64) {\n  TestHamming64Matrix<128, 64>();\n}\n\nTEST(DistanceMatrix, Hamming64_128x128) {\n  TestHamming64Matrix<128, 128>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_1x1) {\n  TestHammingSquareRoot64Matrix<1, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_2x1) {\n  TestHammingSquareRoot64Matrix<2, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_2x2) {\n  TestHammingSquareRoot64Matrix<2, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_3x3) {\n  TestHammingSquareRoot64Matrix<3, 3>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_4x1) {\n  TestHammingSquareRoot64Matrix<4, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_4x2) {\n  TestHammingSquareRoot64Matrix<4, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_4x4) {\n  TestHammingSquareRoot64Matrix<4, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_8x1) {\n  TestHammingSquareRoot64Matrix<8, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_8x2) {\n  TestHammingSquareRoot64Matrix<8, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_8x4) {\n  
TestHammingSquareRoot64Matrix<8, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_8x8) {\n  TestHammingSquareRoot64Matrix<8, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_16x1) {\n  TestHammingSquareRoot64Matrix<16, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_16x2) {\n  TestHammingSquareRoot64Matrix<16, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_16x4) {\n  TestHammingSquareRoot64Matrix<16, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_16x8) {\n  TestHammingSquareRoot64Matrix<16, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_16x16) {\n  TestHammingSquareRoot64Matrix<16, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x1) {\n  TestHammingSquareRoot64Matrix<32, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x2) {\n  TestHammingSquareRoot64Matrix<32, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x4) {\n  TestHammingSquareRoot64Matrix<32, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x8) {\n  TestHammingSquareRoot64Matrix<32, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x16) {\n  TestHammingSquareRoot64Matrix<32, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_32x32) {\n  TestHammingSquareRoot64Matrix<32, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x1) {\n  TestHammingSquareRoot64Matrix<64, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x2) {\n  TestHammingSquareRoot64Matrix<64, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x4) {\n  TestHammingSquareRoot64Matrix<64, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x8) {\n  TestHammingSquareRoot64Matrix<64, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x16) {\n  TestHammingSquareRoot64Matrix<64, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x32) {\n  TestHammingSquareRoot64Matrix<64, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_64x64) {\n  TestHammingSquareRoot64Matrix<64, 64>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x1) {\n  
TestHammingSquareRoot64Matrix<128, 1>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x2) {\n  TestHammingSquareRoot64Matrix<128, 2>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x4) {\n  TestHammingSquareRoot64Matrix<128, 4>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x8) {\n  TestHammingSquareRoot64Matrix<128, 8>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x16) {\n  TestHammingSquareRoot64Matrix<128, 16>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x32) {\n  TestHammingSquareRoot64Matrix<128, 32>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x64) {\n  TestHammingSquareRoot64Matrix<128, 64>();\n}\n\nTEST(DistanceMatrix, HammingSquareRoot64_128x128) {\n  TestHammingSquareRoot64Matrix<128, 128>();\n}\n\n// Times HammingDistanceMatrix over uint64_t data three ways (batch vs. one\n// query, batch vs. N queries, and fully unbatched) and prints the elapsed\n// microseconds of each. M = batch size, N = query count, B = number of\n// blocks, D = uint64_t words per vector.\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid Hamming64Benchmark(void) {\n  const size_t count = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * count;\n  const size_t query_matrix_size = count * query_size;\n\n  std::vector<uint64_t> matrix1(matrix_size);\n  std::vector<uint64_t> matrix2(matrix_size);\n  std::vector<uint64_t> query1(query_matrix_size);\n  std::vector<uint64_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  // Use the full uint64_t range so all 64 bits of each word vary.\n  std::uniform_int_distribution<uint64_t> dist(0, 0xffffffffffffffffull);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * count;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], count,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), count, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") UINT64 \" << count << \"d, 
\"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint64_t *matrix_batch = &matrix2[i * batch_size * count];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint64_t *current_query = &query1[j * count];\n      float *current_results = &results[j * batch_size];\n\n      HammingDistanceMatrix<uint64_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, count * 64, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // N Batched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint64_t *matrix_batch = &matrix2[i * batch_size * count];\n\n    HammingDistanceMatrix<uint64_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], count * 64, results.data());\n  }\n  std::cout << \"* N Batched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Hamming\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint64_t *matrix_batch = &matrix1[i * batch_size * count];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint64_t *current_query = &query1[j * count];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        HammingDistanceMatrix<uint64_t, 1, 1>::Compute(\n            &matrix_batch[k * count], current_query, count * 64,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched Hamming (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_Hamming64_Benchmark) {\n  Hamming64Benchmark<2, 1, 512, 64>();\n  Hamming64Benchmark<2, 2, 512, 64>();\n  Hamming64Benchmark<4, 1, 2048, 16>();\n  Hamming64Benchmark<4, 2, 2048, 
16>();\n  Hamming64Benchmark<4, 4, 2048, 16>();\n  Hamming64Benchmark<8, 1, 512, 64>();\n  Hamming64Benchmark<8, 2, 512, 64>();\n  Hamming64Benchmark<8, 4, 512, 64>();\n  Hamming64Benchmark<8, 8, 512, 64>();\n  Hamming64Benchmark<16, 1, 512, 64>();\n  Hamming64Benchmark<16, 2, 512, 64>();\n  Hamming64Benchmark<16, 4, 512, 64>();\n  Hamming64Benchmark<16, 8, 512, 64>();\n  Hamming64Benchmark<16, 16, 512, 64>();\n  Hamming64Benchmark<32, 1, 512, 64>();\n  Hamming64Benchmark<32, 2, 512, 64>();\n  Hamming64Benchmark<32, 4, 512, 64>();\n  Hamming64Benchmark<32, 8, 512, 64>();\n  Hamming64Benchmark<32, 16, 512, 64>();\n  Hamming64Benchmark<32, 32, 512, 64>();\n  Hamming64Benchmark<64, 1, 512, 64>();\n  Hamming64Benchmark<64, 2, 512, 64>();\n  Hamming64Benchmark<64, 4, 512, 64>();\n  Hamming64Benchmark<64, 8, 512, 64>();\n  Hamming64Benchmark<128, 1, 512, 64>();\n}\n#endif  // AILEGO_M64\n"
  },
  {
    "path": "tests/ailego/math/inner_product_matrix_fp16_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <iostream>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float InnerProductDistance(const FixedVector<Float16, N> &lhs,\n                                  const FixedVector<Float16, N> &rhs) {\n  return Distance::InnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float MinusInnerProductDistance(const FixedVector<Float16, N> &lhs,\n                                       const FixedVector<Float16, N> &rhs) {\n  return Distance::MinusInnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, InnerProduct_General) {\n  FixedVector<Float16, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,\n       
                        555.0f, 0.8f,  5.5f,   3.75f, 9.0f,\n                               6.6f,   0.1f,  8.8f,   0.2f,  5.6f},\n      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(308441.62f,\n                                        InnerProductDistance(x15, y15), 1000));\n\n  FixedVector<Float16, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,\n                               9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,\n                               0.1f,  8.8f,  0.2f,   5.6f},\n      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,\n          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(308526.19f,\n                                        InnerProductDistance(x16, y16), 1000));\n\n  FixedVector<Float16, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,\n                               1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,\n                               3.4f, 4.5f,  5.6f, 1.6f,  1.3f},\n      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,\n          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(307.1762f,\n                                        InnerProductDistance(x17, y17), 1000));\n\n  FixedVector<Float16, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,\n                               4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,\n                               4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},\n      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(378.67197f,\n                                        InnerProductDistance(x18, y18), 1000));\n\n  FixedVector<Float16, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,\n                               2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 
4.5f,\n                               5.6f,  1.6f,  2.3f,  1.11f,  2.3f},\n      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(380.33203f,\n                                        InnerProductDistance(x19, y19), 1000));\n\n  FixedVector<Float16, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,\n                               1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,\n                               4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},\n      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,\n          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(222.23581f,\n                                        InnerProductDistance(x20, y20), 1000));\n\n  FixedVector<Float16, 21> x21{0.0f}, y21{0.0f};\n  EXPECT_TRUE(\n      MathHelper::IsAlmostEqual(0.0f, InnerProductDistance(x21, y21), 1000));\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_General) {\n  FixedVector<Float16, 1> x1{0.7f}, y1{0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -0.35009766f, MinusInnerProductDistance(x1, y1), 1000));\n\n  FixedVector<Float16, 2> x2{2.0f, 3.76f}, y2{2.0f, 0.901f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -7.387093f, MinusInnerProductDistance(x2, y2), 1000));\n\n  FixedVector<Float16, 3> x3{2.0f, 3.0f, 0.7f}, y3{2.0f, 3.0f, 2.0f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -14.400391f, MinusInnerProductDistance(x3, y3), 1000));\n\n  FixedVector<Float16, 4> x4{7.8f, -8.9f, 9.0f, 5.6f},\n      y4{7.8f, 8.9f, -9.0f, -0.1f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      99.89003f, MinusInnerProductDistance(x4, y4), 1000));\n\n  FixedVector<Float16, 5> x5{7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y5{7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -222.16441f, MinusInnerProductDistance(x5, y5), 1000));\n\n  
FixedVector<Float16, 6> x6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -222.66985f, MinusInnerProductDistance(x6, y6), 1000));\n\n  FixedVector<Float16, 7> x7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -249.9052f, MinusInnerProductDistance(x7, y7), 1000));\n\n  FixedVector<Float16, 8> x8{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f, 5.6f},\n      y8{5.22f, 0.711f, -7.8f, -8.9f, -9.0f, 0.1f, 0.2f, 0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      190.44284f, MinusInnerProductDistance(x8, y8), 1000));\n\n  FixedVector<Float16, 9> x9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f,\n                             6.6f,  0.1f,   0.2f, 5.6f},\n      y9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 6.6f, 0.1f, 0.2f, 0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -295.20654f, MinusInnerProductDistance(x9, y9), 1000));\n\n  FixedVector<Float16, 10> x10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f,\n                               9.0f,  6.6f,   0.1f, 0.2f, 5.6f},\n      y10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f, 9.0f, 6.6f, 0.1f, 0.2f, 0.522f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -325.57962f, MinusInnerProductDistance(x10, y10), 1000));\n\n  FixedVector<Float16, 11> x11{2.3f,    -1.11f, 3.04f, 8.23f, 1.0f, 7.7f,\n                               -11.11f, 2.3f,   3.4f,  4.5f,  5.6f},\n      y11{2.3f,    1.11f, 3.04f, 8.23f, -1.0f, 7.7f,\n          -11.11f, 2.3f,  3.4f,  4.5f,  0.511f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -302.63904f, MinusInnerProductDistance(x11, y11), 1000));\n\n  FixedVector<Float16, 12> x12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,\n                               7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},\n      y12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,\n          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -309.60065f, 
MinusInnerProductDistance(x12, y12), 1000));\n\n  FixedVector<Float16, 13> x13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,\n                               7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},\n      y13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,\n          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  3.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -346.13144f, MinusInnerProductDistance(x13, y13), 1000));\n\n  FixedVector<Float16, 14> x14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,\n                               3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 5.6f},\n      y14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -417.96613f, MinusInnerProductDistance(x14, y14), 1000));\n\n  FixedVector<Float16, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,\n                               555.0f, 0.8f,  5.5f,   3.75f, 9.0f,\n                               6.6f,   0.1f,  8.8f,   0.2f,  5.6f},\n      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -308441.62f, MinusInnerProductDistance(x15, y15), 1000));\n\n  FixedVector<Float16, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,\n                               9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,\n                               0.1f,  8.8f,  0.2f,   5.6f},\n      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,\n          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -308526.19f, MinusInnerProductDistance(x16, y16), 1000));\n\n  FixedVector<Float16, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,\n                               1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,\n                               3.4f, 4.5f,  5.6f, 1.6f,  1.3f},\n      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,\n          1.11f, 3.04f, 2.3f, 
3.4f, 4.5f, 5.6f, 1.6f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -307.17618f, MinusInnerProductDistance(x17, y17), 1000));\n\n  FixedVector<Float16, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,\n                               4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,\n                               4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},\n      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -378.67197f, MinusInnerProductDistance(x18, y18), 1000));\n\n  FixedVector<Float16, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,\n                               2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,\n                               5.6f,  1.6f,  2.3f,  1.11f,  2.3f},\n      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -380.33203f, MinusInnerProductDistance(x19, y19), 1000));\n\n  FixedVector<Float16, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,\n                               1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,\n                               4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},\n      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,\n          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      -222.23581f, MinusInnerProductDistance(x20, y20), 1000));\n\n  FixedVector<Float16, 21> x21{0.0f}, y21{0.0f};\n  EXPECT_TRUE(MathHelper::IsAlmostEqual(\n      0.0f, MinusInnerProductDistance(x21, y21), 1000));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 
65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      InnerProductMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  InnerProductMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMinusInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size 
* query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MinusInnerProductMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  MinusInnerProductMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));\n  }\n}\n\nTEST(DistanceMatrix, InnerProduct_1x1) {\n  TestInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x1) {\n  TestInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x2) {\n  TestInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_3x3) {\n  TestInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x1) {\n  TestInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x2) {\n  TestInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x4) {\n  TestInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x1) {\n  TestInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x2) {\n  TestInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x4) {\n  TestInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x8) {\n  TestInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, 
InnerProduct_16x1) {\n  TestInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x2) {\n  TestInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x4) {\n  TestInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x8) {\n  TestInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x16) {\n  TestInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x1) {\n  TestInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x2) {\n  TestInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x4) {\n  TestInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x8) {\n  TestInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x16) {\n  TestInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x32) {\n  TestInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x1) {\n  TestInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x2) {\n  TestInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x4) {\n  TestInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x8) {\n  TestInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x16) {\n  TestInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x32) {\n  TestInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x64) {\n  TestInnerProductMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x1) {\n  TestInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x2) {\n  TestInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x4) {\n  TestInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x8) {\n  TestInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x16) {\n  TestInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x32) {\n  
TestInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x64) {\n  TestInnerProductMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x128) {\n  TestInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_1x1) {\n  TestMinusInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x1) {\n  TestMinusInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x2) {\n  TestMinusInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_3x3) {\n  TestMinusInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x1) {\n  TestMinusInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x2) {\n  TestMinusInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x4) {\n  TestMinusInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x1) {\n  TestMinusInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x2) {\n  TestMinusInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x4) {\n  TestMinusInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x8) {\n  TestMinusInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x1) {\n  TestMinusInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x2) {\n  TestMinusInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x4) {\n  TestMinusInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x8) {\n  TestMinusInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x16) {\n  TestMinusInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x1) {\n  TestMinusInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x2) {\n  TestMinusInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x4) {\n  TestMinusInnerProductMatrix<32, 
4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x8) {\n  TestMinusInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x16) {\n  TestMinusInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x32) {\n  TestMinusInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x1) {\n  TestMinusInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x2) {\n  TestMinusInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x4) {\n  TestMinusInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x8) {\n  TestMinusInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x16) {\n  TestMinusInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x32) {\n  TestMinusInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x64) {\n  TestMinusInnerProductMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x1) {\n  TestMinusInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x2) {\n  TestMinusInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x4) {\n  TestMinusInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x8) {\n  TestMinusInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x16) {\n  TestMinusInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x32) {\n  TestMinusInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x64) {\n  TestMinusInnerProductMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x128) {\n  TestMinusInnerProductMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid InnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * 
batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      InnerProductMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    InnerProductMatrix<Float16, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], 
dimension, results.data());\n  }\n  std::cout << \"* N Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        InnerProductMatrix<Float16, 1, 1>::Compute(&matrix_batch[k * dimension],\n                                                   current_query, dimension,\n                                                   &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MinusInnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, 
query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MinusInnerProductMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MinusInnerProductMatrix<Float16, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MinusInnerProductMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MinusInnerProduct (us) \\t\"\n            << 
elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {\n  InnerProductBenchmark<2, 1, 512, 64>();\n  InnerProductBenchmark<2, 2, 512, 64>();\n  InnerProductBenchmark<4, 1, 512, 64>();\n  InnerProductBenchmark<4, 2, 512, 64>();\n  InnerProductBenchmark<4, 4, 512, 64>();\n  InnerProductBenchmark<8, 1, 512, 64>();\n  InnerProductBenchmark<8, 2, 512, 64>();\n  InnerProductBenchmark<8, 4, 512, 64>();\n  InnerProductBenchmark<8, 8, 512, 64>();\n  InnerProductBenchmark<16, 1, 512, 64>();\n  InnerProductBenchmark<16, 2, 512, 64>();\n  InnerProductBenchmark<16, 4, 512, 64>();\n  InnerProductBenchmark<16, 8, 512, 64>();\n  InnerProductBenchmark<16, 16, 512, 64>();\n  InnerProductBenchmark<32, 1, 512, 64>();\n  InnerProductBenchmark<32, 2, 512, 64>();\n  InnerProductBenchmark<32, 4, 512, 64>();\n  InnerProductBenchmark<32, 8, 512, 64>();\n  InnerProductBenchmark<32, 16, 512, 64>();\n  InnerProductBenchmark<32, 32, 512, 64>();\n  InnerProductBenchmark<64, 1, 512, 64>();\n  InnerProductBenchmark<64, 2, 512, 64>();\n  InnerProductBenchmark<64, 4, 512, 64>();\n  InnerProductBenchmark<64, 8, 512, 64>();\n  InnerProductBenchmark<128, 1, 512, 64>();\n  InnerProductBenchmark<1, 1, 1024, 256>();\n}\n\nTEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {\n  MinusInnerProductBenchmark<2, 1, 512, 64>();\n  MinusInnerProductBenchmark<2, 2, 512, 64>();\n  MinusInnerProductBenchmark<4, 1, 512, 64>();\n  MinusInnerProductBenchmark<4, 2, 512, 64>();\n  MinusInnerProductBenchmark<4, 4, 512, 64>();\n  MinusInnerProductBenchmark<8, 1, 512, 64>();\n  MinusInnerProductBenchmark<8, 2, 512, 64>();\n  MinusInnerProductBenchmark<8, 4, 512, 64>();\n  MinusInnerProductBenchmark<8, 8, 512, 64>();\n  MinusInnerProductBenchmark<16, 1, 512, 64>();\n  MinusInnerProductBenchmark<16, 2, 512, 64>();\n  MinusInnerProductBenchmark<16, 4, 512, 64>();\n  MinusInnerProductBenchmark<16, 8, 512, 64>();\n  MinusInnerProductBenchmark<16, 16, 512, 64>();\n  
MinusInnerProductBenchmark<32, 1, 512, 64>();\n  MinusInnerProductBenchmark<32, 2, 512, 64>();\n  MinusInnerProductBenchmark<32, 4, 512, 64>();\n  MinusInnerProductBenchmark<32, 8, 512, 64>();\n  MinusInnerProductBenchmark<32, 16, 512, 64>();\n  MinusInnerProductBenchmark<32, 32, 512, 64>();\n  MinusInnerProductBenchmark<64, 1, 512, 64>();\n  MinusInnerProductBenchmark<64, 2, 512, 64>();\n  MinusInnerProductBenchmark<64, 4, 512, 64>();\n  MinusInnerProductBenchmark<64, 8, 512, 64>();\n  MinusInnerProductBenchmark<128, 1, 512, 64>();\n  MinusInnerProductBenchmark<1, 1, 1024, 256>();\n}\n\nTEST(DistanceMatrix, DISABLED_MinusInnerProduct_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 100000000LLU;\n\n  std::vector<Float16> data(dimension);\n  std::vector<Float16> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    MinusInnerProductMatrix<Float16, 1, 1>::Compute(&data[0], &query[0],\n                                                    dimension, &result);\n  }\n}\n\nstatic inline float SparseDistanceCommon(uint32_t count1, uint32_t *index1,\n                                         Float16 *value1, uint32_t count2,\n                                         uint32_t *index2, Float16 *value2) {\n  float result{0.0f};\n\n  size_t m = 0;\n  size_t q = 0;\n  while (m < count1 && q < count2) {\n    if (index1[m] == index2[q]) {\n      result += value1[m] * value2[q];\n\n      ++m;\n      ++q;\n    } else if (index1[m] < index2[q]) {\n      ++m;\n    } else {\n      ++q;\n    }\n  }\n\n  return result;\n}\n\nvoid TestInnerProductSparse(void) {\n  // test 1\n  const uint32_t sparse_vec_count_0 = 52;\n  uint32_t sparse_vec_index_0[] = {\n      33,   66,   77,   209,  
385,  396,  539,  583,  649,  715,  880,\n      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,\n      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,\n      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,\n      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};\n  FixedVector<Float16, sparse_vec_count_0> sparse_vec_value_0{\n      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,\n      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,\n      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,\n      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,\n      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,\n      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,\n      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,\n      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,\n      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,\n      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,\n      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,\n      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,\n      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};\n\n  const uint32_t sparse_vec_count_1 = 43;\n  uint32_t sparse_vec_index_1[] = {\n      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,\n      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,\n      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,\n      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};\n  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{\n      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,\n      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,\n      0.497209110076,  0.852745969955,   0.403748558594,  
-0.634016410599,\n      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,\n      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,\n      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,\n      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,\n      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,\n      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,\n      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,\n      0.247628793044,  -0.984985692607,  -0.427929860028};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),\n      sparse_query_buffer_1);\n\n  float result0{0.0f};\n  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,\n                                 sparse_vec_value_0.data(), sparse_vec_count_1,\n                                 sparse_vec_index_1, sparse_vec_value_1.data());\n\n  float result1{0.0f};\n  MinusInnerProductSparseMatrix<Float16>::Compute(\n      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);\n  result1 = -result1;\n\n  EXPECT_GE(0.00001, std::abs(result0 - result1));\n\n  // test 2\n  constexpr uint32_t sparse_vec_count_2 = 49;\n  uint32_t sparse_vec_index_2[] = {\n      13200,  20900,  36300,  41800,  50600,  74800,  78100,  81400,  93500,\n      99000,  107800, 121000, 127600, 137500, 140800, 143000, 145200, 166100,\n      174900, 193600, 194700, 195800, 233200, 261800, 262900, 273900, 277200,\n      299200, 302500, 343200, 381700, 387200, 418000, 421300, 436700, 449900,\n      480700, 510400, 586300, 596200, 603900, 607200, 
612700, 625900, 632500,\n      633600, 639100, 642400, 650100};\n  FixedVector<Float16, sparse_vec_count_2> sparse_vec_value_2{\n      0.167493264953,  0.178347102375,   0.61850792017,    0.707662206696,\n      -0.604456492928, 0.898905062153,   -0.971984671516,  -0.337950525868,\n      -0.942538751319, -0.115612454156,  0.78433412971,    0.601522288928,\n      -0.640321042923, -0.235673191423,  0.00632807223978, 0.629970437467,\n      0.966519256786,  -0.279362437157,  0.396153064627,   -0.614592812875,\n      -0.642157513141, 0.686723258138,   0.10227967727,    -0.5921196708,\n      0.499411577177,  -0.0188556369919, 0.512245212443,   0.424666758023,\n      0.299827154891,  -0.615468257454,  -0.0499098903374, -0.54873640329,\n      0.899673049133,  -0.873237346565,  0.463117084808,   -0.810200151551,\n      0.676836615658,  0.596247430713,   0.946225552468,   0.968425796351,\n      -0.821041580744, -0.697734977387,  0.295618053879,   -0.476597945375,\n      -0.246035224835, 0.927603570489,   -0.640242995569,  0.610224433234,\n      -0.657550506633};\n\n  constexpr uint32_t sparse_vec_count_3 = 58;\n  uint32_t sparse_vec_index_3[] = {\n      13200,  19800,  37400,  56100,  68200,  78100,  81400,  99000,  103400,\n      107800, 108900, 110000, 111100, 125400, 127600, 137500, 141900, 151800,\n      154000, 155100, 158400, 163900, 165000, 173800, 198000, 201300, 215600,\n      247500, 249700, 264000, 269500, 287100, 291500, 311300, 312400, 336600,\n      353100, 354200, 361900, 367400, 390500, 398200, 407000, 414700, 424600,\n      510400, 533500, 535700, 551100, 556600, 568700, 576400, 577500, 590700,\n      592900, 618200, 631400, 636900};\n  FixedVector<Float16, sparse_vec_count_3> sparse_vec_value_3{\n      0.175769744964,  -0.198506965419,  0.0842021015107, 0.544957076263,\n      0.0856447356878, 0.838582935178,   0.796525374862,  -0.931940801441,\n      0.555150441425,  0.957490431546,   -0.422126167235, -0.40903200281,\n      0.242643233475,  0.698565387541,   
-0.325754491857, 0.540403772154,\n      -0.449888493042, 0.349262051644,   -0.612943655195, 0.874112675658,\n      0.943939922271,  -0.994946966212,  -0.978705162429, 0.321190597007,\n      0.17722019302,   0.6041089417,     -0.353184098327, -0.938569390092,\n      -0.92268220981,  -0.268600478592,  -0.598069229627, 0.0720175726713,\n      0.426800021137,  0.369250757861,   -0.823348360327, -0.664061107875,\n      -0.418342805261, -0.430818720049,  0.0941988181812, 0.0765632945538,\n      -0.148533061047, 0.404665036566,   -0.170747760502, -0.206564280292,\n      0.311035754032,  0.498520039471,   -0.16255148444,  -0.137950933749,\n      -0.234990864629, 0.602901363949,   0.0297103943437, -0.730955584059,\n      0.117169059405,  -0.0746546228896, 0.39067258928,   -0.214782717972,\n      -0.111009971497, -0.87766242691};\n\n  std::string sparse_query_buffer_2;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2.data(),\n      sparse_query_buffer_2);\n\n  std::string sparse_query_buffer_3;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3.data(),\n      sparse_query_buffer_3);\n\n  float result2{0.0f};\n  result2 = SparseDistanceCommon(sparse_vec_count_2, sparse_vec_index_2,\n                                 sparse_vec_value_2.data(), sparse_vec_count_3,\n                                 sparse_vec_index_3, sparse_vec_value_3.data());\n\n  float result3{0.0f};\n  MinusInnerProductSparseMatrix<Float16>::Compute(\n      sparse_query_buffer_2.data(), sparse_query_buffer_3.data(), &result3);\n  result3 = -result3;\n\n  EXPECT_GE(0.00001, std::abs(result2 - result3));\n}\n\nvoid TestInnerProductSparseMore(void) {\n  std::vector<uint32_t> sparse_vec_counts;\n  std::vector<uint32_t *> sparse_vec_indices;\n  std::vector<Float16 *> sparse_vec_values;\n\n  const uint32_t sparse_vec_count_0 = 173;\n  uint32_t 
sparse_vec_index_0[] = {\n      1012,  1996,  2001,  2018,  2020,  2036,  2037,  2056,  2058,  2069,\n      2111,  2116,  2138,  2162,  2166,  2245,  2253,  2259,  2306,  2307,\n      2318,  2331,  2351,  2359,  2390,  2419,  2426,  2428,  2466,  2470,\n      2535,  2554,  2557,  2568,  2590,  2622,  2671,  2739,  2765,  2812,\n      2817,  2837,  2913,  2920,  3003,  3092,  3112,  3125,  3144,  3214,\n      3241,  3249,  3260,  3268,  3271,  3278,  3280,  3330,  3463,  3478,\n      3716,  3739,  3768,  3800,  3908,  3934,  3992,  4028,  4045,  4072,\n      4146,  4254,  4301,  4382,  4454,  4471,  4504,  4517,  4598,  4806,\n      4807,  4847,  4928,  4988,  5081,  5113,  5177,  5190,  5197,  5201,\n      5234,  5456,  5621,  5689,  5792,  5817,  5823,  5875,  5920,  5921,\n      5951,  5968,  6033,  6112,  6145,  6215,  6344,  6396,  6429,  6438,\n      6529,  6627,  6691,  6731,  6801,  6865,  6950,  7036,  7128,  7155,\n      7461,  7551,  7596,  7691,  7784,  7789,  7848,  7857,  8044,  8052,\n      8053,  8553,  8573,  8664,  8817,  8826,  9250,  9273,  9593,  9727,\n      10013, 10106, 10617, 10639, 10753, 11657, 12108, 13128, 13463, 13702,\n      13787, 14152, 14332, 15237, 15313, 15359, 15699, 16724, 17171, 17571,\n      17669, 20168, 20805, 20972, 22134, 22229, 22779, 24762, 24823, 25526,\n      25699, 26761, 27885};\n  FixedVector<Float16, sparse_vec_count_0> sparse_vec_value_0{\n      0.36311877,  0.10386213,  0.64821976,   0.26300138,    0.29727572,\n      0.047292523, 0.022334402, 0.118793316,  0.7198291,     0.73566943,\n      0.19491579,  0.5763569,   0.5245229,    0.022828134,   0.43562022,\n      0.6946562,   0.09275672,  0.9687072,    0.1751608,     0.09703954,\n      0.18717986,  0.43182945,  0.055112287,  0.0021027816,  0.13972417,\n      0.1019873,   0.8679199,   0.26797894,   0.097350314,   0.5125363,\n      0.2829703,   0.052232087, 0.3248494,    1.1258097,     0.90756655,\n      1.6490538,   0.45066822,  0.004210417,  0.028443621,   
0.41171393,\n      0.09246816,  0.053040083, 0.052729037,  0.00041907438, 0.32047704,\n      0.2290303,   1.3542659,   0.28811434,   1.1722984,     0.4484738,\n      0.73670006,  0.22390367,  0.0058781556, 0.48173144,    0.76392287,\n      0.32048634,  0.42589885,  0.8624791,    0.0376546,     0.56702816,\n      0.002337549, 1.5856861,   0.14177673,   0.22762497,    0.6601752,\n      1.0603137,   0.914821,    0.34792075,   1.4387932,     0.035774633,\n      0.04391008,  0.7179224,   0.49199906,   0.043692447,   1.1404462,\n      0.47572234,  0.22777049,  0.7626374,    0.59730506,    1.4541638,\n      1.6540457,   0.089919806, 0.0050144624, 0.15902519,    0.2989032,\n      0.121926464, 0.11911,     0.27476037,   1.2774497,     0.42462146,\n      0.30179682,  0.18773684,  0.82144237,   1.2033592,     0.07180116,\n      0.06378868,  0.029040875, 0.2089903,    0.03591103,    0.94913304,\n      0.18240769,  0.9050947,   0.0034226696, 1.2841027,     0.629526,\n      0.06401547,  1.0698998,   0.11138009,   0.20497903,    0.017457427,\n      0.6316996,   0.12303611,  0.01563728,   0.090583175,   0.23981698,\n      0.48518667,  0.6207808,   1.8336427,    2.3282833,     0.8153351,\n      0.026216522, 0.6143031,   0.17374748,   0.32929608,    0.33730298,\n      1.1497657,   0.1926745,   0.14235665,   1.1076177,     0.945609,\n      0.48826388,  0.10458124,  0.19699246,   0.20899634,    0.44853806,\n      0.26411146,  0.7495864,   1.3681723,    1.4299264,     0.037516754,\n      0.17946614,  0.98060745,  0.055851664,  0.2002921,     0.45136684,\n      0.33716172,  0.58752763,  0.34051904,   1.9018586,     0.20597915,\n      0.82819384,  0.23866963,  0.4160662,    0.11889692,    0.172538,\n      0.005433464, 0.089198045, 0.3896585,    0.74038976,    0.24974349,\n      0.044961147, 0.32671204,  0.044312827,  0.25430596,    0.021065181,\n      0.071978964, 1.992692,    0.02640776,   1.7344381,     0.09561436,\n      0.07097204,  0.2922402,   0.8794989};\n\n  const uint32_t 
sparse_vec_count_1 = 144;\n  uint32_t sparse_vec_index_1[] = {\n      1012,  1016,  1059,  1996,  2001,  2020,  2049,  2068,  2076,  2088,\n      2109,  2138,  2145,  2149,  2162,  2203,  2220,  2224,  2256,  2259,\n      2318,  2373,  2381,  2390,  2393,  2419,  2462,  2466,  2485,  2506,\n      2554,  2557,  2580,  2590,  2622,  2633,  2645,  2671,  2716,  2724,\n      2900,  2942,  2943,  3003,  3029,  3092,  3112,  3125,  3260,  3271,\n      3278,  3283,  3288,  3439,  3466,  3478,  3521,  3578,  3594,  3595,\n      3607,  3647,  3690,  3800,  3826,  3896,  3908,  3934,  3947,  3987,\n      4045,  4068,  4204,  4254,  4255,  4302,  4329,  4471,  4504,  4517,\n      4566,  4736,  4762,  4789,  5081,  5094,  5105,  5195,  5197,  5201,\n      5233,  5234,  5584,  5817,  5823,  5832,  5875,  5951,  5968,  6033,\n      6035,  6179,  6215,  6245,  6383,  6394,  6396,  6529,  6613,  6691,\n      6801,  7091,  7128,  7155,  7240,  7461,  7551,  7596,  7691,  7738,\n      7784,  8027,  8144,  8192,  8249,  8309,  8573,  8647,  8826,  9379,\n      9593,  9767,  10400, 10461, 10530, 11028, 12799, 13787, 14487, 14670,\n      15237, 15523, 20168, 25755};\n  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{\n      0.3815109,   0.21950184,   0.389138,    0.03037462,  0.738938,\n      0.11151163,  0.21257511,   0.008723602, 0.42403504,  0.17748593,\n      0.38613674,  0.38208488,   0.49048766,  0.056615792, 1.285813,\n      1.1482359,   0.016783785,  0.7362169,   0.21784282,  1.0905122,\n      0.37420613,  0.81915,      0.67411584,  0.35778007,  0.80538017,\n      0.10094925,  1.2726786,    0.12334787,  0.18297458,  0.13315988,\n      0.041079145, 0.2655652,    0.10946682,  0.6782494,   1.7451618,\n      0.17126456,  0.17718226,   0.7430134,   0.9090848,   0.31985787,\n      0.21779177,  0.13639484,   1.2293936,   0.065131165, 0.03718982,\n      0.64121664,  0.46517274,   0.39498892,  0.07401267,  1.2061241,\n      0.1276834,   0.059918232,  1.1935436,   0.61886644, 
 0.32731527,\n      0.37830237,  1.0287925,    0.09565632,  0.4313508,   0.03845683,\n      0.066990376, 0.10886483,   0.097683005, 0.29624575,  0.48645914,\n      0.250733,    0.03274726,   1.205507,    0.048636433, 0.034002367,\n      0.83021015,  0.044592205,  0.06007409,  1.1224703,   0.45620173,\n      0.16457361,  0.053571727,  0.12527509,  0.1308366,   0.92323685,\n      0.7821679,   0.23838642,   0.2558486,   0.09402168,  0.22815736,\n      0.51750314,  0.08442147,   0.5565446,   0.3642559,   0.6661639,\n      0.73750395,  0.17278494,   0.05865512,  0.013724559, 0.023783961,\n      0.04283593,  0.24765956,   0.3991119,   1.5201892,   0.035530984,\n      0.049782272, 0.06485597,   0.5367931,   0.15097857,  0.014405596,\n      0.14585418,  0.22106051,   0.49575308,  0.08290891,  0.17875223,\n      0.21095915,  0.0038430362, 2.3110201,   0.6543391,   0.06421487,\n      0.3782336,   0.3514111,    0.5225064,   0.21472597,  0.07987356,\n      0.06002587,  1.5242931,    0.081204355, 0.32025364,  0.39068836,\n      0.027896391, 0.2872351,    0.50436527,  0.5434884,   1.653683,\n      1.444315,    0.988968,     0.024239752, 0.055084217, 0.074782506,\n      0.021114044, 0.07288233,   0.822755,    0.10772858,  0.6189507,\n      0.29534152,  0.20032129,   0.5609191,   1.2844883};\n\n  const uint32_t sparse_vec_count_2 = 153;\n  uint32_t sparse_vec_index_2[] = {\n      1012,  1059,  1996,  2001,  2020,  2049,  2052,  2055,  2056,  2081,\n      2088,  2124,  2138,  2156,  2158,  2162,  2191,  2231,  2242,  2256,\n      2259,  2311,  2318,  2359,  2373,  2381,  2390,  2437,  2458,  2466,\n      2477,  2510,  2554,  2580,  2590,  2622,  2640,  2671,  2689,  2825,\n      2844,  2881,  2904,  2957,  3029,  3112,  3125,  3144,  3214,  3246,\n      3271,  3312,  3330,  3399,  3443,  3478,  3578,  3595,  3647,  3697,\n      3740,  3800,  3817,  3818,  3928,  3934,  3987,  4034,  4072,  4079,\n      4172,  4204,  4254,  4255,  4302,  4517,  4526,  4695,  4706,  4795,\n      
4807,  4986,  5081,  5091,  5113,  5195,  5197,  5234,  5253,  5263,\n      5623,  5646,  5656,  5817,  5875,  5951,  5954,  5968,  6033,  6061,\n      6108,  6119,  6157,  6213,  6215,  6287,  6384,  6396,  6461,  6469,\n      6613,  6801,  6842,  7128,  7240,  7305,  7477,  7551,  7596,  7609,\n      7624,  7723,  7779,  7857,  7935,  8144,  8238,  8249,  8275,  8547,\n      8573,  8647,  8826,  8927,  9036,  9491,  9593,  9767,  10267, 10461,\n      10505, 10660, 10721, 11028, 12578, 13787, 14487, 14874, 15523, 20168,\n      21565, 24212, 25628};\n  FixedVector<Float16, sparse_vec_count_2> sparse_vec_value_2{\n      0.19194126,  0.11344757,   0.21317342,  0.6771587,    0.08591107,\n      0.006228663, 0.28981656,   0.58056134,  0.064362876,  0.5794717,\n      0.4288167,   0.59527594,   0.6106896,   0.23139843,   0.897008,\n      0.20689227,  0.28713426,   0.38175523,  0.4028853,    0.08509491,\n      1.0562526,   0.1165676,    0.06347306,  0.41331312,   0.16935593,\n      0.1626863,   0.29352358,   0.45827967,  0.21193665,   0.39532298,\n      0.0789344,   0.026420705,  0.1763078,   0.18424834,   0.7216729,\n      1.6683924,   0.06257952,   0.13419773,  0.6851299,    1.2139059,\n      0.092483185, 0.10803583,   0.74339646,  0.14461784,   0.2389669,\n      0.9306581,   0.5645601,    0.83565444,  0.11930474,   0.22862941,\n      0.6214566,   0.0033283439, 0.42018214,  0.15267797,   0.029068783,\n      0.24103808,  0.18765616,   0.11574381,  0.31545344,   0.09386852,\n      0.038362045, 0.7730324,    0.4456206,   0.20152733,   0.94718367,\n      1.1934134,   0.12610391,   0.014013804, 0.47198555,   0.21791361,\n      0.05394335,  0.08415188,   0.066486694, 0.47462225,   0.16693182,\n      0.9021425,   0.27905586,   0.09939155,  0.12642553,   0.27529165,\n      0.024804203, 0.24346212,   0.25561446,  1.4675297,    0.21566682,\n      0.5453194,   0.21558505,   0.21294887,  0.2740208,    0.43185237,\n      0.2280337,   0.0048945076, 0.26826337,  0.016979327,  
0.3338952,\n      0.23080347,  0.21200272,   1.3268396,   0.05323057,   0.30005422,\n      0.088871606, 0.13259241,   0.04766706,  0.0017769856, 0.2698414,\n      0.08068406,  0.38578644,   0.09752118,  0.13972333,   0.0731375,\n      0.36664346,  0.12214721,   0.1541759,   2.2295072,    0.22542699,\n      0.028530587, 0.022988612,  0.35836184,  0.10530607,   0.53756726,\n      0.05818686,  0.044951066,  0.05753079,  0.09009998,   0.24644017,\n      0.22693348,  0.0019512648, 0.035316195, 0.057344455,  0.36419895,\n      0.1534858,   0.18924302,   0.38702026,  1.2569604,    0.07787755,\n      1.7163913,   1.1903315,    0.8173934,   0.13888475,   0.10908335,\n      0.35437793,  0.15787303,   0.25039884,  0.130508,     0.09830101,\n      0.5841259,   0.22020355,   0.37849018,  0.14054261,   0.5179198,\n      1.1891438,   0.44022372,   0.1794719};\n\n  const uint32_t sparse_vec_count_3 = 166;\n  uint32_t sparse_vec_index_3[] = {\n      1012,  1059,  1996,  1997,  2001,  2020,  2034,  2076,  2086,  2104,\n      2138,  2149,  2162,  2170,  2171,  2220,  2231,  2236,  2259,  2311,\n      2315,  2318,  2328,  2343,  2344,  2359,  2381,  2390,  2419,  2458,\n      2462,  2466,  2472,  2479,  2491,  2510,  2557,  2558,  2565,  2580,\n      2590,  2622,  2724,  2764,  2817,  2837,  2881,  2900,  2911,  2933,\n      2949,  3003,  3029,  3058,  3092,  3101,  3125,  3188,  3271,  3330,\n      3386,  3399,  3434,  3447,  3474,  3478,  3578,  3595,  3607,  3650,\n      3690,  3740,  3779,  3800,  3817,  3818,  3826,  3910,  3918,  3934,\n      3987,  3992,  4006,  4034,  4068,  4075,  4114,  4146,  4172,  4255,\n      4302,  4327,  4503,  4517,  4758,  4883,  4944,  4975,  5036,  5195,\n      5205,  5218,  5233,  5234,  5253,  5456,  5623,  5656,  5687,  5817,\n      5875,  5951,  5954,  5968,  6059,  6119,  6145,  6157,  6215,  6262,\n      6384,  6394,  6613,  6787,  6801,  6842,  6993,  7128,  7156,  7240,\n      7305,  7421,  7551,  7596,  7676,  7935,  8547,  8573,  8647,  
8773,\n      8826,  8886,  8911,  9036,  9274,  9433,  9593,  9767,  9915,  10267,\n      10461, 10505, 11028, 11274, 11593, 13058, 13787, 14487, 15237, 17060,\n      20168, 21695, 23041, 24363, 25526, 25755};\n  FixedVector<Float16, sparse_vec_count_3> sparse_vec_value_3{\n      0.17927244,   0.20557176,   0.40560228,   0.32370853,  0.8060634,\n      0.21424179,   1.0674698,    0.6046889,    0.21051478,  0.46186206,\n      0.24661283,   0.5616991,    1.016811,     0.2618776,   0.9686127,\n      0.869671,     0.1458332,    0.60725594,   1.206012,    0.10357225,\n      0.4350595,    0.83702874,   0.146196,     0.8644738,   0.15587087,\n      0.16456357,   0.36376593,   1.053665,     0.06609649,  0.6504239,\n      0.9697015,    0.04947369,   0.43753505,   0.04289205,  0.42075413,\n      0.330524,     0.1743388,    0.6540892,    0.012900644, 0.23207273,\n      0.2674499,    1.9736407,    0.21540764,   0.63648874,  0.049446102,\n      0.3750183,    0.17441651,   0.123951435,  0.015306404, 0.1767618,\n      0.24109434,   0.4245122,    0.114403255,  0.91849947,  0.12018716,\n      0.01165807,   0.47680765,   0.036503244,  0.5782868,   0.9163635,\n      0.27396393,   0.16385026,   0.052631885,  0.72294754,  0.4022935,\n      0.06351255,   0.27786675,   0.25394455,   0.08041568,  1.3137422,\n      0.5514297,    0.2503315,    0.009040705,  0.40985608,  0.27673048,\n      0.14055687,   0.50529444,   0.6049716,    1.0692317,   1.207644,\n      0.108388424,  0.9495853,    0.35366973,   0.3762234,   0.19875458,\n      0.14685634,   0.0060924664, 1.0126622,    0.034943417, 0.49489433,\n      0.34451365,   0.21992311,   0.7039926,    0.9501215,   0.34629604,\n      0.20126931,   0.23908958,   0.019030606,  0.12528977,  0.6009518,\n      0.056694727,  0.19225678,   0.61745095,   0.26769277,  0.18739952,\n      0.10380342,   0.08536158,   0.18679029,   0.040631995, 0.23538794,\n      0.081166975,  0.3206779,    0.0018739193, 1.5819491,   0.07052032,\n      0.2504746,    0.7514167,  
  0.06575893,   0.08000714,  0.0012445971,\n      0.23989597,   0.12001178,   0.51009554,   0.14469045,  0.12445986,\n      0.08644873,   0.5645543,    2.539498,     0.54383165,  0.22437337,\n      0.0018195114, 0.11787724,   0.34932667,   0.49611032,  0.24439196,\n      0.100613214,  0.2844197,    0.38720158,   0.22204469,  0.078220785,\n      0.76444066,   1.7794204,    0.17640579,   0.04227443,  0.28023362,\n      0.06434563,   1.320367,     0.9287479,    0.14726646,  0.27983913,\n      0.022449814,  0.09246922,   0.22375125,   0.10417365,  0.034148056,\n      0.12830476,   0.6065902,    0.16593556,   0.25840235,  0.2596266,\n      0.6388732,    1.6666834,    0.030998405,  0.14869562,  0.30502653,\n      1.183558};\n\n  const uint32_t sparse_vec_count_4 = 104;\n  uint32_t sparse_vec_index_4[] = {\n      1012,  1996,  1997,  2001,  2033,  2034,  2080,  2120,  2142,  2149,\n      2220,  2231,  2259,  2284,  2318,  2338,  2381,  2405,  2424,  2436,\n      2458,  2472,  2533,  2544,  2557,  2580,  2609,  2622,  2627,  2688,\n      2800,  2820,  2837,  2862,  2932,  2949,  3029,  3036,  3181,  3390,\n      3439,  3690,  3780,  3784,  3818,  3872,  3931,  3934,  4034,  4037,\n      4075,  4219,  4348,  4517,  4573,  4617,  4773,  4809,  4822,  4879,\n      5234,  5272,  5851,  5968,  6119,  6378,  6396,  6613,  6702,  6728,\n      6787,  7128,  7156,  7240,  7479,  7551,  7596,  7692,  7809,  8027,\n      8249,  8264,  8299,  8573,  8826,  9123,  9152,  9274,  9445,  9593,\n      9915,  11377, 11744, 12935, 13308, 14487, 14947, 15720, 17060, 17669,\n      18079, 18629, 19841, 21053};\n  FixedVector<Float16, sparse_vec_count_4> sparse_vec_value_4{\n      0.2030336,   0.1411735,   0.12635018,  0.45823106,  0.22794029,\n      1.4105916,   0.2769118,   0.75515395,  0.07748295,  0.19260094,\n      0.12458416,  0.065163694, 0.9765741,   0.07470863,  0.80718166,\n      0.12307288,  0.9393725,   0.048733678, 0.17115222,  1.1922649,\n      0.03547645,  0.33111426,  0.03772038, 
 0.46104532,  0.3141086,\n      0.25707254,  1.1549219,   1.8509476,   0.98180383,  0.7270674,\n      0.91343564,  0.3373339,   0.081498206, 0.01140901,  0.43917242,\n      0.072401166, 0.11307132,  0.8945273,   0.10071963,  0.1945517,\n      0.7594797,   0.096463405, 0.07759007,  0.11009286,  0.012562437,\n      1.1797432,   0.02481144,  1.2393609,   0.50596905,  1.48781,\n      0.53125334,  0.9950063,   1.4128636,   1.5830894,   0.93246186,\n      0.60709685,  0.40433922,  0.14255294,  0.7125986,   0.021445543,\n      0.4104336,   0.14560317,  0.3189296,   0.51019174,  0.041676614,\n      0.22844397,  0.18406813,  0.1604107,   1.2178165,   0.46861333,\n      0.04899898,  2.4448788,   0.6505235,   0.051029652, 0.7550255,\n      0.00625443,  0.5090246,   0.7109037,   0.1125403,   0.05059699,\n      0.03856528,  0.4538238,   0.72464395,  0.1360473,   0.5109412,\n      2.0780752,   0.049649376, 0.31396037,  0.114775784, 0.9717559,\n      0.05478335,  0.12228666,  1.3433831,   1.6574994,   0.053257514,\n      0.51201975,  0.029570522, 0.35752434,  0.39366165,  0.25994724,\n      1.1072603,   2.0454218,   1.1423918,   0.59795356};\n\n  const uint32_t sparse_vec_count_5 = 147;\n  uint32_t sparse_vec_index_5[] = {\n      1012,  1996,  2001,  2018,  2020,  2034,  2047,  2081,  2154,  2162,\n      2170,  2171,  2207,  2210,  2220,  2233,  2251,  2253,  2257,  2259,\n      2287,  2315,  2318,  2328,  2381,  2390,  2458,  2466,  2510,  2557,\n      2580,  2609,  2622,  2645,  2688,  2707,  2724,  2762,  2838,  2900,\n      2911,  2915,  3047,  3058,  3260,  3282,  3290,  3295,  3297,  3386,\n      3390,  3578,  3603,  3607,  3690,  3746,  3826,  3861,  3908,  3910,\n      3918,  3934,  3987,  4006,  4045,  4075,  4088,  4110,  4255,  4302,\n      4517,  4620,  4761,  4871,  4916,  5195,  5221,  5234,  5246,  5532,\n      5700,  5798,  5832,  5855,  5951,  5968,  6033,  6215,  6219,  6302,\n      6394,  6396,  6529,  6950,  7008,  7084,  7128,  7155,  7156,  7240,\n      
7421,  7467,  7551,  7596,  7738,  7760,  8088,  8367,  8372,  8479,\n      8573,  8647,  8773,  8826,  9188,  9274,  9290,  9433,  9593,  9767,\n      9913,  9919,  9982,  10461, 10815, 11028, 11721, 12416, 12496, 12779,\n      13221, 13702, 13787, 14487, 15699, 16164, 18801, 20168, 21650, 24291,\n      24321, 25209, 25526, 25755, 28110, 28682, 28858};\n  FixedVector<Float16, sparse_vec_count_5> sparse_vec_value_5{\n      0.22246745,  0.1639393,    0.6902539,    0.087209724, 0.3150326,\n      1.3589038,   0.39210027,   0.06905281,   0.2940129,   0.48745865,\n      0.5185849,   0.06468885,   0.33793828,   0.01934533,  0.9160348,\n      0.12213709,  0.64625627,   0.05484681,   0.18600157,  0.7439921,\n      1.4779477,   0.50866294,   0.9324953,    0.11494038,  0.14815839,\n      0.4024814,   0.0025193223, 0.0039419075, 0.04004241,  0.1137441,\n      0.100572474, 0.09889997,   1.6465691,    0.45031455,  0.4567774,\n      0.7614913,   0.5324026,    0.09957147,   0.21556115,  0.36752453,\n      0.13450043,  0.06911261,   0.04267344,   1.2791942,   0.054822505,\n      0.06269096,  1.3170663,    0.8852742,    0.37885663,  0.92810893,\n      0.12803665,  0.10517517,   0.24920024,   0.16889784,  1.3619378,\n      0.59796244,  0.81389725,   0.06489252,   0.020069994, 0.06319,\n      0.71297073,  1.2515233,    0.019061586,  0.04731544,  0.3536146,\n      0.50835687,  0.56439734,   0.09884678,   1.1007178,   0.1480219,\n      1.6361246,   0.3891063,    0.03873499,   0.050479025, 0.5629584,\n      1.0016122,   0.16247666,   0.06476003,   0.43833405,  1.3702114,\n      0.11968183,  0.29155007,   0.12643526,   0.518913,    0.41796717,\n      1.740134,    0.015489911,  0.2183447,    1.5380116,   1.058654,\n      0.06226158,  0.270943,     0.91666347,   0.06422295,  0.33474496,\n      0.002399514, 2.0762439,    0.8989307,    0.7876583,   0.03783609,\n      0.22333156,  0.13323776,   0.27660817,   0.56637865,  0.21507333,\n      0.6770579,   0.7013793,    0.7085848,    0.15651116,  
0.05219105,\n      0.03743524,  0.30775747,   0.073243596,  0.8181374,   0.28133482,\n      0.23539418,  0.07533616,   0.2044144,    1.574523,    1.1304078,\n      0.24084339,  1.3286508,    0.775562,     0.10096621,  0.197577,\n      0.2307252,   1.719028,     0.07254901,   0.13916898,  0.17486195,\n      0.8424586,   0.27879223,   0.8650824,    0.35050592,  0.24243252,\n      0.31039444,  0.17227773,   0.90619636,   0.63083464,  2.2181685,\n      0.20995331,  0.14425081,   0.37305146,   0.5955121,   0.87200415,\n      1.028527,    1.0835907};\n\n  const uint32_t sparse_vec_count_6 = 141;\n  uint32_t sparse_vec_index_6[] = {\n      1012,  1059,  1996,  1997,  1998,  2001,  2012,  2018,  2020,  2021,\n      2025,  2055,  2056,  2076,  2077,  2127,  2130,  2134,  2138,  2143,\n      2162,  2197,  2203,  2220,  2259,  2318,  2328,  2338,  2345,  2381,\n      2390,  2458,  2462,  2466,  2501,  2517,  2580,  2622,  2631,  2645,\n      2688,  2707,  2724,  2748,  2764,  2808,  2900,  2911,  2933,  2949,\n      3047,  3058,  3074,  3075,  3092,  3101,  3188,  3271,  3283,  3439,\n      3478,  3535,  3595,  3607,  3690,  3720,  3740,  3793,  3818,  3826,\n      3906,  3908,  3934,  3981,  3986,  4028,  4138,  4469,  4496,  4503,\n      4515,  4517,  4566,  4704,  4706,  4761,  4839,  5036,  5175,  5233,\n      5234,  5246,  5254,  5263,  5491,  5817,  5823,  5839,  5875,  5968,\n      6215,  6254,  6268,  6394,  6407,  6801,  6848,  7128,  7177,  7321,\n      7421,  7487,  7551,  7596,  7681,  7940,  8145,  8264,  8321,  8551,\n      8573,  8647,  8773,  8826,  8832,  9472,  9593,  9599,  9767,  10530,\n      12149, 13787, 14487, 15237, 15523, 17060, 20168, 23633, 24363, 25526,\n      25755};\n  FixedVector<Float16, sparse_vec_count_6> sparse_vec_value_6{\n      0.48692977,  0.23770119,  0.24359323,   0.030566106,  0.121271,\n      0.5703241,   0.12787338,  0.037069157,  0.075816214,  0.05305081,\n      0.45591223,  0.5893366,   0.01829792,   0.42078727,   0.036012013,\n  
    0.0750098,   0.20031127,  0.033489488,  0.10935432,   0.054307006,\n      1.0000131,   0.20630358,  1.1161063,    0.5766484,    0.86030954,\n      0.65358734,  0.062234607, 0.8518808,    0.23441537,   0.14816457,\n      0.19284223,  0.94708407,  1.0017378,    0.51629704,   0.082293354,\n      0.09170858,  0.2138309,   1.533815,     0.0030641577, 0.029126635,\n      0.3632337,   0.1761491,   0.34924436,   0.67822266,   0.5976219,\n      0.8595736,   0.17943758,  0.038340267,  0.0052374,    0.29047492,\n      0.070157826, 0.6779024,   0.75593567,   0.054473646,  0.4906121,\n      0.11288958,  0.15934071,  0.3192689,    0.1435216,    0.30725288,\n      0.37506026,  0.7213243,   0.18401349,   0.01871983,   0.19455475,\n      0.02040177,  0.28111485,  0.043639474,  0.19826981,   0.27416018,\n      1.429636,    0.05111553,  1.0482118,    0.98164123,   0.17426124,\n      0.10582682,  1.002954,    1.0261939,    0.83377177,   0.6798103,\n      0.015373114, 0.8136259,   0.95782644,   0.13387722,   0.40847424,\n      0.80647326,  0.28733957,  0.0029352994, 0.30276307,   0.4768307,\n      0.32016084,  0.10302183,  0.3044403,    0.040031943,  0.44271877,\n      0.061298616, 0.08278493,  0.107188344,  0.5086274,    1.3297924,\n      0.050804485, 0.68582493,  0.21776867,   0.027724598,  0.5286007,\n      0.1899133,   0.04971613,  2.2401748,    0.09252626,   0.80688274,\n      0.014750206, 0.07568165,  0.021886598,  0.23429997,   1.1812011,\n      0.6390751,   0.2643012,   0.13720371,   0.10989579,   1.4969206,\n      0.2209742,   0.54690766,  0.15685914,   0.47841135,   0.566988,\n      0.08368683,  1.2788389,   0.09509155,   1.0241207,    0.07167757,\n      0.29240122,  0.5619141,   0.016415644,  0.28731114,   0.035925347,\n      0.34043407,  0.60646313,  0.07248792,   0.08602479,   0.10247773,\n      1.13258};\n\n  const uint32_t sparse_vec_count_7 = 221;\n  uint32_t sparse_vec_index_7[] = {\n      1059,  1996,  2001,  2003,  2008,  2010,  2020,  2029,  2034,  2076,\n      
2080,  2081,  2103,  2104,  2137,  2138,  2142,  2149,  2162,  2163,\n      2220,  2231,  2236,  2253,  2256,  2259,  2315,  2318,  2328,  2329,\n      2343,  2344,  2350,  2359,  2381,  2390,  2419,  2458,  2462,  2466,\n      2470,  2472,  2490,  2510,  2537,  2550,  2554,  2557,  2580,  2590,\n      2599,  2608,  2622,  2631,  2640,  2645,  2662,  2710,  2724,  2728,\n      2762,  2764,  2817,  2820,  2832,  2837,  2856,  2866,  2881,  2891,\n      2957,  2974,  2983,  3003,  3010,  3029,  3050,  3058,  3063,  3068,\n      3092,  3101,  3125,  3135,  3257,  3271,  3282,  3330,  3386,  3399,\n      3474,  3578,  3595,  3603,  3607,  3650,  3690,  3758,  3800,  3817,\n      3826,  3878,  3910,  3918,  3934,  3947,  3965,  3987,  3992,  4006,\n      4034,  4045,  4068,  4146,  4172,  4202,  4255,  4302,  4327,  4351,\n      4503,  4517,  4637,  4707,  4944,  5025,  5036,  5195,  5201,  5233,\n      5234,  5253,  5501,  5584,  5623,  5656,  5687,  5814,  5817,  5911,\n      5951,  5954,  5968,  6035,  6108,  6119,  6145,  6157,  6177,  6215,\n      6254,  6262,  6384,  6394,  6613,  6728,  6787,  6801,  6842,  6845,\n      6922,  6960,  7128,  7155,  7156,  7240,  7421,  7551,  7596,  7609,\n      7654,  7676,  7723,  7779,  7935,  8049,  8144,  8151,  8249,  8547,\n      8573,  8647,  8773,  8826,  8864,  8886,  9036,  9274,  9290,  9433,\n      9593,  9667,  9767,  9915,  10267, 10505, 10544, 10753, 10815, 11028,\n      11593, 11837, 12496, 13058, 13308, 13625, 13702, 14487, 15523, 17669,\n      18457, 18800, 18826, 20168, 20843, 21695, 24363, 25526, 25755, 26234,\n      26911};\n  FixedVector<Float16, sparse_vec_count_7> sparse_vec_value_7{\n      0.29634815,  0.3303992,    1.0099697,   0.09545747,  0.046319153,\n      0.001999375, 0.27222815,   0.107896015, 1.0792782,   0.5411261,\n      0.27695096,  0.020715078,  0.021571944, 0.61097443,  0.10560424,\n      0.15401895,  0.46480918,   0.6496758,   1.0116925,   0.0040072273,\n      0.8931394,   0.2361543,    
0.74389607,  0.039703716, 0.020886008,\n      1.1108406,   0.09039394,   0.69578373,  0.27737862,  0.3083219,\n      0.5698159,   0.31437457,   0.7131746,   0.14947455,  0.33504876,\n      1.1611847,   0.8632542,    1.058698,    1.0307701,   0.15223494,\n      0.9391413,   0.9473978,    0.3767169,   0.5806728,   0.70086235,\n      0.8544429,   0.07839825,   0.46189323,  0.57343185,  0.17151174,\n      0.45118546,  0.03416668,   2.037371,    0.1311739,   0.22600843,\n      0.061421365, 0.0063685803, 0.9023181,   0.17874505,  1.458104,\n      0.09657643,  0.36346155,   0.11396522,  0.2762966,   0.11472289,\n      0.16151813,  0.5954224,    0.68847394,  0.6934064,   1.0951325,\n      0.008113728, 0.320056,     0.2934685,   0.38948777,  0.64446163,\n      0.11539491,  1.4196212,    0.6417532,   0.10939098,  0.115132414,\n      0.10055387,  0.15150718,   0.3015885,   0.36512154,  0.85847276,\n      0.42005107,  0.06733843,   0.9194887,   0.2446694,   0.3528377,\n      0.30540454,  0.0549386,    0.15950806,  0.12754358,  0.22250807,\n      1.3793756,   0.01503605,   0.33390692,  0.2052875,   0.32573462,\n      0.66194123,  0.03896839,   0.921685,    1.1364039,   1.2451752,\n      0.072772495, 0.10148866,   0.2922106,   0.97420144,  0.25800666,\n      0.13455145,  0.3459612,    0.16713561,  0.21625288,  0.20754638,\n      0.017042752, 1.2139128,    0.38501504,  0.18923776,  0.58807755,\n      0.42623222,  1.8636363,    0.15489826,  0.24531981,  0.330716,\n      0.6148099,   0.12145276,   0.938947,    0.08298498,  0.5002425,\n      0.42643633,  0.3724926,    0.351435,    0.35051146,  0.15093777,\n      0.2753887,   0.11030835,   0.05864477,  0.12825343,  0.4938676,\n      0.4091608,   0.13155867,   1.362572,    0.26034647,  0.005735014,\n      0.25208464,  0.77931124,   0.08418636,  0.2567355,   0.108983725,\n      0.04566572,  0.06202907,   0.3991703,   0.2785334,   0.45871663,\n      1.584949,    0.099409536,  0.114265166, 0.0603091,   0.71120745,\n      0.35286796,  
0.03805246,   2.6303916,   0.6235311,   0.6544235,\n      0.254192,    0.5172861,    0.46474016,  0.51770395,  0.3868696,\n      0.030558605, 0.79667675,   0.1053426,   0.08400551,  0.26797673,\n      0.52138245,  0.13453461,   0.070371106, 0.003556521, 0.34309983,\n      0.2104394,   0.02274147,   0.19070747,  0.9488226,   0.09138845,\n      2.092856,    0.10931594,   0.18929166,  0.113100395, 0.08495193,\n      1.124685,    0.08020554,   1.0792019,   0.27422333,  0.31508496,\n      0.20671548,  0.05064338,   0.46511328,  0.38314936,  0.52556884,\n      0.36894837,  1.4199936,    0.05843645,  0.055732273, 0.26817194,\n      0.2876586,   1.0425944,    0.062882155, 0.09840146,  0.1544766,\n      0.98742366,  0.20589906,   2.1226256,   0.47266316,  0.33193296,\n      2.0077822,   0.23509863,   0.53764015,  1.2505449,   1.719803,\n      0.39262286};\n\n  std::vector<std::string> sparse_query_buffers;\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),\n      sparse_query_buffer_0);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_0));\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),\n      sparse_query_buffer_1);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_1));\n\n  std::string sparse_query_buffer_2;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2.data(),\n      sparse_query_buffer_2);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_2));\n\n  std::string sparse_query_buffer_3;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3.data(),\n      sparse_query_buffer_3);\n  
sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_3));\n\n  std::string sparse_query_buffer_4;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_4, sparse_vec_index_4, sparse_vec_value_4.data(),\n      sparse_query_buffer_4);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_4));\n\n  std::string sparse_query_buffer_5;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_5, sparse_vec_index_5, sparse_vec_value_5.data(),\n      sparse_query_buffer_5);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_5));\n\n  std::string sparse_query_buffer_6;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_6, sparse_vec_index_6, sparse_vec_value_6.data(),\n      sparse_query_buffer_6);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_6));\n\n  std::string sparse_query_buffer_7;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_7, sparse_vec_index_7, sparse_vec_value_7.data(),\n      sparse_query_buffer_7);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_7));\n\n  sparse_vec_counts.emplace_back(sparse_vec_count_0);\n  sparse_vec_counts.emplace_back(sparse_vec_count_1);\n  sparse_vec_counts.emplace_back(sparse_vec_count_2);\n  sparse_vec_counts.emplace_back(sparse_vec_count_3);\n  sparse_vec_counts.emplace_back(sparse_vec_count_4);\n  sparse_vec_counts.emplace_back(sparse_vec_count_5);\n  sparse_vec_counts.emplace_back(sparse_vec_count_6);\n  sparse_vec_counts.emplace_back(sparse_vec_count_7);\n\n  sparse_vec_indices.emplace_back(sparse_vec_index_0);\n  sparse_vec_indices.emplace_back(sparse_vec_index_1);\n  sparse_vec_indices.emplace_back(sparse_vec_index_2);\n  sparse_vec_indices.emplace_back(sparse_vec_index_3);\n  sparse_vec_indices.emplace_back(sparse_vec_index_4);\n  sparse_vec_indices.emplace_back(sparse_vec_index_5);\n  
sparse_vec_indices.emplace_back(sparse_vec_index_6);\n  sparse_vec_indices.emplace_back(sparse_vec_index_7);\n\n  sparse_vec_values.emplace_back(sparse_vec_value_0.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_1.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_2.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_3.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_4.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_5.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_6.data());\n  sparse_vec_values.emplace_back(sparse_vec_value_7.data());\n\n  for (size_t i = 0; i < sparse_query_buffers.size(); ++i) {\n    for (size_t j = 0; j < sparse_query_buffers.size(); ++j) {\n      float result0{0.0f};\n      result0 = SparseDistanceCommon(\n          sparse_vec_counts[i], sparse_vec_indices[i], sparse_vec_values[i],\n          sparse_vec_counts[j], sparse_vec_indices[j], sparse_vec_values[j]);\n\n      float result1{0.0f};\n      MinusInnerProductSparseMatrix<Float16>::Compute(\n          sparse_query_buffers[i].data(), sparse_query_buffers[j].data(),\n          &result1);\n      result1 = -result1;\n\n      float epsilon = 0.001 * std::max(result0, result1);\n      EXPECT_GE(epsilon, std::abs(result0 - result1));\n    }\n  }\n}\n\nTEST(DistanceMatrix, InnerProductSparse) {\n  TestInnerProductSparse();\n}\n\nTEST(DistanceMatrix, InnerProductSparseMore) {\n  TestInnerProductSparseMore();\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProductSparse_Benchmark) {\n  const uint32_t sparse_vec_count_0 = 52;\n  uint32_t sparse_vec_index_0[] = {\n      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,\n      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,\n      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,\n      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,\n      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};\n  FixedVector<Float16, 
sparse_vec_count_0> sparse_vec_value_0{\n      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,\n      -0.767956138324, -0.410683610330, 0.763314047145,  0.347851184825,\n      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,\n      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,\n      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,\n      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,\n      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,\n      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,\n      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,\n      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,\n      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,\n      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,\n      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};\n\n  const uint32_t sparse_vec_count_1 = 43;\n  uint32_t sparse_vec_index_1[] = {\n      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,\n      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,\n      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,\n      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};\n  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{\n      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,\n      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,\n      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,\n      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,\n      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,\n      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,\n      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,\n      
0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,\n      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,\n      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,\n      0.247628793044,  -0.984985692607,  -0.427929860028};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),\n      sparse_query_buffer_1);\n\n  size_t loop_cnt = 100000000LLU;\n  float result[100];\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    MinusInnerProductSparseMatrix<Float16>::Compute(\n        sparse_query_buffer_0.data(), sparse_query_buffer_1.data(),\n        result + (i % 100));\n  }\n\n  EXPECT_EQ(result[0], result[1]);\n}\n\nTEST(DistanceMatrix, TestInnerProductSparseDimWithZero) {\n  // test 1\n  const uint32_t sparse_vec_count_0 = 10;\n  uint32_t sparse_vec_index_0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  FixedVector<Float16, sparse_vec_count_0> sparse_vec_value_0{\n      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};\n\n  const uint32_t sparse_vec_count_1 = 10;\n  uint32_t sparse_vec_index_1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{\n      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),\n      sparse_query_buffer_1);\n\n  float 
result0{0.0f};\n  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,\n                                 sparse_vec_value_0.data(), sparse_vec_count_1,\n                                 sparse_vec_index_1, sparse_vec_value_1.data());\n\n  float result1{0.0f};\n  MinusInnerProductSparseMatrix<Float16>::Compute(\n      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);\n  result1 = -result1;\n\n  EXPECT_GE(0.00001, std::abs(result0 - result1));\n}\n"
  },
  {
    "path": "tests/ailego/math/inner_product_matrix_fp32_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(float *dst, const float *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\ntemplate <size_t N>\nstatic float InnerProductDistance(const FixedVector<float, N> &lhs,\n                                  const FixedVector<float, N> &rhs) {\n  return Distance::InnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float MinusInnerProductDistance(const FixedVector<float, N> &lhs,\n                                       const FixedVector<float, N> &rhs) {\n  return Distance::MinusInnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, InnerProduct_General) {\n  FixedVector<float, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,\n                             555.0f, 0.8f,  
5.5f,   3.75f, 9.0f,\n                             6.6f,   0.1f,  8.8f,   0.2f,  5.6f},\n      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};\n  EXPECT_FLOAT_EQ(308441.62f, InnerProductDistance(x15, y15));\n\n  FixedVector<float, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,\n                             9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,\n                             0.1f,  8.8f,  0.2f,   5.6f},\n      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,\n          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};\n  EXPECT_FLOAT_EQ(308526.19f, InnerProductDistance(x16, y16));\n\n  FixedVector<float, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,\n                             1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,\n                             3.4f, 4.5f,  5.6f, 1.6f,  1.3f},\n      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,\n          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};\n  EXPECT_FLOAT_EQ(307.1218f, InnerProductDistance(x17, y17));\n\n  FixedVector<float, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,\n                             4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,\n                             4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},\n      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};\n  EXPECT_FLOAT_EQ(378.72156f, InnerProductDistance(x18, y18));\n\n  FixedVector<float, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,\n                             2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,\n                             5.6f,  1.6f,  2.3f,  1.11f,  2.3f},\n      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_FLOAT_EQ(380.37f, InnerProductDistance(x19, y19));\n\n  FixedVector<float, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 
3.04f, 8.23f, 1.0f,\n                             1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,\n                             4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},\n      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,\n          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_FLOAT_EQ(222.23868f, InnerProductDistance(x20, y20));\n\n  FixedVector<float, 21> x21{0.0f}, y21{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, InnerProductDistance(x21, y21));\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_General) {\n  FixedVector<float, 1> x1{0.7f}, y1{0.5f};\n  EXPECT_FLOAT_EQ(-0.35f, MinusInnerProductDistance(x1, y1));\n\n  FixedVector<float, 2> x2{2.0f, 3.76f}, y2{2.0f, 0.901f};\n  EXPECT_FLOAT_EQ(-7.38776f, MinusInnerProductDistance(x2, y2));\n\n  FixedVector<float, 3> x3{2.0f, 3.0f, 0.7f}, y3{2.0f, 3.0f, 2.0f};\n  EXPECT_FLOAT_EQ(-14.4f, MinusInnerProductDistance(x3, y3));\n\n  FixedVector<float, 4> x4{7.8f, -8.9f, 9.0f, 5.6f},\n      y4{7.8f, 8.9f, -9.0f, -0.1f};\n  EXPECT_FLOAT_EQ(99.93f, MinusInnerProductDistance(x4, y4));\n\n  FixedVector<float, 5> x5{7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y5{7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_FLOAT_EQ(-222.18f, MinusInnerProductDistance(x5, y5));\n\n  FixedVector<float, 6> x6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_FLOAT_EQ(-222.6855f, MinusInnerProductDistance(x6, y6));\n\n  FixedVector<float, 7> x7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},\n      y7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};\n  EXPECT_FLOAT_EQ(-249.9339f, MinusInnerProductDistance(x7, y7));\n\n  FixedVector<float, 8> x8{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f, 5.6f},\n      y8{5.22f, 0.711f, -7.8f, -8.9f, -9.0f, 0.1f, 0.2f, 0.5f};\n  EXPECT_FLOAT_EQ(190.44608f, MinusInnerProductDistance(x8, y8));\n\n  FixedVector<float, 9> x9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f,\n                           6.6f,  0.1f,   0.2f, 5.6f},\n      y9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 
6.6f, 0.1f, 0.2f, 0.5f};\n  EXPECT_FLOAT_EQ(-295.214f, MinusInnerProductDistance(x9, y9));\n\n  FixedVector<float, 10> x10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f,\n                             9.0f,  6.6f,   0.1f, 0.2f, 5.6f},\n      y10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f, 9.0f, 6.6f, 0.1f, 0.2f, 0.522f};\n  EXPECT_FLOAT_EQ(-325.587f, MinusInnerProductDistance(x10, y10));\n\n  FixedVector<float, 11> x11{2.3f,    -1.11f, 3.04f, 8.23f, 1.0f, 7.7f,\n                             -11.11f, 2.3f,   3.4f,  4.5f,  5.6f},\n      y11{2.3f,    1.11f, 3.04f, 8.23f, -1.0f, 7.7f,\n          -11.11f, 2.3f,  3.4f,  4.5f,  0.511f};\n  EXPECT_FLOAT_EQ(-302.716f, MinusInnerProductDistance(x11, y11));\n\n  FixedVector<float, 12> x12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,\n                             7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},\n      y12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,\n          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  0.5f};\n  EXPECT_FLOAT_EQ(-309.67868f, MinusInnerProductDistance(x12, y12));\n\n  FixedVector<float, 13> x13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,\n                             7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},\n      y13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,\n          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  3.5f};\n  EXPECT_FLOAT_EQ(-346.19229f, MinusInnerProductDistance(x13, y13));\n\n  FixedVector<float, 14> x14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,\n                             3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 5.6f},\n      y14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 0.5f};\n  EXPECT_FLOAT_EQ(-418.029f, MinusInnerProductDistance(x14, y14));\n\n  FixedVector<float, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,\n                             555.0f, 0.8f,  5.5f,   3.75f, 9.0f,\n                             6.6f,   0.1f,  8.8f,   0.2f,  5.6f},\n      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,\n          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   
0.25f};\n  EXPECT_FLOAT_EQ(-308441.62f, MinusInnerProductDistance(x15, y15));\n\n  FixedVector<float, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,\n                             9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,\n                             0.1f,  8.8f,  0.2f,   5.6f},\n      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,\n          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};\n  EXPECT_FLOAT_EQ(-308526.19f, MinusInnerProductDistance(x16, y16));\n\n  FixedVector<float, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,\n                             1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,\n                             3.4f, 4.5f,  5.6f, 1.6f,  1.3f},\n      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,\n          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};\n  EXPECT_FLOAT_EQ(-307.1218f, MinusInnerProductDistance(x17, y17));\n\n  FixedVector<float, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,\n                             4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,\n                             4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},\n      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};\n  EXPECT_FLOAT_EQ(-378.72156f, MinusInnerProductDistance(x18, y18));\n\n  FixedVector<float, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,\n                             2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,\n                             5.6f,  1.6f,  2.3f,  1.11f,  2.3f},\n      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,\n          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_FLOAT_EQ(-380.37f, MinusInnerProductDistance(x19, y19));\n\n  FixedVector<float, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,\n                             1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,\n                             4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},\n      y20{1.6f, 
2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,\n          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};\n  EXPECT_FLOAT_EQ(-222.23868f, MinusInnerProductDistance(x20, y20));\n\n  FixedVector<float, 21> x21{0.0f}, y21{0.0f};\n  EXPECT_FLOAT_EQ(0.0f, MinusInnerProductDistance(x21, y21));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMinusInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MinusInnerProductMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  MinusInnerProductMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid 
TestInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      InnerProductMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  InnerProductMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));\n  }\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_1x1) {\n  TestMinusInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x1) {\n  TestMinusInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x2) {\n  TestMinusInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_3x3) {\n  TestMinusInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x1) {\n  
TestMinusInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x2) {\n  TestMinusInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x4) {\n  TestMinusInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x1) {\n  TestMinusInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x2) {\n  TestMinusInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x4) {\n  TestMinusInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x8) {\n  TestMinusInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x1) {\n  TestMinusInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x2) {\n  TestMinusInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x4) {\n  TestMinusInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x8) {\n  TestMinusInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x16) {\n  TestMinusInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x1) {\n  TestMinusInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x2) {\n  TestMinusInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x4) {\n  TestMinusInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x8) {\n  TestMinusInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x16) {\n  TestMinusInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x32) {\n  TestMinusInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x1) {\n  TestMinusInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x2) {\n  TestMinusInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x4) {\n  TestMinusInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x8) {\n  TestMinusInnerProductMatrix<64, 
8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x16) {\n  TestMinusInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x32) {\n  TestMinusInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x64) {\n  TestMinusInnerProductMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x1) {\n  TestMinusInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x2) {\n  TestMinusInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x4) {\n  TestMinusInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x8) {\n  TestMinusInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x16) {\n  TestMinusInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x32) {\n  TestMinusInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x64) {\n  TestMinusInnerProductMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x128) {\n  TestMinusInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, InnerProduct_1x1) {\n  TestInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x1) {\n  TestInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x2) {\n  TestInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_3x3) {\n  TestInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x1) {\n  TestInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x2) {\n  TestInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x4) {\n  TestInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x1) {\n  TestInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x2) {\n  TestInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x4) {\n  TestInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x8) {\n  TestInnerProductMatrix<8, 
8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x1) {\n  TestInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x2) {\n  TestInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x4) {\n  TestInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x8) {\n  TestInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x16) {\n  TestInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x1) {\n  TestInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x2) {\n  TestInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x4) {\n  TestInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x8) {\n  TestInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x16) {\n  TestInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x32) {\n  TestInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x1) {\n  TestInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x2) {\n  TestInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x4) {\n  TestInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x8) {\n  TestInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x16) {\n  TestInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x32) {\n  TestInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x64) {\n  TestInnerProductMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x1) {\n  TestInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x2) {\n  TestInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x4) {\n  TestInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x8) {\n  TestInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x16) {\n  TestInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, 
InnerProduct_128x32) {\n  TestInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x64) {\n  TestInnerProductMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x128) {\n  TestInnerProductMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid InnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      InnerProductMatrix<float, batch_size, 
1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    InnerProductMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        InnerProductMatrix<float, 1, 1>::Compute(&matrix_batch[k * dimension],\n                                                 current_query, dimension,\n                                                 &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MinusInnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  
std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MinusInnerProductMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MinusInnerProductMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for 
(size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MinusInnerProductMatrix<float, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {\n  InnerProductBenchmark<2, 1, 512, 64>();\n  InnerProductBenchmark<2, 2, 512, 64>();\n  InnerProductBenchmark<4, 1, 512, 64>();\n  InnerProductBenchmark<4, 2, 512, 64>();\n  InnerProductBenchmark<4, 4, 512, 64>();\n  InnerProductBenchmark<8, 1, 512, 64>();\n  InnerProductBenchmark<8, 2, 512, 64>();\n  InnerProductBenchmark<8, 4, 512, 64>();\n  InnerProductBenchmark<8, 8, 512, 64>();\n  InnerProductBenchmark<16, 1, 512, 64>();\n  InnerProductBenchmark<16, 2, 512, 64>();\n  InnerProductBenchmark<16, 4, 512, 64>();\n  InnerProductBenchmark<16, 8, 512, 64>();\n  InnerProductBenchmark<32, 1, 512, 64>();\n  InnerProductBenchmark<32, 2, 512, 64>();\n  InnerProductBenchmark<32, 4, 512, 64>();\n  InnerProductBenchmark<32, 8, 512, 64>();\n  InnerProductBenchmark<32, 16, 512, 64>();\n  InnerProductBenchmark<32, 32, 512, 64>();\n  InnerProductBenchmark<64, 1, 512, 64>();\n  InnerProductBenchmark<64, 2, 512, 64>();\n  InnerProductBenchmark<64, 4, 512, 64>();\n  InnerProductBenchmark<64, 8, 512, 64>();\n  InnerProductBenchmark<128, 1, 512, 64>();\n}\n\nTEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {\n  MinusInnerProductBenchmark<2, 1, 512, 64>();\n  MinusInnerProductBenchmark<2, 2, 512, 64>();\n  MinusInnerProductBenchmark<4, 1, 512, 64>();\n  MinusInnerProductBenchmark<4, 2, 512, 64>();\n  MinusInnerProductBenchmark<4, 4, 512, 64>();\n  MinusInnerProductBenchmark<8, 1, 512, 64>();\n  
MinusInnerProductBenchmark<8, 2, 512, 64>();\n  MinusInnerProductBenchmark<8, 4, 512, 64>();\n  MinusInnerProductBenchmark<8, 8, 512, 64>();\n  MinusInnerProductBenchmark<16, 1, 512, 64>();\n  MinusInnerProductBenchmark<16, 2, 512, 64>();\n  MinusInnerProductBenchmark<16, 4, 512, 64>();\n  MinusInnerProductBenchmark<16, 8, 512, 64>();\n  MinusInnerProductBenchmark<16, 16, 512, 64>();\n  MinusInnerProductBenchmark<32, 1, 512, 64>();\n  MinusInnerProductBenchmark<32, 2, 512, 64>();\n  MinusInnerProductBenchmark<32, 4, 512, 64>();\n  MinusInnerProductBenchmark<32, 8, 512, 64>();\n  MinusInnerProductBenchmark<32, 16, 512, 64>();\n  MinusInnerProductBenchmark<32, 32, 512, 64>();\n  MinusInnerProductBenchmark<64, 1, 512, 64>();\n  MinusInnerProductBenchmark<64, 2, 512, 64>();\n  MinusInnerProductBenchmark<64, 4, 512, 64>();\n  MinusInnerProductBenchmark<64, 8, 512, 64>();\n  MinusInnerProductBenchmark<128, 1, 512, 64>();\n}\n\nTEST(DistanceMatrix, DISABLED_MinusInnerProduct_BenchmarkSimple) {\n  std::mt19937 gen((std::random_device())());\n\n  size_t dimension = 768;\n  size_t loop_cnt = 100000000LLU;\n\n  std::vector<float> data(dimension);\n  std::vector<float> query(dimension);\n\n  float result;\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < dimension; ++i) {\n    data[i] = dist(gen);\n  }\n  for (size_t i = 0; i < dimension; ++i) {\n    query[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    MinusInnerProductMatrix<float, 1, 1>::Compute(&data[0], &query[0],\n                                                  dimension, &result);\n  }\n}\n\nstatic inline float SparseDistanceCommon(uint32_t count1, uint32_t *index1,\n                                         float *value1, uint32_t count2,\n                                         uint32_t *index2, float *value2) {\n  float result{0.0f};\n\n  size_t m = 0;\n  size_t q = 0;\n  while (m < count1 && q < count2) {\n    if (index1[m] == index2[q]) {\n      result += 
value1[m] * value2[q];\n\n      ++m;\n      ++q;\n    } else if (index1[m] < index2[q]) {\n      ++m;\n    } else {\n      ++q;\n    }\n  }\n\n  return result;\n}\n\nvoid TestInnerProductSparse(void) {\n  // test 1\n  const uint32_t sparse_vec_count_0 = 52;\n\n  uint32_t sparse_vec_index_0[] = {\n      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,\n      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,\n      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,\n      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,\n      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};\n  float sparse_vec_value_0[] = {\n      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,\n      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,\n      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,\n      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,\n      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,\n      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,\n      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,\n      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,\n      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,\n      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,\n      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,\n      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,\n      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};\n\n  const uint32_t sparse_vec_count_1 = 43;\n  uint32_t sparse_vec_index_1[] = {\n      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,\n      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,\n      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,\n      5445, 5555, 5588, 5764, 5896, 5918, 
6028, 6270, 6347, 6501};\n  float sparse_vec_value_1[] = {\n      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,\n      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,\n      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,\n      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,\n      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,\n      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,\n      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,\n      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,\n      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,\n      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,\n      0.247628793044,  -0.984985692607,  -0.427929860028};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,\n      sparse_query_buffer_1);\n\n  float result0{0.0f};\n  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,\n                                 sparse_vec_value_0, sparse_vec_count_1,\n                                 sparse_vec_index_1, sparse_vec_value_1);\n\n  float result1{0.0f};\n  MinusInnerProductSparseMatrix<float>::Compute(\n      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);\n  result1 = -result1;\n\n  EXPECT_GE(0.00001, std::abs(result0 - result1));\n\n  // test 2\n  constexpr uint32_t sparse_vec_count_2 = 49;\n\n  uint32_t sparse_vec_index_2[] = {\n      13200,  20900,  36300,  41800,  50600,  74800,  78100,  81400,  93500,\n      99000,  107800, 121000, 127600, 137500, 140800, 
143000, 145200, 166100,\n      174900, 193600, 194700, 195800, 233200, 261800, 262900, 273900, 277200,\n      299200, 302500, 343200, 381700, 387200, 418000, 421300, 436700, 449900,\n      480700, 510400, 586300, 596200, 603900, 607200, 612700, 625900, 632500,\n      633600, 639100, 642400, 650100};\n  float sparse_vec_value_2[] = {\n      0.167493264953,  0.178347102375,   0.61850792017,    0.707662206696,\n      -0.604456492928, 0.898905062153,   -0.971984671516,  -0.337950525868,\n      -0.942538751319, -0.115612454156,  0.78433412971,    0.601522288928,\n      -0.640321042923, -0.235673191423,  0.00632807223978, 0.629970437467,\n      0.966519256786,  -0.279362437157,  0.396153064627,   -0.614592812875,\n      -0.642157513141, 0.686723258138,   0.10227967727,    -0.5921196708,\n      0.499411577177,  -0.0188556369919, 0.512245212443,   0.424666758023,\n      0.299827154891,  -0.615468257454,  -0.0499098903374, -0.54873640329,\n      0.899673049133,  -0.873237346565,  0.463117084808,   -0.810200151551,\n      0.676836615658,  0.596247430713,   0.946225552468,   0.968425796351,\n      -0.821041580744, -0.697734977387,  0.295618053879,   -0.476597945375,\n      -0.246035224835, 0.927603570489,   -0.640242995569,  0.610224433234,\n      -0.657550506633};\n\n  constexpr uint32_t sparse_vec_count_3 = 58;\n  uint32_t sparse_vec_index_3[] = {\n      13200,  19800,  37400,  56100,  68200,  78100,  81400,  99000,  103400,\n      107800, 108900, 110000, 111100, 125400, 127600, 137500, 141900, 151800,\n      154000, 155100, 158400, 163900, 165000, 173800, 198000, 201300, 215600,\n      247500, 249700, 264000, 269500, 287100, 291500, 311300, 312400, 336600,\n      353100, 354200, 361900, 367400, 390500, 398200, 407000, 414700, 424600,\n      510400, 533500, 535700, 551100, 556600, 568700, 576400, 577500, 590700,\n      592900, 618200, 631400, 636900};\n  float sparse_vec_value_3[] = {\n      0.175769744964,  -0.198506965419,  0.0842021015107, 0.544957076263,\n      
0.0856447356878, 0.838582935178,   0.796525374862,  -0.931940801441,\n      0.555150441425,  0.957490431546,   -0.422126167235, -0.40903200281,\n      0.242643233475,  0.698565387541,   -0.325754491857, 0.540403772154,\n      -0.449888493042, 0.349262051644,   -0.612943655195, 0.874112675658,\n      0.943939922271,  -0.994946966212,  -0.978705162429, 0.321190597007,\n      0.17722019302,   0.6041089417,     -0.353184098327, -0.938569390092,\n      -0.92268220981,  -0.268600478592,  -0.598069229627, 0.0720175726713,\n      0.426800021137,  0.369250757861,   -0.823348360327, -0.664061107875,\n      -0.418342805261, -0.430818720049,  0.0941988181812, 0.0765632945538,\n      -0.148533061047, 0.404665036566,   -0.170747760502, -0.206564280292,\n      0.311035754032,  0.498520039471,   -0.16255148444,  -0.137950933749,\n      -0.234990864629, 0.602901363949,   0.0297103943437, -0.730955584059,\n      0.117169059405,  -0.0746546228896, 0.39067258928,   -0.214782717972,\n      -0.111009971497, -0.87766242691};\n\n  std::string sparse_query_buffer_2;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2,\n      sparse_query_buffer_2);\n\n  std::string sparse_query_buffer_3;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3,\n      sparse_query_buffer_3);\n\n  float result2{0.0f};\n  result2 = SparseDistanceCommon(sparse_vec_count_2, sparse_vec_index_2,\n                                 sparse_vec_value_2, sparse_vec_count_3,\n                                 sparse_vec_index_3, sparse_vec_value_3);\n\n  float result3{0.0f};\n  MinusInnerProductSparseMatrix<float>::Compute(\n      sparse_query_buffer_2.data(), sparse_query_buffer_3.data(), &result3);\n  result3 = -result3;\n\n  EXPECT_GE(0.00001, std::abs(result2 - result3));\n}\n\nvoid TestInnerProductSparseMore(void) {\n  std::vector<uint32_t> sparse_vec_counts;\n  
std::vector<uint32_t *> sparse_vec_indices;\n  std::vector<float *> sparse_vec_values;\n\n  const uint32_t sparse_vec_count_0 = 173;\n  uint32_t sparse_vec_index_0[] = {\n      1012,  1996,  2001,  2018,  2020,  2036,  2037,  2056,  2058,  2069,\n      2111,  2116,  2138,  2162,  2166,  2245,  2253,  2259,  2306,  2307,\n      2318,  2331,  2351,  2359,  2390,  2419,  2426,  2428,  2466,  2470,\n      2535,  2554,  2557,  2568,  2590,  2622,  2671,  2739,  2765,  2812,\n      2817,  2837,  2913,  2920,  3003,  3092,  3112,  3125,  3144,  3214,\n      3241,  3249,  3260,  3268,  3271,  3278,  3280,  3330,  3463,  3478,\n      3716,  3739,  3768,  3800,  3908,  3934,  3992,  4028,  4045,  4072,\n      4146,  4254,  4301,  4382,  4454,  4471,  4504,  4517,  4598,  4806,\n      4807,  4847,  4928,  4988,  5081,  5113,  5177,  5190,  5197,  5201,\n      5234,  5456,  5621,  5689,  5792,  5817,  5823,  5875,  5920,  5921,\n      5951,  5968,  6033,  6112,  6145,  6215,  6344,  6396,  6429,  6438,\n      6529,  6627,  6691,  6731,  6801,  6865,  6950,  7036,  7128,  7155,\n      7461,  7551,  7596,  7691,  7784,  7789,  7848,  7857,  8044,  8052,\n      8053,  8553,  8573,  8664,  8817,  8826,  9250,  9273,  9593,  9727,\n      10013, 10106, 10617, 10639, 10753, 11657, 12108, 13128, 13463, 13702,\n      13787, 14152, 14332, 15237, 15313, 15359, 15699, 16724, 17171, 17571,\n      17669, 20168, 20805, 20972, 22134, 22229, 22779, 24762, 24823, 25526,\n      25699, 26761, 27885};\n  float sparse_vec_value_0[] = {\n      0.36311877,  0.10386213,  0.64821976,   0.26300138,    0.29727572,\n      0.047292523, 0.022334402, 0.118793316,  0.7198291,     0.73566943,\n      0.19491579,  0.5763569,   0.5245229,    0.022828134,   0.43562022,\n      0.6946562,   0.09275672,  0.9687072,    0.1751608,     0.09703954,\n      0.18717986,  0.43182945,  0.055112287,  0.0021027816,  0.13972417,\n      0.1019873,   0.8679199,   0.26797894,   0.097350314,   0.5125363,\n      0.2829703,   
0.052232087, 0.3248494,    1.1258097,     0.90756655,\n      1.6490538,   0.45066822,  0.004210417,  0.028443621,   0.41171393,\n      0.09246816,  0.053040083, 0.052729037,  0.00041907438, 0.32047704,\n      0.2290303,   1.3542659,   0.28811434,   1.1722984,     0.4484738,\n      0.73670006,  0.22390367,  0.0058781556, 0.48173144,    0.76392287,\n      0.32048634,  0.42589885,  0.8624791,    0.0376546,     0.56702816,\n      0.002337549, 1.5856861,   0.14177673,   0.22762497,    0.6601752,\n      1.0603137,   0.914821,    0.34792075,   1.4387932,     0.035774633,\n      0.04391008,  0.7179224,   0.49199906,   0.043692447,   1.1404462,\n      0.47572234,  0.22777049,  0.7626374,    0.59730506,    1.4541638,\n      1.6540457,   0.089919806, 0.0050144624, 0.15902519,    0.2989032,\n      0.121926464, 0.11911,     0.27476037,   1.2774497,     0.42462146,\n      0.30179682,  0.18773684,  0.82144237,   1.2033592,     0.07180116,\n      0.06378868,  0.029040875, 0.2089903,    0.03591103,    0.94913304,\n      0.18240769,  0.9050947,   0.0034226696, 1.2841027,     0.629526,\n      0.06401547,  1.0698998,   0.11138009,   0.20497903,    0.017457427,\n      0.6316996,   0.12303611,  0.01563728,   0.090583175,   0.23981698,\n      0.48518667,  0.6207808,   1.8336427,    2.3282833,     0.8153351,\n      0.026216522, 0.6143031,   0.17374748,   0.32929608,    0.33730298,\n      1.1497657,   0.1926745,   0.14235665,   1.1076177,     0.945609,\n      0.48826388,  0.10458124,  0.19699246,   0.20899634,    0.44853806,\n      0.26411146,  0.7495864,   1.3681723,    1.4299264,     0.037516754,\n      0.17946614,  0.98060745,  0.055851664,  0.2002921,     0.45136684,\n      0.33716172,  0.58752763,  0.34051904,   1.9018586,     0.20597915,\n      0.82819384,  0.23866963,  0.4160662,    0.11889692,    0.172538,\n      0.005433464, 0.089198045, 0.3896585,    0.74038976,    0.24974349,\n      0.044961147, 0.32671204,  0.044312827,  0.25430596,    0.021065181,\n      0.071978964, 1.992692, 
   0.02640776,   1.7344381,     0.09561436,\n      0.07097204,  0.2922402,   0.8794989};\n\n  const uint32_t sparse_vec_count_1 = 144;\n  uint32_t sparse_vec_index_1[] = {\n      1012,  1016,  1059,  1996,  2001,  2020,  2049,  2068,  2076,  2088,\n      2109,  2138,  2145,  2149,  2162,  2203,  2220,  2224,  2256,  2259,\n      2318,  2373,  2381,  2390,  2393,  2419,  2462,  2466,  2485,  2506,\n      2554,  2557,  2580,  2590,  2622,  2633,  2645,  2671,  2716,  2724,\n      2900,  2942,  2943,  3003,  3029,  3092,  3112,  3125,  3260,  3271,\n      3278,  3283,  3288,  3439,  3466,  3478,  3521,  3578,  3594,  3595,\n      3607,  3647,  3690,  3800,  3826,  3896,  3908,  3934,  3947,  3987,\n      4045,  4068,  4204,  4254,  4255,  4302,  4329,  4471,  4504,  4517,\n      4566,  4736,  4762,  4789,  5081,  5094,  5105,  5195,  5197,  5201,\n      5233,  5234,  5584,  5817,  5823,  5832,  5875,  5951,  5968,  6033,\n      6035,  6179,  6215,  6245,  6383,  6394,  6396,  6529,  6613,  6691,\n      6801,  7091,  7128,  7155,  7240,  7461,  7551,  7596,  7691,  7738,\n      7784,  8027,  8144,  8192,  8249,  8309,  8573,  8647,  8826,  9379,\n      9593,  9767,  10400, 10461, 10530, 11028, 12799, 13787, 14487, 14670,\n      15237, 15523, 20168, 25755};\n  float sparse_vec_value_1[] = {\n      0.3815109,   0.21950184,   0.389138,    0.03037462,  0.738938,\n      0.11151163,  0.21257511,   0.008723602, 0.42403504,  0.17748593,\n      0.38613674,  0.38208488,   0.49048766,  0.056615792, 1.285813,\n      1.1482359,   0.016783785,  0.7362169,   0.21784282,  1.0905122,\n      0.37420613,  0.81915,      0.67411584,  0.35778007,  0.80538017,\n      0.10094925,  1.2726786,    0.12334787,  0.18297458,  0.13315988,\n      0.041079145, 0.2655652,    0.10946682,  0.6782494,   1.7451618,\n      0.17126456,  0.17718226,   0.7430134,   0.9090848,   0.31985787,\n      0.21779177,  0.13639484,   1.2293936,   0.065131165, 0.03718982,\n      0.64121664,  0.46517274,   0.39498892,  
0.07401267,  1.2061241,\n      0.1276834,   0.059918232,  1.1935436,   0.61886644,  0.32731527,\n      0.37830237,  1.0287925,    0.09565632,  0.4313508,   0.03845683,\n      0.066990376, 0.10886483,   0.097683005, 0.29624575,  0.48645914,\n      0.250733,    0.03274726,   1.205507,    0.048636433, 0.034002367,\n      0.83021015,  0.044592205,  0.06007409,  1.1224703,   0.45620173,\n      0.16457361,  0.053571727,  0.12527509,  0.1308366,   0.92323685,\n      0.7821679,   0.23838642,   0.2558486,   0.09402168,  0.22815736,\n      0.51750314,  0.08442147,   0.5565446,   0.3642559,   0.6661639,\n      0.73750395,  0.17278494,   0.05865512,  0.013724559, 0.023783961,\n      0.04283593,  0.24765956,   0.3991119,   1.5201892,   0.035530984,\n      0.049782272, 0.06485597,   0.5367931,   0.15097857,  0.014405596,\n      0.14585418,  0.22106051,   0.49575308,  0.08290891,  0.17875223,\n      0.21095915,  0.0038430362, 2.3110201,   0.6543391,   0.06421487,\n      0.3782336,   0.3514111,    0.5225064,   0.21472597,  0.07987356,\n      0.06002587,  1.5242931,    0.081204355, 0.32025364,  0.39068836,\n      0.027896391, 0.2872351,    0.50436527,  0.5434884,   1.653683,\n      1.444315,    0.988968,     0.024239752, 0.055084217, 0.074782506,\n      0.021114044, 0.07288233,   0.822755,    0.10772858,  0.6189507,\n      0.29534152,  0.20032129,   0.5609191,   1.2844883};\n\n  const uint32_t sparse_vec_count_2 = 153;\n  uint32_t sparse_vec_index_2[] = {\n      1012,  1059,  1996,  2001,  2020,  2049,  2052,  2055,  2056,  2081,\n      2088,  2124,  2138,  2156,  2158,  2162,  2191,  2231,  2242,  2256,\n      2259,  2311,  2318,  2359,  2373,  2381,  2390,  2437,  2458,  2466,\n      2477,  2510,  2554,  2580,  2590,  2622,  2640,  2671,  2689,  2825,\n      2844,  2881,  2904,  2957,  3029,  3112,  3125,  3144,  3214,  3246,\n      3271,  3312,  3330,  3399,  3443,  3478,  3578,  3595,  3647,  3697,\n      3740,  3800,  3817,  3818,  3928,  3934,  3987,  4034,  4072,  4079,\n    
  4172,  4204,  4254,  4255,  4302,  4517,  4526,  4695,  4706,  4795,\n      4807,  4986,  5081,  5091,  5113,  5195,  5197,  5234,  5253,  5263,\n      5623,  5646,  5656,  5817,  5875,  5951,  5954,  5968,  6033,  6061,\n      6108,  6119,  6157,  6213,  6215,  6287,  6384,  6396,  6461,  6469,\n      6613,  6801,  6842,  7128,  7240,  7305,  7477,  7551,  7596,  7609,\n      7624,  7723,  7779,  7857,  7935,  8144,  8238,  8249,  8275,  8547,\n      8573,  8647,  8826,  8927,  9036,  9491,  9593,  9767,  10267, 10461,\n      10505, 10660, 10721, 11028, 12578, 13787, 14487, 14874, 15523, 20168,\n      21565, 24212, 25628};\n  float sparse_vec_value_2[] = {\n      0.19194126,  0.11344757,   0.21317342,  0.6771587,    0.08591107,\n      0.006228663, 0.28981656,   0.58056134,  0.064362876,  0.5794717,\n      0.4288167,   0.59527594,   0.6106896,   0.23139843,   0.897008,\n      0.20689227,  0.28713426,   0.38175523,  0.4028853,    0.08509491,\n      1.0562526,   0.1165676,    0.06347306,  0.41331312,   0.16935593,\n      0.1626863,   0.29352358,   0.45827967,  0.21193665,   0.39532298,\n      0.0789344,   0.026420705,  0.1763078,   0.18424834,   0.7216729,\n      1.6683924,   0.06257952,   0.13419773,  0.6851299,    1.2139059,\n      0.092483185, 0.10803583,   0.74339646,  0.14461784,   0.2389669,\n      0.9306581,   0.5645601,    0.83565444,  0.11930474,   0.22862941,\n      0.6214566,   0.0033283439, 0.42018214,  0.15267797,   0.029068783,\n      0.24103808,  0.18765616,   0.11574381,  0.31545344,   0.09386852,\n      0.038362045, 0.7730324,    0.4456206,   0.20152733,   0.94718367,\n      1.1934134,   0.12610391,   0.014013804, 0.47198555,   0.21791361,\n      0.05394335,  0.08415188,   0.066486694, 0.47462225,   0.16693182,\n      0.9021425,   0.27905586,   0.09939155,  0.12642553,   0.27529165,\n      0.024804203, 0.24346212,   0.25561446,  1.4675297,    0.21566682,\n      0.5453194,   0.21558505,   0.21294887,  0.2740208,    0.43185237,\n      0.2280337,   
0.0048945076, 0.26826337,  0.016979327,  0.3338952,\n      0.23080347,  0.21200272,   1.3268396,   0.05323057,   0.30005422,\n      0.088871606, 0.13259241,   0.04766706,  0.0017769856, 0.2698414,\n      0.08068406,  0.38578644,   0.09752118,  0.13972333,   0.0731375,\n      0.36664346,  0.12214721,   0.1541759,   2.2295072,    0.22542699,\n      0.028530587, 0.022988612,  0.35836184,  0.10530607,   0.53756726,\n      0.05818686,  0.044951066,  0.05753079,  0.09009998,   0.24644017,\n      0.22693348,  0.0019512648, 0.035316195, 0.057344455,  0.36419895,\n      0.1534858,   0.18924302,   0.38702026,  1.2569604,    0.07787755,\n      1.7163913,   1.1903315,    0.8173934,   0.13888475,   0.10908335,\n      0.35437793,  0.15787303,   0.25039884,  0.130508,     0.09830101,\n      0.5841259,   0.22020355,   0.37849018,  0.14054261,   0.5179198,\n      1.1891438,   0.44022372,   0.1794719};\n\n  const uint32_t sparse_vec_count_3 = 166;\n  uint32_t sparse_vec_index_3[] = {\n      1012,  1059,  1996,  1997,  2001,  2020,  2034,  2076,  2086,  2104,\n      2138,  2149,  2162,  2170,  2171,  2220,  2231,  2236,  2259,  2311,\n      2315,  2318,  2328,  2343,  2344,  2359,  2381,  2390,  2419,  2458,\n      2462,  2466,  2472,  2479,  2491,  2510,  2557,  2558,  2565,  2580,\n      2590,  2622,  2724,  2764,  2817,  2837,  2881,  2900,  2911,  2933,\n      2949,  3003,  3029,  3058,  3092,  3101,  3125,  3188,  3271,  3330,\n      3386,  3399,  3434,  3447,  3474,  3478,  3578,  3595,  3607,  3650,\n      3690,  3740,  3779,  3800,  3817,  3818,  3826,  3910,  3918,  3934,\n      3987,  3992,  4006,  4034,  4068,  4075,  4114,  4146,  4172,  4255,\n      4302,  4327,  4503,  4517,  4758,  4883,  4944,  4975,  5036,  5195,\n      5205,  5218,  5233,  5234,  5253,  5456,  5623,  5656,  5687,  5817,\n      5875,  5951,  5954,  5968,  6059,  6119,  6145,  6157,  6215,  6262,\n      6384,  6394,  6613,  6787,  6801,  6842,  6993,  7128,  7156,  7240,\n      7305,  7421,  7551,  
7596,  7676,  7935,  8547,  8573,  8647,  8773,\n      8826,  8886,  8911,  9036,  9274,  9433,  9593,  9767,  9915,  10267,\n      10461, 10505, 11028, 11274, 11593, 13058, 13787, 14487, 15237, 17060,\n      20168, 21695, 23041, 24363, 25526, 25755};\n  float sparse_vec_value_3[] = {\n      0.17927244,   0.20557176,   0.40560228,   0.32370853,  0.8060634,\n      0.21424179,   1.0674698,    0.6046889,    0.21051478,  0.46186206,\n      0.24661283,   0.5616991,    1.016811,     0.2618776,   0.9686127,\n      0.869671,     0.1458332,    0.60725594,   1.206012,    0.10357225,\n      0.4350595,    0.83702874,   0.146196,     0.8644738,   0.15587087,\n      0.16456357,   0.36376593,   1.053665,     0.06609649,  0.6504239,\n      0.9697015,    0.04947369,   0.43753505,   0.04289205,  0.42075413,\n      0.330524,     0.1743388,    0.6540892,    0.012900644, 0.23207273,\n      0.2674499,    1.9736407,    0.21540764,   0.63648874,  0.049446102,\n      0.3750183,    0.17441651,   0.123951435,  0.015306404, 0.1767618,\n      0.24109434,   0.4245122,    0.114403255,  0.91849947,  0.12018716,\n      0.01165807,   0.47680765,   0.036503244,  0.5782868,   0.9163635,\n      0.27396393,   0.16385026,   0.052631885,  0.72294754,  0.4022935,\n      0.06351255,   0.27786675,   0.25394455,   0.08041568,  1.3137422,\n      0.5514297,    0.2503315,    0.009040705,  0.40985608,  0.27673048,\n      0.14055687,   0.50529444,   0.6049716,    1.0692317,   1.207644,\n      0.108388424,  0.9495853,    0.35366973,   0.3762234,   0.19875458,\n      0.14685634,   0.0060924664, 1.0126622,    0.034943417, 0.49489433,\n      0.34451365,   0.21992311,   0.7039926,    0.9501215,   0.34629604,\n      0.20126931,   0.23908958,   0.019030606,  0.12528977,  0.6009518,\n      0.056694727,  0.19225678,   0.61745095,   0.26769277,  0.18739952,\n      0.10380342,   0.08536158,   0.18679029,   0.040631995, 0.23538794,\n      0.081166975,  0.3206779,    0.0018739193, 1.5819491,   0.07052032,\n      0.2504746,    
0.7514167,    0.06575893,   0.08000714,  0.0012445971,\n      0.23989597,   0.12001178,   0.51009554,   0.14469045,  0.12445986,\n      0.08644873,   0.5645543,    2.539498,     0.54383165,  0.22437337,\n      0.0018195114, 0.11787724,   0.34932667,   0.49611032,  0.24439196,\n      0.100613214,  0.2844197,    0.38720158,   0.22204469,  0.078220785,\n      0.76444066,   1.7794204,    0.17640579,   0.04227443,  0.28023362,\n      0.06434563,   1.320367,     0.9287479,    0.14726646,  0.27983913,\n      0.022449814,  0.09246922,   0.22375125,   0.10417365,  0.034148056,\n      0.12830476,   0.6065902,    0.16593556,   0.25840235,  0.2596266,\n      0.6388732,    1.6666834,    0.030998405,  0.14869562,  0.30502653,\n      1.183558};\n\n  const uint32_t sparse_vec_count_4 = 104;\n  uint32_t sparse_vec_index_4[] = {\n      1012,  1996,  1997,  2001,  2033,  2034,  2080,  2120,  2142,  2149,\n      2220,  2231,  2259,  2284,  2318,  2338,  2381,  2405,  2424,  2436,\n      2458,  2472,  2533,  2544,  2557,  2580,  2609,  2622,  2627,  2688,\n      2800,  2820,  2837,  2862,  2932,  2949,  3029,  3036,  3181,  3390,\n      3439,  3690,  3780,  3784,  3818,  3872,  3931,  3934,  4034,  4037,\n      4075,  4219,  4348,  4517,  4573,  4617,  4773,  4809,  4822,  4879,\n      5234,  5272,  5851,  5968,  6119,  6378,  6396,  6613,  6702,  6728,\n      6787,  7128,  7156,  7240,  7479,  7551,  7596,  7692,  7809,  8027,\n      8249,  8264,  8299,  8573,  8826,  9123,  9152,  9274,  9445,  9593,\n      9915,  11377, 11744, 12935, 13308, 14487, 14947, 15720, 17060, 17669,\n      18079, 18629, 19841, 21053};\n  float sparse_vec_value_4[] = {\n      0.2030336,   0.1411735,   0.12635018,  0.45823106,  0.22794029,\n      1.4105916,   0.2769118,   0.75515395,  0.07748295,  0.19260094,\n      0.12458416,  0.065163694, 0.9765741,   0.07470863,  0.80718166,\n      0.12307288,  0.9393725,   0.048733678, 0.17115222,  1.1922649,\n      0.03547645,  0.33111426,  0.03772038,  0.46104532,  
0.3141086,\n      0.25707254,  1.1549219,   1.8509476,   0.98180383,  0.7270674,\n      0.91343564,  0.3373339,   0.081498206, 0.01140901,  0.43917242,\n      0.072401166, 0.11307132,  0.8945273,   0.10071963,  0.1945517,\n      0.7594797,   0.096463405, 0.07759007,  0.11009286,  0.012562437,\n      1.1797432,   0.02481144,  1.2393609,   0.50596905,  1.48781,\n      0.53125334,  0.9950063,   1.4128636,   1.5830894,   0.93246186,\n      0.60709685,  0.40433922,  0.14255294,  0.7125986,   0.021445543,\n      0.4104336,   0.14560317,  0.3189296,   0.51019174,  0.041676614,\n      0.22844397,  0.18406813,  0.1604107,   1.2178165,   0.46861333,\n      0.04899898,  2.4448788,   0.6505235,   0.051029652, 0.7550255,\n      0.00625443,  0.5090246,   0.7109037,   0.1125403,   0.05059699,\n      0.03856528,  0.4538238,   0.72464395,  0.1360473,   0.5109412,\n      2.0780752,   0.049649376, 0.31396037,  0.114775784, 0.9717559,\n      0.05478335,  0.12228666,  1.3433831,   1.6574994,   0.053257514,\n      0.51201975,  0.029570522, 0.35752434,  0.39366165,  0.25994724,\n      1.1072603,   2.0454218,   1.1423918,   0.59795356};\n\n  const uint32_t sparse_vec_count_5 = 147;\n  uint32_t sparse_vec_index_5[] = {\n      1012,  1996,  2001,  2018,  2020,  2034,  2047,  2081,  2154,  2162,\n      2170,  2171,  2207,  2210,  2220,  2233,  2251,  2253,  2257,  2259,\n      2287,  2315,  2318,  2328,  2381,  2390,  2458,  2466,  2510,  2557,\n      2580,  2609,  2622,  2645,  2688,  2707,  2724,  2762,  2838,  2900,\n      2911,  2915,  3047,  3058,  3260,  3282,  3290,  3295,  3297,  3386,\n      3390,  3578,  3603,  3607,  3690,  3746,  3826,  3861,  3908,  3910,\n      3918,  3934,  3987,  4006,  4045,  4075,  4088,  4110,  4255,  4302,\n      4517,  4620,  4761,  4871,  4916,  5195,  5221,  5234,  5246,  5532,\n      5700,  5798,  5832,  5855,  5951,  5968,  6033,  6215,  6219,  6302,\n      6394,  6396,  6529,  6950,  7008,  7084,  7128,  7155,  7156,  7240,\n      7421,  7467,  
7551,  7596,  7738,  7760,  8088,  8367,  8372,  8479,\n      8573,  8647,  8773,  8826,  9188,  9274,  9290,  9433,  9593,  9767,\n      9913,  9919,  9982,  10461, 10815, 11028, 11721, 12416, 12496, 12779,\n      13221, 13702, 13787, 14487, 15699, 16164, 18801, 20168, 21650, 24291,\n      24321, 25209, 25526, 25755, 28110, 28682, 28858};\n  float sparse_vec_value_5[] = {\n      0.22246745,  0.1639393,    0.6902539,    0.087209724, 0.3150326,\n      1.3589038,   0.39210027,   0.06905281,   0.2940129,   0.48745865,\n      0.5185849,   0.06468885,   0.33793828,   0.01934533,  0.9160348,\n      0.12213709,  0.64625627,   0.05484681,   0.18600157,  0.7439921,\n      1.4779477,   0.50866294,   0.9324953,    0.11494038,  0.14815839,\n      0.4024814,   0.0025193223, 0.0039419075, 0.04004241,  0.1137441,\n      0.100572474, 0.09889997,   1.6465691,    0.45031455,  0.4567774,\n      0.7614913,   0.5324026,    0.09957147,   0.21556115,  0.36752453,\n      0.13450043,  0.06911261,   0.04267344,   1.2791942,   0.054822505,\n      0.06269096,  1.3170663,    0.8852742,    0.37885663,  0.92810893,\n      0.12803665,  0.10517517,   0.24920024,   0.16889784,  1.3619378,\n      0.59796244,  0.81389725,   0.06489252,   0.020069994, 0.06319,\n      0.71297073,  1.2515233,    0.019061586,  0.04731544,  0.3536146,\n      0.50835687,  0.56439734,   0.09884678,   1.1007178,   0.1480219,\n      1.6361246,   0.3891063,    0.03873499,   0.050479025, 0.5629584,\n      1.0016122,   0.16247666,   0.06476003,   0.43833405,  1.3702114,\n      0.11968183,  0.29155007,   0.12643526,   0.518913,    0.41796717,\n      1.740134,    0.015489911,  0.2183447,    1.5380116,   1.058654,\n      0.06226158,  0.270943,     0.91666347,   0.06422295,  0.33474496,\n      0.002399514, 2.0762439,    0.8989307,    0.7876583,   0.03783609,\n      0.22333156,  0.13323776,   0.27660817,   0.56637865,  0.21507333,\n      0.6770579,   0.7013793,    0.7085848,    0.15651116,  0.05219105,\n      0.03743524,  0.30775747, 
  0.073243596,  0.8181374,   0.28133482,\n      0.23539418,  0.07533616,   0.2044144,    1.574523,    1.1304078,\n      0.24084339,  1.3286508,    0.775562,     0.10096621,  0.197577,\n      0.2307252,   1.719028,     0.07254901,   0.13916898,  0.17486195,\n      0.8424586,   0.27879223,   0.8650824,    0.35050592,  0.24243252,\n      0.31039444,  0.17227773,   0.90619636,   0.63083464,  2.2181685,\n      0.20995331,  0.14425081,   0.37305146,   0.5955121,   0.87200415,\n      1.028527,    1.0835907};\n\n  const uint32_t sparse_vec_count_6 = 141;\n  uint32_t sparse_vec_index_6[] = {\n      1012,  1059,  1996,  1997,  1998,  2001,  2012,  2018,  2020,  2021,\n      2025,  2055,  2056,  2076,  2077,  2127,  2130,  2134,  2138,  2143,\n      2162,  2197,  2203,  2220,  2259,  2318,  2328,  2338,  2345,  2381,\n      2390,  2458,  2462,  2466,  2501,  2517,  2580,  2622,  2631,  2645,\n      2688,  2707,  2724,  2748,  2764,  2808,  2900,  2911,  2933,  2949,\n      3047,  3058,  3074,  3075,  3092,  3101,  3188,  3271,  3283,  3439,\n      3478,  3535,  3595,  3607,  3690,  3720,  3740,  3793,  3818,  3826,\n      3906,  3908,  3934,  3981,  3986,  4028,  4138,  4469,  4496,  4503,\n      4515,  4517,  4566,  4704,  4706,  4761,  4839,  5036,  5175,  5233,\n      5234,  5246,  5254,  5263,  5491,  5817,  5823,  5839,  5875,  5968,\n      6215,  6254,  6268,  6394,  6407,  6801,  6848,  7128,  7177,  7321,\n      7421,  7487,  7551,  7596,  7681,  7940,  8145,  8264,  8321,  8551,\n      8573,  8647,  8773,  8826,  8832,  9472,  9593,  9599,  9767,  10530,\n      12149, 13787, 14487, 15237, 15523, 17060, 20168, 23633, 24363, 25526,\n      25755};\n  float sparse_vec_value_6[] = {\n      0.48692977,  0.23770119,  0.24359323,   0.030566106,  0.121271,\n      0.5703241,   0.12787338,  0.037069157,  0.075816214,  0.05305081,\n      0.45591223,  0.5893366,   0.01829792,   0.42078727,   0.036012013,\n      0.0750098,   0.20031127,  0.033489488,  0.10935432,   0.054307006,\n  
    1.0000131,   0.20630358,  1.1161063,    0.5766484,    0.86030954,\n      0.65358734,  0.062234607, 0.8518808,    0.23441537,   0.14816457,\n      0.19284223,  0.94708407,  1.0017378,    0.51629704,   0.082293354,\n      0.09170858,  0.2138309,   1.533815,     0.0030641577, 0.029126635,\n      0.3632337,   0.1761491,   0.34924436,   0.67822266,   0.5976219,\n      0.8595736,   0.17943758,  0.038340267,  0.0052374,    0.29047492,\n      0.070157826, 0.6779024,   0.75593567,   0.054473646,  0.4906121,\n      0.11288958,  0.15934071,  0.3192689,    0.1435216,    0.30725288,\n      0.37506026,  0.7213243,   0.18401349,   0.01871983,   0.19455475,\n      0.02040177,  0.28111485,  0.043639474,  0.19826981,   0.27416018,\n      1.429636,    0.05111553,  1.0482118,    0.98164123,   0.17426124,\n      0.10582682,  1.002954,    1.0261939,    0.83377177,   0.6798103,\n      0.015373114, 0.8136259,   0.95782644,   0.13387722,   0.40847424,\n      0.80647326,  0.28733957,  0.0029352994, 0.30276307,   0.4768307,\n      0.32016084,  0.10302183,  0.3044403,    0.040031943,  0.44271877,\n      0.061298616, 0.08278493,  0.107188344,  0.5086274,    1.3297924,\n      0.050804485, 0.68582493,  0.21776867,   0.027724598,  0.5286007,\n      0.1899133,   0.04971613,  2.2401748,    0.09252626,   0.80688274,\n      0.014750206, 0.07568165,  0.021886598,  0.23429997,   1.1812011,\n      0.6390751,   0.2643012,   0.13720371,   0.10989579,   1.4969206,\n      0.2209742,   0.54690766,  0.15685914,   0.47841135,   0.566988,\n      0.08368683,  1.2788389,   0.09509155,   1.0241207,    0.07167757,\n      0.29240122,  0.5619141,   0.016415644,  0.28731114,   0.035925347,\n      0.34043407,  0.60646313,  0.07248792,   0.08602479,   0.10247773,\n      1.13258};\n\n  const uint32_t sparse_vec_count_7 = 221;\n  uint32_t sparse_vec_index_7[] = {\n      1059,  1996,  2001,  2003,  2008,  2010,  2020,  2029,  2034,  2076,\n      2080,  2081,  2103,  2104,  2137,  2138,  2142,  2149,  2162,  2163,\n     
 2220,  2231,  2236,  2253,  2256,  2259,  2315,  2318,  2328,  2329,\n      2343,  2344,  2350,  2359,  2381,  2390,  2419,  2458,  2462,  2466,\n      2470,  2472,  2490,  2510,  2537,  2550,  2554,  2557,  2580,  2590,\n      2599,  2608,  2622,  2631,  2640,  2645,  2662,  2710,  2724,  2728,\n      2762,  2764,  2817,  2820,  2832,  2837,  2856,  2866,  2881,  2891,\n      2957,  2974,  2983,  3003,  3010,  3029,  3050,  3058,  3063,  3068,\n      3092,  3101,  3125,  3135,  3257,  3271,  3282,  3330,  3386,  3399,\n      3474,  3578,  3595,  3603,  3607,  3650,  3690,  3758,  3800,  3817,\n      3826,  3878,  3910,  3918,  3934,  3947,  3965,  3987,  3992,  4006,\n      4034,  4045,  4068,  4146,  4172,  4202,  4255,  4302,  4327,  4351,\n      4503,  4517,  4637,  4707,  4944,  5025,  5036,  5195,  5201,  5233,\n      5234,  5253,  5501,  5584,  5623,  5656,  5687,  5814,  5817,  5911,\n      5951,  5954,  5968,  6035,  6108,  6119,  6145,  6157,  6177,  6215,\n      6254,  6262,  6384,  6394,  6613,  6728,  6787,  6801,  6842,  6845,\n      6922,  6960,  7128,  7155,  7156,  7240,  7421,  7551,  7596,  7609,\n      7654,  7676,  7723,  7779,  7935,  8049,  8144,  8151,  8249,  8547,\n      8573,  8647,  8773,  8826,  8864,  8886,  9036,  9274,  9290,  9433,\n      9593,  9667,  9767,  9915,  10267, 10505, 10544, 10753, 10815, 11028,\n      11593, 11837, 12496, 13058, 13308, 13625, 13702, 14487, 15523, 17669,\n      18457, 18800, 18826, 20168, 20843, 21695, 24363, 25526, 25755, 26234,\n      26911};\n  float sparse_vec_value_7[] = {\n      0.29634815,  0.3303992,    1.0099697,   0.09545747,  0.046319153,\n      0.001999375, 0.27222815,   0.107896015, 1.0792782,   0.5411261,\n      0.27695096,  0.020715078,  0.021571944, 0.61097443,  0.10560424,\n      0.15401895,  0.46480918,   0.6496758,   1.0116925,   0.0040072273,\n      0.8931394,   0.2361543,    0.74389607,  0.039703716, 0.020886008,\n      1.1108406,   0.09039394,   0.69578373,  0.27737862,  
0.3083219,\n      0.5698159,   0.31437457,   0.7131746,   0.14947455,  0.33504876,\n      1.1611847,   0.8632542,    1.058698,    1.0307701,   0.15223494,\n      0.9391413,   0.9473978,    0.3767169,   0.5806728,   0.70086235,\n      0.8544429,   0.07839825,   0.46189323,  0.57343185,  0.17151174,\n      0.45118546,  0.03416668,   2.037371,    0.1311739,   0.22600843,\n      0.061421365, 0.0063685803, 0.9023181,   0.17874505,  1.458104,\n      0.09657643,  0.36346155,   0.11396522,  0.2762966,   0.11472289,\n      0.16151813,  0.5954224,    0.68847394,  0.6934064,   1.0951325,\n      0.008113728, 0.320056,     0.2934685,   0.38948777,  0.64446163,\n      0.11539491,  1.4196212,    0.6417532,   0.10939098,  0.115132414,\n      0.10055387,  0.15150718,   0.3015885,   0.36512154,  0.85847276,\n      0.42005107,  0.06733843,   0.9194887,   0.2446694,   0.3528377,\n      0.30540454,  0.0549386,    0.15950806,  0.12754358,  0.22250807,\n      1.3793756,   0.01503605,   0.33390692,  0.2052875,   0.32573462,\n      0.66194123,  0.03896839,   0.921685,    1.1364039,   1.2451752,\n      0.072772495, 0.10148866,   0.2922106,   0.97420144,  0.25800666,\n      0.13455145,  0.3459612,    0.16713561,  0.21625288,  0.20754638,\n      0.017042752, 1.2139128,    0.38501504,  0.18923776,  0.58807755,\n      0.42623222,  1.8636363,    0.15489826,  0.24531981,  0.330716,\n      0.6148099,   0.12145276,   0.938947,    0.08298498,  0.5002425,\n      0.42643633,  0.3724926,    0.351435,    0.35051146,  0.15093777,\n      0.2753887,   0.11030835,   0.05864477,  0.12825343,  0.4938676,\n      0.4091608,   0.13155867,   1.362572,    0.26034647,  0.005735014,\n      0.25208464,  0.77931124,   0.08418636,  0.2567355,   0.108983725,\n      0.04566572,  0.06202907,   0.3991703,   0.2785334,   0.45871663,\n      1.584949,    0.099409536,  0.114265166, 0.0603091,   0.71120745,\n      0.35286796,  0.03805246,   2.6303916,   0.6235311,   0.6544235,\n      0.254192,    0.5172861,    0.46474016,  
0.51770395,  0.3868696,\n      0.030558605, 0.79667675,   0.1053426,   0.08400551,  0.26797673,\n      0.52138245,  0.13453461,   0.070371106, 0.003556521, 0.34309983,\n      0.2104394,   0.02274147,   0.19070747,  0.9488226,   0.09138845,\n      2.092856,    0.10931594,   0.18929166,  0.113100395, 0.08495193,\n      1.124685,    0.08020554,   1.0792019,   0.27422333,  0.31508496,\n      0.20671548,  0.05064338,   0.46511328,  0.38314936,  0.52556884,\n      0.36894837,  1.4199936,    0.05843645,  0.055732273, 0.26817194,\n      0.2876586,   1.0425944,    0.062882155, 0.09840146,  0.1544766,\n      0.98742366,  0.20589906,   2.1226256,   0.47266316,  0.33193296,\n      2.0077822,   0.23509863,   0.53764015,  1.2505449,   1.719803,\n      0.39262286};\n\n  std::vector<std::string> sparse_query_buffers;\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,\n      sparse_query_buffer_0);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_0));\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,\n      sparse_query_buffer_1);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_1));\n\n  std::string sparse_query_buffer_2;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2,\n      sparse_query_buffer_2);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_2));\n\n  std::string sparse_query_buffer_3;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3,\n      sparse_query_buffer_3);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_3));\n\n  std::string sparse_query_buffer_4;\n  
MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_4, sparse_vec_index_4, sparse_vec_value_4,\n      sparse_query_buffer_4);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_4));\n\n  std::string sparse_query_buffer_5;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_5, sparse_vec_index_5, sparse_vec_value_5,\n      sparse_query_buffer_5);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_5));\n\n  std::string sparse_query_buffer_6;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_6, sparse_vec_index_6, sparse_vec_value_6,\n      sparse_query_buffer_6);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_6));\n\n  std::string sparse_query_buffer_7;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_7, sparse_vec_index_7, sparse_vec_value_7,\n      sparse_query_buffer_7);\n  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_7));\n\n  sparse_vec_counts.emplace_back(sparse_vec_count_0);\n  sparse_vec_counts.emplace_back(sparse_vec_count_1);\n  sparse_vec_counts.emplace_back(sparse_vec_count_2);\n  sparse_vec_counts.emplace_back(sparse_vec_count_3);\n  sparse_vec_counts.emplace_back(sparse_vec_count_4);\n  sparse_vec_counts.emplace_back(sparse_vec_count_5);\n  sparse_vec_counts.emplace_back(sparse_vec_count_6);\n  sparse_vec_counts.emplace_back(sparse_vec_count_7);\n\n  sparse_vec_indices.emplace_back(sparse_vec_index_0);\n  sparse_vec_indices.emplace_back(sparse_vec_index_1);\n  sparse_vec_indices.emplace_back(sparse_vec_index_2);\n  sparse_vec_indices.emplace_back(sparse_vec_index_3);\n  sparse_vec_indices.emplace_back(sparse_vec_index_4);\n  sparse_vec_indices.emplace_back(sparse_vec_index_5);\n  sparse_vec_indices.emplace_back(sparse_vec_index_6);\n  sparse_vec_indices.emplace_back(sparse_vec_index_7);\n\n  
sparse_vec_values.emplace_back(sparse_vec_value_0);\n  sparse_vec_values.emplace_back(sparse_vec_value_1);\n  sparse_vec_values.emplace_back(sparse_vec_value_2);\n  sparse_vec_values.emplace_back(sparse_vec_value_3);\n  sparse_vec_values.emplace_back(sparse_vec_value_4);\n  sparse_vec_values.emplace_back(sparse_vec_value_5);\n  sparse_vec_values.emplace_back(sparse_vec_value_6);\n  sparse_vec_values.emplace_back(sparse_vec_value_7);\n\n  for (size_t i = 0; i < sparse_query_buffers.size(); ++i) {\n    for (size_t j = 0; j < sparse_query_buffers.size(); ++j) {\n      float result0{0.0f};\n      result0 = SparseDistanceCommon(\n          sparse_vec_counts[i], sparse_vec_indices[i], sparse_vec_values[i],\n          sparse_vec_counts[j], sparse_vec_indices[j], sparse_vec_values[j]);\n\n      float result1{0.0f};\n      MinusInnerProductSparseMatrix<float>::Compute(\n          sparse_query_buffers[i].data(), sparse_query_buffers[j].data(),\n          &result1);\n      result1 = -result1;\n\n      // float epsilon = 0.001*std::max(result0, result1);\n      EXPECT_GE(0.0001, std::abs(result0 - result1));\n    }\n  }\n}\n\nTEST(DistanceMatrix, InnerProductSparse) {\n  TestInnerProductSparse();\n}\n\nTEST(DistanceMatrix, InnerProductSparseMore) {\n  TestInnerProductSparseMore();\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProductSparse_Benchmark) {\n  const uint32_t sparse_vec_count_0 = 52;\n  uint32_t sparse_vec_index_0[] = {\n      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,\n      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,\n      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,\n      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,\n      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};\n  float sparse_vec_value_0[] = {\n      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,\n      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,\n      -0.676969102165, 
-0.774662820732, 0.274471489215,  -0.131269040962,\n      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,\n      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,\n      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,\n      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,\n      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,\n      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,\n      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,\n      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,\n      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,\n      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};\n\n  const uint32_t sparse_vec_count_1 = 43;\n  uint32_t sparse_vec_index_1[] = {\n      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,\n      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,\n      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,\n      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};\n  float sparse_vec_value_1[] = {\n      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,\n      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,\n      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,\n      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,\n      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,\n      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,\n      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,\n      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,\n      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,\n      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,\n      0.247628793044,  
-0.984985692607,  -0.427929860028};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,\n      sparse_query_buffer_1);\n\n  size_t loop_cnt = 100000000LLU;\n  float result[100];\n\n  for (size_t i = 0; i < loop_cnt; ++i) {\n    MinusInnerProductSparseMatrix<float>::Compute(sparse_query_buffer_0.data(),\n                                                  sparse_query_buffer_1.data(),\n                                                  result + (i % 100));\n  }\n\n  EXPECT_EQ(result[0], result[1]);\n}\n\nTEST(DistanceMatrix, TestInnerProductSparseDimWithZero) {\n  // test 1\n  const uint32_t sparse_vec_count_0 = 10;\n  uint32_t sparse_vec_index_0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  float sparse_vec_value_0[] = {2.0, 2.0, 2.0, 2.0, 2.0,\n                                2.0, 2.0, 2.0, 2.0, 2.0};\n\n  const uint32_t sparse_vec_count_1 = 10;\n  uint32_t sparse_vec_index_1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  float sparse_vec_value_1[] = {2.0, 2.0, 2.0, 2.0, 2.0,\n                                2.0, 2.0, 2.0, 2.0, 2.0};\n\n  std::string sparse_query_buffer_0;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,\n      sparse_query_buffer_0);\n\n  std::string sparse_query_buffer_1;\n  MinusInnerProductSparseMatrix<float>::transform_sparse_format(\n      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,\n      sparse_query_buffer_1);\n\n  float result0{0.0f};\n  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,\n                                 sparse_vec_value_0, sparse_vec_count_1,\n                                 sparse_vec_index_1, 
sparse_vec_value_1);\n\n  float result1{0.0f};\n  MinusInnerProductSparseMatrix<float>::Compute(\n      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);\n  result1 = -result1;\n\n  EXPECT_GE(0.00001, std::abs(result0 - result1));\n}\n"
  },
  {
    "path": "tests/ailego/math/inner_product_matrix_int4_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/utility/matrix_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nTEST(DistanceMatrix, InnerProduct_General) {\n  std::mt19937 gen((std::random_device())());\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 1;\n\n  std::vector<int8_t> vec1(dimension), query1(dimension);\n  std::vector<uint8_t> vec2(dimension >> 1), query2(dimension >> 1);\n\n  std::uniform_int_distribution<int> dist(-8, 7);\n\n  for (size_t k = 0; k < 100; ++k) {\n    for (size_t i = 0; i < dimension; i += 2) {\n      vec1[i + 0] = (int8_t)dist(gen);\n      vec1[i + 1] = (int8_t)dist(gen);\n      vec2[i >> 1] =\n          ((uint8_t)(vec1[i + 0]) << 4) | ((uint8_t)(vec1[i + 1]) & 0xf);\n      EXPECT_EQ(vec1[i + 0] * vec1[i + 1], Int4MulTable[vec2[i >> 1]]);\n\n      query1[i + 0] = (int8_t)dist(gen);\n      query1[i + 1] = (int8_t)dist(gen);\n      query2[i >> 1] =\n          
((uint8_t)(query1[i + 0]) << 4) | ((uint8_t)(query1[i + 1]) & 0xf);\n      EXPECT_EQ(query1[i + 0] * query1[i + 1], Int4MulTable[query2[i >> 1]]);\n    }\n\n    EXPECT_FLOAT_EQ(\n        Distance::MinusInnerProduct(vec1.data(), query1.data(), dimension),\n        Distance::MinusInnerProduct(vec2.data(), query2.data(), dimension));\n    EXPECT_FLOAT_EQ(\n        Distance::InnerProduct(vec1.data(), query1.data(), dimension),\n        Distance::InnerProduct(vec2.data(), query2.data(), dimension));\n    EXPECT_FLOAT_EQ(\n        Distance::MinusInnerProduct(vec1.data(), query1.data(), dimension),\n        -Distance::InnerProduct(vec2.data(), query2.data(), dimension));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;\n  size_t matrix_size = batch_size * (dimension / 2);\n  size_t query_matrix_size = query_size * (dimension / 2);\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n      matrix1.data(), dimension / 8, &matrix2[0]);\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * batch_size];\n\n    for 
(size_t j = 0; j < batch_size; ++j) {\n      InnerProductMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, &query_result[j]);\n    }\n  }\n  InnerProductMatrix<uint8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMinusInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;\n  size_t matrix_size = batch_size * (dimension / 2);\n  size_t query_matrix_size = query_size * (dimension / 2);\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n      matrix1.data(), dimension / 8, &matrix2[0]);\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, &query2[0]);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, &query_result[j]);\n    }\n  }\n  MinusInnerProductMatrix<uint8_t, batch_size, query_size>::Compute(\n      
&matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, InnerProduct_1x1) {\n  TestInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x1) {\n  TestInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x2) {\n  TestInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_3x3) {\n  TestInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x1) {\n  TestInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x2) {\n  TestInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x4) {\n  TestInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x1) {\n  TestInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x2) {\n  TestInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x4) {\n  TestInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x8) {\n  TestInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x1) {\n  TestInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x2) {\n  TestInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x4) {\n  TestInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x8) {\n  TestInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x16) {\n  TestInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x1) {\n  TestInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x2) {\n  TestInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x4) {\n  TestInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x8) {\n  TestInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x16) {\n  TestInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x32) {\n  TestInnerProductMatrix<32, 
32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x1) {\n  TestInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x2) {\n  TestInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x4) {\n  TestInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x8) {\n  TestInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x16) {\n  TestInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x32) {\n  TestInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x64) {\n  TestInnerProductMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x1) {\n  TestInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x2) {\n  TestInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x4) {\n  TestInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x8) {\n  TestInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x16) {\n  TestInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x32) {\n  TestInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x64) {\n  TestInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x128) {\n  TestInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_1x1) {\n  TestMinusInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x1) {\n  TestMinusInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x2) {\n  TestMinusInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_3x3) {\n  TestMinusInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x1) {\n  TestMinusInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x2) {\n  TestMinusInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x4) {\n  TestMinusInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x1) {\n  
TestMinusInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x2) {\n  TestMinusInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x4) {\n  TestMinusInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x8) {\n  TestMinusInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x1) {\n  TestMinusInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x2) {\n  TestMinusInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x4) {\n  TestMinusInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x8) {\n  TestMinusInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x16) {\n  TestMinusInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x1) {\n  TestMinusInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x2) {\n  TestMinusInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x4) {\n  TestMinusInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x8) {\n  TestMinusInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x16) {\n  TestMinusInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x32) {\n  TestMinusInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x1) {\n  TestMinusInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x2) {\n  TestMinusInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x4) {\n  TestMinusInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x8) {\n  TestMinusInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x16) {\n  TestMinusInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x32) {\n  TestMinusInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x64) {\n  
TestMinusInnerProductMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x1) {\n  TestMinusInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x2) {\n  TestMinusInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x4) {\n  TestMinusInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x8) {\n  TestMinusInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x16) {\n  TestMinusInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x32) {\n  TestMinusInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x64) {\n  TestMinusInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x128) {\n  TestMinusInnerProductMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid InnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension / 2;\n  const size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(0, 0xff);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (uint8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (uint8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(\n        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);\n  }\n  ailego::MatrixHelper::Transpose<uint32_t, query_size>(\n      query1.data(), dimension / 8, 
&query2[0]);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      InnerProductMatrix<uint8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    InnerProductMatrix<uint8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        InnerProductMatrix<uint8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension / 2], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched InnerProduct (us) \\t\"\n            << 
elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {\n  InnerProductBenchmark<2, 1, 512, 128>();\n  InnerProductBenchmark<2, 2, 512, 128>();\n  InnerProductBenchmark<4, 1, 512, 128>();\n  InnerProductBenchmark<4, 2, 512, 128>();\n  InnerProductBenchmark<4, 4, 512, 128>();\n  InnerProductBenchmark<8, 1, 512, 128>();\n  InnerProductBenchmark<8, 2, 512, 128>();\n  InnerProductBenchmark<8, 4, 512, 128>();\n  InnerProductBenchmark<8, 8, 512, 128>();\n  InnerProductBenchmark<16, 1, 512, 128>();\n  InnerProductBenchmark<16, 2, 512, 128>();\n  InnerProductBenchmark<16, 4, 512, 128>();\n  InnerProductBenchmark<16, 8, 512, 128>();\n  InnerProductBenchmark<16, 16, 512, 128>();\n  InnerProductBenchmark<32, 1, 512, 128>();\n  InnerProductBenchmark<32, 2, 512, 128>();\n  InnerProductBenchmark<32, 4, 512, 128>();\n  InnerProductBenchmark<32, 8, 512, 128>();\n  InnerProductBenchmark<32, 16, 512, 128>();\n  InnerProductBenchmark<32, 32, 512, 128>();\n  InnerProductBenchmark<64, 1, 512, 128>();\n  InnerProductBenchmark<64, 2, 512, 128>();\n  InnerProductBenchmark<64, 4, 512, 128>();\n  InnerProductBenchmark<64, 8, 512, 128>();\n  InnerProductBenchmark<128, 1, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/inner_product_matrix_int8_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t i = 0; i < N; ++i) {\n    for (size_t j = 0; j < M; ++j) {\n      dst[j * N + i] = src[i * M + j];\n    }\n  }\n}\n\ntemplate <size_t N>\nstatic float InnerProductDistance(const FixedVector<int8_t, N> &lhs,\n                                  const FixedVector<int8_t, N> &rhs) {\n  return Distance::InnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\ntemplate <size_t N>\nstatic float MinusInnerProductDistance(const FixedVector<int8_t, N> &lhs,\n                                       const FixedVector<int8_t, N> &rhs) {\n  return Distance::MinusInnerProduct(lhs.data(), rhs.data(), lhs.size());\n}\n\nTEST(DistanceMatrix, InnerProduct_General) {\n  int8_t a1[] = {0};\n  int8_t b1[] = {0};\n\n  int8_t a17[] = {127, -1,  -1,  127, 127, 127, 
127, -1, 127,\n                  127, 127, 127, 127, 127, -1,  -1,  127};\n  int8_t b17[] = {127, -1,  -1,  127, 127, 127, -1, 127, 127,\n                  127, 127, 127, 127, 127, -1,  -1, 127};\n\n  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,\n                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,\n                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,\n                  0,   0, -127, 126,  -127, -127, -127, -127, 127,  127,\n                  1,   2, 3,    4,    127,  127,  111};\n  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,\n                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,\n                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,\n                  0,    0, 127,  127, 100,  122,  123, -127, 1,   2,\n                  3,    4, -127, 122, -127, -127, -127};\n\n  EXPECT_FLOAT_EQ(0.0f,\n                  InnerProductDistance(*FixedVector<int8_t, 1>::Cast(a1),\n                                       *FixedVector<int8_t, 1>::Cast(b1)));\n  EXPECT_FLOAT_EQ(177169.0f,\n                  InnerProductDistance(*FixedVector<int8_t, 17>::Cast(a17),\n                                       *FixedVector<int8_t, 17>::Cast(b17)));\n  EXPECT_FLOAT_EQ(-299458.0f,\n                  InnerProductDistance(*FixedVector<int8_t, 47>::Cast(a47),\n                                       *FixedVector<int8_t, 47>::Cast(b47)));\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_General) {\n  int8_t a1[] = {0};\n  int8_t b1[] = {0};\n\n  int8_t a17[] = {127, -1,  -1,  127, 127, 127, 127, -1, 127,\n                  127, 127, 127, 127, 127, -1,  -1,  127};\n  int8_t b17[] = {127, -1,  -1,  127, 127, 127, -1, 127, 127,\n                  127, 127, 127, 127, 127, -1,  -1, 127};\n\n  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,\n                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,\n                  0,   0, 127,  5,    127,  127,  0,    
0,    -127, -127,\n                  0,   0, -127, 126,  -127, -127, -127, -127, 127,  127,\n                  1,   2, 3,    4,    127,  127,  111};\n  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,\n                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,\n                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,\n                  0,    0, 127,  127, 100,  122,  123, -127, 1,   2,\n                  3,    4, -127, 122, -127, -127, -127};\n\n  EXPECT_FLOAT_EQ(0.0f,\n                  MinusInnerProductDistance(*FixedVector<int8_t, 1>::Cast(a1),\n                                            *FixedVector<int8_t, 1>::Cast(b1)));\n  EXPECT_FLOAT_EQ(-177169.0f, MinusInnerProductDistance(\n                                  *FixedVector<int8_t, 17>::Cast(a17),\n                                  *FixedVector<int8_t, 17>::Cast(b17)));\n  EXPECT_FLOAT_EQ(299458.0f, MinusInnerProductDistance(\n                                 *FixedVector<int8_t, 47>::Cast(a47),\n                                 *FixedVector<int8_t, 47>::Cast(b47)));\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMinusInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  
}\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MinusInnerProductMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  MinusInnerProductMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestInnerProductMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 64))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, 
query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      InnerProductMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);\n    }\n  }\n  InnerProductMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_1x1) {\n  TestMinusInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x1) {\n  TestMinusInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_2x2) {\n  TestMinusInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_3x3) {\n  TestMinusInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x1) {\n  TestMinusInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x2) {\n  TestMinusInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_4x4) {\n  TestMinusInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x1) {\n  TestMinusInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x2) {\n  TestMinusInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x4) {\n  TestMinusInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_8x8) {\n  TestMinusInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x1) {\n  TestMinusInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x2) {\n  TestMinusInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x4) {\n  TestMinusInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x8) {\n  TestMinusInnerProductMatrix<16, 
8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_16x16) {\n  TestMinusInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x1) {\n  TestMinusInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x2) {\n  TestMinusInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x4) {\n  TestMinusInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x8) {\n  TestMinusInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x16) {\n  TestMinusInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_32x32) {\n  TestMinusInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x1) {\n  TestMinusInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x2) {\n  TestMinusInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x4) {\n  TestMinusInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x8) {\n  TestMinusInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x16) {\n  TestMinusInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x32) {\n  TestMinusInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_64x64) {\n  TestMinusInnerProductMatrix<64, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x1) {\n  TestMinusInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x2) {\n  TestMinusInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x4) {\n  TestMinusInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x8) {\n  TestMinusInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x16) {\n  TestMinusInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x32) {\n  TestMinusInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x64) {\n  
TestMinusInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, MinusInnerProduct_128x128) {\n  TestMinusInnerProductMatrix<128, 128>();\n}\n\nTEST(DistanceMatrix, InnerProduct_1x1) {\n  TestInnerProductMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x1) {\n  TestInnerProductMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_2x2) {\n  TestInnerProductMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_3x3) {\n  TestInnerProductMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x1) {\n  TestInnerProductMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x2) {\n  TestInnerProductMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_4x4) {\n  TestInnerProductMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x1) {\n  TestInnerProductMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x2) {\n  TestInnerProductMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x4) {\n  TestInnerProductMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_8x8) {\n  TestInnerProductMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x1) {\n  TestInnerProductMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x2) {\n  TestInnerProductMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x4) {\n  TestInnerProductMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x8) {\n  TestInnerProductMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_16x16) {\n  TestInnerProductMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x1) {\n  TestInnerProductMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x2) {\n  TestInnerProductMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x4) {\n  TestInnerProductMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x8) {\n  TestInnerProductMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x16) {\n  TestInnerProductMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_32x32) {\n  TestInnerProductMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, 
InnerProduct_64x1) {\n  TestInnerProductMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x2) {\n  TestInnerProductMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x4) {\n  TestInnerProductMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x8) {\n  TestInnerProductMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x16) {\n  TestInnerProductMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x32) {\n  TestInnerProductMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_64x64) {\n  TestInnerProductMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x1) {\n  TestInnerProductMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x2) {\n  TestInnerProductMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x4) {\n  TestInnerProductMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x8) {\n  TestInnerProductMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x16) {\n  TestInnerProductMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x32) {\n  TestInnerProductMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x64) {\n  TestInnerProductMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, InnerProduct_128x128) {\n  TestInnerProductMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid InnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  
for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      InnerProductMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    InnerProductMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched InnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; 
++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        InnerProductMatrix<int8_t, 1, 1>::Compute(&matrix_batch[k * dimension],\n                                                  current_query, dimension,\n                                                  &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched InnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MinusInnerProductBenchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * 
\" << block_size\n            << std::endl;\n\n  // 1 Batched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MinusInnerProductMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MinusInnerProductMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, results.data());\n  }\n  std::cout << \"* N Batched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched MinusInnerProduct\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MinusInnerProductMatrix<int8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MinusInnerProduct (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {\n  InnerProductBenchmark<2, 1, 512, 128>();\n  InnerProductBenchmark<2, 2, 512, 128>();\n  InnerProductBenchmark<4, 1, 512, 128>();\n  
InnerProductBenchmark<4, 2, 512, 128>();\n  InnerProductBenchmark<4, 4, 512, 128>();\n  InnerProductBenchmark<8, 1, 512, 128>();\n  InnerProductBenchmark<8, 2, 512, 128>();\n  InnerProductBenchmark<8, 4, 512, 128>();\n  InnerProductBenchmark<8, 8, 512, 128>();\n  InnerProductBenchmark<16, 1, 512, 128>();\n  InnerProductBenchmark<16, 2, 512, 128>();\n  InnerProductBenchmark<16, 4, 512, 128>();\n  InnerProductBenchmark<16, 8, 512, 128>();\n  InnerProductBenchmark<16, 16, 512, 128>();\n  InnerProductBenchmark<32, 1, 512, 128>();\n  InnerProductBenchmark<32, 2, 512, 128>();\n  InnerProductBenchmark<32, 4, 512, 128>();\n  InnerProductBenchmark<32, 8, 512, 128>();\n  InnerProductBenchmark<32, 16, 512, 128>();\n  InnerProductBenchmark<32, 32, 512, 128>();\n  InnerProductBenchmark<64, 1, 512, 128>();\n  InnerProductBenchmark<64, 2, 512, 128>();\n  InnerProductBenchmark<64, 4, 512, 128>();\n  InnerProductBenchmark<64, 8, 512, 128>();\n  InnerProductBenchmark<128, 1, 512, 128>();\n}\n\nTEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {\n  MinusInnerProductBenchmark<2, 1, 512, 128>();\n  MinusInnerProductBenchmark<2, 2, 512, 128>();\n  MinusInnerProductBenchmark<4, 1, 512, 128>();\n  MinusInnerProductBenchmark<4, 2, 512, 128>();\n  MinusInnerProductBenchmark<4, 4, 512, 128>();\n  MinusInnerProductBenchmark<8, 1, 512, 128>();\n  MinusInnerProductBenchmark<8, 2, 512, 128>();\n  MinusInnerProductBenchmark<8, 4, 512, 128>();\n  MinusInnerProductBenchmark<8, 8, 512, 128>();\n  MinusInnerProductBenchmark<16, 1, 512, 128>();\n  MinusInnerProductBenchmark<16, 2, 512, 128>();\n  MinusInnerProductBenchmark<16, 4, 512, 128>();\n  MinusInnerProductBenchmark<16, 8, 512, 128>();\n  MinusInnerProductBenchmark<16, 16, 512, 128>();\n  MinusInnerProductBenchmark<32, 1, 512, 128>();\n  MinusInnerProductBenchmark<32, 2, 512, 128>();\n  MinusInnerProductBenchmark<32, 4, 512, 128>();\n  MinusInnerProductBenchmark<32, 8, 512, 128>();\n  MinusInnerProductBenchmark<32, 16, 512, 128>();\n  
MinusInnerProductBenchmark<32, 32, 512, 128>();\n  MinusInnerProductBenchmark<64, 1, 512, 128>();\n  MinusInnerProductBenchmark<64, 2, 512, 128>();\n  MinusInnerProductBenchmark<64, 4, 512, 128>();\n  MinusInnerProductBenchmark<64, 8, 512, 128>();\n  MinusInnerProductBenchmark<128, 1, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/mips_euclidean_distance_matrix_fp16_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/math/norm2_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float MipsSquaredEuclideanDistance(const Float16 *lhs,\n                                          const Float16 *rhs, size_t dim,\n                                          size_t m_value, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclideanDistance(const FixedVector<Float16, N> &lhs,\n                                          const FixedVector<Float16, N> &rhs,\n                                          size_t m_value, float e2) {\n  return 
MipsSquaredEuclideanDistance(lhs.data(), rhs.data(), lhs.size(),\n                                      m_value, e2);\n}\n\nstatic float ConvertAndComputeByMips(const Float16 *lhs, const Float16 *rhs,\n                                     size_t dim, size_t m_value, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + m_value);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    lhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  std::vector<float> rhs_vec(dim + m_value);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    rhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + m_value);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<Float16, N> &lhs,\n                                     const FixedVector<Float16, N> &rhs,\n                                     size_t m_value, float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), m_value,\n                                 e2);\n}\n\nTEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = (std::uniform_int_distribution<size_t>(1, 4))(gen);\n  const float u_val = (std::uniform_real_distribution<float>(0.1, 1.0))(gen);\n  const float epsilon = 1e-2;\n  const uint32_t dim = (std::uniform_int_distribution<uint32_t>(2, 128))(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<Float16> vec1(dim);\n    
std::vector<Float16> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    float norm1{0.0}, norm2{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(vec1.data(), dim, &norm1);\n    SquaredNorm2Matrix<Float16, 1>::Compute(vec2.data(), dim, &norm2);\n    const float e2 = u_val * u_val / std::max(norm1, norm2);\n    ASSERT_NEAR(\n        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),\n        MipsSquaredEuclideanDistance(vec1.data(), vec2.data(), dim, m_val, e2),\n        epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = 4;\n  const float u_val = 0.68f;\n  const float l2_norm = 15.5f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-2;\n\n  FixedVector<Float16, 1> a{0.0f}, b{0.0f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(a, b, m_val, e2), epsilon);\n\n  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(c, d, m_val, e2), epsilon);\n\n  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(e, f, m_val, e2), epsilon);\n\n  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(g, h, m_val, e2), epsilon);\n\n  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(i, j, m_val, e2), epsilon);\n\n  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(l, k, m_val, e2), epsilon);\n\n  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(m, n, 
m_val, e2), epsilon);\n\n  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(o, p, m_val, e2), epsilon);\n\n  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                            0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(q, r, m_val, e2), epsilon);\n\n  FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(s, t, m_val, e2), epsilon);\n\n  FixedVector<Float16, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_NEAR(0.00746485f, MipsSquaredEuclideanDistance(u, v, m_val, e2),\n              epsilon);\n\n  FixedVector<Float16, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_NEAR(0.00983364f, MipsSquaredEuclideanDistance(w, x, m_val, e2),\n              epsilon);\n\n  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_NEAR(0.0126668f, MipsSquaredEuclideanDistance(y, z, m_val, e2),\n              epsilon);\n\n  FixedVector<Float16, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_NEAR(0.0206175f, MipsSquaredEuclideanDistance(x14, y14, m_val, e2),\n              epsilon);\n\n  FixedVector<Float16, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_NEAR(0.0389414f, MipsSquaredEuclideanDistance(x15, y15, m_val, e2),\n              epsilon);\n}\n\ntemplate <size_t M, 
size_t N>\nvoid TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t m_val = (std::uniform_int_distribution<size_t>(1, 4))(gen);\n  const float u_val = (std::uniform_real_distribution<float>(0.3, 0.9))(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<Float16, 1, 
1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, m_val, e2,\n          &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_NEAR(result1[i], result2[i], 1e-2);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 
8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid 
MipsRepeatedQuadraticInjectionBenchMark(void) {\n  const size_t m_val = 4;\n  const float u_val = 0.6;\n  const float l2_norm = 1.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, m_val, e2, current_results);\n    }\n  }\n  
std::cout\n      << \"* 1 Batched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) \"\n         \"(us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<Float16, batch_size,\n                                       query_size>::Compute(matrix_batch,\n                                                            &query2[0],\n                                                            dimension, m_val,\n                                                            e2, results.data());\n  }\n  std::cout\n      << \"* N Batched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) \"\n         \"(us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout\n      << \"* Unbatched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) \"\n         \"(us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {\n  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 
64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();\n}\n\nstatic float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,\n                                  size_t dim, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<Float16, N> &lhs,\n                                  const FixedVector<Float16, N> &rhs,\n                                  float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nstatic float ConvertAndComputeByMips(const Float16 *lhs, const Float16 *rhs,\n                                     size_t dim, float 
e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + 1);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  float norm2;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);\n  lhs_vec[dim] = std::sqrt(1 - norm2);\n\n  std::vector<float> rhs_vec(dim + 1);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  std::cout << \"squ: \" << squ << std::endl;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);\n  rhs_vec[dim] = std::sqrt(1 - norm2);\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + 1);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<float, N> &lhs,\n                                     const FixedVector<float, N> &rhs,\n                                     float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nTEST(DistanceMatrix, GeneralSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = std::uniform_real_distribution<float>(0.5, 1.0)(gen);\n  const float epsilon = 1e-2;\n  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<Float16> vec1(dim);\n    std::vector<Float16> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    float norm1{0.0}, norm2{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(vec1.data(), dim, &norm1);\n    SquaredNorm2Matrix<Float16, 1>::Compute(vec2.data(), dim, &norm2);\n    const float e2 = u_val * u_val / 
std::max(norm1, norm2);\n    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),\n                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),\n                epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = 0.68f;\n  const float l2_norm = 15.5f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-2;\n\n  FixedVector<Float16, 1> a{0.0f}, b{0.0f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, e2), epsilon);\n\n  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, e2), epsilon);\n\n  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, e2), epsilon);\n\n  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, e2), epsilon);\n\n  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, e2), epsilon);\n\n  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, e2), epsilon);\n\n  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, e2), epsilon);\n\n  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, e2), epsilon);\n\n  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                            0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, e2), epsilon);\n\n  FixedVector<Float16, 10> s{0.0f, 0.1f, 
0.2f, 0.3f, 0.4f,\n                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, e2), epsilon);\n\n  FixedVector<Float16, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_NEAR(0.00742372544f, MipsSquaredEuclidean(u, v, e2), epsilon);\n\n  FixedVector<Float16, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_NEAR(0.00976261682f, MipsSquaredEuclidean(w, x, e2), epsilon);\n\n  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_NEAR(0.01254967600f, MipsSquaredEuclidean(y, z, e2), epsilon);\n\n  FixedVector<Float16, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_NEAR(0.02031209506f, MipsSquaredEuclidean(x14, y14, e2), epsilon);\n\n  FixedVector<Float16, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_NEAR(0.03788981214f, MipsSquaredEuclidean(x15, y15, e2), epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMipsSphericalInjectionMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  
std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score{0.0};\n    SquaredNorm2Matrix<Float16, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = 0.98f / squared_l2_norm;\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const Float16 *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, e2, &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, e2, &result2[0]);\n\n  const float epsilon = 1e-2;\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_NEAR(result1[i], result2[i], epsilon);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {\n  TestMipsSphericalInjectionMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {\n  TestMipsSphericalInjectionMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {\n  TestMipsSphericalInjectionMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {\n  
TestMipsSphericalInjectionMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {\n  TestMipsSphericalInjectionMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {\n  TestMipsSphericalInjectionMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {\n  TestMipsSphericalInjectionMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {\n  TestMipsSphericalInjectionMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {\n  TestMipsSphericalInjectionMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {\n  TestMipsSphericalInjectionMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {\n  TestMipsSphericalInjectionMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {\n  TestMipsSphericalInjectionMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {\n  TestMipsSphericalInjectionMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {\n  TestMipsSphericalInjectionMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {\n  TestMipsSphericalInjectionMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {\n  TestMipsSphericalInjectionMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {\n  TestMipsSphericalInjectionMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {\n  TestMipsSphericalInjectionMatrix<32, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {\n  TestMipsSphericalInjectionMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {\n  TestMipsSphericalInjectionMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanSphericalInjection_32x16) {\n  TestMipsSphericalInjectionMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {\n  TestMipsSphericalInjectionMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {\n  TestMipsSphericalInjectionMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {\n  TestMipsSphericalInjectionMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {\n  TestMipsSphericalInjectionMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {\n  TestMipsSphericalInjectionMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {\n  TestMipsSphericalInjectionMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {\n  TestMipsSphericalInjectionMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {\n  TestMipsSphericalInjectionMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {\n  TestMipsSphericalInjectionMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {\n  TestMipsSphericalInjectionMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {\n  TestMipsSphericalInjectionMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {\n  TestMipsSphericalInjectionMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {\n  TestMipsSphericalInjectionMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {\n  TestMipsSphericalInjectionMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {\n  TestMipsSphericalInjectionMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) 
{\n  TestMipsSphericalInjectionMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsSphericalInjectionBenchMarkk(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n  const float e2 = 1.0 / dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<Float16> query1(query_matrix_size);\n  std::vector<Float16> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, e2, current_results);\n    }\n  }\n  std::cout << \"* 1 
Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<Float16, batch_size,\n                                       query_size>::Compute(matrix_batch,\n                                                            &query2[0],\n                                                            dimension, e2,\n                                                            results.data());\n  }\n  std::cout << \"* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const Float16 *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {\n  MipsSphericalInjectionBenchMarkk<2, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<2, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 2, 512, 
64>();\n  MipsSphericalInjectionBenchMarkk<8, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 16, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 16, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 32, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<128, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<1, 1, 1024, 256>();\n}\n"
  },
  {
    "path": "tests/ailego/math/mips_euclidean_distance_matrix_fp32_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/math/norm2_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(float *dst, const float *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float MipsSquaredEuclidean(const float *lhs, const float *rhs,\n                                  size_t dim, size_t m_value, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<float, N> &lhs,\n                                  const FixedVector<float, N> &rhs,\n                                  size_t m_value, float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), m_value, e2);\n}\n\nstatic float ConvertAndComputeByMips(const float *lhs, 
const float *rhs,\n                                     size_t dim, size_t m_value, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + m_value);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    lhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n\n  std::vector<float> rhs_vec(dim + m_value);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    rhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + m_value);\n}\n\nTEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);\n  const float epsilon = 1e-5;\n  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<float> vec1(dim);\n    std::vector<float> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    float norm1, norm2;\n    SquaredNorm2Matrix<float, 1>::Compute(vec1.data(), dim, &norm1);\n    SquaredNorm2Matrix<float, 1>::Compute(vec2.data(), dim, &norm2);\n    const float e2 = u_val * u_val / std::max(norm1, norm2);\n    ASSERT_NEAR(\n        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),\n        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),\n        epsilon);\n  
}\n}\n\nTEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = 4;\n  const float u_val = 0.68f;\n  const float l2_norm = 15.5f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = std::numeric_limits<float>::epsilon();\n\n  FixedVector<float, 1> a{0.0f}, b{0.0f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, m_val, e2), epsilon);\n\n  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, m_val, e2), epsilon);\n\n  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, m_val, e2), epsilon);\n\n  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, m_val, e2), epsilon);\n\n  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, m_val, e2), epsilon);\n\n  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, m_val, e2), epsilon);\n\n  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, m_val, e2), epsilon);\n\n  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, m_val, e2), epsilon);\n\n  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, m_val, e2), epsilon);\n\n  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 
0.6f, 0.7f, 0.8f, 0.9f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, m_val, e2), epsilon);\n\n  FixedVector<float, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_NEAR(0.00746485f, MipsSquaredEuclidean(u, v, m_val, e2), epsilon);\n\n  FixedVector<float, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_NEAR(0.00983364f, MipsSquaredEuclidean(w, x, m_val, e2), epsilon);\n\n  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_NEAR(0.0126668f, MipsSquaredEuclidean(y, z, m_val, e2), epsilon);\n\n  FixedVector<float, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_NEAR(0.0206175f, MipsSquaredEuclidean(x14, y14, m_val, e2), epsilon);\n\n  FixedVector<float, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_NEAR(0.0389414f, MipsSquaredEuclidean(x15, y15, m_val, e2), epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMipsRepeatedQuadraticInjectionMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.5, 0.9)(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * 
query_size);\n\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<float, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<float, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, m_val, e2,\n          &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);\n\n  const float epsilon = 1e-4;\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_NEAR(result1[i], result2[i], epsilon);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {\n  TestMipsRepeatedQuadraticInjectionMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {\n  TestMipsRepeatedQuadraticInjectionMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {\n  TestMipsRepeatedQuadraticInjectionMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {\n  TestMipsRepeatedQuadraticInjectionMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 
2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {\n  TestMipsRepeatedQuadraticInjectionMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {\n  TestMipsRepeatedQuadraticInjectionMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {\n  TestMipsRepeatedQuadraticInjectionMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsRepeatedQuadraticInjectionBenchMark(void) {\n  const size_t m_val = 4;\n  const float u_val = 0.6;\n  const float l2_norm = 1.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * 
query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, m_val, e2, current_results);\n    }\n  }\n  std::cout\n      << \"* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, m_val, e2, results.data());\n  }\n  std::cout\n      << \"* N Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout\n      << \"* Unbatched 
MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {\n  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();\n}\n\nstatic float MipsSquaredEuclidean(const float *lhs, const float *rhs,\n                                  size_t dim, float e2) {\n  return 
Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<float, N> &lhs,\n                                  const FixedVector<float, N> &rhs, float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nstatic float ConvertAndComputeByMips(const float *lhs, const float *rhs,\n                                     size_t dim, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + 1);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  float norm2;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);\n  lhs_vec[dim] = std::sqrt(1 - norm2);\n\n  std::vector<float> rhs_vec(dim + 1);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  std::cout << \"squ: \" << squ << std::endl;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);\n  rhs_vec[dim] = std::sqrt(1 - norm2);\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + 1);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<float, N> &lhs,\n                                     const FixedVector<float, N> &rhs,\n                                     float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nTEST(DistanceMatrix, GeneralSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = std::uniform_real_distribution<float>(0.5, 0.9)(gen);\n  const float epsilon = 1e-6;\n  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for 
(size_t i = 0; i < count; ++i) {\n    std::vector<float> vec1(dim);\n    std::vector<float> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    float norm1, norm2;\n    SquaredNorm2Matrix<float, 1>::Compute(vec1.data(), dim, &norm1);\n    SquaredNorm2Matrix<float, 1>::Compute(vec2.data(), dim, &norm2);\n    const float e2 = u_val * u_val / std::max(norm1, norm2);\n    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),\n                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),\n                epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = 0.68f;\n  const float l2_norm = 15.5f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = std::numeric_limits<float>::epsilon();\n\n  FixedVector<float, 1> a{0.0f}, b{0.0f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, e2), epsilon);\n\n  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, e2), epsilon);\n\n  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, e2), epsilon);\n\n  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, e2), epsilon);\n\n  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},\n      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, e2), epsilon);\n\n  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},\n      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, e2), epsilon);\n\n  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},\n      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, e2), epsilon);\n\n  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 
0.5f, 0.6f, 0.7f},\n      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, e2), epsilon);\n\n  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},\n      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, e2), epsilon);\n\n  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,\n                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},\n      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, e2), epsilon);\n\n  FixedVector<float, 11> u{0.0f},\n      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n  EXPECT_NEAR(0.00742372544f, MipsSquaredEuclidean(u, v, e2), epsilon);\n\n  FixedVector<float, 12> w{0.0f},\n      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};\n  EXPECT_NEAR(0.00976261682f, MipsSquaredEuclidean(w, x, e2), epsilon);\n\n  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};\n  EXPECT_NEAR(0.01254967600f, MipsSquaredEuclidean(y, z, e2), epsilon);\n\n  FixedVector<float, 14> x14{0.0f},\n      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,\n          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};\n  EXPECT_NEAR(0.02031209506f, MipsSquaredEuclidean(x14, y14, e2), epsilon);\n\n  FixedVector<float, 15> x15{0.0f},\n      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,\n          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};\n  EXPECT_NEAR(0.03788981214f, MipsSquaredEuclidean(x15, y15, e2), epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestMipsSphericalInjectionMatrix(void) {\n  std::mt19937 gen((std::random_device())());\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);\n  size_t matrix_size = batch_size * dimension;\n  size_t 
query_matrix_size = query_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<float, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<float, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = 0.98f / squared_l2_norm;\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const float *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, e2, &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(1e-4, std::abs(result1[i] - result2[i]));\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {\n  TestMipsSphericalInjectionMatrix<1, 1>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanSphericalInjection_2x1) {\n  TestMipsSphericalInjectionMatrix<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {\n  TestMipsSphericalInjectionMatrix<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {\n  TestMipsSphericalInjectionMatrix<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {\n  TestMipsSphericalInjectionMatrix<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {\n  TestMipsSphericalInjectionMatrix<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {\n  TestMipsSphericalInjectionMatrix<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {\n  TestMipsSphericalInjectionMatrix<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {\n  TestMipsSphericalInjectionMatrix<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {\n  TestMipsSphericalInjectionMatrix<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {\n  TestMipsSphericalInjectionMatrix<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {\n  TestMipsSphericalInjectionMatrix<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {\n  TestMipsSphericalInjectionMatrix<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {\n  TestMipsSphericalInjectionMatrix<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {\n  TestMipsSphericalInjectionMatrix<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {\n  TestMipsSphericalInjectionMatrix<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {\n  TestMipsSphericalInjectionMatrix<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {\n  TestMipsSphericalInjectionMatrix<32, 
2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {\n  TestMipsSphericalInjectionMatrix<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {\n  TestMipsSphericalInjectionMatrix<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {\n  TestMipsSphericalInjectionMatrix<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {\n  TestMipsSphericalInjectionMatrix<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {\n  TestMipsSphericalInjectionMatrix<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {\n  TestMipsSphericalInjectionMatrix<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {\n  TestMipsSphericalInjectionMatrix<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {\n  TestMipsSphericalInjectionMatrix<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {\n  TestMipsSphericalInjectionMatrix<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {\n  TestMipsSphericalInjectionMatrix<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {\n  TestMipsSphericalInjectionMatrix<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {\n  TestMipsSphericalInjectionMatrix<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {\n  TestMipsSphericalInjectionMatrix<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {\n  TestMipsSphericalInjectionMatrix<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {\n  TestMipsSphericalInjectionMatrix<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {\n  TestMipsSphericalInjectionMatrix<128, 16>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanSphericalInjection_128x32) {\n  TestMipsSphericalInjectionMatrix<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {\n  TestMipsSphericalInjectionMatrix<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {\n  TestMipsSphericalInjectionMatrix<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsSphericalInjectionBenchMarkk(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n  const float e2 = 1.0 / dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> query1(query_matrix_size);\n  std::vector<float> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float 
*current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<float, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, e2, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, e2, results.data());\n  }\n  std::cout << \"* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const float *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {\n  MipsSphericalInjectionBenchMarkk<2, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<2, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<4, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 1, 
512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<8, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<16, 16, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 16, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<32, 32, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 2, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 4, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<64, 8, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<128, 1, 512, 64>();\n  MipsSphericalInjectionBenchMarkk<1, 1, 1024, 256>();\n}"
  },
  {
    "path": "tests/ailego/math/mips_euclidean_distance_matrix_int4_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <iostream>\n#include <ostream>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/math/norm2_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,\n                                  size_t dim, size_t m_value, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<uint8_t, N> &lhs,\n                                  const FixedVector<uint8_t, N> &rhs,\n                                  size_t m_value, float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), m_value, 
e2);\n}\n\nstatic float ConvertAndComputeByMips(const uint8_t *lhs, const uint8_t *rhs,\n                                     size_t dim, size_t m_value, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + m_value);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; i += 2) {\n    uint8_t v = lhs[i / 2];\n    int8_t lo = (int8_t)(v << 4) >> 4;\n    int8_t hi = (int8_t)(v & 0xf0) >> 4;\n    float val = lo * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n    val = hi * eta;\n    lhs_vec[i + 1] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    lhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  std::vector<float> rhs_vec(dim + m_value);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; i += 2) {\n    uint8_t v = rhs[i / 2];\n    int8_t lo = (int8_t)(v << 4) >> 4;\n    int8_t hi = (int8_t)(v & 0xf0) >> 4;\n    float val = lo * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n    val = hi * eta;\n    rhs_vec[i + 1] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    rhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + m_value);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<uint8_t, N> &lhs,\n                                     const FixedVector<uint8_t, N> &rhs,\n                                     size_t m_value, float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), m_value,\n                                 e2);\n}\n\nTEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);\n  const float l2_norm =\n      std::uniform_real_distribution<float>(100.0, 150.0)(gen);\n  const float 
e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-6;\n  const uint32_t dim =\n      (std::uniform_int_distribution<uint32_t>(2, 128))(gen) * 2;\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<uint8_t> vec1(dim / 2);\n    std::vector<uint8_t> vec2(dim / 2);\n    for (size_t d = 0; d < dim / 2; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    ASSERT_NEAR(\n        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),\n        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),\n        epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = 4;\n  const float u_val = 0.68f;\n  const float l2_norm = 20.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-5;\n\n  uint8_t a[] = {0}, b[] = {0};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 2, m_val, e2), epsilon);\n\n  uint8_t c[] = {0, 1}, d[] = {0, 1};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 4, m_val, e2), epsilon);\n\n  uint8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 6, m_val, e2), epsilon);\n\n  uint8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 8, m_val, e2), epsilon);\n\n  uint8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 10, m_val, e2), epsilon);\n\n  uint8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 12, m_val, e2), epsilon);\n\n  uint8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 14, m_val, e2), epsilon);\n\n  uint8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};\n  
EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 16, m_val, e2), epsilon);\n\n  uint8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 18, m_val, e2), epsilon);\n\n  uint8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},\n          t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 20, m_val, e2), epsilon);\n\n  uint8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},\n          v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n  EXPECT_NEAR(0.458308637f, MipsSquaredEuclidean(u, v, 22, m_val, e2), epsilon);\n\n  uint8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};\n  EXPECT_NEAR(0.512402892f, MipsSquaredEuclidean(w, x, 24, m_val, e2), epsilon);\n\n  uint8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};\n  EXPECT_NEAR(0.548633813f, MipsSquaredEuclidean(y, z, 26, m_val, e2), epsilon);\n\n  uint8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};\n  EXPECT_NEAR(0.588600754f, MipsSquaredEuclidean(x14, y14, 28, m_val, e2),\n              epsilon);\n\n  uint8_t x15[15] = {0},\n          y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};\n  EXPECT_NEAR(0.600657463f, MipsSquaredEuclidean(x15, y15, 30, m_val, e2),\n              epsilon);\n  uint8_t x16[16] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9},\n          y16[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30, 50};\n  EXPECT_NEAR(2.628833294f, MipsSquaredEuclidean(x16, y16, 32, m_val, e2),\n              epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen) * 8;\n  size_t 
matrix_size = batch_size * dimension / 2;\n  size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<uint8_t, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<uint8_t, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 8, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 8, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, m_val, e2,\n          &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; 
++i) {\n    ASSERT_NEAR(result1[i], result2[i], 1e-4);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {\n  
TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsRepeatedQuadraticInjectionBenchMark(void) {\n  const size_t m_val = 4;\n  const float u_val = 0.6;\n  const float l2_norm = 1.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * 
batch_size * dimension / 2;\n  const size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 8,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 8, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, m_val, e2, current_results);\n    }\n  }\n  std::cout\n      << \"* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) 
{\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size,\n                                       query_size>::Compute(matrix_batch,\n                                                            &query2[0],\n                                                            dimension, m_val,\n                                                            e2, results.data());\n  }\n  std::cout\n      << \"* N Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension / 2], current_query, dimension, m_val,\n            e2, &current_results[k]);\n      }\n    }\n  }\n  std::cout\n      << \"* Unbatched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_) {\n  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();\n  
MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();\n}\n\nstatic float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,\n                                  size_t dim, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<uint8_t, N> &lhs,\n                                  const FixedVector<uint8_t, N> &rhs,\n                                  float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nstatic float ConvertAndComputeByMips(const uint8_t *lhs, const uint8_t *rhs,\n                                     size_t dim, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + 1);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; i += 2) {\n    uint8_t v = lhs[i / 2];\n    int8_t lo = (int8_t)(v << 4) >> 4;\n    int8_t hi = (int8_t)(v & 0xf0) >> 4;\n    float val = lo * eta;\n    lhs_vec[i] = val;\n    squ += 
val * val;\n    val = hi * eta;\n    lhs_vec[i + 1] = val;\n    squ += val * val;\n  }\n  float norm2;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);\n  lhs_vec[dim] = std::sqrt(1 - norm2);\n\n  std::vector<float> rhs_vec(dim + 1);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; i += 2) {\n    uint8_t v = rhs[i / 2];\n    int8_t lo = (int8_t)(v << 4) >> 4;\n    int8_t hi = (int8_t)(v & 0xf0) >> 4;\n    float val = lo * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n    val = hi * eta;\n    rhs_vec[i + 1] = val;\n    squ += val * val;\n  }\n  std::cout << \"squ: \" << squ << std::endl;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);\n  rhs_vec[dim] = std::sqrt(1 - norm2);\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + 1);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<uint8_t, N> &lhs,\n                                     const FixedVector<uint8_t, N> &rhs,\n                                     float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nTEST(DistanceMatrix, GeneralSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);\n  const float l2_norm =\n      std::uniform_real_distribution<float>(100.0, 150.0)(gen);\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-6;\n  const uint32_t dim =\n      (std::uniform_int_distribution<uint32_t>(2, 128))(gen) * 2;\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<uint8_t> vec1(dim / 2);\n    std::vector<uint8_t> vec2(dim / 2);\n    for (size_t d = 0; d < dim / 2; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    
ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),\n                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),\n                epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = 0.68f;\n  const float l2_norm = 20.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-5;\n\n  uint8_t a[] = {0}, b[] = {0};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 2, e2), epsilon);\n\n  uint8_t c[] = {0, 1}, d[] = {0, 1};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 4, e2), epsilon);\n\n  uint8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 6, e2), epsilon);\n\n  uint8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 8, e2), epsilon);\n\n  uint8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 10, e2), epsilon);\n\n  uint8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 12, e2), epsilon);\n\n  uint8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 14, e2), epsilon);\n\n  uint8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 16, e2), epsilon);\n\n  uint8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 18, e2), epsilon);\n\n  uint8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},\n          t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 20, e2), epsilon);\n\n  uint8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},\n          v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n  EXPECT_NEAR(0.367926508f, MipsSquaredEuclidean(u, v, 22, e2), epsilon);\n\n  uint8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};\n  
EXPECT_NEAR(0.403734415f, MipsSquaredEuclidean(w, x, 24, e2), epsilon);\n\n  uint8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};\n  EXPECT_NEAR(0.427079230f, MipsSquaredEuclidean(y, z, 26, e2), epsilon);\n\n  uint8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};\n  EXPECT_NEAR(0.452268809f, MipsSquaredEuclidean(x14, y14, 28, e2), epsilon);\n\n  uint8_t x15[15] = {0},\n          y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};\n  EXPECT_NEAR(0.459755957f, MipsSquaredEuclidean(x15, y15, 30, e2), epsilon);\n  uint8_t x16[16] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9},\n          y16[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30, 50};\n  EXPECT_NEAR(1.566913843f, MipsSquaredEuclidean(x16, y16, 32, e2), epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrixSphericalInjection(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen) * 8;\n  size_t matrix_size = batch_size * dimension / 2;\n  size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<uint8_t, 1>::Compute(&matrix1[i], dimension, &score);\n    
squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<uint8_t, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 8, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 8, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * dimension / 2];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n          &matrix1[j * dimension / 2], cur_query, dimension, e2,\n          &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    ASSERT_NEAR(result1[i], result2[i], 1e-4);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {\n  TestSquaredEuclideanMatrixSphericalInjection<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 
2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 
16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {\n  
TestSquaredEuclideanMatrixSphericalInjection<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsSphericalInjectionBenchMark(void) {\n  const float u_val = 0.99;\n  const float l2_norm = 100.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension / 2;\n  const size_t query_matrix_size = query_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint8_t> dist(0, 255);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 8,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 8, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 
2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, e2, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n\n    MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size,\n                                       query_size>::Compute(matrix_batch,\n                                                            &query2[0],\n                                                            dimension, e2,\n                                                            results.data());\n  }\n  std::cout << \"* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const uint8_t *current_query = &query1[j * dimension / 2];\n      float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension / 2], current_query, dimension, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     
DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {\n  MipsSphericalInjectionBenchMark<2, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<2, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 16, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 16, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 32, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<128, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<1, 1, 1024, 256>();\n}\n"
  },
  {
    "path": "tests/ailego/math/mips_euclidean_distance_matrix_int8_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <random>\n#include <string>\n#include <thread>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/distance.h>\n#include <ailego/math/norm2_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,\n                                  size_t dim, size_t m_value, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<int8_t, N> &lhs,\n                                  const FixedVector<int8_t, N> &rhs,\n                                  size_t m_value, float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), m_value, e2);\n}\n\nstatic float ConvertAndComputeByMips(const 
int8_t *lhs, const int8_t *rhs,\n                                     size_t dim, size_t m_value, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + m_value);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    lhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  std::vector<float> rhs_vec(dim + m_value);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  for (size_t i = dim; i < dim + m_value; ++i) {\n    rhs_vec[i] = 0.5f - squ;\n    squ *= squ;\n  }\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + m_value);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<int8_t, N> &lhs,\n                                     const FixedVector<int8_t, N> &rhs,\n                                     size_t m_value, float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), m_value,\n                                 e2);\n}\n\nTEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);\n  const float l2_norm =\n      std::uniform_real_distribution<float>(1000.0, 1500.0)(gen);\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-6;\n  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<int8_t> vec1(dim);\n    std::vector<int8_t> vec2(dim);\n    for 
(size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    ASSERT_NEAR(\n        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),\n        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),\n        epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {\n  std::mt19937 gen((std::random_device())());\n  const size_t m_val = 4;\n  const float u_val = 0.68f;\n  const float l2_norm = 30.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-5;\n\n  int8_t a[] = {0}, b[] = {0};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 1, m_val, e2), epsilon);\n\n  int8_t c[] = {0, 1}, d[] = {0, 1};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 2, m_val, e2), epsilon);\n\n  int8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 3, m_val, e2), epsilon);\n\n  int8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 4, m_val, e2), epsilon);\n\n  int8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 5, m_val, e2), epsilon);\n\n  int8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 6, m_val, e2), epsilon);\n\n  int8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 7, m_val, e2), epsilon);\n\n  int8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 8, m_val, e2), epsilon);\n\n  int8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 9, m_val, e2), epsilon);\n\n  int8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},\n         t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 10, m_val, e2), epsilon);\n\n  int8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},\n         v[] = 
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n  EXPECT_NEAR(0.2384642f, MipsSquaredEuclidean(u, v, 11, m_val, e2), epsilon);\n\n  int8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};\n  EXPECT_NEAR(0.3321453f, MipsSquaredEuclidean(w, x, 12, m_val, e2), epsilon);\n\n  int8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};\n  EXPECT_NEAR(0.4580747f, MipsSquaredEuclidean(y, z, 13, m_val, e2), epsilon);\n\n  int8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};\n  EXPECT_NEAR(0.9224106f, MipsSquaredEuclidean(x14, y14, 14, m_val, e2),\n              epsilon);\n\n  int8_t x15[15] = {0},\n         y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};\n  EXPECT_NEAR(5.0584077f, MipsSquaredEuclidean(x15, y15, 15, m_val, e2),\n              epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);\n  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = std::uniform_int_distribution<size_t>(2, 128)(gen) * 4;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    
SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, m_val, e2,\n          &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_NEAR(result1[i], result2[i], 1e-4);\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {\n  
TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 
2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();\n}\n\nTEST(DistanceMatrix, 
MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {\n  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsRepeatedQuadraticInjectionBenchMark(void) {\n  const size_t m_val = 4;\n  const float u_val = 0.6;\n  const float l2_norm = 1.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t 
*)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, m_val, e2, current_results);\n    }\n  }\n  std::cout\n      << \"* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, m_val, e2, results.data());\n  }\n  std::cout\n      << \"* N Batched MipsSquaredErclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = 
&results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout\n      << \"* Unbatched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \\t\"\n      << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {\n  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<64, 
8, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();\n  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();\n}\n\nstatic float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,\n                                  size_t dim, float e2) {\n  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);\n}\n\ntemplate <size_t N>\nstatic float MipsSquaredEuclidean(const FixedVector<int8_t, N> &lhs,\n                                  const FixedVector<int8_t, N> &rhs, float e2) {\n  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nstatic float ConvertAndComputeByMips(const int8_t *lhs, const int8_t *rhs,\n                                     size_t dim, float e2) {\n  float squ = 0.0f;\n  std::vector<float> lhs_vec(dim + 1);\n  const float eta = std::sqrt(e2);\n  for (size_t i = 0; i < dim; ++i) {\n    float val = lhs[i] * eta;\n    lhs_vec[i] = val;\n    squ += val * val;\n  }\n  float norm2;\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);\n  lhs_vec[dim] = std::sqrt(1 - norm2);\n\n  std::vector<float> rhs_vec(dim + 1);\n  squ = 0.0f;\n  for (size_t i = 0; i < dim; ++i) {\n    float val = rhs[i] * eta;\n    rhs_vec[i] = val;\n    squ += val * val;\n  }\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);\n  rhs_vec[dim] = std::sqrt(1 - norm2);\n  std::cout << \"squ: \" << squ << std::endl;\n  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),\n                                            dim + 1);\n}\n\ntemplate <size_t N>\nstatic float ConvertAndComputeByMips(const FixedVector<int8_t, N> &lhs,\n                                     const FixedVector<int8_t, N> &rhs,\n                                     float e2) {\n  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);\n}\n\nTEST(DistanceMatrix, GeneralSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = 
std::uniform_real_distribution<float>(0.1, 1.0)(gen);\n  const float l2_norm =\n      std::uniform_real_distribution<float>(1000.0, 1500.0)(gen);\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-6;\n  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);\n  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);\n  std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < count; ++i) {\n    std::vector<int8_t> vec1(dim);\n    std::vector<int8_t> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = dist(gen);\n      vec2[d] = dist(gen);\n    }\n    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),\n                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),\n                epsilon);\n  }\n}\n\nTEST(DistanceMatrix, FixedVectorsSphericalInjection) {\n  std::mt19937 gen((std::random_device())());\n  const float u_val = 0.68f;\n  const float l2_norm = 100.0f;\n  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);\n  const float epsilon = 1e-5;\n\n  int8_t a[] = {0}, b[] = {0};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 1, e2), epsilon);\n\n  int8_t c[] = {0, 1}, d[] = {0, 1};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 2, e2), epsilon);\n\n  int8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 3, e2), epsilon);\n\n  int8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 4, e2), epsilon);\n\n  int8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 5, e2), epsilon);\n\n  int8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 6, e2), epsilon);\n\n  int8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 7, e2), epsilon);\n\n  int8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 
3, 4, 5, 6, 7};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 8, e2), epsilon);\n\n  int8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 9, e2), epsilon);\n\n  int8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},\n         t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 10, e2), epsilon);\n\n  int8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},\n         v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};\n  EXPECT_NEAR(0.0178823452f, MipsSquaredEuclidean(u, v, 11, e2), epsilon);\n\n  int8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};\n  EXPECT_NEAR(0.0235359258f, MipsSquaredEuclidean(w, x, 12, e2), epsilon);\n\n  int8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};\n  EXPECT_NEAR(0.0302853006f, MipsSquaredEuclidean(y, z, 13, e2), epsilon);\n\n  int8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};\n  EXPECT_NEAR(0.0382360629f, MipsSquaredEuclidean(x14, y14, 14, e2), epsilon);\n\n  int8_t x15[15] = {0},\n         y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15};\n  EXPECT_NEAR(0.0488716699f, MipsSquaredEuclidean(x15, y15, 15, e2), epsilon);\n}\n\ntemplate <size_t M, size_t N>\nvoid TestSquaredEuclideanMatrixSphericalInjection(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = std::uniform_int_distribution<size_t>(2, 128)(gen) * 4;\n  size_t matrix_size = batch_size * dimension;\n  size_t query_matrix_size = query_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  
std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = u_val * u_val / squared_l2_norm;\n  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * dimension];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n          &matrix1[j * dimension], cur_query, dimension, e2, &query_result[j]);\n    }\n  }\n  MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n      &matrix2[0], &query2[0], dimension, e2, &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_GE(1e-4, std::abs(result1[i] - result2[i]));\n  }\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<1, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<2, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {\n  
TestSquaredEuclideanMatrixSphericalInjection<2, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {\n  TestSquaredEuclideanMatrixSphericalInjection<3, 3>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<4, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<8, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<16, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {\n  
TestSquaredEuclideanMatrixSphericalInjection<32, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<32, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {\n  TestSquaredEuclideanMatrixSphericalInjection<64, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 1>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 2>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 4>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {\n  
TestSquaredEuclideanMatrixSphericalInjection<128, 8>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 16>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 32>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 64>();\n}\n\nTEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {\n  TestSquaredEuclideanMatrixSphericalInjection<128, 128>();\n}\n\ntemplate <size_t M, size_t N, size_t B, size_t D>\nvoid MipsSphericalInjectionBenchMark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t query_size = N;\n  const size_t matrix_size = block_size * batch_size * dimension;\n  const size_t query_matrix_size = dimension * query_size;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int8_t> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  for (size_t i = 0; i < query_matrix_size; ++i) {\n    query1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),\n                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,\n                    batch_size);\n  }\n  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),\n                  dimension / 4, query_size);\n\n  float squared_l2_norm = 0.0f;\n  for (size_t i = 0; i < matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, 
&score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  for (size_t i = 0; i < query_matrix_size; i += dimension) {\n    float score;\n    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);\n    squared_l2_norm = std::max(squared_l2_norm, score);\n  }\n  const float e2 = 0.98f / squared_l2_norm;\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size * query_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << query_size << \" * \" << block_size\n            << std::endl;\n\n  // 1 Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n      float *current_results = &results[j * batch_size];\n\n      MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(\n          matrix_batch, current_query, dimension, e2, current_results);\n    }\n  }\n  std::cout << \"* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // N Batched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n\n    MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(\n        matrix_batch, &query2[0], dimension, e2, results.data());\n  }\n  std::cout << \"* N Batched MipsSquaredErclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n\n  // Unbatched Euclidean\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n\n    for (size_t j = 0; j < query_size; ++j) {\n      const int8_t *current_query = &query1[j * dimension];\n 
     float *current_results = &results[j * batch_size];\n\n      for (size_t k = 0; k < batch_size; ++k) {\n        MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(\n            &matrix_batch[k * dimension], current_query, dimension, e2,\n            &current_results[k]);\n      }\n    }\n  }\n  std::cout << \"* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \\t\"\n            << elapsed_time.micro_seconds() << std::endl;\n}\n\nTEST(DistanceMatrix,\n     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {\n  MipsSphericalInjectionBenchMark<2, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<2, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<4, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<8, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<16, 16, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 16, 512, 64>();\n  MipsSphericalInjectionBenchMark<32, 32, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 2, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 4, 512, 64>();\n  MipsSphericalInjectionBenchMark<64, 8, 512, 64>();\n  MipsSphericalInjectionBenchMark<128, 1, 512, 64>();\n  MipsSphericalInjectionBenchMark<1, 1, 1024, 256>();\n}\n"
  },
  {
    "path": "tests/ailego/math/norm_matrix_fp16_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float Norm1(const std::vector<Float16> &vec) {\n  float out = 0.0f;\n  Norm1Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nstatic float Norm2(const std::vector<Float16> &vec) {\n  float out = 0.0f;\n  Norm2Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nTEST(NormMatrix, Norm1_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<Float16> vec;\n    float result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      Float16 val = dist(gen);\n      result += Float16::Absolute(val);\n      vec.push_back(val);\n    }\n    // 
EXPECT_FLOAT_EQ(result, Norm1(vec));\n    EXPECT_GT(0.005, std::abs((Norm1(vec) - result) / result));\n  }\n}\n\nTEST(NormMatrix, Norm2_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<Float16> vec;\n    float result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      Float16 val = dist(gen);\n      result += val * val;\n      vec.push_back(val);\n    }\n    result = std::sqrt(result);\n    // EXPECT_FLOAT_EQ(result, Norm2(vec));\n    EXPECT_GT(0.005, std::abs((Norm2(vec) - result) / result));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm1Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm1Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,\n                                     &result1[j]);\n  }\n  Norm1Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,\n                                            &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / result1[i]));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = 
batch_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm2Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,\n                                     &result1[j]);\n  }\n  Norm2Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,\n                                            &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / result1[i]));\n  }\n}\n\ntemplate <size_t M>\nvoid TestSquaredNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,\n                                            &result1[j]);\n  }\n  SquaredNorm2Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,\n                                                   &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / 
result1[i]));\n  }\n}\n\nTEST(NormMatrix, Norm1_Matrix) {\n  TestNorm1Matrix<1>();\n  TestNorm1Matrix<3>();\n  TestNorm1Matrix<4>();\n  TestNorm1Matrix<8>();\n  TestNorm1Matrix<10>();\n  TestNorm1Matrix<12>();\n  TestNorm1Matrix<16>();\n  TestNorm1Matrix<29>();\n  TestNorm1Matrix<32>();\n  TestNorm1Matrix<38>();\n  TestNorm1Matrix<40>();\n  TestNorm1Matrix<51>();\n  TestNorm1Matrix<64>();\n  TestNorm1Matrix<65>();\n}\n\nTEST(NormMatrix, Norm2_Matrix) {\n  TestNorm2Matrix<1>();\n  TestNorm2Matrix<3>();\n  TestNorm2Matrix<4>();\n  TestNorm2Matrix<8>();\n  TestNorm2Matrix<10>();\n  TestNorm2Matrix<12>();\n  TestNorm2Matrix<16>();\n  TestNorm2Matrix<29>();\n  TestNorm2Matrix<32>();\n  TestNorm2Matrix<38>();\n  TestNorm2Matrix<40>();\n  TestNorm2Matrix<51>();\n  TestNorm2Matrix<64>();\n  TestNorm2Matrix<65>();\n}\n\nTEST(NormMatrix, SquaredNorm2_Matrix) {\n  TestSquaredNorm2Matrix<1>();\n  TestSquaredNorm2Matrix<3>();\n  TestSquaredNorm2Matrix<4>();\n  TestSquaredNorm2Matrix<8>();\n  TestSquaredNorm2Matrix<10>();\n  TestSquaredNorm2Matrix<12>();\n  TestSquaredNorm2Matrix<16>();\n  TestSquaredNorm2Matrix<29>();\n  TestSquaredNorm2Matrix<32>();\n  TestSquaredNorm2Matrix<38>();\n  TestSquaredNorm2Matrix<40>();\n  TestSquaredNorm2Matrix<51>();\n  TestSquaredNorm2Matrix<64>();\n  TestSquaredNorm2Matrix<65>();\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm1Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], 
&matrix1[start_pos], dimension,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm1Matrix<Float16, batch_size>::Compute(matrix_batch, dimension,\n                                              &results[0]);\n  }\n  std::cout << \"* Batched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm1Matrix<Float16, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                       &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm2Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<Float16> matrix1(matrix_size);\n  std::vector<Float16> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << 
\"# (\" << IntelIntrinsics() << \") FP16 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm2Matrix<Float16, batch_size>::Compute(matrix_batch, dimension,\n                                              &results[0]);\n  }\n  std::cout << \"* Batched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm2Matrix<Float16, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                       &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(NormMatrix, DISABLED_Norm1_Benchmark) {\n  Norm1Benchmark<2, 512, 128>();\n  Norm1Benchmark<4, 512, 128>();\n  Norm1Benchmark<8, 512, 128>();\n  Norm1Benchmark<16, 512, 128>();\n  Norm1Benchmark<32, 512, 128>();\n  Norm1Benchmark<64, 512, 128>();\n}\n\nTEST(NormMatrix, DISABLED_Norm2_Benchmark) {\n  Norm2Benchmark<2, 512, 128>();\n  Norm2Benchmark<4, 512, 128>();\n  Norm2Benchmark<8, 512, 128>();\n  Norm2Benchmark<16, 512, 128>();\n  Norm2Benchmark<32, 512, 128>();\n  Norm2Benchmark<64, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/norm_matrix_fp32_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/norm_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(float *dst, const float *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float Norm1(const std::vector<float> &vec) {\n  float out = 0.0f;\n  Norm1Matrix<float, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nstatic float Norm2(const std::vector<float> &vec) {\n  float out = 0.0f;\n  Norm2Matrix<float, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nTEST(NormMatrix, Norm1_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<float> vec;\n    double result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      float val = dist(gen);\n      result += std::abs(val);\n      vec.push_back(val);\n  
  }\n    EXPECT_FLOAT_EQ(Norm1(vec), (float)result);\n    // EXPECT_GE(0.00002, std::abs(Norm1(vec) - result));\n  }\n}\n\nTEST(NormMatrix, Norm2_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<float> vec;\n    double result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      float val = dist(gen);\n      result += val * val;\n      vec.push_back(val);\n    }\n    EXPECT_FLOAT_EQ(Norm2(vec), (float)std::sqrt(result));\n    // EXPECT_GE(0.00002, std::abs(Norm2(vec) - std::sqrt(result)));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm1Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm1Matrix<float, 1>::Compute(&matrix1[j * dimension], dimension,\n                                   &result1[j]);\n  }\n  Norm1Matrix<float, batch_size>::Compute(&matrix2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_GE(0.00002, std::abs((double)result1[i] - (double)result2[i]));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> 
matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm2Matrix<float, 1>::Compute(&matrix1[j * dimension], dimension,\n                                   &result1[j]);\n  }\n  Norm2Matrix<float, batch_size>::Compute(&matrix2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_GE(0.00002, std::abs((double)result1[i] - (double)result2[i]));\n  }\n}\n\ntemplate <size_t M>\nvoid TestSquaredNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_real_distribution<float> dist(0.0, 0.5);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    SquaredNorm2Matrix<float, 1>::Compute(&matrix1[j * dimension], dimension,\n                                          &result1[j]);\n  }\n  SquaredNorm2Matrix<float, batch_size>::Compute(&matrix2[0], dimension,\n                                                 &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_GE(0.00002, std::abs((double)result1[i] - (double)result2[i]));\n  }\n}\n\nTEST(NormMatrix, Norm1_Matrix) {\n  TestNorm1Matrix<1>();\n  TestNorm1Matrix<3>();\n  TestNorm1Matrix<4>();\n  TestNorm1Matrix<8>();\n  TestNorm1Matrix<10>();\n  
TestNorm1Matrix<12>();\n  TestNorm1Matrix<16>();\n  TestNorm1Matrix<29>();\n  TestNorm1Matrix<32>();\n  TestNorm1Matrix<38>();\n  TestNorm1Matrix<40>();\n  TestNorm1Matrix<51>();\n  TestNorm1Matrix<64>();\n  TestNorm1Matrix<65>();\n}\n\nTEST(NormMatrix, Norm2_Matrix) {\n  TestNorm2Matrix<1>();\n  TestNorm2Matrix<3>();\n  TestNorm2Matrix<4>();\n  TestNorm2Matrix<8>();\n  TestNorm2Matrix<10>();\n  TestNorm2Matrix<12>();\n  TestNorm2Matrix<16>();\n  TestNorm2Matrix<29>();\n  TestNorm2Matrix<32>();\n  TestNorm2Matrix<38>();\n  TestNorm2Matrix<40>();\n  TestNorm2Matrix<51>();\n  TestNorm2Matrix<64>();\n  TestNorm2Matrix<65>();\n}\n\nTEST(NormMatrix, SquaredNorm2_Matrix) {\n  TestSquaredNorm2Matrix<1>();\n  TestSquaredNorm2Matrix<3>();\n  TestSquaredNorm2Matrix<4>();\n  TestSquaredNorm2Matrix<8>();\n  TestSquaredNorm2Matrix<10>();\n  TestSquaredNorm2Matrix<12>();\n  TestSquaredNorm2Matrix<16>();\n  TestSquaredNorm2Matrix<29>();\n  TestSquaredNorm2Matrix<32>();\n  TestSquaredNorm2Matrix<38>();\n  TestSquaredNorm2Matrix<40>();\n  TestSquaredNorm2Matrix<51>();\n  TestSquaredNorm2Matrix<64>();\n  TestSquaredNorm2Matrix<65>();\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm1Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" 
<< dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm1Matrix<float, batch_size>::Compute(matrix_batch, dimension,\n                                            &results[0]);\n  }\n  std::cout << \"* Batched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm1Matrix<float, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                     &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm2Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<float> matrix1(matrix_size);\n  std::vector<float> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") FP32 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) 
{\n    const float *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm2Matrix<float, batch_size>::Compute(matrix_batch, dimension,\n                                            &results[0]);\n  }\n  std::cout << \"* Batched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const float *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm2Matrix<float, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                     &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(NormMatrix, DISABLED_Norm1_Benchmark) {\n  Norm1Benchmark<2, 512, 128>();\n  Norm1Benchmark<4, 512, 128>();\n  Norm1Benchmark<8, 512, 128>();\n  Norm1Benchmark<16, 512, 128>();\n  Norm1Benchmark<32, 512, 128>();\n  Norm1Benchmark<64, 512, 128>();\n}\n\nTEST(NormMatrix, DISABLED_Norm2_Benchmark) {\n  Norm2Benchmark<2, 512, 128>();\n  Norm2Benchmark<4, 512, 128>();\n  Norm2Benchmark<8, 512, 128>();\n  Norm2Benchmark<16, 512, 128>();\n  Norm2Benchmark<32, 512, 128>();\n  Norm2Benchmark<64, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/norm_matrix_int4_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float Norm2(const std::vector<uint8_t> &vec) {\n  float out = 0.0f;\n  Norm2Matrix<uint8_t, 1>::Compute(vec.data(), vec.size() * 2, &out);\n  return out;\n}\n\nstatic float SquaredNorm2(const std::vector<uint8_t> &vec) {\n  float out = 0.0f;\n  SquaredNorm2Matrix<uint8_t, 1>::Compute(vec.data(), vec.size() * 2, &out);\n  return out;\n}\n\nTEST(NormMatrix, Norm2_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-8, 7);\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen) << 1;\n\n  for (size_t d = 2; d < dimension; d += 2) {\n    std::vector<uint8_t> vec;\n    float result = 0.0f;\n    for (size_t i = 0; i < d; i += 2) {\n      
int8_t v1 = (int8_t)dist(gen);\n      int8_t v2 = (int8_t)dist(gen);\n      result += v1 * v1;\n      result += v2 * v2;\n      uint8_t v =\n          ((static_cast<uint8_t>(v2) << 4) | (static_cast<uint8_t>(v1) & 0xF));\n      vec.push_back(v);\n    }\n    EXPECT_FLOAT_EQ(result, SquaredNorm2(vec));\n    EXPECT_FLOAT_EQ(std::sqrt(result), Norm2(vec));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen) << 3;\n  size_t matrix_size = batch_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_int_distribution<int> dist(-8, 7);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    int8_t v1 = (int8_t)dist(gen);\n    int8_t v2 = (int8_t)dist(gen);\n    uint8_t v =\n        ((static_cast<uint8_t>(v2) << 4) | (static_cast<uint8_t>(v1) & 0xF));\n    matrix1[i] = v;\n  }\n  MatrixTranspose((uint32_t *)&matrix2[0], (const uint32_t *)matrix1.data(),\n                  dimension / 8, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm2Matrix<uint8_t, 1>::Compute(&matrix1[j * dimension / 2], dimension,\n                                     &result1[j]);\n  }\n  Norm2Matrix<uint8_t, batch_size>::Compute(&matrix2[0], dimension,\n                                            &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(NormMatrix, Norm2_Matrix) {\n  TestNorm2Matrix<2>();\n  TestNorm2Matrix<3>();\n  TestNorm2Matrix<4>();\n  TestNorm2Matrix<8>();\n  TestNorm2Matrix<10>();\n  TestNorm2Matrix<12>();\n  TestNorm2Matrix<16>();\n  TestNorm2Matrix<29>();\n  TestNorm2Matrix<32>();\n  TestNorm2Matrix<38>();\n  TestNorm2Matrix<40>();\n  TestNorm2Matrix<51>();\n  
TestNorm2Matrix<64>();\n  TestNorm2Matrix<65>();\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm2Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension / 2;\n\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-8, 7);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    int8_t v1 = (int8_t)dist(gen);\n    int8_t v2 = (int8_t)dist(gen);\n    uint8_t v =\n        ((static_cast<uint8_t>(v2) << 4) | (static_cast<uint8_t>(v1) & 0xF));\n    matrix1[i] = v;\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension / 2;\n    MatrixTranspose((uint32_t *)&matrix2[start_pos],\n                    (const uint32_t *)&matrix1[start_pos], dimension / 8,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT4 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];\n    Norm2Matrix<uint8_t, batch_size>::Compute(matrix_batch, dimension,\n                                              &results[0]);\n  }\n  std::cout << \"* Batched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm2Matrix<uint8_t, 1>::Compute(&matrix_batch[k * dimension / 2],\n                                       dimension, &results[k]);\n    }\n 
 }\n  std::cout << \"* Unbatched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(NormMatrix, DISABLED_Norm2_Benchmark) {\n  Norm2Benchmark<2, 512, 128>();\n  Norm2Benchmark<4, 512, 128>();\n  Norm2Benchmark<8, 512, 128>();\n  Norm2Benchmark<16, 512, 128>();\n  Norm2Benchmark<32, 512, 128>();\n  Norm2Benchmark<64, 512, 128>();\n}"
  },
  {
    "path": "tests/ailego/math/norm_matrix_int8_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nstatic inline const char *IntelIntrinsics(void) {\n  return internal::CpuFeatures::Intrinsics();\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\nstatic float Norm1(const std::vector<int8_t> &vec) {\n  float out = 0.0f;\n  Norm1Matrix<int8_t, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nstatic float Norm2(const std::vector<int8_t> &vec) {\n  float out = 0.0f;\n  Norm2Matrix<int8_t, 1>::Compute(vec.data(), vec.size(), &out);\n  return out;\n}\n\nTEST(NormMatrix, Norm1_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<int8_t> vec;\n    float result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      int8_t val = (int8_t)dist(gen);\n      result += std::abs(val);\n      vec.push_back(val);\n    }\n    
EXPECT_FLOAT_EQ(result, Norm1(vec));\n  }\n}\n\nTEST(NormMatrix, Norm2_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n\n  for (size_t d = 1; d < 100; ++d) {\n    std::vector<int8_t> vec;\n    float result = 0.0f;\n    for (size_t i = 0; i < d; ++i) {\n      int8_t val = (int8_t)dist(gen);\n      result += val * val;\n      vec.push_back(val);\n    }\n    EXPECT_FLOAT_EQ(std::sqrt(result), Norm2(vec));\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm1Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)&matrix2[0], (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm1Matrix<int8_t, 1>::Compute(&matrix1[j * dimension], dimension,\n                                    &result1[j]);\n  }\n  Norm1Matrix<int8_t, batch_size>::Compute(&matrix2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\ntemplate <size_t M>\nvoid TestNorm2Matrix(void) {\n  std::mt19937 gen((std::random_device())());\n\n  const size_t batch_size = M;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen) << 2;\n  size_t matrix_size = batch_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  std::vector<float> result1(batch_size);\n  std::vector<float> result2(batch_size);\n\n  
std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n  MatrixTranspose((uint32_t *)&matrix2[0], (const uint32_t *)matrix1.data(),\n                  dimension / 4, batch_size);\n\n  for (size_t j = 0; j < batch_size; ++j) {\n    Norm2Matrix<int8_t, 1>::Compute(&matrix1[j * dimension], dimension,\n                                    &result1[j]);\n  }\n  Norm2Matrix<int8_t, batch_size>::Compute(&matrix2[0], dimension, &result2[0]);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    EXPECT_FLOAT_EQ(result1[i], result2[i]);\n  }\n}\n\nTEST(NormMatrix, Norm1_Matrix) {\n  TestNorm1Matrix<1>();\n  TestNorm1Matrix<3>();\n  TestNorm1Matrix<4>();\n  TestNorm1Matrix<8>();\n  TestNorm1Matrix<10>();\n  TestNorm1Matrix<12>();\n  TestNorm1Matrix<16>();\n  TestNorm1Matrix<29>();\n  TestNorm1Matrix<32>();\n  TestNorm1Matrix<38>();\n  TestNorm1Matrix<40>();\n  TestNorm1Matrix<51>();\n  TestNorm1Matrix<64>();\n  TestNorm1Matrix<65>();\n}\n\nTEST(NormMatrix, Norm2_Matrix) {\n  TestNorm2Matrix<1>();\n  TestNorm2Matrix<3>();\n  TestNorm2Matrix<4>();\n  TestNorm2Matrix<8>();\n  TestNorm2Matrix<10>();\n  TestNorm2Matrix<12>();\n  TestNorm2Matrix<16>();\n  TestNorm2Matrix<29>();\n  TestNorm2Matrix<32>();\n  TestNorm2Matrix<38>();\n  TestNorm2Matrix<40>();\n  TestNorm2Matrix<51>();\n  TestNorm2Matrix<64>();\n  TestNorm2Matrix<65>();\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm1Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t 
start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)&matrix2[start_pos],\n                    (const uint32_t *)&matrix1[start_pos], dimension / 4,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm1Matrix<int8_t, batch_size>::Compute(matrix_batch, dimension,\n                                             &results[0]);\n  }\n  std::cout << \"* Batched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm1\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm1Matrix<int8_t, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                      &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm1 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\ntemplate <size_t M, size_t B, size_t D>\nvoid Norm2Benchmark(void) {\n  const size_t dimension = D;\n  const size_t batch_size = M;\n  const size_t block_size = B;\n  const size_t matrix_size = block_size * batch_size * dimension;\n\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<int> dist(-127, 127);\n  for (size_t i = 0; i < matrix_size; ++i) {\n    matrix1[i] = (int8_t)dist(gen);\n  }\n\n  for (size_t i = 0; i < block_size; ++i) {\n    size_t start_pos = i * batch_size * dimension;\n    MatrixTranspose((uint32_t *)&matrix2[start_pos],\n                    
(const uint32_t *)&matrix1[start_pos], dimension / 4,\n                    batch_size);\n  }\n\n  ElapsedTime elapsed_time;\n  std::vector<float> results(batch_size);\n\n  std::cout << \"# (\" << IntelIntrinsics() << \") INT8 \" << dimension << \"d, \"\n            << batch_size << \" * \" << block_size << std::endl;\n\n  // 1 Batched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];\n    Norm2Matrix<int8_t, batch_size>::Compute(matrix_batch, dimension,\n                                             results.data());\n  }\n  std::cout << \"* Batched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n\n  // Unbatched Norm2\n  elapsed_time.reset();\n  for (size_t i = 0; i < block_size; ++i) {\n    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];\n    for (size_t k = 0; k < batch_size; ++k) {\n      Norm2Matrix<int8_t, 1>::Compute(&matrix_batch[k * dimension], dimension,\n                                      &results[k]);\n    }\n  }\n  std::cout << \"* Unbatched Norm2 (us) \\t\" << elapsed_time.micro_seconds()\n            << std::endl;\n}\n\nTEST(NormMatrix, DISABLED_Norm1_Benchmark) {\n  Norm1Benchmark<2, 512, 128>();\n  Norm1Benchmark<4, 512, 128>();\n  Norm1Benchmark<8, 512, 128>();\n  Norm1Benchmark<16, 512, 128>();\n  Norm1Benchmark<32, 512, 128>();\n  Norm1Benchmark<64, 512, 128>();\n}\n\nTEST(NormMatrix, DISABLED_Norm2_Benchmark) {\n  Norm2Benchmark<2, 512, 128>();\n  Norm2Benchmark<4, 512, 128>();\n  Norm2Benchmark<8, 512, 128>();\n  Norm2Benchmark<16, 512, 128>();\n  Norm2Benchmark<32, 512, 128>();\n  Norm2Benchmark<64, 512, 128>();\n}\n"
  },
  {
    "path": "tests/ailego/math/normalizer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/container/bitmap.h>\n#include <ailego/internal/cpu_features.h>\n#include <ailego/math/normalizer.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec::ailego;\n\nTEST(Normalizer, FP32_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < 100; ++i) {\n    std::vector<float> vec1;\n    std::vector<float> vec2;\n    for (size_t j = 0; j < 111; ++j) {\n      float val = dist(gen);\n      vec1.push_back(val);\n      vec2.push_back(val);\n    }\n\n    Normalizer<float>::Compute(vec1.data(), vec1.size(), 1.1f);\n    for (size_t j = 0; j < vec1.size(); ++j) {\n      EXPECT_FLOAT_EQ(vec1[j] * 1.1f, vec2[j]);\n    }\n\n    float l1 = 0.0f, l2 = 0.0f;\n    Normalizer<float>::L1(vec1.data(), vec1.size(), &l1);\n    Normalizer<float>::L2(vec2.data(), vec2.size(), &l2);\n  }\n}\n\nTEST(Normalizer, FP16_General) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (size_t i = 0; i < 100; ++i) {\n    std::vector<Float16> vec1;\n    std::vector<Float16> vec2;\n    for (size_t j = 0; j < 111; ++j) {\n      float val = dist(gen);\n      vec1.push_back(val);\n      vec2.push_back(val);\n    
}\n\n    Normalizer<Float16>::Compute(vec1.data(), vec1.size(), 1.0f);\n    for (size_t j = 0; j < vec1.size(); ++j) {\n      EXPECT_FLOAT_EQ(vec1[j], vec2[j]);\n    }\n\n    float l1 = 0.0f, l2 = 0.0f;\n    Normalizer<Float16>::L1(vec1.data(), vec1.size(), &l1);\n    Normalizer<Float16>::L2(vec2.data(), vec2.size(), &l2);\n  }\n}\n\nTEST(Normalizer, FP32_Zero) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint32_t> dist(1, 128);\n  const uint32_t dimension = dist(gen);\n\n  std::vector<float> vec1(dimension, 0.0f);\n  std::vector<float> vec2(dimension, 0.0f);\n\n  float norm;\n  Normalizer<float>::L1(vec1.data(), vec1.size(), &norm);\n  Normalizer<float>::L2(vec2.data(), vec2.size(), &norm);\n  for (auto v : vec1) {\n    EXPECT_FALSE(std::isnan(v));\n  }\n  for (auto v : vec2) {\n    EXPECT_FALSE(std::isnan(v));\n  }\n}\n\nTEST(Normalizer, FP16_Zero) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint32_t> dist(1, 128);\n  const uint32_t dimension = dist(gen);\n\n  std::vector<Float16> vec1(dimension, 0.0f);\n  std::vector<Float16> vec2(dimension, 0.0f);\n\n  float norm;\n  Normalizer<Float16>::L2(vec1.data(), vec1.size(), &norm);\n  Normalizer<Float16>::L2(vec2.data(), vec2.size(), &norm);\n  for (auto v : vec1) {\n    EXPECT_FALSE(std::isnan(v));\n  }\n  for (auto v : vec2) {\n    EXPECT_FALSE(std::isnan(v));\n  }\n}\n"
  },
  {
    "path": "tests/ailego/parallel/lock_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <mutex>\n#include <ailego/parallel/lock.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n\nusing namespace zvec;\n\nTEST(SpinMutex, General) {\n  ailego::SpinMutex mutex;\n  { std::unique_lock<ailego::SpinMutex> signal_lock1(mutex); }\n  { std::lock_guard<ailego::SpinMutex> signal_lock2(mutex); }\n\n  ailego::SpinMutex mutex2;\n  int result = std::try_lock(mutex, mutex2);\n  if (result == -1) {\n    mutex.unlock();\n    mutex2.unlock();\n  }\n}\n\nTEST(WriteLock, General) {\n  ailego::SharedMutex mutex;\n  ailego::WriteLock wrlock(mutex);\n  { std::unique_lock<ailego::WriteLock> signal_lock1(wrlock); }\n  { std::lock_guard<ailego::WriteLock> signal_lock2(wrlock); }\n}\n\nTEST(ReadLock, General) {\n  ailego::SharedMutex mutex;\n  ailego::ReadLock rdlock(mutex);\n  { std::unique_lock<ailego::ReadLock> signal_lock1(rdlock); }\n  { std::lock_guard<ailego::ReadLock> signal_lock2(rdlock); }\n}\n\nTEST(Mutex, General) {\n  ailego::ThreadPool pool;\n  std::mutex mutex;\n\n  int count = 0;\n  for (int i = 0; i < 2000; ++i) {\n    pool.execute([&]() {\n      std::lock_guard<std::mutex> lock(mutex);\n      ++count;\n    });\n  }\n  pool.wait_finish();\n  EXPECT_EQ(2000, count);\n}\n\nclass NoLockTest {\n public:\n  virtual void open() {\n    no_lock_opened_ = true;\n  }\n  virtual int read(volatile int *count) {\n   
 if (!no_lock_opened_) {\n      return -1;\n    }\n    (*count)++;\n    std::this_thread::sleep_for(std::chrono::milliseconds(10));\n    return 0;\n  }\n  virtual int write(volatile int *count) {\n    if (!no_lock_opened_) {\n      return -1;\n    }\n    (*count)++;\n    std::this_thread::sleep_for(std::chrono::milliseconds(10));\n    return 0;\n  }\n  virtual void close() {\n    no_lock_opened_ = false;\n  }\n\n private:\n  volatile bool no_lock_opened_{false};\n};\n\nclass AtomicLockTest : public NoLockTest {\n public:\n  void open() override {\n    opened_.store(true);\n    NoLockTest::open();\n  }\n  int read(volatile int *count) override {\n    AILEGO_SAFE_ACCESS(-1);\n    return NoLockTest::read(count);\n  }\n  int write(volatile int *count) override {\n    AILEGO_SAFE_ACCESS(-1);\n    return NoLockTest::write(count);\n  }\n  void close() override {\n    AILEGO_SAFE_CLOSE;\n    NoLockTest::close();\n  }\n\n private:\n  mutable std::atomic<int> counter_{0};\n  std::atomic<bool> opened_{false};\n};\n\nvoid test_lock(NoLockTest &test_obj) {\n  ailego::ThreadPool pool;\n  test_obj.open();\n\n  auto start = std::chrono::system_clock::now();\n\n  const int kTestCount = 10000;\n  volatile int count = 0;\n  for (int i = 0; i < kTestCount; ++i) {\n    pool.execute([&]() {\n      test_obj.read(&count);\n      test_obj.write(&count);\n    });\n  }\n  test_obj.close();\n  pool.wait_finish();\n\n  auto stop = std::chrono::system_clock::now();\n  std::chrono::nanoseconds time_used = stop - start;\n  std::cout << \"use: \" << time_used.count() / 1000 << \" us\" << std::endl;\n\n  std::cout << \"count: \" << count << std::endl;\n  EXPECT_LE(count, kTestCount * 2);\n}\n\nTEST(CloseLock, Perf) {\n  std::cout << \"NoLockTest\" << std::endl;\n  NoLockTest test_obj;\n  test_lock(test_obj);\n\n  std::cout << \"AtomicLockTest\" << std::endl;\n  AtomicLockTest test_obj3;\n  test_lock(test_obj3);\n}\n"
  },
  {
    "path": "tests/ailego/parallel/multi_thread_list_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <memory>\n#include <zvec/ailego/parallel/thread_pool.h>\n\n#define private public\n#include <ailego/parallel/multi_thread_list.h>\n#undef private\n\n#include <gtest/gtest.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\nusing namespace std;\n\nstruct Item {\n  uint32_t a_;\n  std::string b_;\n  Item() {};\n  Item(uint32_t a, std::string b) : a_(a), b_(b) {}\n};\n\nMultiThreadList<Item> mt_queue(100);\n\nvoid producer(uint32_t i) {\n  Item item{i, std::to_string(i)};\n  mt_queue.produce(item);\n  return;\n}\nvoid consumer(uint32_t i, uint32_t *result) {\n  Item item;\n  while (mt_queue.consume(&item)) {\n    *result += item.a_;\n  }\n}\nvoid producer_done(uint32_t i) {\n  Item item{i, std::to_string(i)};\n  EXPECT_EQ(false, mt_queue.produce(item));\n  return;\n}\n\nTEST(MultiThreadListTest, General) {\n  int times = 100;\n  while (times--) {\n    cout << \"================================\" << endl;\n    cout << \"times: \" << times << endl;\n\n    mt_queue.reset();\n\n    ailego::ThreadPool producer_pool;\n    ailego::ThreadPool consumer_pool;\n    ailego::ThreadPool producer_done_pool;\n\n    uint32_t num_of_consumer = 100;\n    uint32_t num_of_producer = 100;\n    uint32_t num_of_producer_done = 100;\n\n    std::vector<uint32_t> consumer_results(num_of_consumer);\n    
std::fill(consumer_results.begin(), consumer_results.end(), 0);\n\n    for (uint32_t i = 0; i < num_of_consumer; i++) {\n      consumer_pool.execute(consumer, i + 1, &consumer_results[i]);\n    }\n\n    for (uint32_t i = 0; i < num_of_producer; i++) {\n      producer_pool.execute(producer, i + 1);\n    }\n\n    producer_pool.wait_finish();\n    mt_queue.done();\n    consumer_pool.wait_finish();\n\n    // produce after queue done\n    for (uint32_t i = 0; i < num_of_producer_done; i++) {\n      producer_done_pool.execute(producer_done, i + 1);\n    }\n    producer_done_pool.wait_finish();\n\n    uint32_t total = 0;\n    for (uint32_t i = 0; i < num_of_consumer; i++) {\n      cout << consumer_results[i] << \" \";\n      total += consumer_results[i];\n    }\n    cout << endl;\n\n    EXPECT_EQ(total, 5050);\n  }\n}\n\nTEST(MultiThreadListTest, FullQueueQuit) {\n  mt_queue.reset();\n\n  ailego::ThreadPool producer_pool;\n\n  uint32_t num_of_producer = 1000;\n\n  for (uint32_t i = 1; i <= num_of_producer; i++) {\n    producer_pool.execute(producer, i);\n  }\n\n  mt_queue.done();\n  producer_pool.wait_finish();\n}\n\nTEST(MultiThreadListTest, ConsumeStopResume) {\n  mt_queue.reset();\n\n  ailego::ThreadPool producer_pool;\n  ailego::ThreadPool consumer_pool;\n\n  uint32_t num_of_consumer = 100;\n  uint32_t num_of_producer = 100;\n\n  std::vector<uint32_t> consumer_results(2 * num_of_consumer);\n  std::fill(consumer_results.begin(), consumer_results.end(), 0);\n\n  for (uint32_t i = 0; i < num_of_consumer; i++) {\n    consumer_pool.execute(consumer, i + 1, &consumer_results[i]);\n  }\n\n  for (uint32_t i = 0; i < num_of_producer; i++) {\n    producer_pool.execute(producer, i + 1);\n  }\n\n  producer_pool.wait_finish();\n\n  std::this_thread::sleep_for(std::chrono::milliseconds(100));\n\n  mt_queue.stop_consume();\n  consumer_pool.wait_finish();\n\n  uint32_t total = 0;\n  for (uint32_t i = 0; i < num_of_consumer; i++) {\n    cout << consumer_results[i] << \" \";\n    total 
+= consumer_results[i];\n  }\n  cout << endl;\n\n  cout << \"mt queue size: \" << mt_queue.list_.size() << endl;\n\n  EXPECT_EQ(total, 5050);\n\n  for (uint32_t i = num_of_producer; i < 2 * num_of_producer; i++) {\n    producer_pool.execute(producer, i + 1);\n  }\n\n  mt_queue.resume_consume();\n\n  for (uint32_t i = num_of_producer; i < 2 * num_of_consumer; i++) {\n    consumer_pool.execute(consumer, i + 1, &consumer_results[i]);\n  }\n\n  producer_pool.wait_finish();\n  mt_queue.done();\n  consumer_pool.wait_finish();\n\n  total = 0;\n  for (uint32_t i = num_of_consumer; i < 2 * num_of_consumer; i++) {\n    cout << consumer_results[i] << \" \";\n    total += consumer_results[i];\n  }\n  cout << endl;\n\n  cout << \"mt queue size: \" << mt_queue.list_.size() << endl;\n\n  EXPECT_EQ(total, 15050);\n}\n\nstruct MoveableItem {\n  uint32_t a_;\n  std::string b_;\n  MoveableItem() {};\n  MoveableItem(uint32_t a, std::string b) : a_(a), b_(b) {}\n\n  MoveableItem(const MoveableItem &) = delete;\n  MoveableItem &operator=(const MoveableItem &) = delete;\n\n  MoveableItem(MoveableItem &&) = default;\n  MoveableItem &operator=(MoveableItem &&) = default;\n};\n\nMultiThreadList<MoveableItem> mt_moveable_queue(100);\n\nvoid producer_moveable(uint32_t i) {\n  MoveableItem item{i, std::to_string(i)};\n  mt_moveable_queue.produce(std::move(item));\n  return;\n}\nvoid consumer_moveable(uint32_t i, uint32_t *result) {\n  MoveableItem item;\n  while (mt_moveable_queue.consume(&item)) {\n    *result += item.a_;\n  }\n}\nvoid producer_moveable_done(uint32_t i) {\n  MoveableItem item{i, std::to_string(i)};\n  EXPECT_EQ(false, mt_moveable_queue.produce(std::move(item)));\n  return;\n}\n\nTEST(MultiThreadListTest, General_Moveable) {\n  int times = 100;\n  while (times--) {\n    cout << \"================================\" << endl;\n    cout << \"times: \" << times << endl;\n\n    mt_moveable_queue.reset();\n\n    ailego::ThreadPool producer_pool;\n    ailego::ThreadPool 
consumer_pool;\n    ailego::ThreadPool producer_done_pool;\n\n    uint32_t num_of_consumer = 100;\n    uint32_t num_of_producer = 100;\n    uint32_t num_of_producer_done = 100;\n\n    std::vector<uint32_t> consumer_results(num_of_consumer);\n    std::fill(consumer_results.begin(), consumer_results.end(), 0);\n\n    for (uint32_t i = 0; i < num_of_consumer; i++) {\n      consumer_pool.execute(consumer_moveable, i + 1, &consumer_results[i]);\n    }\n\n    for (uint32_t i = 0; i < num_of_producer; i++) {\n      producer_pool.execute(producer_moveable, i + 1);\n    }\n\n    producer_pool.wait_finish();\n    mt_moveable_queue.done();\n    consumer_pool.wait_finish();\n\n    // produce after queue done\n    for (uint32_t i = 0; i < num_of_producer_done; i++) {\n      producer_done_pool.execute(producer_moveable_done, i + 1);\n    }\n    producer_done_pool.wait_finish();\n\n    uint32_t total = 0;\n    for (uint32_t i = 0; i < num_of_consumer; i++) {\n      cout << consumer_results[i] << \" \";\n      total += consumer_results[i];\n    }\n    cout << endl;\n\n    EXPECT_EQ(total, 5050);\n  }\n}\n"
  },
  {
    "path": "tests/ailego/parallel/semaphore_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <mutex>\n#include <ailego/parallel/semaphore.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(Semaphore, General) {\n  ailego::ThreadPool pool;\n  ailego::Semaphore sem_mutex(1);\n\n  std::atomic<int> count(0);\n  for (int i = 0; i < 2000; ++i) {\n    pool.execute([&]() {\n      std::lock_guard<ailego::Semaphore> lock(sem_mutex);\n      ++count;\n    });\n  }\n  pool.wait_finish();\n  EXPECT_EQ(2000, count);\n}\n\nTEST(BinarySemaphores, General) {\n  ailego::ThreadPool pool;\n  const int sem_count = 35;\n  ailego::BinarySemaphores<32> sem_mutex0(0);\n  ailego::BinarySemaphores<32> sem_mutex32(sem_count);\n  ailego::BinarySemaphores<63> sem_mutex64(sem_count);\n  ailego::BinarySemaphores<15> sem_mutex16(sem_count);\n  ailego::BinarySemaphores<7> sem_mutex8(sem_count);\n  ailego::BinarySemaphores<1> sem_mutex1(sem_count);\n\n  std::atomic<uint32_t> total{0u};\n  std::vector<uint32_t> counts(sem_count, 0u);\n  for (int i = 0; i < 2000; ++i) {\n    pool.execute([&]() {\n      int index1 = sem_mutex32.acquire();\n      ++counts[index1];\n      ++total;\n      std::this_thread::sleep_for(\n          std::chrono::microseconds(std::rand() % 100 + 1));\n      sem_mutex32.release(index1);\n    });\n  }\n  pool.wait_finish();\n\n  uint32_t 
sum = 0;\n  for (int i = 0; i < sem_count; ++i) {\n    sum += counts[i];\n  }\n  EXPECT_EQ(total, sum);\n}\n\nTEST(BinarySemaphores, General2) {\n  ailego::ThreadPool pool;\n  const int sem_count = 32;\n  ailego::BinarySemaphores<64> sem_mutex64(sem_count);\n  std::atomic<uint32_t> total{0u};\n  std::vector<uint32_t> counts(sem_count, 0u);\n  bool flag = true;\n  for (int i = 0; i < 64; ++i) {\n    pool.execute([&]() {\n      while (flag) {\n        int index1 = sem_mutex64.acquire();\n        ++counts[index1];\n        ++total;\n        std::this_thread::sleep_for(\n            std::chrono::microseconds(std::rand() % 100000 + 100));\n        sem_mutex64.release(index1);\n      }\n    });\n  }\n  for (int i = 0; i < sem_count; ++i) {\n    printf(\"Begin acquire %d ...\\n\", i);\n    ailego::ElapsedTime timer;\n    int index = sem_mutex64.acquire(i);\n    uint64_t cost = timer.micro_seconds();\n    sem_mutex64.release(index);\n    printf(\"Acquire %d cost %zuus\\n\", i, (size_t)cost);\n  }\n  flag = false;\n  pool.wait_finish();\n  uint32_t sum = 0;\n  for (int i = 0; i < sem_count; ++i) {\n    sum += counts[i];\n  }\n  EXPECT_EQ(total, sum);\n}\n"
  },
  {
    "path": "tests/ailego/parallel/thread_pool_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <chrono>\n#include <iostream>\n#include <memory>\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n\nusing namespace zvec::ailego;\n\nstruct A {\n  A(void) : pool(std::make_shared<ThreadPool>()) {}\n\n  int ThreadMain(int32_t &thread_index, uint32_t &num) {\n    std::stringstream buf;\n    buf << num << \" Task (\" << thread_index << \" : \" << pool->indexof_this()\n        << \") \" << pool->active_count() << ' ' << pool->pending_count()\n        << std::endl;\n\n    // std::cout << buf.str();\n    ++run_count;\n    return 0;\n  }\n  std::atomic<uint32_t> run_count{0};\n  std::shared_ptr<ThreadPool> pool;\n};\n\nstruct B {\n  B(void) : pool(std::make_shared<ThreadPool>(true)) {}\n\n  std::string ThreadMain(uint32_t &num) {\n    aaa.pool->enqueue(\n        Closure::New(&aaa, &A::ThreadMain, pool->indexof_this(), num));\n    aaa.pool->wake_any();\n    // std::this_thread::sleep_for(\n    //    std::chrono::microseconds(std::rand() % 1000 + 1));\n    ++run_count;\n    return \"\";\n  }\n  A aaa;\n  std::atomic<uint32_t> run_count{0};\n  std::shared_ptr<ThreadPool> pool;\n};\n\nTEST(ThreadPool, General) {\n  // srand((uint32_t)time(NULL));\n  // srand((uint32_t)rand());\n\n  B bbb;\n  for (uint32_t i = 0; i < 10000u; ++i) {\n    bbb.pool->execute(&bbb, &B::ThreadMain, i);\n  }\n  bbb.pool->wait_finish();\n  
bbb.aaa.pool->wait_finish();\n\n  while (!bbb.aaa.pool->is_finished() || !bbb.pool->is_finished()) {\n    EXPECT_LE(0u, bbb.aaa.pool->pending_count());\n  }\n  EXPECT_EQ(bbb.aaa.pool->pending_count(), 0u);\n\n  EXPECT_EQ(10000u, bbb.run_count);\n  EXPECT_EQ(10000u, bbb.aaa.run_count);\n\n  EXPECT_FALSE(bbb.aaa.pool->is_stopped());\n  EXPECT_FALSE(bbb.pool->is_stopped());\n  EXPECT_NE(0u, bbb.aaa.pool->worker_count());\n  EXPECT_NE(0u, bbb.pool->worker_count());\n\n  bbb.aaa.pool->stop();\n  bbb.aaa.pool->wait_stop();\n  bbb.pool->stop();\n  bbb.pool->wait_stop();\n\n  EXPECT_TRUE(bbb.aaa.pool->is_stopped());\n  EXPECT_TRUE(bbb.pool->is_stopped());\n  EXPECT_EQ(0u, bbb.aaa.pool->worker_count());\n  EXPECT_EQ(0u, bbb.pool->worker_count());\n}\n\nvoid ExecuteAndWaitThread(int *count) {\n  ++(*count);\n}\n\nTEST(ThreadPool, ExecuteAndWait) {\n  ThreadPool pool;\n  int count = 0;\n  for (int i = 0; i < 100; ++i) {\n    EXPECT_EQ(i * 2, count);\n    pool.execute_and_wait(ExecuteAndWaitThread, &count);\n    EXPECT_EQ(i * 2 + 1, count);\n    count++;\n  }\n  EXPECT_EQ(200, count);\n}\n\nTEST(ThreadPool, WaitFinish) {\n  ThreadPool pool;\n\n  for (int i = 0; i < 10000; ++i) {\n    std::atomic_uint count{0};\n    for (int j = 0; j < 10; ++j) {\n      pool.execute([&count]() { ++count; });\n    }\n    pool.wait_finish();\n    EXPECT_EQ(10, count);\n  }\n}\n\nTEST(ThreadPool, TaskGroup) {\n  ThreadPool pool1, pool2;\n  std::atomic_uint count{0};\n\n  for (int i = 0; i < 12; ++i) {\n    pool1.execute(\n        [&count](ThreadPool *p) {\n          auto group = p->make_group();\n\n          EXPECT_TRUE(group->is_finished());\n          EXPECT_EQ(0u, group->pending_count());\n          EXPECT_EQ(0u, group->active_count());\n\n          for (int j = 0; j < 12; ++j) {\n            group->execute([&count]() {\n              std::this_thread::sleep_for(\n                  std::chrono::microseconds(std::rand() % 1000 + 1));\n              ++count;\n            });\n          }\n        
  group->wait_finish();\n        },\n        &pool2);\n  }\n  pool1.wait_finish();\n  EXPECT_EQ(12u * 12u, count);\n}\n\nTEST(ThreadPool, TaskGroup2) {\n  ThreadPool pool;\n\n  auto group = pool.make_group();\n  for (int i = 0; i < 10000; ++i) {\n    std::atomic_uint count{0};\n    for (int j = 0; j < 10; ++j) {\n      group->execute([&count]() { ++count; });\n    }\n    group->wait_finish();\n    EXPECT_EQ(10, count);\n  }\n  pool.wait_finish();\n}\n"
  },
  {
    "path": "tests/ailego/parallel/thread_queue_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <chrono>\n#include <iostream>\n#include <memory>\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_queue.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nTEST(ThreadQueue, General) {\n  ThreadQueue queue;\n\n  std::this_thread::sleep_for(\n      std::chrono::microseconds(std::rand() % 1000 + 1));\n  queue.wake();\n\n  int count = 0;\n  for (int i = 0; i < 1000; ++i) {\n    queue[0].execute([&count, i]() {\n      EXPECT_EQ(i, count);\n      ++count;\n      // std::cout << count << std::endl;\n    });\n  }\n  std::this_thread::sleep_for(std::chrono::microseconds(20000));\n  EXPECT_EQ(1000, count);\n\n  queue.stop();\n  queue.wait_stop();\n}\n\nTEST(ThreadQueue, MutliThread) {\n  ThreadQueue queue;\n\n  std::this_thread::sleep_for(\n      std::chrono::microseconds(std::rand() % 1000 + 1));\n  queue.wake();\n\n  std::atomic_uint count{0};\n  for (int i = 0; i < 10000; ++i) {\n    queue.execute(std::rand(), [&count]() {\n      ++count;\n      // std::cout << count << std::endl;\n    });\n  }\n  std::this_thread::sleep_for(std::chrono::microseconds(20000));\n\n  EXPECT_EQ(10000u, count);\n  queue.stop();\n  queue.wait_stop();\n}\n\nTEST(ThreadQueue, MultiThreadWithHighPriority) {\n  ThreadQueue queue;\n\n  std::this_thread::sleep_for(\n      
std::chrono::microseconds(std::rand() % 1000 + 1));\n  queue.wake();\n\n  std::atomic_uint count{0};\n  std::atomic_uint high_priority_count{0};\n\n  ailego::ElapsedTime timer;\n  uint64_t task_time;\n  uint64_t high_priority_task_time;\n\n  // Enqueue normal tasks\n  for (int i = 0; i < 1000; ++i) {\n    queue.execute(std::rand(), [&count, &timer, &task_time]() {\n      ++count;\n      std::this_thread::sleep_for(std::chrono::microseconds(100));\n      if (count == 1000) {\n        task_time = timer.milli_seconds();\n      }\n    });\n  }\n\n  // Enqueue high-priority tasks\n  for (int i = 0; i < 1000; ++i) {\n    queue.execute_high_priority(std::rand(), [&high_priority_count, &timer,\n                                              &high_priority_task_time]() {\n      ++high_priority_count;\n      std::this_thread::sleep_for(std::chrono::microseconds(500));\n      if (high_priority_count == 1000) {\n        high_priority_task_time = timer.milli_seconds();\n      }\n    });\n  }\n\n  // Wait for all tasks to complete\n  std::this_thread::sleep_for(std::chrono::seconds(3));\n\n  EXPECT_EQ(count, 1000);\n  EXPECT_EQ(high_priority_count, 1000);\n\n  // Verify that all high-priority tasks are completed first\n  EXPECT_GT(task_time, high_priority_task_time);\n  std::cout << \"task time: \" << task_time\n            << \", high priority task time: \" << high_priority_task_time\n            << std::endl;\n\n  queue.stop();\n  queue.wait_stop();\n}\n"
  },
  {
    "path": "tests/ailego/pattern/closure_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <functional>\n#include <iostream>\n#include <gtest/gtest.h>\n#include <zvec/ailego/pattern/closure.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nvoid GlobalProcess0(void) {}\nvoid GlobalProcess1(int) {}\n\nvoid GlobalProcess2(int a1, int *a2) {\n  EXPECT_EQ(a1 + 1, *a2);\n}\n\nvoid GlobalProcess3(int a1, int *a2, int &a3) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n}\n\nvoid GlobalProcess4(int a1, int *a2, int &a3, const int &a4) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n}\n\nvoid GlobalProcess5(int a1, int *a2, int &a3, const int &a4, volatile int *a5) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n}\n\nvoid GlobalProcess6(int a1, int *a2, int &a3, const int &a4, volatile int *a5,\n                    int *const volatile a6) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n  EXPECT_EQ(*a5 + 1, *a6);\n}\n\nvoid GlobalProcess7(int a1, int *a2, int &a3, const int &a4, volatile int *a5,\n                    int *const volatile a6, int &&a7) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n  EXPECT_EQ(*a5 + 1, *a6);\n  EXPECT_EQ(*a6 + 1, a7);\n}\n\nsize_t 
GlobalFunction0(void) {\n  return 0;\n}\nsize_t GlobalFunction1(long) {\n  return 1;\n}\n\nsize_t GlobalFunction2(long a1, long *a2) {\n  EXPECT_EQ(a1 + 1, *a2);\n  return 2;\n}\n\nsize_t GlobalFunction3(long a1, long *a2, long &a3) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  return 3;\n}\n\nsize_t GlobalFunction4(long a1, long *a2, long &a3, const long &a4) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  return 4;\n}\n\nsize_t GlobalFunction5(long a1, long *a2, long &a3, const long &a4,\n                       volatile long *a5) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n  return 5;\n}\n\nsize_t GlobalFunction6(long a1, long *a2, long &a3, const long &a4,\n                       volatile long *a5, long *const volatile a6) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n  EXPECT_EQ(*a5 + 1, *a6);\n  return 6;\n}\n\nsize_t GlobalFunction7(long a1, long *a2, long &a3, const long &a4,\n                       volatile long *a5, long *const volatile a6, long &&a7) {\n  EXPECT_EQ(a1 + 1, *a2);\n  EXPECT_EQ(*a2 + 1, a3);\n  EXPECT_EQ(a3 + 1, a4);\n  EXPECT_EQ(a4 + 1, *a5);\n  EXPECT_EQ(*a5 + 1, *a6);\n  EXPECT_EQ(*a6 + 1, a7);\n  return 7;\n}\n\nstruct WithFunctionCall {\n  int operator()(int a) {\n    return a + b;\n  }\n  int do_something(int a) {\n    return a + b;\n  }\n  int b = 11;\n};\n\nstruct WithoutFunctionCall {\n  int do_something(int a) {\n    return a + b;\n  }\n  int b = 11;\n};\n\nstruct ClassA {\n  static void StaticProcess0(void) {}\n  static void StaticProcess1(int) {}\n\n  static void StaticProcess2(int a1, int *a2) {\n    EXPECT_EQ(a1 + 1, *a2);\n  }\n\n  static void StaticProcess3(int a1, int *a2, int &a3) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n  }\n\n  static void StaticProcess4(int a1, int *a2, int &a3, const int &a4) {\n    EXPECT_EQ(a1 + 1, 
*a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n  }\n\n  static void StaticProcess5(int a1, int *a2, int &a3, const int &a4,\n                             volatile int *a5) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n  }\n\n  static void StaticProcess6(int a1, int *a2, int &a3, const int &a4,\n                             volatile int *a5, int *const volatile a6) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n  }\n\n  static void StaticProcess7(int a1, int *a2, int &a3, const int &a4,\n                             volatile int *a5, int *const volatile a6,\n                             int &&a7) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    EXPECT_EQ(*a6 + 1, a7);\n  }\n\n  static size_t StaticFunction0(void) {\n    return 0;\n  }\n  static size_t StaticFunction1(long) {\n    return 1;\n  }\n\n  static size_t StaticFunction2(long a1, long *a2) {\n    EXPECT_EQ(a1 + 1, *a2);\n    return 2;\n  }\n\n  static size_t StaticFunction3(long a1, long *a2, long &a3) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    return 3;\n  }\n\n  static size_t StaticFunction4(long a1, long *a2, long &a3, const long &a4) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    return 4;\n  }\n\n  static size_t StaticFunction5(long a1, long *a2, long &a3, const long &a4,\n                                volatile long *a5) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    return 5;\n  }\n\n  static size_t StaticFunction6(long a1, long *a2, long &a3, const long &a4,\n                                volatile long *a5, long *const volatile a6) {\n    EXPECT_EQ(a1 + 1, *a2);\n    
EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    return 6;\n  }\n\n  static size_t StaticFunction7(long a1, long *a2, long &a3, const long &a4,\n                                volatile long *a5, long *const volatile a6,\n                                long &&a7) {\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    EXPECT_EQ(*a6 + 1, a7);\n    return 7;\n  }\n};\n\nclass ClassB {\n public:\n  ClassB(int v) : b_(v) {}\n\n  int operator()(int a1) {\n    EXPECT_TRUE(0);\n    return a1 + b_;\n  }\n\n  virtual void MemberProcess0(void) const {}\n\n  virtual void MemberProcess1(int a1) {\n    EXPECT_EQ(a1, b_);\n  }\n\n  void MemberProcess2(int a1, int *a2) {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n  }\n\n  void MemberProcess3(int a1, int *a2, int &a3) const {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n  }\n\n  virtual void MemberProcess4(int a1, int *a2, int &a3, const int &a4) {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n  }\n\n  virtual void MemberProcess5(int a1, int *a2, int &a3, const int &a4,\n                              volatile int *a5) const {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n  }\n\n  void MemberProcess6(int a1, int *a2, int &a3, const int &a4, volatile int *a5,\n                      int *const volatile a6) {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n  }\n\n  void MemberProcess7(int a1, int *a2, int &a3, const int &a4, volatile int *a5,\n                      int *const volatile a6, int &&a7) {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, 
*a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    EXPECT_EQ(*a6 + 1, a7);\n  }\n\n  size_t MemberFunction0(void) {\n    return 0;\n  }\n  size_t MemberFunction1(long a1) {\n    EXPECT_EQ(a1, b_);\n    return 1;\n  }\n\n  size_t MemberFunction2(long a1, long *a2) {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    return 2;\n  }\n\n  size_t MemberFunction3(long a1, long *a2, long &a3) volatile {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    return 3;\n  }\n\n  size_t MemberFunction4(long a1, long *a2, long &a3, const long &a4) const {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    return 4;\n  }\n\n  size_t MemberFunction5(long a1, long *a2, long &a3, const long &a4,\n                         volatile long *a5) const volatile {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    return 5;\n  }\n\n  size_t MemberFunction6(long a1, long *a2, long &a3, const long &a4,\n                         volatile long *a5, long *const volatile a6) const {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    return 6;\n  }\n\n  size_t MemberFunction7(long a1, long *a2, long &a3, const long &a4,\n                         volatile long *a5, long *const volatile a6,\n                         long &&a7) const volatile {\n    EXPECT_EQ(a1, b_);\n    EXPECT_EQ(a1 + 1, *a2);\n    EXPECT_EQ(*a2 + 1, a3);\n    EXPECT_EQ(a3 + 1, a4);\n    EXPECT_EQ(a4 + 1, *a5);\n    EXPECT_EQ(*a5 + 1, *a6);\n    EXPECT_EQ(*a6 + 1, a7);\n    return 7;\n  }\n\n private:\n  int b_{11};\n};\n\nclass ClassAB {\n public:\n  void Run1(void) const {\n    ClassB bbb(1);\n    ailego::Closure::New(this, 
&ClassAB::ConstFunc, &bbb);\n  }\n\n  void Run2(void) {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::ConstFunc, &bbb);\n  }\n\n  void Run3(void) {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::MutableFunc, &bbb);\n  }\n\n  void Run4(void) const {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);\n  }\n\n  void Run5(void) {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::VolatileMutableFunc, &bbb);\n  }\n\n  void Run6(void) const volatile {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);\n  }\n\n  void Run7(void) volatile {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);\n  }\n\n  void Run8(void) volatile {\n    ClassB bbb(1);\n    ailego::Closure::New(this, &ClassAB::VolatileMutableFunc, &bbb);\n  }\n\n protected:\n  void ConstFunc(const ClassB *b) const {\n    ClassA::StaticFunction0();\n    b->MemberProcess0();\n  }\n\n  void MutableFunc(const ClassB *b) {\n    ClassA::StaticFunction0();\n    b->MemberProcess0();\n  }\n\n  void VolatileConstFunc(const ClassB *b) const volatile {\n    ClassA::StaticFunction0();\n    b->MemberProcess0();\n  }\n\n  void VolatileMutableFunc(const ClassB *b) volatile {\n    ClassA::StaticFunction0();\n    b->MemberProcess0();\n  }\n};\n\nTEST(CallbackValidator, General) {\n  EXPECT_FALSE(ailego::CallbackValidator<int>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<long *>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<const void *>::Value);\n\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction0)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction0)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction1)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction1)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction2)>::Value);\n  
EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction2)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction3)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction3)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction4)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction4)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction5)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction5)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction6)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction6)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction7)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction7)>::Value);\n\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess0)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess0)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess1)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess1)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess2)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess2)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess3)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess3)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess4)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess4)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess5)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess5)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess6)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess6)>::Value);\n  
EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess7)>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess7)>::Value);\n\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction0)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction1)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction2)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction3)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction4)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction5)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction6)>>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<\n              std::function<decltype(GlobalFunction7)>>::Value);\n\n  EXPECT_TRUE(ailego::CallbackValidator<WithFunctionCall>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<WithFunctionCall &>::Value);\n  EXPECT_TRUE(ailego::CallbackValidator<const WithFunctionCall &>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<WithFunctionCall *>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<const WithFunctionCall *>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall &>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<const WithoutFunctionCall &>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall *>::Value);\n  EXPECT_FALSE(ailego::CallbackValidator<const WithoutFunctionCall *>::Value);\n\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction0)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction0)>::Value);\n  EXPECT_TRUE(\n      
ailego::CallbackValidator<decltype(ClassA::StaticFunction1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction3)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction3)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticFunction7)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticFunction7)>::Value);\n\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess0)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess0)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess3)>::Value);\n  EXPECT_TRUE(\n      
ailego::CallbackValidator<decltype(&ClassA::StaticProcess3)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(ClassA::StaticProcess7)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassA::StaticProcess7)>::Value);\n\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction0)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction3)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberFunction7)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess0)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess1)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess2)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess3)>::Value);\n  EXPECT_TRUE(\n      
ailego::CallbackValidator<decltype(&ClassB::MemberProcess4)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess5)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess6)>::Value);\n  EXPECT_TRUE(\n      ailego::CallbackValidator<decltype(&ClassB::MemberProcess7)>::Value);\n}\n\nTEST(CallbackTraits, General) {\n  EXPECT_EQ(0, ailego::CallbackTraits<decltype(GlobalProcess0)>::Arity);\n  EXPECT_EQ(1, ailego::CallbackTraits<decltype(GlobalProcess1)>::Arity);\n  EXPECT_EQ(2, ailego::CallbackTraits<decltype(GlobalProcess2)>::Arity);\n  EXPECT_EQ(3, ailego::CallbackTraits<decltype(GlobalProcess3)>::Arity);\n  EXPECT_EQ(4, ailego::CallbackTraits<decltype(GlobalProcess4)>::Arity);\n  EXPECT_EQ(5, ailego::CallbackTraits<decltype(GlobalProcess5)>::Arity);\n  EXPECT_EQ(6, ailego::CallbackTraits<decltype(GlobalProcess6)>::Arity);\n  EXPECT_EQ(7, ailego::CallbackTraits<decltype(GlobalProcess7)>::Arity);\n\n  EXPECT_EQ(0,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess0)>::Arity);\n  EXPECT_EQ(1,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess1)>::Arity);\n  EXPECT_EQ(2,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess2)>::Arity);\n  EXPECT_EQ(3,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess3)>::Arity);\n  EXPECT_EQ(4,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess4)>::Arity);\n  EXPECT_EQ(5,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess5)>::Arity);\n  EXPECT_EQ(6,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess6)>::Arity);\n  EXPECT_EQ(7,\n            ailego::CallbackTraits<decltype(&ClassA::StaticProcess7)>::Arity);\n\n  EXPECT_EQ(0,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess0)>::Arity);\n  EXPECT_EQ(1,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess1)>::Arity);\n  EXPECT_EQ(2,\n            
ailego::CallbackTraits<decltype(&ClassB::MemberProcess2)>::Arity);\n  EXPECT_EQ(3,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess3)>::Arity);\n  EXPECT_EQ(4,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess4)>::Arity);\n  EXPECT_EQ(5,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess5)>::Arity);\n  EXPECT_EQ(6,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess6)>::Arity);\n  EXPECT_EQ(7,\n            ailego::CallbackTraits<decltype(&ClassB::MemberProcess7)>::Arity);\n\n  EXPECT_EQ(\n      1u, sizeof(ailego::CallbackTraits<decltype(GlobalProcess0)>::TupleType));\n  EXPECT_EQ(1u, sizeof(ailego::CallbackTraits<\n                       decltype(&ClassA::StaticProcess0)>::TupleType));\n  EXPECT_EQ(1u, sizeof(ailego::CallbackTraits<\n                       decltype(&ClassB::MemberProcess0)>::TupleType));\n}\n\nTEST(Closure, Static) {\n  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n\n  ailego::Closure::New(GlobalFunction0)->run();\n  ailego::Closure::New(&GlobalFunction0)->run();\n  ailego::Closure::New(GlobalFunction1, a[1])->run();\n  ailego::Closure::New(&GlobalFunction1, 1)->run();\n  ailego::Closure::New(GlobalFunction2, 1, &a[2])->run();\n  ailego::Closure::New(&GlobalFunction2, a[1], &a[2])->run();\n  ailego::Closure::New(GlobalFunction3, a[1], &a[2], a[3])->run();\n  ailego::Closure::New(&GlobalFunction3, 1, &a[2], a[3])->run();\n  ailego::Closure::New(GlobalFunction4, 1, &a[2], a[3], 4)->run();\n  ailego::Closure::New(&GlobalFunction4, a[1], &a[2], a[3], a[4])->run();\n  ailego::Closure::New(GlobalFunction5, a[1], &a[2], a[3], a[4], &a[5])->run();\n  ailego::Closure::New(&GlobalFunction5, 1, &a[2], a[3], 4, &a[5])->run();\n  ailego::Closure::New(GlobalFunction6, 1, &a[2], a[3], 4, &a[5], &a[6])->run();\n  ailego::Closure::New(&GlobalFunction6, a[1], &a[2], a[3], a[4], &a[5], &a[6])\n      ->run();\n  ailego::Closure::New(GlobalFunction7, 1, 
&a[2], a[3], 4, &a[5], &a[6], a[7])\n      ->run();\n  ailego::Closure::New(&GlobalFunction7, a[1], &a[2], a[3], a[4], &a[5], &a[6],\n                       7)\n      ->run();\n\n  ailego::Closure::New(GlobalProcess0)->run();\n  ailego::Closure::New(&GlobalProcess0)->run();\n  ailego::Closure::New(GlobalProcess1, b[1])->run();\n  ailego::Closure::New(&GlobalProcess1, 1)->run();\n  ailego::Closure::New(GlobalProcess2, 1, &b[2])->run();\n  ailego::Closure::New(&GlobalProcess2, b[1], &b[2])->run();\n  ailego::Closure::New(GlobalProcess3, b[1], &b[2], b[3])->run();\n  ailego::Closure::New(&GlobalProcess3, 1, &b[2], b[3])->run();\n  ailego::Closure::New(GlobalProcess4, 1, &b[2], b[3], 4)->run();\n  ailego::Closure::New(&GlobalProcess4, b[1], &b[2], b[3], b[4])->run();\n  ailego::Closure::New(GlobalProcess5, b[1], &b[2], b[3], b[4], &b[5])->run();\n  ailego::Closure::New(&GlobalProcess5, 1, &b[2], b[3], 4, &b[5])->run();\n  ailego::Closure::New(GlobalProcess6, 1, &b[2], b[3], 4, &b[5], &b[6])->run();\n  ailego::Closure::New(&GlobalProcess6, b[1], &b[2], b[3], b[4], &b[5], &b[6])\n      ->run();\n  ailego::Closure::New(GlobalProcess7, 1, &b[2], b[3], 4, &b[5], &b[6], b[7])\n      ->run();\n  ailego::Closure::New(&GlobalProcess7, b[1], &b[2], b[3], b[4], &b[5], &b[6],\n                       7)\n      ->run();\n\n  ailego::Closure::New(ClassA::StaticFunction0)->run();\n  ailego::Closure::New(&ClassA::StaticFunction0)->run();\n  ailego::Closure::New(ClassA::StaticFunction1, a[1])->run();\n  ailego::Closure::New(&ClassA::StaticFunction1, 1)->run();\n  ailego::Closure::New(ClassA::StaticFunction2, 1, &a[2])->run();\n  ailego::Closure::New(&ClassA::StaticFunction2, a[1], &a[2])->run();\n  ailego::Closure::New(ClassA::StaticFunction3, a[1], &a[2], a[3])->run();\n  ailego::Closure::New(&ClassA::StaticFunction3, 1, &a[2], a[3])->run();\n  ailego::Closure::New(ClassA::StaticFunction4, 1, &a[2], a[3], 4)->run();\n  ailego::Closure::New(&ClassA::StaticFunction4, a[1], &a[2], a[3], 
a[4])\n      ->run();\n  ailego::Closure::New(ClassA::StaticFunction5, a[1], &a[2], a[3], a[4], &a[5])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticFunction5, 1, &a[2], a[3], 4, &a[5])\n      ->run();\n  ailego::Closure::New(ClassA::StaticFunction6, 1, &a[2], a[3], 4, &a[5], &a[6])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticFunction6, a[1], &a[2], a[3], a[4], &a[5],\n                       &a[6])\n      ->run();\n  ailego::Closure::New(ClassA::StaticFunction7, 1, &a[2], a[3], 4, &a[5], &a[6],\n                       a[7])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticFunction7, a[1], &a[2], a[3], a[4], &a[5],\n                       &a[6], 7)\n      ->run();\n\n  ailego::Closure::New(ClassA::StaticProcess0)->run();\n  ailego::Closure::New(&ClassA::StaticProcess0)->run();\n  ailego::Closure::New(ClassA::StaticProcess1, b[1])->run();\n  ailego::Closure::New(&ClassA::StaticProcess1, 1)->run();\n  ailego::Closure::New(ClassA::StaticProcess2, 1, &b[2])->run();\n  ailego::Closure::New(&ClassA::StaticProcess2, b[1], &b[2])->run();\n  ailego::Closure::New(ClassA::StaticProcess3, b[1], &b[2], b[3])->run();\n  ailego::Closure::New(&ClassA::StaticProcess3, 1, &b[2], b[3])->run();\n  ailego::Closure::New(ClassA::StaticProcess4, 1, &b[2], b[3], 4)->run();\n  ailego::Closure::New(&ClassA::StaticProcess4, b[1], &b[2], b[3], b[4])->run();\n  ailego::Closure::New(ClassA::StaticProcess5, b[1], &b[2], b[3], b[4], &b[5])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticProcess5, 1, &b[2], b[3], 4, &b[5])\n      ->run();\n  ailego::Closure::New(ClassA::StaticProcess6, 1, &b[2], b[3], 4, &b[5], &b[6])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticProcess6, b[1], &b[2], b[3], b[4], &b[5],\n                       &b[6])\n      ->run();\n  ailego::Closure::New(ClassA::StaticProcess7, 1, &b[2], b[3], 4, &b[5], &b[6],\n                       b[7])\n      ->run();\n  ailego::Closure::New(&ClassA::StaticProcess7, b[1], &b[2], b[3], b[4], &b[5],\n   
                    &b[6], 7)\n      ->run();\n}\n\nTEST(Closure, Member) {\n  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  ClassB bbb(1);\n\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction0)->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction1, 1)->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction2, a[1], &a[2])->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction3, 1, &a[2], a[3])->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction4, a[1], &a[2], a[3], a[4])\n      ->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction5, 1, &a[2], a[3], 4, &a[5])\n      ->run();\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction6, a[1], &a[2], a[3], a[4],\n                       &a[5], &a[6])\n      ->run();\n  ailego::Closure::New((const ClassB *)(&bbb), &ClassB::MemberFunction7, a[1],\n                       &a[2], a[3], a[4], &a[5], &a[6], 7)\n      ->run();\n  ailego::Closure::New((const volatile ClassB *)(&bbb),\n                       &ClassB::MemberFunction7, a[1], &a[2], a[3], a[4], &a[5],\n                       &a[6], 7)\n      ->run();\n\n  ClassB &&bbc = std::move(bbb);\n  ailego::Closure::New(&bbc, &ClassB::MemberProcess0)->run();\n  ailego::Closure::New(&bbc, &ClassB::MemberProcess1, 1)->run();\n  ailego::Closure::New(&bbc, &ClassB::MemberProcess2, b[1], &b[2])->run();\n  ailego::Closure::New(&bbc, &ClassB::MemberProcess3, 1, &b[2], b[3])->run();\n  ailego::Closure::New(&bbc, &ClassB::MemberProcess4, b[1], &b[2], b[3], b[4])\n      ->run();\n\n  ClassB &bbd = bbb;\n  ailego::Closure::New(&bbd, &ClassB::MemberProcess5, 1, &b[2], b[3], 4, &b[5])\n      ->run();\n  ailego::Closure::New(&bbd, &ClassB::MemberProcess6, b[1], &b[2], b[3], b[4],\n                       &b[5], &b[6])\n      ->run();\n  ailego::Closure::New(&bbd, &ClassB::MemberProcess7, b[1], &b[2], b[3], b[4],\n                       &b[5], &b[6], 7)\n      ->run();\n}\n\nTEST(Closure, Function) {\n  long 
a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  ClassB bbb(1);\n\n  std::function<decltype(GlobalFunction0)> f0 =\n      std::bind(&ClassB::MemberFunction0, &bbb);\n  ailego::Closure::New(f0)->run();\n\n  std::function<decltype(GlobalFunction1)> f1 =\n      std::bind(&ClassB::MemberFunction1, &bbb, std::placeholders::_1);\n  ailego::Closure::New(f1, 1)->run();\n\n  std::function<decltype(GlobalFunction2)> f2 =\n      std::bind(&ClassB::MemberFunction2, &bbb, std::placeholders::_1,\n                std::placeholders::_2);\n  ailego::Closure::New(f2, a[1], &a[2])->run();\n\n  std::function<decltype(GlobalFunction3)> f3 =\n      std::bind(&ClassB::MemberFunction3, &bbb, std::placeholders::_1,\n                std::placeholders::_2, std::placeholders::_3);\n  ailego::Closure::New(f3, a[1], &a[2], a[3])->run();\n\n  std::function<decltype(GlobalFunction4)> f4 = std::bind(\n      &ClassB::MemberFunction4, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);\n  ailego::Closure::New(f4, 1, &a[2], a[3], a[4])->run();\n\n  std::function<decltype(GlobalFunction5)> f5 =\n      std::bind(&ClassB::MemberFunction5, &bbb, std::placeholders::_1,\n                std::placeholders::_2, std::placeholders::_3,\n                std::placeholders::_4, std::placeholders::_5);\n  ailego::Closure::New(f5, 1, &a[2], a[3], 4, &a[5])->run();\n\n  std::function<decltype(GlobalFunction6)> f6 = std::bind(\n      &ClassB::MemberFunction6, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,\n      std::placeholders::_5, std::placeholders::_6);\n  ailego::Closure::New(f6, 1, &a[2], a[3], a[4], &a[5], &a[6])->run();\n\n  std::function<decltype(GlobalFunction7)> f7 = std::bind(\n      &ClassB::MemberFunction7, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,\n      std::placeholders::_5, 
std::placeholders::_6, std::placeholders::_7);\n  ailego::Closure::New(f7, a[1], &a[2], a[3], a[4], &a[5], &a[6], 7)->run();\n\n  std::function<decltype(GlobalProcess0)> p0 =\n      std::bind(&ClassB::MemberProcess0, &bbb);\n  ailego::Closure::New(p0)->run();\n\n  std::function<decltype(GlobalProcess1)> p1 =\n      std::bind(&ClassB::MemberProcess1, &bbb, std::placeholders::_1);\n  ailego::Closure::New(p1, 1)->run();\n\n  std::function<decltype(GlobalProcess2)> p2 =\n      std::bind(&ClassB::MemberProcess2, &bbb, std::placeholders::_1,\n                std::placeholders::_2);\n  ailego::Closure::New(p2, b[1], &b[2])->run();\n\n  std::function<decltype(GlobalProcess3)> p3 =\n      std::bind(&ClassB::MemberProcess3, &bbb, std::placeholders::_1,\n                std::placeholders::_2, std::placeholders::_3);\n  ailego::Closure::New(p3, b[1], &b[2], b[3])->run();\n\n  std::function<decltype(GlobalProcess4)> p4 = std::bind(\n      &ClassB::MemberProcess4, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);\n  ailego::Closure::New(p4, 1, &b[2], b[3], b[4])->run();\n\n  std::function<decltype(GlobalProcess5)> p5 =\n      std::bind(&ClassB::MemberProcess5, &bbb, std::placeholders::_1,\n                std::placeholders::_2, std::placeholders::_3,\n                std::placeholders::_4, std::placeholders::_5);\n  ailego::Closure::New(p5, 1, &b[2], b[3], 4, &b[5])->run();\n\n  std::function<decltype(GlobalProcess6)> p6 = std::bind(\n      &ClassB::MemberProcess6, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,\n      std::placeholders::_5, std::placeholders::_6);\n  ailego::Closure::New(p6, 1, &b[2], b[3], b[4], &b[5], &b[6])->run();\n\n  std::function<decltype(GlobalProcess7)> p7 = std::bind(\n      &ClassB::MemberProcess7, &bbb, std::placeholders::_1,\n      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,\n      std::placeholders::_5, 
std::placeholders::_6, std::placeholders::_7);\n  ailego::Closure::New(p7, b[1], &b[2], b[3], b[4], &b[5], &b[6], 7)->run();\n}\n\nTEST(Closure, Lambda) {\n  auto lambda0 = []() { return 0; };\n  ailego::Closure::New(lambda0)->run();\n  ailego::Closure::New([&]() { return 0; })->run();\n\n  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n  ClassB bbb(1);\n\n  auto lambda1 = [&](long a1) { return bbb.MemberFunction1(a1); };\n  ailego::Closure::New(lambda1, 1)->run();\n\n  auto lambda2 = [&](long a1, long *a2) { return bbb.MemberFunction2(a1, a2); };\n  ailego::Closure::New(lambda2, 1, &a[2])->run();\n\n  auto lambda3 = [&](long a1, long *a2, long &a3) {\n    return bbb.MemberFunction3(a1, a2, a3);\n  };\n  ailego::Closure::New(lambda3, 1, &a[2], a[3])->run();\n\n  auto lambda4 = [&](long a1, long *a2, long &a3, const long &a4) {\n    return bbb.MemberFunction4(a1, a2, a3, a4);\n  };\n  ailego::Closure::New(lambda4, a[1], &a[2], a[3], a[4])->run();\n\n  auto lambda5 = [&](long a1, long *a2, long &a3, const long &a4,\n                     volatile long *a5) {\n    return bbb.MemberFunction5(a1, a2, a3, a4, a5);\n  };\n  ailego::Closure::New(lambda5, 1, &a[2], a[3], 4, &a[5])->run();\n\n  auto lambda6 = [&](long a1, long *a2, long &a3, const long &a4,\n                     volatile long *a5, long *const volatile a6) {\n    return bbb.MemberFunction6(a1, a2, a3, a4, a5, a6);\n  };\n  ailego::Closure::New(lambda6, 1, &a[2], a[3], 4, &a[5], &a[6])->run();\n\n  auto lambda7 = [&](long a1, long *a2, long &a3, const long &a4,\n                     volatile long *a5, long *const volatile a6, long &&a7) {\n    return bbb.MemberFunction7(a1, a2, a3, a4, a5, a6, std::move(a7));\n  };\n  ailego::Closure::New(lambda7, a[1], &a[2], a[3], a[4], &a[5], &a[6], 7)\n      ->run();\n}\n\nTEST(Closure, Return) {\n  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n\n  size_t r = 0;\n  ailego::Closure::New(&GlobalFunction0)->run(&r);\n  EXPECT_EQ(0u, r);\n  ailego::Closure::New(&GlobalFunction1, 
1)->run(&r);\n  EXPECT_EQ(1u, r);\n  ailego::Closure::New(&GlobalFunction2, a[1], &a[2])->run(&r);\n  EXPECT_EQ(2u, r);\n  ailego::Closure::New(&GlobalFunction3, 1, &a[2], a[3])->run(&r);\n  EXPECT_EQ(3u, r);\n  ailego::Closure::New(&GlobalFunction4, a[1], &a[2], a[3], a[4])->run(&r);\n  EXPECT_EQ(4u, r);\n  ailego::Closure::New(&GlobalFunction5, 1, &a[2], a[3], 4, &a[5])->run(&r);\n  EXPECT_EQ(5u, r);\n  ailego::Closure::New(&GlobalFunction6, a[1], &a[2], a[3], a[4], &a[5], &a[6])\n      ->run(&r);\n  EXPECT_EQ(6u, r);\n  ailego::Closure::New(&GlobalFunction7, a[1], &a[2], a[3], a[4], &a[5], &a[6],\n                       7)\n      ->run(&r);\n  EXPECT_EQ(7u, r);\n\n  ClassB bbb(1);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction0)->run(&r);\n  EXPECT_EQ(0u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction1, 1)->run(&r);\n  EXPECT_EQ(1u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction2, a[1], &a[2])->run(&r);\n  EXPECT_EQ(2u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction3, 1, &a[2], a[3])->run(&r);\n  EXPECT_EQ(3u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction4, a[1], &a[2], a[3], a[4])\n      ->run(&r);\n  EXPECT_EQ(4u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction5, 1, &a[2], a[3], 4, &a[5])\n      ->run(&r);\n  EXPECT_EQ(5u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction6, a[1], &a[2], a[3], a[4],\n                       &a[5], &a[6])\n      ->run(&r);\n  EXPECT_EQ(6u, r);\n  ailego::Closure::New(&bbb, &ClassB::MemberFunction7, a[1], &a[2], a[3], a[4],\n                       &a[5], &a[6], 7)\n      ->run(&r);\n  EXPECT_EQ(7u, r);\n}\n\nstruct LeftValue {\n  LeftValue(void) {\n    std::cout << \"LeftValue(void)\" << std::endl;\n  }\n  LeftValue(const LeftValue &) {\n    ++count;\n    std::cout << \"LeftValue(const LeftValue &)\" << std::endl;\n  }\n  LeftValue(LeftValue &&) {\n    std::cout << \"LeftValue(LeftValue &&)\" << std::endl;\n    EXPECT_TRUE(0);\n  }\n  static int count;\n  int 
val = 1;\n};\n\nint LeftValue::count = 0;\n\nstruct RightValue {\n  RightValue(void) {\n    std::cout << \"RightValue(void)\" << std::endl;\n  }\n  RightValue(const RightValue &) {\n    std::cout << \"RightValue(const RightValue &)\" << std::endl;\n    EXPECT_TRUE(0);\n  }\n  RightValue(RightValue &&) {\n    ++count;\n    std::cout << \"RightValue(RightValue &&)\" << std::endl;\n  }\n  static int count;\n  int val = 2;\n};\n\nint RightValue::count = 0;\n\nstruct TestLeftRight {\n  static int Run(LeftValue &&, const RightValue &) {\n    return 0;\n  }\n  static int RunLeft(LeftValue &&) {\n    return 0;\n  }\n  static int RunRight(const RightValue &) {\n    return 0;\n  }\n};\n\nTEST(Closure, LeftRight) {\n  LeftValue lval;\n  RightValue rval;\n\n  std::cout << \"## Starting 1...\" << std::endl;\n  ailego::Closure::New(&TestLeftRight::RunLeft, lval)->run();\n  EXPECT_EQ(1, LeftValue::count);\n\n  std::cout << \"## Starting 2...\" << std::endl;\n  ailego::Closure::New(&TestLeftRight::RunRight, RightValue())->run();\n  EXPECT_EQ(1, RightValue::count);\n\n  std::cout << \"## Starting 3...\" << std::endl;\n  auto call = ailego::Closure::New(&TestLeftRight::Run, std::ref(lval),\n                                   std::move(rval));\n  (*call)();\n  EXPECT_EQ(2, LeftValue::count);\n  EXPECT_EQ(2, RightValue::count);\n}\n\nvoid NoinlineFunction(int *a) {\n  ++(*a);\n}\n\nTEST(Closure, Benchmark) {\n  const int count = 10000000;\n\n  ailego::ElapsedTime stamp0;\n  int num0 = 0;\n  typedef void (*FUNC)(int *);\n  volatile FUNC fn0 = NoinlineFunction;\n  for (int i = 0; i < count; i++) {\n    (*fn0)(&num0);\n  }\n  std::cout << \"Noinline elapsed: \" << stamp0.micro_seconds() << \" us\"\n            << std::endl;\n  EXPECT_EQ(count, num0);\n\n  ailego::ElapsedTime stamp1;\n  int num1 = 0;\n  auto fn1 = ailego::Closure::New([](int *a) { ++(*a); }, &num1);\n  for (int i = 0; i < count; i++) {\n    fn1->run();\n  }\n  std::cout << \"Closure elapsed: \" << stamp1.micro_seconds() 
<< \" us\"\n            << std::endl;\n  EXPECT_EQ(count, num1);\n\n  ailego::ElapsedTime stamp2;\n  int num2 = 0;\n  auto fn2 = [](int *a) { ++(*a); };\n  for (int i = 0; i < count; i++) {\n    fn2(&num2);\n  }\n  std::cout << \"Lambda elapsed: \" << stamp2.micro_seconds() << \" us\"\n            << std::endl;\n  EXPECT_EQ(count, num2);\n\n  ailego::ElapsedTime stamp3;\n  int num3 = 0;\n  std::function<void(int *)> fn3 = [](int *a) { ++(*a); };\n  for (int i = 0; i < count; i++) {\n    fn3(&num3);\n  }\n  std::cout << \"Function elapsed: \" << stamp3.micro_seconds() << \" us\"\n            << std::endl;\n  EXPECT_EQ(count, num3);\n}\n"
  },
  {
    "path": "tests/ailego/pattern/factory_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/pattern/factory.h>\n\nusing namespace zvec;\nusing namespace zvec::ailego;\n\nstruct Base {\n  virtual ~Base(void) {}\n  virtual void do_something() = 0;\n};\n\nstruct AAA : public Base {\n  AAA(void) {}\n\n  virtual void do_something() {\n    printf(\"do something\\n\");\n  }\n};\n\nAILEGO_FACTORY_REGISTER(AAA, Base, AAA);\n\nTEST(Factory, General) {\n  EXPECT_TRUE(!ailego::Factory<Base>::MakeShared(\"BBB\"));\n  EXPECT_TRUE(!ailego::Factory<Base>::Has(\"BBB\"));\n\n  auto aaa = ailego::Factory<Base>::MakeShared(\"AAA\");\n  ASSERT_TRUE(!!aaa);\n  aaa->do_something();\n  EXPECT_TRUE(!!ailego::Factory<Base>::Has(\"AAA\"));\n\n  auto vec = ailego::Factory<Base>::Classes();\n  EXPECT_EQ(1u, vec.size());\n  EXPECT_EQ(\"AAA\", std::string(vec[0]));\n}\n"
  },
  {
    "path": "tests/ailego/pattern/scope_guard_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/pattern/defer.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(ScopeGuard, Lambda) {\n  int count = 0;\n  auto a = ailego::ScopeGuard::Make(\n      [](int val) { printf(\"ScopeGuard: Lambda %d\\n\", val); }, 1);\n\n  auto b = ailego::ScopeGuard::Make([&] {\n    printf(\"ScopeGuard: Lambda 2\\n\");\n    ++count;\n  });\n\n  auto c = ailego::ScopeGuard::Make([] {\n    printf(\"ScopeGuard: Lambda 3\\n\");\n    return 0;\n  });\n\n  auto d = ailego::ScopeGuard::Make([&] {\n    printf(\"ScopeGuard: Lambda 4\\n\");\n    ++count;\n    return false;\n  });\n\n  EXPECT_EQ(0, count);\n}\n\nstruct ClassA {\n  static void StaticProcess0(void) {\n    printf(\"ScopeGuard: Static Function 1\\n\");\n    ++count;\n  }\n\n  static int StaticProcess1(int val) {\n    printf(\"ScopeGuard: Static Function %d\\n\", val);\n    ++count;\n    return 0;\n  }\n\n  static int count;\n};\n\nint ClassA::count{0};\n\nTEST(ScopeGuard, StaticFunction) {\n  auto a = ailego::ScopeGuard::Make(ClassA::StaticProcess0);\n  auto b = ailego::ScopeGuard::Make(ClassA::StaticProcess1, 2);\n\n  EXPECT_EQ(0, ClassA::count);\n}\n\nclass ClassB {\n public:\n  virtual void MemberProcess0(void) const {\n    printf(\"ScopeGuard: Member Function 0\\n\");\n    ++count;\n  }\n\n  virtual void MemberProcess1(int val) {\n    printf(\"ScopeGuard: Member Function 
%d\\n\", val);\n    ++count;\n  }\n\n  virtual void MemberProcess2(long val) const volatile {\n    printf(\"ScopeGuard: Member Function %ld\\n\", val);\n    ++count;\n  }\n\n  virtual void MemberProcess3(size_t val) volatile {\n    printf(\"ScopeGuard: Member Function %zu\\n\", val);\n    ++count;\n  }\n\n  static int count;\n};\n\nint ClassB::count{0};\n\nTEST(ScopeGuard, MemberFunction) {\n  ClassB bb;\n  auto a = ailego::ScopeGuard::Make(&bb, &ClassB::MemberProcess0);\n  auto b = ailego::ScopeGuard::Make(&bb, &ClassB::MemberProcess1, 2);\n  AILEGO_DEFER(&bb, &ClassB::MemberProcess2, 3);\n  AILEGO_DEFER(&bb, &ClassB::MemberProcess3, 4);\n  EXPECT_EQ(0, ClassB::count);\n}\n"
  },
  {
    "path": "tests/ailego/pattern/singleton_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/pattern/singleton.h>\n\nusing namespace zvec::ailego;\n\nstruct AAA {\n  void run() {\n    ++a;\n  }\n  uint32_t val() {\n    return a;\n  }\n  std::atomic_uint a{0};\n};\n\nTEST(Singleton, General) {\n  Singleton<int>::Instance() = 15;\n  EXPECT_EQ(15, Singleton<int>::Instance());\n\n  Singleton<double>::Instance() = 1.2;\n  EXPECT_DOUBLE_EQ(1.2, Singleton<double>::Instance());\n\n  ThreadPool pool1;\n  for (int i = 0; i < 1000; ++i) {\n    pool1.execute([] { Singleton<AAA>::Instance().run(); });\n  }\n  pool1.wait_finish();\n\n  EXPECT_EQ(1000u, Singleton<AAA>::Instance().val());\n}\n"
  },
  {
    "path": "tests/ailego/utility/bit_string_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <random>\n#include <ailego/utility/bit_string_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(BitStringHelper, General) {\n  size_t data_bits = 13;\n  size_t data_num = 10;\n  size_t buffer_size = (data_bits * data_num + 7) / 8;\n\n  std::vector<uint8_t> buffer;\n  buffer.resize(buffer_size);\n\n  uint8_t *buffer_data = buffer.data();\n\n  ailego::BitStringWriter bsw(buffer_data, buffer_size);\n  for (size_t m = 0; m < data_num; m++) {\n    uint64_t data = m;\n\n    EXPECT_EQ(bsw.write(data, data_bits), true);\n  }\n\n  uint64_t data_read = 0;\n  ailego::BitStringReader bsr(buffer_data, buffer_size);\n  for (size_t m = 0; m < data_num; m++) {\n    EXPECT_EQ(bsr.read(data_read, data_bits), true);\n\n    EXPECT_EQ(data_read, m);\n\n    // std::cout << \"m: \" << m << \", data read: \" << data_read << std::endl;\n  }\n}\n"
  },
  {
    "path": "tests/ailego/utility/bitset_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <random>\n#include <ailego/utility/bitset_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(BitsetHelper, Benchmark) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_int_distribution<uint32_t> dist(0, 0xffffffff);\n  const size_t batch_size = 1000;\n  const size_t dimension = 1024;\n\n  std::vector<uint32_t> vec;\n  for (size_t i = 0; i < batch_size; ++i) {\n    for (size_t j = 0; j < (dimension >> 5); ++j) {\n      vec.push_back(dist(gen));\n    }\n  }\n\n  ailego::ElapsedTime elapsed_time;\n  size_t count = (dimension >> 5);\n  size_t total = 0;\n  std::cout << \"# \" << dimension << \"d, \" << batch_size << std::endl;\n\n  elapsed_time.reset();\n  for (size_t i = 0; i < batch_size; ++i) {\n    total += ailego::BitsetHelper::Cardinality(&vec[i * count], count);\n  }\n  printf(\"* Cardinality (us): \\t%zu\\n\", (size_t)elapsed_time.micro_seconds());\n  printf(\"* Result: \\t%zu\\n\", total);\n}\n"
  },
  {
    "path": "tests/ailego/utility/dl_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <string>\n#include <ailego/utility/dl_helper.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(DLHelper, General) {\n  std::string no_exist = \"no_exist_file\";\n  std::string error_msg;\n\n  EXPECT_EQ(nullptr, ailego::DLHelper::Load(no_exist, nullptr));\n  EXPECT_EQ(nullptr, ailego::DLHelper::Load(no_exist, &error_msg));\n  EXPECT_TRUE(!error_msg.empty());\n  printf(\"%s\\n\", error_msg.c_str());\n  ailego::DLHelper::Unload(nullptr);\n\n  EXPECT_EQ(nullptr, ailego::DLHelper::Symbol(nullptr, \"test\"));\n}\n"
  },
  {
    "path": "tests/ailego/utility/float_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <random>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/float_helper.h>\n\nusing namespace zvec;\n\nTEST(FloatHelper, General) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0f, 0.9f);\n  std::uniform_int_distribution<int> dist2(1, 250);\n\n  for (int i = 0; i < 1000; ++i) {\n    float fp32 = dist(gen);\n    float fp16 = ailego::FloatHelper::ToFP32(\n        ailego::FloatHelper::ToFP16(fp32, 1.0f), 1.0f);\n    EXPECT_GT(0.00025, std::abs(fp32 - fp16));\n  }\n\n  for (int i = 0; i < 1000; ++i) {\n    std::vector<float> vec1_fp32, vec2_fp32;\n    std::vector<ailego::Float16> vec1_fp16, vec2_fp16;\n    int count = dist2(gen);\n\n    vec1_fp32.resize(count);\n    vec2_fp32.resize(count);\n    vec1_fp16.resize(count);\n    vec2_fp16.resize(count);\n    for (size_t j = 0; j < vec1_fp32.size(); ++j) {\n      vec1_fp32[j] = dist(gen);\n    }\n    float norm1;\n    ailego::Norm2Matrix<float, 1>::Compute(vec1_fp32.data(), vec1_fp32.size(),\n                                           &norm1);\n    EXPECT_NE(1.0f, norm1);\n\n    // Convert to FP16\n    ailego::FloatHelper::ToFP16(vec1_fp32.data(), vec1_fp32.size(),\n                                (uint16_t *)vec1_fp16.data());\n    
ailego::FloatHelper::ToFP16(vec1_fp32.data(), vec1_fp32.size(), norm1,\n                                (uint16_t *)vec2_fp16.data());\n    for (size_t j = 0; j < vec1_fp32.size(); ++j) {\n      EXPECT_GT(0.00025, std::abs(vec1_fp32[j] - vec1_fp16[j]));\n      // EXPECT_FLOAT_EQ(vec1_fp32[j], vec1_fp16[j]);\n    }\n\n    float norm2;\n    ailego::Norm2Matrix<ailego::Float16, 1>::Compute(vec1_fp16.data(),\n                                                     vec1_fp16.size(), &norm2);\n    EXPECT_NE(1.0f, norm2);\n\n    // Convert to FP32\n    ailego::FloatHelper::ToFP32((const uint16_t *)vec1_fp16.data(),\n                                vec1_fp16.size(), vec1_fp32.data());\n    ailego::FloatHelper::ToFP32((const uint16_t *)vec1_fp16.data(),\n                                vec1_fp16.size(), norm2, vec2_fp32.data());\n    for (size_t j = 0; j < vec1_fp32.size(); ++j) {\n      EXPECT_GT(0.00025, std::abs(vec1_fp32[j] - vec1_fp16[j]));\n      // EXPECT_FLOAT_EQ(vec1_fp32[j], vec1_fp16[j]);\n    }\n\n    ailego::Norm2Matrix<float, 1>::Compute(vec2_fp32.data(), vec2_fp32.size(),\n                                           &norm1);\n    ailego::Norm2Matrix<ailego::Float16, 1>::Compute(vec2_fp16.data(),\n                                                     vec2_fp16.size(), &norm2);\n    // EXPECT_FLOAT_EQ(norm1, norm2);\n    // EXPECT_FLOAT_EQ(1.0f, norm1);\n    EXPECT_GT(0.001, std::abs(1.0f - norm1));\n    // EXPECT_FLOAT_EQ(1.0f, norm2);\n    EXPECT_GT(0.001, std::abs(1.0f - norm2));\n  }\n}\n\nTEST(Float16, General) {\n  ailego::Float16 a1;\n  EXPECT_FLOAT_EQ(0.0f, a1);\n\n  ailego::Float16 a2 = 0.33f;\n  EXPECT_TRUE(0.0f < a2);\n  EXPECT_TRUE(0.0f <= a2);\n  EXPECT_TRUE(0.5f > a2);\n  EXPECT_TRUE(0.5f >= a2);\n  EXPECT_TRUE(0.0 < a2);\n  EXPECT_TRUE(0.0 <= a2);\n  EXPECT_TRUE(0.5 > a2);\n  EXPECT_TRUE(0.5 >= a2);\n  EXPECT_TRUE((float)a2 != 0.0);\n  EXPECT_FALSE((float)a2 == 0.0);\n\n  ailego::Float16 a3 = 0.55;\n  EXPECT_TRUE((double)a3 != 0.0);\n  
EXPECT_FALSE((double)a3 == 0.0);\n\n  EXPECT_TRUE(a1 < a2);\n  EXPECT_TRUE(a2 <= a3);\n  EXPECT_TRUE(a2 > a1);\n  EXPECT_TRUE(a3 >= a1);\n\n  ailego::Float16 a4 = a2 + a3;\n  ailego::Float16 a5 = a2 - a3;\n  ailego::Float16 a6 = a2 * a3;\n  ailego::Float16 a7 = a2 / a3;\n\n  a4 *= 1.0;\n  a5 /= 1.0;\n  a6 -= 0.0;\n  a7 += 0.0;\n\n  EXPECT_TRUE(0.0f != a4);\n  EXPECT_TRUE(0.0f != a5);\n  EXPECT_TRUE(0.0f != a6);\n  EXPECT_TRUE(0.0f != a7);\n\n  ailego::Float16 one = 1.0;\n  uint16_t *one_encoded = (uint16_t *)(&one);\n  printf(\"One: %f, 0x%x\\n\", (float)one, *one_encoded);\n}\n"
  },
  {
    "path": "tests/ailego/utility/matrix_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <random>\n#include <ailego/utility/matrix_helper.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(MatrixHelper, Transpose) {\n  std::mt19937 gen((std::random_device())());\n\n  std::vector<float> result1(31 * 7);\n  std::vector<float> result2(31 * 7);\n  std::vector<float> result3(31 * 7);\n\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n  for (size_t i = 0; i < 31 * 7; ++i) {\n    result1[i] = dist(gen);\n  }\n\n  ailego::MatrixHelper::Transpose<float, 31>(result1.data(), 7, result2.data());\n  ailego::MatrixHelper::ReverseTranspose<float, 31>(result2.data(), 7,\n                                                    result3.data());\n  EXPECT_EQ(0, memcmp(result1.data(), result3.data(),\n                      result1.size() * sizeof(float)));\n\n  ailego::MatrixHelper::Transpose<float, 7>(result1.data(), 31, result2.data());\n  ailego::MatrixHelper::ReverseTranspose<float, 7>(result2.data(), 31,\n                                                   result3.data());\n  EXPECT_EQ(0, memcmp(result1.data(), result3.data(),\n                      result1.size() * sizeof(float)));\n\n  ailego::MatrixHelper::Transpose<float>(result1.data(), 31, 7, result2.data());\n  ailego::MatrixHelper::ReverseTranspose<float>(result2.data(), 31, 7,\n                                                result3.data());\n  EXPECT_EQ(0, 
memcmp(result1.data(), result3.data(),\n                      result1.size() * sizeof(float)));\n\n  ailego::MatrixHelper::Transpose<float>(result1.data(), 7, 31, result2.data());\n  ailego::MatrixHelper::ReverseTranspose<float>(result2.data(), 7, 31,\n                                                result3.data());\n  EXPECT_EQ(0, memcmp(result1.data(), result3.data(),\n                      result1.size() * sizeof(float)));\n}\n"
  },
  {
    "path": "tests/ailego/utility/memory_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <ailego/utility/memory_helper.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(MemoryHelper, General) {\n  size_t vsz, rss;\n  EXPECT_TRUE(ailego::MemoryHelper::SelfUsage(&vsz, &rss));\n\n  std::cout << \"Page Size: \" << ailego::MemoryHelper::PageSize() << std::endl;\n  std::cout << \"Usage: VSZ=\" << vsz << \", RSS=\" << rss << std::endl;\n  std::cout << \"RSS: \" << ailego::MemoryHelper::SelfRSS() << std::endl;\n  std::cout << \"Peak RSS: \" << ailego::MemoryHelper::SelfPeakRSS() << std::endl;\n  std::cout << \"Total RAM Size: \" << ailego::MemoryHelper::TotalRamSize()\n            << std::endl;\n  std::cout << \"Available RAM Size: \"\n            << ailego::MemoryHelper::AvailableRamSize() << std::endl;\n  std::cout << \"Used RAM Size: \" << ailego::MemoryHelper::UsedRamSize()\n            << std::endl;\n  std::cout << \"Total RAM Size in Container: \"\n            << ailego::MemoryHelper::ContainerAwareTotalRamSize() << std::endl;\n}\n"
  },
  {
    "path": "tests/ailego/utility/string_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <limits>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/string_helper.h>\n\nusing namespace zvec;\n\nTEST(StringHelper, Split) {\n  std::vector<std::string> out;\n\n  ailego::StringHelper::Split(\"\", \",\", &out);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"\", out[0]);\n\n  ailego::StringHelper::Split(\"\", ';', &out);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"\", out[0]);\n\n  ailego::StringHelper::Split(\"Hello, world!\", \"\", &out);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"Hello, world!\", out[0]);\n\n  ailego::StringHelper::Split(\"Hello, world!\", '!', &out);\n  EXPECT_EQ(2u, out.size());\n  EXPECT_EQ(\"Hello, world\", out[0]);\n  EXPECT_EQ(\"\", out[1]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxya\", \"xyz\", &out);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"abxycdxyxydefxya\", out[0]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxya\", 'a', &out);\n  EXPECT_EQ(3u, out.size());\n  EXPECT_EQ(\"\", out[0]);\n  EXPECT_EQ(\"bxycdxyxydefxy\", out[1]);\n  EXPECT_EQ(\"\", out[2]);\n\n  ailego::StringHelper::Split(\"abxycdxy!!xydefxya\", \"xy\", &out);\n  EXPECT_EQ(5u, out.size());\n  EXPECT_EQ(\"ab\", out[0]);\n  EXPECT_EQ(\"cd\", out[1]);\n  EXPECT_EQ(\"!!\", out[2]);\n  EXPECT_EQ(\"def\", out[3]);\n  EXPECT_EQ(\"a\", out[4]);\n\n  
ailego::StringHelper::Split(\"abxycdxy!!xydefxya\", '!', &out);\n  EXPECT_EQ(3u, out.size());\n  EXPECT_EQ(\"abxycdxy\", out[0]);\n  EXPECT_EQ(\"\", out[1]);\n  EXPECT_EQ(\"xydefxya\", out[2]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxya\", \"xy\", &out);\n  EXPECT_EQ(5u, out.size());\n  EXPECT_EQ(\"ab\", out[0]);\n  EXPECT_EQ(\"cd\", out[1]);\n  EXPECT_EQ(\"\", out[2]);\n  EXPECT_EQ(\"def\", out[3]);\n  EXPECT_EQ(\"a\", out[4]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxya\", 'y', &out);\n  EXPECT_EQ(5u, out.size());\n  EXPECT_EQ(\"abx\", out[0]);\n  EXPECT_EQ(\"cdx\", out[1]);\n  EXPECT_EQ(\"x\", out[2]);\n  EXPECT_EQ(\"defx\", out[3]);\n  EXPECT_EQ(\"a\", out[4]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxy\", \"xy\", &out);\n  EXPECT_EQ(5u, out.size());\n  EXPECT_EQ(\"ab\", out[0]);\n  EXPECT_EQ(\"cd\", out[1]);\n  EXPECT_EQ(\"\", out[2]);\n  EXPECT_EQ(\"def\", out[3]);\n  EXPECT_EQ(\"\", out[4]);\n\n  ailego::StringHelper::Split(\"abxycdxyxydefxy\", 'y', &out);\n  EXPECT_EQ(5u, out.size());\n  EXPECT_EQ(\"abx\", out[0]);\n  EXPECT_EQ(\"cdx\", out[1]);\n  EXPECT_EQ(\"x\", out[2]);\n  EXPECT_EQ(\"defx\", out[3]);\n  EXPECT_EQ(\"\", out[4]);\n\n  ailego::StringHelper::Split(\"xy\", \"xy\", &out);\n  EXPECT_EQ(2u, out.size());\n  EXPECT_EQ(\"\", out[0]);\n  EXPECT_EQ(\"\", out[1]);\n\n  ailego::StringHelper::Split(\"x\", 'x', &out);\n  EXPECT_EQ(2u, out.size());\n  EXPECT_EQ(\"\", out[0]);\n  EXPECT_EQ(\"\", out[1]);\n}\n\nTEST(StringHelper, SplitFloat) {\n  std::vector<float> out1;\n  ailego::StringHelper::Split(\"1.0, tt, 2,\", ',', &out1);\n  EXPECT_EQ(4u, out1.size());\n  EXPECT_FLOAT_EQ(1.0f, out1[0]);\n  EXPECT_FLOAT_EQ(0.0f, out1[1]);\n  EXPECT_FLOAT_EQ(2.0f, out1[2]);\n  EXPECT_FLOAT_EQ(0.0f, out1[3]);\n\n  std::vector<double> out2;\n  ailego::StringHelper::Split(\"1.0, tt, 2,\", ',', &out2);\n  EXPECT_EQ(4u, out2.size());\n  EXPECT_DOUBLE_EQ(1.0f, out2[0]);\n  EXPECT_DOUBLE_EQ(0.0f, out2[1]);\n  EXPECT_DOUBLE_EQ(2.0f, out2[2]);\n  
EXPECT_DOUBLE_EQ(0.0f, out2[3]);\n}\n\nTEST(StringHelper, SplitInteger) {\n  std::vector<int32_t> out1;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out1);\n  EXPECT_EQ(4u, out1.size());\n  EXPECT_EQ(-1, out1[0]);\n  EXPECT_EQ(0, out1[1]);\n  EXPECT_EQ(2, out1[2]);\n  EXPECT_EQ(0, out1[3]);\n\n  std::vector<uint32_t> out2;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out2);\n  EXPECT_EQ(4u, out2.size());\n  EXPECT_EQ(0xffffffffu, out2[0]);\n  EXPECT_EQ(0u, out2[1]);\n  EXPECT_EQ(2u, out2[2]);\n  EXPECT_EQ(0u, out2[3]);\n\n  std::vector<int64_t> out3;\n  ailego::StringHelper::Split(\"-1.0, tt, 2.3,\", ',', &out3);\n  EXPECT_EQ(4u, out3.size());\n  EXPECT_EQ(-1, out3[0]);\n  EXPECT_EQ(0, out3[1]);\n  EXPECT_EQ(2, out3[2]);\n  EXPECT_EQ(0, out3[3]);\n\n  std::vector<uint64_t> out4;\n  ailego::StringHelper::Split(\"-1.0, tt, 2.3,\", ',', &out4);\n  EXPECT_EQ(4u, out4.size());\n  EXPECT_EQ((uint64_t)-1, out4[0]);\n  EXPECT_EQ(0u, out4[1]);\n  EXPECT_EQ(2u, out4[2]);\n  EXPECT_EQ(0u, out4[3]);\n\n  std::vector<int8_t> out5;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out5);\n  EXPECT_EQ(4u, out5.size());\n  EXPECT_EQ(-1, out5[0]);\n  EXPECT_EQ(0, out5[1]);\n  EXPECT_EQ(2, out5[2]);\n  EXPECT_EQ(0, out5[3]);\n\n  std::vector<uint8_t> out6;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out6);\n  EXPECT_EQ(4u, out6.size());\n  EXPECT_EQ(255u, out6[0]);\n  EXPECT_EQ(0u, out6[1]);\n  EXPECT_EQ(2u, out6[2]);\n  EXPECT_EQ(0u, out6[3]);\n\n  std::vector<int16_t> out7;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out7);\n  EXPECT_EQ(4u, out7.size());\n  EXPECT_EQ(-1, out7[0]);\n  EXPECT_EQ(0, out7[1]);\n  EXPECT_EQ(2, out7[2]);\n  EXPECT_EQ(0, out7[3]);\n\n  std::vector<uint16_t> out8;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", ',', &out8);\n  EXPECT_EQ(4u, out8.size());\n  EXPECT_EQ(65535u, out8[0]);\n  EXPECT_EQ(0u, out8[1]);\n  EXPECT_EQ(2u, out8[2]);\n  EXPECT_EQ(0u, out8[3]);\n}\n\nTEST(StringHelper, 
SplitWithTValidDelimeter) {\n  std::vector<int32_t> out1;\n  ailego::StringHelper::Split(\"12321\", '2', &out1);\n  EXPECT_EQ(3u, out1.size());\n  EXPECT_EQ(1, out1[0]);\n  EXPECT_EQ(3, out1[1]);\n  EXPECT_EQ(1, out1[2]);\n\n  std::vector<double> out2;\n  ailego::StringHelper::Split(\"300e30e3\", 'e', &out2);\n  EXPECT_EQ(3u, out2.size());\n  EXPECT_DOUBLE_EQ(300.0f, out2[0]);\n  EXPECT_DOUBLE_EQ(30.0f, out2[1]);\n  EXPECT_DOUBLE_EQ(3.0f, out2[2]);\n}\n\nTEST(StringHelper, SplitByString) {\n  std::string sep = \",\";\n  std::vector<int32_t> out1;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out1);\n  EXPECT_EQ(4u, out1.size());\n  EXPECT_EQ(-1, out1[0]);\n  EXPECT_EQ(0, out1[1]);\n  EXPECT_EQ(2, out1[2]);\n  EXPECT_EQ(0, out1[3]);\n\n  std::vector<uint32_t> out2;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out2);\n  EXPECT_EQ(4u, out2.size());\n  EXPECT_EQ(0xffffffffu, out2[0]);\n  EXPECT_EQ(0u, out2[1]);\n  EXPECT_EQ(2u, out2[2]);\n  EXPECT_EQ(0u, out2[3]);\n\n  std::vector<int64_t> out3;\n  ailego::StringHelper::Split(\"-1.0, tt, 2.3,\", sep, &out3);\n  EXPECT_EQ(4u, out3.size());\n  EXPECT_EQ(-1, out3[0]);\n  EXPECT_EQ(0, out3[1]);\n  EXPECT_EQ(2, out3[2]);\n  EXPECT_EQ(0, out3[3]);\n\n  std::vector<uint64_t> out4;\n  ailego::StringHelper::Split(\"-1.0, tt, 2.3,\", sep, &out4);\n  EXPECT_EQ(4u, out4.size());\n  EXPECT_EQ((uint64_t)-1, out4[0]);\n  EXPECT_EQ(0u, out4[1]);\n  EXPECT_EQ(2u, out4[2]);\n  EXPECT_EQ(0u, out4[3]);\n\n  std::vector<int8_t> out5;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out5);\n  EXPECT_EQ(4u, out5.size());\n  EXPECT_EQ(-1, out5[0]);\n  EXPECT_EQ(0, out5[1]);\n  EXPECT_EQ(2, out5[2]);\n  EXPECT_EQ(0, out5[3]);\n\n  std::vector<uint8_t> out6;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out6);\n  EXPECT_EQ(4u, out6.size());\n  EXPECT_EQ(255u, out6[0]);\n  EXPECT_EQ(0u, out6[1]);\n  EXPECT_EQ(2u, out6[2]);\n  EXPECT_EQ(0u, out6[3]);\n\n  std::vector<int16_t> out7;\n  
ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out7);\n  EXPECT_EQ(4u, out7.size());\n  EXPECT_EQ(-1, out7[0]);\n  EXPECT_EQ(0, out7[1]);\n  EXPECT_EQ(2, out7[2]);\n  EXPECT_EQ(0, out7[3]);\n\n  std::vector<uint16_t> out8;\n  ailego::StringHelper::Split(\"-1.0, tt, 2,\", sep, &out8);\n  EXPECT_EQ(4u, out8.size());\n  EXPECT_EQ(65535u, out8[0]);\n  EXPECT_EQ(0u, out8[1]);\n  EXPECT_EQ(2u, out8[2]);\n  EXPECT_EQ(0u, out8[3]);\n}\n\nTEST(StringHelper, Trim) {\n  std::string aaa = \"  \\t123 45 67\\t\\n8\\r \\n\";\n  EXPECT_EQ(\"123 45 67\\t\\n8\\r \\n\", ailego::StringHelper::CopyLeftTrim(aaa));\n  EXPECT_EQ(\"  \\t123 45 67\\t\\n8\", ailego::StringHelper::CopyRightTrim(aaa));\n  EXPECT_EQ(\"123 45 67\\t\\n8\", ailego::StringHelper::CopyTrim(aaa));\n\n  std::string bbb = \"  \\t123 45 67\\t\\n8\\r \\n\";\n  ailego::StringHelper::LeftTrim(bbb);\n  EXPECT_EQ(\"123 45 67\\t\\n8\\r \\n\", bbb);\n\n  std::string ccc = \"  \\t123 45 67\\t\\n8\\r \\n\";\n  ailego::StringHelper::RightTrim(ccc);\n  EXPECT_EQ(\"  \\t123 45 67\\t\\n8\", ccc);\n\n  std::string ddd = \"  \\t123 45 67\\t\\n8\\r \\n\";\n  ailego::StringHelper::Trim(ddd);\n  EXPECT_EQ(\"123 45 67\\t\\n8\", ddd);\n}\n\nTEST(StringHelper, CompareIgnoreCase) {\n  {\n    std::string a = \"a b\\tc\\nd\";\n    std::string b = \"A B\\tC\\nd\";\n    EXPECT_TRUE(ailego::StringHelper::CompareIgnoreCase(a, b));\n  }\n  {\n    std::string a = \"a d\\tc\\nd\";\n    std::string b = \"A B\\tC\\nd\";\n    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(a, b));\n  }\n  {\n    std::string a = \"a d\\tc\\n\";\n    std::string b = \"A B\\tC\\nd\";\n    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(a, b));\n  }\n  {\n    std::string a = \"A D\\tc\\n123456\";\n    std::string b = \"A d\\tC\\n123456\";\n    EXPECT_TRUE(ailego::StringHelper::CompareIgnoreCase(a, b));\n  }\n  {\n    std::string a = \"A D\\tc\\n123456\";\n    std::string b = \"\";\n    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(a, b));\n  
}\n}\n\nnamespace zvec::ailego {\nnamespace testing {\n\nTEST(StringHelperJoinAppend, Integer) {\n  short a = -1;\n  unsigned short b = 2;\n  long c = -3;\n  unsigned long d = 4;\n  long long e = -5;\n  unsigned long long f = 6;\n  ssize_t g = -7;\n  size_t h = 8;\n  auto res = StringHelper::Concat(a, b, c, d, e, f, g, h);\n  EXPECT_EQ(res, \"-12-34-56-78\");\n  std::string str = \"TEST\";\n  StringHelper::Append(&str, a, b, c, d, e, f, g, h);\n  EXPECT_EQ(str, \"TEST-12-34-56-78\");\n}\n\nTEST(StringHelperJoinAppend, SizedInteger) {\n  int8_t a = -1;\n  uint8_t b = 2;\n  int16_t c = -3;\n  uint16_t d = 4;\n  int32_t e = -5;\n  uint32_t f = 6;\n  int64_t g = -7;\n  uint64_t h = 8;\n  EXPECT_EQ(\"-12\", StringHelper::Concat(a, b));\n  EXPECT_EQ(\"-12-3\", StringHelper::Concat(a, b, c));\n  EXPECT_EQ(\"4-5\", StringHelper::Concat(d, e));\n  EXPECT_EQ(\"-78\", StringHelper::Concat(g, h));\n\n  auto res = StringHelper::Concat(a, b, c, d, e, f, g, h);\n  EXPECT_EQ(res, \"-12-34-56-78\");\n  std::string str = \"TEST\";\n  StringHelper::Append(&str, a, b, c, d, e, f, g, h);\n  EXPECT_EQ(str, \"TEST-12-34-56-78\");\n}\n\nTEST(StringHelperJoinAppend, MinMax) {\n  auto a = StringHelper::Concat(\n      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),\n      std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max(),\n      std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max(),\n      std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max());\n  EXPECT_EQ(a,\n            \"-128127-3276832767-21474836482147483647-\"\n            \"92233720368547758089223372036854775807\");\n}\n\nTEST(StringHelperJoinAppend, Float) {\n  float f = 3.14f;\n  double d = 6.28;\n  long double ld = 9.42;\n  auto a = StringHelper::Concat(\n      f, d, ld, NAN, INFINITY, std::numeric_limits<float>::min(),\n      std::numeric_limits<float>::max(), std::numeric_limits<double>::min(),\n      std::numeric_limits<double>::max());\n  
EXPECT_EQ(a,\n            \"3.146.289.42naninf1.17549e-383.40282e+382.22507e-3081.79769e+308\");\n}\n\nTEST(StringHelperJoinAppend, Enums) {\n  enum { kOne = 1, kTen = 10 };\n  enum class A : int64_t { kFirst = 100, kLast = 10000 };\n  auto a = StringHelper::Concat(kOne, kTen, A::kFirst, A::kLast);\n  EXPECT_EQ(a, \"11010010000\");\n}\n\nTEST(StringHelperJoinAppend, String) {\n  auto a = StringHelper::Concat(\"a\", std::string{\"b\"}, \"c\", std::string{\"d\"});\n  EXPECT_EQ(a, \"abcd\");\n  auto b = StringHelper::Concat(\"aaaa\", std::string{\"bbbb\"}, \"cccc\",\n                                std::string{\"dddd\"});\n  EXPECT_EQ(b, \"aaaabbbbccccdddd\");\n  auto c = StringHelper::Concat(\"aaaaaaaa\", std::string{\"bbbbbbbb\"}, \"cccccccc\",\n                                std::string{\"dddddddd\"});\n  EXPECT_EQ(c, \"aaaaaaaabbbbbbbbccccccccdddddddd\");\n}\n\nTEST(StringHelperJoinAppend, ArbitaryNumberOfArguments) {\n  auto a = StringHelper::Concat(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, \"a\", \"b\", \"c\",\n                                \"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\",\n                                \"m\", \"n\", \"o\", \"p\", \"q\", \"r\", \"s\", \"t\", \"u\",\n                                \"v\", \"w\", \"x\", \"y\", \"z\");\n  EXPECT_EQ(a, \"0123456789abcdefghijklmnopqrstuvwxyz\");\n\n  std::string str = \"TEST\";\n  StringHelper::Append(&str, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, \"a\", \"b\", \"c\", \"d\",\n                       \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\", \"m\", \"n\", \"o\",\n                       \"p\", \"q\", \"r\", \"s\", \"t\", \"u\", \"v\", \"w\", \"x\", \"y\", \"z\");\n  EXPECT_EQ(str, \"TEST0123456789abcdefghijklmnopqrstuvwxyz\");\n}\n\nTEST(StringHelperJoinAppend, Empty) {\n  for (const char *t :\n       {\"\", \"short string\", \"a very very very very very long string\"}) {\n    EXPECT_EQ(t, StringHelper::Concat(t));\n    EXPECT_EQ(t, StringHelper::Concat(t, \"\"));\n    EXPECT_EQ(t, 
StringHelper::Concat(t, \"\", \"\"));\n    EXPECT_EQ(t, StringHelper::Concat(t, \"\", \"\", \"\"));\n    EXPECT_EQ(t, StringHelper::Concat(t, \"\", \"\", \"\", \"\"));\n    EXPECT_EQ(t, StringHelper::Concat(t, \"\", std::string{}, \"\", \"\", \"\"));\n    EXPECT_EQ(t, StringHelper::Concat(t, \"\", std::string{}, \"\", std::string{},\n                                      \"\", \"\"));\n\n    std::string str = t;\n    StringHelper::Append(&str);\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\");\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\", \"\");\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\", \"\", \"\");\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\", \"\", \"\", \"\");\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\", std::string{}, \"\", \"\", \"\");\n    EXPECT_EQ(str, t);\n    StringHelper::Append(&str, \"\", std::string{}, \"\", std::string{}, \"\", \"\");\n    EXPECT_EQ(str, t);\n  }\n}\n\nTEST(StringHelperJoinAppend, StringView) {\n  StringView v1 = \"hello\";\n  StringView v2 = v1;\n  StringView v3 = nullptr;\n  std::string foo = \"foo\";\n  StringView v4 = foo;\n  StringView v5 = \"bar\";\n  StringView v6{v1.data() + 2, 2};\n  auto s = StringHelper::Concat(v1, v2, v3, v4, v5, v6);\n  EXPECT_EQ(s, \"hellohellofoobarll\");\n}\n\nTEST(StringHelper, SplitWithEmptySkipped) {\n  std::vector<std::string> out;\n\n  ailego::StringHelper::Split(\"\", \",\", &out, true);\n  EXPECT_EQ(0u, out.size());\n\n  ailego::StringHelper::Split(\";1;\", ';', &out, true);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"1\", out[0]);\n\n  ailego::StringHelper::Split(\";;;\", \";\", &out, true);\n  EXPECT_EQ(0u, out.size());\n\n  ailego::StringHelper::Split(\";;;1\", ';', &out, true);\n  EXPECT_EQ(1u, out.size());\n  EXPECT_EQ(\"1\", out[0]);\n}\n\n}  // namespace testing\n}  // namespace zvec::ailego\n"
  },
  {
    "path": "tests/ailego/utility/time_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <chrono>\n#include <thread>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/time_helper.h>\n\nusing namespace zvec;\n\nTEST(TimeHelper, Monotime) {\n  std::cout << \"NanoSeconds: \" << ailego::Monotime::NanoSeconds() << std::endl;\n  std::cout << \"MicroSeconds: \" << ailego::Monotime::MicroSeconds()\n            << std::endl;\n  std::cout << \"MilliSeconds: \" << ailego::Monotime::MilliSeconds()\n            << std::endl;\n  std::cout << \"Seconds: \" << ailego::Monotime::Seconds() << std::endl;\n}\n\nTEST(TimeHelper, Realtime) {\n  std::cout << \"NanoSeconds: \" << ailego::Realtime::NanoSeconds() << std::endl;\n  std::cout << \"MicroSeconds: \" << ailego::Realtime::MicroSeconds()\n            << std::endl;\n  std::cout << \"MilliSeconds: \" << ailego::Realtime::MilliSeconds()\n            << std::endl;\n  std::cout << \"Seconds: \" << ailego::Realtime::Seconds() << std::endl;\n\n  uint64_t now = ailego::Realtime::Seconds();\n  std::cout << \"Localtime: \" << ailego::Realtime::Localtime(now) << std::endl;\n  std::cout << \"Gmtime: \" << ailego::Realtime::Gmtime(now) << std::endl;\n  std::cout << \"Localtime: \" << ailego::Realtime::Localtime() << std::endl;\n  std::cout << \"Gmtime: \" << ailego::Realtime::Gmtime() << std::endl;\n}\n\nTEST(TimeHelper, ElapsedTime) {\n  ailego::ElapsedTime stamp;\n  std::cout << \"elapsed: 
\" << stamp.nano_seconds() << \" ns\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.micro_seconds() << \" us\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.seconds() << \" s\" << std::endl;\n  std::this_thread::sleep_for(std::chrono::milliseconds(101));\n\n  stamp.reset();\n  std::cout << \"elapsed: \" << stamp.nano_seconds() << \" ns\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.micro_seconds() << \" us\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.seconds() << \" s\" << std::endl;\n  std::this_thread::sleep_for(std::chrono::milliseconds(101));\n\n  stamp.reset();\n  std::cout << \"elapsed: \" << stamp.nano_seconds() << \" ns\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.micro_seconds() << \" us\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.seconds() << \" s\" << std::endl;\n  std::this_thread::sleep_for(std::chrono::milliseconds(101));\n\n  stamp.reset();\n  std::cout << \"elapsed: \" << stamp.nano_seconds() << \" ns\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.micro_seconds() << \" us\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.seconds() << \" s\" << std::endl;\n  std::this_thread::sleep_for(std::chrono::milliseconds(101));\n\n  std::cout << \"elapsed: \" << stamp.nano_seconds() << \" ns\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.micro_seconds() << \" us\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.milli_seconds() << \" ms\" << std::endl;\n  std::cout << \"elapsed: \" << stamp.seconds() << \" s\" << std::endl;\n}\n"
  },
  {
    "path": "tests/ailego/utility/type_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/ailego/utility/type_helper.h>\n\nusing namespace zvec;\n\nTEST(TypeHelper, IsArithmetic) {\n  EXPECT_TRUE(ailego::IsArithmetic<uintptr_t>::value);\n  EXPECT_TRUE(ailego::IsArithmetic<int>::value);\n  EXPECT_TRUE(ailego::IsArithmetic<double>::value);\n  EXPECT_TRUE(ailego::IsArithmetic<float>::value);\n  EXPECT_TRUE(ailego::IsArithmetic<ailego::Float16>::value);\n  EXPECT_FALSE(ailego::IsArithmetic<void>::value);\n}\n\nTEST(TypeHelper, IsFloatingPoint) {\n  EXPECT_FALSE(ailego::IsFloatingPoint<long>::value);\n  EXPECT_FALSE(ailego::IsFloatingPoint<int>::value);\n  EXPECT_TRUE(ailego::IsFloatingPoint<double>::value);\n  EXPECT_TRUE(ailego::IsFloatingPoint<float>::value);\n  EXPECT_TRUE(ailego::IsFloatingPoint<ailego::Float16>::value);\n  EXPECT_FALSE(ailego::IsFloatingPoint<void>::value);\n}\n\ntemplate <typename... TArgs,\n          typename = typename std::enable_if<\n              ailego::Conjunction<std::is_integral<TArgs>...>::value>::type>\nstatic bool TrueAnd(TArgs...) {\n  return true;\n}\n\ntemplate <typename... TArgs,\n          typename = typename std::enable_if<\n              !ailego::Conjunction<std::is_integral<TArgs>...>::value>::type>\nstatic bool FalseAnd(TArgs...) {\n  return false;\n}\n\ntemplate <typename... 
TArgs,\n          typename = typename std::enable_if<\n              ailego::Disjunction<std::is_integral<TArgs>...>::value>::type>\nstatic bool TrueOr(TArgs...) {\n  return true;\n}\n\ntemplate <typename... TArgs,\n          typename = typename std::enable_if<\n              !ailego::Disjunction<std::is_integral<TArgs>...>::value>::type>\nstatic bool FalseOr(TArgs...) {\n  return false;\n}\n\nTEST(TypeHelper, Conjunction) {\n  EXPECT_TRUE(TrueAnd(1, 2, 2u, 0u));\n  EXPECT_FALSE(FalseAnd(1, 2, 2u, \"\"));\n  EXPECT_FALSE(FalseAnd(1, 2, 2u, 0.0));\n}\n\nTEST(TypeHelper, Disjunction) {\n  EXPECT_TRUE(TrueOr(1, 2, 2u, \"\"));\n  EXPECT_TRUE(TrueOr(0.0, \"\", 0u));\n  EXPECT_FALSE(FalseOr(\"\", \"\"));\n  EXPECT_FALSE(FalseOr(0.0, \"\"));\n}\n\nstruct TriviallyStruct {\n  float a;\n  uint32_t b;\n};\n\nTEST(TypeHelper, IsTriviallyCopyable) {\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<ailego::Float16>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<float>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<float>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<uint64_t>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<uint64_t *>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<void *>::value);\n  // EXPECT_FALSE(ailego::IsTriviallyCopyable<uint64_t &>::value);\n  EXPECT_TRUE(ailego::IsTriviallyCopyable<TriviallyStruct>::value);\n}\n\nTEST(TypeHelper, IsInvocable) {\n  EXPECT_TRUE(ailego::IsInvocable<int()>::value);\n\n  EXPECT_TRUE(!!(ailego::IsInvocableWithResult<int, int()>::value));\n  EXPECT_TRUE(!!(ailego::IsInvocableWithResult<void, void(int), int>::value));\n}\n\nstatic_assert(ailego::IsInvocable<int()>::value, \"\");\nstatic_assert(ailego::IsInvocableWithResult<int, int()>::value, \"\");\nstatic_assert(ailego::IsInvocableWithResult<void, void(int), int>::value, \"\");\n"
  },
  {
    "path": "tests/ailego/version_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ailego/version.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(Version, General) {\n  printf(\"Version: %s\\n\\n\", ailego::Version::String());\n  printf(\"%s\", ailego::Version::Details());\n}\n"
  },
  {
    "path": "tests/core/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_directories(algorithm)\ncc_directories(framework)\ncc_directories(metric)\ncc_directories(utility)\ncc_directories(interface)\ncc_directories(quantizer)"
  },
  {
    "path": "tests/core/algorithm/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_directories(cluster)\ncc_directories(flat)\ncc_directories(flat_sparse)\ncc_directories(ivf)\ncc_directories(hnsw)\ncc_directories(hnsw_sparse)\nif(RABITQ_SUPPORTED)\ncc_directories(hnsw_rabitq)\nendif()\n"
  },
  {
    "path": "tests/core/algorithm/cluster/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_knn_cluster\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/cluster/kmeans_cluster_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/params.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"zvec/core/framework/index_meta.h\"\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\nTEST(KmeansCluster, General) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 33u;\n\n  IndexMeta index_meta;\n  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);\n  index_meta.set_metric(\"SquaredEuclidean\", 0, zvec::ailego::Params());\n\n  std::shared_ptr<CompactIndexFeatures> features(\n      new CompactIndexFeatures(index_meta));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 5.0);\n\n  for (uint32_t i = 0; i < count; ++i) {\n    std::vector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n    features->emplace(vec.data());\n  }\n\n  // Create a Kmeans cluster\n  // IndexCluster::Pointer cluster = std::make_shared<KmeansCluster>();\n  IndexCluster::Pointer cluster = IndexFactory::CreateCluster(\"KmeansCluster\");\n  ASSERT_TRUE(!!cluster);\n\n  zvec::ailego::Params params;\n  params.set(\"proxima.general.cluster.count\", 1);\n  params.set(\"proxima.kmeans.cluster.count\", 56);\n\n  ASSERT_EQ(0, 
cluster->init(index_meta, params));\n  ASSERT_EQ(0, cluster->mount(features));\n  cluster->suggest(64u);\n\n  auto threads = std::make_shared<SingleQueueIndexThreads>();\n\n  std::cout << \"---------- FIRST ----------\\n\";\n  std::vector<IndexCluster::Centroid> centroids;\n  std::vector<uint32_t> labels;\n  ASSERT_NE(0, cluster->classify(threads, centroids));\n  ASSERT_NE(0, cluster->label(threads, centroids, &labels));\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- SECOND ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- THIRD ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  ASSERT_EQ(0, cluster->classify(threads, centroids));\n  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));\n}\n"
  },
  {
    "path": "tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cmath>\n#include <random>\n#include <ailego/algorithm/kmeans.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/params.h>\n#include \"zvec/core/framework/index_framework.h\"\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace zvec::ailego;\n\nTEST(OptKmeansCluster, General) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 33u;\n\n  IndexMeta index_meta;\n  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);\n\n  std::shared_ptr<CompactIndexFeatures> features(\n      new CompactIndexFeatures(index_meta));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 5.0);\n\n  for (uint32_t i = 0; i < count; ++i) {\n    std::vector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n    features->emplace(vec.data());\n  }\n\n  // Create a Kmeans cluster\n  IndexCluster::Pointer cluster =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster);\n\n  Params params;\n  params.set(\"proxima.general.cluster.count\", 1);\n  params.set(\"proxima.optkmeans.cluster.count\", 56);\n\n  ASSERT_EQ(0, cluster->init(index_meta, params));\n  ASSERT_EQ(0, cluster->mount(features));\n  cluster->suggest(64u);\n\n  auto threads = 
std::make_shared<SingleQueueIndexThreads>();\n\n  std::cout << \"---------- FIRST ----------\\n\";\n  std::vector<IndexCluster::Centroid> centroids;\n  std::vector<uint32_t> labels;\n  ASSERT_NE(0, cluster->classify(threads, centroids));\n  ASSERT_NE(0, cluster->label(threads, centroids, &labels));\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- SECOND ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- THIRD ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... 
, \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  ASSERT_EQ(0, cluster->classify(threads, centroids));\n  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));\n}\n\n// TEST(OptKmeansCluster, NoEmptyCentroids) {\n//   // Prepare index data\n//   const uint32_t count = 500u;\n//   const uint32_t dimension = 8u;\n\n//   IndexMeta index_meta;\n//   index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);\n//   index_meta.set_metric(\"SquaredEuclidean\", 0, Params());\n\n//   std::shared_ptr<CompactIndexFeatures> features(\n//       new CompactIndexFeatures(index_meta));\n\n//   std::random_device rd;\n//   std::mt19937 gen(rd());\n//   std::uniform_real_distribution<float> dist(0.0, 5.0);\n\n//   for (uint32_t i = 0; i < count; ++i) {\n//     std::vector<float> vec(dimension);\n//     for (size_t j = 0; j < dimension; ++j) {\n//       vec[j] = dist(gen);\n//     }\n//     features->emplace(vec.data());\n//   }\n\n//   // Create a Kmeans cluster\n//   IndexCluster::Pointer cluster =\n//       IndexFactory::CreateCluster(\"OptKmeansCluster\");\n//   ASSERT_TRUE(!!cluster);\n\n//   Params params;\n//   ASSERT_EQ(0, cluster->init(index_meta, params));\n//   ASSERT_EQ(0, cluster->mount(features));\n//   cluster->suggest(20u);\n\n//   auto threads = std::make_shared<SingleQueueIndexThreads>();\n//   std::vector<IndexCluster::Centroid> centroids;\n//   for (uint32_t i = 0; i < 3; ++i) {\n//     std::vector<float> vec(dimension);\n//     for (size_t j = 0; j < dimension; ++j) {\n//       vec[j] = NAN;\n//     }\n//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));\n//   }\n//   ASSERT_EQ(0, cluster->cluster(threads, centroids));\n//   ASSERT_EQ(3u, centroids.size());\n\n//   for (uint32_t i = 0; i < 3; ++i) {\n//     std::vector<float> vec(dimension);\n//     for (size_t j = 0; j < dimension; ++j) {\n//       vec[j] = dist(gen);\n//     }\n//     
centroids.emplace_back(vec.data(), vec.size() * sizeof(float));\n//   }\n//   ASSERT_EQ(0, cluster->cluster(threads, centroids));\n//   ASSERT_EQ(6u, centroids.size());\n\n//   for (uint32_t i = 0; i < 3; ++i) {\n//     std::vector<float> vec(dimension);\n//     for (size_t j = 0; j < dimension; ++j) {\n//       vec[j] = NAN;\n//     }\n//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));\n//   }\n//   ASSERT_EQ(0, cluster->cluster(threads, centroids));\n//   ASSERT_EQ(9u, centroids.size());\n\n//   for (uint32_t i = 0; i < 3; ++i) {\n//     std::vector<float> vec(dimension);\n//     for (size_t j = 0; j < dimension; ++j) {\n//       vec[j] = dist(gen);\n//     }\n//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));\n//   }\n//   ASSERT_EQ(0, cluster->cluster(threads, centroids));\n//   ASSERT_EQ(12u, centroids.size());\n\n//   for (const auto &it : centroids) {\n//     const auto &vec = it.vector<float>();\n\n//     std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \",\n//     \"\n//               << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() -\n//               2]\n//               << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n//   }\n\n//   params.set(\"proxima.optkmeans.cluster.purge_empty\", true);\n//   cluster->update(params);\n\n//   ASSERT_EQ(12u, centroids.size());\n//   ASSERT_EQ(0, cluster->cluster(threads, centroids));\n//   ASSERT_EQ(7u, centroids.size());\n//   for (const auto &it : centroids) {\n//     const auto &vec = it.vector<float>();\n\n//     std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \",\n//     \"\n//               << vec[1] << \", \" << vec[2] << \", ... 
, \" << vec[vec.size() -\n//               2]\n//               << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n//   }\n// }\n\nTEST(OptKmeansCluster, BinaryGeneral) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 1024u;\n\n  IndexMeta index_meta;\n  index_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dimension);\n  index_meta.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  std::shared_ptr<CompactIndexFeatures> features(\n      new CompactIndexFeatures(index_meta));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  for (uint32_t i = 0; i < count; ++i) {\n    BinaryVector<uint32_t> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      if (dist(gen) >= 0.5) {\n        vec.set(j);\n      }\n    }\n    features->emplace(vec.data());\n  }\n\n  std::cout << \"---------- FIRST ----------\\n\";\n\n  // Create a Kmeans cluster\n  IndexCluster::Pointer cluster =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster);\n\n  Params params;\n  params.set(\"proxima.general.cluster.count\", 1);\n  params.set(\"proxima.optkmeans.cluster.count\", 56);\n\n  ASSERT_EQ(0, cluster->init(index_meta, params));\n  ASSERT_EQ(0, cluster->mount(features));\n  cluster->suggest(64u);\n\n  auto threads = std::make_shared<SingleQueueIndexThreads>();\n\n  std::cout << \"---------- FIRST ----------\\n\";\n  std::vector<IndexCluster::Centroid> centroids;\n  std::vector<uint32_t> labels;\n  ASSERT_NE(0, cluster->classify(threads, centroids));\n  ASSERT_NE(0, cluster->label(threads, centroids, &labels));\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<uint32_t>();\n\n    uint mask = 0x1;\n    std::cout << it.follows() << \" (\" << it.score() << \") { \"\n              << !!(vec[0] & mask) << \", \" << !!(vec[0] & (mask << 1)) << \", \"\n              << 
!!(vec[0] & (mask << 2)) << \", ... , \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 2))) << \", \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 1))) << \" }\"\n              << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- SECOND ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<uint32_t>();\n\n    uint mask = 0x1;\n    std::cout << it.follows() << \" (\" << it.score() << \") { \"\n              << !!(vec[0] & mask) << \", \" << !!(vec[0] & (mask << 1)) << \", \"\n              << !!(vec[0] & (mask << 2)) << \", ... , \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 2))) << \", \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 1))) << \" }\"\n              << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- THIRD ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<uint32_t>();\n\n    uint mask = 0x1;\n    std::cout << it.follows() << \" (\" << it.score() << \") { \"\n              << !!(vec[0] & mask) << \", \" << !!(vec[0] & (mask << 1)) << \", \"\n              << !!(vec[0] & (mask << 2)) << \", ... 
, \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 2))) << \", \"\n              << !!(vec[0] & (mask << !!(sizeof(uint32_t) - 1))) << \" }\"\n              << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  ASSERT_EQ(0, cluster->classify(threads, centroids));\n  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));\n}\n\n\nTEST(OptKmeansCluster, IN4General) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 64u;\n  const uint32_t dimension_wrong = 66u;\n\n  IndexMeta index_meta;\n  index_meta.set_meta(IndexMeta::DataType::DT_INT4, dimension);\n  index_meta.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  IndexMeta index_meta_wrong;\n  index_meta_wrong.set_meta(IndexMeta::DataType::DT_INT4, dimension_wrong);\n  index_meta_wrong.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  std::shared_ptr<CompactIndexFeatures> features(\n      new CompactIndexFeatures(index_meta));\n\n  std::shared_ptr<CompactIndexFeatures> features_wrong(\n      new CompactIndexFeatures(index_meta_wrong));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<uint8_t> dist(0, UINT8_MAX);\n\n  for (uint32_t i = 0; i < count; ++i) {\n    std::vector<uint8_t> vec(dimension / 2);\n    std::vector<uint8_t> vec_wrong(dimension_wrong / 2);\n    for (size_t j = 0; j < dimension / 2; ++j) {\n      vec[j] = dist(gen);\n    }\n    for (size_t j = 0; j < dimension_wrong / 2; ++j) {\n      vec_wrong[j] = dist(gen);\n    }\n    features->emplace(vec.data());\n    features_wrong->emplace(vec_wrong.data());\n  }\n\n  // Create a OptKmeans cluster\n  IndexCluster::Pointer cluster =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster);\n\n  Params params;\n  ASSERT_EQ(0, cluster->init(index_meta_wrong, params));\n  ASSERT_NE(0, cluster->mount(features_wrong));\n\n  params.set(\"proxima.general.cluster.count\", 1);\n  params.set(\"proxima.optkmeans.cluster.count\", 
56);\n\n  ASSERT_EQ(0, cluster->init(index_meta, params));\n  ASSERT_EQ(0, cluster->mount(features));\n  cluster->suggest(64u);\n\n  auto threads = std::make_shared<SingleQueueIndexThreads>();\n\n  std::cout << \"---------- FIRST ----------\\n\";\n  std::vector<IndexCluster::Centroid> centroids;\n  std::vector<uint32_t> labels;\n  ASSERT_NE(0, cluster->classify(threads, centroids));\n  ASSERT_NE(0, cluster->label(threads, centroids, &labels));\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- SECOND ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- THIRD ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... 
, \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  ASSERT_EQ(0, cluster->classify(threads, centroids));\n  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));\n}\n\n\nTEST(OptKmeansCluster, IN4Correctness) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 64u;\n\n  IndexMeta index_meta1;\n  index_meta1.set_meta(IndexMeta::DataType::DT_INT8, dimension);\n  index_meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  IndexMeta index_meta2;\n  index_meta2.set_meta(IndexMeta::DataType::DT_INT4, dimension);\n  index_meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  std::shared_ptr<CompactIndexFeatures> features1(\n      new CompactIndexFeatures(index_meta1));\n\n  std::shared_ptr<CompactIndexFeatures> features2(\n      new CompactIndexFeatures(index_meta2));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<int> dist(-8, 7);\n\n  // Generate features\n  for (size_t i = 0; i < count; ++i) {\n    NumericalVector<int8_t> vec1(dimension);\n    NibbleVector<int32_t> vec2(dimension);\n\n    for (size_t j = 0; j < dimension; ++j) {\n      int8_t val = (int8_t)dist(gen);\n      vec1[j] = val;\n      vec2.set(j, val);\n    }\n    features1->emplace(vec1.data());\n    features2->emplace(vec2.data());\n  }\n\n  // Create a OptKmeans cluster of int8, and cluster only once\n  IndexCluster::Pointer cluster_once =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster_once);\n\n  Params params_once;\n  params_once.set(\"proxima.general.cluster.count\", 65);\n  params_once.set(\"proxima.optkmeans.cluster.count\", 63);\n  params_once.set(\"proxima.optkmeans.cluster.max_iterations\", 1);\n  // Use KMC2 to init centroids\n  params_once.set(\"proxima.optkmeans.cluster.markov_chain_length\", 20);\n\n  ASSERT_EQ(0, cluster_once->init(index_meta1, params_once));\n  
ASSERT_EQ(0, cluster_once->mount(features1));\n  cluster_once->suggest(63);\n\n  auto threads = std::make_shared<SingleQueueIndexThreads>();\n\n  // Cluster once and get centroids\n  std::vector<IndexCluster::Centroid> centroids1;\n  ASSERT_EQ(0, cluster_once->cluster(threads, centroids1));\n\n  // Use centroids_one as init centroids to both int8 and int4 cluster\n  // Create a int8 cluster\n  IndexCluster::Pointer cluster_int8 =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster_int8);\n\n  Params params_int8;\n  params_int8.set(\"proxima.general.cluster.count\", 65);\n  params_int8.set(\"proxima.optkmeans.cluster.count\", 63);\n\n  ASSERT_EQ(0, cluster_int8->init(index_meta1, params_int8));\n  ASSERT_EQ(0, cluster_int8->mount(features1));\n  cluster_int8->suggest(63u);\n\n  // Create a int4 cluster\n  IndexCluster::Pointer cluster_int4 =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster_int4);\n\n  Params params_int4;\n  params_int4.set(\"proxima.general.cluster.count\", 65);\n  params_int4.set(\"proxima.optkmeans.cluster.count\", 63);\n\n  ASSERT_EQ(0, cluster_int4->init(index_meta2, params_int4));\n  ASSERT_EQ(0, cluster_int4->mount(features2));\n  cluster_int4->suggest(63u);\n\n  std::vector<IndexCluster::Centroid> centroids2;\n\n  // Use centroids of int8 to init centroids of int4\n  for (size_t i = 0; i < centroids1.size(); ++i) {\n    NibbleVector<int8_t> nvec;\n    nvec.assign(reinterpret_cast<const int8_t *>(centroids1[i].feature()),\n                dimension);\n    IndexCluster::Centroid curr_centroid;\n    curr_centroid.set_score(centroids1[i].score());\n    curr_centroid.set_follows(centroids1[i].follows());\n    curr_centroid.set_feature(nvec.data(), nvec.dimension() >> 1);\n    centroids2.push_back(curr_centroid);\n  }\n\n  ASSERT_EQ(0, cluster_int8->cluster(threads, centroids1));\n  ASSERT_EQ(0, cluster_int4->cluster(threads, centroids2));\n\n  EXPECT_EQ(centroids1.size(), 
centroids2.size());\n  for (size_t i = 0; i < centroids1.size(); ++i) {\n    EXPECT_EQ(centroids1[i].follows(), centroids2[i].follows());\n    EXPECT_DOUBLE_EQ(centroids1[i].score(), centroids2[i].score());\n  }\n}\n\nTEST(OptKmeansCluster, InnerProduct) {\n  // Prepare index data\n  const uint32_t count = 5000u;\n  const uint32_t dimension = 33u;\n\n  IndexMeta index_meta;\n  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);\n  index_meta.set_metric(\"InnerProduct\", 0, Params());\n\n  std::shared_ptr<CompactIndexFeatures> features(\n      new CompactIndexFeatures(index_meta));\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (uint32_t i = 0; i < count; ++i) {\n    std::vector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n    features->emplace(vec.data());\n  }\n\n  // Create a Kmeans cluster\n  IndexCluster::Pointer cluster =\n      IndexFactory::CreateCluster(\"OptKmeansCluster\");\n  ASSERT_TRUE(!!cluster);\n\n  Params params;\n  params.set(\"proxima.general.cluster.count\", 1);\n  params.set(\"proxima.optkmeans.cluster.count\", 56);\n\n  ASSERT_EQ(0, cluster->init(index_meta, params));\n  ASSERT_EQ(0, cluster->mount(features));\n  cluster->suggest(64u);\n\n  auto threads = std::make_shared<SingleQueueIndexThreads>();\n\n  std::cout << \"---------- FIRST ----------\\n\";\n  std::vector<IndexCluster::Centroid> centroids;\n  std::vector<uint32_t> labels;\n  ASSERT_NE(0, cluster->classify(threads, centroids));\n  ASSERT_NE(0, cluster->label(threads, centroids, &labels));\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... 
, \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- SECOND ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  std::cout << \"---------- THIRD ----------\\n\";\n  ASSERT_EQ(0, cluster->cluster(threads, centroids));\n\n  for (const auto &it : centroids) {\n    const auto &vec = it.vector<float>();\n\n    std::cout << it.follows() << \" (\" << it.score() << \") { \" << vec[0] << \", \"\n              << vec[1] << \", \" << vec[2] << \", ... , \" << vec[vec.size() - 2]\n              << \", \" << vec[vec.size() - 1] << \" }\" << std::endl;\n    ASSERT_EQ(0u, it.similars().size());\n  }\n\n  ASSERT_EQ(0, cluster->classify(threads, centroids));\n  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));\n}\n"
  },
  {
    "path": "tests/core/algorithm/flat/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_flat \n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/flat/flat_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat/flat_builder.h\"\n#include <future>\n#include <iostream>\n#include <vector>\n#include <gtest/gtest.h>\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\nstatic inline size_t RandomDimension(void) {\n  std::mt19937 gen((std::random_device())());\n  return (std::uniform_int_distribution<size_t>(1, 129))(gen);\n}\n\nstatic size_t DIMENSION = RandomDimension();\nclass FlatBuilderTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n public:\n  static std::string dir_;\n  static IndexMeta meta_;\n};\n\nstd::string FlatBuilderTest ::dir_(\"flat_builder_test\");\nIndexMeta FlatBuilderTest::meta_;\n\nvoid FlatBuilderTest::SetUp(void) {\n  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n}\n\n//! 
self-check column-major and row-major search.\nvoid FlatBuilderTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid build_process(IndexBuilder::Pointer &builder,\n                   IndexHolder::Pointer holder) {\n  Params params;\n  ASSERT_EQ(0, builder->init(FlatBuilderTest::meta_, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  std::string path = FlatBuilderTest::dir_ + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n}\n\nTEST_F(FlatBuilderTest, TestInitSuccess) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n}\n\nTEST_F(FlatBuilderTest, TestInitFailedWithInvalidMeasure) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n  meta_.set_metric(\"invalid\", 0, Params());\n  Params params;\n  int ret = builder->init(meta_, params);\n  EXPECT_EQ(IndexError_InvalidArgument, ret);\n}\n\nTEST_F(FlatBuilderTest, TestInt8InvalidColumnMajor) {\n  size_t dim = (DIMENSION + 3) / 4 * 4;\n  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim + 2);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  ASSERT_EQ(IndexMeta::MO_COLUMN, meta_.major_order());\n  Params params;\n  ASSERT_NE(0, builder->init(meta_, params));\n}\n\nTEST_F(FlatBuilderTest, TestInt8WithRandomDimension) {\n  
size_t dim = DIMENSION;\n  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_UNDEFINED);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n}\n\nTEST_F(FlatBuilderTest, TestBinaryInvalidColumnMajor) {\n  size_t dim = (DIMENSION + 31) / 32 * 32;\n  meta_.set_metric(\"Hamming\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_BINARY32, dim + 2);\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n}\n\nTEST_F(FlatBuilderTest, TestBuildWithRowMajor) {\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);\n  size_t doc_cnt = 2000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestInt8BuildWithRowMajor) {\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_INT8, DIMENSION);\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  
Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(DIMENSION);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<int8_t> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = (int8_t)(i % 128);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestBinaryBuildWithRowMajor) {\n  size_t dim = (DIMENSION + 31) / 32 * 32;\n  meta_.set_metric(\"Hamming\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_BINARY32, dim);\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < dim && j < i; ++j) {\n      vec.set(j);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestBuildWithColumnMajor) {\n  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      
std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);\n  size_t doc_cnt = 2000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestInt8BuildWithColumnMajor) {\n  size_t dim = (DIMENSION + 3) / 4 * 4;\n  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder = std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = (int8_t)(i % 128);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestBinaryBuildWithColumnMajor) {\n  size_t dim = (DIMENSION + 31) / 32 * 32;\n  meta_.set_metric(\"Hamming\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_BINARY32, dim);\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  ASSERT_EQ(0, builder->init(meta_, params));\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    
BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < dim && j < i; ++j) {\n      vec.set(j);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  int ret = builder->train(holder);\n  EXPECT_EQ(0, ret);\n\n  ret = builder->build(holder);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(FlatBuilderTest, TestWithRowMajor) {\n  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);\n  size_t doc_cnt = 2000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\nTEST_F(FlatBuilderTest, TestInt8WithRowMajor) {\n  meta_.set_meta(IndexMeta::DataType::DT_INT8, DIMENSION);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(DIMENSION);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<int8_t> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = (int8_t)(i % 128);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\nTEST_F(FlatBuilderTest, TestBinaryWithRowMajor) {\n  size_t dim 
= (DIMENSION + 31) / 32 * 32;\n  meta_.set_metric(\"Hamming\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_BINARY32, dim);\n  meta_.set_major_order(IndexMeta::MO_ROW);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < dim && j < i; ++j) {\n      vec.set(j);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\nTEST_F(FlatBuilderTest, TestWithColumnMajor) {\n  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);\n  size_t doc_cnt = 2000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\nTEST_F(FlatBuilderTest, TestInt8WithColumnMajor) {\n  size_t dim = (DIMENSION + 3) / 4 * 4;\n  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string 
path = dir_ + \"/TestGeneral\";\n\n  auto holder = std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = (int8_t)(i % 128);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\nTEST_F(FlatBuilderTest, TestBinaryWithColumnMajor) {\n  size_t dim = (DIMENSION + 31) / 32 * 32;\n  meta_.set_metric(\"Hamming\", 0, Params());\n  meta_.set_meta(IndexMeta::DT_BINARY32, dim);\n  meta_.set_major_order(IndexMeta::MO_COLUMN);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  Params params;\n  std::string path = dir_ + \"/TestGeneral\";\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  size_t doc_cnt = 128UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < dim && j < i; ++j) {\n      vec.set(j);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  build_process(builder, holder);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat/flat_searcher_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat/flat_searcher.h\"\n#include <future>\n#include <iostream>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"flat/flat_builder.h\"\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\nstatic const std::string INDEX_PATH = \"brute_force_searcher_test/out.indexes\";\n\nstatic void BuildIndex(const IndexMeta &meta, IndexHolder::Pointer holder,\n                       const std::string &path) {\n  auto builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n\n  ASSERT_NE(nullptr, builder);\n  ASSERT_NE(nullptr, dumper);\n\n  Params params;\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, IndexBuilder::TrainBuildAndDump(builder, holder, dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n}\n\nstatic void BuildIndex(const IndexMeta &meta, const Params &params,\n                       IndexHolder::Pointer holder, const std::string &path) {\n  auto builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n\n  ASSERT_NE(nullptr, 
builder);\n  ASSERT_NE(nullptr, dumper);\n\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, IndexBuilder::TrainBuildAndDump(builder, holder, dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n}\n\nstatic void LoadIndex(const std::string &path,\n                      IndexSearcher::Pointer &searcher) {\n  searcher = IndexFactory::CreateSearcher(\"FlatSearcher\");\n  auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n\n  ASSERT_NE(nullptr, searcher);\n  ASSERT_NE(nullptr, storage);\n\n  Params params;\n  ASSERT_EQ(0, searcher->init(params));\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n}\n\nstatic void Shuffle(std::vector<uint32_t> &keys) {\n  if (keys.size() <= 1) {\n    return;\n  }\n  for (size_t i = keys.size() - 1; i > 0; i--) {\n    std::mt19937 gen((std::random_device())());\n    std::uniform_int_distribution<size_t> dist(0, i);\n    size_t pos = dist(gen);\n    std::swap(keys[i], keys[pos]);\n  }\n}\n\nTEST(FlatSearcher, NoBatch_FP32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  
meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher2->create_context();\n  auto context5 = searcher1->create_context();\n  auto context6 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context4->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });\n  context4->set_filter([](uint64_t) { return false; });\n  context5->set_filter([](uint64_t) { return true; });\n  context6->set_filter([](uint64_t) { return true; });\n\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context1));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context2));\n    ASSERT_EQ(topk, context1->result().size());\n    ASSERT_EQ(topk, context2->result().size());\n\n    // Test shared context\n    ASSERT_EQ(\n        0, 
searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context4));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context3));\n    ASSERT_EQ(topk, context3->result().size());\n    ASSERT_EQ(topk, context4->result().size());\n\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context5));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP32, vec.dimension()),\n               context6));\n    ASSERT_EQ(0u, context5->result().size());\n    ASSERT_EQ(0u, context6->result().size());\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result();\n      auto &result2 = context2->result();\n      auto &result3 = context3->result();\n      auto &result4 = context4->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result =\n          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      ASSERT_EQ(result1[j].index(), result3[j].index());\n      ASSERT_EQ(result1[j].key(), result3[j].key());\n      score_cmp_result =\n          math_help.IsAlmostEqual(result1[j].score(), result3[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      ASSERT_EQ(result2[j].index(), result4[j].index());\n      ASSERT_EQ(result2[j].key(), result4[j].key());\n      score_cmp_result =\n          math_help.IsAlmostEqual(result2[j].score(), result4[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, NoBatch_FP16) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = 
std::uniform_real_distribution<float>(-1.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher2->create_context();\n  auto context5 = searcher1->create_context();\n  auto context6 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context4->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });\n  context4->set_filter([](uint64_t) { return false; });\n  context5->set_filter([](uint64_t) { return true; });\n  context6->set_filter([](uint64_t) { return true; });\n\n  uint32_t query_count = 
(std::uniform_int_distribution<size_t>(1, 100))(gen);\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context1));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context2));\n    ASSERT_EQ(topk, context1->result().size());\n    ASSERT_EQ(topk, context2->result().size());\n\n    // Test shared context\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context4));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context3));\n    ASSERT_EQ(topk, context3->result().size());\n    ASSERT_EQ(topk, context4->result().size());\n\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context5));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_FP16, vec.dimension()),\n               context6));\n    ASSERT_EQ(0u, context5->result().size());\n    ASSERT_EQ(0u, context6->result().size());\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result();\n      auto &result2 = context2->result();\n      auto &result3 = context3->result();\n      auto &result4 = context4->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result = math_help.IsAlmostEqual(\n          result1[j].score(), result2[j].score(), 
10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      ASSERT_EQ(result1[j].index(), result3[j].index());\n      ASSERT_EQ(result1[j].key(), result3[j].key());\n      score_cmp_result = math_help.IsAlmostEqual(result1[j].score(),\n                                                 result3[j].score(), 10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      ASSERT_EQ(result2[j].index(), result4[j].index());\n      ASSERT_EQ(result2[j].key(), result4[j].key());\n      score_cmp_result = math_help.IsAlmostEqual(result2[j].score(),\n                                                 result4[j].score(), 10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, NoBatch_INT8) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<int>(-127, 127);\n  size_t dim =\n      ((std::uniform_int_distribution<size_t>(1, 512))(gen) + 3) / 4 * 4;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", 
searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher2->create_context();\n  auto context5 = searcher1->create_context();\n  auto context6 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context4->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });\n  context4->set_filter([](uint64_t) { return false; });\n  context5->set_filter([](uint64_t) { return true; });\n  context6->set_filter([](uint64_t) { return true; });\n\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, vec.dimension()),\n               context1));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, vec.dimension()),\n               context2));\n    ASSERT_EQ(topk, context1->result().size());\n    ASSERT_EQ(topk, context2->result().size());\n\n    // Test shared context\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, vec.dimension()),\n               context4));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, vec.dimension()),\n               context3));\n    ASSERT_EQ(topk, context3->result().size());\n    ASSERT_EQ(topk, context4->result().size());\n\n    ASSERT_EQ(\n        0, searcher1->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, 
vec.dimension()),\n               context5));\n    ASSERT_EQ(\n        0, searcher2->search_impl(\n               vec.data(), IndexQueryMeta(IndexMeta::DT_INT8, vec.dimension()),\n               context6));\n    ASSERT_EQ(0u, context5->result().size());\n    ASSERT_EQ(0u, context6->result().size());\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result();\n      auto &result2 = context2->result();\n      auto &result3 = context3->result();\n      auto &result4 = context4->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n\n      ASSERT_EQ(result1[j].index(), result3[j].index());\n      ASSERT_EQ(result1[j].key(), result3[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result3[j].score());\n\n      ASSERT_EQ(result2[j].index(), result4[j].index());\n      ASSERT_EQ(result2[j].key(), result4[j].key());\n      ASSERT_FLOAT_EQ(result2[j].score(), result4[j].score());\n    }\n  }\n}\n\nTEST(FlatSearcher, NoBatch_Binary32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<uint32_t>(1, 512);\n  size_t dim = (dist(gen) + 31) / 32 * 32;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 3 == 0) {\n        vec.set(j);\n      }\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  
meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher2->create_context();\n  auto context5 = searcher1->create_context();\n  auto context6 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context4->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });\n  context4->set_filter([](uint64_t) { return false; });\n  context5->set_filter([](uint64_t) { return true; });\n  context6->set_filter([](uint64_t) { return true; });\n\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n  for (uint32_t i = 0; i < query_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 7 == 0) {\n        vec.set(j);\n      }\n    }\n    ASSERT_EQ(0, searcher1->search_impl(\n                     vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context1));\n    ASSERT_EQ(0, searcher2->search_impl(\n                     vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context2));\n    ASSERT_EQ(topk, context1->result().size());\n    ASSERT_EQ(topk, context2->result().size());\n\n    // Test shared context\n    ASSERT_EQ(0, searcher1->search_impl(\n         
            vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context4));\n    ASSERT_EQ(0, searcher2->search_impl(\n                     vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context3));\n    ASSERT_EQ(topk, context3->result().size());\n    ASSERT_EQ(topk, context4->result().size());\n\n    ASSERT_EQ(0, searcher1->search_impl(\n                     vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context5));\n    ASSERT_EQ(0, searcher2->search_impl(\n                     vec.data(),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, vec.dimension()),\n                     context6));\n    ASSERT_EQ(0u, context5->result().size());\n    ASSERT_EQ(0u, context6->result().size());\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result();\n      auto &result2 = context2->result();\n      auto &result3 = context3->result();\n      auto &result4 = context4->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n\n      ASSERT_EQ(result1[j].index(), result3[j].index());\n      ASSERT_EQ(result1[j].key(), result3[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result3[j].score());\n\n      ASSERT_EQ(result2[j].index(), result4[j].index());\n      ASSERT_EQ(result2[j].key(), result4[j].key());\n      ASSERT_FLOAT_EQ(result2[j].score(), result4[j].score());\n    }\n  }\n}\n\nTEST(FlatSearcher, RowBatch_FP32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, 
nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context1->set_filter([](uint64_t) { return false; });\n  context2->set_topk(topk);\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),\n                                      query_count, context1));\n\n  NumericalVector<float> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    ASSERT_EQ(0, searcher2->search_impl(\n                   
  (const float *)(&query_buffer[i * vec.bytes()]),\n                     IndexQueryMeta(IndexMeta::DT_FP32, dim), context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result =\n          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, RowBatch_FP16) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(-1.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 256))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = 
searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context2->set_filter([](uint64_t) { return false; });\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                      query_count, context1));\n\n  NumericalVector<Float16> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),\n                                        IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                        context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result = math_help.IsAlmostEqual(\n          result1[j].score(), result2[j].score(), 10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, RowBatch_INT8) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<int>(-127, 127);\n  size_t dim =\n      ((std::uniform_int_distribution<size_t>(1, 512))(gen) + 3) / 4 * 4;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = 
std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context1->set_filter([](uint64_t) { return false; });\n  context2->set_topk(topk);\n  context2->set_filter([](uint64_t) { return false; });\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                      query_count, context1));\n\n  NumericalVector<int8_t> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    ASSERT_EQ(0, 
searcher2->search_impl((&query_buffer[i * vec.bytes()]),\n                                        IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                        context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n    }\n  }\n}\n\nTEST(FlatSearcher, RowBatch_Binary32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<uint32_t>(1, 512);\n  size_t dim = (dist(gen) + 31) / 32 * 32;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 3 == 0) {\n        vec.set(j);\n      }\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  uint32_t topk 
= std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (uint32_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 7 == 0) {\n        vec.set(j);\n      }\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n  ASSERT_EQ(0,\n            searcher1->search_impl(query_buffer.data(),\n                                   IndexQueryMeta(IndexMeta::DT_BINARY32, dim),\n                                   query_count, context1));\n\n  BinaryVector<uint32_t> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    ASSERT_EQ(0, searcher2->search_impl(\n                     (&query_buffer[i * vec.bytes()]),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, dim), context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      ASSERT_EQ(result1[j].index(), result2[j].index());\n      ASSERT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n    }\n  }\n}\n\nTEST(FlatSearcher, ColumnBatch_Binary32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_int_distribution<uint32_t>(1, 512);\n  size_t dim = (dist(gen) + 31) / 32 * 32;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (size_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 3 == 0) {\n        vec.set(j);\n      }\n    
}\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n\n  Params params1;\n  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER, true);\n  BuildIndex(meta1, params1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher1->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });\n  context4->set_topk(topk);\n  context4->set_filter([](uint64_t) { return true; });\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    BinaryVector<uint32_t> vec(dim);\n    for (uint32_t j = 0; j < vec.dimension(); ++j) {\n      if (dist(gen) % 7 == 0) {\n        vec.set(j);\n      }\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n  ASSERT_EQ(0,\n            searcher1->search_impl(query_buffer.data(),\n                                   IndexQueryMeta(IndexMeta::DT_BINARY32, dim),\n                                   query_count, context1));\n  ASSERT_EQ(0,\n            searcher1->search_impl(query_buffer.data(),\n                                   IndexQueryMeta(IndexMeta::DT_BINARY32, dim),\n                         
          query_count, context3));\n  ASSERT_EQ(0,\n            searcher1->search_impl(query_buffer.data(),\n                                   IndexQueryMeta(IndexMeta::DT_BINARY32, dim),\n                                   query_count, context4));\n\n  BinaryVector<uint32_t> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    ASSERT_EQ(0, searcher1->search_impl(\n                     (&query_buffer[i * vec.bytes()]),\n                     IndexQueryMeta(IndexMeta::DT_BINARY32, dim), context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      auto &result3 = context3->result(i);\n      auto &result4 = context4->result(i);\n      EXPECT_TRUE(result4.empty());\n\n      EXPECT_EQ(result1[j].index(), result2[j].index());\n      EXPECT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n\n      EXPECT_EQ(result1[j].index(), result3[j].index());\n      EXPECT_EQ(result1[j].key(), result3[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result3[j].score());\n    }\n  }\n}\n\nTEST(FlatSearcher, ColumnBatch_FP32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  
meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);  // invalidated when setting\n                                             // column_major_order in params\n\n  Params params1;\n  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER,\n              true);  // make it MO_COLUMN\n  BuildIndex(meta1, params1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher1->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });  // same as no filter\n  context4->set_topk(topk);\n  context4->set_filter([](uint64_t) { return true; });  // filter all results\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),\n                                      query_count, context1));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),\n                                      
query_count, context3));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),\n                                      query_count, context4));\n\n  NumericalVector<float> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    // not batch\n    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),\n                                        IndexQueryMeta(IndexMeta::DT_FP32, dim),\n                                        context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      auto &result3 = context3->result(i);\n      auto &result4 = context4->result(i);\n      EXPECT_TRUE(result4.empty());\n\n      // batch result is equal to not batch result\n      EXPECT_EQ(result1[j].index(), result2[j].index());\n      EXPECT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result =\n          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      // test filter\n      EXPECT_EQ(result1[j].index(), result3[j].index());\n      EXPECT_EQ(result1[j].key(), result3[j].key());\n      score_cmp_result =\n          math_help.IsAlmostEqual(result1[j].score(), result3[j].score(), 10);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, ColumnBatch_FP16) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(-1.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 256))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n  for 
(uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);  // invalidated when setting\n                                             // column_major_order in params\n\n  Params params1;\n  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER,\n              true);  // make it MO_COLUMN\n  BuildIndex(meta1, params1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher1->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });  // same as no filter\n  context4->set_topk(topk);\n  context4->set_filter([](uint64_t) { return true; });  // filter all results\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<Float16> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n\n  ASSERT_EQ(0, 
searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                      query_count, context1));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                      query_count, context3));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                      query_count, context4));\n\n  NumericalVector<Float16> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    // not batch\n    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),\n                                        IndexQueryMeta(IndexMeta::DT_FP16, dim),\n                                        context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      auto &result3 = context3->result(i);\n      auto &result4 = context4->result(i);\n      EXPECT_TRUE(result4.empty());\n\n      // batch result is equal to not batch result\n      EXPECT_EQ(result1[j].index(), result2[j].index());\n      EXPECT_EQ(result1[j].key(), result2[j].key());\n      MathHelper math_help = MathHelper();\n      bool score_cmp_result = math_help.IsAlmostEqual(\n          result1[j].score(), result2[j].score(), 10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n\n      // test filter\n      EXPECT_EQ(result1[j].index(), result3[j].index());\n      EXPECT_EQ(result1[j].key(), result3[j].key());\n      score_cmp_result = math_help.IsAlmostEqual(result1[j].score(),\n                                                 result3[j].score(), 10000);\n      ASSERT_FLOAT_EQ(true, score_cmp_result);\n    }\n  }\n}\n\nTEST(FlatSearcher, ColumnBatch_INT8) {\n  std::mt19937 gen((std::random_device())());\n  auto 
dist = std::uniform_int_distribution<int>(-127, 127);\n  size_t dim =\n      ((std::uniform_int_distribution<size_t>(1, 512))(gen) + 3) / 4 * 4;\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);  // overridden if the params\n                                             // set column_major_order\n\n  Params params1;\n  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER,\n              true);  // make it MO_COLUMN\n  BuildIndex(meta1, params1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_INT8, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto context1 = searcher1->create_context();\n  auto context2 = searcher2->create_context();\n  auto context3 = searcher1->create_context();\n  auto context4 = searcher1->create_context();\n  uint32_t topk = std::min(10u, document_count);\n  context1->set_topk(topk);\n  context2->set_topk(topk);\n  context3->set_topk(topk);\n  context3->set_filter([](uint64_t) { return false; });  // same as no filter\n  context4->set_topk(topk);\n  context4->set_filter([](uint64_t) { return true; });  // filter all 
result\n\n  std::string query_buffer;\n  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);\n\n  for (uint32_t i = 0; i < query_count; i++) {\n    NumericalVector<int8_t> vec(dim);\n    for (uint32_t j = 0; j < vec.size(); ++j) {\n      vec[j] = (int8_t)dist(gen);\n    }\n    query_buffer.append((const char *)vec.data(), vec.bytes());\n  }\n\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                      query_count, context1));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                      query_count, context3));\n  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),\n                                      IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                      query_count, context4));\n\n  NumericalVector<int8_t> vec(dim);\n  for (uint32_t i = 0; i < query_count; i++) {\n    // not batch\n    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),\n                                        IndexQueryMeta(IndexMeta::DT_INT8, dim),\n                                        context2));\n\n    for (uint32_t j = 0; j < topk; ++j) {\n      auto &result1 = context1->result(i);\n      auto &result2 = context2->result();\n      auto &result3 = context3->result(i);\n      auto &result4 = context4->result(i);\n      EXPECT_TRUE(result4.empty());\n\n      // batch result is equal to not batch result\n      EXPECT_EQ(result1[j].index(), result2[j].index());\n      EXPECT_EQ(result1[j].key(), result2[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());\n\n      // test filter\n      EXPECT_EQ(result1[j].index(), result3[j].index());\n      EXPECT_EQ(result1[j].key(), result3[j].key());\n      ASSERT_FLOAT_EQ(result1[j].score(), result3[j].score());\n    }\n  
}\n}\n\nTEST(FlatProvider, Provider_FP32) {\n  std::mt19937 gen((std::random_device())());\n  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);\n  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  uint32_t document_count =\n      (std::uniform_int_distribution<size_t>(1, 10000))(gen);\n\n  std::vector<uint32_t> keys(document_count);\n  for (uint32_t i = 0; i < document_count; i++) {\n    keys[i] = i;\n  }\n  Shuffle(keys);\n\n  for (uint32_t i = 0; i < document_count; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < vec.size(); ++j) {\n      vec[j] = dist(gen) + static_cast<float>(i * 5);\n    }\n    ASSERT_TRUE(holder->emplace(keys[i], vec));\n  }\n\n  IndexMeta meta1;\n  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta1.set_major_order(IndexMeta::MO_ROW);\n  BuildIndex(meta1, holder, INDEX_PATH + \".1\");\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  meta2.set_major_order(IndexMeta::MO_COLUMN);\n  BuildIndex(meta2, holder, INDEX_PATH + \".2\");\n\n  IndexSearcher::Pointer searcher1, searcher2;\n  LoadIndex(INDEX_PATH + \".1\", searcher1);\n  LoadIndex(INDEX_PATH + \".2\", searcher2);\n\n  auto provider1 = searcher1->create_provider();\n  auto provider2 = searcher2->create_provider();\n\n  ASSERT_TRUE(!!provider1);\n  ASSERT_TRUE(!!provider2);\n\n  ASSERT_EQ(document_count, provider1->count());\n  ASSERT_EQ(document_count, provider2->count());\n\n  ASSERT_EQ(\"FlatSearcher\", provider1->owner_class());\n  ASSERT_EQ(\"FlatSearcher\", provider2->owner_class());\n\n  auto it1 = provider1->create_iterator();\n  auto it2 = provider2->create_iterator();\n  auto 
holder_it = holder->create_iterator();\n\n  uint32_t readed_count = 0;\n  while (it1->is_valid() && it2->is_valid()) {\n    ASSERT_EQ(it1->key(), it2->key());\n    const float *data1 = (const float *)it1->data();\n    const float *data2 = (const float *)it2->data();\n    const float *holder_data = (const float *)holder_it->data();\n    for (size_t idx = 0; idx < dim; idx++) {\n      ASSERT_EQ(*data1, *data2) << \"Fail when dim is: \" << dim\n                                << \" document_count is: \" << document_count;\n      ASSERT_EQ(*data1, *holder_data);\n      data1++;\n      data2++;\n      holder_data++;\n    }\n    readed_count++;\n    const float *features1 = (const float *)provider1->get_vector(it1->key());\n    const float *features2 = (const float *)provider2->get_vector(it2->key());\n    for (size_t idx = 0; idx < dim; idx++) {\n      ASSERT_FLOAT_EQ(*features1, *features2);\n      features1++;\n      features2++;\n    }\n    it1->next();\n    it2->next();\n    holder_it->next();\n  }\n\n  ASSERT_FALSE(holder_it->is_valid());\n\n  ASSERT_EQ(readed_count, provider1->count());\n  ASSERT_EQ(readed_count, provider2->count());\n  ASSERT_EQ(readed_count, holder->count());\n}\n\nTEST(FlatSearcher, TestGroup) {\n  const int dim = 32;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n  index_meta_ptr_.reset(new (std::nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"FlatBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  Params params;\n\n  ASSERT_EQ(0, builder->init(*index_meta_ptr_, 
params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  std::string path = INDEX_PATH + \"/TestGroup\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params searcherParams;\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = doc_cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = Realtime::MicroSeconds();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, 1, ctx));\n  auto t2 = Realtime::MicroSeconds();\n\n  total_time += t2 - t1;\n  std::cout << \"Total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    const std::string &group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    for 
(uint32_t j = 0; j < result.size(); ++j) {\n      std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n                << std::setprecision(3) << \", Score: \" << result[j].score()\n                << std::endl;\n    }\n  }\n\n  // do linear search by p_keys test\n  auto groupbyFuncLinear = [](uint64_t key) {\n    uint32_t group_id = key % 10;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  auto linear_pk_ctx = searcher->create_context();\n\n  linear_pk_ctx->set_group_params(group_num, group_topk);\n  linear_pk_ctx->set_group_by(groupbyFuncLinear);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};\n\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linear_pk_ctx));\n  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();\n  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);\n\n  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {\n    const std::string &group_id = linear_by_pkeys_group_result[i].group_id();\n    auto &result = linear_by_pkeys_group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    for (uint32_t j = 0; j < result.size(); ++j) {\n      std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n                << std::setprecision(3) << \", Score: \" << result[j].score()\n                << std::endl;\n    }\n\n    ASSERT_EQ(10 - i, result[0].key());\n  }\n}\n"
  },
  {
    "path": "tests/core/algorithm/flat/flat_streamer_buffer_test.cc",
    "content": "#include <future>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nconstexpr size_t static dim = 16;\n\nclass FlatStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void hybrid_scale(std::vector<float> &dense_value,\n                    std::vector<float> &sparse_value, float alpha_scale);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatStreamerTest::dir_(\"streamer_test/\");\nstd::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;\n\nvoid FlatStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (std::nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid FlatStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearch) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + 
\"/Test/LinearSearch\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 10000UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/LinearSearch\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 3;\n  auto provider = read_streamer->create_provider();\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? 
i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.milli_seconds() << \" ms\" << endl;\n\n  read_streamer->close();\n  read_streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearchWithLRU) {\n  constexpr size_t static dim = 1600;\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  IndexMeta meta = IndexMeta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, Params());\n  ASSERT_EQ(0, write_streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/LinearSearchWithLRU\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 1000000UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_EQ(0, read_streamer->init(meta, params));\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/LinearSearchWithLRU\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 3;\n  auto provider = read_streamer->create_provider();\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < 10; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; 
++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.milli_seconds() << \" ms\" << endl;\n\n  read_streamer->close();\n  read_streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearchMMap) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/LinearSearchMMap\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 10000UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  
write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/LinearSearchMMap\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 3;\n  auto provider = read_streamer->create_provider();\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      const float *data = (float *)provider->get_vector(result1[0].key());\n      EXPECT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n\n  read_streamer->close();\n  read_streamer.reset();\n  cout << \"Elapsed time: \" << elapsed_time.milli_seconds() << \" ms\" << endl;\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc",
    "content": "#include <future>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nconstexpr size_t static dim = 128;\n\nclass FlatStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void hybrid_scale(std::vector<float> &dense_value,\n                    std::vector<float> &sparse_value, float alpha_scale);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatStreamerTest::dir_(\"streamer_test/\");\nstd::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;\n\nvoid FlatStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (std::nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid FlatStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearchMMap) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + 
\"/Test/LinearSearchMMap\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t data_cnt = 300000UL, cnt = 500UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < data_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/LinearSearchMMap\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 30;\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result1 = ctx->result();\n    // ASSERT_EQ(topk, result1.size());\n    // ASSERT_EQ(i, result1[0].key());\n\n    // for (size_t j = 0; j < dim; ++j) {\n    //   vec[j] = i + 0.1f;\n    // }\n    // ctx->set_topk(topk);\n    // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result2 = ctx->result();\n    // ASSERT_EQ(topk, result2.size());\n    // ASSERT_EQ(i, result2[0].key());\n    // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.micro_seconds() << \" us\" << endl;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result1 = ctx->result();\n    // ASSERT_EQ(topk, result1.size());\n    // ASSERT_EQ(i, result1[0].key());\n\n    // for (size_t j = 0; j < dim; ++j) {\n    //   vec[j] = i + 0.1f;\n    // }\n    // ctx->set_topk(topk);\n    // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result2 = ctx->result();\n    // ASSERT_EQ(topk, result2.size());\n    // ASSERT_EQ(i, result2[0].key());\n    // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.micro_seconds() << \" us\" << endl;\n  read_streamer->close();\n  read_streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearchBuffer) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/LinearSearchBuffer\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t data_cnt = 300000UL, cnt = 500UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < data_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    
write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/LinearSearchBuffer\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 30;\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result1 = ctx->result();\n    // ASSERT_EQ(topk, result1.size());\n    // ASSERT_EQ(i, result1[0].key());\n\n    // for (size_t j = 0; j < dim; ++j) {\n    //   vec[j] = i + 0.1f;\n    // }\n    // ctx->set_topk(topk);\n    // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result2 = ctx->result();\n    // ASSERT_EQ(topk, result2.size());\n    // ASSERT_EQ(i, result2[0].key());\n    // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.micro_seconds() << \" us\" << endl;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result1 = ctx->result();\n    // ASSERT_EQ(topk, result1.size());\n    // ASSERT_EQ(i, result1[0].key());\n\n    // for (size_t j = 0; j < dim; ++j) {\n    //   vec[j] = i + 0.1f;\n    // }\n    // ctx->set_topk(topk);\n    // ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    // auto &result2 = ctx->result();\n    // ASSERT_EQ(topk, result2.size());\n    // ASSERT_EQ(i, result2[0].key());\n    // ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    // ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n  cout << \"Elapsed time: \" << elapsed_time.micro_seconds() << \" us\" << endl;\n  read_streamer->close();\n  read_streamer.reset();\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat/flat_streamer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstddef>\n#include <future>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/ailego/encoding/json/mod_json.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n#include \"algorithm/flat/flat_utility.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\nconstexpr size_t static dim = 16;\n\nclass FlatStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void hybrid_scale(std::vector<float> &dense_value,\n                    std::vector<float> &sparse_value, float alpha_scale);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatStreamerTest::dir_(\"streamer_test/\");\nstd::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;\n\nvoid FlatStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (std::nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  
char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid FlatStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatStreamerTest, TestAddVector) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/AddVector\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  auto provider = streamer->create_provider();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < 1000UL; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    const float *data = (float *)provider->get_vector(i);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n  }\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/AddVector\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  auto provider = streamer->create_provider();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 1000UL;\n  
IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    for (size_t j = 0; j < dim; ++j) {\n      const float *data = (float *)provider->get_vector(result1[0].key());\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestAddAndSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestAddAndSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  const size_t topk = 200U, cnt = 2000U;\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  ctx->set_topk(topk);\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &knnResult = ctx->result();\n    ASSERT_EQ(std::min(i + 1, topk), knnResult.size());\n  }\n}\n\nTEST_F(FlatStreamerTest, TestAddAndSearcherSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = 
IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestAddAndSearcherSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  const size_t topk = 200U, cnt = 2000U;\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  ctx->set_topk(topk);\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  std::string path1 = dir_ + \"/TestAddAndSearcherSearchDump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_EQ(0, dumper->init(Params()));\n  ASSERT_EQ(0, dumper->create(path1));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto container = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, container->init(Params()));\n  ASSERT_EQ(0, container->open(path1, false));\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSearcher\");\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n\n  auto linearCtx = searcher->create_context();\n  linearCtx->set_topk(topk);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, linearCtx));\n    auto &knnResult = linearCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n  }\n}\n\nTEST_F(FlatStreamerTest, TestLinearSearchRandomData) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  constexpr size_t static dim = 128;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, 
Params());\n  Params params;\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestKnnSearchRandomData\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  size_t cnt = 1500;\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n    streamer->add_impl(i + cnt, vec.data(), qmeta, ctx);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  size_t topk = 100;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  cnt = 500;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n    auto t1 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t2 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t3 = Realtime::MicroSeconds();\n    knnTotalTime += t3 - t2;\n    linearTotalTime += t2 - t1;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n\n    topk1Hits += linearResult[0].key() == knnResult[0].key();\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n  
        break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 1.0f / cnt;\n#if 1\n  printf(\n      \"knnTotalTime=%zu linearTotalTime=%zu totalHits=%d totalCnts=%d \"\n      \"R@%zd=%f R@1=%f\\n\",\n      (size_t)knnTotalTime, (size_t)linearTotalTime, totalHits, totalCnts, topk,\n      recall, topk1Recall);\n#endif\n  EXPECT_GT(recall, 0.50f);\n  EXPECT_GT(topk1Recall, 0.80f);\n}\n\nTEST_F(FlatStreamerTest, TestOpenClose) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  constexpr size_t static dim = 2048;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, Params());\n  Params params;\n  // params.set(PARAM_FLAT_COLUMN_MAJOR_ORDER, false);\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ASSERT_NE(nullptr, storage2);\n  Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TestOpenAndClose1\", true));\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TestOpenAndClose2\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto checkIter = [](size_t base, size_t total,\n                      IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = base;\n    size_t cnt = 0;\n    while (iter->is_valid()) {\n      float *data = (float *)provider->get_vector(cur);\n      for (size_t d = 0; d < dim; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, data[d]);\n      }\n      iter->next();\n      cur += 2;\n      cnt++;\n    }\n    ASSERT_EQ(cnt, total);\n  };\n\n  size_t testCnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i 
= 0; i < testCnt; i += 2) {\n    float v1 = (float)i;\n    ASSERT_EQ(0, streamer->open(storage1));\n    auto ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    std::vector<float> vec1(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = v1;\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec1.data(), qmeta, ctx));\n    checkIter(0, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n\n    float v2 = (float)(i + 1);\n    std::vector<float> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec2[d] = v2;\n    }\n    ASSERT_EQ(0, streamer->open(storage2));\n    ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    ASSERT_EQ(0, streamer->add_impl(i + 1, vec2.data(), qmeta, ctx));\n    checkIter(1, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n  }\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer1->init(meta, params));\n  ASSERT_EQ(0, streamer1->open(storage1));\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer2->init(meta, params));\n  ASSERT_EQ(0, streamer2->open(storage2));\n\n  checkIter(0, testCnt / 2, streamer1);\n  checkIter(1, testCnt / 2, streamer2);\n}\n\nTEST_F(FlatStreamerTest, TestNoInit) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  streamer->cleanup();\n}\n\nTEST_F(FlatStreamerTest, TestForceFlush) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  
stg_params.set(\"proxima.mmap_file.storage.copy_on_write\", true);\n  stg_params.set(\"proxima.mmap_file.storage.force_flush\", true);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      float *data = (float *)provider->get_vector(cur);\n      for (size_t d = 0; d < dim; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, data[d]);\n      }\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    checkIter(i + 1, streamer);\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  storage->close();\n\n  storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    const float *data = (const float *)provider->get_vector(i);\n    ASSERT_NE(data, nullptr);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(i, data[j]);\n    }\n  }\n}\n\nTEST_F(FlatStreamerTest, TestMultiThread) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  
ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  constexpr size_t static dim = 32;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnMultiThread\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    NumericalVector<float> vec(dim);\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = (float)i + baseKey;\n      }\n      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto t1 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000U, t1.get());\n  ASSERT_EQ(1000U, t2.get());\n  ASSERT_EQ(1000U, t3.get());\n  streamer->close();\n\n  // checking data\n  ASSERT_EQ(0, streamer->open(storage));\n  auto provider = streamer->create_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    float *data = (float *)iter->data();\n    for (size_t d = 0; d < dim; ++d) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n\n  // ====== multi thread search\n  size_t topk = 100;\n  
size_t cnt = 3000;\n  auto knnSearch = [&]() {\n    NumericalVector<float> vec(dim);\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 1) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i + 0.1f;\n      }\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(i, r2[0].key());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    // printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  auto s3 = std::async(std::launch::async, knnSearch);\n  s1.wait();\n  s2.wait();\n  s3.wait();\n}\n\nTEST_F(FlatStreamerTest, TestConcurrentAddAndSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  constexpr size_t static dim = 32;\n  
IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnConcurrentAddAndSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    NumericalVector<float> vec(dim);\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n    auto ctx = streamer->create_context();\n    size_t succAdd = 0;\n    for (size_t i = 0; i < addCnt; i++) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = (float)i + baseKey;\n      }\n      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  // ====== multi thread search\n  auto knnSearch = [&]() {\n    size_t topk = 100;\n    size_t cnt = 3000;\n    NumericalVector<float> vec(dim);\n    auto linearCtx = streamer->create_context();\n    auto linearByPKeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    linearCtx->set_topk(topk);\n    linearByPKeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n    for (size_t i = 0; i < cnt; i += 1) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i + 0.1f;\n      }\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n#if 0\n      printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n            
  r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n      printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n              r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    //        printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n  auto t0 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000, t0.get());\n  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  ASSERT_EQ(1000, t1.get());\n  ASSERT_EQ(1000, t2.get());\n  s1.wait();\n  s2.wait();\n\n  // checking data\n  auto provider = streamer->create_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    float *data = (float *)iter->data();\n    for (size_t d = 0; d < dim; ++d) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n}\n\nTEST_F(FlatStreamerTest, TestFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, 
storage->open(dir_ + \"TessFilter\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(10U);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    p_keys[0].push_back(i);\n  }\n\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 100.1;\n  }\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(10, results.size());\n  ASSERT_EQ(100, results[0].key());\n  ASSERT_EQ(101, results[1].key());\n  ASSERT_EQ(99, results[2].key());\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 100UL || key == 101UL) {\n      return true;\n    }\n    return false;\n  };\n  ctx->set_filter(filterFunc);\n\n  // after set filter\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto &results1 = ctx->result();\n  ASSERT_EQ(10, results1.size());\n  ASSERT_EQ(99, results1[0].key());\n  ASSERT_EQ(102, results1[1].key());\n  ASSERT_EQ(98, results1[2].key());\n\n  // linear\n  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &results2 = ctx->result();\n  ASSERT_EQ(10, results2.size());\n  ASSERT_EQ(99, results2[0].key());\n  ASSERT_EQ(102, results2[1].key());\n  ASSERT_EQ(98, results2[2].key());\n\n  auto &results3 = ctx->result();\n  ASSERT_EQ(10, results3.size());\n  ASSERT_EQ(99, results3[0].key());\n  ASSERT_EQ(102, results3[1].key());\n  ASSERT_EQ(98, results3[2].key());\n}\n\nTEST_F(FlatStreamerTest, TestMaxIndexSize) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  constexpr size_t static dim = 128;\n  IndexMeta 
meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessMaxIndexSize\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t vsz0 = 0;\n  size_t rss0 = 0;\n  if (!MemoryHelper::SelfUsage(&vsz0, &rss0)) {\n    // do not check if get mem usage failed\n    return;\n  }\n  if (vsz0 > 1024 * 1024 * 1024 * 1024UL) {\n    // asan mode\n    return;\n  }\n\n  NumericalVector<float> vec(dim);\n  size_t writeCnt1 = 10000;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  for (size_t i = 0; i < writeCnt1; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  size_t vsz1 = 0;\n  size_t rss1 = 0;\n  MemoryHelper::SelfUsage(&vsz1, &rss1);\n  size_t increment1 = rss1 - rss0;\n  // data + key + block_header\n  size_t expect_size =\n      writeCnt1 * 128 * 4 + writeCnt1 * 8 + writeCnt1 * 28 / 32;\n  LOG_INFO(\"increment1: %lu, expect_size: %lu\", increment1, expect_size);\n\n  ASSERT_GT(expect_size, increment1 * 0.75f);\n  ASSERT_LT(expect_size, increment1 * 1.25f);\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(FlatStreamerTest, TestCleanUp) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TessKnnCluenUp1\", true));\n  Params params;\n  constexpr size_t static dim1 = 32;\n  IndexMeta meta1(IndexMeta::DataType::DT_FP32, dim1);\n  
meta1.set_metric(\"SquaredEuclidean\", 0, Params());\n  NumericalVector<float> vec1(dim1);\n  ASSERT_EQ(0, streamer->init(meta1, params));\n  ASSERT_EQ(0, streamer->open(storage1));\n  IndexQueryMeta qmeta1(IndexMeta::DT_FP32, dim1);\n  auto ctx1 = streamer->create_context();\n  ASSERT_EQ(0, streamer->add_impl(1, vec1.data(), qmeta1, ctx1));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage2);\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TessKnnCluenUp2\", true));\n  constexpr size_t static dim2 = 64;\n  IndexMeta meta2(IndexMeta::DataType::DT_FP32, dim2);\n  meta2.set_metric(\"SquaredEuclidean\", 0, Params());\n  NumericalVector<float> vec2(dim2);\n  ASSERT_EQ(0, streamer->init(meta2, params));\n  ASSERT_EQ(0, streamer->open(storage2));\n  IndexQueryMeta qmeta2(IndexMeta::DT_FP32, dim2);\n  auto ctx2 = streamer->create_context();\n  ASSERT_EQ(0, streamer->add_impl(2, vec2.data(), qmeta2, ctx2));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n}\n\nTEST_F(FlatStreamerTest, TestBloomFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestBloomFilter\", true));\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  ctx->set_topk(10U);\n  size_t cnt = 5000;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    
streamer->add_impl(i, vec.data(), qmeta, ctx);\n    if ((i + 1) % 10 == 0) {\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      auto &results = ctx->result();\n      ASSERT_EQ(10, results.size());\n    }\n  }\n}\n\nTEST_F(FlatStreamerTest, TestGroup) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t doc_cnt = 5000U;\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = doc_cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = query_value * 1.0 / 10 + 0.1f;\n  }\n\n  auto t1 = Realtime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, 1, ctx));\n  auto t2 = Realtime::MicroSeconds();\n\n  total_time += t2 - t1;\n  std::cout << \"Total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    const std::string &group_id = 
group_result[i].group_id();\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    for (uint32_t j = 0; j < result.size(); ++j) {\n      std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n                << std::setprecision(3) << \", Score: \" << result[j].score()\n                << std::endl;\n    }\n  }\n\n  // do linear search by p_keys test\n  auto groupbyFuncLinear = [](uint64_t key) {\n    uint32_t group_id = key % 10;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  auto linear_pk_ctx = streamer->create_context();\n\n  linear_pk_ctx->set_group_params(group_num, group_topk);\n  linear_pk_ctx->set_group_by(groupbyFuncLinear);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};\n\n  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linear_pk_ctx));\n  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();\n  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);\n\n  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {\n    const std::string &group_id = linear_by_pkeys_group_result[i].group_id();\n    auto &result = linear_by_pkeys_group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    for (uint32_t j = 0; j < result.size(); ++j) {\n      std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n                << std::setprecision(3) << \", Score: \" << result[j].score()\n                << std::endl;\n    }\n\n    ASSERT_EQ(10 - i, result[0].key());\n  }\n}\n\nTEST_F(FlatStreamerTest, TestAddAndSearchWithID) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = 
IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 20000U;\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i += 2) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n  for (size_t i = 1; i < cnt; i += 2) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt; i += 100) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n   
       totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n#if 1\n  printf(\n      \"knnTotalTime=%zu linearTotalTime=%zu totalHits=%d totalCnts=%d \"\n      \"R@%zd=%f R@1=%f\\n\",\n      (size_t)knnTotalTime, (size_t)linearTotalTime, totalHits, totalCnts, topk,\n      recall, topk1Recall);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n}\n\nTEST_F(FlatStreamerTest, TestAddAndSearchWithID2) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_NE(write_streamer, nullptr);\n\n  Params write_params;\n  Params write_stg_params;\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, write_storage->init(write_stg_params));\n  ASSERT_EQ(0, write_storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, write_params));\n  ASSERT_EQ(0, write_streamer->open(write_storage));\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 20000U;\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i += 2) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n  for (size_t i = 1; i < cnt; i += 2) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();  //\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  Params read_params;\n  read_params.set(PARAM_FLAT_USE_ID_MAP, false);\n  Params read_stg_params;\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, 
read_storage->init(read_stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, read_params));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  auto linearCtx = read_streamer->create_context();\n  auto knnCtx = read_streamer->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt; i += 100) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 
0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat_sparse/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_flat_sparse\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"flat_sparse/flat_sparse_builder.h\"\n#include <future>\n#include <iostream>\n#include <vector>\n#include <gtest/gtest.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nclass FlatSparseBuilderTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string FlatSparseBuilderTest::_dir(\"FlatSparseBuilderTest\");\nshared_ptr<IndexMeta> FlatSparseBuilderTest::_index_meta_ptr;\n\nvoid FlatSparseBuilderTest::SetUp(void) {\n  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  _index_meta_ptr->set_metric(\"InnerProductSparse\", 0, Params());\n}\n\nvoid FlatSparseBuilderTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", _dir.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatSparseBuilderTest, TestGeneral) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"FlatSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = make_shared<OnePassIndexSparseHolder<IndexMeta::DT_FP32>>();\n  
uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  Params params;\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n\n  ASSERT_EQ(0, builder->train(holder));\n\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_EQ(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 = make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = 
IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_EQ(stats.built_costtime(), 0UL);\n}\n\nTEST_F(FlatSparseBuilderTest, TestIndexThreads) {\n  IndexBuilder::Pointer builder1 =\n      IndexFactory::CreateBuilder(\"FlatSparseBuilder\");\n  ASSERT_NE(builder1, nullptr);\n  IndexBuilder::Pointer builder2 =\n      IndexFactory::CreateBuilder(\"FlatSparseBuilder\");\n  ASSERT_NE(builder2, nullptr);\n\n  auto holder = make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();\n\n  size_t doc_cnt = 1000UL;\n  uint32_t sparse_count = 32;\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  Params params;\n  std::srand(Realtime::MilliSeconds());\n  auto threads =\n      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);\n  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));\n\n  auto build_index1 = [&]() {\n    ASSERT_EQ(0, builder1->train(threads, holder));\n    ASSERT_EQ(0, builder1->build(threads, holder));\n  };\n  auto build_index2 = [&]() {\n    ASSERT_EQ(0, builder2->train(threads, holder));\n    ASSERT_EQ(0, builder2->build(threads, holder));\n  };\n\n  auto t1 = std::async(std::launch::async, build_index1);\n  auto t2 = std::async(std::launch::async, 
build_index2);\n  t1.wait();\n  t2.wait();\n\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndexThreads\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder1->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder2->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats1 = builder1->stats();\n  ASSERT_EQ(doc_cnt, stats1.built_count());\n  auto &stats2 = builder2->stats();\n  ASSERT_EQ(doc_cnt, stats2.built_count());\n}\n\nTEST_F(FlatSparseBuilderTest, TestHalfFloatConverter) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"FlatSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder = make_shared<OnePassIndexSparseHolder<IndexMeta::DT_FP32>>();\n  uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"HalfFloatSparseConverter\");\n  converter->init(*_index_meta_ptr, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->sparse_result();\n\n  Params params;\n  ASSERT_EQ(0, builder->init(index_meta, converter_params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestHalFloatConverter\";\n  ASSERT_EQ(0, 
dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_EQ(stats.built_costtime(), 0UL);\n  //ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 = make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_EQ(stats.built_costtime(), 0UL);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "tests/core/algorithm/flat_sparse/flat_sparse_searcher_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <future>\n#include <iostream>\n#include <random>\n#include <vector>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_meta.h\"\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nconstexpr size_t static sparse_dim_count = 16;\n\nclass FlatSparseSearcherTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatSparseSearcherTest::dir_(\"searcher_test/\");\nstd::shared_ptr<IndexMeta> FlatSparseSearcherTest::index_meta_ptr_;\n\nvoid FlatSparseSearcherTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    std::vector<NumericalVector<float>> 
&sparse_vec_list, bool norm) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_vec[j] = dist(gen);\n    }\n\n    float norm;\n    Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count, &norm);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_vec[j] = sparse_vec[j] / norm;\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\n\nvoid FlatSparseSearcherTest::SetUp(void) {\n  IndexLoggerBroker::SetLevel(2);\n\n  index_meta_ptr_.reset(new IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                      IndexMeta::DataType::DT_FP32));\n  index_meta_ptr_->set_metric(\"InnerProductSparse\", 0, Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid FlatSparseSearcherTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatSparseSearcherTest, TestGeneral) {\n  // init storage\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(storage != nullptr);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n\n\n  // init streamer\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, Params());\n\n  Params params;\n  ASSERT_EQ(0, 
streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // generate sparse data\n  size_t sparse_dim_count = 32;\n  size_t cnt = 100U;\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  // test add data\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // test get data\n  uint32_t sparse_count;\n  std::string sparse_indices_buffer;\n  std::string sparse_values_buffer;\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(\n        0, streamer->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n    ASSERT_EQ(sparse_dim_count, sparse_count);\n    const uint32_t *sparse_indices_ptr =\n        reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n      // std::cout << \"1: \" << sparse_values_ptr[j]\n      //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n    }\n\n    // must clear ^_^\n    sparse_indices_buffer.clear();\n    sparse_values_buffer.clear();\n  }\n\n  // test dump\n  auto path = dir_ + \"/TestGeneral_dump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  
ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher get vector\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_TRUE(read_storage != nullptr);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  // test searcher get data\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(\n        0, searcher->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n    ASSERT_EQ(sparse_dim_count, sparse_count);\n    const uint32_t *sparse_indices_ptr =\n        reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n      // std::cout << \"1: \" << sparse_values_ptr[j]\n      //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n    }\n\n    // must clear ^_^\n    sparse_indices_buffer.clear();\n    sparse_values_buffer.clear();\n  }\n}\n\nTEST_F(FlatSparseSearcherTest, TestStreamerDump) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestStreamerDump.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  
size_t cnt = 10000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  auto path = dir_ + \"/TestStreamerDump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  for (size_t i = 0; i < cnt; i += 50) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              
searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    auto &linearResult = linearCtx->result();\n\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(topk, knnResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      ASSERT_EQ(linearResult[k].key(), knnResult[k].key());\n    }\n  }\n\n  printf(\"linear: %zu, knn: %zu\\n\", (size_t)linearTotalTime,\n         (size_t)knnTotalTime);\n}\n\nTEST_F(FlatSparseSearcherTest, TestLoadClose) {\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(storage != nullptr);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n\n\n  // init streamer\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, Params());\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // generate sparse data\n  size_t sparse_dim_count = 32;\n  size_t cnt = 100U;\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  // test add data\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                           
         sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // test get data\n  uint32_t sparse_count;\n  std::string sparse_indices_buffer;\n  std::string sparse_values_buffer;\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(\n        0, streamer->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n    ASSERT_EQ(sparse_dim_count, sparse_count);\n    const uint32_t *sparse_indices_ptr =\n        reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n      // std::cout << \"1: \" << sparse_values_ptr[j]\n      //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n    }\n\n    // must clear ^_^\n    sparse_indices_buffer.clear();\n    sparse_values_buffer.clear();\n  }\n\n  // test dump\n  auto path = dir_ + \"/TestGeneral_dump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher get vector\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_TRUE(read_storage != nullptr);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->init(Params()));\n\n  uint32_t loop = 5;\n  while (loop--) {\n    std::cout << \"loop: \" << loop << std::endl;\n\n    ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n    // 
test searcher get data\n    for (size_t i = 0; i < cnt; i++) {\n      ASSERT_EQ(0, searcher->get_sparse_vector(i, &sparse_count,\n                                               &sparse_indices_buffer,\n                                               &sparse_values_buffer));\n      ASSERT_EQ(sparse_dim_count, sparse_count);\n      const uint32_t *sparse_indices_ptr =\n          reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n      const float *sparse_values_ptr =\n          reinterpret_cast<const float *>(sparse_values_buffer.data());\n      for (size_t j = 0; j < sparse_count; ++j) {\n        ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n        ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n        // std::cout << \"1: \" << sparse_values_ptr[j]\n        //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n      }\n\n      // must clear ^_^\n      sparse_indices_buffer.clear();\n      sparse_values_buffer.clear();\n    }\n\n    ASSERT_EQ(searcher->unload(), 0);\n  }\n}\n\nTEST_F(FlatSparseSearcherTest, TestSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n  
  ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  // test dump\n  auto path = dir_ + \"/TestGeneral_dump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher get vector\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_TRUE(read_storage != nullptr);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  size_t step = 50;\n  for (size_t i = 0; i < cnt; i += step) {\n    // std::cout << \"search \" << i << std::endl;\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n\n    ctx->set_topk(1U);\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n    // std::cout << result1[0].key() << \" \" << result1[0].score() << std::endl;\n\n    ctx->set_topk(3U);\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(3UL, result2.size());\n    for (size_t i = 0; i < 3UL; ++i) {\n      // std::cout << result2[i].key() << \" \" << result2[i].score() <<\n      // std::endl;\n      ASSERT_EQ(i, result2[i].key());\n  
  }\n  }\n\n  ctx->set_topk(100U);\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  ASSERT_EQ(0, searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  for (size_t i = 0; i < 100; ++i) {\n    ASSERT_EQ(i, result[i].key());\n  }\n}\n\nTEST_F(FlatSparseSearcherTest, TestSearchPKeys) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchByKeys.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0].resize(cnt);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n\n    p_keys[0][i] = i;\n  }\n\n  // test dump\n  auto path = dir_ + \"/TestGeneral_dump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, 
dumper->close());\n\n  // do searcher get vector\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_TRUE(read_storage != nullptr);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 50) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n    ctx->set_topk(1U);\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(0, result2[0].key());\n    ASSERT_EQ(1, result2[1].key());\n    ASSERT_EQ(2, result2[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 1.0f;\n    }\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(1, result[1].key());\n    ASSERT_EQ(10, result[10].key());\n    ASSERT_EQ(20, result[20].key());\n    ASSERT_EQ(30, result[30].key());\n    ASSERT_EQ(35, 
result[35].key());\n    ASSERT_EQ(99, result[99].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 10.0f;\n    }\n\n    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(3U, result.size());\n    ASSERT_EQ(1, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(15, result[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 9.0f;\n    }\n    p_keys[0].clear();\n    for (size_t j = 0; j < cnt; j += 10) {\n      p_keys[0].push_back((uint64_t)j);\n    }\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(100, result[10].key());\n    ASSERT_EQ(200, result[20].key());\n    ASSERT_EQ(300, result[30].key());\n    ASSERT_EQ(350, result[35].key());\n    ASSERT_EQ(990, result[99].key());\n  }\n}\n\nTEST_F(FlatSparseSearcherTest, TestMultiThread) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  constexpr size_t static sparse_dim_count = 32;\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  
Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnMultiThread\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = (float)i + baseKey;\n      }\n\n      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,\n                                     sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto t1 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000U, t1.get());\n  ASSERT_EQ(1000U, t2.get());\n  ASSERT_EQ(1000U, t3.get());\n  streamer->close();\n\n  // checking data\n  ASSERT_EQ(0, streamer->open(storage));\n  auto provider = streamer->create_sparse_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n\n  std::set<uint64_t> keys;\n\n  while (iter->is_valid()) {\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_EQ((float)iter->key(), data[j]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    keys.insert(iter->key());\n    
iter->next();\n  }\n\n  ASSERT_EQ(3000, keys.size());\n\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n\n  // test dump\n  auto path = dir_ + \"/TestGeneral_dump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher get vector\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"FlatSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_TRUE(read_storage != nullptr);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->init(Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  // ====== multi thread search\n  size_t topk = 10;\n  size_t cnt = 3000;\n  auto knnSearch = [&]() {\n    auto linearCtx = searcher->create_context();\n    auto linearByPkeysCtx = searcher->create_context();\n    auto ctx = searcher->create_context();\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 1) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = ((float)i + 1.1f);\n      }\n\n      ASSERT_EQ(0,\n                searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      ASSERT_EQ(\n          0, searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), 
qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};\n      ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                       sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      // std::cout << \"r1 top1: \" << r1[0].key() << \", score: \" << r1[0].score()\n      //           << std::endl;\n      ASSERT_EQ(cnt - 1, r1[0].key());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      // std::cout << \"r2 top1: \" << r2[0].key() << \", score: \" << r2[0].score()\n      //           << std::endl;\n      ASSERT_EQ(cnt - 1, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_FLOAT_EQ(1.0f, totalHits * 1.0f / totalCnts);\n  };\n\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  auto s3 = std::async(std::launch::async, knnSearch);\n  s1.wait();\n  s2.wait();\n  s3.wait();\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat_sparse/flat_sparse_streamer_buffer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstdlib>\n#include <iostream>\n#include <memory>\n#include <new>\n#include <random>\n#include <string>\n#include <vector>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <algorithm/flat_sparse/flat_sparse_utility.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nnamespace zvec {\nnamespace core {\n\n// Fixture for the FlatSparseStreamer tests. SetUp() rebuilds the shared\n// IndexMeta and wipes dir_; TearDown() wipes dir_ again.\nclass FlatSparseStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  // Appends `cnt` random sparse vectors with `sparse_dim_count` entries each\n  // to the two output lists; values are normalized only when `norm` is true.\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  // Directory removed before and after every test.\n  static std::string dir_;\n  // Sparse FP32 index meta, re-created in SetUp().\n  static shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatSparseStreamerTest::dir_(\"FlatSparseStreamerTest/\");\nshared_ptr<IndexMeta> FlatSparseStreamerTest::index_meta_ptr_;\n\nvoid FlatSparseStreamerTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {\n  // Seeded from std::random_device, so the data differs from run to run.\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);\n  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse: entry j gets index j * 20 and a random value\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = static_cast<uint32_t>(j * 20);\n      sparse_vec[j] = dist(gen);\n    }\n\n    if (norm) {\n      // Kept distinct from the `norm` flag so the flag stays visible here.\n      // NOTE(review): Norm2Matrix::Compute is assumed to write the L2 norm of\n      // the vector into its last argument - confirm against ailego.\n      float norm_value = 0.0f;\n      ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(),\n                                             sparse_dim_count, &norm_value);\n      // Dividing by a zero norm would fill the vector with NaN/inf, so such a\n      // vector is left untouched.\n      if (norm_value > 0.0f) {\n        for (size_t j = 0; j < sparse_dim_count; ++j) {\n          sparse_vec[j] = sparse_vec[j] / norm_value;\n        }\n      }\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\nvoid FlatSparseStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  // nothrow new signals allocation failure with a null pointer.\n  ASSERT_TRUE(index_meta_ptr_ != nullptr);\n  index_meta_ptr_->set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  // Start from an empty test directory. The command is a std::string so it\n  // cannot be truncated by a fixed-size buffer.\n  const std::string cmd = \"rm -rf \" + dir_;\n  system(cmd.c_str());\n}\n\nvoid FlatSparseStreamerTest::TearDown(void) {\n  // Remove whatever the test left behind.\n  const std::string cmd = \"rm -rf \" + dir_;\n  system(cmd.c_str());\n}\n\n// Writes 20000 random sparse vectors through one streamer, reopens the same\n// storage file with a second streamer and compares search_impl() against\n// search_bf_impl() for every 100th stored vector.\nTEST_F(FlatSparseStreamerTest, TestGeneral) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n\n  ailego::Params stg_params;\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, write_storage);\n  ASSERT_EQ(0, write_storage->init(stg_params));\n  ASSERT_EQ(0, write_storage->open(dir_ + \"/Test/FlatSparseSearch\", true));\n  ASSERT_EQ(0, write_streamer->init(index_meta, params));\n  ASSERT_EQ(0, write_streamer->open(write_storage));\n\n  size_t cnt = 20000U;\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, write_streamer->add_impl(\n                     i, sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, ctx));\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  // Reopen the file written above through a fresh streamer and storage.\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(read_streamer != nullptr);\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/FlatSparseSearch\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n\n  auto linearCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  // Only every kQueryStep-th stored vector is used as a query; queryCnt\n  // counts how many queries were actually issued.\n  const size_t kQueryStep = 100;\n  size_t queryCnt = 0;\n\n  for (size_t i = 0; i < cnt; i += kQueryStep) {\n    ++queryCnt;\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(\n        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, read_streamer->search_bf_impl(\n                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),\n                     qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    // The brute-force search must rank the query's own key first.\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    // Count how many search_impl() keys also appear in the brute-force top-k.\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  // Fraction of queries whose own key came back first from search_impl().\n  float topk1Recall = topk1Hits * 1.0f / queryCnt;\n  // float cost = linearTotalTime * 1.0f / knnTotalTime;\n\n  std::cout << \"knnTotalTime=\" << knnTotalTime\n            << \" linearTotalTime=\" << linearTotalTime << std::endl;\n\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <future>\n#include <string>\n#include <vector>\n#include <ailego/math/norm2_matrix.h>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <algorithm/flat_sparse/flat_sparse_utility.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nconstexpr static size_t sparse_dim_count = 16;\n\nclass FlatSparseStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string FlatSparseStreamerTest::dir_(\"streamer_test/\");\nstd::shared_ptr<IndexMeta> FlatSparseStreamerTest::index_meta_ptr_;\n\nvoid FlatSparseStreamerTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    
std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_vec[j] = dist(gen);\n    }\n\n    float norm;\n    Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count, &norm);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_vec[j] = sparse_vec[j] / norm;\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\n\nvoid FlatSparseStreamerTest::SetUp(void) {\n  IndexLoggerBroker::SetLevel(2);\n\n  index_meta_ptr_.reset(new IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                      IndexMeta::DataType::DT_FP32));\n  index_meta_ptr_->set_metric(\"InnerProductSparse\", 0, Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid FlatSparseStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(FlatSparseStreamerTest, TestGeneral) {\n  // init storage\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(storage != nullptr);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n\n\n  // init streamer\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  
index_meta.set_metric(\"InnerProductSparse\", 0, Params());\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // generate sparse data\n  size_t sparse_dim_count = 32;\n  size_t cnt = 100U;\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  // test add data\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // test get data\n  uint32_t sparse_count;\n  std::string sparse_indices_buffer;\n  std::string sparse_values_buffer;\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(\n        0, streamer->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n    ASSERT_EQ(sparse_dim_count, sparse_count);\n    const uint32_t *sparse_indices_ptr =\n        reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n      // std::cout << \"1: \" << sparse_values_ptr[j]\n      //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n    }\n\n    // must clear ^_^\n    sparse_indices_buffer.clear();\n    sparse_values_buffer.clear();\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestLinearSearch) {\n  IndexStreamer::Pointer streamer =\n      
IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    // std::cout << \"search \" << i << std::endl;\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n\n    ctx->set_topk(1U);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n    // std::cout << result1[0].key() << \" \" << result1[0].score() << std::endl;\n\n    ctx->set_topk(3U);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(3UL, result2.size());\n    for (size_t i = 0; i < 3UL; ++i) {\n      // std::cout << result2[i].key() << \" \" << 
result2[i].score() <<\n      // std::endl;\n      ASSERT_EQ(i, result2[i].key());\n    }\n  }\n\n  ctx->set_topk(100U);\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  for (size_t i = 0; i < 100; ++i) {\n    ASSERT_EQ(i, result[i].key());\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestLinearSearchByKeys) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchByKeys.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0].resize(cnt);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n\n    p_keys[0][i] = i;\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n    
ctx->set_topk(1U);\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(0, result2[0].key());\n    ASSERT_EQ(1, result2[1].key());\n    ASSERT_EQ(2, result2[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 1.0f;\n    }\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(1, result[1].key());\n    ASSERT_EQ(10, result[10].key());\n    ASSERT_EQ(20, result[20].key());\n    ASSERT_EQ(30, result[30].key());\n    ASSERT_EQ(35, result[35].key());\n    ASSERT_EQ(99, result[99].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 10.0f;\n    }\n\n    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(3U, result.size());\n    ASSERT_EQ(1, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(15, result[2].key());\n  }\n\n  {\n    
ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 9.0f;\n    }\n    p_keys[0].clear();\n    for (size_t j = 0; j < cnt; j += 10) {\n      p_keys[0].push_back((uint64_t)j);\n    }\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(100, result[10].key());\n    ASSERT_EQ(200, result[20].key());\n    ASSERT_EQ(300, result[30].key());\n    ASSERT_EQ(350, result[35].key());\n    ASSERT_EQ(990, result[99].key());\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestCreateIterator) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestCreateIterator\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      float *sparse_data = (float *)iter->sparse_data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < sparse_dim_count; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);\n      }\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  auto ctx = streamer->create_context();\n 
 ASSERT_TRUE(!!ctx);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);\n    NumericalVector<float> sparse_velues1(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices1[j] = j * 20;\n      sparse_velues1[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),\n                                    sparse_velues1.data(), qmeta, ctx));\n    checkIter(i + 1, streamer);\n  }\n\n  // check getVector\n  auto provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestOpenAndClose) {\n  
constexpr size_t static sparse_dim_count = 2048;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, Params());\n  Params params;\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ASSERT_NE(nullptr, storage2);\n  Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TessOpenAndClose1\", true));\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TessOpenAndClose2\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto checkIter = [](size_t base, size_t total,\n                      IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = base;\n    size_t cnt = 0;\n    while (iter->is_valid()) {\n      float *sparse_data = (float *)iter->sparse_data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < sparse_dim_count; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);\n      }\n      iter->next();\n      cur += 2;\n      cnt++;\n    }\n    ASSERT_EQ(cnt, total);\n  };\n\n  size_t testCnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < testCnt; i += 2) {\n    float v1 = (float)i;\n    ASSERT_EQ(0, streamer->open(storage1));\n    auto ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n\n    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);\n    NumericalVector<float> sparse_velues1(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices1[j] = j * 20;\n      sparse_velues1[j] = v1;\n    }\n\n    ASSERT_EQ(0, 
streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),\n                                    sparse_velues1.data(), qmeta, ctx));\n\n    checkIter(0, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n\n    float v2 = (float)(i + 1);\n    NumericalVector<uint32_t> sparse_indices2(sparse_dim_count);\n    NumericalVector<float> sparse_velues2(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices2[j] = j * 20;\n      sparse_velues2[j] = v2;\n    }\n\n    ASSERT_EQ(0, streamer->open(storage2));\n    ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    ASSERT_EQ(\n        0, streamer->add_impl(i + 1, sparse_dim_count, sparse_indices2.data(),\n                              sparse_velues2.data(), qmeta, ctx));\n    checkIter(1, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n  }\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer1->init(meta, params));\n  ASSERT_EQ(0, streamer1->open(storage1));\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer2->init(meta, params));\n  ASSERT_EQ(0, streamer2->open(storage2));\n\n  checkIter(0, testCnt / 2, streamer1);\n  checkIter(1, testCnt / 2, streamer2);\n}\n\nTEST_F(FlatSparseStreamerTest, TestNoInit) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  streamer->cleanup();\n}\n\nTEST_F(FlatSparseStreamerTest, TestForceFlush) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  
ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  stg_params.set(\"proxima.mmap_file.storage.copy_on_write\", true);\n  stg_params.set(\"proxima.mmap_file.storage.force_flush\", true);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      ASSERT_EQ(cur, iter->key());\n      const uint32_t sparse_count = iter->sparse_count();\n      ASSERT_EQ(sparse_count, sparse_dim_count);\n\n      const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        ASSERT_FLOAT_EQ((float)cur, data[j]);\n      }\n\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  auto ctx = streamer->create_context();\n\n  for (size_t i = 0; i < cnt; ++i) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n    checkIter(i + 1, streamer);\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  storage->close();\n\n  storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n  
checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestMultiThread) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  constexpr size_t static sparse_dim_count = 32;\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnMultiThread\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = (float)i + baseKey;\n      }\n\n      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,\n                 
                    sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto t1 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000U, t1.get());\n  ASSERT_EQ(1000U, t2.get());\n  ASSERT_EQ(1000U, t3.get());\n  streamer->close();\n\n  // checking data\n  ASSERT_EQ(0, streamer->open(storage));\n  auto provider = streamer->create_sparse_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n\n  std::set<uint64_t> keys;\n\n  while (iter->is_valid()) {\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    keys.insert(iter->key());\n    iter->next();\n  }\n\n  ASSERT_EQ(3000, keys.size());\n\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n\n  // ====== multi thread search\n  size_t topk = 10;\n  size_t cnt = 3000;\n  auto knnSearch = [&]() {\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 1) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  
    for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = ((float)i + 1.1f);\n      }\n\n      ASSERT_EQ(0,\n                streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      ASSERT_EQ(\n          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                       sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(cnt - 1, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_FLOAT_EQ(1.0f, totalHits * 1.0f / totalCnts);\n  };\n\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  auto s3 = std::async(std::launch::async, knnSearch);\n  s1.wait();\n  s2.wait();\n  s3.wait();\n}\n\nTEST_F(FlatSparseStreamerTest, 
TestConcurrentAddAndSearch) {\n  constexpr size_t static sparse_dim_count = 32;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  // meta.set_metric(\"InnerProductSparse\", 0, Params());\n  meta.set_metric(\"SquaredEuclideanSparse\", 0, Params());\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessConcurrentAddAndSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = (float)i + baseKey;\n      }\n\n      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,\n                                     sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  auto knnSearch = [&]() {\n    size_t topk = 100;\n    size_t cnt = 3000;\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < 
cnt; i += 1) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = -((float)i + 1.1f);\n      }\n\n      ASSERT_EQ(0,\n                streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      ASSERT_EQ(\n          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                       sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(0, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE(totalHits * 1.0f / totalCnts > 0.8f);\n  };\n\n  auto t0 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000, t0.get());\n  auto t1 = std::async(std::launch::async, 
addVector, 1000, 1000);\n  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  ASSERT_EQ(1000, t1.get());\n  ASSERT_EQ(1000, t2.get());\n  s1.wait();\n  s2.wait();\n\n  // checking data\n  auto provider = streamer->create_sparse_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n}\n\nTEST_F(FlatSparseStreamerTest, TestFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestFilter\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 100UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(10U);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n 
     sparse_indices[j] = j * 20;\n      sparse_velues[j] = (float)i + 1.0f;\n    }\n\n    streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), qmeta, ctx);\n    p_keys[0].push_back(i);\n  }\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = -100.1;\n  }\n  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(10, results.size());\n  ASSERT_EQ(0, results[0].key());\n  ASSERT_EQ(1, results[1].key());\n  ASSERT_EQ(2, results[2].key());\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 0UL || key == 3UL) {\n      return true;\n    }\n    return false;\n  };\n  ctx->set_filter(filterFunc);\n\n  // after set filter\n  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  auto &results1 = ctx->result();\n  ASSERT_EQ(10, results1.size());\n  ASSERT_EQ(1, results1[0].key());\n  ASSERT_EQ(2, results1[1].key());\n  ASSERT_EQ(4, results1[2].key());\n\n  // linear\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto &results2 = ctx->result();\n  ASSERT_EQ(10, results2.size());\n  ASSERT_EQ(1, results2[0].key());\n  ASSERT_EQ(2, results2[1].key());\n  ASSERT_EQ(4, results2[2].key());\n\n  // linear by p_keys\n  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                   sparse_dim_count, sparse_indices.data(),\n                   sparse_velues.data(), p_keys, qmeta, ctx));\n  auto &results3 = ctx->result();\n  ASSERT_EQ(10, results3.size());\n  // for (int i = 0; i < 10; i++) {\n  //   std::cout << \"i: \" << results3[i].key() << std::endl;\n  // }\n\n  ASSERT_EQ(1, results3[0].key());\n  
ASSERT_EQ(2, results3[1].key());\n  ASSERT_EQ(4, results3[2].key());\n}\n\nTEST_F(FlatSparseStreamerTest, TestProvider) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestProvider.index\", true));\n  Params params;\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n\n  //! prepare data\n  size_t docs = 10000UL;\n  srand(Realtime::MilliSeconds());\n  std::vector<uint64_t> keys(docs);\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (true) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < docs; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](uint64_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < keys.size(); i++) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = keys[i];\n    }\n\n    for (size_t j = 0; j < sparse_dim_count; j++) {\n      ASSERT_FLOAT_EQ(sparse_velues[j], keys[i]);\n    }\n\n    
ASSERT_EQ(\n        0, streamer->add_impl(keys[i], sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx));\n\n    // std::cout << \"i: \" << i << \" key: \" << keys[i] << std::endl;\n  }\n\n  {\n    // check streamer\n    auto iter = streamer->create_sparse_provider()->create_iterator();\n    size_t cnt = 0;\n    while (iter->is_valid()) {\n      auto key = iter->key();\n\n      const uint32_t sparse_count = iter->sparse_count();\n      ASSERT_EQ(sparse_count, sparse_dim_count);\n\n      const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n\n      // std::cout << \"cnt: \" << cnt << \" key: \" << key\n      //           << \", gt_key: \" << keys[cnt] << std::endl;\n\n      // for (size_t j = 0; j < sparse_count; ++j) {\n      //   std::cout << \"j: \" << j << \" data: \" << data[j] << std::endl;\n      // }\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        ASSERT_FLOAT_EQ((float)key, data[j]);\n      }\n\n      cnt++;\n      iter->next();\n    }\n    ASSERT_EQ(cnt, docs);\n  }\n\n  // dump\n  // auto path1 = dir_ + \"/TestProvider\";\n  // auto dumper1 = IndexFactory::CreateDumper(\"FileDumper\");\n  // ASSERT_NE(dumper1, nullptr);\n  // ASSERT_EQ(0, dumper1->create(path1));\n  // ASSERT_EQ(0, streamer->dump(dumper1));\n  // ASSERT_EQ(0, dumper1->close());\n  // streamer->close();\n\n  // // check dump index\n  // IndexSparseSearcher::Pointer searcher =\n  //     IndexFactory::CreateSparseSearcher(\"FlatSparseSearcher\");\n  // auto container = IndexFactory::CreateStorage(\"MMapFileContainer\");\n  // ASSERT_EQ(0, container->init(Params()));\n  // ASSERT_EQ(0, container->load(path1));\n  // ASSERT_NE(searcher, nullptr);\n  // ASSERT_EQ(0, searcher->init(Params()));\n  // ASSERT_EQ(0, searcher->load(container, IndexSparseMeasure::Pointer()));\n  // auto iter = searcher->create_sparse_provider()->create_iterator();\n  // size_t cnt = 0;\n  // while (iter->is_valid()) {\n  //   
auto key = iter->key();\n\n  //   const uint32_t sparse_count = iter->sparse_count();\n  //   ASSERT_EQ(sparse_count, sparse_dim_count);\n\n  //   const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n  //   for (size_t j = 0; j < sparse_dim_count; ++j) {\n  //     ASSERT_FLOAT_EQ((float)key, data[j]);\n  //   }\n\n  //   cnt++;\n  //   iter->next();\n  // }\n  // ASSERT_EQ(cnt, docs);\n\n  // // check streamer\n  // ASSERT_EQ(0, streamer->open(storage));\n  // iter = streamer->create_sparse_provider()->create_iterator();\n  // cnt = 0;\n  // while (iter->is_valid()) {\n  //   auto key = iter->key();\n\n  //   const uint32_t sparse_count = iter->sparse_count();\n  //   ASSERT_EQ(sparse_count, sparse_dim_count);\n\n  //   const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n  //   for (size_t j = 0; j < sparse_dim_count; ++j) {\n  //     ASSERT_FLOAT_EQ((float)key, data[j]);\n  //   }\n\n  //   cnt++;\n  //   iter->next();\n  // }\n  // ASSERT_EQ(cnt, docs);\n\n  // auto searcher_provider = searcher->create_sparse_provider();\n  // auto streamer_provider = streamer->create_sparse_provider();\n  // for (size_t i = 0; i < keys.size(); ++i) {\n  //   {\n  //     uint32_t sparse_count;\n  //     std::string sparse_indices_buffer;\n  //     std::string sparse_values_buffer;\n\n  //     ASSERT_EQ(0, searcher_provider->get_sparse_vector(keys[i],\n  //     &sparse_count,\n  //                                                       &sparse_indices_buffer,\n  //                                                       &sparse_values_buffer));\n\n  //     const float *sparse_values_ptr =\n  //         reinterpret_cast<const float *>(sparse_values_buffer.data());\n  //     ASSERT_EQ(sparse_count, sparse_dim_count);\n  //     for (size_t j = 0; j < sparse_count; ++j) {\n  //       ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);\n  //     }\n  //   }\n\n  //   {\n  //     uint32_t sparse_count;\n  //     std::string sparse_indices_buffer;\n  
//     std::string sparse_values_buffer;\n  //     ASSERT_EQ(0, streamer_provider->get_sparse_vector(keys[i],\n  //     &sparse_count,\n  //                                                       &sparse_indices_buffer,\n  //                                                       &sparse_values_buffer));\n\n  //     const float *sparse_values_ptr =\n  //         reinterpret_cast<const float *>(sparse_values_buffer.data());\n  //     ASSERT_EQ(sparse_count, sparse_dim_count);\n  //     for (size_t j = 0; j < sparse_count; ++j) {\n  //       ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);\n  //     }\n  //   }\n  // }\n\n  // ASSERT_EQ(index_meta_ptr_->type(), streamer_provider->vector_type());\n}\n\nTEST_F(FlatSparseStreamerTest, TestParamsMaxDocCount) {\n  // init storage\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(storage != nullptr);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n\n\n  // init streamer\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, Params());\n\n  Params params;\n  uint32_t max_doc_count = 100U;\n  params.set(PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT, max_doc_count);\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // generate sparse data\n  size_t sparse_dim_count = 32;\n  size_t cnt = max_doc_count * 2;\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  // test add data\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  
IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    auto ret =\n        streamer->add_impl(i, sparse_dim_count, sparse_indices_list[i].data(),\n                           sparse_vec_list[i].data(), qmeta, ctx);\n    if (i < max_doc_count) {\n      ASSERT_EQ(0, ret);\n    } else {\n      ASSERT_EQ(IndexError_IndexFull, ret);\n    }\n  }\n\n  // test get data\n  uint32_t sparse_count;\n  std::string sparse_indices_buffer;\n  std::string sparse_values_buffer;\n  for (size_t i = 0; i < cnt; i++) {\n    auto ret = streamer->get_sparse_vector(\n        i, &sparse_count, &sparse_indices_buffer, &sparse_values_buffer);\n    if (i < max_doc_count) {\n      ASSERT_EQ(ret, 0);\n      ASSERT_EQ(0, streamer->get_sparse_vector(i, &sparse_count,\n                                               &sparse_indices_buffer,\n                                               &sparse_values_buffer));\n      ASSERT_EQ(sparse_dim_count, sparse_count);\n      const uint32_t *sparse_indices_ptr =\n          reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n      const float *sparse_values_ptr =\n          reinterpret_cast<const float *>(sparse_values_buffer.data());\n      for (size_t j = 0; j < sparse_count; ++j) {\n        ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n        ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n        // std::cout << \"1: \" << sparse_values_ptr[j]\n        //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n      }\n\n      // must clear ^_^\n      sparse_indices_buffer.clear();\n      sparse_values_buffer.clear();\n    } else {\n      ASSERT_EQ(ret, IndexError_NoExist);\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestParamsDataChunkSize) {\n  // init storage\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(storage != nullptr);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + 
\"/TestGeneral\", true));\n\n\n  // init streamer\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, Params());\n\n  Params params;\n  uint32_t data_chunk_size = 1024 * 1024;\n  uint32_t max_data_chunk_cnt = 1;\n  params.set(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE, data_chunk_size);\n  params.set(PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT, max_data_chunk_cnt);\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // generate sparse data\n  size_t sparse_dim_count = 128;\n  size_t cnt = 2000;\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  // test add data\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  uint32_t insert_fail_idx = -1U;\n  for (size_t i = 0; i < cnt; i++) {\n    auto ret =\n        streamer->add_impl(i, sparse_dim_count, sparse_indices_list[i].data(),\n                           sparse_vec_list[i].data(), qmeta, ctx);\n    if (insert_fail_idx != -1U) {\n      ASSERT_EQ(ret, IndexError_IndexFull);\n    }\n    if (ret != 0 && insert_fail_idx == -1U) {\n      insert_fail_idx = i;\n    }\n  }\n\n  // test get data\n  uint32_t sparse_count;\n  std::string sparse_indices_buffer;\n  std::string sparse_values_buffer;\n  for (size_t i = 0; i < cnt; i++) {\n    auto ret = streamer->get_sparse_vector(\n        i, &sparse_count, &sparse_indices_buffer, &sparse_values_buffer);\n    if (i < insert_fail_idx) {\n      ASSERT_EQ(ret, 0);\n      ASSERT_EQ(0, streamer->get_sparse_vector(i, &sparse_count,\n                
                               &sparse_indices_buffer,\n                                               &sparse_values_buffer));\n      ASSERT_EQ(sparse_dim_count, sparse_count);\n      const uint32_t *sparse_indices_ptr =\n          reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());\n      const float *sparse_values_ptr =\n          reinterpret_cast<const float *>(sparse_values_buffer.data());\n      for (size_t j = 0; j < sparse_count; ++j) {\n        ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);\n        ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);\n        // std::cout << \"1: \" << sparse_values_ptr[j]\n        //           << \" 2: \" << sparse_vec_list[i][j] << std::endl;\n      }\n\n      // must clear ^_^\n      sparse_indices_buffer.clear();\n      sparse_values_buffer.clear();\n    } else {\n      ASSERT_EQ(ret, IndexError_NoExist);\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestSharedContext) {\n  auto create_streamer = [](std::string path) {\n    IndexStreamer::Pointer streamer =\n        IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n    auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n    Params stg_params;\n    storage->init(stg_params);\n    storage->open(path, true);\n    Params params;\n    streamer->init(*index_meta_ptr_, params);\n    streamer->open(storage);\n    return streamer;\n  };\n  auto streamer1 = create_streamer(dir_ + \"TestSharedContext.index1\");\n  auto streamer2 = create_streamer(dir_ + \"TestSharedContext.index2\");\n  auto streamer3 = create_streamer(dir_ + \"TestSharedContext.index3\");\n\n  srand(Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n  auto do_test = [&](int start) {\n    auto code = rand() % 3;\n    IndexStreamer::Context::Pointer ctx;\n    switch (code) {\n      case 0:\n        ctx = streamer1->create_context();\n        break;\n      case 1:\n        ctx = streamer2->create_context();\n        break;\n      case 
2:\n        ctx = streamer3->create_context();\n        break;\n    };\n    ctx->set_topk(1);\n    uint64_t key1 = start + 0;\n    uint64_t key2 = start + 1;\n    uint64_t key3 = start + 2;\n\n    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);\n    NumericalVector<float> query_sparse_velues(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      query_sparse_indices[j] = j * 20;\n      query_sparse_velues[j] = 1.1f;\n    }\n\n    for (int i = 0; i < 1000; ++i) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = rand();\n      }\n\n      int ret = 0;\n      auto code = rand() % 3;\n      switch (code) {\n        case 0:\n          streamer1->add_impl(key1, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key1 += 3;\n          ret = streamer1->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n        case 1:\n          streamer2->add_impl(key2, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key2 += 3;\n          streamer2->add_impl(key2, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key2 += 3;\n          ret = streamer2->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n        case 2:\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, 
ctx);\n          key3 += 3;\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key3 += 3;\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key3 += 3;\n          ret = streamer3->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n      }\n      EXPECT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(1, results.size());\n      EXPECT_EQ(code, results[0].key() % 3);\n    }\n  };\n\n  auto t1 = std::async(std::launch::async, do_test, 0);\n  auto t2 = std::async(std::launch::async, do_test, 30000000);\n  t1.wait();\n  t2.wait();\n}\n\nTEST_F(FlatSparseStreamerTest, TestGroupBy) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    
sparse_velues.data(), qmeta, ctx));\n  }\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_topk = 200;\n  size_t group_num = 5;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  std::vector<std::string> expect_group_ids = {\n      \"g_0\", \"g_1\", \"g_2\", \"g_3\", \"g_4\", \"g_5\", \"g_6\", \"g_7\", \"g_8\", \"g_9\"};\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  auto t1 = Monotime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto t2 = Monotime::MicroSeconds();\n  std::cout << \"Search time: \" << (t2 - t1) << \" us\" << std::endl;\n\n  auto &group_result = ctx->group_result();\n  ASSERT_EQ(group_num, group_result.size());\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    const std::string &group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    ASSERT_EQ(group_id, expect_group_ids[i]);\n\n    ASSERT_GE(result.size(), group_topk);\n\n    for (uint32_t j = 0; j < result.size(); ++j) {\n      ASSERT_EQ(result[j].key() / 10 % 10, i);\n      // std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n      //           << std::setprecision(3) << \", Score: \" << result[j].score()\n      //           << std::endl;\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestGroupByNotEnoughNum) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = 
IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_topk = 200;\n  size_t group_num = 12;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  std::vector<std::string> expect_group_ids = {\n      \"g_0\", \"g_1\", \"g_2\", \"g_3\", \"g_4\", \"g_5\", \"g_6\", \"g_7\", \"g_8\", \"g_9\"};\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  auto t1 = Monotime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto t2 = Monotime::MicroSeconds();\n  std::cout << \"Search time: \" << (t2 - t1) << \" us\" << std::endl;\n\n  auto &group_result = ctx->group_result();\n  ASSERT_EQ(10, group_result.size());\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    const std::string 
&group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n    std::cout << \"Group ID: \" << group_id << std::endl;\n\n    ASSERT_EQ(group_id, expect_group_ids[i]);\n\n    ASSERT_GE(result.size(), group_topk);\n\n    for (uint32_t j = 0; j < result.size(); ++j) {\n      ASSERT_EQ(result[j].key() / 10 % 10, i);\n      // std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n      //           << std::setprecision(3) << \", Score: \" << result[j].score()\n      //           << std::endl;\n    }\n  }\n}\n\nTEST_F(FlatSparseStreamerTest, TestAddAndSearchWithID) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"FlatSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  Params params;\n  Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  constexpr size_t cnt = 1000U;\n  constexpr size_t sparse_dim_count = cnt;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i += 2) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j;\n      sparse_vec[j] = (i == j ? 
1.0f : 0.0f);\n    }\n    streamer->add_with_id_impl(i, sparse_dim_count, sparse_indices.data(),\n                               sparse_vec.data(), qmeta, ctx);\n  }\n  for (size_t i = 1; i < cnt; i += 2) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j;\n      sparse_vec[j] = (i == j ? 1.0f : 0.0f);\n    }\n    streamer->add_with_id_impl(i, sparse_dim_count, sparse_indices.data(),\n                               sparse_vec.data(), qmeta, ctx);\n  }\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt; i += 100) {\n    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);\n    NumericalVector<float> query_sparse_velues(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      query_sparse_indices[j] = j;\n      query_sparse_velues[j] = (i == j ? 
1.1f : 0.0f);\n    }\n    auto t1 = Realtime::MicroSeconds();\n    ASSERT_EQ(\n        0, streamer->search_impl(sparse_dim_count, query_sparse_indices.data(),\n                                 query_sparse_velues.data(), qmeta, knnCtx));\n    auto t2 = Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(\n                     sparse_dim_count, query_sparse_indices.data(),\n                     query_sparse_velues.data(), qmeta, linearCtx));\n    auto t3 = Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw core_knn_flat\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/hnsw\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/hnsw/hnsw_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_builder.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_framework.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 16;\n\nclass HnswBuilderTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswBuilderTest::_dir(\"hnswBuilderTest\");\nshared_ptr<IndexMeta> HnswBuilderTest::_index_meta_ptr;\n\nvoid HnswBuilderTest::SetUp(void) {\n  _index_meta_ptr.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  _index_meta_ptr->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n}\n\nvoid HnswBuilderTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", _dir.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswBuilderTest, TestGeneral) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      
make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n\n  ASSERT_EQ(0, builder->train(holder));\n\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  
ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswBuilderTest, TestMemquota) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.builder.memory_quota\", 100000UL);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(IndexError_NoMemory, builder->build(holder));\n}\n\nTEST_F(HnswBuilderTest, TestIndexThreads) {\n  IndexBuilder::Pointer builder1 = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder1, nullptr);\n  IndexBuilder::Pointer builder2 = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder2, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  std::srand(ailego::Realtime::MilliSeconds());\n  auto threads =\n      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);\n  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));\n\n  auto build_index1 = [&]() {\n    ASSERT_EQ(0, builder1->train(threads, holder));\n    ASSERT_EQ(0, builder1->build(threads, holder));\n  };\n  auto build_index2 = [&]() {\n    ASSERT_EQ(0, builder2->train(threads, holder));\n    ASSERT_EQ(0, 
builder2->build(threads, holder));\n  };\n\n  auto t1 = std::async(std::launch::async, build_index1);\n  auto t2 = std::async(std::launch::async, build_index2);\n  t1.wait();\n  t2.wait();\n\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndexThreads\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder1->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder2->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats1 = builder1->stats();\n  ASSERT_EQ(doc_cnt, stats1.built_count());\n  auto &stats2 = builder2->stats();\n  ASSERT_EQ(doc_cnt, stats2.built_count());\n}\n\nTEST_F(HnswBuilderTest, TestCosine) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(index_meta, params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + 
\"/TestCosine\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswBuilderTest, TestCosineFp16Converter) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  IndexMeta 
index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp16Converter\");\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n\n  ailego::Params params;\n\n  // params.set(\"proxima.hnsw.builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(index_meta, params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestCosineFp16Converter\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, 
dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswBuilderTest, TestCosineInt8Converter) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt8Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(index_meta, params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestCosineInt8Converter\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  
ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswBuilderTest, TestCosineInt4Converter) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt4Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = 
converter->result();\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(index_meta, params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestCosineInt4Converter\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC 
diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw/hnsw_searcher_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <cstdio>\n#include <future>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_builder.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"hnsw_params.h\"\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 16;\n\nclass HnswSearcherTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswSearcherTest::_dir(\"HnswSearcherTest/\");\nshared_ptr<IndexMeta> HnswSearcherTest::_index_meta_ptr;\n\nvoid HnswSearcherTest::SetUp(void) {\n  _index_meta_ptr.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  _index_meta_ptr->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n}\n\nvoid HnswSearcherTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", _dir.c_str());\n  
system(cmdBuf);\n}\n\nTEST_F(HnswSearcherTest, TestRnnSearch) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestRnnSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 0.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 50;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = results[topk / 2].score();\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  
ctx->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswSearcherTest, TestRnnSearchInnerProduct) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"InnerProduct\", 0, ailego::Params());\n\n  ASSERT_EQ(0, builder->init(index_meta, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestRnnSearchInnerProduct\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 50;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float 
radius = -results[topk / 2].score();\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(-radius, results[topk - 1].score());\n}\n\nTEST_F(HnswSearcherTest, TestRnnSearchCosine) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n\n  ASSERT_EQ(0, builder->init(index_meta, ailego::Params()));\n  ASSERT_EQ(0, builder->train(converted_holder));\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestRnnSearchCosine\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      
IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  std::string new_query;\n  IndexQueryMeta new_meta;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n  size_t topk = 50;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = 0.5f;\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswSearcherTest, TestRnnSearchMipsSquaredEuclidean) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  
params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"MipsSquaredEuclidean\", 0, ailego::Params());\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestStreamerDump.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t doc_cnt = 1000UL;\n  auto streamer_ctx = streamer->create_context();\n  ASSERT_TRUE(!!streamer_ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    streamer->add_impl(i, vec.data(), qmeta, streamer_ctx);\n  }\n\n  {\n    // Test Reset Threshold\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = 1.0;\n    }\n\n    size_t topk = 50;\n    streamer_ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));\n    auto &results = streamer_ctx->result();\n    ASSERT_EQ(topk, results.size());\n\n    float radius = -results[topk / 2].score();\n    streamer_ctx->set_threshold(radius);\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));\n    ASSERT_GT(topk, results.size());\n    for (size_t k = 0; k < results.size(); ++k) {\n      ASSERT_GE(radius, results[k].score());\n    }\n\n    streamer_ctx->reset_threshold();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));\n    ASSERT_EQ(topk, results.size());\n    ASSERT_LT(-radius, results[topk - 1].score());\n  }\n\n  auto path = _dir + \"/TestStreamerDump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  
ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto searcher_ctx = searcher->create_context();\n  ASSERT_TRUE(!!searcher_ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n\n  {\n    size_t topk = 50;\n    searcher_ctx->set_topk(topk);\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));\n    auto &results = searcher_ctx->result();\n    ASSERT_EQ(topk, results.size());\n\n    float radius = -results[topk / 2].score();\n    searcher_ctx->set_threshold(radius);\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));\n    ASSERT_GT(topk, results.size());\n    for (size_t k = 0; k < results.size(); ++k) {\n      ASSERT_GE(radius, results[k].score());\n    }\n\n    // Test Reset Threshold\n    searcher_ctx->reset_threshold();\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));\n    ASSERT_EQ(topk, results.size());\n    ASSERT_LT(-radius, results[topk - 1].score());\n  }\n}\n\nTEST_F(HnswSearcherTest, TestGeneral) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.max_neighbor_count\", 16);\n  
params.set(\"proxima.hnsw.builder.scaling_factor\", 16);\n  params.set(\"proxima.hnsw.builder.ef_construction\", 10);\n  params.set(\"proxima.hnsw.builder.thread_count\", 2);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 1);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!linearByPKeysCtx);\n  ASSERT_TRUE(!!knnCtx);\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 200;\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  linearCtx->set_topk(topk);\n  linearByPKeysCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  // do linear search test\n  {\n    std::vector<float> query(dim);\n    for (size_t i = 0; i < dim; ++i) {\n      query[i] = 3.1f;\n    }\n    ASSERT_EQ(0, searcher->search_bf_impl(query.data(), qmeta, linearCtx));\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(3UL, linearResult[0].key());\n    ASSERT_EQ(4UL, linearResult[1].key());\n    ASSERT_EQ(2UL, 
linearResult[2].key());\n    ASSERT_EQ(5UL, linearResult[3].key());\n    ASSERT_EQ(1UL, linearResult[4].key());\n    ASSERT_EQ(6UL, linearResult[5].key());\n    ASSERT_EQ(0UL, linearResult[6].key());\n    ASSERT_EQ(7UL, linearResult[7].key());\n    for (size_t i = 8; i < topk; ++i) {\n      ASSERT_EQ(i, linearResult[i].key());\n    }\n  }\n\n  // do linear search by p_keys test\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0] = {8, 9, 10, 11, 3, 2, 1, 0};\n  {\n    std::vector<float> query(dim);\n    for (size_t i = 0; i < dim; ++i) {\n      query[i] = 3.1f;\n    }\n    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(query.data(), p_keys, qmeta,\n                                                    linearByPKeysCtx));\n    auto &linearByPKeysResult = linearByPKeysCtx->result();\n    // Unsigned literal: size() is unsigned, like the other count checks here.\n    ASSERT_EQ(8UL, linearByPKeysResult.size());\n    ASSERT_EQ(3UL, linearByPKeysResult[0].key());\n    ASSERT_EQ(2UL, linearByPKeysResult[1].key());\n    ASSERT_EQ(1UL, linearByPKeysResult[2].key());\n    ASSERT_EQ(0UL, linearByPKeysResult[3].key());\n    ASSERT_EQ(8UL, linearByPKeysResult[4].key());\n    ASSERT_EQ(9UL, linearByPKeysResult[5].key());\n    ASSERT_EQ(10UL, linearByPKeysResult[6].key());\n    ASSERT_EQ(11UL, linearByPKeysResult[7].key());\n  }\n\n  size_t step = 50;\n  for (size_t i = 0; i < doc_cnt; i += step) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // TODO: check\n    // ASSERT_EQ(topk, knnResult.size());\n    // The full-size check is disabled, so require at least one hit before\n    // reading the first entry.\n    ASSERT_GT(knnResult.size(), 0UL);\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, 
linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      // A short kNN result list counts as misses; never index past its end.\n      if (k >= knnResult.size()) {\n        continue;\n      }\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  // Recall is hits over comparisons. totalCnts already covers every query and\n  // every rank, so no scaling by step belongs here.\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.90f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSearcherTest, TestClearAndReload) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  ailego::Params params;\n  params.set(\"proxima.hnsw.builder.thread_count\", 3);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  
searcherParams.set(\"proxima.hnsw.searcher.check_crc_enable\", true);\n  searcherParams.set(\"proxima.hnsw.searcher.max_scan_ratio\",\n                     1.1f);  // including upper layer\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!knnCtx);\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 100;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n  auto &knnResult = knnCtx->result();\n  ASSERT_EQ(topk, knnResult.size());\n  auto &linearResult = linearCtx->result();\n  ASSERT_EQ(topk, linearResult.size());\n  auto &stats = searcher->stats();\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n  // ASSERT_GT(stats.loaded_costtime(), 0UL);\n\n  //! 
cleanup\n  ASSERT_EQ(0, searcher->cleanup());\n  ASSERT_EQ(nullptr, searcher->create_context());\n  ASSERT_EQ(IndexError_Runtime,\n            searcher->load(storage, IndexMetric::Pointer()));\n  ASSERT_EQ(0UL, stats.loaded_count());\n\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  linearCtx = searcher->create_context();\n  knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!knnCtx);\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n  auto &knnResult1 = knnCtx->result();\n  ASSERT_EQ(topk, knnResult1.size());\n  auto &linearResult1 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult1.size());\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n\n  //! unload\n  ASSERT_EQ(0, searcher->unload());\n  ASSERT_EQ(nullptr, searcher->create_context());\n  ASSERT_EQ(0UL, stats.loaded_count());\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  linearCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  linearCtx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n  auto &linearResult2 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult2.size());\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n}\n\nTEST_F(HnswSearcherTest, TestFilter) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 100UL;\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n    p_keys[0].push_back(i);\n  }\n  
ailego::Params params;\n  params.set(\"proxima.hnsw.builder.thread_count\", 3);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.check_crc_enable\", true);\n  searcherParams.set(\"proxima.hnsw.searcher.max_scan_ratio\", 1.0f);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!linearByPKeysCtx);\n  ASSERT_TRUE(!!knnCtx);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 10;\n  linearCtx->set_topk(topk);\n  linearByPKeysCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linearByPKeysCtx));\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 10UL || key == 11UL) {\n      return true;\n    }\n    return false;\n  };\n  auto &knnResult = 
knnCtx->result();\n  ASSERT_EQ(topk, knnResult.size());\n  ASSERT_EQ(10UL, knnResult[0].key());\n  ASSERT_EQ(11UL, knnResult[1].key());\n  ASSERT_EQ(9UL, knnResult[2].key());\n\n  auto &linearResult = linearCtx->result();\n  ASSERT_EQ(topk, linearResult.size());\n  ASSERT_EQ(10UL, linearResult[0].key());\n  ASSERT_EQ(11UL, linearResult[1].key());\n  ASSERT_EQ(9UL, linearResult[2].key());\n\n  auto &linearByPKeysResult = linearByPKeysCtx->result();\n  ASSERT_EQ(topk, linearByPKeysResult.size());\n  ASSERT_EQ(10UL, linearByPKeysResult[0].key());\n  ASSERT_EQ(11UL, linearByPKeysResult[1].key());\n  ASSERT_EQ(9UL, linearByPKeysResult[2].key());\n\n  knnCtx->set_filter(filterFunc);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n  auto &knnResult1 = knnCtx->result();\n  ASSERT_EQ(topk, knnResult1.size());\n  ASSERT_EQ(9UL, knnResult1[0].key());\n  ASSERT_EQ(12UL, knnResult1[1].key());\n  ASSERT_EQ(8UL, knnResult1[2].key());\n\n  linearCtx->set_filter(filterFunc);\n  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n  auto &linearResult1 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult1.size());\n  ASSERT_EQ(9UL, linearResult1[0].key());\n  ASSERT_EQ(12UL, linearResult1[1].key());\n  ASSERT_EQ(8UL, linearResult1[2].key());\n\n  linearByPKeysCtx->set_filter(filterFunc);\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linearByPKeysCtx));\n  auto &linearByPKeysResult1 = linearByPKeysCtx->result();\n  ASSERT_EQ(topk, linearByPKeysResult1.size());\n  ASSERT_EQ(9UL, linearByPKeysResult1[0].key());\n  ASSERT_EQ(12UL, linearByPKeysResult1[1].key());\n  ASSERT_EQ(8UL, linearByPKeysResult1[2].key());\n}\n\nTEST_F(HnswSearcherTest, TestStreamerDump) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 
10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestStreamerDump.index\", true));\n  ASSERT_EQ(0, streamer->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  auto path = _dir + \"/TestStreamerDump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  size_t step = 50;\n  for (size_t i = 0; i < cnt; i += step) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 
0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * step * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSearcherTest, TestSharedContext) {\n  auto gen_holder = [](int start, size_t doc_cnt) {\n    auto holder =\n        make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n    uint64_t key = start;\n    for (size_t i = 0; i < doc_cnt; i++) {\n      NumericalVector<float> vec(dim);\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i;\n      }\n      key += 3;\n      holder->emplace(key, vec);\n    }\n    return holder;\n  };\n  auto gen_index = [&gen_holder](int start, size_t docs, std::string path) {\n    auto holder = gen_holder(start, docs);\n    IndexBuilder::Pointer 
builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n    ailego::Params params;\n    builder->init(*_index_meta_ptr, params);\n    builder->train(holder);\n    builder->build(holder);\n    auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n    dumper->create(path);\n    builder->dump(dumper);\n    dumper->close();\n\n    IndexSearcher::Pointer searcher =\n        IndexFactory::CreateSearcher(\"HnswSearcher\");\n    auto name = rand() % 2 ? \"FileReadStorage\" : \"MMapFileReadStorage\";\n    auto storage = IndexFactory::CreateStorage(name);\n    storage->open(path, false);\n    params.set(\"proxima.hnsw.searcher.visit_bloomfilter_enable\", rand() % 2);\n    searcher->init(ailego::Params());\n    searcher->load(storage, IndexMetric::Pointer());\n    return searcher;\n  };\n\n  srand(ailego::Realtime::MilliSeconds());\n  size_t docs1 = rand() % 500 + 100;\n  size_t docs2 = rand() % 5000 + 100;\n  size_t docs3 = rand() % 50000 + 100;\n  auto path1 = _dir + \"/TestSharedContext.index1\";\n  auto path2 = _dir + \"/TestSharedContext.index2\";\n  auto path3 = _dir + \"/TestSharedContext.index3\";\n  auto searcher1 = gen_index(0, docs1, path1);\n  auto searcher2 = gen_index(1, docs2, path2);\n  auto searcher3 = gen_index(2, docs3, path3);\n\n  srand(ailego::Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto do_test = [&]() {\n    IndexSearcher::Context::Pointer ctx;\n    switch (rand() % 3) {\n      case 0:\n        ctx = searcher1->create_context();\n        break;\n      case 1:\n        ctx = searcher2->create_context();\n        break;\n      case 2:\n        ctx = searcher3->create_context();\n        break;\n    }\n    ctx->set_topk(10);\n\n    int ret = 0;\n    for (int i = 0; i < 100; ++i) {\n      NumericalVector<float> query(dim);\n      for (size_t j = 0; j < dim; ++j) {\n        query[j] = i + 0.1f;\n      }\n\n      auto code = rand() % 6;\n      switch (code) {\n        case 0:\n          ret = 
searcher1->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 1:\n          ret = searcher2->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 2:\n          ret = searcher3->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 3:\n          ret = searcher1->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n        case 4:\n          ret = searcher2->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n        case 5:\n          ret = searcher3->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n      }\n\n      EXPECT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(10, results.size());\n      for (int k = 0; k < 10; ++k) {\n        EXPECT_EQ(code % 3, results[k].key() % 3);\n      }\n    }\n  };\n  auto t1 = std::async(std::launch::async, do_test);\n  auto t2 = std::async(std::launch::async, do_test);\n  t1.wait();\n  t2.wait();\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  storage->init(ailego::Params());\n  storage->open(_dir + \"/TestSharedContext.index4\", true);\n  streamer->init(*_index_meta_ptr, ailego::Params());\n  streamer->open(storage);\n  NumericalVector<float> query(dim);\n  auto ctx1 = streamer->create_context();\n  EXPECT_EQ(IndexError_Unsupported,\n            searcher1->search_impl(query.data(), qmeta, ctx1));\n\n  auto ctx2 = searcher1->create_context();\n  EXPECT_EQ(IndexError_Unsupported,\n            streamer->search_impl(query.data(), qmeta, ctx2));\n}\n\nTEST_F(HnswSearcherTest, TestProvider) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  std::vector<key_t> keys(doc_cnt);\n  srand(ailego::Realtime::MilliSeconds());\n  bool rand_key 
= rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<uint16_t> dt(\n        0, std::numeric_limits<uint16_t>::max());\n    for (size_t i = 0; i < doc_cnt; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = ailego::Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = keys[i];\n    }\n    ASSERT_TRUE(holder->emplace(keys[i], vec));\n  }\n  ailego::Params params;\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestProvider\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 1);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_provider();\n  for (size_t i = 0; i < keys.size(); ++i) {\n    const float *d1 =\n        reinterpret_cast<const float 
*>(provider->get_vector(keys[i]));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d1[j], keys[i]);\n    }\n  }\n\n  auto iter = provider->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, doc_cnt);\n\n  ASSERT_EQ(dim, provider->dimension());\n  ASSERT_EQ(_index_meta_ptr->element_size(), provider->element_size());\n  ASSERT_EQ(_index_meta_ptr->data_type(), provider->data_type());\n}\n\nTEST_F(HnswSearcherTest, TestMipsEuclideanMetric) {\n  constexpr size_t static dim = 32;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"MipsSquaredEuclidean\", 0, ailego::Params());\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  const size_t COUNT = 10000UL;\n  for (size_t i = 0; i < COUNT; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 100.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(meta, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestMipsEuclideanMetric\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ailego::Params params;\n  params.set(\"proxima.hnsw.searcher.ef\", 10);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(params));\n\n  
auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 50;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  EXPECT_EQ(results.size(), topk);\n  EXPECT_NEAR((uint64_t)(COUNT - 1), results[0].key(), 20);\n}\n\nTEST_F(HnswSearcherTest, TestRandomPaddingTopk) {\n  std::mt19937 mt{};\n  std::uniform_real_distribution<float> gen(0.0f, 1.0f);\n  constexpr size_t static dim = 8;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  const size_t COUNT = 10000UL;\n  for (size_t i = 0; i < COUNT; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = gen(mt);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  ASSERT_EQ(0, builder->init(meta, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestRandomPadding\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ailego::Params params;\n  params.set(\"proxima.hnsw.searcher.force_padding_result_enable\", true);\n  params.set(\"proxima.hnsw.searcher.scan_ratio\", 0.01f);\n  ASSERT_TRUE(searcher 
!= nullptr);\n  ASSERT_EQ(0, searcher->init(params));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  std::uniform_int_distribution<uint32_t> gen_int(1, COUNT);\n  size_t topk = gen_int(mt);\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  EXPECT_EQ(results.size(), topk);\n  for (size_t i = 0; i < results.size(); ++i) {\n    for (size_t j = 0; j < i; ++j) {\n      EXPECT_NE(results[i].key(), results[j].key());\n    }\n  }\n\n  ctx->set_filter([](uint64_t key) { return true; });\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results1 = ctx->result();\n  EXPECT_EQ(results1.size(), 0);\n}\n\n\nTEST_F(HnswSearcherTest, TestBruteForceSetupInContext) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.builder.max_neighbor_count\", 16);\n  params.set(\"proxima.hnsw.builder.scaling_factor\", 16);\n  params.set(\"proxima.hnsw.builder.ef_construction\", 10);\n  params.set(\"proxima.hnsw.builder.thread_count\", 2);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = 
IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 1);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 200;\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  bool set_bf_threshold = false;\n  bool use_update = false;\n\n  size_t step = 50;\n  for (size_t i = 0; i < doc_cnt; i += step) {\n    auto linearCtx = searcher->create_context();\n    auto knnCtx = searcher->create_context();\n\n    ASSERT_TRUE(!!linearCtx);\n    ASSERT_TRUE(!!linearCtx);\n\n    linearCtx->set_topk(topk);\n    knnCtx->set_topk(topk);\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      if (use_update) {\n        ailego::Params searcherParamsExtra;\n\n        searcherParamsExtra.set(\"proxima.hnsw.searcher.brute_force_threshold\",\n                                doc_cnt);\n        knnCtx->update(searcherParamsExtra);\n      } else {\n        knnCtx->set_bruteforce_threshold(doc_cnt);\n      }\n\n      use_update = !use_update;\n    }\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, 
searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n    // auto t3 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      linearTotalTime += t2 - t1;\n    } else {\n      knnTotalTime += t2 - t1;\n    }\n\n    set_bf_threshold = !set_bf_threshold;\n\n    auto &knnResult = knnCtx->result();\n    // TODO: check\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * step * step * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.90f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSearcherTest, TestCosine) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto 
converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestCosine.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n    IndexQueryMeta new_meta;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  auto path = _dir + \"/TestCosine\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  
ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!linearByPKeysCtx);\n  ASSERT_TRUE(!!knnCtx);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  NumericalVector<float> qvec(dim);\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        qvec[j] = fixed_value;\n      else\n        qvec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0,\n              reformer->transform(qvec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 
1.0f / query_cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.90f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSearcherTest, TestFetchVector) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestFetchVector.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  auto path = _dir + \"/TestFetchVector\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, 
searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    const void *vector = searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    float vector_value = *(float *)(vector);\n    ASSERT_EQ(vector_value, i);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n    float vector_value = *((float *)(knnResult[0].vector()));\n    ASSERT_EQ(vector_value, i);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswSearcherTest, TestFetchVectorCosine) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  
params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestFetchVectorCosine.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  auto path = _dir + \"/TestFetchVectorCosine\";\n  auto dumper = 
IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  NumericalVector<float> qvec(dim);\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        qvec[j] = fixed_value;\n      else\n        qvec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0,\n              reformer->transform(qvec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = 
ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\n\nTEST_F(HnswSearcherTest, TestFetchVectorCosineHalfFloatConverter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP16, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineHalfFloatConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  
converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(\n      0, storage->open(_dir + \"/TestFetchVectorCosineHalfFloatConverter.index\",\n                       true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP16, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 0.1;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n\n  std::vector<NumericalVector<uint16_t>> vecs;\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<uint16_t> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      float value = dist(gen);\n      vec[j] = ailego::FloatHelper::ToFP16(value);\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  auto path = _dir + \"/TestFetchVectorCosineHalfFloatConverter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, 
searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    uint16_t expected_vec_value = vecs[i][dim - 1];\n\n    const void *vector = searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(uint16_t));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    uint16_t vector_value = *((uint16_t *)(denormalized_vec.data()) + dim - 1);\n    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);\n\n    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);\n\n    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  NumericalVector<uint16_t> qvec(dim);\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = 
linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(uint16_t));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    uint16_t expected_vec_value = vec[dim - 1];\n    uint16_t vector_value =\n        *(((uint16_t *)(denormalized_vec.data()) + dim - 1));\n\n    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);\n    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);\n\n    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswSearcherTest, TestFetchVectorCosineFp16Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp16Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  
auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestFetchVectorCosineFp16Converter.index\",\n                             true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 0.1;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n\n  std::vector<NumericalVector<float>> vecs;\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  auto path = _dir + \"/TestFetchVectorCosineFp16Converter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    float expected_vec_value = vecs[i][dim - 1];\n\n    const void *vector = 
searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n\n    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  NumericalVector<float> qvec(dim);\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float expected_vec_value = vec[dim - 1];\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n\n    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);\n  }\n\n  std::cout << 
\"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswSearcherTest, TestFetchVectorCosineInt8Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt8Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestFetchVectorCosineInt8Converter.index\",\n                             true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; 
++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  auto path = _dir + \"/TestFetchVectorCosineInt8Converter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  NumericalVector<float> qvec(dim);\n  for (size_t i = 0; i < query_cnt; i++) {\n    
float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        qvec[j] = fixed_value;\n      else\n        qvec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0,\n              reformer->transform(qvec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswSearcherTest, TestFetchVectorCosineInt4Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, 
true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt4Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestFetchVectorCosineInt4Converter.index\",\n                             true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  auto path = _dir + \"/TestFetchVectorCosineInt4Converter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n  
    IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 100);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = searcher->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t topk = 100;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  NumericalVector<float> qvec(dim);\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        qvec[j] = fixed_value;\n      else\n        qvec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0,\n              reformer->transform(qvec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    
knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswSearcherTest, TestGroup) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGroup\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 50);\n  searcherParams.set(\"proxima.hnsw.searcher.max_scan_ratio\", 0.8);\n  ASSERT_EQ(0, 
searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = doc_cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n\n  total_time += t2 - t1;\n\n  std::cout << \"total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    // const std::string &group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n  }\n\n  // do linear search by p_keys test\n  auto groupbyFuncLinear = [](uint64_t key) {\n    uint32_t group_id = key % 10;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  auto linear_pk_ctx = searcher->create_context();\n\n  
linear_pk_ctx->set_group_params(group_num, group_topk);\n  linear_pk_ctx->set_group_by(groupbyFuncLinear);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};\n\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linear_pk_ctx));\n  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();\n  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);\n\n  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {\n    // const std::string &group_id = linear_by_pkeys_group_result[i].group_id();\n    auto &result = linear_by_pkeys_group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n\n    ASSERT_EQ(10 - i, result[0].key());\n  }\n}\n\nTEST_F(HnswSearcherTest, TestGroupNotEnoughNum) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGroupNotEnoughNum\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, 
builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 50);\n  searcherParams.set(\"proxima.hnsw.searcher.max_scan_ratio\", 0.8);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 12;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = doc_cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n  total_time += t2 - t1;\n\n  std::cout << \"total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n  ASSERT_EQ(group_result.size(), 10);\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    // const std::string &group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << 
result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n  }\n}\n\nTEST_F(HnswSearcherTest, TestGroupInBruteforceSearch) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"HnswBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = _dir + \"/TestGroupInBruteforceSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.searcher.ef\", 50);\n  searcherParams.set(\"proxima.hnsw.searcher.max_scan_ratio\", 0.8);\n  searcherParams.set(\"proxima.hnsw.searcher.brute_force_threshold\",\n                     2 * doc_cnt);\n\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) 
{\n    uint32_t group_id = key / 10 % 10;\n\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = doc_cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n  total_time += t2 - t1;\n\n  std::cout << \"total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n  ASSERT_EQ(group_result.size(), 5);\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    // const std::string &group_id = group_result[i].group_id();\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n  }\n}\n\nTEST_F(HnswSearcherTest, TestBinaryConverter) {\n  uint32_t dimension = 256;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  // params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  // params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  // params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  // params.set(PARAM_HNSW_STREAMER_EF, 5);\n  // params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dimension);\n  index_meta_raw.set_metric(\"InnerProduct\", 0, 
ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"BinaryConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(_dir + \"/TestBinaryConverter.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n  std::vector<NumericalVector<float>> vecs;\n\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string new_vec;\n    IndexQueryMeta new_meta;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  auto path = _dir + \"/TestBinaryConverter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n\n  ailego::Params searcherParams;\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  
auto read_storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  size_t query_cnt = 200U;\n  auto knnCtx = searcher->create_context();\n\n  float epison = 1e-6;\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    size_t topk = 50;\n    knnCtx->set_topk(topk);\n    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));\n    auto &results = knnCtx->result();\n    ASSERT_EQ(topk, results.size());\n    ASSERT_EQ(i, results[0].key());\n    ASSERT_NEAR(0, results[0].score(), epison);\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc",
    "content": "#include <future>\n#include <string>\n#include <vector>\n#include <ailego/utility/math_helper.h>\n#include <ailego/utility/memory_helper.h>\n#include <algorithm/hnsw/hnsw_params.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_framework.h>\n#include <zvec/core/framework/index_streamer.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nconstexpr size_t static dim = 16;\n\nclass HnswStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void hybrid_scale(std::vector<float> &dense_value,\n                    std::vector<float> &sparse_value, float alpha_scale);\n\n  static std::string dir_;\n  static std::shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string HnswStreamerTest::dir_(\"streamer_test/\");\nstd::shared_ptr<IndexMeta> HnswStreamerTest::index_meta_ptr_;\n\nvoid HnswStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (std::nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid HnswStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswStreamerTest, TestHnswSearch) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n 
 ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/HnswSearch\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 10000UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/HnswSearch\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 3;\n  auto provider = read_streamer->create_provider();\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i 
== cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n\n  ElapsedTime elapsed_time;\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n\n  read_streamer->close();\n  read_streamer.reset();\n  cout << \"Elapsed time: \" << elapsed_time.milli_seconds() << \" ms\" << endl;\n}\n\nTEST_F(HnswStreamerTest, TestHnswSearchMMap) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  Params params;\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/HnswSearchMMap\", true));\n  ASSERT_EQ(0, write_streamer->open(storage));\n\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 10000UL;\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    write_streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  ElapsedTime elapsed_time;\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/HnswSearchMMap\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n  size_t topk = 3;\n  auto provider = read_streamer->create_provider();\n  for (size_t i = 0; i < cnt; i += 1) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, 
read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(topk, result1.size());\n    IndexStorage::MemoryBlock block;\n    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));\n    const float *data = (float *)block.data();\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(data[j], i);\n    }\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n\n  read_streamer->close();\n  read_streamer.reset();\n  cout << \"Elapsed time: \" << elapsed_time.milli_seconds() << \" ms\" << endl;\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw/hnsw_streamer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_streamer.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <iostream>\n#include <memory>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 16;\n\nclass HnswStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string dir_;\n  static shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string HnswStreamerTest::dir_(\"streamer_test/\");\nshared_ptr<IndexMeta> HnswStreamerTest::index_meta_ptr_;\n\nvoid HnswStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid HnswStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswStreamerTest, TestAddVector) {\n  IndexStreamer::Pointer 
streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.streamer.scaling_factor\", 5U);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/AddVector\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < 1000UL; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\n// TODO: context cannot shared by different searcher\nTEST_F(HnswStreamerTest, TestLinearSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.streamer.scaling_factor\", 5U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  NumericalVector<float> 
vec(dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(1U);\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 10.1f;\n  }\n  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(10, result[0].key());\n  ASSERT_EQ(11, result[1].key());\n  ASSERT_EQ(5, result[10].key());\n  ASSERT_EQ(0, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n}\n\n// TODO: context cannot shared by different searcher\n\nTEST_F(HnswStreamerTest, TestLinearSearchByKeys) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw.streamer.get_vector_enable\", true);\n  ailego::Params stg_params;\n  auto storage = 
IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchByKeys.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  NumericalVector<float> vec(dim);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0].resize(cnt);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    p_keys[0][i] = i;\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ctx->set_topk(1U);\n    ASSERT_EQ(\n        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(i, result1[0].key());\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    ctx->set_topk(topk);\n    ASSERT_EQ(\n        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());\n    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? 
i - 2 : i - 1), result2[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = 10.1f;\n    }\n    ASSERT_EQ(\n        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(10, result[0].key());\n    ASSERT_EQ(11, result[1].key());\n    ASSERT_EQ(5, result[10].key());\n    ASSERT_EQ(0, result[20].key());\n    ASSERT_EQ(30, result[30].key());\n    ASSERT_EQ(35, result[35].key());\n    ASSERT_EQ(99, result[99].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = 10.1f;\n    }\n    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};\n    ASSERT_EQ(\n        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(3U, result.size());\n    ASSERT_EQ(10, result[0].key());\n    ASSERT_EQ(15, result[1].key());\n    ASSERT_EQ(1, result[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = 9.1f;\n    }\n    p_keys[0].clear();\n    for (size_t j = 0; j < cnt; j += 10) {\n      p_keys[0].push_back((uint64_t)j);\n    }\n    ASSERT_EQ(\n        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(10, result[0].key());\n    ASSERT_EQ(0, result[1].key());\n    ASSERT_EQ(100, result[10].key());\n    ASSERT_EQ(200, result[20].key());\n    ASSERT_EQ(300, result[30].key());\n    ASSERT_EQ(350, result[35].key());\n    ASSERT_EQ(990, result[99].key());\n  }\n}\n\nTEST_F(HnswStreamerTest, TestKnnSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  
params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestKnnSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == 
knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswStreamerTest, TestAddAndSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestAddAndSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  // streamer->print_debug_info();\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t 
linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt; i += 100) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswStreamerTest, TestKnnSearchRandomData) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  constexpr size_t static dim = 128;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);\n  
params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 20);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 200);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_EF, 10);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestKnnSearchRandomData\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t cnt = 1500;\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n    streamer->add_impl(i + cnt, vec.data(), qmeta, ctx);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  size_t topk = 100;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  cnt = 500;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t3 - t2;\n    linearTotalTime += t2 - t1;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n\n    topk1Hits += 
linearResult[0].key() == knnResult[0].key();\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 1.0f / cnt;\n  // float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.50f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 5.0f);\n}\n\nTEST_F(HnswStreamerTest, TestOpenClose) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  constexpr size_t static dim = 2048;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ASSERT_NE(nullptr, storage2);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TessOpenAndClose1\", true));\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TessOpenAndClose2\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto checkIter = [](size_t base, size_t total,\n                      IndexStreamer::Pointer 
&streamer) {\n    auto provider = streamer->create_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = base;\n    size_t cnt = 0;\n    while (iter->is_valid()) {\n      float *data = (float *)iter->data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < dim; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, data[d]);\n      }\n      iter->next();\n      cur += 2;\n      cnt++;\n    }\n    ASSERT_EQ(cnt, total);\n  };\n\n  size_t testCnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < testCnt; i += 2) {\n    float v1 = (float)i;\n    ASSERT_EQ(0, streamer->open(storage1));\n    auto ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    std::vector<float> vec1(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec1[d] = v1;\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec1.data(), qmeta, ctx));\n    checkIter(0, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n\n    float v2 = (float)(i + 1);\n    std::vector<float> vec2(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec2[d] = v2;\n    }\n    ASSERT_EQ(0, streamer->open(storage2));\n    ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    ASSERT_EQ(0, streamer->add_impl(i + 1, vec2.data(), qmeta, ctx));\n    checkIter(1, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n  }\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer1->init(meta, params));\n  ASSERT_EQ(0, streamer1->open(storage1));\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer2->init(meta, params));\n  ASSERT_EQ(0, streamer2->open(storage2));\n\n  checkIter(0, testCnt / 2, streamer1);\n  checkIter(1, testCnt 
/ 2, streamer2);\n}\n\nTEST_F(HnswStreamerTest, TestCreateIterator) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestCreateIterator\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      float *data = (float *)iter->data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < dim; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, data[d]);\n      }\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    checkIter(i + 1, streamer);\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    const float *data = (const float *)provider->get_vector(i);\n    ASSERT_NE(data, nullptr);\n    for (size_t j = 0; j < dim; ++j) {\n      
ASSERT_FLOAT_EQ(i, data[j]);\n    }\n  }\n}\n\nTEST_F(HnswStreamerTest, TestNoInit) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  streamer->cleanup();\n}\n\nTEST_F(HnswStreamerTest, TestForceFlush) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  stg_params.set(\"proxima.mmap_file.storage.copy_on_write\", true);\n  stg_params.set(\"proxima.mmap_file.storage.force_flush\", true);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      float *data = (float *)iter->data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < dim; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, data[d]);\n      }\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    checkIter(i + 1, streamer);\n  }\n\n  
streamer->flush(0UL);\n  streamer->close();\n  storage->close();\n\n  storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    const float *data = (const float *)provider->get_vector(i);\n    ASSERT_NE(data, nullptr);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(i, data[j]);\n    }\n  }\n}\n\nTEST_F(HnswStreamerTest, TestKnnMultiThread) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  constexpr size_t static dim = 32;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 10);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 64);\n  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_EF, 32);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnMultiThread\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    NumericalVector<float> vec(dim);\n    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i 
< addCnt; i++) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = (float)i + baseKey;\n      }\n      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto t1 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000U, t1.get());\n  ASSERT_EQ(1000U, t2.get());\n  ASSERT_EQ(1000U, t3.get());\n  streamer->close();\n\n  // checking data\n  ASSERT_EQ(0, streamer->open(storage));\n  auto provider = streamer->create_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    float *data = (float *)iter->data();\n    for (size_t d = 0; d < dim; ++d) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n\n  // ====== multi thread search\n  size_t topk = 100;\n  size_t cnt = 3000;\n  auto knnSearch = [&]() {\n    NumericalVector<float> vec(dim);\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 1) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i + 0.1f;\n      }\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{0, 
1, 2}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                      linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(i, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    // printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  auto s3 = std::async(std::launch::async, knnSearch);\n  s1.wait();\n  s2.wait();\n  s3.wait();\n}\n\nTEST_F(HnswStreamerTest, TestKnnConcurrentAddAndSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  constexpr size_t static dim = 32;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 10);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 64);\n  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n 
 params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 4096);\n  params.set(PARAM_HNSW_STREAMER_EF, 32);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnConcurrentAddAndSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    NumericalVector<float> vec(dim);\n    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n    auto ctx = streamer->create_context();\n    size_t succAdd = 0;\n    for (size_t i = 0; i < addCnt; i++) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = (float)i + baseKey;\n      }\n      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  // ====== multi thread search\n  auto knnSearch = [&]() {\n    size_t topk = 100;\n    size_t cnt = 3000;\n    NumericalVector<float> vec(dim);\n    auto linearCtx = streamer->create_context();\n    auto linearByPKeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    linearCtx->set_topk(topk);\n    linearByPKeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n    for (size_t i = 0; i < cnt; i += 1) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i + 0.1f;\n      }\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                      
linearByPKeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      auto &r3 = linearByPKeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n// ASSERT_EQ(i, r2[0].key);\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    //        printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n  auto t0 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000, t0.get());\n  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  ASSERT_EQ(1000, t1.get());\n  ASSERT_EQ(1000, t2.get());\n  s1.wait();\n  s2.wait();\n\n  // checking data\n  auto provider = streamer->create_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    float *data = (float *)iter->data();\n    for (size_t d = 0; d < dim; ++d) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n}\n\nTEST_F(HnswStreamerTest, 
TestBfThreshold) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_EF, 16);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessBfThreshold\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 100000;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(1U);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  streamer->flush(0UL);\n  streamer->close();\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer1, nullptr);\n  auto params1 = params;\n  params1.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, cnt - 1);\n  ASSERT_EQ(0, streamer1->init(*index_meta_ptr_, params1));\n  ASSERT_EQ(0, streamer1->open(storage));\n  auto ctx1 = streamer1->create_context();\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_NE(streamer2, nullptr);\n  auto params2 = params;\n  params2.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, cnt);\n  ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params2));\n  ASSERT_EQ(0, streamer2->open(storage));\n  auto ctx2 = streamer2->create_context();\n\n  // do searcher\n  size_t cost1 = 0;\n  size_t cost2 = 0;\n  for (size_t i = 0; i < 100; ++i) {\n    auto t1 = ailego::Monotime::MicroSeconds();\n    ASSERT_EQ(0, streamer1->search_impl(vec.data(), qmeta, ctx1));\n    auto t2 = ailego::Monotime::MicroSeconds();\n    ASSERT_EQ(0, 
streamer2->search_impl(vec.data(), qmeta, ctx2));\n    auto t3 = ailego::Monotime::MicroSeconds();\n    cost1 += t2 - t1;\n    cost2 += t3 - t2;\n  }\n\n  ASSERT_LT(cost1, cost2);\n\n  ailego::Params update_params;\n  update_params.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);\n  update_params.set(PARAM_HNSW_STREAMER_EF, 50);\n  ctx1->set_debug_mode(true);\n  ctx1->update(update_params);\n  ASSERT_EQ(0, streamer1->search_impl(vec.data(), qmeta, ctx1));\n  LOG_DEBUG(\"%s\", ctx1->debug_string().c_str());\n}\n\nTEST_F(HnswStreamerTest, TestFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 1000);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessFilter\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(10U);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    p_keys[0].push_back(i);\n  }\n\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 100.1;\n  }\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  
auto &results = ctx->result();\n  ASSERT_EQ(10, results.size());\n  ASSERT_EQ(100, results[0].key());\n  ASSERT_EQ(101, results[1].key());\n  ASSERT_EQ(99, results[2].key());\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 100UL || key == 101UL) {\n      return true;\n    }\n    return false;\n  };\n  ctx->set_filter(filterFunc);\n\n  // after set filter\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto &results1 = ctx->result();\n  ASSERT_EQ(10, results1.size());\n  ASSERT_EQ(99, results1[0].key());\n  ASSERT_EQ(102, results1[1].key());\n  ASSERT_EQ(98, results1[2].key());\n\n  // linear\n  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));\n  auto &results2 = ctx->result();\n  ASSERT_EQ(10, results2.size());\n  ASSERT_EQ(99, results2[0].key());\n  ASSERT_EQ(102, results2[1].key());\n  ASSERT_EQ(98, results2[2].key());\n\n  // linear by p_keys\n  ASSERT_EQ(0,\n            streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));\n  auto &results3 = ctx->result();\n  ASSERT_EQ(10, results3.size());\n  ASSERT_EQ(99, results3[0].key());\n  ASSERT_EQ(102, results3[1].key());\n  ASSERT_EQ(98, results3[2].key());\n}\n\nTEST_F(HnswStreamerTest, TestMaxIndexSize) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  constexpr size_t static dim = 128;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessMaxIndexSize\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t vsz0 = 0;\n  size_t rss0 = 0;\n  if 
(!ailego::MemoryHelper::SelfUsage(&vsz0, &rss0)) {\n    // do not check if get mem usage failed\n    return;\n  }\n  if (vsz0 > 1024 * 1024 * 1024 * 1024UL) {\n    // asan mode\n    return;\n  }\n\n  NumericalVector<float> vec(dim);\n  size_t writeCnt1 = 10000;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  for (size_t i = 0; i < writeCnt1; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n  size_t vsz1 = 0;\n  size_t rss1 = 0;\n  ailego::MemoryHelper::SelfUsage(&vsz1, &rss1);\n  size_t increment1 = rss1 - rss0;\n  ASSERT_GT(writeCnt1 * 128 * 4 + writeCnt1 * 100 * 4, increment1 * 0.8f);\n  ASSERT_LT(writeCnt1 * 128 * 4 + writeCnt1 * 100 * 4, increment1 * 1.2f);\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(HnswStreamerTest, TestKnnCleanUp) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TessKnnCluenUp1\", true));\n  ailego::Params params;\n  constexpr size_t static dim1 = 32;\n  IndexMeta meta1(IndexMeta::DataType::DT_FP32, dim1);\n  meta1.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  NumericalVector<float> vec1(dim1);\n  ASSERT_EQ(0, streamer->init(meta1, params));\n  ASSERT_EQ(0, streamer->open(storage1));\n  IndexQueryMeta qmeta1(IndexMeta::DataType::DT_FP32, dim1);\n  auto ctx1 = streamer->create_context();\n  ASSERT_EQ(0, streamer->add_impl(1, vec1.data(), qmeta1, ctx1));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage2);\n  ASSERT_EQ(0, storage2->init(stg_params));\n  
ASSERT_EQ(0, storage2->open(dir_ + \"TessKnnCluenUp2\", true));\n  constexpr size_t static dim2 = 64;\n  IndexMeta meta2(IndexMeta::DataType::DT_FP32, dim2);\n  meta2.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  NumericalVector<float> vec2(dim2);\n  ASSERT_EQ(0, streamer->init(meta2, params));\n  ASSERT_EQ(0, streamer->open(storage2));\n  IndexQueryMeta qmeta2(IndexMeta::DataType::DT_FP32, dim2);\n  auto ctx2 = streamer->create_context();\n  ASSERT_EQ(0, streamer->add_impl(2, vec2.data(), qmeta2, ctx2));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n}\n\nTEST_F(HnswStreamerTest, TestIndexSizeQuota) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestIndexSizeQuota\", true));\n  ailego::Params params;\n  constexpr size_t static dim = 512;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 2 * 1024 * 1024U);\n  params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 100 * 1024U);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  NumericalVector<float> vec(dim);\n  size_t writeCnt1 = 850;\n  int ret = 0;\n  auto ctx = streamer->create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < writeCnt1; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    int iRet = streamer->add_impl(i, vec.data(), qmeta, ctx);\n    if (iRet != 0) {\n      ret = iRet;\n    }\n  }\n  ASSERT_EQ(IndexError_IndexFull, ret);\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n}\n\nTEST_F(HnswStreamerTest, TestBloomFilter) {\n  
IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestBloomFilter\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  ctx->set_topk(10U);\n  size_t cnt = 5000;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n    if ((i + 1) % 10 == 0) {\n      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n      auto &results = ctx->result();\n      ASSERT_EQ(10, results.size());\n    }\n  }\n}\n\nTEST_F(HnswStreamerTest, TestStreamerParams) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestStreamerParams\", true));\n  ailego::Params params;\n  params.set(\"proxima.hnsw.streamer.docs_hard_limit\", 5);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, 
streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto ctx = streamer->create_context();\n  ASSERT_EQ(0, streamer->add_impl(1, vec.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(2, vec.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(3, vec.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(4, vec.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(5, vec.data(), qmeta, ctx));\n  ASSERT_EQ(IndexError_IndexFull,\n            streamer->add_impl(6, vec.data(), qmeta, ctx));\n}\n\n#if 0\nTEST_F(HnswStreamerTest, TestCheckCrc)\n{\n    IndexStreamer::Pointer streamer =\n        IndexFactory::CreateStreamer(\"HnswStreamer\");\n    ASSERT_TRUE(streamer != nullptr);\n\n    auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n    ASSERT_NE(nullptr, storage);\n    ailego::Params stg_params;\n    ASSERT_EQ(0, storage->init(stg_params));\n    std::string path = dir_ + \"TestCheckCrc\";\n    ASSERT_EQ(0, storage->open(path, true));\n    ailego::Params params;\n    params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n    params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n    params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n    params.set(PARAM_HNSW_STREAMER_EF, 100);\n    params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n    params.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);\n    ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n    ASSERT_EQ(0, streamer->open(storage));\n\n    NumericalVector<float> vec(dim);\n    auto ctx = streamer->create_context();\n    ASSERT_NE(nullptr, ctx);\n    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n    size_t cnt = 100;\n    for (size_t i = 0; i < cnt; i++) {\n        for (size_t j = 0; j < dim; ++j) {\n            vec[j] = i;\n        }\n        streamer->add_impl(i, vec.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    streamer->close();\n    storage->flush();\n  
  storage->close();\n\n    int fd = open(path.c_str(), O_RDWR);\n    ASSERT_GT(fd, 0);\n    struct stat fs;\n    ASSERT_EQ(0, fstat(fd, &fs));\n    char buf[1024];\n    pwrite(fd, buf, sizeof(buf), fs.st_size/2);\n\n    ASSERT_EQ(0, storage->open(path, true));\n    IndexStreamer::Pointer streamer2 =\n        IndexFactory::CreateStreamer(\"HnswStreamer\");\n    ASSERT_NE(streamer2, nullptr);\n\n    ailego::Params params2;\n    params2.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n    params2.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n    params2.set(\"proxima.hnsw.streamer.check_crc_enable\", true);\n    ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params2));\n    ASSERT_EQ(0, streamer2->open(storage));\n}\n#endif\n\nTEST_F(HnswStreamerTest, TestCheckStats) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  std::string path = dir_ + \"/TestCheckStats.index\";\n  ASSERT_EQ(0, storage->open(path, true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 100);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);\n  params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 512 * 1024U);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto &stats = streamer->stats();\n  ASSERT_EQ(0U, stats.revision_id());\n  ASSERT_EQ(0U, stats.loaded_count());\n  ASSERT_EQ(0U, stats.added_count());\n  ASSERT_EQ(0U, stats.discarded_count());\n  ASSERT_EQ(0u, stats.index_size() % ailego::MemoryHelper::PageSize());\n  ASSERT_EQ(0U, stats.dumped_size());\n  ASSERT_EQ(0U, stats.check_point());\n  auto createTime = stats.create_time();\n  auto updateTime = stats.update_time();\n  ASSERT_GT(createTime, 
0UL);\n  ASSERT_EQ(createTime, updateTime);\n\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t cnt = 3000;\n  size_t size1 = stats.index_size();\n  size_t size2 = 0;\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n    ASSERT_EQ(i + 1, stats.added_count());\n    if (i == 0UL) {\n      size2 = stats.index_size();\n    }\n  }\n  size_t size3 = stats.index_size();\n  ASSERT_GT(size2, size1);\n  ASSERT_GT(size3, size2);\n  LOG_INFO(\"size1=%zu size2=%zu size3=%zu\", size1, size2, size3);\n\n  uint64_t checkPoint = 23423UL;\n  streamer->flush(checkPoint);\n  size_t size4 = stats.index_size();\n  ASSERT_EQ(size3, size4);\n  auto stats1 = streamer->stats();\n  ASSERT_EQ(1U, stats1.revision_id());\n  ASSERT_EQ(0U, stats1.loaded_count());\n  ASSERT_EQ(cnt, stats1.added_count());\n  ASSERT_EQ(0U, stats1.discarded_count());\n  ASSERT_GT(stats1.index_size(), 0U);\n  ASSERT_EQ(0U, stats1.dumped_size());\n  ASSERT_EQ(checkPoint, stats1.check_point());\n  auto createTime1 = stats1.create_time();\n  auto updateTime1 = stats1.update_time();\n  ASSERT_GE(updateTime1, createTime1);\n  ASSERT_EQ(createTime, createTime1);\n  streamer->close();\n\n  ASSERT_EQ(0, streamer->open(storage));\n  auto &stats2 = streamer->stats();\n  ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  ASSERT_EQ(0, streamer->add_impl(10000UL, vec.data(), qmeta, ctx));\n  ASSERT_EQ(2U, stats2.revision_id());\n  ASSERT_EQ(cnt, stats2.loaded_count());\n  ASSERT_EQ(1U, stats2.added_count());\n  ASSERT_EQ(0U, stats2.discarded_count());\n  ASSERT_GT(stats2.index_size(), 0);\n  ASSERT_EQ(0U, stats2.dumped_size());\n  ASSERT_EQ(checkPoint, stats2.check_point());\n  auto createTime2 = stats2.create_time();\n  auto updateTime2 = stats2.update_time();\n  
ASSERT_EQ(createTime2, createTime1);\n  ASSERT_GE(updateTime2, updateTime1);\n\n  sleep(1);\n  streamer->flush(checkPoint + 1);\n  ASSERT_NE(0, streamer->add_impl(0U, vec.data(), qmeta, ctx));\n  auto &stats3 = streamer->stats();\n  ASSERT_EQ(2U, stats3.revision_id());\n  ASSERT_EQ(cnt, stats3.loaded_count());\n  ASSERT_EQ(1U, stats3.added_count());\n  ASSERT_EQ(1U, stats3.discarded_count());\n  ASSERT_EQ(stats2.index_size(), stats3.index_size());\n  ASSERT_EQ(0U, stats3.dumped_size());\n  ASSERT_EQ(checkPoint + 1, stats3.check_point());\n  auto createTime3 = stats3.create_time();\n  auto updateTime3 = stats3.update_time();\n  ASSERT_EQ(createTime3, createTime1);\n  ASSERT_GT(updateTime3, updateTime2);\n\n  auto dpath = dir_ + \"/dumpIndex\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(dpath));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  size_t doc_cnt = stats3.loaded_count() + stats3.added_count();\n  struct stat st;\n  ASSERT_EQ(3001UL, doc_cnt);\n  ASSERT_EQ(0, stat(dpath.c_str(), &st));\n  ASSERT_LT(st.st_size - stats3.dumped_size(), 8192);\n\n  streamer->close();\n}\n\nTEST_F(HnswStreamerTest, TestCheckDuplicateAndGetVector) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestCheckDuplicateAndGetVec\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  auto ctx = 
streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < 1000; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n  }\n  for (size_t i = 0; i < 1000; i += 10) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_EQ(IndexError_Duplicate,\n              streamer->add_impl(i, vec.data(), qmeta, ctx));\n  }\n  auto provider = streamer->create_provider();\n  for (size_t i = 0; i < 1000; i++) {\n    const float *data = (const float *)provider->get_vector(i);\n    ASSERT_NE(data, nullptr);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(i, data[j]);\n    }\n  }\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nclass TestDumper : public IndexDumper {\n  virtual int init(const ailego::Params &) {\n    return 0;\n  }\n  virtual int cleanup(void) {\n    return 0;\n  }\n  virtual int create(const std::string &path) {\n    return 0;\n  }\n  virtual uint32_t magic(void) const {\n    return 0;\n  }\n  virtual int close(void) {\n    return 0;\n  }\n  virtual int append(const std::string &id, size_t data_size,\n                     size_t padding_size, uint32_t crc) {\n    usleep(100000);\n    return 0;\n  }\n  virtual size_t write(const void *data, size_t len) {\n    return len;\n  }\n};\n\nTEST_F(HnswStreamerTest, TestDumpIndexAndAdd) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestDumpIndexAndAdd\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  ASSERT_EQ(0, 
streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  auto ctx = streamer->create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  ASSERT_NE(nullptr, ctx);\n  int code = 0;\n  std::mutex mutex;\n  auto addVector = [&](int a, int b) {\n    int success = 0;\n    mutex.unlock();\n    for (int i = a; i < b; i++) {\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i;\n      }\n      int ret = streamer->add_impl(i, vec.data(), qmeta, ctx);\n      if (ret != 0) {\n        code = ret;\n        ASSERT_EQ(IndexError_Unsupported, code);\n        i = i - 1;  // retry\n        usleep(10000);\n      } else {\n        success++;\n      }\n    }\n    std::cout << \"addVector: \" << success << \" success\" << std::endl;\n  };\n  mutex.lock();\n  addVector(0, 2000);\n  mutex.lock();\n  auto t2 = std::async(std::launch::async, addVector, 2000, 3000);\n  auto path1 = dir_ + \"/dumpIndex1\";\n  auto dumper1 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper1, nullptr);\n  ASSERT_EQ(0, dumper1->create(path1));\n  mutex.lock();  // sync: wait addVector start and release lock\n  auto test_dumper = std::make_shared<TestDumper>();\n  ASSERT_EQ(0, streamer->dump(test_dumper));\n  mutex.unlock();\n  ASSERT_EQ(0, streamer->dump(dumper1));\n  ASSERT_EQ(0, dumper1->close());\n  t2.get();\n  streamer->close();\n  ASSERT_EQ(IndexError_Unsupported, code);\n\n  // check dump index\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->init(ailego::Params()));\n  ASSERT_EQ(0, container->open(path1, false));\n  ASSERT_NE(searcher, nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto iter = searcher->create_provider()->create_iterator();\n  size_t docs = 0;\n  while 
(iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    docs++;\n    iter->next();\n  }\n  ASSERT_GE(docs, 2000U);\n\n  // check streamer\n  ASSERT_EQ(0, streamer->open(storage));\n  iter = streamer->create_provider()->create_iterator();\n  docs = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    docs++;\n    iter->next();\n  }\n  ASSERT_EQ(docs, 3000U);\n}\n\n\nTEST_F(HnswStreamerTest, TestProvider) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestGetVector\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n\n  //! 
prepare data\n  size_t docs = 10000UL;\n  srand(ailego::Realtime::MilliSeconds());\n  std::vector<key_t> keys(docs);\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < docs; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = ailego::Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < keys.size(); i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = keys[i];\n    }\n    streamer->add_impl(keys[i], vec.data(), qmeta, ctx);\n  }\n\n  auto path1 = dir_ + \"/TestGetVector1\";\n  auto dumper1 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper1, nullptr);\n  ASSERT_EQ(0, dumper1->create(path1));\n  ASSERT_EQ(0, streamer->dump(dumper1));\n  ASSERT_EQ(0, dumper1->close());\n  streamer->close();\n\n  // check dump index\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSearcher\");\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->init(ailego::Params()));\n  ASSERT_EQ(0, container->open(path1, false));\n  ASSERT_NE(searcher, nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto iter = searcher->create_provider()->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const float 
*d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, docs);\n\n  // check streamer\n  ASSERT_EQ(0, streamer->open(storage));\n  iter = streamer->create_provider()->create_iterator();\n  cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, docs);\n\n\n  auto searcher_provider = searcher->create_provider();\n  auto streamer_provider = streamer->create_provider();\n  for (size_t i = 0; i < keys.size(); ++i) {\n    const float *d1 =\n        reinterpret_cast<const float *>(searcher_provider->get_vector(keys[i]));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d1[j], keys[i]);\n    }\n\n    const float *d2 =\n        reinterpret_cast<const float *>(streamer_provider->get_vector(keys[i]));\n    ASSERT_TRUE(d2);\n    for (size_t j = 0; j < dim; ++j) {\n      ASSERT_FLOAT_EQ(d2[j], keys[i]);\n    }\n  }\n\n  ASSERT_EQ(dim, streamer_provider->dimension());\n  ASSERT_EQ(index_meta_ptr_->element_size(), streamer_provider->element_size());\n  ASSERT_EQ(index_meta_ptr_->data_type(), streamer_provider->data_type());\n}\n\nTEST_F(HnswStreamerTest, TestSharedContext) {\n  auto create_streamer = [](std::string path) {\n    IndexStreamer::Pointer streamer =\n        IndexFactory::CreateStreamer(\"HnswStreamer\");\n    auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n    ailego::Params stg_params;\n    storage->init(stg_params);\n    storage->open(path, true);\n    ailego::Params params;\n    streamer->init(*index_meta_ptr_, params);\n    streamer->open(storage);\n    return streamer;\n  };\n  auto streamer1 = create_streamer(dir_ + \"TestSharedContext.index1\");\n  auto streamer2 
= create_streamer(dir_ + \"TestSharedContext.index2\");\n  auto streamer3 = create_streamer(dir_ + \"TestSharedContext.index3\");\n\n  srand(ailego::Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto do_test = [&](int start) {\n    auto code = rand() % 3;\n    IndexStreamer::Context::Pointer ctx;\n    switch (code) {\n      case 0:\n        ctx = streamer1->create_context();\n        break;\n      case 1:\n        ctx = streamer2->create_context();\n        break;\n      case 2:\n        ctx = streamer3->create_context();\n        break;\n    };\n    ctx->set_topk(1);\n    uint64_t key1 = start + 0;\n    uint64_t key2 = start + 1;\n    uint64_t key3 = start + 2;\n    NumericalVector<float> query(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      query[j] = 0.1f;\n    }\n    for (int i = 0; i < 1000; ++i) {\n      NumericalVector<float> vec(dim);\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = rand();\n      }\n      int ret = 0;\n      auto code = rand() % 3;\n      switch (code) {\n        case 0:\n          streamer1->add_impl(key1, vec.data(), qmeta, ctx);\n          key1 += 3;\n          ret = streamer1->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 1:\n          streamer2->add_impl(key2, vec.data(), qmeta, ctx);\n          key2 += 3;\n          streamer2->add_impl(key2, vec.data(), qmeta, ctx);\n          key2 += 3;\n          ret = streamer2->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 2:\n          streamer3->add_impl(key3, vec.data(), qmeta, ctx);\n          key3 += 3;\n          streamer3->add_impl(key3, vec.data(), qmeta, ctx);\n          key3 += 3;\n          streamer3->add_impl(key3, vec.data(), qmeta, ctx);\n          key3 += 3;\n          ret = streamer3->search_impl(query.data(), qmeta, ctx);\n          break;\n      }\n      EXPECT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(1, results.size());\n      EXPECT_EQ(code, 
results[0].key() % 3);\n    }\n  };\n\n  auto t1 = std::async(std::launch::async, do_test, 0);\n  auto t2 = std::async(std::launch::async, do_test, 30000000);\n  t1.wait();\n  t2.wait();\n}\n\nTEST_F(HnswStreamerTest, TestMipsEuclideanMetric) {\n  constexpr size_t static dim = 32;\n  std::srand(ailego::Realtime::MilliSeconds());\n  // int injection_type = rand() % 2;\n  int injection_type = 0;\n\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  ailego::Params params;\n  params.set(\"proxima.mips_euclidean.metric.injection_type\", injection_type);\n  meta.set_metric(\"MipsSquaredEuclidean\", 0, params);\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestMipsSquaredEuclidean\", true));\n  const size_t COUNT = 10000;\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  {\n    IndexStreamer::Pointer streamer =\n        IndexFactory::CreateStreamer(\"HnswStreamer\");\n    ASSERT_TRUE(streamer != nullptr);\n    ASSERT_EQ(0, streamer->init(meta, params));\n    ASSERT_EQ(0, streamer->open(storage));\n    const auto &metric_params = streamer->meta().metric_params();\n    EXPECT_FLOAT_EQ(0.0, metric_params.get_as_float(\n                             \"proxima.mips_euclidean.metric.max_l2_norm\"));\n    auto ctx = streamer->create_context();\n    for (size_t i = COUNT; i < 2 * COUNT; i++) {\n      std::vector<float> vec(dim);\n      for (size_t d = 0; d < dim; ++d) {\n        vec[d] = i;\n      }\n      ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n    }\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n  }\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  const auto &metric_params = 
streamer->meta().metric_params();\n  // NoTrain for LocalizedSpherical (type == 1), so max_l2_norm equals to 0\n  EXPECT_FLOAT_EQ(\n      injection_type == 0 ? 0.0f : 113131.0f,\n      metric_params.get_as_float(\"proxima.mips_euclidean.metric.max_l2_norm\"));\n  auto ctx = streamer->create_context();\n  for (size_t i = 0; i < COUNT; i++) {\n    std::vector<float> vec(dim);\n    for (size_t d = 0; d < dim; ++d) {\n      vec[d] = i;\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n  }\n  std::vector<float> vec(dim);\n  for (size_t d = 0; d < dim; ++d) {\n    vec[d] = 1.0;\n  }\n\n  ctx->set_topk(10);\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  const auto &results = ctx->result();\n  EXPECT_EQ(results.size(), 10);\n  EXPECT_NEAR((uint64_t)(2 * COUNT - 1), results[0].key(), 10);\n}\n\nTEST_F(HnswStreamerTest, TestBruteForceSetupInContext) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0,\n            storage->open(dir_ + \"/TestBruteForceSetupInContext.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_impl(i, 
vec.data(), qmeta, ctx);\n  }\n\n  size_t topk = 200;\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  bool set_bf_threshold = false;\n  bool use_update = false;\n\n  for (size_t i = 0; i < cnt; i++) {\n    auto linearCtx = streamer->create_context();\n    auto knnCtx = streamer->create_context();\n\n    ASSERT_TRUE(!!linearCtx);\n    ASSERT_TRUE(!!linearCtx);\n\n    linearCtx->set_topk(topk);\n    knnCtx->set_topk(topk);\n\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      if (use_update) {\n        ailego::Params streamerParamsExtra;\n\n        streamerParamsExtra.set(\"proxima.hnsw.streamer.brute_force_threshold\",\n                                cnt);\n        knnCtx->update(streamerParamsExtra);\n      } else {\n        knnCtx->set_bruteforce_threshold(cnt);\n      }\n\n      use_update = !use_update;\n    }\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n\n    // auto t3 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      linearTotalTime += t2 - t1;\n    } else {\n      knnTotalTime += t2 - t1;\n    }\n\n    set_bf_threshold = !set_bf_threshold;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float 
topk1Recall = topk1Hits * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswStreamerTest, TestKnnSearchCosine) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestKnnSearchCosine.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 4000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  
float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n    IndexQueryMeta new_meta;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  size_t query_cnt = 200U;\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n       
   totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 1.0f / query_cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswStreamerTest, TestFetchVector) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestFetchVector.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    const void *vector = streamer->get_vector(i);\n    
ASSERT_NE(vector, nullptr);\n\n    float vector_value = *(float *)(vector);\n    ASSERT_FLOAT_EQ(vector_value, i);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 200U;\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  for (size_t i = 0; i < query_cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n    float vector_value = *((float *)(knnResult[0].vector()));\n    ASSERT_FLOAT_EQ(vector_value, i);\n  }\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestFetchVectorCosine) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta 
index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestFetchVectorCosine.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = streamer->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  auto linearCtx 
= streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 200U;\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestFetchVectorCosineHalfFloatConverter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  
params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP16, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineHalfFloatConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(\n      0, storage->open(dir_ + \"/TestFetchVectorCosineHalfFloatConverter.index\",\n                       true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP16, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 0.1;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n\n  std::vector<NumericalVector<uint16_t>> vecs;\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<uint16_t> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = ailego::FloatHelper::ToFP16(dist(gen));\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), 
new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    uint16_t expected_vec_value = vecs[i][dim - 1];\n\n    const void *vector = streamer->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(uint16_t));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    uint16_t vector_value = *((uint16_t *)(denormalized_vec.data()) + dim - 1);\n    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);\n\n    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);\n\n    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 200U;\n  size_t topk = 30;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(uint16_t));\n    
reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    uint16_t expected_vec_value = vec[dim - 1];\n    uint16_t vector_value =\n        *(((uint16_t *)(denormalized_vec.data()) + dim - 1));\n\n    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);\n    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);\n\n    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestFetchVectorCosineFp16Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp16Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestFetchVectorCosineFp16Converter.index\",\n                             true));\n  ASSERT_EQ(0, 
streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 0.1;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n\n  std::vector<NumericalVector<float>> vecs;\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    float expected_vec_value = vecs[i][dim - 1];\n\n    const void *vector = streamer->get_vector(i);\n\n\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n\n    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 200U;\n  size_t topk = 30;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    
ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float expected_vec_value = vec[dim - 1];\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n\n    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestFetchVectorCosineInt8Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt8Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = 
IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestFetchVectorCosineInt8Converter.index\",\n                             true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = streamer->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 200U;\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i 
* 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestFetchVectorCosineInt4Converter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);\n  params.set(PARAM_HNSW_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params 
stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineInt4Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestFetchVectorCosineInt4Converter.index\",\n                             true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 2000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  IndexQueryMeta new_meta;\n\n  const float epsilon = 1e-2;\n  float fixed_value = float(cnt) / 2;\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_vec;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n  }\n\n  for (size_t i = 0; i < cnt; i++) {\n    float add_on = i * 10;\n\n    const void *vector = streamer->get_vector(i);\n    ASSERT_NE(vector, nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(vector, new_meta, &denormalized_vec);\n\n    float vector_value = *((float *)(denormalized_vec.data()) + dim - 1);\n    
EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  knnCtx->set_fetch_vector(true);\n\n  size_t query_cnt = 100U;\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  for (size_t i = 0; i < query_cnt; i++) {\n    float add_on = i * 10;\n    for (size_t j = 0; j < dim; ++j) {\n      if (j < dim / 4)\n        vec[j] = fixed_value;\n      else\n        vec[j] = fixed_value + add_on;\n    }\n\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(new_query.data(), new_meta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    ASSERT_NE(knnResult[0].vector(), nullptr);\n\n    std::string denormalized_vec;\n    denormalized_vec.resize(dim * sizeof(float));\n    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);\n\n    float vector_value = *(((float *)(denormalized_vec.data()) + dim - 1));\n    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);\n  }\n\n  std::cout << \"knnTotalTime: \" << knnTotalTime << std::endl;\n  std::cout << \"linearTotalTime: \" << linearTotalTime << std::endl;\n}\n\nTEST_F(HnswStreamerTest, TestRnnSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  
ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  // params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestRnnSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 1000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n  }\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n\n  size_t topk = 50;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = results[topk / 2].score();\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswStreamerTest, TestRnnSearchInnerProduct) {\n  IndexStreamer::Pointer streamer =\n      
IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  // params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"InnerProduct\", 0, ailego::Params());\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestRnnSearchInnerProduct.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 1000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));\n  }\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n\n  size_t topk = 50;\n  ctx->set_topk(topk);\n\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = -results[topk / 2].score();\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(-radius, results[topk - 
1].score());\n}\n\nTEST_F(HnswStreamerTest, TestRnnSearchCosine) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  // params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestRnnSearchCosine.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 1000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string new_vec;\n    IndexQueryMeta new_meta;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), 
new_meta, ctx));\n  }\n\n  size_t topk = 50;\n  ctx->set_topk(topk);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n\n  std::string new_query;\n  IndexQueryMeta new_meta;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = 0.5f;\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswStreamerTest, TestGroup) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGroup.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 5000U;\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; 
i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n\n  total_time += t2 - t1;\n  std::cout << \"Total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n  ASSERT_EQ(group_result.size(), group_num);\n\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n\n    // const std::string &group_id = group_result[i].group_id();\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n  }\n\n  // do linear search by p_keys test\n  auto groupbyFuncLinear = [](uint64_t key) {\n    uint32_t group_id = key % 10;\n\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  auto linear_pk_ctx = streamer->create_context();\n\n  linear_pk_ctx->set_group_params(group_num, group_topk);\n  linear_pk_ctx->set_group_by(groupbyFuncLinear);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 
10};\n\n  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,\n                                                  linear_pk_ctx));\n  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();\n  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);\n\n  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {\n    auto &result = linear_by_pkeys_group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n\n    // const std::string &group_id = linear_by_pkeys_group_result[i].group_id();\n    //  std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n\n    ASSERT_EQ(10 - i, result[0].key());\n  }\n}\n\nTEST_F(HnswStreamerTest, TestGroupNotEnoughNum) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGroupNotEnoughNum.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  size_t cnt = 5000U;\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i 
/ 10.0;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 12;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n  total_time += t2 - t1;\n\n  std::cout << \"Total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n\n  ASSERT_EQ(group_result.size(), 10);\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n\n    // const std::string &group_id = group_result[i].group_id();\n    // std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    //             << std::endl;\n    // }\n  }\n}\n\nTEST_F(HnswStreamerTest, TestGroupInBruteforceSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  size_t cnt = 5000U;\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, cnt * 2);\n  ailego::Params 
stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0,\n            storage->open(dir_ + \"/TestGroupInBruteforceSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 10.0;\n    }\n    streamer->add_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  size_t group_topk = 20;\n  uint64_t total_time = 0;\n\n  auto groupbyFunc = [](uint64_t key) {\n    uint32_t group_id = key / 10 % 10;\n    // std::cout << \"key: \" << key << \", group id: \" << group_id << std::endl;\n    return std::string(\"g_\") + std::to_string(group_id);\n  };\n\n  size_t group_num = 5;\n  ctx->set_group_params(group_num, group_topk);\n  ctx->set_group_by(groupbyFunc);\n\n  size_t query_value = cnt / 2;\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = float(query_value) / 10 + 0.1f;\n  }\n\n  auto t1 = ailego::Realtime::MicroSeconds();\n  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));\n  auto t2 = ailego::Realtime::MicroSeconds();\n  total_time += t2 - t1;\n\n  std::cout << \"Total time: \" << total_time << std::endl;\n\n  auto &group_result = ctx->group_result();\n\n  ASSERT_EQ(group_result.size(), 5);\n  for (uint32_t i = 0; i < group_result.size(); ++i) {\n    auto &result = group_result[i].docs();\n\n    ASSERT_GT(result.size(), 0);\n\n    // const std::string &group_id = group_result[i].group_id();\n    //  std::cout << \"Group ID: \" << group_id << std::endl;\n\n    // for (uint32_t j = 0; j < result.size(); ++j) {\n    //   std::cout << \"\\tKey: \" << result[j].key() << std::fixed\n    //             << std::setprecision(3) << \", Score: \" << result[j].score()\n    
//             << std::endl;\n    // }\n  }\n}\n\nTEST_F(HnswStreamerTest, TestBinaryConverter) {\n  uint32_t dimension = 2560;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  // params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  // params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  // params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  // params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dimension);\n  index_meta_raw.set_metric(\"InnerProduct\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"BinaryConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestBinaryConverter.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n  std::vector<NumericalVector<float>> vecs;\n\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string new_vec;\n    IndexQueryMeta 
new_meta;\n\n    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));\n    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  size_t query_cnt = 200U;\n  auto knnCtx = streamer->create_context();\n\n  float epison = 1e-6;\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n    std::string new_query;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));\n\n    size_t topk = 50;\n    knnCtx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));\n    auto &results = knnCtx->result();\n    ASSERT_EQ(topk, results.size());\n    ASSERT_EQ(i, results[0].key());\n    ASSERT_NEAR(0, results[0].score(), epison);\n  }\n}\n\nTEST_F(HnswStreamerTest, TestAddAndSearchWithID) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestAddAndSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  NumericalVector<float> vec(dim);\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  auto linearCtx = streamer->create_context();\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  for (size_t i = 0; i < cnt; i += 4) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    
}\n    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  for (size_t i = 2; i < cnt; i += 4) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);\n  }\n\n  // streamer->print_debug_info();\n\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  for (size_t i = 0; i < cnt / 10; i += 2) {\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i + 0.1f;\n    }\n    auto t1 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));\n    auto t2 = ailego::Realtime::MicroSeconds();\n    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));\n    auto t3 = ailego::Realtime::MicroSeconds();\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n\n    for (size_t j = 0; j < topk; ++j) {\n      ASSERT_NE(linearResult[j].key(), kInvalidKey);\n      ASSERT_NE(linearResult[j].index(), kInvalidKey);\n      auto linearVec = static_cast<const float *>(\n          streamer->get_vector_by_id(linearResult[j].index()));\n\n      for (size_t z = 0; z < dim; ++z) {\n        ASSERT_FLOAT_EQ(linearVec[z], linearResult[j].index());\n      }\n    }\n    for (size_t j = 0; j < topk; ++j) {\n      ASSERT_NE(knnResult[j].key(), kInvalidKey);\n      ASSERT_NE(knnResult[j].index(), kInvalidKey);\n  
    auto knnVec = static_cast<const float *>(\n          streamer->get_vector_by_id(knnResult[j].index()));\n      for (size_t z = 0; z < dim; ++z) {\n        ASSERT_FLOAT_EQ(knnVec[z], knnResult[j].index());\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswStreamerTest, TestBasicRefiner) {\n  uint32_t dimension = 1120;\n\n  IndexStreamer::Pointer base_streamer =\n      IndexFactory::CreateStreamer(\"HnswStreamer\");\n  ASSERT_TRUE(base_streamer != nullptr);\n\n  IndexStreamer::Pointer refine_streamer =\n      IndexFactory::CreateStreamer(\"FlatStreamer\");\n  ASSERT_TRUE(refine_streamer != nullptr);\n\n  IndexRefiner::Pointer refiner = IndexFactory::CreateRefiner(\"BasicRefiner\");\n  ASSERT_TRUE(refiner != nullptr);\n\n  ailego::Params params;\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dimension);\n  index_meta.set_metric(\"InnerProduct\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"BinaryConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta, converter_params);\n\n  IndexMeta index_meta_binary = converter->meta();\n\n  auto reformer =\n      IndexFactory::CreateReformer(index_meta_binary.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta_binary.reformer_params()));\n\n  // base streamer\n  ailego::Params base_stg_params;\n  auto base_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, 
base_storage->init(base_stg_params));\n  ASSERT_EQ(0, base_storage->open(dir_ + \"/TestBasicRefinerBase.index\", true));\n  ASSERT_EQ(0, base_streamer->init(index_meta_binary, params));\n  ASSERT_EQ(0, base_streamer->open(base_storage));\n\n  auto base_ctx = base_streamer->create_context();\n  ASSERT_TRUE(!!base_ctx);\n\n  // refine streamer\n  ailego::Params refine_stg_params;\n  auto refine_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, refine_storage->init(refine_stg_params));\n  ASSERT_EQ(0,\n            refine_storage->open(dir_ + \"/TestBasicRefinerRefine.index\", true));\n  ASSERT_EQ(0, refine_streamer->init(index_meta, params));\n  ASSERT_EQ(0, refine_streamer->open(refine_storage));\n  auto refine_ctx = refine_streamer->create_context();\n  ASSERT_TRUE(!!refine_ctx);\n\n  ailego::Params refiner_params;\n  ASSERT_EQ(0, refiner->init(base_streamer, refine_streamer, refiner_params));\n\n  auto ctx = refiner->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n\n  std::uniform_real_distribution<float> dist(-2.0, 2.0);\n  std::vector<NumericalVector<float>> vecs;\n\n  size_t cnt = 5000U;\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<float> vec(dimension);\n    for (size_t j = 0; j < dimension; ++j) {\n      vec[j] = dist(gen);\n    }\n\n    std::string binary_vec;\n    IndexQueryMeta binary_qmeta;\n\n    ASSERT_EQ(0,\n              reformer->convert(vec.data(), qmeta, &binary_vec, &binary_qmeta));\n    ASSERT_EQ(0, refiner->add_impl(i, binary_vec.data(), binary_qmeta,\n                                   vec.data(), qmeta, ctx));\n\n    vecs.push_back(vec);\n  }\n\n  size_t query_cnt = 200U;\n  // size_t query_cnt = 1U;\n\n  auto searcherCtx = refiner->create_context();\n\n  for (size_t i = 0; i < query_cnt; i++) {\n    auto &vec = vecs[i];\n\n    // float abs_value{0};\n    // for (size_t j = 0; j < 
dimension; ++j) {\n    //   std::cout << \"dim: \" << j << \", value: \" << vec[j] << std::endl;\n\n    //   abs_value += std::abs(vec[j]);\n    // }\n    // std::cout << \"abs value: \" << abs_value << std::endl;\n\n    std::string new_query;\n    IndexQueryMeta binary_qmeta;\n    ASSERT_EQ(\n        0, reformer->transform(vec.data(), qmeta, &new_query, &binary_qmeta));\n\n    size_t topk = 50;\n    searcherCtx->set_topk(topk);\n    ASSERT_EQ(0, refiner->search_impl(new_query.data(), binary_qmeta,\n                                      vec.data(), qmeta, searcherCtx));\n    auto &results = searcherCtx->result();\n    ASSERT_EQ(topk, results.size());\n    ASSERT_EQ(i, results[0].key());\n\n    // for (size_t i = 0; i < results.size(); ++i) {\n    //   std::cout << i << \", id: \" << results[i].index()\n    //             << \", score: \" << results[i].score() << std::endl;\n    // }\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw_rabitq/CMakeLists.txt",
    "content": "include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake)\ninclude(${CMAKE_SOURCE_DIR}/cmake/option.cmake)\n\nif(APPLE)\n  set(APPLE_FRAMEWORK_LIBS\n    -framework CoreFoundation\n    -framework CoreGraphics\n    -framework CoreData\n    -framework CoreText\n    -framework Security\n    -framework Foundation\n    -Wl,-U,_MallocExtension_ReleaseFreeMemory\n    -Wl,-U,_ProfilerStart\n    -Wl,-U,_ProfilerStop\n    -Wl,-U,_RegisterThriftProtocol\n  )\nendif()\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw_rabitq core_knn_flat core_knn_cluster\n      ${CMAKE_THREAD_LIBS_INIT}\n      ${CMAKE_DL_LIBS}\n      SRCS ${CC_SRCS}\n      INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm/hnsw_rabitq\n      LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n    )\n  cc_test_suite(hnsw_rabitq ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_rabitq_builder.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_provider.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 128;\n\nclass HnswRabitqBuilderTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswRabitqBuilderTest::_dir(\"hnswRabitqBuilderTest\");\nshared_ptr<IndexMeta> HnswRabitqBuilderTest::_index_meta_ptr;\n\nvoid HnswRabitqBuilderTest::SetUp(void) {\n  IndexLoggerBroker::SetLevel(0);\n  _index_meta_ptr.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  _index_meta_ptr->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n}\n\nvoid HnswRabitqBuilderTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", 
_dir.c_str());\n  // system(cmdBuf);\n}\n\nTEST_F(HnswRabitqBuilderTest, TestGeneral) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n\n  ASSERT_EQ(0, builder->train(holder));\n\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswRabitqBuilderTest, TestLoad) {\n  // Load index with searcher and verify search\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  string path = 
_dir + \"/TestGeneral\";\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  // Perform search verification\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = static_cast<float>(j) / 1000.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  auto context = searcher->create_context();\n  ASSERT_NE(context, nullptr);\n  context->set_topk(10);\n\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n  const auto &result = context->result(0);\n  ASSERT_GT(result.size(), 0UL);\n  ASSERT_LE(result.size(), 10UL);\n}\n\nTEST_F(HnswRabitqBuilderTest, TestDimensions) {\n  std::vector<size_t> dimensions = {1,    2,    4,    8,    16,   32,   33,\n                                    63,   64,   128,  256,  512,  1024, 2047,\n                                    2048, 2049, 4095, 4096, 4097, 8192, 16384};\n  size_t doc_cnt = 100;\n\n  for (size_t test_dim : dimensions) {\n    std::cout << \"Testing dimension: \" << test_dim << std::endl;\n\n    IndexMeta index_meta(IndexMeta::DataType::DT_FP32, test_dim);\n    index_meta.set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n\n    IndexBuilder::Pointer builder =\n        IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n    ASSERT_NE(builder, nullptr) << \"dim=\" << test_dim;\n\n    ailego::Params params;\n    params.set(\"proxima.rabitq.num_clusters\", 16UL);\n    params.set(\"proxima.rabitq.total_bits\", 2UL);\n    params.set(\"proxima.hnsw_rabitq.general.dimension\", test_dim);\n\n    int ret = builder->init(index_meta, params);\n\n    // dimension <= 63 or >= 4096: init() should return -31\n    if (test_dim <= 63 || test_dim >= 4096) {\n      ASSERT_EQ(-31, ret) << \"expected init to fail with -31, dim=\" << test_dim;\n      std::cout << \"Dimension \" << test_dim\n                << \" correctly rejected with ret=\" << ret << std::endl;\n      continue;\n    }\n\n    
// Valid dimensions: verify full build succeeds\n    ASSERT_EQ(0, ret) << \"init failed, dim=\" << test_dim;\n\n    auto holder =\n        make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(\n            test_dim);\n    for (size_t i = 0; i < doc_cnt; i++) {\n      NumericalVector<float> vec(test_dim);\n      for (size_t j = 0; j < test_dim; ++j) {\n        vec[j] = static_cast<float>(i * test_dim + j) / 1000.0f;\n      }\n      ASSERT_TRUE(holder->emplace(i, std::move(vec))) << \"dim=\" << test_dim;\n    }\n\n    ret = builder->train(holder);\n    ASSERT_EQ(0, ret) << \"train failed, dim=\" << test_dim;\n\n    ret = builder->build(holder);\n    ASSERT_EQ(0, ret) << \"build failed, dim=\" << test_dim;\n\n    auto &stats = builder->stats();\n    ASSERT_EQ(doc_cnt, stats.built_count()) << \"dim=\" << test_dim;\n\n    std::cout << \"Dimension \" << test_dim << \" passed, built \"\n              << stats.built_count() << \" docs\" << std::endl;\n  }\n}\n\nTEST_F(HnswRabitqBuilderTest, TestMemquota) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  params.set(\"proxima.hnsw_rabitq.builder.memory_quota\", 100000UL);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(IndexError_NoMemory, builder->build(holder));\n}\n\nTEST_F(HnswRabitqBuilderTest, 
TestIndexThreads) {\n  IndexBuilder::Pointer builder1 =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder1, nullptr);\n  IndexBuilder::Pointer builder2 =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder2, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  std::srand(ailego::Realtime::MilliSeconds());\n  auto threads =\n      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);\n  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));\n\n  auto build_index1 = [&]() {\n    ASSERT_EQ(0, builder1->train(threads, holder));\n    ASSERT_EQ(0, builder1->build(threads, holder));\n  };\n  auto build_index2 = [&]() {\n    ASSERT_EQ(0, builder2->train(threads, holder));\n    ASSERT_EQ(0, builder2->build(threads, holder));\n  };\n\n  auto t1 = std::async(std::launch::async, build_index1);\n  auto t2 = std::async(std::launch::async, build_index2);\n  t1.wait();\n  t2.wait();\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndexThreads\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder1->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder2->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats1 = builder1->stats();\n  ASSERT_EQ(doc_cnt, stats1.built_count());\n  
auto &stats2 = builder2->stats();\n  ASSERT_EQ(doc_cnt, stats2.built_count());\n}\n\nTEST_F(HnswRabitqBuilderTest, TestCosine) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n  converted_holder = convert_holder_to_provider(converted_holder);\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(index_meta, params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestCosine\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  
ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswRabitqBuilderTest, TestCleanupAndRebuild) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestCleanupAndRebuild\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n\n  // Cleanup and rebuild with more documents\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder2->emplace(i, std::move(vec)));\n  }\n\n  ASSERT_EQ(0, 
builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_rabitq_searcher.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <cstdio>\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"hnsw_rabitq_builder.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 128;\n\nclass HnswRabitqSearcherTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswRabitqSearcherTest::_dir(\"HnswRabitqSearcherTest\");\nshared_ptr<IndexMeta> HnswRabitqSearcherTest::_index_meta_ptr;\n\nvoid HnswRabitqSearcherTest::SetUp(void) {\n  IndexLoggerBroker::SetLevel(0);\n  _index_meta_ptr.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  _index_meta_ptr->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n}\n\nvoid HnswRabitqSearcherTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, 
\"rm -rf %s\", _dir.c_str());\n  // system(cmdBuf);\n}\n\nTEST_F(HnswRabitqSearcherTest, TestBasicSearch) {\n  // Build index first\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestBasicSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  // Perform search\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = static_cast<float>(j) / 1000.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  auto context = 
searcher->create_context();\n  ASSERT_TRUE(!!context);\n  context->set_topk(10);\n\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n  const auto &result = context->result(0);\n  ASSERT_GT(result.size(), 0UL);\n  ASSERT_LE(result.size(), 10UL);\n\n  // Verify results are sorted by distance\n  for (size_t i = 1; i < result.size(); ++i) {\n    ASSERT_LE(result[i - 1].score(), result[i].score());\n  }\n}\n\nTEST_F(HnswRabitqSearcherTest, DISABLED_TestRnnSearch) {\n  // Build index first\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestRnnSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher with radius search\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  
ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = 0.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  auto context = searcher->create_context();\n  ASSERT_NE(context, nullptr);\n\n  size_t topk = 50;\n  context->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n  const auto &results = context->result(0);\n  ASSERT_EQ(topk, results.size());\n\n  // Test with radius threshold\n  float radius = results[topk / 2].score();\n  context->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test reset threshold\n  context->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswRabitqSearcherTest, DISABLED_TestSearchInnerProduct) {\n  // Build index with InnerProduct metric\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);\n  index_meta.set_metric(\"InnerProduct\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  
params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(index_meta, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestSearchInnerProduct\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = 1.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  auto context = searcher->create_context();\n  ASSERT_TRUE(!!context);\n\n  size_t topk = 50;\n  context->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n  const auto &results = context->result(0);\n  ASSERT_EQ(topk, results.size());\n\n  // Test with radius threshold (note: InnerProduct uses negative scores)\n  float radius = -results[topk / 2].score();\n  context->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    LOG_ERROR(\"radius: %f, score: %f\", radius, results[k].score());\n    EXPECT_GE(radius, results[k].score());\n  }\n\n  // Test reset threshold\n  
context->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(-radius, results[topk - 1].score());\n}\n\nTEST_F(HnswRabitqSearcherTest, TestSearchCosine) {\n  // Build index with Cosine metric\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);\n  index_meta_raw.set_metric(\"Cosine\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineFp32Converter\");\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->result();\n  converted_holder = convert_holder_to_provider(converted_holder);\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(index_meta, params));\n  ASSERT_EQ(0, builder->train(converted_holder));\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestSearchCosine\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher\n 
 auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = 1.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  std::string new_query;\n  IndexQueryMeta new_meta;\n  ASSERT_EQ(0, reformer->transform(query_vec.data(), query_meta, &new_query,\n                                   &new_meta));\n\n  auto context = searcher->create_context();\n  ASSERT_TRUE(!!context);\n\n  size_t topk = 50;\n  context->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));\n\n  const auto &results = context->result(0);\n  ASSERT_EQ(topk, results.size());\n\n  // Test with radius threshold\n  float radius = 0.5f;\n  context->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n\n  // Test reset threshold\n  context->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(radius, results[topk - 1].score());\n}\n\nTEST_F(HnswRabitqSearcherTest, TestMultipleQueries) {\n  // Build index first\n  IndexBuilder::Pointer builder =\n      
IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestMultipleQueries\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher with multiple queries\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  // Test with different query vectors\n  for (size_t query_id = 0; query_id < 5; ++query_id) {\n    NumericalVector<float> query_vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      query_vec[j] = static_cast<float>(query_id * dim + j) / 1000.0f;\n    }\n\n    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n    auto context = searcher->create_context();\n    
ASSERT_TRUE(!!context);\n    context->set_topk(20);\n\n    ASSERT_EQ(0,\n              searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n    const auto &result = context->result(0);\n    ASSERT_GT(result.size(), 0UL);\n    ASSERT_LE(result.size(), 20UL);\n\n    // Verify results are sorted\n    for (size_t i = 1; i < result.size(); ++i) {\n      ASSERT_LE(result[i - 1].score(), result[i].score());\n    }\n  }\n}\n\nTEST_F(HnswRabitqSearcherTest, TestDifferentTopK) {\n  // Build index first\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswRabitqBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 10000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, std::move(vec)));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.rabitq.num_clusters\", 16UL);\n  params.set(\"proxima.rabitq.total_bits\", 2UL);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestDifferentTopK\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // Test searcher with different topk values\n  auto searcher = IndexFactory::CreateSearcher(\"HnswRabitqSearcher\");\n  ASSERT_NE(searcher, nullptr);\n\n  ailego::Params search_params;\n  search_params.set(\"proxima.hnsw_rabitq.searcher.ef\", 100UL);\n  ASSERT_EQ(0, searcher->init(search_params));\n\n  auto loader = IndexFactory::CreateStorage(\"FileReadStorage\");\n  
ASSERT_NE(loader, nullptr);\n  ASSERT_EQ(0, loader->init(ailego::Params()));\n  ASSERT_EQ(0, loader->open(path, false));\n\n  ASSERT_EQ(0, searcher->load(loader, nullptr));\n\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = static_cast<float>(j) / 1000.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  // Test with different topk values\n  std::vector<size_t> topk_values = {1, 5, 10, 20, 50, 100};\n  for (size_t topk : topk_values) {\n    auto context = searcher->create_context();\n    ASSERT_TRUE(!!context);\n    context->set_topk(topk);\n\n    ASSERT_EQ(0,\n              searcher->search_impl(query_vec.data(), query_meta, 1, context));\n\n    const auto &result = context->result(0);\n    ASSERT_GT(result.size(), 0UL);\n    ASSERT_LE(result.size(), topk);\n\n    // Verify results are sorted\n    for (size_t i = 1; i < result.size(); ++i) {\n      ASSERT_LE(result[i - 1].score(), result[i].score());\n    }\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"hnsw_rabitq_streamer.h\"\n#include <memory>\n#include <gtest/gtest.h>\n#include \"zvec/ailego/container/params.h\"\n#include \"zvec/core/framework/index_holder.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n#include \"hnsw_rabitq_streamer.h\"\n#include \"rabitq_converter.h\"\n#include \"rabitq_reformer.h\"\n\nusing namespace std;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static dim = 128;\n\nclass HnswRabitqStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string dir_;\n  static shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string HnswRabitqStreamerTest::dir_(\"hnswRabitqStreamerTest\");\nshared_ptr<IndexMeta> HnswRabitqStreamerTest::index_meta_ptr_;\n\nvoid HnswRabitqStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (nothrow)\n                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, ailego::Params());\n}\n\nvoid HnswRabitqStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswRabitqStreamerTest, TestBuildAndSearch) {\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  
for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/Test/AddVector\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {\n    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n    ASSERT_EQ(0,\n              streamer->add_impl(it->key(), it->data(), query_meta, context));\n  }\n  streamer->flush(0UL);\n\n  // Perform search verification\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = static_cast<float>(j) / 1000.0f;\n  }\n\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  context->set_topk(10);\n  ASSERT_EQ(0, streamer->search_impl(query_vec.data(), query_meta, 1, context));\n\n  const auto &result = 
context->result(0);\n  ASSERT_GT(result.size(), 0UL);\n  ASSERT_LE(result.size(), 10UL);\n\n  // reopen and load reformer from storage\n  ASSERT_EQ(0, streamer->close());\n  IndexStreamer::Pointer new_streamer =\n      std::make_shared<HnswRabitqStreamer>(holder);\n  ASSERT_EQ(0, new_streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, new_streamer->open(storage));\n}\n\nTEST_F(HnswRabitqStreamerTest, TestLinearSearch) {\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {\n    IndexQueryMeta 
query_meta(IndexMeta::DataType::DT_FP32, dim);\n    ASSERT_EQ(0,\n              streamer->add_impl(it->key(), it->data(), query_meta, context));\n  }\n\n  // Test linear search with exact match\n  size_t topk = 3;\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  NumericalVector<float> query_vec(dim);\n\n  for (size_t i = 0; i < doc_cnt; i += 100) {\n    for (size_t j = 0; j < dim; ++j) {\n      query_vec[j] = static_cast<float>(i);\n    }\n    context->set_topk(1U);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(query_vec.data(), query_meta, context));\n    auto &result1 = context->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(i, result1[0].key());\n\n    // Test with slight offset\n    for (size_t j = 0; j < dim; ++j) {\n      query_vec[j] = static_cast<float>(i) + 0.1f;\n    }\n    context->set_topk(topk);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(query_vec.data(), query_meta, context));\n    auto &result2 = context->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(i, result2[0].key());\n  }\n}\n\nTEST_F(HnswRabitqStreamerTest, TestKnnSearch) {\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 2000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n  
params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 10U);\n  params.set(\"proxima.hnsw_rabitq.streamer.efconstruction\", 100U);\n  params.set(\"proxima.hnsw_rabitq.streamer.ef\", 50U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestKnnSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {\n    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n    ASSERT_EQ(0,\n              streamer->add_impl(it->key(), it->data(), query_meta, context));\n  }\n\n  // Compare KNN search with brute force search\n  auto linear_ctx = streamer->create_context();\n  auto knn_ctx = streamer->create_context();\n  size_t topk = 50;\n  linear_ctx->set_topk(topk);\n  knn_ctx->set_topk(topk);\n\n  int total_hits = 0;\n  int total_cnts = 0;\n  int topk1_hits = 0;\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  NumericalVector<float> query_vec(dim);\n\n  for (size_t i = 0; i < doc_cnt; i += 100) {\n    for (size_t j = 0; j < dim; ++j) {\n      query_vec[j] = static_cast<float>(i) + 0.1f;\n    }\n\n    ASSERT_EQ(0,\n              streamer->search_impl(query_vec.data(), query_meta, 1, knn_ctx));\n    ASSERT_EQ(\n        0, streamer->search_bf_impl(query_vec.data(), query_meta, linear_ctx));\n\n    auto &knn_result = knn_ctx->result(0);\n    ASSERT_EQ(topk, knn_result.size());\n    topk1_hits += (i == knn_result[0].key());\n\n    auto &linear_result = linear_ctx->result();\n    ASSERT_EQ(topk, linear_result.size());\n    ASSERT_EQ(i, linear_result[0].key());\n\n    for 
(size_t k = 0; k < topk; ++k) {\n      total_cnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linear_result[j].key() == knn_result[k].key()) {\n          total_hits++;\n          break;\n        }\n      }\n    }\n  }\n\n  float recall = total_hits * 1.0f / total_cnts;\n  float topk1_recall = topk1_hits * 100.0f / static_cast<float>(doc_cnt);\n  EXPECT_GT(recall, 0.60f);\n  // actual: no guarantee\n  // TODO(jiliang.ljl): check if ok?\n  EXPECT_GT(topk1_recall, 0.00f);\n}\n\nTEST_F(HnswRabitqStreamerTest, TestRandomData) {\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1500UL;\n\n  // Add random vectors\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 32U);\n  params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 20U);\n  params.set(\"proxima.hnsw_rabitq.streamer.efconstruction\", 200U);\n  params.set(\"proxima.hnsw_rabitq.streamer.ef\", 100U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, 
storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestRandomData\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {\n    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n    ASSERT_EQ(0,\n              streamer->add_impl(it->key(), it->data(), query_meta, context));\n  }\n\n  // Test with random queries\n  auto linear_ctx = streamer->create_context();\n  auto knn_ctx = streamer->create_context();\n  size_t topk = 50;\n  linear_ctx->set_topk(topk);\n  knn_ctx->set_topk(topk);\n\n  int total_hits = 0;\n  int total_cnts = 0;\n  int topk1_hits = 0;\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n  NumericalVector<float> query_vec(dim);\n\n  size_t query_cnt = 200;\n  for (size_t i = 0; i < query_cnt; i++) {\n    for (size_t j = 0; j < dim; ++j) {\n      query_vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);\n    }\n\n    ASSERT_EQ(\n        0, streamer->search_bf_impl(query_vec.data(), query_meta, linear_ctx));\n    ASSERT_EQ(0,\n              streamer->search_impl(query_vec.data(), query_meta, 1, knn_ctx));\n\n    auto &knn_result = knn_ctx->result(0);\n    ASSERT_EQ(topk, knn_result.size());\n\n    auto &linear_result = linear_ctx->result();\n    ASSERT_EQ(topk, linear_result.size());\n\n    topk1_hits += (linear_result[0].key() == knn_result[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      total_cnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linear_result[j].key() == knn_result[k].key()) {\n          total_hits++;\n          break;\n        }\n      }\n    }\n  }\n\n  float recall = total_hits * 1.0f / total_cnts;\n  float topk1_recall = topk1_hits * 1.0f / query_cnt;\n  EXPECT_GT(recall, 0.50f);\n  EXPECT_GT(topk1_recall, 0.70f);\n}\n\nTEST_F(HnswRabitqStreamerTest, TestOpenClose) {\n  auto holder =\n      
make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 500UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestOpenClose\", true));\n\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  // Add first half of vectors\n  for (size_t i = 0; i < doc_cnt / 2; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), query_meta, context));\n  }\n\n  ASSERT_EQ(0, streamer->flush(0UL));\n  ASSERT_EQ(0, streamer->close());\n\n  // Reopen and add second half\n  IndexStreamer::Pointer streamer2 =\n      std::make_shared<HnswRabitqStreamer>(holder);\n  ASSERT_EQ(0, 
streamer2->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer2->open(storage));\n\n  auto context2 = streamer2->create_context();\n  for (size_t i = doc_cnt / 2; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_EQ(0, streamer2->add_impl(i, vec.data(), query_meta, context2));\n  }\n\n  ASSERT_EQ(0, streamer2->flush(0UL));\n\n  // Verify search works after reopen\n  NumericalVector<float> query_vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    query_vec[j] = 10.0f;\n  }\n\n  context2->set_topk(5);\n  ASSERT_EQ(0,\n            streamer2->search_impl(query_vec.data(), query_meta, 1, context2));\n  const auto &result = context2->result(0);\n  ASSERT_EQ(5UL, result.size());\n  ASSERT_EQ(10UL, result[0].key());\n}\n\nTEST_F(HnswRabitqStreamerTest, TestCreateIterator) {\n  auto holder =\n      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 300UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, ailego::Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n  IndexStreamer::Pointer streamer =\n      std::make_shared<HnswRabitqStreamer>(holder, reformer);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw_rabitq.general.dimension\", dim);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, 
params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestCreateIterator\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto context = streamer->create_context();\n  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), query_meta, context));\n  }\n\n  streamer->flush(0UL);\n\n  // Test iterator\n  auto provider = streamer->create_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n\n  size_t count = 0;\n  while (iter->is_valid()) {\n    ASSERT_EQ(count, iter->key());\n    // const float *data = (const float *)iter->data();\n    // for (size_t j = 0; j < dim; ++j) {\n    //   ASSERT_EQ(static_cast<float>(count), data[j]);\n    // }\n    iter->next();\n    count++;\n  }\n  ASSERT_EQ(doc_cnt, count);\n\n  // Test get_vector\n  // for (size_t i = 0; i < doc_cnt; i++) {\n  //   const float *data = (const float *)provider->get_vector(i);\n  //   ASSERT_NE(data, nullptr);\n  //   for (size_t j = 0; j < dim; ++j) {\n  //     ASSERT_EQ(static_cast<float>(i), data[j]);\n  //   }\n  // }\n}\n\nTEST_F(HnswRabitqStreamerTest, TestDimensions) {\n  std::vector<size_t> dimensions = {1,    2,    4,    8,    16,   32,   33,\n                                    63,   64,   128,  256,  512,  1024, 2047,\n                                    2048, 2049, 4095, 4096, 4097, 8192, 16384};\n  size_t doc_cnt = 100;\n\n  for (size_t test_dim : dimensions) {\n    std::cout << \"Testing dimension: \" << test_dim << std::endl;\n\n    IndexMeta index_meta(IndexMeta::DataType::DT_FP32, test_dim);\n    index_meta.set_metric(\"SquaredEuclidean\", 0, 
ailego::Params());\n\n    auto holder =\n        make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(\n            test_dim);\n    IndexStreamer::Pointer streamer =\n        std::make_shared<HnswRabitqStreamer>(holder);\n\n    ailego::Params params;\n    params.set(\"proxima.hnsw_rabitq.streamer.max_neighbor_count\", 16U);\n    params.set(\"proxima.hnsw_rabitq.streamer.upper_neighbor_count\", 8U);\n    params.set(\"proxima.hnsw_rabitq.streamer.scaling_factor\", 5U);\n    params.set(\"proxima.hnsw_rabitq.general.dimension\", test_dim);\n\n    int ret = streamer->init(index_meta, params);\n\n    // dimension <= 63 or >= 4096: init() should return -31\n    if (test_dim <= 63 || test_dim >= 4096) {\n      ASSERT_EQ(-31, ret) << \"expected init to fail with -31, dim=\" << test_dim;\n      std::cout << \"Dimension \" << test_dim\n                << \" correctly rejected with ret=\" << ret << std::endl;\n      continue;\n    }\n\n    // Valid dimensions: verify full streaming build succeeds\n    ASSERT_EQ(0, ret) << \"init failed, dim=\" << test_dim;\n\n    for (size_t i = 0; i < doc_cnt; i++) {\n      NumericalVector<float> vec(test_dim);\n      for (size_t j = 0; j < test_dim; ++j) {\n        vec[j] = static_cast<float>(i * test_dim + j) / 1000.0f;\n      }\n      ASSERT_TRUE(holder->emplace(i, std::move(vec))) << \"dim=\" << test_dim;\n    }\n\n    RabitqConverter converter;\n    converter.init(index_meta, ailego::Params());\n    ASSERT_EQ(0, converter.train(holder))\n        << \"converter train failed, dim=\" << test_dim;\n    std::shared_ptr<IndexReformer> index_reformer;\n    ASSERT_EQ(0, converter.to_reformer(&index_reformer)) << \"dim=\" << test_dim;\n    auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);\n\n    // Recreate streamer with reformer\n    streamer = std::make_shared<HnswRabitqStreamer>(holder, reformer);\n    ASSERT_EQ(0, streamer->init(index_meta, params))\n        << \"init with reformer failed, dim=\" << 
test_dim;\n\n    auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n    ASSERT_NE(nullptr, storage);\n    ailego::Params stg_params;\n    ASSERT_EQ(0, storage->init(stg_params));\n    std::string storage_path =\n        dir_ + \"/TestDimensions_\" + std::to_string(test_dim);\n    ASSERT_EQ(0, storage->open(storage_path, true))\n        << \"storage open failed, dim=\" << test_dim;\n    ASSERT_EQ(0, streamer->open(storage))\n        << \"streamer open failed, dim=\" << test_dim;\n\n    auto context = streamer->create_context();\n    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, test_dim);\n    for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {\n      ASSERT_EQ(0,\n                streamer->add_impl(it->key(), it->data(), query_meta, context))\n          << \"add failed, dim=\" << test_dim << \", key=\" << it->key();\n    }\n    ASSERT_EQ(0, streamer->flush(0UL)) << \"flush failed, dim=\" << test_dim;\n\n    std::cout << \"Dimension \" << test_dim << \" passed\" << std::endl;\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "tests/core/algorithm/hnsw_sparse/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw_sparse\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/hnsw_sparse\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/hnsw_sparse/hnsw_sparse_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_builder.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"hnsw_sparse_params.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseBuilderTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n\n  static std::string _dir;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswSparseBuilderTest::_dir(\"HnswSparseBuilderTest\");\nshared_ptr<IndexMeta> HnswSparseBuilderTest::_index_meta_ptr;\n\nvoid HnswSparseBuilderTest::SetUp(void) {\n  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  _index_meta_ptr->set_metric(\"InnerProductSparse\", 0, ailego::Params());\n}\n\nvoid HnswSparseBuilderTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", _dir.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswSparseBuilderTest, TestGeneral) {\n  
IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n\n  ASSERT_EQ(0, builder->train(holder));\n\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestGeneral\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n     
 sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswSparseBuilderTest, TestMemquota) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 1000UL;\n  uint32_t sparse_count = 32;\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_builder.memory_quota\", 100000UL);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(IndexError_NoMemory, builder->build(holder));\n}\n\nTEST_F(HnswSparseBuilderTest, TestIndexThreads) {\n  IndexBuilder::Pointer builder1 =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder1, nullptr);\n  IndexBuilder::Pointer builder2 =\n      
IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder2, nullptr);\n\n  auto holder =\n      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n\n  size_t doc_cnt = 1000UL;\n  uint32_t sparse_count = 32;\n\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  std::srand(ailego::Realtime::MilliSeconds());\n  auto threads =\n      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);\n  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));\n\n  auto build_index1 = [&]() {\n    ASSERT_EQ(0, builder1->train(threads, holder));\n    ASSERT_EQ(0, builder1->build(threads, holder));\n  };\n  auto build_index2 = [&]() {\n    ASSERT_EQ(0, builder2->train(threads, holder));\n    ASSERT_EQ(0, builder2->build(threads, holder));\n  };\n\n  auto t1 = std::async(std::launch::async, build_index1);\n  auto t2 = std::async(std::launch::async, build_index2);\n  t1.wait();\n  t2.wait();\n\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndexThreads\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder1->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder2->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats1 = builder1->stats();\n  ASSERT_EQ(doc_cnt, stats1.built_count());\n  auto &stats2 = builder2->stats();\n  ASSERT_EQ(doc_cnt, stats2.built_count());\n}\n\nTEST_F(HnswSparseBuilderTest, TestHalfFloatConverter) {\n  
IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"HalfFloatSparseConverter\");\n  converter->init(*_index_meta_ptr, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  converter->transform(holder);\n\n  auto converted_holder = converter->sparse_result();\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);\n  ASSERT_EQ(0, builder->init(index_meta, converter_params));\n\n  ASSERT_EQ(0, builder->train(converted_holder));\n\n  ASSERT_EQ(0, builder->build(converted_holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestHalFloatConverter\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n\n  // cleanup and rebuild\n  ASSERT_EQ(0, builder->cleanup());\n\n  auto holder2 =\n      
make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt2 = 2000UL;\n  for (size_t i = 0; i < doc_cnt2; i++) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_count);\n    NumericalVector<float> sparse_values(sparse_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices[j] = 20 * j;\n      sparse_values[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_values);\n\n    ASSERT_TRUE(holder2->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder2));\n  ASSERT_EQ(0, builder->build(holder2));\n  auto dumper2 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper2, nullptr);\n  ASSERT_EQ(0, dumper2->create(path));\n  ASSERT_EQ(0, builder->dump(dumper2));\n  ASSERT_EQ(0, dumper2->close());\n\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt2, stats.built_count());\n  ASSERT_EQ(doc_cnt2, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n}\n\nTEST_F(HnswSparseBuilderTest, TestIndptr) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n\n  std::vector<uint64_t> keys;\n  keys.reserve(doc_cnt);\n\n  std::vector<uint64_t> sparse_indptr;\n  sparse_indptr.reserve(doc_cnt + 1);\n\n  std::vector<uint32_t> sparse_indices;\n  sparse_indices.reserve(doc_cnt * sparse_count);\n\n  std::vector<float> sparse_values;\n  sparse_values.reserve(doc_cnt * sparse_count);\n\n  size_t sparse_count_total = 0;\n  sparse_indptr.push_back(0);\n  for (size_t i = 0; i < doc_cnt; i++) {\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices.push_back(20 * j);\n      sparse_values.push_back(i);\n    }\n\n    keys.push_back(i);\n\n    sparse_count_total += 
sparse_count;\n    sparse_indptr.push_back(sparse_count_total);\n  }\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n\n  ASSERT_EQ(0, builder->build(doc_cnt, keys.data(), sparse_indptr.data(),\n                              sparse_indices.data(), sparse_values.data()));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndptr\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n}\n\nTEST_F(HnswSparseBuilderTest, TestIndptrFp16) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n\n  uint32_t sparse_count = 4;\n  size_t doc_cnt = 1000UL;\n\n  std::vector<uint64_t> keys;\n  keys.reserve(doc_cnt);\n\n  std::vector<uint64_t> sparse_indptr;\n  sparse_indptr.reserve(doc_cnt + 1);\n\n  std::vector<uint32_t> sparse_indices;\n  sparse_indices.reserve(doc_cnt * sparse_count);\n\n  std::vector<float> sparse_values;\n  sparse_values.reserve(doc_cnt * sparse_count);\n\n  size_t sparse_count_total = 0;\n  sparse_indptr.push_back(0);\n  for (size_t i = 0; i < doc_cnt; i++) {\n    for (size_t j = 0; j < sparse_count; ++j) {\n      sparse_indices.push_back(20 * j);\n      sparse_values.push_back(i);\n    }\n\n    keys.push_back(i);\n\n    sparse_count_total += sparse_count;\n    sparse_indptr.push_back(sparse_count_total);\n  }\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP16);\n  ailego::Params 
params;\n  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);\n  ASSERT_EQ(0, builder->init(meta, params));\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  ASSERT_EQ(0, builder->build(qmeta, doc_cnt, keys.data(), sparse_indptr.data(),\n                              sparse_indices.data(), sparse_values.data()));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  string path = _dir + \"/TestIndptrFp16\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats = builder->stats();\n  ASSERT_EQ(0UL, stats.trained_count());\n  ASSERT_EQ(doc_cnt, stats.built_count());\n  ASSERT_EQ(doc_cnt, stats.dumped_count());\n  ASSERT_EQ(0UL, stats.discarded_count());\n  ASSERT_EQ(0UL, stats.trained_costtime());\n  ASSERT_GT(stats.built_costtime(), 0UL);\n  // ASSERT_GT(stats.dumped_costtime(), 0UL);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_searcher.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <iomanip>\n#include <ailego/math/distance.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"hnsw_sparse_params.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static sparse_dim_count = 16;\n\nclass HnswSparseSearcherTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  static std::string dir_;\n  static shared_ptr<IndexMeta> _index_meta_ptr;\n};\n\nstd::string HnswSparseSearcherTest::dir_(\"HnswSparseSearcherTest/\");\nshared_ptr<IndexMeta> HnswSparseSearcherTest::_index_meta_ptr;\n\nvoid HnswSparseSearcherTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    
std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_vec[j] = dist(gen);\n    }\n\n    float norm;\n    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,\n                                           &norm);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_vec[j] = sparse_vec[j] / norm;\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\nvoid HnswSparseSearcherTest::SetUp(void) {\n  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  _index_meta_ptr->set_metric(\"InnerProductSparse\", 0, ailego::Params());\n}\n\nvoid HnswSparseSearcherTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswSparseSearcherTest, TestGeneral) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);\n  
params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // size_t cnt = 5000U;\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  auto path = dir_ + \"/TestGeneral\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t 
linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  size_t step = 50;\n  for (size_t i = 0; i < cnt; i += step) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseSearcherTest, TestRnnSearch) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, 
nullptr);\n\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 1000UL;\n\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestRnnSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 1.0;\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t topk = 50;\n  ctx->set_topk(topk);\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  auto &results 
= ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  float radius = -results[topk / 2].score();\n  ctx->set_threshold(radius);\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n\n  ASSERT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(-results[k].score(), radius);\n  }\n\n  // Test Reset Threshold\n  ctx->reset_threshold();\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  ASSERT_EQ(topk, results.size());\n  ASSERT_LT(-results[topk - 1].score(), radius);\n}\n\nTEST_F(HnswSparseSearcherTest, TestClearAndReload) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 1000UL;\n\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_builder.thread_count\", 3);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestClearAndReload\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer 
searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.check_crc_enable\", true);\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.max_scan_ratio\",\n                     1.1f);  // including upper layer\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!knnCtx);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 1.0;\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t topk = 100;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, knnCtx));\n  ASSERT_EQ(0,\n            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, linearCtx));\n\n  auto &knnResult = knnCtx->result();\n  ASSERT_EQ(topk, knnResult.size());\n  auto &linearResult = linearCtx->result();\n  ASSERT_EQ(topk, linearResult.size());\n  auto &stats = searcher->stats();\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n  // ASSERT_GT(stats.loaded_costtime(), 0UL);\n\n  //! 
cleanup\n  ASSERT_EQ(0, searcher->cleanup());\n  ASSERT_EQ(nullptr, searcher->create_context());\n  ASSERT_EQ(IndexError_Runtime,\n            searcher->load(storage, IndexMetric::Pointer()));\n  ASSERT_EQ(0UL, stats.loaded_count());\n\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  linearCtx = searcher->create_context();\n  knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!knnCtx);\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, knnCtx));\n  ASSERT_EQ(0,\n            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, linearCtx));\n\n  auto &knnResult1 = knnCtx->result();\n  ASSERT_EQ(topk, knnResult1.size());\n  auto &linearResult1 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult1.size());\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n\n  //! 
unload\n  ASSERT_EQ(0, searcher->unload());\n  ASSERT_EQ(nullptr, searcher->create_context());\n  ASSERT_EQ(0UL, stats.loaded_count());\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  linearCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  linearCtx->set_topk(topk);\n\n  ASSERT_EQ(0,\n            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, linearCtx));\n\n  auto &linearResult2 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult2.size());\n  ASSERT_EQ(doc_cnt, stats.loaded_count());\n}\n\nTEST_F(HnswSparseSearcherTest, TestFilter) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 100UL;\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      if (i <= 10) {\n        sparse_velues[j] = i;\n      } else {\n        sparse_velues[j] = 10 - (i - 10) * 0.5;\n      }\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n    p_keys[0].push_back(i);\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_builder.thread_count\", 3);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestFilter\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  
ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.check_crc_enable\", true);\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.max_scan_ratio\", 1.0f);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!linearByPKeysCtx);\n  ASSERT_TRUE(!!knnCtx);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t topk = 10;\n  linearCtx->set_topk(topk);\n  linearByPKeysCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, knnCtx));\n  ASSERT_EQ(0,\n            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, linearCtx));\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                   sparse_dim_count, sparse_indices.data(),\n                   sparse_velues.data(), p_keys, qmeta, linearByPKeysCtx));\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 10UL || key == 11UL) {\n      return true;\n    }\n    return false;\n  };\n\n  auto &knnResult = 
knnCtx->result();\n  ASSERT_EQ(topk, knnResult.size());\n  ASSERT_EQ(10UL, knnResult[0].key());\n  ASSERT_EQ(11UL, knnResult[1].key());\n  ASSERT_EQ(12UL, knnResult[2].key());\n\n  auto &linearResult = linearCtx->result();\n  ASSERT_EQ(topk, linearResult.size());\n  ASSERT_EQ(10UL, linearResult[0].key());\n  ASSERT_EQ(11UL, linearResult[1].key());\n  ASSERT_EQ(12UL, linearResult[2].key());\n\n  auto &linearByPKeysResult = linearByPKeysCtx->result();\n  ASSERT_EQ(topk, linearByPKeysResult.size());\n  ASSERT_EQ(10UL, linearByPKeysResult[0].key());\n  ASSERT_EQ(11UL, linearByPKeysResult[1].key());\n  ASSERT_EQ(12UL, linearByPKeysResult[2].key());\n\n  knnCtx->set_filter(filterFunc);\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, knnCtx));\n  auto &knnResult1 = knnCtx->result();\n  ASSERT_EQ(topk, knnResult1.size());\n  ASSERT_EQ(12UL, knnResult1[0].key());\n  ASSERT_EQ(9UL, knnResult1[1].key());\n  ASSERT_EQ(13UL, knnResult1[2].key());\n\n  linearCtx->set_filter(filterFunc);\n  ASSERT_EQ(0,\n            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, linearCtx));\n  auto &linearResult1 = linearCtx->result();\n  ASSERT_EQ(topk, linearResult1.size());\n  ASSERT_EQ(12UL, linearResult1[0].key());\n  ASSERT_EQ(9UL, linearResult1[1].key());\n  ASSERT_EQ(13UL, linearResult1[2].key());\n\n  linearByPKeysCtx->set_filter(filterFunc);\n  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(\n                   sparse_dim_count, sparse_indices.data(),\n                   sparse_velues.data(), p_keys, qmeta, linearByPKeysCtx));\n  auto &linearByPKeysResult1 = linearByPKeysCtx->result();\n  ASSERT_EQ(topk, linearByPKeysResult1.size());\n  ASSERT_EQ(12UL, linearByPKeysResult1[0].key());\n  ASSERT_EQ(9UL, linearByPKeysResult1[1].key());\n  ASSERT_EQ(13UL, 
linearByPKeysResult1[2].key());\n}\n\nTEST_F(HnswSparseSearcherTest, TestBatchQuery) {\n  constexpr uint32_t sparse_dim_count = 8U;\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 5000UL;\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(2);\n  p_keys[0].resize(doc_cnt);\n  p_keys[1].resize(doc_cnt);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  float value_off = -(doc_cnt / 2.0);\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n\n      if (i <= 3 || i >= doc_cnt - 3) {\n        sparse_velues[j] = 0;\n      } else {\n        sparse_velues[j] = i + value_off;\n      }\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n\n    p_keys[0][i] = i;\n    p_keys[1][i] = i;\n  }\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_builder.max_neighbor_count\", 160);\n  params.set(\"proxima.hnsw.sparse_builder.scaling_factor\", 16);\n  params.set(\"proxima.hnsw.sparse_builder.ef_construction\", 10);\n  params.set(\"proxima.hnsw.sparse_builder.thread_count\", 1);\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestBatchQuery\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  
IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.ef\", 1000);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto linearByPKeysCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  ASSERT_TRUE(!!linearCtx);\n  ASSERT_TRUE(!!linearByPKeysCtx);\n  ASSERT_TRUE(!!knnCtx);\n  linearCtx->set_debug_mode(true);\n  linearByPKeysCtx->set_debug_mode(true);\n  knnCtx->set_debug_mode(true);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  linearByPKeysCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  // // do linear search test\n  {\n    uint32_t dims[] = {sparse_dim_count, sparse_dim_count};\n    uint32_t indicies[] = {0, 20, 40, 60, 80, 100, 120, 140,\n                           0, 20, 40, 60, 80, 100, 120, 140};\n    float queries[] = {3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,\n                       -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f};\n\n    ASSERT_EQ(0, searcher->search_bf_impl(dims, indicies, queries, qmeta, 2,\n                                          linearCtx));\n\n    auto &linearResult0 = linearCtx->result(0);\n    ASSERT_EQ(4996UL, linearResult0[0].key());\n    ASSERT_EQ(4995UL, linearResult0[1].key());\n    ASSERT_EQ(4994UL, linearResult0[2].key());\n    ASSERT_EQ(4993UL, linearResult0[3].key());\n    ASSERT_EQ(4992UL, linearResult0[4].key());\n    ASSERT_EQ(4991UL, linearResult0[5].key());\n    ASSERT_EQ(4990UL, linearResult0[6].key());\n    ASSERT_EQ(4989UL, 
linearResult0[7].key());\n\n\n    auto &linearResult1 = linearCtx->result(1);\n    ASSERT_EQ(4UL, linearResult1[0].key());\n    ASSERT_EQ(5UL, linearResult1[1].key());\n    ASSERT_EQ(6UL, linearResult1[2].key());\n    ASSERT_EQ(7UL, linearResult1[3].key());\n    ASSERT_EQ(8UL, linearResult1[4].key());\n    ASSERT_EQ(9UL, linearResult1[5].key());\n    ASSERT_EQ(10UL, linearResult1[6].key());\n    ASSERT_EQ(11UL, linearResult1[7].key());\n  }\n\n  // // do linear search by p_keys test\n  {\n    uint32_t dims[] = {sparse_dim_count, sparse_dim_count};\n    uint32_t indicies[] = {0, 20, 40, 60, 80, 100, 120, 140,\n                           0, 20, 40, 60, 80, 100, 120, 140};\n    float queries[] = {3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,\n                       -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f};\n\n    ASSERT_EQ(\n        0, searcher->search_bf_by_p_keys_impl(dims, indicies, queries, p_keys,\n                                              qmeta, 2, linearByPKeysCtx));\n\n    auto &bfResult0 = linearByPKeysCtx->result(0);\n    ASSERT_EQ(4996UL, bfResult0[0].key());\n    ASSERT_EQ(4995UL, bfResult0[1].key());\n    ASSERT_EQ(4994UL, bfResult0[2].key());\n    ASSERT_EQ(4993UL, bfResult0[3].key());\n    ASSERT_EQ(4992UL, bfResult0[4].key());\n    ASSERT_EQ(4991UL, bfResult0[5].key());\n    ASSERT_EQ(4990UL, bfResult0[6].key());\n    ASSERT_EQ(4989UL, bfResult0[7].key());\n\n    auto &bfResult1 = linearByPKeysCtx->result(1);\n    ASSERT_EQ(4UL, bfResult1[0].key());\n    ASSERT_EQ(5UL, bfResult1[1].key());\n    ASSERT_EQ(6UL, bfResult1[2].key());\n    ASSERT_EQ(7UL, bfResult1[3].key());\n    ASSERT_EQ(8UL, bfResult1[4].key());\n    ASSERT_EQ(9UL, bfResult1[5].key());\n    ASSERT_EQ(10UL, bfResult1[6].key());\n    ASSERT_EQ(11UL, bfResult1[7].key());\n  }\n\n  // // do knn search test\n  {\n    uint32_t dims[] = {sparse_dim_count, sparse_dim_count};\n    uint32_t indicies[] = {0, 20, 40, 60, 80, 100, 120, 140,\n                           0, 
20, 40, 60, 80, 100, 120, 140};\n    float queries[] = {3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,\n                       -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f, -7.1f};\n\n    ASSERT_EQ(0,\n              searcher->search_impl(dims, indicies, queries, qmeta, 2, knnCtx));\n\n    auto &knnResult0 = knnCtx->result(0);\n    ASSERT_EQ(4996UL, knnResult0[0].key());\n    ASSERT_EQ(4995UL, knnResult0[1].key());\n    ASSERT_EQ(4994UL, knnResult0[2].key());\n\n    auto &knnResult1 = knnCtx->result(1);\n    ASSERT_EQ(4UL, knnResult1[0].key());\n    ASSERT_EQ(5UL, knnResult1[1].key());\n    ASSERT_EQ(6UL, knnResult1[2].key());\n  }\n}\n\nTEST_F(HnswSparseSearcherTest, TestStreamerDump) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestStreamerDump.index\", true));\n  ASSERT_EQ(0, streamer->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 10000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, 
streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  auto path = dir_ + \"/TestStreamerDump\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  size_t step = 50;\n\n  for (size_t i = 0; i < cnt; i += step) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, 
knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * step * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  EXPECT_GT(cost, 1.50f);\n}\n\nTEST_F(HnswSparseSearcherTest, TestSharedContext) {\n  auto gen_holder = [](int start, size_t doc_cnt) {\n    auto holder =\n        make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n    uint64_t key = start;\n\n    for (size_t i = 0; i < doc_cnt; ++i) {\n      SparseVector<float> vec;\n\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = i;\n      }\n\n      vec.add_sparses(sparse_indices, sparse_velues);\n\n      key += 3;\n\n      holder->emplace(key, vec);\n    }\n\n    return holder;\n  };\n  auto gen_index = [&gen_holder](int start, size_t docs, std::string path) {\n    auto holder = gen_holder(start, docs);\n    IndexBuilder::Pointer builder =\n        IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n    ailego::Params params;\n    builder->init(*_index_meta_ptr, params);\n    builder->train(holder);\n    
builder->build(holder);\n    auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n    dumper->create(path);\n    builder->dump(dumper);\n    dumper->close();\n\n    IndexSearcher::Pointer searcher =\n        IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n    auto name = rand() % 2 ? \"FileReadStorage\" : \"MMapFileReadStorage\";\n    auto storage = IndexFactory::CreateStorage(name);\n    storage->open(path, false);\n    params.set(\"proxima.hnsw.sparse_searcher.visit_bloomfilter_enable\",\n               rand() % 2);\n    searcher->init(ailego::Params());\n    searcher->load(storage, IndexMetric::Pointer());\n    return searcher;\n  };\n\n  srand(ailego::Realtime::MilliSeconds());\n  size_t docs1 = rand() % 500 + 100;\n  size_t docs2 = rand() % 5000 + 100;\n  size_t docs3 = rand() % 50000 + 100;\n  auto path1 = dir_ + \"/TestSharedContext.index1\";\n  auto path2 = dir_ + \"/TestSharedContext.index2\";\n  auto path3 = dir_ + \"/TestSharedContext.index3\";\n  auto searcher1 = gen_index(0, docs1, path1);\n  auto searcher2 = gen_index(1, docs2, path2);\n  auto searcher3 = gen_index(2, docs3, path3);\n\n  srand(ailego::Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto do_test = [&]() {\n    IndexSearcher::Context::Pointer ctx;\n    switch (rand() % 3) {\n      case 0:\n        ctx = searcher1->create_context();\n        break;\n      case 1:\n        ctx = searcher2->create_context();\n        break;\n      case 2:\n        ctx = searcher3->create_context();\n        break;\n    }\n    ctx->set_topk(10);\n\n    int ret = 0;\n    for (int i = 0; i < 100; ++i) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = -(i + 0.1f);\n      }\n\n      auto code = rand() % 
6;\n      switch (code) {\n        case 0:\n          ret = searcher1->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx);\n          break;\n        case 1:\n          ret = searcher2->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx);\n          break;\n        case 2:\n          ret = searcher3->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx);\n          break;\n        case 3:\n          ret =\n              searcher1->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx);\n          break;\n        case 4:\n          ret =\n              searcher2->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx);\n          break;\n        case 5:\n          ret =\n              searcher3->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx);\n          break;\n      }\n\n      EXPECT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(10, results.size());\n      for (int k = 0; k < 10; ++k) {\n        // std::cout << \"code: \" << code << \", i: \" << i << \", k: \" << k\n        //           << \", key: \" << results[k].key()\n        //           << \", score: \" << results[k].score() << std::endl;\n\n        EXPECT_EQ(code % 3, results[k].key() % 3);\n      }\n    }\n  };\n  auto t1 = std::async(std::launch::async, do_test);\n  auto t2 = std::async(std::launch::async, do_test);\n  t1.wait();\n  t2.wait();\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  
storage->init(ailego::Params());\n  storage->open(dir_ + \"/TestSharedContext.index4\", true);\n  streamer->init(*_index_meta_ptr, ailego::Params());\n  streamer->open(storage);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 1.1f;\n  }\n\n  auto ctx1 = streamer->create_context();\n  EXPECT_EQ(IndexError_Unsupported,\n            searcher1->search_impl(sparse_dim_count, sparse_indices.data(),\n                                   sparse_velues.data(), qmeta, ctx1));\n\n  auto ctx2 = searcher1->create_context();\n  EXPECT_EQ(IndexError_Unsupported,\n            streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx2));\n}\n\nTEST_F(HnswSparseSearcherTest, TestProvider) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 5000UL;\n  std::vector<key_t> keys(doc_cnt);\n  srand(ailego::Realtime::MilliSeconds());\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < doc_cnt; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = ailego::Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  
}\n\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = keys[i];\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(keys[i], vec));\n  }\n\n  ailego::Params params;\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestProvider\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.ef\", 1);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_sparse_provider();\n  for (size_t i = 0; i < keys.size(); ++i) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(0, provider->get_sparse_vector(keys[i], &sparse_count,\n                                             &sparse_indices_buffer,\n                                             &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      
ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);\n    }\n  }\n\n  auto iter = provider->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *d = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n\n  ASSERT_EQ(cnt, doc_cnt);\n  ASSERT_EQ(_index_meta_ptr->data_type(), provider->data_type());\n}\n\nTEST_F(HnswSparseSearcherTest, TestRandomPaddingTopk) {\n  std::mt19937 mt{};\n  std::uniform_real_distribution<float> gen(0.0f, 1.0f);\n  constexpr size_t static sparse_dim_count = 8;\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  const size_t COUNT = 10000UL;\n\n  for (size_t i = 0; i < COUNT; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ASSERT_EQ(0, builder->init(meta, ailego::Params()));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestRandomPadding\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer 
searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_searcher.force_padding_result_enable\", true);\n  params.set(\"proxima.hnsw.sparse_searcher.scan_ratio\", 0.01f);\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(params));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 1.0f;\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  std::uniform_int_distribution<uint32_t> gen_int(1, COUNT);\n  size_t topk = gen_int(mt);\n  ctx->set_topk(topk);\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n\n  auto &results = ctx->result();\n  EXPECT_EQ(results.size(), topk);\n  for (size_t i = 0; i < results.size(); ++i) {\n    for (size_t j = 0; j < i; ++j) {\n      EXPECT_NE(results[i].key(), results[j].key());\n    }\n  }\n\n  ctx->set_filter([](uint64_t key) { return true; });\n\n  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n\n  auto &results1 = ctx->result();\n  EXPECT_EQ(results1.size(), 0);\n}\n\nTEST_F(HnswSparseSearcherTest, TestBruteForceSetupInContext) {\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(\"HnswSparseBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      
make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    SparseVector<float> vec;\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    vec.add_sparses(sparse_indices, sparse_velues);\n\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  ailego::Params params;\n  // params.set(\"proxima.hnsw.sparse_builder.max_neighbor_count\", 16);\n  params.set(\"proxima.hnsw.sparse_builder.scaling_factor\", 16);\n  params.set(\"proxima.hnsw.sparse_builder.ef_construction\", 10);\n  params.set(\"proxima.hnsw.sparse_builder.thread_count\", 2);\n  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = dir_ + \"/TestBruteForceSetupInContext\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  ASSERT_TRUE(searcher != nullptr);\n  ailego::Params searcherParams;\n  searcherParams.set(\"proxima.hnsw.sparse_searcher.ef\", 1);\n  ASSERT_EQ(0, searcher->init(searcherParams));\n\n  auto storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, storage->open(path, false));\n  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t topk = 200;\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  bool 
set_bf_threshold = false;\n  bool use_update = false;\n\n  size_t step = 50;\n  for (size_t i = 0; i < doc_cnt; i += step) {\n    auto linearCtx = searcher->create_context();\n    auto knnCtx = searcher->create_context();\n\n    ASSERT_TRUE(!!linearCtx);\n    ASSERT_TRUE(!!knnCtx);\n\n    linearCtx->set_topk(topk);\n    knnCtx->set_topk(topk);\n\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 0.1f;\n    }\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      if (use_update) {\n        ailego::Params searcherParamsExtra;\n\n        searcherParamsExtra.set(\n            \"proxima.hnsw.sparse_searcher.brute_force_threshold\", doc_cnt);\n        knnCtx->update(searcherParamsExtra);\n      } else {\n        knnCtx->set_bruteforce_threshold(doc_cnt);\n      }\n\n      use_update = !use_update;\n    }\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, linearCtx));\n    // auto t3 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      linearTotalTime += t2 - t1;\n    } else {\n      knnTotalTime += t2 - t1;\n    }\n\n    set_bf_threshold = !set_bf_threshold;\n\n    auto &knnResult = knnCtx->result();\n    // TODO: check\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += doc_cnt - 1 == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(doc_cnt - 1, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n    
  totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * step * step * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.90f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseSearcherTest, TestHalfFloatConverter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  IndexMeta index_meta_raw(IndexMeta::MetaType::MT_SPARSE,\n                           IndexMeta::DataType::DT_FP32);\n  index_meta_raw.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"HalfFloatSparseConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n\n  reformer->init(index_meta.reformer_params());\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, 
storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestHalfFloatConverter.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // size_t cnt = 5000U;\n  size_t cnt = 20000U;\n  size_t sparse_dim_count = 32;\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    std::string new_vec;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->transform(\n                     sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, &new_vec, &new_meta));\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    new_vec.data(), new_meta, ctx));\n  }\n\n  auto path = dir_ + \"/TestHalfFloatConverter\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk 
= 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n  size_t step = 50;\n  for (size_t i = 0; i < cnt; i += step) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    std::string ovec;\n    IndexQueryMeta new_qmeta;\n    ASSERT_EQ(0,\n              reformer->transform(sparse_dim_count, sparse_indices.data(),\n                                  sparse_vec.data(), qmeta, &ovec, &new_qmeta));\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       ovec.data(), new_qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       ovec.data(), new_qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * step * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, 
totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseSearcherTest, TestQueryFilteringRatio) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestQueryFilteringRatio.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // size_t cnt = 5000U;\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  auto path = dir_ + 
\"/TestQueryFilteringRatio\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n\n  ailego::Params searcher_params;\n  searcher_params.set(PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO, 0.05);\n\n  ASSERT_EQ(0, searcher->init(searcher_params));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n  auto linearCtx = searcher->create_context();\n  auto knnCtx = searcher->create_context();\n  size_t topk = 20;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  size_t step = 100;\n  for (size_t i = 0; i < cnt; i += step) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = 
linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseSearcherTest, TestHalfFloatRevert) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer, nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n\n  IndexMeta index_meta_raw(IndexMeta::MetaType::MT_SPARSE,\n                           IndexMeta::DataType::DT_FP32);\n  index_meta_raw.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"HalfFloatSparseConverter\");\n  ASSERT_TRUE(converter != nullptr);\n\n  converter->init(index_meta_raw, converter_params);\n\n  IndexMeta index_meta = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  
ASSERT_TRUE(reformer != nullptr);\n\n  reformer->init(index_meta.reformer_params());\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestHalfFloatRevert.index\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  // size_t cnt = 5000U;\n  size_t cnt = 20000U;\n  size_t sparse_dim_count = 32;\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  IndexQueryMeta new_meta;\n  for (size_t i = 0; i < cnt; i++) {\n    std::string new_vec;\n    ASSERT_EQ(0, reformer->transform(\n                     sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, &new_vec, &new_meta));\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    new_vec.data(), new_meta, ctx));\n  }\n\n  const float epsilon = 1e-2;\n\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices;\n    std::string sparse_values;\n\n    ASSERT_EQ(streamer->get_sparse_vector(i, &sparse_count, &sparse_indices,\n                                          &sparse_values),\n              0);\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    std::string sparse_values_out;\n    sparse_values_out.resize(sparse_count * sizeof(float));\n\n    ASSERT_EQ(reformer->revert(\n                  sparse_count,\n                  reinterpret_cast<const uint32_t 
*>(sparse_indices.data()),\n                  sparse_values.data(), new_meta, &sparse_values_out),\n              0);\n\n    for (size_t j = 0; j < sparse_count; ++j) {\n      float vector_value = *((float *)(sparse_values_out.data()) + j);\n      EXPECT_NEAR(vector_value, sparse_vec_list[i][j], epsilon);\n    }\n  }\n\n  auto path = dir_ + \"/TestHalfFloatRevert\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, dumper->close());\n\n  // do searcher knn\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto read_storage = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, read_storage->open(path, false));\n  ASSERT_TRUE(searcher != nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));\n\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices;\n    std::string sparse_values;\n\n    ASSERT_EQ(searcher->get_sparse_vector(i, &sparse_count, &sparse_indices,\n                                          &sparse_values),\n              0);\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    std::string sparse_values_out;\n    sparse_values_out.resize(sparse_count * sizeof(float));\n\n    ASSERT_EQ(reformer->revert(\n                  sparse_count,\n                  reinterpret_cast<const uint32_t *>(sparse_indices.data()),\n                  sparse_values.data(), new_meta, &sparse_values_out),\n              0);\n\n    for (size_t j = 0; j < sparse_count; ++j) {\n      float vector_value = *((float *)(sparse_values_out.data()) + j);\n      EXPECT_NEAR(vector_value, sparse_vec_list[i][j], epsilon);\n    }\n  }\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC 
diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_buffer_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <iostream>\n#include <memory>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"hnsw_sparse_streamer.h\"\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nnamespace zvec {\nnamespace core {\n\nclass HnswSparseStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  static std::string dir_;\n  static shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string HnswSparseStreamerTest::dir_(\"HnswSparseStreamerTest/\");\nshared_ptr<IndexMeta> HnswSparseStreamerTest::index_meta_ptr_;\n\nvoid HnswSparseStreamerTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {\n  std::random_device rd;\n  std::mt19937 
gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_vec[j] = dist(gen);\n    }\n\n    float norm;\n    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,\n                                           &norm);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_vec[j] = sparse_vec[j] / norm;\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\nvoid HnswSparseStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  index_meta_ptr_->set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid HnswSparseStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswSparseStreamerTest, TestGeneral) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(write_streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  
params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, write_storage->init(stg_params));\n  ASSERT_EQ(0, write_storage->open(dir_ + \"/Test/HnswSparseSearch\", true));\n  ASSERT_EQ(0, write_streamer->init(index_meta, params));\n  ASSERT_EQ(0, write_streamer->open(write_storage));\n\n  size_t cnt = 20000U;\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, write_streamer->add_impl(\n                     i, sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, ctx));\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/HnswSparseSearch\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n\n  auto linearCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 
0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  for (size_t i = 0; i < cnt; i += 100) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(\n        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, read_streamer->search_bf_impl(\n                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),\n                     qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseStreamerTest, TestHnswSearchMMap) {\n  IndexStreamer::Pointer write_streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  
ASSERT_TRUE(write_streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, write_storage->init(stg_params));\n  ASSERT_EQ(0, write_storage->open(dir_ + \"/Test/HnswSparseSearch\", true));\n  ASSERT_EQ(0, write_streamer->init(index_meta, params));\n  ASSERT_EQ(0, write_streamer->open(write_storage));\n\n  size_t cnt = 20000U;\n  auto ctx = write_streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, write_streamer->add_impl(\n                     i, sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, ctx));\n  }\n  write_streamer->flush(0UL);\n  write_streamer->close();\n  write_streamer.reset();\n\n  IndexStreamer::Pointer read_streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, read_storage);\n  ASSERT_EQ(0, 
read_storage->init(stg_params));\n  ASSERT_EQ(0, read_storage->open(dir_ + \"/Test/HnswSparseSearch\", false));\n  ASSERT_EQ(0, read_streamer->open(read_storage));\n\n  auto linearCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = read_streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  for (size_t i = 0; i < cnt; i += 100) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(\n        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, read_streamer->search_bf_impl(\n                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),\n                     qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n  float cost = linearTotalTime * 1.0f / 
knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"hnsw_sparse_streamer.h\"\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <future>\n#include <iostream>\n#include <memory>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace std;\nusing namespace testing;\nusing namespace zvec::ailego;\n\nnamespace zvec {\nnamespace core {\n\nconstexpr size_t static sparse_dim_count = 16;\n\nclass HnswSparseStreamerTest : public testing::Test {\n protected:\n  void SetUp(void);\n  void TearDown(void);\n  void generate_sparse_data(\n      size_t cnt, uint32_t sparse_dim_count,\n      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);\n\n  static std::string dir_;\n  static shared_ptr<IndexMeta> index_meta_ptr_;\n};\n\nstd::string HnswSparseStreamerTest::dir_(\"HnswSparseStreamerTest/\");\nshared_ptr<IndexMeta> HnswSparseStreamerTest::index_meta_ptr_;\n\nvoid HnswSparseStreamerTest::generate_sparse_data(\n    size_t cnt, uint32_t sparse_dim_count,\n    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,\n    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {\n  
std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  for (size_t i = 0; i < cnt; ++i) {\n    // prepare sparse\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_vec(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_vec[j] = dist(gen);\n    }\n\n    float norm;\n    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,\n                                           &norm);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_vec[j] = sparse_vec[j] / norm;\n    }\n\n    sparse_indices_list.push_back(sparse_indices);\n    sparse_vec_list.push_back(sparse_vec);\n  }\n}\n\nvoid HnswSparseStreamerTest::SetUp(void) {\n  index_meta_ptr_.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,\n                                                IndexMeta::DataType::DT_FP32));\n  index_meta_ptr_->set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nvoid HnswSparseStreamerTest::TearDown(void) {\n  char cmdBuf[100];\n  snprintf(cmdBuf, 100, \"rm -rf %s\", dir_.c_str());\n  system(cmdBuf);\n}\n\nTEST_F(HnswSparseStreamerTest, TestGeneral) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  
params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  auto linearCtx = streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  for (size_t i = 0; i < cnt; i += 100) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                     
                  sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseStreamerTest, TestAddVector) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw.sparse_streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.sparse_streamer.scaling_factor\", 5U);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestAddVector\", true));\n  ASSERT_EQ(0, 
streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n  size_t cnt = 1000UL;\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(HnswSparseStreamerTest, TestLinearSearch) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_streamer.max_neighbor_count\", 16U);\n  params.set(\"proxima.hnsw.sparse_streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.sparse_streamer.scaling_factor\", 5U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearch.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * 
i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n\n    ctx->set_topk(1U);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n\n    ctx->set_topk(topk);\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_velues.data(), qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(0, result2[0].key());\n    ASSERT_EQ(1, result2[1].key());\n    ASSERT_EQ(2, result2[2].key());\n  }\n\n  ctx->set_topk(100U);\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 10.1f;\n  }\n\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto &result = ctx->result();\n  ASSERT_EQ(100U, result.size());\n  ASSERT_EQ(0, result[0].key());\n  ASSERT_EQ(1, result[1].key());\n  ASSERT_EQ(10, result[10].key());\n  ASSERT_EQ(20, result[20].key());\n  ASSERT_EQ(30, result[30].key());\n  ASSERT_EQ(35, result[35].key());\n  ASSERT_EQ(99, result[99].key());\n}\n\nTEST_F(HnswSparseStreamerTest, TestLinearSearchByKeys) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_streamer.max_neighbor_count\", 16U);\n  
params.set(\"proxima.hnsw.sparse_streamer.upper_neighbor_count\", 8U);\n  params.set(\"proxima.hnsw.sparse_streamer.scaling_factor\", 5U);\n  params.set(\"proxima.hnsw.sparse_streamer.get_vector_enable\", true);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestLinearSearchByKeys.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n  p_keys[0].resize(cnt);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; ++i) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = -1.0 * i - 1.0f;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n\n    p_keys[0][i] = i;\n  }\n\n  size_t topk = 3;\n  for (size_t i = 0; i < cnt; i += 1) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i + 1.0f;\n    }\n    ctx->set_topk(1U);\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result1 = ctx->result();\n    ASSERT_EQ(1UL, result1.size());\n    ASSERT_EQ(0, result1[0].key());\n\n    ctx->set_topk(topk);\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     
sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result2 = ctx->result();\n    ASSERT_EQ(topk, result2.size());\n    ASSERT_EQ(0, result2[0].key());\n    ASSERT_EQ(1, result2[1].key());\n    ASSERT_EQ(2, result2[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 1.0f;\n    }\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(1, result[1].key());\n    ASSERT_EQ(10, result[10].key());\n    ASSERT_EQ(20, result[20].key());\n    ASSERT_EQ(30, result[30].key());\n    ASSERT_EQ(35, result[35].key());\n    ASSERT_EQ(99, result[99].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 10.0f;\n    }\n\n    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    ASSERT_EQ(3U, result.size());\n    ASSERT_EQ(1, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(15, result[2].key());\n  }\n\n  {\n    ctx->set_topk(100U);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = 9.0f;\n    }\n    p_keys[0].clear();\n    for (size_t j = 0; j < cnt; j += 10) {\n      p_keys[0].push_back((uint64_t)j);\n    }\n    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                     sparse_dim_count, sparse_indices.data(),\n                     sparse_velues.data(), p_keys, qmeta, ctx));\n    auto &result = ctx->result();\n    
ASSERT_EQ(100U, result.size());\n    ASSERT_EQ(0, result[0].key());\n    ASSERT_EQ(10, result[1].key());\n    ASSERT_EQ(100, result[10].key());\n    ASSERT_EQ(200, result[20].key());\n    ASSERT_EQ(300, result[30].key());\n    ASSERT_EQ(350, result[35].key());\n    ASSERT_EQ(990, result[99].key());\n  }\n}\n\nTEST_F(HnswSparseStreamerTest, TestOpenClose) {\n  constexpr size_t static sparse_dim_count = 2048;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ASSERT_NE(nullptr, storage2);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TessOpenAndClose1\", true));\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TessOpenAndClose2\", true));\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto checkIter = [](size_t base, size_t total,\n                      IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = base;\n    size_t cnt = 0;\n    while (iter->is_valid()) {\n      float *sparse_data = (float *)iter->sparse_data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < sparse_dim_count; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);\n      }\n      iter->next();\n      cur += 2;\n      cnt++;\n    }\n    ASSERT_EQ(cnt, total);\n  };\n\n  size_t testCnt = 
200;\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < testCnt; i += 2) {\n    float v1 = (float)i;\n    ASSERT_EQ(0, streamer->open(storage1));\n    auto ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n\n    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);\n    NumericalVector<float> sparse_velues1(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices1[j] = j * 20;\n      sparse_velues1[j] = v1;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),\n                                    sparse_velues1.data(), qmeta, ctx));\n\n    checkIter(0, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n\n    float v2 = (float)(i + 1);\n    NumericalVector<uint32_t> sparse_indices2(sparse_dim_count);\n    NumericalVector<float> sparse_velues2(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices2[j] = j * 20;\n      sparse_velues2[j] = v2;\n    }\n\n    ASSERT_EQ(0, streamer->open(storage2));\n    ctx = streamer->create_context();\n    ASSERT_TRUE(!!ctx);\n    ASSERT_EQ(\n        0, streamer->add_impl(i + 1, sparse_dim_count, sparse_indices2.data(),\n                              sparse_velues2.data(), qmeta, ctx));\n    checkIter(1, i / 2 + 1, streamer);\n    ASSERT_EQ(0, streamer->flush(0UL));\n    ASSERT_EQ(0, streamer->close());\n  }\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer1->init(meta, params));\n  ASSERT_EQ(0, streamer1->open(storage1));\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ASSERT_EQ(0, streamer2->init(meta, params));\n  ASSERT_EQ(0, streamer2->open(storage2));\n\n  
checkIter(0, testCnt / 2, streamer1);\n  checkIter(1, testCnt / 2, streamer2);\n}\n\nTEST_F(HnswSparseStreamerTest, TestCreateIterator) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestCreateIterator\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      float *sparse_data = (float *)iter->sparse_data();\n      ASSERT_EQ(cur, iter->key());\n      for (size_t d = 0; d < sparse_dim_count; ++d) {\n        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);\n      }\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);\n    NumericalVector<float> sparse_velues1(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices1[j] = j * 20;\n      sparse_velues1[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),\n                                    
sparse_velues1.data(), qmeta, ctx));\n    checkIter(i + 1, streamer);\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                                       &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n}\n\nTEST_F(HnswSparseStreamerTest, TestNoInit) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  streamer->cleanup();\n}\n\n\nTEST_F(HnswSparseStreamerTest, TestForceFlush) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  stg_params.set(\"proxima.mmap_file.storage.copy_on_write\", true);\n  stg_params.set(\"proxima.mmap_file.storage.force_flush\", true);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto checkIter = [](size_t total, 
IndexStreamer::Pointer &streamer) {\n    auto provider = streamer->create_sparse_provider();\n    auto iter = provider->create_iterator();\n    ASSERT_TRUE(!!iter);\n    size_t cur = 0;\n    while (iter->is_valid()) {\n      ASSERT_EQ(cur, iter->key());\n      const uint32_t sparse_count = iter->sparse_count();\n      ASSERT_EQ(sparse_count, sparse_dim_count);\n\n      const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        ASSERT_FLOAT_EQ((float)cur, data[j]);\n      }\n\n      iter->next();\n      cur++;\n    }\n    ASSERT_EQ(cur, total);\n  };\n\n  size_t cnt = 200;\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto ctx = streamer->create_context();\n\n  for (size_t i = 0; i < cnt; ++i) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n    checkIter(i + 1, streamer);\n  }\n\n  streamer->flush(0UL);\n  streamer->close();\n  storage->close();\n\n  storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestForceFlush\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n  checkIter(cnt, streamer);\n\n  // check getVector\n  auto provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < cnt; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,\n                              
         &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n}\n\nTEST_F(HnswSparseStreamerTest, TestKnnMultiThread) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  constexpr size_t static sparse_dim_count = 32;\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 128);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 64);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 32);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnMultiThread\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                         IndexMeta::DataType::DT_FP32);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        
sparse_indices[j] = j * 20;\n        sparse_velues[j] = (float)i + baseKey;\n      }\n\n      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,\n                                     sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto t1 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000U, t1.get());\n  ASSERT_EQ(1000U, t2.get());\n  ASSERT_EQ(1000U, t3.get());\n  streamer->close();\n\n  // checking data\n  ASSERT_EQ(0, streamer->open(storage));\n  auto provider = streamer->create_sparse_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_EQ((float)iter->key(), data[j]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n\n  // ====== multi thread search\n  size_t topk = 10;\n  size_t cnt = 3000;\n  auto knnSearch = [&]() {\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                         IndexMeta::DataType::DT_FP32);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 
1) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = ((float)i + 1.1f);\n      }\n\n      ASSERT_EQ(0,\n                streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      ASSERT_EQ(\n          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                       sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(cnt - 1, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  auto s3 = 
std::async(std::launch::async, knnSearch);\n  s1.wait();\n  s2.wait();\n  s3.wait();\n}\n\nTEST_F(HnswSparseStreamerTest, TestKnnConcurrentAddAndSearch) {\n  constexpr size_t static sparse_dim_count = 32;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  // meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  meta.set_metric(\"SquaredEuclideanSparse\", 0, ailego::Params());\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 128);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 64);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 4096);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 32);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessKnnConcurrentAddAndSearch\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto addVector = [&streamer](int baseKey, size_t addCnt) {\n    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                         IndexMeta::DataType::DT_FP32);\n    size_t succAdd = 0;\n    auto ctx = streamer->create_context();\n    for (size_t i = 0; i < addCnt; i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = 
(float)i + baseKey;\n      }\n\n      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,\n                                     sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx);\n    }\n    streamer->flush(0UL);\n    return succAdd;\n  };\n\n  auto knnSearch = [&]() {\n    size_t topk = 100;\n    size_t cnt = 3000;\n    auto linearCtx = streamer->create_context();\n    auto linearByPkeysCtx = streamer->create_context();\n    auto ctx = streamer->create_context();\n    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                         IndexMeta::DataType::DT_FP32);\n    linearCtx->set_topk(topk);\n    linearByPkeysCtx->set_topk(topk);\n    ctx->set_topk(topk);\n    size_t totalCnts = 0;\n    size_t totalHits = 0;\n    for (size_t i = 0; i < cnt; i += 1) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = -((float)i + 1.1f);\n      }\n\n      ASSERT_EQ(0,\n                streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      ASSERT_EQ(\n          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, linearCtx));\n      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};\n      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                       sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));\n      auto &r1 = ctx->result();\n      ASSERT_EQ(topk, r1.size());\n      auto &r2 = linearCtx->result();\n      ASSERT_EQ(topk, r2.size());\n      ASSERT_EQ(0, r2[0].key());\n      auto &r3 = linearByPkeysCtx->result();\n      ASSERT_EQ(std::min(topk, 
p_keys[0].size()), r3.size());\n#if 0\n            printf(\"linear: %zd => %zd %zd %zd %zd %zd\\n\", i, r2[0].key,\n                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);\n            printf(\"knn: %zd => %zd %zd %zd %zd %zd\\n\", i, r1[0].key, r1[1].key,\n                   r1[2].key, r1[3].key, r1[4].key);\n#endif\n      for (size_t k = 0; k < topk; ++k) {\n        totalCnts++;\n        for (size_t j = 0; j < topk; ++j) {\n          if (r2[j].key() == r1[k].key()) {\n            totalHits++;\n            break;\n          }\n        }\n      }\n    }\n    printf(\"%f\\n\", totalHits * 1.0f / totalCnts);\n    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);\n  };\n\n  auto t0 = std::async(std::launch::async, addVector, 0, 1000);\n  ASSERT_EQ(1000, t0.get());\n  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);\n  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);\n  auto s1 = std::async(std::launch::async, knnSearch);\n  auto s2 = std::async(std::launch::async, knnSearch);\n  ASSERT_EQ(1000, t1.get());\n  ASSERT_EQ(1000, t2.get());\n  s1.wait();\n  s2.wait();\n\n  // checking data\n  auto provider = streamer->create_sparse_provider();\n  auto iter = provider->create_iterator();\n  ASSERT_TRUE(!!iter);\n  size_t total = 0;\n  uint64_t min = 1000;\n  uint64_t max = 0;\n  while (iter->is_valid()) {\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);\n    }\n    total++;\n    min = std::min(min, iter->key());\n    max = std::max(max, iter->key());\n    iter->next();\n  }\n\n  ASSERT_EQ(3000, total);\n  ASSERT_EQ(0, min);\n  ASSERT_EQ(2999, max);\n}\n\nTEST_F(HnswSparseStreamerTest, TestBfThreshold) {\n  IndexStreamer::Pointer streamer =\n      
IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 16);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TessBfThreshold\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 10000;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(1U);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = (float)i + 1.0f;\n    }\n\n    streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), qmeta, ctx);\n  }\n  streamer->flush(0UL);\n  streamer->close();\n\n  IndexStreamer::Pointer streamer1 =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer1, nullptr);\n  auto params1 = params;\n  params1.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, cnt - 1);\n  ASSERT_EQ(0, streamer1->init(*index_meta_ptr_, params1));\n  ASSERT_EQ(0, streamer1->open(storage));\n  auto ctx1 = streamer1->create_context();\n\n  IndexStreamer::Pointer streamer2 =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_NE(streamer2, nullptr);\n  auto params2 = params;\n  params2.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, cnt);\n  ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params2));\n  ASSERT_EQ(0, streamer2->open(storage));\n  auto ctx2 = streamer2->create_context();\n\n  // do searcher\n  
size_t cost1 = 0;\n  size_t cost2 = 0;\n  for (size_t i = 0; i < 100; ++i) {\n    auto t1 = ailego::Monotime::MicroSeconds();\n    ASSERT_EQ(0, streamer1->search_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx1));\n    auto t2 = ailego::Monotime::MicroSeconds();\n    ASSERT_EQ(0, streamer2->search_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx2));\n    auto t3 = ailego::Monotime::MicroSeconds();\n    cost1 += t2 - t1;\n    cost2 += t3 - t2;\n  }\n\n  ASSERT_LT(cost1, cost2);\n\n  ailego::Params update_params;\n  update_params.set(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);\n  update_params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 50);\n  ctx1->set_debug_mode(true);\n  ctx1->update(update_params);\n  ASSERT_EQ(0, streamer1->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx1));\n  LOG_DEBUG(\"%s\", ctx1->debug_string().c_str());\n}\n\nTEST_F(HnswSparseStreamerTest, TestFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  // constexpr size_t static sparse_dim_count = 64;\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 50);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 500);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 1000);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + 
\"TestFilter\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 100UL;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  ctx->set_topk(10U);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  std::vector<std::vector<uint64_t>> p_keys;\n  p_keys.resize(1);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = (float)i + 1.0f;\n    }\n\n    streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), qmeta, ctx);\n    p_keys[0].push_back(i);\n  }\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = -100.1;\n  }\n  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(10, results.size());\n  ASSERT_EQ(0, results[0].key());\n  ASSERT_EQ(1, results[1].key());\n  ASSERT_EQ(2, results[2].key());\n\n  auto filterFunc = [](uint64_t key) {\n    if (key == 0UL || key == 3UL) {\n      return true;\n    }\n    return false;\n  };\n  ctx->set_filter(filterFunc);\n\n  // after set filter\n  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                     sparse_velues.data(), qmeta, ctx));\n  auto &results1 = ctx->result();\n  ASSERT_EQ(10, results1.size());\n  ASSERT_EQ(1, results1[0].key());\n  ASSERT_EQ(2, results1[1].key());\n  ASSERT_EQ(4, results1[2].key());\n\n  // linear\n  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                        sparse_velues.data(), qmeta, ctx));\n  auto &results2 = ctx->result();\n  
ASSERT_EQ(10, results2.size());\n  ASSERT_EQ(1, results2[0].key());\n  ASSERT_EQ(2, results2[1].key());\n  ASSERT_EQ(4, results2[2].key());\n\n  // linear by p_keys\n  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(\n                   sparse_dim_count, sparse_indices.data(),\n                   sparse_velues.data(), p_keys, qmeta, ctx));\n  auto &results3 = ctx->result();\n  ASSERT_EQ(10, results3.size());\n  ASSERT_EQ(1, results3[0].key());\n  ASSERT_EQ(2, results3[1].key());\n  ASSERT_EQ(4, results3[2].key());\n}\n\nTEST_F(HnswSparseStreamerTest, TestMaxIndexSize) {\n  constexpr size_t static sparse_dim_count = 128;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestMaxIndexSize\", true));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t vsz0 = 0;\n  size_t rss0 = 0;\n  if (!ailego::MemoryHelper::SelfUsage(&vsz0, &rss0)) {\n    // do not check if get mem usage failed\n    return;\n  }\n  if (vsz0 > 1024 * 1024 * 1024 * 1024UL) {\n    // asan mode\n    return;\n  }\n\n  size_t writeCnt1 = 10000;\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto ctx = streamer->create_context();\n\n  for (size_t i = 0; i < writeCnt1; ++i) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      
sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n  size_t vsz1 = 0;\n  size_t rss1 = 0;\n  ailego::MemoryHelper::SelfUsage(&vsz1, &rss1);\n  size_t increment = rss1 - rss0;\n\n  size_t total_write =\n      writeCnt1 * sparse_dim_count * (sizeof(uint16_t) + sizeof(float)) +\n      writeCnt1 * 32 + writeCnt1 * 100 * 4;\n\n  ASSERT_GT(total_write, increment * 0.8f);\n  ASSERT_LT(total_write, increment * 1.2f);\n\n  LOG_DEBUG(\"total write: %zu, increment: %zu\", total_write, increment);\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nTEST_F(HnswSparseStreamerTest, TestKnnCleanUp) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage1 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage1);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage1->init(stg_params));\n  ASSERT_EQ(0, storage1->open(dir_ + \"TestKnnCluenUp1\", true));\n  ailego::Params params;\n\n  constexpr size_t static sparse_dim_count1 = 32;\n  IndexMeta meta1(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta1.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ASSERT_EQ(0, streamer->init(meta1, params));\n  ASSERT_EQ(0, streamer->open(storage1));\n  IndexQueryMeta qmeta1(IndexMeta::MetaType::MT_SPARSE,\n                        IndexMeta::DataType::DT_FP32);\n  auto ctx1 = streamer->create_context();\n\n  NumericalVector<uint32_t> sparse_indices1(sparse_dim_count1);\n  NumericalVector<float> sparse_velues1(sparse_dim_count1);\n\n  for (size_t j = 0; j < sparse_dim_count1; ++j) {\n    sparse_indices1[j] = j * 20;\n    sparse_velues1[j] = 1.1f;\n  }\n  ASSERT_EQ(0, streamer->add_impl(1, sparse_dim_count1, sparse_indices1.data(),\n                                  
sparse_velues1.data(), qmeta1, ctx1));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n\n  auto storage2 = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage2);\n  ASSERT_EQ(0, storage2->init(stg_params));\n  ASSERT_EQ(0, storage2->open(dir_ + \"TestKnnCluenUp2\", true));\n\n  constexpr size_t static sparse_dim_count2 = 64;\n  IndexMeta meta2(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta2.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ASSERT_EQ(0, streamer->init(meta2, params));\n  ASSERT_EQ(0, streamer->open(storage2));\n  IndexQueryMeta qmeta2(IndexMeta::MetaType::MT_SPARSE,\n                        IndexMeta::DataType::DT_FP32);\n  auto ctx2 = streamer->create_context();\n\n  NumericalVector<uint32_t> sparse_indices2(sparse_dim_count2);\n  NumericalVector<float> sparse_velues2(sparse_dim_count2);\n\n  for (size_t j = 0; j < sparse_dim_count2; ++j) {\n    sparse_indices2[j] = j * 20;\n    sparse_velues2[j] = 1.1f;\n  }\n\n  ASSERT_EQ(0, streamer->add_impl(2, sparse_dim_count2, sparse_indices2.data(),\n                                  sparse_velues2.data(), qmeta2, ctx2));\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n}\n\nTEST_F(HnswSparseStreamerTest, TestIndexSizeQuota) {\n  constexpr size_t static sparse_dim_count = 512;\n\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestIndexSizeQuota\", true));\n  ailego::Params params;\n\n  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);\n  meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 2 * 1024 * 
1024U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 100 * 1024U);\n  ASSERT_EQ(0, streamer->init(meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t writeCnt1 = 850;\n  int ret = 0;\n  auto ctx = streamer->create_context();\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  for (size_t i = 0; i < writeCnt1; ++i) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    int iRet = streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx);\n    if (iRet != 0) {\n      ret = iRet;\n    }\n  }\n\n  ASSERT_EQ(IndexError_IndexFull, ret);\n  ASSERT_EQ(0, streamer->close());\n  ASSERT_EQ(0, streamer->cleanup());\n}\n\nTEST_F(HnswSparseStreamerTest, TestBloomFilter) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestBloomFilter\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 100);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, 
ctx);\n  ctx->set_topk(10U);\n  size_t cnt = 5000;\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                       sparse_velues.data(), qmeta, ctx);\n\n    if ((i + 1) % 10 == 0) {\n      ASSERT_EQ(0,\n                streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                      sparse_velues.data(), qmeta, ctx));\n      auto &results = ctx->result();\n      ASSERT_EQ(10, results.size());\n    }\n  }\n}\n\nTEST_F(HnswSparseStreamerTest, TestStreamerParams) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestStreamerParams\", true));\n  ailego::Params params;\n  params.set(\"proxima.hnsw.sparse_streamer.docs_hard_limit\", 5);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto ctx = streamer->create_context();\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t j = 0; j < sparse_dim_count; ++j) {\n    sparse_indices[j] = j * 20;\n    sparse_velues[j] = 1.1f;\n  }\n\n  ASSERT_EQ(0, streamer->add_impl(1, sparse_dim_count, sparse_indices.data(),\n        
                          sparse_velues.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(2, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(3, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(4, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx));\n  ASSERT_EQ(0, streamer->add_impl(5, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx));\n\n  ASSERT_EQ(IndexError_IndexFull,\n            streamer->add_impl(6, sparse_dim_count, sparse_indices.data(),\n                               sparse_velues.data(), qmeta, ctx));\n}\n\nTEST_F(HnswSparseStreamerTest, TestCheckStats) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  std::string path = dir_ + \"/TestCheckStats.index\";\n  ASSERT_EQ(0, storage->open(path, true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 100);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 512 * 1024U);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto &stats = streamer->stats();\n  ASSERT_EQ(0U, stats.revision_id());\n  ASSERT_EQ(0U, stats.loaded_count());\n  ASSERT_EQ(0U, stats.added_count());\n  ASSERT_EQ(0U, stats.discarded_count());\n  // header chunk + meta chunk\n  size_t init_size = ailego::MemoryHelper::PageSize() * 2;\n  
ASSERT_EQ(init_size, stats.index_size());\n  ASSERT_EQ(0U, stats.dumped_size());\n  ASSERT_EQ(0U, stats.check_point());\n  auto createTime = stats.create_time();\n  auto updateTime = stats.update_time();\n  ASSERT_GT(createTime, 0UL);\n  ASSERT_EQ(createTime, updateTime);\n\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  size_t cnt = 3000;\n  size_t size1 = stats.index_size();\n  size_t size2 = 0;\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t i = 0; i < cnt; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n\n    ASSERT_EQ(i + 1, stats.added_count());\n    if (i == 0UL) {\n      size2 = stats.index_size();\n    }\n  }\n\n  size_t size3 = stats.index_size();\n  ASSERT_GT(size2, size1);\n  ASSERT_GT(size3, size2);\n  LOG_INFO(\"size1=%zu size2=%zu size3=%zu\", size1, size2, size3);\n\n  uint64_t checkPoint = 23423UL;\n  streamer->flush(checkPoint);\n  size_t size4 = stats.index_size();\n  ASSERT_EQ(size3, size4);\n  auto stats1 = streamer->stats();\n  ASSERT_EQ(1U, stats1.revision_id());\n  ASSERT_EQ(0U, stats1.loaded_count());\n  ASSERT_EQ(cnt, stats1.added_count());\n  ASSERT_EQ(0U, stats1.discarded_count());\n  ASSERT_GT(stats1.index_size(), 0U);\n  ASSERT_EQ(0U, stats1.dumped_size());\n  ASSERT_EQ(checkPoint, stats1.check_point());\n  auto createTime1 = stats1.create_time();\n  auto updateTime1 = stats1.update_time();\n  ASSERT_GE(updateTime1, createTime1);\n  ASSERT_EQ(createTime, createTime1);\n  streamer->close();\n\n  ASSERT_EQ(0, streamer->open(storage));\n  auto &stats2 = streamer->stats();\n  ctx 
= streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n\n  ASSERT_EQ(0,\n            streamer->add_impl(10000UL, sparse_dim_count, sparse_indices.data(),\n                               sparse_velues.data(), qmeta, ctx));\n\n  ASSERT_EQ(2U, stats2.revision_id());\n  ASSERT_EQ(cnt, stats2.loaded_count());\n  ASSERT_EQ(1U, stats2.added_count());\n  ASSERT_EQ(0U, stats2.discarded_count());\n  ASSERT_GT(stats2.index_size(), 0U);\n  ASSERT_EQ(0U, stats2.dumped_size());\n  ASSERT_EQ(checkPoint, stats2.check_point());\n  auto createTime2 = stats2.create_time();\n  auto updateTime2 = stats2.update_time();\n  ASSERT_EQ(createTime2, createTime1);\n  ASSERT_GE(updateTime2, updateTime1);\n\n  sleep(1);\n  streamer->flush(checkPoint + 1);\n\n  ASSERT_NE(0, streamer->add_impl(0U, sparse_dim_count, sparse_indices.data(),\n                                  sparse_velues.data(), qmeta, ctx));\n\n  auto &stats3 = streamer->stats();\n  ASSERT_EQ(2U, stats3.revision_id());\n  ASSERT_EQ(cnt, stats3.loaded_count());\n  ASSERT_EQ(1U, stats3.added_count());\n  ASSERT_EQ(1U, stats3.discarded_count());\n  ASSERT_EQ(stats2.index_size(), stats3.index_size());\n  ASSERT_EQ(0U, stats3.dumped_size());\n  ASSERT_EQ(checkPoint + 1, stats3.check_point());\n  auto createTime3 = stats3.create_time();\n  auto updateTime3 = stats3.update_time();\n  ASSERT_EQ(createTime3, createTime1);\n  ASSERT_GT(updateTime3, updateTime2);\n\n  auto dpath = dir_ + \"/dumpIndex\";\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  ASSERT_EQ(0, dumper->create(dpath));\n  ASSERT_EQ(0, streamer->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  size_t doc_cnt = stats3.loaded_count() + stats3.added_count();\n  struct stat st;\n  ASSERT_EQ(3001UL, doc_cnt);\n  ASSERT_EQ(0, stat(dpath.c_str(), &st));\n  ASSERT_LT(st.st_size - stats3.dumped_size(), 8192);\n\n  streamer->close();\n}\n\nTEST_F(HnswSparseStreamerTest, TestCheckDuplicateAndGetVector) {\n  IndexStreamer::Pointer 
streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestCheckDuplicateAndGetVector\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n  NumericalVector<float> sparse_velues(sparse_dim_count);\n\n  for (size_t i = 0; i < 1000; i++) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  for (size_t i = 0; i < 1000; i += 10) {\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(IndexError_Duplicate,\n              streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                 sparse_velues.data(), qmeta, ctx));\n  }\n\n  // check getVector\n  auto provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < 1000; i++) {\n    uint32_t sparse_count;\n    std::string sparse_indices_buffer;\n    std::string sparse_values_buffer;\n\n    ASSERT_EQ(\n        0, provider->get_sparse_vector(i, &sparse_count, 
&sparse_indices_buffer,\n                                       &sparse_values_buffer));\n\n    const float *sparse_values_ptr =\n        reinterpret_cast<const float *>(sparse_values_buffer.data());\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n    for (size_t j = 0; j < sparse_count; ++j) {\n      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);\n    }\n  }\n\n  streamer->flush(0UL);\n  streamer.reset();\n}\n\nclass TestDumper : public IndexDumper {\n  virtual int init(const ailego::Params &) {\n    return 0;\n  }\n  virtual int cleanup(void) {\n    return 0;\n  }\n  virtual int create(const std::string &path) {\n    return 0;\n  }\n  virtual uint32_t magic(void) const {\n    return 0;\n  }\n  virtual int close(void) {\n    return 0;\n  }\n  virtual int append(const std::string &id, size_t data_size,\n                     size_t padding_size, uint32_t crc) {\n    usleep(100000);\n    return 0;\n  }\n  virtual size_t write(const void *data, size_t len) {\n    return len;\n  }\n};\n\nTEST_F(HnswSparseStreamerTest, TestDumpIndexAndAdd) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestDumpIndexAndAdd\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  auto ctx = streamer->create_context();\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  ASSERT_NE(nullptr, ctx);\n  int code = 0;\n  std::mutex mutex;\n\n  auto addVector = [&](int a, int b) {\n    mutex.unlock();\n    for (int i = a; i < b; 
i++) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = (float)i;\n      }\n\n      int ret = streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                   sparse_velues.data(), qmeta, ctx);\n      if (ret != 0) {\n        code = ret;\n        ASSERT_EQ(IndexError_Unsupported, code);\n        i = i - 1;  // retry\n        usleep(10000);\n      }\n    }\n  };\n\n  mutex.lock();\n  addVector(0, 2000);\n  mutex.lock();\n  auto t2 = std::async(std::launch::async, addVector, 2000, 3000);\n  auto path1 = dir_ + \"/dumpIndex1\";\n  auto dumper1 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper1, nullptr);\n  ASSERT_EQ(0, dumper1->create(path1));\n  mutex.lock();  // sync: wait addVector start and release lock\n  auto test_dumper = std::make_shared<TestDumper>();\n  ASSERT_EQ(0, streamer->dump(test_dumper));\n  mutex.unlock();\n  ASSERT_EQ(0, streamer->dump(dumper1));\n  ASSERT_EQ(0, dumper1->close());\n  t2.get();\n  streamer->close();\n  ASSERT_EQ(IndexError_Unsupported, code);\n\n  // check dump index\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->init(ailego::Params()));\n  ASSERT_EQ(0, container->open(path1, false));\n  ASSERT_NE(searcher, nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto iter = searcher->create_sparse_provider()->create_iterator();\n\n  size_t docs = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float 
*>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)key, data[j]);\n    }\n\n    docs++;\n    iter->next();\n  }\n\n  ASSERT_GE(docs, 2000U);\n\n  // check streamer\n  ASSERT_EQ(0, streamer->open(storage));\n  iter = streamer->create_sparse_provider()->create_iterator();\n\n  docs = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)key, data[j]);\n    }\n\n    docs++;\n    iter->next();\n  }\n\n  ASSERT_EQ(docs, 3000U);\n}\n\nTEST_F(HnswSparseStreamerTest, TestProvider) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_NE(nullptr, storage);\n  ailego::Params stg_params;\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"TestProvider.index\", true));\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n  auto ctx = streamer->create_context();\n  ASSERT_NE(nullptr, ctx);\n\n  //! 
prepare data\n  size_t docs = 10000UL;\n  srand(ailego::Realtime::MilliSeconds());\n  std::vector<key_t> keys(docs);\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < docs; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = ailego::Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < keys.size(); i++) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = keys[i];\n    }\n\n    ASSERT_EQ(\n        0, streamer->add_impl(keys[i], sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx));\n  }\n\n  auto path1 = dir_ + \"/TestProvider\";\n  auto dumper1 = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper1, nullptr);\n  ASSERT_EQ(0, dumper1->create(path1));\n  ASSERT_EQ(0, streamer->dump(dumper1));\n  ASSERT_EQ(0, dumper1->close());\n  streamer->close();\n\n  // check dump index\n  IndexSearcher::Pointer searcher =\n      IndexFactory::CreateSearcher(\"HnswSparseSearcher\");\n  auto container = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_EQ(0, container->init(ailego::Params()));\n  ASSERT_EQ(0, container->open(path1, 
false));\n  ASSERT_NE(searcher, nullptr);\n  ASSERT_EQ(0, searcher->init(ailego::Params()));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto iter = searcher->create_sparse_provider()->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)key, data[j]);\n    }\n\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, docs);\n\n  // check streamer\n  ASSERT_EQ(0, streamer->open(storage));\n  iter = streamer->create_sparse_provider()->create_iterator();\n  cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n\n    const uint32_t sparse_count = iter->sparse_count();\n    ASSERT_EQ(sparse_count, sparse_dim_count);\n\n    const float *data = reinterpret_cast<const float *>(iter->sparse_data());\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      ASSERT_FLOAT_EQ((float)key, data[j]);\n    }\n\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, docs);\n\n  auto searcher_provider = searcher->create_sparse_provider();\n  auto streamer_provider = streamer->create_sparse_provider();\n  for (size_t i = 0; i < keys.size(); ++i) {\n    {\n      uint32_t sparse_count;\n      std::string sparse_indices_buffer;\n      std::string sparse_values_buffer;\n\n      ASSERT_EQ(0, searcher_provider->get_sparse_vector(keys[i], &sparse_count,\n                                                        &sparse_indices_buffer,\n                                                        &sparse_values_buffer));\n\n      const float *sparse_values_ptr =\n          reinterpret_cast<const float *>(sparse_values_buffer.data());\n      ASSERT_EQ(sparse_count, sparse_dim_count);\n      for (size_t j = 0; j < sparse_count; ++j) {\n        
ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);\n      }\n    }\n\n    {\n      uint32_t sparse_count;\n      std::string sparse_indices_buffer;\n      std::string sparse_values_buffer;\n      ASSERT_EQ(0, streamer_provider->get_sparse_vector(keys[i], &sparse_count,\n                                                        &sparse_indices_buffer,\n                                                        &sparse_values_buffer));\n\n      const float *sparse_values_ptr =\n          reinterpret_cast<const float *>(sparse_values_buffer.data());\n      ASSERT_EQ(sparse_count, sparse_dim_count);\n      for (size_t j = 0; j < sparse_count; ++j) {\n        ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);\n      }\n    }\n  }\n\n  ASSERT_EQ(index_meta_ptr_->data_type(), streamer_provider->data_type());\n}\n\nTEST_F(HnswSparseStreamerTest, TestSharedContext) {\n  auto create_streamer = [](std::string path) {\n    IndexStreamer::Pointer streamer =\n        IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n    auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n    ailego::Params stg_params;\n    storage->init(stg_params);\n    storage->open(path, true);\n    ailego::Params params;\n    streamer->init(*index_meta_ptr_, params);\n    streamer->open(storage);\n    return streamer;\n  };\n  auto streamer1 = create_streamer(dir_ + \"TestSharedContext.index1\");\n  auto streamer2 = create_streamer(dir_ + \"TestSharedContext.index2\");\n  auto streamer3 = create_streamer(dir_ + \"TestSharedContext.index3\");\n\n  srand(ailego::Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  auto do_test = [&](int start) {\n    auto code = rand() % 3;\n    IndexStreamer::Context::Pointer ctx;\n    switch (code) {\n      case 0:\n        ctx = streamer1->create_context();\n        break;\n      case 1:\n        ctx = streamer2->create_context();\n        break;\n      case 2:\n        ctx = 
streamer3->create_context();\n        break;\n    };\n    ctx->set_topk(1);\n    uint64_t key1 = start + 0;\n    uint64_t key2 = start + 1;\n    uint64_t key3 = start + 2;\n\n    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);\n    NumericalVector<float> query_sparse_velues(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      query_sparse_indices[j] = j * 20;\n      query_sparse_velues[j] = 1.1f;\n    }\n\n    for (int i = 0; i < 1000; ++i) {\n      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n      NumericalVector<float> sparse_velues(sparse_dim_count);\n\n      for (size_t j = 0; j < sparse_dim_count; ++j) {\n        sparse_indices[j] = j * 20;\n        sparse_velues[j] = rand();\n      }\n\n      int ret = 0;\n      auto code = rand() % 3;\n      switch (code) {\n        case 0:\n          streamer1->add_impl(key1, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key1 += 3;\n          ret = streamer1->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n        case 1:\n          streamer2->add_impl(key2, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key2 += 3;\n          streamer2->add_impl(key2, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key2 += 3;\n          ret = streamer2->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n        case 2:\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key3 += 
3;\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key3 += 3;\n          streamer3->add_impl(key3, sparse_dim_count, sparse_indices.data(),\n                              sparse_velues.data(), qmeta, ctx);\n          key3 += 3;\n          ret = streamer3->search_impl(sparse_dim_count,\n                                       query_sparse_indices.data(),\n                                       query_sparse_velues.data(), qmeta, ctx);\n          break;\n      }\n      EXPECT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(1, results.size());\n      EXPECT_EQ(code, results[0].key() % 3);\n    }\n  };\n\n  auto t1 = std::async(std::launch::async, do_test, 0);\n  auto t2 = std::async(std::launch::async, do_test, 30000000);\n  t1.wait();\n  t2.wait();\n}\n\nTEST_F(HnswSparseStreamerTest, TestBruteForceSetupInContext) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  ailego::Params params;\n  // params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0,\n            storage->open(dir_ + \"/TestBruteForceSetupInContext.index\", true));\n  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 5000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < 
cnt; i++) {\n    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);\n    NumericalVector<float> sparse_velues(sparse_dim_count);\n\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      sparse_indices[j] = j * 20;\n      sparse_velues[j] = i;\n    }\n\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),\n                                    sparse_velues.data(), qmeta, ctx));\n  }\n\n  size_t topk = 20;\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  bool set_bf_threshold = false;\n  bool use_update = false;\n\n  size_t step = 50;\n  for (size_t i = 0; i < cnt; i += step) {\n    // for (size_t i = 0; i < cnt; i++) {\n    auto linearCtx = streamer->create_context();\n    auto knnCtx = streamer->create_context();\n\n    ASSERT_TRUE(!!linearCtx);\n    ASSERT_TRUE(!!knnCtx);\n\n    linearCtx->set_topk(topk);\n    knnCtx->set_topk(topk);\n\n    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);\n    NumericalVector<float> query_sparse_velues(sparse_dim_count);\n    for (size_t j = 0; j < sparse_dim_count; ++j) {\n      query_sparse_indices[j] = j * 20;\n      query_sparse_velues[j] = i + 0.1f;\n    }\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      if (use_update) {\n        ailego::Params streamerParamsExtra;\n\n        streamerParamsExtra.set(\n            \"proxima.hnsw.sparse_streamer.brute_force_threshold\", cnt);\n        knnCtx->update(streamerParamsExtra);\n      } else {\n        knnCtx->set_bruteforce_threshold(cnt);\n      }\n\n      use_update = !use_update;\n    }\n    ASSERT_EQ(\n        0, streamer->search_impl(sparse_dim_count, query_sparse_indices.data(),\n                                 query_sparse_velues.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, streamer->search_bf_impl(\n                     sparse_dim_count, 
query_sparse_indices.data(),\n                     query_sparse_velues.data(), qmeta, linearCtx));\n\n    // auto t3 = ailego::Realtime::MicroSeconds();\n\n    if (set_bf_threshold) {\n      linearTotalTime += t2 - t1;\n    } else {\n      knnTotalTime += t2 - t1;\n    }\n\n    set_bf_threshold = !set_bf_threshold;\n\n    auto &knnResult = knnCtx->result();\n    // ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += cnt - 1 == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(cnt - 1, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.90f);\n  EXPECT_GT(topk1Recall, 0.95f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseStreamerTest, TestQueryFilteringRatio) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);\n  
params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO, 0.05);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestQueryFilteringRatio\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  auto linearCtx = streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  for (size_t i = 0; i < cnt; i++) {\n    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,\n                                    sparse_indices_list[i].data(),\n                                    sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  size_t step = 100;\n  for (size_t i = 0; i < cnt; i += step) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = 
ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * step * 1.0f / cnt;\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\nTEST_F(HnswSparseStreamerTest, TestAddAndSearchWithID) {\n  IndexStreamer::Pointer streamer =\n      IndexFactory::CreateStreamer(\"HnswSparseStreamer\");\n  ASSERT_TRUE(streamer != nullptr);\n\n  size_t sparse_dim_count = 32;\n\n  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n  index_meta.set_metric(\"InnerProductSparse\", 0, ailego::Params());\n\n  ailego::Params params;\n  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);\n  
params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);\n  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);\n\n  ailego::Params stg_params;\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_EQ(0, storage->init(stg_params));\n  ASSERT_EQ(0, storage->open(dir_ + \"/TestGeneral\", true));\n  ASSERT_EQ(0, streamer->init(index_meta, params));\n  ASSERT_EQ(0, streamer->open(storage));\n\n  size_t cnt = 20000U;\n  auto ctx = streamer->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  auto linearCtx = streamer->create_context();\n  ASSERT_TRUE(!!linearCtx);\n\n  auto knnCtx = streamer->create_context();\n  ASSERT_TRUE(!!knnCtx);\n\n  std::vector<NumericalVector<uint32_t>> sparse_indices_list;\n  std::vector<NumericalVector<float>> sparse_vec_list;\n\n  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,\n                       sparse_vec_list, true);\n\n  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,\n                       IndexMeta::DataType::DT_FP32);\n\n  for (size_t i = 0; i < cnt; i += 4) {\n    ASSERT_EQ(0, streamer->add_with_id_impl(\n                     i, sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  for (size_t i = 2; i < cnt; i += 4) {\n    ASSERT_EQ(0, streamer->add_with_id_impl(\n                     i, sparse_dim_count, sparse_indices_list[i].data(),\n                     sparse_vec_list[i].data(), qmeta, ctx));\n  }\n\n  // streamer->print_debug_info();\n  size_t topk = 200;\n  linearCtx->set_topk(topk);\n  knnCtx->set_topk(topk);\n\n  uint64_t knnTotalTime = 0;\n  uint64_t linearTotalTime = 0;\n\n  int totalHits = 0;\n  int totalCnts = 0;\n  int topk1Hits = 0;\n\n  for (size_t i = 0; i < cnt / 100; i += 2) {\n    const auto &sparse_indices = sparse_indices_list[i];\n    const auto &sparse_vec = sparse_vec_list[i];\n\n    auto t1 = 
ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, knnCtx));\n\n    auto t2 = ailego::Realtime::MicroSeconds();\n\n    ASSERT_EQ(0,\n              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),\n                                       sparse_vec.data(), qmeta, linearCtx));\n\n    auto t3 = ailego::Realtime::MicroSeconds();\n\n    knnTotalTime += t2 - t1;\n    linearTotalTime += t3 - t2;\n\n    // std::cout << \"i: \" << i << std::endl;\n\n    auto &knnResult = knnCtx->result();\n    ASSERT_EQ(topk, knnResult.size());\n    topk1Hits += i == knnResult[0].key();\n\n    auto &linearResult = linearCtx->result();\n    ASSERT_EQ(topk, linearResult.size());\n    ASSERT_EQ(i, linearResult[0].key());\n\n    for (size_t k = 0; k < topk; ++k) {\n      totalCnts++;\n      for (size_t j = 0; j < topk; ++j) {\n        if (linearResult[j].key() == knnResult[k].key()) {\n          totalHits++;\n          break;\n        }\n      }\n\n      auto func = [&](const IndexDocumentList &result) {\n        for (size_t j = 0; j < topk / 10; ++j) {\n          ASSERT_NE(result[j].key(), -1LLU);\n          ASSERT_NE(result[j].index(), -1LLU);\n          uint32_t sparse_count = 0;\n          std::string sparse_indices_buffer;\n          std::string sparse_values_buffer;\n          ASSERT_EQ(0, streamer->get_sparse_vector_by_id(\n                           result[j].index(), &sparse_count,\n                           &sparse_indices_buffer, &sparse_values_buffer));\n          ASSERT_EQ(sparse_dim_count, sparse_count);\n\n          const auto &_sparse_indices = sparse_indices_list[result[j].index()];\n          const auto &_sparse_vec = sparse_vec_list[result[j].index()];\n          std::string original_sparse_values_buffer;\n          original_sparse_values_buffer.resize(_sparse_vec.size() *\n                                               
sizeof(float));\n          memcpy((char *)original_sparse_values_buffer.data(),\n                 (char *)_sparse_vec.data(),\n                 _sparse_vec.size() * sizeof(float));\n\n          ASSERT_EQ(sparse_indices_buffer, _sparse_indices);\n\n          ASSERT_EQ(sparse_values_buffer, original_sparse_values_buffer);\n        }\n      };\n\n      func(linearResult);\n      func(knnResult);\n    }\n  }\n  float recall = totalHits * 1.0f / totalCnts;\n  float topk1Recall = topk1Hits * 100.0f / (float(cnt) / 100);\n  float cost = linearTotalTime * 1.0f / knnTotalTime;\n#if 0\n    printf(\"knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d \"\n           \"R@%zd=%f R@1=%f cost=%f\\n\",\n           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,\n           topk1Recall, cost);\n#endif\n  EXPECT_GT(recall, 0.80f);\n  EXPECT_GT(topk1Recall, 0.80f);\n  // EXPECT_GT(cost, 2.0f);\n}\n\n}  // namespace core\n}  // namespace zvec\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/algorithm/ivf/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_cluster core_knn_flat core_knn_ivf\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/ivf\n    )\nendforeach()"
  },
  {
    "path": "tests/core/algorithm/ivf/ivf_builder_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_builder.h\"\n#include <future>\n#include <iostream>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\nclass IVFBuilderTest : public testing::Test {\n protected:\n  void SetUp();\n  void TearDown();\n\n  void prepare_index_holder(uint32_t base_key, uint32_t num);\n\n  IndexMeta index_meta_;\n  Params params_;\n  uint32_t dimension_;\n  IndexHolder::Pointer holder_;\n  IndexThreads::Pointer threads_{};\n};\n\nvoid IVFBuilderTest::SetUp() {\n  dimension_ = 8U;\n\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n\n  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"8\");\n  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n  std::mt19937 gen((std::random_device())());\n  bool v = std::uniform_int_distribution<size_t>(0, 1)(gen);\n  if (v) {\n    threads_ = std::make_shared<SingleQueueIndexThreads>();\n  }\n}\n\nvoid IVFBuilderTest::TearDown() {}\n\nvoid IVFBuilderTest::prepare_index_holder(uint32_t base_key, uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; 
++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 1.0f * i;\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nTEST_F(IVFBuilderTest, TestInitSuccess) {\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFBuilderTest, TestInitFailedWithInvalidMetric) {\n  IVFBuilder builder;\n  index_meta_.set_metric(\"invalid\", 0, Params());\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(IndexError_NoExist, ret);\n}\n\nTEST_F(IVFBuilderTest, TestInitFailedWithInvalidCentroidsNum) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(IndexError_InvalidArgument, ret);\n}\n\nTEST_F(IVFBuilderTest, TestTrainWithHolder1Level) {\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  auto centroid_index = builder.centroid_index();\n  EXPECT_GT(centroid_index->centroids_count(), 0u);\n}\n\nTEST_F(IVFBuilderTest, TestTrainWithHolder2Level) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  auto centroid_index = builder.centroid_index();\n  EXPECT_EQ(centroid_index->centroids_count(), 8);\n}\n\nTEST_F(IVFBuilderTest, TestTrainWithTrainer2Level) {\n  IndexTrainer::Pointer trainer =\n      
IndexFactory::CreateTrainer(\"StratifiedClusterTrainer\");\n  ASSERT_TRUE(!!trainer);\n\n  prepare_index_holder(0, 1000);\n\n  Params params;\n  params.set(\"proxima.stratified.trainer.cluster_count\", \"4*2\");\n  ASSERT_EQ(0, trainer->init(index_meta_, params));\n  ASSERT_EQ(0, trainer->train(threads_, holder_));\n\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n\n\n  ret = builder.train(trainer);\n  EXPECT_EQ(0, ret);\n\n  auto centroid_index = builder.centroid_index();\n  EXPECT_EQ(centroid_index->centroids_count(), 8);\n}\n\nTEST_F(IVFBuilderTest, TestTrainWithTrainer1Level) {\n  IVFBuilder builder;\n\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  IndexTrainer::Pointer trainer =\n      IndexFactory::CreateTrainer(\"StratifiedClusterTrainer\");\n  ASSERT_TRUE(!!trainer);\n\n  prepare_index_holder(0, 1000);\n\n  Params params1;\n  params1.set(\"proxima.stratified.trainer.cluster_count\", \"4\");\n  ASSERT_EQ(0, trainer->init(index_meta_, params1));\n  ASSERT_EQ(0, trainer->train(threads_, holder_));\n\n  ret = builder.train(trainer);\n  EXPECT_EQ(0, ret);\n\n  auto centroid_index = builder.centroid_index();\n  EXPECT_EQ(centroid_index->centroids_count(), 4);\n}\n\nTEST_F(IVFBuilderTest, TestBuildWith2Level) {\n  IVFBuilder builder;\n\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n}\n\nTEST_F(IVFBuilderTest, TestBuildWith1Level) {\n  IVFBuilder 
builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n}\n\nTEST_F(IVFBuilderTest, TestDump) {\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  ret = dumper->create(\"path\");\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n}\n\n#if 0\nTEST_F(IVFBuilderTest, TestBuildWithNoEnoughMemory)\n{\n    IVFBuilder builder;\n    Params params;\n    params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n    params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n    dimension_ = 256;\n    index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n\n    int ret = builder.init(index_meta_, params);\n    EXPECT_EQ(0, ret);\n\n    prepare_index_holder(0, 1000);\n\n    ret = builder.train(threads_, holder_);\n    EXPECT_EQ(0, ret);\n\n    ret = builder.build(threads_, holder_);\n    EXPECT_EQ(IndexError_IndexFull, ret);\n}\n#endif\n\nTEST_F(IVFBuilderTest, TestBuildWithEnoughMemory) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n  dimension_ = 
256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  ret = dumper->create(\"path\");\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n}\n\n#if 0\nTEST_F(IVFBuilderTest, TestBuildWithRowMajorAndNoEnoughMemory)\n{\n    IVFBuilder builder;\n    Params params;\n    params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n    params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n    dimension_ = 256;\n    index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n    index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);\n\n    int ret = builder.init(index_meta_, params);\n    EXPECT_EQ(0, ret);\n\n    prepare_index_holder(0, 1000);\n\n    ret = builder.train(threads_, holder_);\n    EXPECT_EQ(0, ret);\n\n    ret = builder.build(threads_, holder_);\n    EXPECT_EQ(IndexError_IndexFull, ret);\n}\n#endif\n\nTEST_F(IVFBuilderTest, TestBuildWithRowMajorAndMemory) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n\n  prepare_index_holder(0, 1000);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  
EXPECT_EQ(0, ret);\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  ret = dumper->create(\"path\");\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n}\n\nTEST_F(IVFBuilderTest, TestBuildWithEmptyCentroid) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"2*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n  size_t doc_cnt = 10;\n\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 1.0f;\n    }\n    holder->emplace(i, vec);\n  }\n  holder_.reset(holder);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  ret = dumper->create(\"path\");\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)10, builder.stats().built_count());\n  EXPECT_EQ((size_t)10, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n}\n\nTEST_F(IVFBuilderTest, TestTrainClusterParams) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"2*2\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n  prepare_index_holder(0, 1000);\n  EXPECT_EQ(0, builder.init(index_meta_, 
params));\n  EXPECT_EQ(0, builder.train(threads_, holder_));\n  EXPECT_EQ(0, builder.build(threads_, holder_));\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  EXPECT_EQ(0, dumper->create(\"test.index\"));\n  EXPECT_EQ(0, builder.dump(dumper));\n}\n\nTEST_F(IVFBuilderTest, TestIndexThreads) {\n  IndexBuilder::Pointer builder1 = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder1, nullptr);\n  IndexBuilder::Pointer builder2 = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder2, nullptr);\n\n  size_t dim = 128UL;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  std::srand(Realtime::MilliSeconds());\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"2*2\");\n  ASSERT_EQ(0, builder1->init(meta, params));\n  ASSERT_EQ(0, builder2->init(meta, params));\n\n  auto threads =\n      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);\n  auto build_index1 = [&]() {\n    ASSERT_EQ(0, builder1->train(threads, holder));\n    ASSERT_EQ(0, builder1->build(threads, holder));\n  };\n  auto build_index2 = [&]() {\n    ASSERT_EQ(0, builder2->train(threads, holder));\n    ASSERT_EQ(0, builder2->build(threads, holder));\n  };\n\n  auto t1 = std::async(std::launch::async, build_index1);\n  auto t2 = std::async(std::launch::async, build_index2);\n  t1.wait();\n  t2.wait();\n\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n\n  std::string path = \"./hc_index\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder1->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, dumper->create(path));\n  
ASSERT_EQ(0, builder2->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  auto &stats1 = builder1->stats();\n  ASSERT_EQ(doc_cnt, stats1.built_count());\n  auto &stats2 = builder2->stats();\n  ASSERT_EQ(doc_cnt, stats2.built_count());\n}"
  },
  {
    "path": "tests/core/algorithm/ivf/ivf_searcher_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include \"ivf_searcher.h\"\n#include <future>\n#include <iostream>\n#include <vector>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_framework.h\"\n#include \"ivf_builder.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec::core;\nusing namespace zvec::ailego;\nusing namespace std;\n\nclass IVFSearcherTest : public testing::Test {\n public:\n protected:\n  void SetUp();\n  void TearDown();\n  void prepare_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_rand_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_fp16_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_fp32_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_binary_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_int8_index_holder(uint32_t base_key, uint32_t num);\n\n  void prepare_same_index_holder(uint32_t base_key, uint32_t num);\n\n  IndexMeta index_meta_;\n  Params params_;\n  uint32_t dimension_;\n  IndexHolder::Pointer holder_;\n  std::string index_path_;\n  IndexThreads::Pointer threads_{};\n};\n\nvoid IVFSearcherTest::SetUp() {\n  dimension_ = 8U;\n\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_metric(\"SquaredEuclidean\", 0, 
Params());\n\n  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"4*2\");\n  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n  index_path_ = \"./ivf_searcher.index\";\n  std::mt19937 gen((std::random_device())());\n  bool v = std::uniform_int_distribution<size_t>(0, 1)(gen);\n  if (v) {\n    threads_ = std::make_shared<SingleQueueIndexThreads>();\n  }\n}\n\nvoid IVFSearcherTest::TearDown() {\n  File::RemovePath(index_path_);\n}\n\nvoid IVFSearcherTest::prepare_index_holder(uint32_t base_key, uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 1.0f * i;\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nvoid IVFSearcherTest::prepare_rand_index_holder(uint32_t base_key,\n                                                uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = std::rand() % 1000 * 1.0;\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nvoid IVFSearcherTest::prepare_fp32_index_holder(uint32_t base_key,\n                                                uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 0.01f * i;\n    }\n    holder->emplace(key + i, 
vec);\n  }\n\n  holder_.reset(holder);\n}\n\nvoid IVFSearcherTest::prepare_fp16_index_holder(uint32_t base_key,\n                                                uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 0.01f * i;\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  IndexConverter::Pointer conveter =\n      IndexFactory::CreateConverter(\"HalfFloatConverter\");\n  conveter->init(index_meta_, Params());\n  IndexHolder::Pointer new_holder(holder);\n  conveter->transform(new_holder);\n  holder_ = conveter->result();\n}\n\nvoid IVFSearcherTest::prepare_int8_index_holder(uint32_t base_key,\n                                                uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_INT8> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<int8_t> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = (int8_t)(i % 128);\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nvoid IVFSearcherTest::prepare_binary_index_holder(uint32_t base_key,\n                                                  uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    BinaryVector<uint32_t> vec(dimension_);\n    for (size_t j = 0; j < dimension_ && j < i; ++j) {\n      vec.set(j);\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nvoid IVFSearcherTest::prepare_same_index_holder(uint32_t base_key,\n                      
                          uint32_t num) {\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  uint32_t key = base_key;\n  for (size_t i = 0; i < num; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = 8;\n    }\n    holder->emplace(key + i, vec);\n  }\n\n  holder_.reset(holder);\n}\n\nTEST_F(IVFSearcherTest, TestInit) {\n  IVFSearcher searcher;\n  int ret = searcher.init(params_);\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestSimple) {\n  IVFBuilder builder;\n  //    index_meta_.set_major_order(IndexMeta::MO_ROW);\n  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"1\");\n  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 33);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)33, builder.stats().built_count());\n  EXPECT_EQ((size_t)33, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = 
searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(32.0f);\n  }\n\n  size_t qnum = 33;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 33;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      ASSERT_EQ((uint64_t)32 - i, result[i].key());\n      ASSERT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 33;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)32 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; 
++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestSimpleCosine) {\n  IVFBuilder builder;\n  //    index_meta_.set_major_order(IndexMeta::MO_ROW);\n  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"1\");\n  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n\n  Params converter_params;\n  auto converter = IndexFactory::CreateConverter(\"CosineNormalizeConverter\");\n  ASSERT_TRUE(converter != nullptr);\n  auto original_index_meta = index_meta_;\n  original_index_meta.set_metric(\"Cosine\", 0, Params());\n  EXPECT_EQ(0, converter->init(original_index_meta, converter_params));\n  IndexMeta index_meta = converter->meta();\n  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());\n  ASSERT_TRUE(reformer != nullptr);\n  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));\n\n  int ret = builder.init(index_meta, params_);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 33);\n  converter->transform(holder_);\n  auto holder = converter->result();\n\n  EXPECT_EQ(0, builder.train(threads_, holder));\n  EXPECT_EQ(0, builder.build(threads_, holder));\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  EXPECT_EQ(0, dumper->create(index_path_));\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)33, builder.stats().built_count());\n  EXPECT_EQ((size_t)33, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      
IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(32.0f + i);\n  }\n\n  size_t qnum = 33;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf search\n  {\n    size_t topk = 33;\n    context->set_topk(topk);\n    \n    std::string new_vec;\n    IndexQueryMeta new_meta;\n    ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta));\n\n    ret = searcher.search_bf_impl(new_vec.data(), new_meta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < 1; ++i) {\n      // ASSERT_EQ(29, result[i].key());\n      EXPECT_NEAR(0, result[i].score(), 1e-2);\n    }\n  }\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {\n  IVFBuilder builder;\n  //    index_meta_.set_major_order(IndexMeta::MO_ROW);\n  //    params_.set(\"proxima.hc.builder.thread_count\", 1);\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = 
builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back((total - 1) * 1.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      ASSERT_EQ((uint64_t)(total - 1) - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = 
context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 1000);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = 
searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(999.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n  context->set_filter([](uint64_t key) {\n    if (key > 0) {\n      return true;\n    }\n    return false;\n  });\n  // single bf serch\n  {\n    size_t topk = 1000;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)1, result.size());\n    for (size_t i = 0; i < 1; ++i) {\n      EXPECT_EQ((uint64_t)0, result[i].key());\n      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)1, result.size());\n      EXPECT_EQ((uint64_t)0, result[0].key());\n      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), 
qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)1, result.size());\n    for (size_t i = 0; i < 1; ++i) {\n      EXPECT_EQ((uint64_t)0, result[i].key());\n      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)1, result.size());\n      EXPECT_EQ((uint64_t)0, result[0].key());\n      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\n///////////////////////////  row major ////////////////////////////////\nTEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 1000);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  
Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(999.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 1000;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, 
result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n  IVFBuilder builder;\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 1000);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    
query.push_back(999.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n  context->set_filter([](uint64_t key) {\n    if (key > 0) {\n      return true;\n    }\n    return false;\n  });\n  // single bf serch\n  {\n    size_t topk = 1000;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)1, result.size());\n    for (size_t i = 0; i < 1; ++i) {\n      EXPECT_EQ((uint64_t)0, result[i].key());\n      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)1, result.size());\n      EXPECT_EQ((uint64_t)0, result[0].key());\n      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)1, result.size());\n    for (size_t i = 0; i < 1; ++i) {\n      EXPECT_EQ((uint64_t)0, result[i].key());\n      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const 
IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)1, result.size());\n      EXPECT_EQ((uint64_t)0, result[0].key());\n      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {\n  IVFBuilder builder;\n  Params build_params;\n  build_params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"10\");\n  build_params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n\n  int ret = builder.init(index_meta_, build_params);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 1000);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < 
dimension_; ++i) {\n    query.push_back(999.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 3;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      
EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {\n  IVFBuilder builder;\n  Params build_params;\n  build_params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"10\");\n  build_params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MO_COLUMN);\n\n  int ret = builder.init(index_meta_, build_params);\n  EXPECT_EQ(0, ret);\n  prepare_index_holder(0, 1000);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(999.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for 
(size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 3;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  
ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {\n  IVFBuilder builder;\n  dimension_ = 12;\n  index_meta_.set_meta(IndexMeta::DataType::DT_INT8, dimension_);\n  index_meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  index_meta_.set_major_order(IndexMeta::MO_COLUMN);\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  size_t fnum = 128;\n  prepare_int8_index_holder(0, fnum);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)fnum, builder.stats().built_count());\n  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<int8_t> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(127);\n  }\n\n  size_t qnum = 63;\n  std::vector<int8_t> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8, 
dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 128;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)127 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)127 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {\n  IVFBuilder builder;\n  dimension_ = 12;\n  
index_meta_.set_meta(IndexMeta::DataType::DT_INT8, dimension_);\n  index_meta_.set_metric(\"SquaredEuclidean\", 0, Params());\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  size_t fnum = 128;\n  prepare_int8_index_holder(0, fnum);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)fnum, builder.stats().built_count());\n  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<int8_t> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(127);\n  }\n\n  size_t qnum = 63;\n  std::vector<int8_t> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 128;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    
EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)127 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)127 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {\n  IVFBuilder builder;\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dimension_);\n  index_meta_.set_metric(\"Hamming\", 0, Params());\n  index_meta_.set_major_order(IndexMeta::MO_COLUMN);\n\n  int ret = 
builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  size_t fnum = 257;\n  prepare_binary_index_holder(0, fnum);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)fnum, builder.stats().built_count());\n  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  BinaryVector<uint32_t> query(dimension_);\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.set(i);\n  }\n\n  size_t qnum = 63;\n  BinaryVector<uint32_t> query1(dimension_ * qnum);\n  for (size_t i = 0; i < qnum; ++i) {\n    for (size_t j = 0; j < dimension_ && j < i; ++j) {\n      query1.set(i * dimension_ + j);\n    }\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 128;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, 
result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)256 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)256 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {\n  IVFBuilder builder;\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dimension_);\n  index_meta_.set_metric(\"Hamming\", 0, Params());\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  size_t fnum = 257;\n  prepare_binary_index_holder(0, fnum);\n  ret = 
builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, dumper->close());\n  EXPECT_EQ((size_t)fnum, builder.stats().built_count());\n  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  BinaryVector<uint32_t> query(dimension_);\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.set(i);\n  }\n\n  size_t qnum = 63;\n  BinaryVector<uint32_t> query1(dimension_ * qnum);\n  for (size_t i = 0; i < qnum; ++i) {\n    for (size_t j = 0; j < dimension_ && j < i; ++j) {\n      query1.set(i * dimension_ + j);\n    }\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 128;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)256 - i, result[i].key());\n      
EXPECT_FLOAT_EQ((float)i, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)256 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestSearchWithEmptyCentroid) {\n  IVFBuilder builder;\n  Params params;\n  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"3*3\");\n  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster*KmeansCluster\");\n\n  dimension_ = 256;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n\n  int ret = builder.init(index_meta_, params);\n  EXPECT_EQ(0, ret);\n  size_t doc_cnt = 10;\n\n  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =\n      new 
MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = i % 5;\n    }\n    holder->emplace(i, vec);\n  }\n  holder_.reset(holder);\n\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n\n  std::string path = \"searcher_empty_centroid.index\";\n  ret = dumper->create(path);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ(0, ret);\n  EXPECT_EQ((size_t)10, builder.stats().built_count());\n  EXPECT_EQ((size_t)10, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  dumper->close();\n\n  IVFSearcher searcher;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(path, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(999.0f);\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    auto key1 = 
result[0].key();\n    EXPECT_TRUE(key1 == 4ul || key1 == 9ul);\n  }\n\n  // single knn search\n  {\n    size_t topk = 3;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    auto key1 = result[0].key();\n    auto key2 = result[1].key();\n    auto key3 = result[2].key();\n    EXPECT_TRUE(key1 == 4ul || key1 == 9ul);\n    EXPECT_TRUE(key2 == 4ul || key2 == 9ul);\n    EXPECT_TRUE(key3 == 3ul || key3 == 8ul);\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFp16WithBuildMemory) {\n  const float epsilon = 1e-2;\n  dimension_ = 8;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MO_COLUMN);\n\n  prepare_fp16_index_holder(0, 1000);\n  IVFBuilder builder;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP16, dimension_);\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  
container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(-0.1f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_ * 0.01);\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta1(IndexMeta::DataType::DT_FP32, dimension_);\n\n  std::string query_buf;\n  query_buf.resize(dimension_ * sizeof(uint16_t));\n  std::string query1_buf;\n  query1_buf.resize(dimension_ * sizeof(uint16_t) * qnum);\n\n  IndexReformer::Pointer reformer =\n      IndexFactory::CreateReformer(\"HalfFloatReformer\");\n  IndexQueryMeta qmeta;\n  reformer->transform(query.data(), qmeta1, &query_buf, &qmeta);\n  reformer->transform(query1.data(), qmeta1, qnum, &query1_buf, &qmeta);\n  // single bf serch\n  {\n    size_t topk = 1000;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query_buf.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1_buf.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      
EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query_buf.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1_buf.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestRowMajorFp16WithBuildMemory) {\n  const float epsilon = 1e-2;\n  dimension_ = 8;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);\n  index_meta_.set_major_order(IndexMeta::MO_ROW);\n\n  prepare_fp16_index_holder(0, 1000);\n  IVFBuilder builder;\n  index_meta_.set_meta(IndexMeta::DataType::DT_FP16, dimension_);\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)1000, builder.stats().built_count());\n  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, 
builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(-0.1f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_ * 0.01);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta1(IndexMeta::DataType::DT_FP32, dimension_);\n\n  std::string query_buf;\n  query_buf.resize(dimension_ * sizeof(uint16_t));\n  std::string query1_buf;\n  query1_buf.resize(dimension_ * sizeof(uint16_t) * qnum);\n\n  IndexReformer::Pointer reformer =\n      IndexFactory::CreateReformer(\"HalfFloatReformer\");\n  IndexQueryMeta qmeta;\n  reformer->transform(query.data(), qmeta1, &query_buf, &qmeta);\n  reformer->transform(query1.data(), qmeta1, qnum, &query1_buf, &qmeta);\n  // single bf serch\n  {\n    size_t topk = 1000;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query_buf.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      
result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1_buf.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query_buf.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1_buf.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {\n  IVFBuilder builder;\n  params_.set(\"proxima.ivf.builder.graph_type\", \"hnsw\");\n  params_.set(\"proxima.ivf.builder.graph_ef\", 200);\n  params_.set(\"proxima.ivf.builder.graph_scan_ratio\", 1.0);\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, 
ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back((total - 1) * 1.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t 
topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {\n  IVFBuilder builder;\n  params_.set(\"proxima.ivf.builder.graph_type\", \"ssg\");\n  params_.set(\"proxima.ivf.builder.graph_ef\", 200);\n  params_.set(\"proxima.ivf.builder.graph_scan_ratio\", 1.0);\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  
EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back((total - 1) * 1.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const 
IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {\n  IVFBuilder builder;\n  auto build_params = params_;\n  build_params.set(PARAM_IVF_BUILDER_CONVERTER_CLASS, \"Int8QuantizerConverter\");\n  int ret = builder.init(index_meta_, build_params);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  ASSERT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, 
dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back((total - 1) * 1.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    
size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)999 - i, result[i].key());\n      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithFloat16Quantizer) {\n  const float epsilon = 1e-2;\n\n  IVFBuilder builder;\n  auto build_params = params_;\n  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"HalfFloatConverter\");\n  int ret = builder.init(index_meta_, build_params);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_fp32_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  ASSERT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  
EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(-0.1f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_ * 0.01);\n  }\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    context->set_filter([](uint64_t) { return false; });\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), 
qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestColumnMajorFloatWithConverterAndQuantizer) {\n  const float epsilon = 1e-2;\n  IVFBuilder builder;\n  auto build_params = params_;\n  build_params.set(PARAM_IVF_BUILDER_CONVERTER_CLASS, \"Int8QuantizerConverter\");\n  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"HalfFloatConverter\");\n  int ret = builder.init(index_meta_, build_params);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_fp32_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  ASSERT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  
params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(-0.1f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_ * 0.01);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    context->set_filter([](uint64_t) { return false; });\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      ASSERT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    
size_t topk = 100;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_EQ((uint64_t)i, result[i].key());\n      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /\n                      result[i].score(),\n                  1, epsilon);\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      EXPECT_EQ((uint64_t)q, result[0].key());\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestQuantizedPerCentroid) {\n  IVFBuilder builder;\n  auto build_params = params_;\n  auto meta = index_meta_;\n  meta.set_metric(\"InnerProduct\", 0, Params());\n  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"Int8QuantizerConverter\");\n  build_params.set(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, true);\n  int ret = builder.init(meta, build_params);\n  EXPECT_EQ(0, ret);\n  int total = 1000;\n  prepare_index_holder(0, total);\n  ret = builder.train(threads_, holder_);\n  ASSERT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)total, builder.stats().built_count());\n  EXPECT_EQ((size_t)total, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  
Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(500.0f);\n  }\n\n  size_t qnum = 63;\n  std::vector<float> query1;\n  for (size_t i = 1; i <= dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf serch\n  {\n    size_t topk = (size_t)total;\n    context->set_topk(topk);\n    context->set_filter([](uint64_t) { return false; });\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      ASSERT_NEAR((uint64_t)(total - 1) - i, result[i].key(), 150);\n      float expect = (float)result[i].key() * 500.0f * dimension_;\n      ASSERT_NEAR(expect, std::abs(result[i].score()), expect * 0.2 + 500000);\n    }\n  }\n\n  // batch bf serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 
100);\n      // EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 10;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_NEAR((uint64_t)total - i - 1, result[i].key(), 100);\n      // EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());\n    }\n  }\n\n  // batch knn serch\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_EQ((size_t)topk, result.size());\n      ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 100);\n      // EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\nTEST_F(IVFSearcherTest, TestSharedContext) {\n  size_t dim = dimension_;\n  auto gen_holder = [&](int start, size_t doc_cnt) {\n    auto holder =\n        make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n    uint64_t key = start;\n    for (size_t i = 0; i < doc_cnt; i++) {\n      NumericalVector<float> vec(dim);\n      for (size_t j = 0; j < dim; ++j) {\n        vec[j] = i;\n      }\n      key += 3;\n      holder->emplace(key, vec);\n    }\n    return holder;\n  };\n  auto gen_index = [&](int start, size_t docs, std::string path) {\n    auto holder = gen_holder(start, docs);\n    IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n    Params params;\n    params.set(\"proxima.ivf.builder.centroid_count\", \"16\");\n    builder->init(index_meta_, params);\n    builder->train(holder);\n    builder->build(holder);\n    auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n 
   dumper->create(path);\n    builder->dump(dumper);\n    dumper->close();\n\n    IndexSearcher::Pointer searcher =\n        IndexFactory::CreateSearcher(\"IVFSearcher\");\n    auto name = rand() % 2 ? \"FileReadStorage\" : \"MMapFileReadStorage\";\n    auto container = IndexFactory::CreateStorage(name);\n    bool alone_file_handle = std::rand() % 2;\n    bool lock_hot = std::rand() % 2;\n    params.set(\"proxima.file.read_storage.alone_file_handle\",\n               alone_file_handle);\n    params.set(\"proxima.file.read_storage.lock_hot_in_memory\", lock_hot);\n    container->init(params);\n    container->open(path, false);\n    searcher->init(Params());\n    searcher->load(container, IndexMetric::Pointer());\n    return searcher;\n  };\n\n  srand(Realtime::MilliSeconds());\n  size_t docs1 = rand() % 500 + 100;\n  size_t docs2 = rand() % 5000 + 100;\n  size_t docs3 = rand() % 50000 + 100;\n  auto path1 = \"unittest-index/TestSharedContext.index1\";\n  auto path2 = \"unittest-index/TestSharedContext.index2\";\n  auto path3 = \"unittest-index/TestSharedContext.index3\";\n  auto searcher1 = gen_index(0, docs1, path1);\n  auto searcher2 = gen_index(1, docs2, path2);\n  auto searcher3 = gen_index(2, docs3, path3);\n\n  srand(Realtime::MilliSeconds());\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  auto do_test = [&]() {\n    IndexSearcher::Context::Pointer ctx;\n    switch (rand() % 3) {\n      case 0:\n        ctx = searcher1->create_context();\n        if (rand() % 2 == 0) {\n          ctx->set_filter([](uint64_t) { return false; });\n        }\n        break;\n      case 1:\n        ctx = searcher2->create_context();\n        if (rand() % 2 == 0) {\n          ctx->set_filter([](uint64_t) { return false; });\n        }\n        break;\n      case 2:\n        ctx = searcher3->create_context();\n        if (rand() % 2 == 0) {\n          ctx->set_filter([](uint64_t) { return false; });\n        }\n        break;\n    }\n    ctx->set_topk(10);\n\n    int 
ret = 0;\n    for (int i = 0; i < 100; ++i) {\n      NumericalVector<float> query(dim);\n      for (size_t j = 0; j < dim; ++j) {\n        query[j] = i + 0.1f;\n      }\n\n      auto code = rand() % 6;\n      switch (code) {\n        case 0:\n          ret = searcher1->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 1:\n          ret = searcher2->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 2:\n          ret = searcher3->search_impl(query.data(), qmeta, ctx);\n          break;\n        case 3:\n          ret = searcher1->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n        case 4:\n          ret = searcher2->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n        case 5:\n          ret = searcher3->search_bf_impl(query.data(), qmeta, ctx);\n          break;\n      }\n\n      ASSERT_EQ(0, ret);\n      auto &results = ctx->result();\n      EXPECT_EQ(10, results.size());\n      for (int k = 0; k < 10; ++k) {\n        EXPECT_EQ(code % 3, results[k].key() % 3);\n      }\n    }\n  };\n  auto t1 = std::async(std::launch::async, do_test);\n  auto t2 = std::async(std::launch::async, do_test);\n  t1.wait();\n  t2.wait();\n}\n\nTEST_F(IVFSearcherTest, TestRnnSearch) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder, nullptr);\n  size_t dim = 16;\n  auto holder =\n      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  size_t doc_cnt = 1000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  Params params;\n  params.set(\"proxima.ivf.builder.centroid_count\", \"20\");\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = 
IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = \"IVFSearcherTest.TestRnnSearch\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n  ASSERT_EQ(0, builder->cleanup());\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  ASSERT_EQ(0, searcher->init(Params()));\n\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 0.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 50;\n  float radius = 1000.0f;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  ASSERT_EQ(topk, results.size());\n\n  ctx->set_threshold(radius);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  EXPECT_GT(topk, results.size());\n  for (size_t k = 0; k < results.size(); ++k) {\n    ASSERT_GE(radius, results[k].score());\n  }\n  File::RemovePath(path);\n}\n\nTEST_F(IVFSearcherTest, TestProvider) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n      dimension_);\n  size_t doc_cnt = 5000UL;\n  std::vector<uint64_t> keys(doc_cnt);\n  srand(Realtime::MilliSeconds());\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, 
std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < doc_cnt; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = keys[i];\n    }\n    ASSERT_TRUE(holder->emplace(keys[i], vec));\n  }\n  Params params;\n  params.set(\"proxima.ivf.builder.centroid_count\", \"20\");\n  ASSERT_EQ(0, builder->init(index_meta_, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = index_path_ + \"/TestProvider\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params searcherParams;\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_provider();\n  ASSERT_EQ(IndexMeta::DataType::DT_FP32, provider->data_type());\n  for (size_t i = 0; i < keys.size(); ++i) {\n    const float *d1 =\n        reinterpret_cast<const float *>(provider->get_vector(keys[i]));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_FLOAT_EQ(d1[j], keys[i]);\n    }\n  }\n\n  auto iter = provider->create_iterator();\n  size_t cnt = 0;\n  while 
(iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float *>(iter->data());\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, doc_cnt);\n\n  ASSERT_EQ(dimension_, provider->dimension());\n  ASSERT_EQ(index_meta_.element_size(), provider->element_size());\n  ASSERT_EQ(index_meta_.data_type(), provider->data_type());\n}\n\nTEST_F(IVFSearcherTest, TestProviderInt8) {\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n      dimension_);\n  size_t doc_cnt = 5000UL;\n  std::vector<key_t> keys(doc_cnt);\n  srand(Realtime::MilliSeconds());\n  bool rand_key = rand() % 2;\n  bool rand_order = rand() % 2;\n  size_t step = rand() % 2 + 1;\n  LOG_DEBUG(\"randKey=%u randOrder=%u step=%zu\", rand_key, rand_order, step);\n  if (rand_key) {\n    std::mt19937 mt;\n    std::uniform_int_distribution<size_t> dt(\n        0, std::numeric_limits<size_t>::max());\n    for (size_t i = 0; i < doc_cnt; ++i) {\n      keys[i] = dt(mt);\n    }\n  } else {\n    std::iota(keys.begin(), keys.end(), 0U);\n    std::transform(keys.begin(), keys.end(), keys.begin(),\n                   [&](key_t k) { return step * k; });\n    if (rand_order) {\n      uint32_t seed = Realtime::Seconds();\n      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));\n    }\n  }\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = keys[i];\n    }\n    ASSERT_TRUE(holder->emplace(keys[i], vec));\n  }\n  Params params;\n  params.set(\"proxima.ivf.builder.centroid_count\", \"20\");\n  params.set(\"proxima.ivf.builder.retain_original_features\", false);\n  auto meta = index_meta_;\n  meta.set_metric(\"InnerProduct\", 0, Params());\n  
params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"Int8QuantizerConverter\");\n  params.set(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, true);\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = index_path_ + \"/TestProvider\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params searcherParams;\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_provider();\n  ASSERT_TRUE(!!provider);\n  ASSERT_EQ(IndexMeta::DataType::DT_INT8, provider->data_type());\n  for (size_t i = 0; i < keys.size(); ++i) {\n    auto d1 = reinterpret_cast<const int8_t *>(provider->get_vector(keys[i]));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_LT(d1[j], 255);\n    }\n  }\n\n  auto iter = provider->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    const int8_t *d = reinterpret_cast<const int8_t *>(iter->data());\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_LT(d[j], 255);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, doc_cnt);\n\n  ASSERT_EQ(dimension_, provider->dimension());\n  ASSERT_EQ(index_meta_.element_size(), provider->element_size() * 4);\n}\n\nTEST_F(IVFSearcherTest, TestSearcherReuse) {\n  auto build_index = [](IndexMeta &meta, size_t base, size_t doc_cnt,\n                        std::string &path) {\n    IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n    
ASSERT_NE(builder, nullptr);\n    IndexHolder::Pointer holder;\n    if (meta.data_type() == IndexMeta::DataType::DT_INT8) {\n      auto h = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>>(\n          meta.dimension());\n      for (size_t i = base; i < doc_cnt; i++) {\n        NumericalVector<int8_t> vec(meta.dimension());\n        for (size_t j = 0; j < meta.dimension(); ++j) {\n          vec[j] = i;\n        }\n        ASSERT_TRUE(h->emplace(i, vec));\n      }\n      holder = h;\n    } else if (meta.data_type() == IndexMeta::DataType::DT_FP32) {\n      auto h = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          meta.dimension());\n      for (size_t i = base; i < doc_cnt; i++) {\n        NumericalVector<float> vec(meta.dimension());\n        for (size_t j = 0; j < meta.dimension(); ++j) {\n          vec[j] = i;\n        }\n        ASSERT_TRUE(h->emplace(i, vec));\n      }\n      holder = h;\n    }\n    Params params;\n    LOG_DEBUG(\"Build index %s count=%zu\", path.c_str(), holder->count());\n    params.set(\"proxima.ivf.builder.centroid_count\", \"10\");\n    ASSERT_EQ(0, builder->init(meta, params));\n    ASSERT_EQ(0, builder->train(holder));\n    ASSERT_EQ(0, builder->build(holder));\n    auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n    ASSERT_NE(dumper, nullptr);\n    ASSERT_EQ(0, dumper->create(path));\n    ASSERT_EQ(0, builder->dump(dumper));\n    ASSERT_EQ(0, dumper->close());\n    ASSERT_EQ(0, builder->cleanup());\n  };\n\n  auto path1 = index_path_ + \"/index1\";\n  auto path2 = index_path_ + \"/index2\";\n  IndexMeta meta1(IndexMeta::DataType::DT_INT8, 16);\n  IndexMeta meta2(IndexMeta::DataType::DT_FP32, 31);\n  build_index(meta1, 10, 200, path1);\n  build_index(meta2, 2000, 3000, path2);\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params searcherParams;\n  ASSERT_EQ(0, 
searcher->init(searcherParams));\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path1, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_provider();\n  ASSERT_EQ(IndexMeta::DataType::DT_INT8, searcher->meta().data_type());\n  ASSERT_EQ(190UL, searcher->stats().loaded_count());\n  ASSERT_EQ(190UL, provider->count());\n  ASSERT_EQ(\"IVFSearcher\", provider->owner_class());\n  for (size_t i = 10; i < 200ul; ++i) {\n    const int8_t *d1 =\n        reinterpret_cast<const int8_t *>(provider->get_vector(i));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < meta1.dimension(); ++j) {\n      ASSERT_EQ(d1[j], (int8_t)i);\n    }\n  }\n  ASSERT_EQ(meta1.dimension(), provider->dimension());\n  ASSERT_EQ(meta1.element_size(), provider->element_size());\n  ASSERT_EQ(meta1.data_type(), provider->data_type());\n  ASSERT_EQ(0, searcher->unload());\n  ASSERT_EQ(0, searcher->cleanup());\n\n  auto container2 = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container2->open(path2, false));\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  ASSERT_EQ(0, searcher->load(container2, IndexMetric::Pointer()));\n\n  auto provider2 = searcher->create_provider();\n  ASSERT_EQ(IndexMeta::DataType::DT_FP32, searcher->meta().data_type());\n  for (size_t i = 2000; i < 3000ul; ++i) {\n    const float *d1 = reinterpret_cast<const float *>(provider2->get_vector(i));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < meta2.dimension(); ++j) {\n      ASSERT_FLOAT_EQ(d1[j], i);\n    }\n  }\n  ASSERT_EQ(meta2.dimension(), provider2->dimension());\n  ASSERT_EQ(meta2.element_size(), provider2->element_size());\n  ASSERT_EQ(meta2.data_type(), provider2->data_type());\n  ASSERT_EQ(1000UL, provider2->count());\n  ASSERT_EQ(1000UL, searcher->stats().loaded_count());\n}\n\nTEST_F(IVFSearcherTest, TestInt8QuantizerWithL2) {\n  IndexBuilder::Pointer builder = 
IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n      dimension_);\n  size_t doc_cnt = 5000UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      vec[j] = i;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  Params params;\n  params.set(\"proxima.ivf.builder.centroid_count\", \"20\");\n  params.set(\"proxima.ivf.builder.store_original_features\", true);\n  auto meta = index_meta_;\n  params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"Int8QuantizerConverter\");\n  ASSERT_EQ(0, builder->init(meta, params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  string path = index_path_ + \"/TestQuantizer\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params searcherParams;\n  ASSERT_EQ(0, searcher->init(searcherParams));\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n\n  auto provider = searcher->create_provider();\n  ASSERT_EQ(IndexMeta::DataType::DT_FP32, provider->data_type());\n  for (size_t i = 0; i < doc_cnt; ++i) {\n    const float *d1 = reinterpret_cast<const float *>(provider->get_vector(i));\n    ASSERT_TRUE(d1);\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_FLOAT_EQ(d1[j], i);\n    }\n  }\n\n  auto iter = provider->create_iterator();\n  size_t cnt = 0;\n  while (iter->is_valid()) {\n    auto key = iter->key();\n    const float *d = reinterpret_cast<const float 
*>(iter->data());\n    for (size_t j = 0; j < dimension_; ++j) {\n      ASSERT_FLOAT_EQ(d[j], key);\n    }\n    cnt++;\n    iter->next();\n  }\n  ASSERT_EQ(cnt, doc_cnt);\n\n  ASSERT_EQ(dimension_, provider->dimension());\n  ASSERT_EQ(index_meta_.element_size(), provider->element_size());\n  ASSERT_EQ(index_meta_.data_type(), provider->data_type());\n\n  auto context = searcher->create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n  size_t topk = 1;\n  context->set_topk(topk);\n  context->set_filter([](uint64_t) { return false; });\n  for (size_t i = 0; i < doc_cnt; i += 20) {\n    NumericalVector<float> query(dimension_);\n    for (size_t j = 0; j < dimension_; ++j) {\n      query[j] = i;\n    }\n    int ret = searcher->search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    ASSERT_NEAR(i, result[0].key(), 100);\n  }\n}\n\nTEST_F(IVFSearcherTest, TestMipsEuclideanMetric) {\n  constexpr size_t static dim = 32;\n  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);\n  meta.set_metric(\"MipsSquaredEuclidean\", 0, Params());\n  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(\"IVFBuilder\");\n  ASSERT_NE(builder, nullptr);\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);\n  const size_t COUNT = 10000UL;\n  for (size_t i = 0; i < COUNT; i++) {\n    NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = i / 100.0f;\n    }\n    ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  Params builder_params;\n  builder_params.set(\"proxima.ivf.builder.centroid_count\", 1024);\n  ASSERT_EQ(0, builder->init(meta, builder_params));\n  ASSERT_EQ(0, builder->train(holder));\n  ASSERT_EQ(0, builder->build(holder));\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_NE(dumper, nullptr);\n  std::string path = 
\"IVFTestMipsEuclideanMetric\";\n  ASSERT_EQ(0, dumper->create(path));\n  ASSERT_EQ(0, builder->dump(dumper));\n  ASSERT_EQ(0, dumper->close());\n\n  // test searcher\n  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher(\"IVFSearcher\");\n  ASSERT_NE(searcher, nullptr);\n  Params params;\n  params.set(\"proxima.ivf.searcher.scan_ratio\", 0.1f);\n  ASSERT_EQ(0, searcher->init(params));\n\n  auto container = IndexFactory::CreateStorage(\"FileReadStorage\");\n  ASSERT_EQ(0, container->open(path, false));\n  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));\n  auto ctx = searcher->create_context();\n  ASSERT_TRUE(!!ctx);\n\n  NumericalVector<float> vec(dim);\n  for (size_t j = 0; j < dim; ++j) {\n    vec[j] = 1.0;\n  }\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);\n  size_t topk = 10;\n  ctx->set_topk(topk);\n  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));\n  auto &results = ctx->result();\n  EXPECT_EQ(results.size(), topk);\n  EXPECT_NEAR((uint64_t)(COUNT - 1), results[0].key(), 10);\n  File::RemovePath(path);\n}\n\nTEST_F(IVFSearcherTest, TestSameValue) {\n  IVFBuilder builder;\n  //    index_meta_.set_major_order(IndexMeta::MO_ROW);\n  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, \"2\");\n  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, \"KmeansCluster\");\n  params_.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, \"Int8QuantizerConverter\");\n\n  int ret = builder.init(index_meta_, params_);\n  EXPECT_EQ(0, ret);\n  prepare_same_index_holder(0, 33);\n  ret = builder.train(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  ret = builder.build(threads_, holder_);\n  EXPECT_EQ(0, ret);\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ret = dumper->create(index_path_);\n  EXPECT_EQ(0, ret);\n\n  ret = builder.dump(dumper);\n  EXPECT_EQ((size_t)33, builder.stats().built_count());\n  EXPECT_EQ((size_t)33, builder.stats().dumped_count());\n  EXPECT_EQ((size_t)0, builder.stats().discarded_count());\n  
EXPECT_EQ(0, dumper->close());\n\n  IVFSearcher searcher;\n  Params params;\n  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);\n  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);\n\n  ret = searcher.init(params);\n  EXPECT_EQ(0, ret);\n\n  IndexStorage::Pointer container =\n      IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  EXPECT_TRUE(!!container);\n\n  Params container_params;\n  container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  container->init(container_params);\n  ret = container->open(index_path_, false);\n  EXPECT_EQ(0, ret);\n\n  ret = searcher.load(container, IndexMetric::Pointer());\n  EXPECT_EQ(0, ret);\n\n  std::vector<float> query;\n  for (size_t i = 0; i < dimension_; ++i) {\n    query.push_back(32.0f);\n  }\n\n  size_t qnum = 33;\n  std::vector<float> query1;\n  for (size_t i = 0; i < dimension_ * qnum; ++i) {\n    query1.push_back(i / dimension_);\n  }\n\n\n  auto context = searcher.create_context();\n  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);\n\n  // single bf search\n  {\n    size_t topk = 33;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      // std::cout << \"i: \" << i << \", key: \" << result[i].key() << \", score: \"\n      // << result[i].score() << std::endl;\n      ASSERT_EQ(0, result[i].score());\n    }\n  }\n\n  // batch bf search\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  // single knn search\n  {\n    size_t topk = 33;\n    context->set_topk(topk);\n    ret = 
searcher.search_impl(query.data(), qmeta, context);\n    EXPECT_EQ(0, ret);\n\n    const IndexDocumentList &result = context->result(0);\n    EXPECT_EQ((size_t)topk, result.size());\n    for (size_t i = 0; i < topk; ++i) {\n      EXPECT_FLOAT_EQ((float)0, result[i].score());\n    }\n  }\n\n  // batch knn search\n  {\n    size_t topk = 1;\n    context->set_topk(topk);\n    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);\n    EXPECT_EQ(0, ret);\n\n    for (size_t q = 0; q < qnum; ++q) {\n      const IndexDocumentList &result = context->result(q);\n      EXPECT_FLOAT_EQ((float)0, result[0].score());\n    }\n  }\n\n  ret = searcher.unload();\n  EXPECT_EQ(0, ret);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/framework/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework\n      SRCS ${CC_SRCS}\n      INCS ../../src\n    )\nendforeach()"
  },
  {
    "path": "tests/core/interface/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_metric core_interface core_knn_flat core_utility core_quantizer sparsehash core_knn_hnsw core_mix_reducer\n          core_knn_flat_sparse core_knn_hnsw_sparse core_knn_ivf core_knn_hnsw_rabitq\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm\n    )\nendforeach()"
  },
  {
    "path": "tests/core/interface/index_interface_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cassert>\n#include <cmath>\n#include <functional>\n#include <iostream>\n#include <unordered_map>\n#include <gtest/gtest.h>\n#if RABITQ_SUPPORTED\n#include \"core/algorithm/hnsw_rabitq/rabitq_converter.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#endif\n#include \"zvec/ailego/buffer/buffer_manager.h\"\n#include \"zvec/core/interface/index.h\"\n#include \"zvec/core/interface/index_factory.h\"\n#include \"zvec/core/interface/index_param.h\"\n#include \"zvec/core/interface/index_param_builders.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec::core_interface;\n\nTEST(IndexInterface, General) {\n  constexpr uint32_t kDimension = 64;\n  const std::string index_name{\"test.index\"};\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_name.c_str());\n\n  auto func = [&](const BaseIndexParam::Pointer &param,\n                  const BaseIndexQueryParam::Pointer &query_param) {\n    system(cmd_buf);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n\n    index->Open(index_name, {StorageOptions::StorageType::kMMAP, true});\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 2.0f;\n    VectorData vector_data;\n    
vector_data.vector = DenseVector{vector.data()};\n    ASSERT_TRUE(0 == index->Add(vector_data, 233));\n    ASSERT_TRUE(0 == index->Train());\n\n    SearchResult result;\n    VectorData query;\n    query.vector = DenseVector{vector.data()};\n    index->Search(query, query_param, &result);\n    ASSERT_EQ(1, result.doc_list_.size());\n    ASSERT_EQ(233, result.doc_list_[0].key());\n    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());\n    if (query_param->fetch_vector) {\n      auto &doc = result.doc_list_[0];\n      if (result.reverted_vector_list_.size() != 0) {\n        // cosine metric or bf16 quantizer\n        ASSERT_EQ(1, result.reverted_vector_list_.size());\n        auto reverted_vector = reinterpret_cast<const float *>(\n            result.reverted_vector_list_[0].data());\n        ASSERT_FLOAT_EQ(1.0f, reverted_vector[1]);\n        ASSERT_FLOAT_EQ(2.0f, reverted_vector[2]);\n      } else {\n        auto vector = reinterpret_cast<const float *>(doc.vector());\n        ASSERT_FLOAT_EQ(1.0f, vector[1]);\n        ASSERT_FLOAT_EQ(2.0f, vector[2]);\n      }\n    }\n\n    vector[1] = 0;\n    vector[2] = 0;\n    VectorDataBuffer fetched_vector_data;\n    ASSERT_TRUE(0 == index->Fetch(233, &fetched_vector_data));\n    float *fetched_vector = reinterpret_cast<float *>(\n        std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n            .data.data());\n    ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);\n    ASSERT_FLOAT_EQ(2.0f, fetched_vector[2]);\n    index->Close();\n    system(cmd_buf);\n  };\n\n\n  auto param = FlatIndexParamBuilder()\n                   .WithMetricType(MetricType::kInnerProduct)\n                   .WithDataType(DataType::DT_FP32)\n                   .WithDimension(kDimension)\n                   .WithIsSparse(false)\n                   .Build();\n  func(param,\n       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n  func(FlatIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n     
      .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .Build(),\n       HNSWQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       HNSWQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n  func(IVFIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithNList(10)\n           .Build(),\n       IVFQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n  func(IVFIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithNList(10)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       IVFQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n}\n\nTEST(IndexInterface, BufferGeneral) {\n  constexpr uint32_t kDimension = 64;\n 
 const std::string index_name{\"test.index\"};\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s*\", index_name.c_str());\n\n  auto func = [&](const BaseIndexParam::Pointer &param,\n                  const BaseIndexQueryParam::Pointer &query_param) {\n    std::string real_index_name = index_name;\n    system(cmd_buf);\n    auto write_index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, write_index);\n\n    write_index->Open(real_index_name,\n                      {StorageOptions::StorageType::kMMAP, true});\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 2.0f;\n    VectorData vector_data;\n    vector_data.vector = DenseVector{vector.data()};\n    ASSERT_TRUE(0 == write_index->Add(vector_data, 233));\n    write_index->Close();\n\n    auto read_index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, read_index);\n    read_index->Open(real_index_name,\n                     {StorageOptions::StorageType::kBufferPool, false});\n\n    SearchResult result;\n    VectorData query;\n    query.vector = DenseVector{vector.data()};\n    read_index->Search(query, query_param, &result);\n    ASSERT_EQ(1, result.doc_list_.size());\n    ASSERT_EQ(233, result.doc_list_[0].key());\n    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());\n    if (query_param->fetch_vector) {\n      auto &doc = result.doc_list_[0];\n      if (result.reverted_vector_list_.size() != 0) {\n        // cosine metric or bf16 quantizer\n        ASSERT_EQ(1, result.reverted_vector_list_.size());\n        auto reverted_vector = reinterpret_cast<const float *>(\n            result.reverted_vector_list_[0].data());\n        ASSERT_FLOAT_EQ(1.0f, reverted_vector[1]);\n        ASSERT_FLOAT_EQ(2.0f, reverted_vector[2]);\n      } else {\n        auto vector = reinterpret_cast<const float *>(doc.vector());\n        ASSERT_FLOAT_EQ(1.0f, vector[1]);\n        ASSERT_FLOAT_EQ(2.0f, vector[2]);\n      }\n    }\n\n    vector[1] = 0;\n    
vector[2] = 0;\n    VectorDataBuffer fetched_vector_data;\n    ASSERT_TRUE(0 == read_index->Fetch(233, &fetched_vector_data));\n    float *fetched_vector = reinterpret_cast<float *>(\n        std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n            .data.data());\n    ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);\n    ASSERT_FLOAT_EQ(2.0f, fetched_vector[2]);\n    result.doc_list_.clear();\n    read_index->Close();\n    system(cmd_buf);\n  };\n\n\n  auto param = FlatIndexParamBuilder()\n                   .WithMetricType(MetricType::kInnerProduct)\n                   .WithDataType(DataType::DT_FP32)\n                   .WithDimension(kDimension)\n                   .WithIsSparse(false)\n                   .Build();\n  func(param,\n       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n  func(FlatIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .Build(),\n       HNSWQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       HNSWQueryParamBuilder()\n       
    .with_topk(10)\n           .with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n}\n\n\nTEST(IndexInterface, SparseGeneral) {\n  constexpr uint32_t kSparseCount = 3;\n  const std::string index_name{\"test.index\"};\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_name.c_str());\n\n  auto func = [&](const BaseIndexParam::Pointer &param,\n                  const BaseIndexQueryParam::Pointer &query_param) {\n    system(cmd_buf);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n\n    index->Open(index_name, {StorageOptions::StorageType::kMMAP, true});\n\n    std::vector<uint32_t> indices(kSparseCount);\n    std::vector<float> values(kSparseCount);\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      indices[i] = i;\n      values[i] = i;\n    }\n\n    VectorData vector_data{\n        SparseVector{kSparseCount, indices.data(), values.data()}};\n    ASSERT_TRUE(0 == index->Add(vector_data, 233));\n\n\n    SearchResult result;\n    VectorData query = {\n        SparseVector{kSparseCount, indices.data(), values.data()}};\n    index->Search(query, query_param, &result);\n    ASSERT_EQ(1, result.doc_list_.size());\n    ASSERT_EQ(233, result.doc_list_[0].key());\n    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());\n\n    if (query_param->fetch_vector) {\n      auto &sparse_doc = result.doc_list_[0].sparse_doc();\n      auto sparse_indices = reinterpret_cast<const uint32_t *>(\n          sparse_doc.sparse_indices().data());\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, sparse_indices[i]);\n      }\n      if (!result.reverted_sparse_values_list_.empty()) {\n        ASSERT_EQ(1, result.reverted_sparse_values_list_.size());\n        auto reverted_sparse_values = reinterpret_cast<const float *>(\n            result.reverted_sparse_values_list_[0].data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, 
reverted_sparse_values[i]);\n        }\n      } else {\n        auto sparse_values =\n            reinterpret_cast<const float *>(sparse_doc.sparse_values().data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, sparse_values[i]);\n        }\n      }\n    }\n\n    values[1] = 0;\n    values[2] = 0;\n    VectorDataBuffer fetched_vector_data;\n    ASSERT_TRUE(0 == index->Fetch(233, &fetched_vector_data));\n    const SparseVectorBuffer &sparse_vector_buffer =\n        std::get<SparseVectorBuffer>(fetched_vector_data.vector_buffer);\n    const uint32_t *fetched_indices =\n        reinterpret_cast<const uint32_t *>(sparse_vector_buffer.indices.data());\n    const float *fetched_values =\n        reinterpret_cast<const float *>(sparse_vector_buffer.values.data());\n    ASSERT_EQ(kSparseCount, sparse_vector_buffer.count);\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      ASSERT_EQ(i, fetched_indices[i]);\n      ASSERT_EQ(i, fetched_values[i]);\n    }\n    index->Close();\n    system(cmd_buf);\n  };\n\n\n  auto param = FlatIndexParamBuilder()\n                   .WithMetricType(MetricType::kInnerProduct)\n                   .WithDataType(DataType::DT_FP32)\n                   .WithIsSparse(true)\n                   .Build();\n  // func(param, FlatQueryParam{{.topk = 10, .fetch_vector = true}});\n  func(FlatIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithIsSparse(true)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());\n\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithIsSparse(true)\n           .WithEFConstruction(100)\n           .Build(),\n       HNSWQueryParamBuilder()\n           .with_topk(10)\n           
.with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n  func(HNSWIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithIsSparse(true)\n           .WithEFConstruction(100)\n           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n           .Build(),\n       HNSWQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(true)\n           .with_ef_search(20)\n           .build());\n}\n\n\nTEST(IndexInterface, Merge) {\n  constexpr uint32_t kDimension = 64;\n  const std::string index_name{\"test.index\"};\n\n  auto del_index_file_func = [&](const std::string file_name) {\n    auto cmd_buf = \"rm -f \" + file_name;\n    system(cmd_buf.c_str());\n  };\n\n  auto create_index_func =\n      [&](const BaseIndexParam::Pointer &param,\n          const std::string &index_name) -> Index::Pointer {\n    del_index_file_func(index_name);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    if (index == nullptr ||\n        0 != index->Open(index_name,\n                         {StorageOptions::StorageType::kMMAP, true})) {\n      return nullptr;\n    }\n    return index;\n  };\n\n  auto func = [&](const BaseIndexParam::Pointer &param_target,\n                  const BaseIndexParam::Pointer &param_source) {\n    auto index1 = create_index_func(param_source, index_name + \"1\");\n    ASSERT_NE(nullptr, index1);\n    auto index2 = create_index_func(param_source, index_name + \"2\");\n    ASSERT_NE(nullptr, index2);\n\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 123.0f;\n    VectorData vector_data{DenseVector{vector.data()}};\n    ASSERT_TRUE(0 == index1->Add(vector_data, 0));\n\n    vector[1] = 2.0f;\n    ASSERT_TRUE(0 == index2->Add(vector_data, 0));\n    vector[1] = 3.0f;\n    ASSERT_TRUE(0 == index2->Add(vector_data, 1));\n\n    {\n      VectorDataBuffer fetched_vector_data;\n      
ASSERT_TRUE(0 == index1->Fetch(0, &fetched_vector_data));\n      float *fetched_vector = reinterpret_cast<float *>(\n          std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n              .data.data());\n      ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);\n      ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n    }\n    {\n      VectorDataBuffer fetched_vector_data;\n      ASSERT_TRUE(0 == index2->Fetch(0, &fetched_vector_data));\n      float *fetched_vector = reinterpret_cast<float *>(\n          std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n              .data.data());\n      ASSERT_FLOAT_EQ(2.0f, fetched_vector[1]);\n      ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n    }\n    {\n      VectorDataBuffer fetched_vector_data;\n      ASSERT_TRUE(0 == index2->Fetch(1, &fetched_vector_data));\n      float *fetched_vector = reinterpret_cast<float *>(\n          std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n              .data.data());\n      ASSERT_FLOAT_EQ(3.0f, fetched_vector[1]);\n      ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n    }\n\n    {  // test reduce\n      auto index3 = create_index_func(param_target, index_name + \"3\");\n      ASSERT_NE(nullptr, index3);\n      ASSERT_TRUE(0 == index3->Merge({index1, index2}, IndexFilter()));\n      ASSERT_TRUE(3 == index3->GetDocCount());\n      {\n        VectorDataBuffer fetched_vector_data;\n        ASSERT_TRUE(0 == index3->Fetch(0, &fetched_vector_data));\n        float *fetched_vector = reinterpret_cast<float *>(\n            std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n                .data.data());\n        ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);\n        ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n      }\n      {\n        VectorDataBuffer fetched_vector_data;\n        ASSERT_TRUE(0 == index3->Fetch(1, &fetched_vector_data));\n        float *fetched_vector = reinterpret_cast<float *>(\n            
std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n                .data.data());\n        ASSERT_FLOAT_EQ(2.0f, fetched_vector[1]);\n        ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n      }\n      index3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    {  // test reduce with filter\n      auto index3 = create_index_func(param_target, index_name + \"3\");\n      ASSERT_NE(nullptr, index3);\n      auto filter = IndexFilter();\n      filter.set([](uint64_t key) { return key == 0; });  // TODO: uint32?\n      ASSERT_TRUE(0 == index3->Merge({index1, index2}, filter));\n      ASSERT_TRUE(2 == index3->GetDocCount());\n      {\n        VectorDataBuffer fetched_vector_data;\n        ASSERT_TRUE(0 == index3->Fetch(0, &fetched_vector_data));\n        float *fetched_vector = reinterpret_cast<float *>(\n            std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)\n                .data.data());\n        ASSERT_FLOAT_EQ(2.0f, fetched_vector[1]);\n        ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n      }\n      index3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    index1->Close();\n    index2->Close();\n    del_index_file_func(index_name + \"1\");\n    del_index_file_func(index_name + \"2\");\n  };\n\n  // same index\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(kDimension)\n                     .WithIsSparse(false)\n                     .Build();\n    func(param, param);\n  }\n  {\n    auto param = HNSWIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(kDimension)\n                     .WithIsSparse(false)\n                     .Build();\n    func(param, param);\n  }\n\n  // different index\n  {\n    auto param_flat = 
FlatIndexParamBuilder()\n                          .WithMetricType(MetricType::kInnerProduct)\n                          .WithDataType(DataType::DT_FP32)\n                          .WithDimension(kDimension)\n                          .WithIsSparse(false)\n                          .Build();\n    auto param_hnsw = HNSWIndexParamBuilder()\n                          .WithMetricType(MetricType::kInnerProduct)\n                          .WithDataType(DataType::DT_FP32)\n                          .WithDimension(kDimension)\n                          .WithIsSparse(false)\n                          .Build();\n    func(param_flat, param_hnsw);\n    func(param_hnsw, param_flat);\n  }\n}\n\n\nTEST(IndexInterface, Serialize) {\n  {\n    std::cout << \"\\n\\n----flat index----\" << std::endl;\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .WithQuantizerParam(QuantizerParam{QuantizerType::kFP16})\n                     .Build();\n\n    std::cout << \"flat index -- omit=true: \" << param->SerializeToJson(true)\n              << std::endl;\n    std::cout << \"omit=false: \" << param->SerializeToJson() << std::endl;\n\n    auto deserialized_param =\n        IndexFactory::DeserializeIndexParamFromJson(param->SerializeToJson());\n    ASSERT_NE(nullptr, deserialized_param.get());\n\n\n    std::cout << \"serialize then de then se:\"\n              << deserialized_param->SerializeToJson() << std::endl;\n\n    ASSERT_TRUE(deserialized_param->SerializeToJson() ==\n                param->SerializeToJson());\n    ASSERT_TRUE(deserialized_param->SerializeToJson(true) ==\n                param->SerializeToJson(true));\n  }\n\n  {\n    std::cout << \"\\n\\n----hnsw index----\" << std::endl;\n    auto param = HNSWIndexParamBuilder()\n                     
.WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .WithQuantizerParam(QuantizerParam{QuantizerType::kFP16})\n                     .Build();\n\n    std::cout << \"hnsw index -- omit=true: \" << param->SerializeToJson(true)\n              << std::endl;\n    std::cout << \"hnsw index -- omit=false: \" << param->SerializeToJson()\n              << std::endl;\n\n    auto deserialized_param =\n        IndexFactory::DeserializeIndexParamFromJson(param->SerializeToJson());\n    ASSERT_NE(nullptr, deserialized_param.get());\n\n    std::cout << \"serialize then de then se:\"\n              << deserialized_param->SerializeToJson() << std::endl;\n\n\n    ASSERT_TRUE(deserialized_param->SerializeToJson() ==\n                param->SerializeToJson());\n    ASSERT_TRUE(deserialized_param->SerializeToJson(true) ==\n                param->SerializeToJson(true));\n  }\n\n  {\n    std::cout << \"\\n\\n----flat query----\" << std::endl;\n    auto param =\n        FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build();\n    std::cout << \"flat query -- omit=true: \"\n              << IndexFactory::QueryParamSerializeToJson(*param, true)\n              << std::endl;\n    std::cout << \"flat query -- omit=false: \"\n              << IndexFactory::QueryParamSerializeToJson(*param) << std::endl;\n\n    auto deserialized_param =\n        IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(\n            IndexFactory::QueryParamSerializeToJson(*param));\n    ASSERT_NE(nullptr, deserialized_param.get());\n\n    std::cout << \"serialize then de then se:\"\n              << IndexFactory::QueryParamSerializeToJson(*deserialized_param)\n              << std::endl;\n\n    ASSERT_TRUE(IndexFactory::QueryParamSerializeToJson(*deserialized_param) ==\n                IndexFactory::QueryParamSerializeToJson(*param));\n  }\n\n  
{\n    std::cout << \"\\n\\n----hnsw query----\" << std::endl;\n    auto param = HNSWQueryParamBuilder()\n                     .with_topk(10)\n                     .with_fetch_vector(true)\n                     .with_ef_search(20)\n                     .build();\n    std::cout << \"hnsw query -- omit=true: \"\n              << IndexFactory::QueryParamSerializeToJson(*param, true)\n              << std::endl;\n    std::cout << \"hnsw query -- omit=false: \"\n              << IndexFactory::QueryParamSerializeToJson(*param, false)\n              << std::endl;\n\n    auto deserialized_param =\n        IndexFactory::QueryParamDeserializeFromJson<HNSWQueryParam>(\n            IndexFactory::QueryParamSerializeToJson(*param));\n    ASSERT_NE(nullptr, deserialized_param.get());\n\n    std::cout << \"serialize then de then se:\"\n              << IndexFactory::QueryParamSerializeToJson(*deserialized_param)\n              << std::endl;\n\n    ASSERT_TRUE(IndexFactory::QueryParamSerializeToJson(*deserialized_param) ==\n                IndexFactory::QueryParamSerializeToJson(*param));\n  }\n}\n\nTEST(IndexInterface, Failure) {\n  // Test unsupported index type\n  {\n    auto param = std::make_shared<BaseIndexParam>(IndexType::kIVF);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_EQ(nullptr, index);\n  }\n\n  // Test unsupported metric type\n  {\n    auto param =\n        FlatIndexParamBuilder()\n            .WithMetricType(MetricType::kNone)  // L2 not supported for sparse\n            .WithDataType(DataType::DT_FP32)\n            .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_EQ(nullptr, index);\n  }\n\n  // Test unsupported metric type for sparse index\n  {\n    auto param =\n        FlatIndexParamBuilder()\n            .WithMetricType(MetricType::kL2sq)  // L2 not supported for sparse\n            .WithDataType(DataType::DT_FP32)\n            .WithIsSparse(true)\n            .Build();\n    auto index = 
IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_EQ(nullptr, index);\n  }\n\n  // // Test unsupported quantizer type\n  // {\n  //   auto param = FlatIndexParamBuilder()\n  //                    .WithMetricType(MetricType::kInnerProduct)\n  //                    .WithDataType(DataType::DT_INT4)\n  //                    .WithDimension(64)\n  //                    .WithIsSparse(false)\n  //                    .WithQuantizerParam(\n  //                        QuantizerParam(QuantizerType::kInt8))  //\n  //                        Unsupported\n  //                    .Build();\n  //   auto index = IndexFactory::CreateAndInitIndex(*param);\n  //   ASSERT_EQ(nullptr, index);\n  // }\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(true)\n                     .WithQuantizerParam(\n                         QuantizerParam(QuantizerType::kInt8))  // Unsupported\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_EQ(nullptr, index);\n  }\n\n  // Test unsupported data type for cosine metric\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kCosine)\n                     .WithDataType(DataType::DT_INT8)  // Unsupported for cosine\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_EQ(nullptr, index);\n  }\n\n  // Test invalid storage type\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .Build();\n    auto index = 
IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    StorageOptions invalid_storage;\n    invalid_storage.type = StorageOptions::StorageType::kNone;  // Unsupported\n    int ret = index->Open(\"test.index\", invalid_storage);\n    ASSERT_NE(0, ret);\n  }\n\n  // Test invalid vector data type for dense operations\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(\"test.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    // Try to add sparse vector to dense index\n    std::vector<uint32_t> indices = {0, 1, 2};\n    std::vector<float> values = {1.0f, 2.0f, 3.0f};\n    VectorData sparse_vector_data{\n        SparseVector{3, indices.data(), values.data()}};\n\n    int ret = index->Add(sparse_vector_data, 1);\n    ASSERT_NE(0, ret);\n\n    index->Close();\n    system(\"rm -f test.index\");\n  }\n\n  // Test invalid vector data type for sparse operations\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithIsSparse(true)\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(\"test.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    // Try to add dense vector to sparse index\n    std::vector<float> vector(64, 1.0f);\n    VectorData dense_vector_data{DenseVector{vector.data()}};\n\n    int ret = index->Add(dense_vector_data, 1);\n    ASSERT_NE(0, ret);\n\n    index->Close();\n    system(\"rm -f test.index\");\n  }\n\n  // Test fetch non-existent document\n  {\n    
auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(\"test.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    VectorDataBuffer fetched_vector_data;\n    int ret = index->Fetch(999, &fetched_vector_data);  // Non-existent doc_id\n    ASSERT_NE(0, ret);\n\n    index->Close();\n    system(\"rm -f test.index\");\n  }\n\n  // Test search with invalid vector data\n  {\n    auto param = FlatIndexParamBuilder()\n                     .WithMetricType(MetricType::kInnerProduct)\n                     .WithDataType(DataType::DT_FP32)\n                     .WithDimension(64)\n                     .WithIsSparse(false)\n                     .Build();\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(\"test.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    // Add a vector first\n    std::vector<float> vector(64, 1.0f);\n    VectorData vector_data{DenseVector{vector.data()}};\n    ASSERT_EQ(0, index->Add(vector_data, 1));\n\n    // Try to search with sparse vector in dense index\n    std::vector<uint32_t> indices = {0, 1, 2};\n    std::vector<float> values = {1.0f, 2.0f, 3.0f};\n    VectorData sparse_query{SparseVector{3, indices.data(), values.data()}};\n\n    SearchResult result;\n    FlatQueryParam::Pointer query_param =\n        FlatQueryParamBuilder().with_topk(10).with_fetch_vector(false).build();\n    int ret = index->Search(sparse_query, query_param, &result);\n    ASSERT_NE(0, ret);\n\n    index->Close();\n    system(\"rm -f test.index\");\n  }\n\n  // Test merge with invalid write concurrency\n  {\n    auto param1 = FlatIndexParamBuilder()\n                      
.WithMetricType(MetricType::kInnerProduct)\n                      .WithDataType(DataType::DT_FP32)\n                      .WithDimension(64)\n                      .WithIsSparse(false)\n                      .Build();\n    auto index1 = IndexFactory::CreateAndInitIndex(*param1);\n    ASSERT_NE(nullptr, index1);\n    index1->Open(\"test1.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    auto param2 = FlatIndexParamBuilder()\n                      .WithMetricType(MetricType::kInnerProduct)\n                      .WithDataType(DataType::DT_FP32)\n                      .WithDimension(64)\n                      .WithIsSparse(false)\n                      .Build();\n    auto index2 = IndexFactory::CreateAndInitIndex(*param2);\n    ASSERT_NE(nullptr, index2);\n    index2->Open(\"test2.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    auto param3 = FlatIndexParamBuilder()\n                      .WithMetricType(MetricType::kInnerProduct)\n                      .WithDataType(DataType::DT_FP32)\n                      .WithDimension(64)\n                      .WithIsSparse(false)\n                      .Build();\n    auto index3 = IndexFactory::CreateAndInitIndex(*param3);\n    ASSERT_NE(nullptr, index3);\n    index3->Open(\"test3.index\", {StorageOptions::StorageType::kMMAP, true});\n\n    MergeOptions invalid_options;\n    invalid_options.write_concurrency = 0;  // Invalid: must be > 0\n\n    int ret = index3->Merge({index1, index2}, IndexFilter(), invalid_options);\n    ASSERT_NE(0, ret);\n\n    index1->Close();\n    index2->Close();\n    index3->Close();\n    system(\"rm -f test1.index test2.index test3.index\");\n  }\n}\n\nTEST(IndexInterface, SerializeFailure) {\n  // Test invalid JSON deserialization\n  {\n    std::string invalid_json = \"invalid json string\";\n    auto param = IndexFactory::DeserializeIndexParamFromJson(invalid_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test JSON with invalid enum value\n  {\n    std::string 
invalid_enum_json = R\"({\n      \"index_type\": \"kInvalidType\",\n      \"metric_type\": \"kL2\",\n      \"dimension\": 64,\n      \"is_sparse\": false,\n      \"data_type\": \"DT_FP32\"\n    })\";\n    auto param = IndexFactory::DeserializeIndexParamFromJson(invalid_enum_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test JSON with invalid field type\n  {\n    std::string invalid_type_json = R\"({\n      \"index_type\": \"kFlat\",\n      \"metric_type\": \"kL2\",\n      \"dimension\": \"not_a_number\",\n      \"is_sparse\": false,\n      \"data_type\": \"DT_FP32\"\n    })\";\n    auto param = IndexFactory::DeserializeIndexParamFromJson(invalid_type_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test JSON with invalid field type\n  {\n    std::string invalid_type_json = R\"({\n      \"index_type\": \"kHNSW\",\n      \"metric_type\": \"kL2\",\n      \"dimension\": 1,\n      \"is_sparse\": \"false\",\n      \"data_type\": \"DT_FP32\"\n    })\";\n    auto param = IndexFactory::DeserializeIndexParamFromJson(invalid_type_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test unsupported index_type\n  {\n    std::string wrong_type_json = R\"({\n      \"index_type\": \"kNone\",\n      \"metric_type\": \"kL2\",\n      \"dimension\": 64,\n      \"is_sparse\": false,\n      \"data_type\": \"DT_FP32\"\n    })\";\n    auto param = IndexFactory::DeserializeIndexParamFromJson(wrong_type_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test QueryParam deserialization with invalid JSON\n  {\n    std::string invalid_json = \"invalid json\";\n    auto param = IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(\n        invalid_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test QueryParam deserialization with invalid enum\n  {\n    std::string invalid_enum_json = R\"({\n      \"index_type\": \"kInvalidType\",\n      \"topk\": 10,\n      \"fetch_vector\": false,\n      \"radius\": 0.0,\n      \"is_linear\": false\n    })\";\n    auto param = 
IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(\n        invalid_enum_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test QueryParam deserialization with invalid field type\n  {\n    std::string invalid_type_json = R\"({\n      \"index_type\": \"kFlat\",\n      \"topk\": \"not_a_number\",\n      \"fetch_vector\": false,\n      \"radius\": 0.0,\n      \"is_linear\": false\n    })\";\n    auto param = IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(\n        invalid_type_json);\n    ASSERT_EQ(nullptr, param);\n  }\n\n  // Test HNSWQueryParam deserialization with invalid field type\n  {\n    std::string invalid_type_json = R\"({\n      \"index_type\": \"kHNSW\",\n      \"topk\": 10,\n      \"fetch_vector\": false,\n      \"radius\": 0.0,\n      \"is_linear\": false,\n      \"ef_search\": \"not_a_number\"\n    })\";\n    auto param = IndexFactory::QueryParamDeserializeFromJson<HNSWQueryParam>(\n        invalid_type_json);\n    ASSERT_EQ(nullptr, param);\n  }\n}\n\nTEST(IndexInterface, Score) {\n  const std::string index_file_path = \"test_indexer.index\";\n  const int kTopk = 10;\n  constexpr uint32_t kDocId1 = 2345;\n  constexpr uint32_t kDocId2 = 5432;\n  auto vector1 = std::vector<float>{3.0f, 4.0f, 5.0f};\n  auto vector2 = std::vector<float>{1.0f, 20.0f, 3.0f};\n  auto vector_id_map = std::unordered_map<uint32_t, std::vector<float>>{\n      {kDocId1, vector1},\n      {kDocId2, vector2},\n  };\n  auto sparse_indices = std::vector<uint32_t>{0, 1, 2};\n  auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f};\n\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n  system(cmd_buf);\n\n  auto check_score = [&](const SearchResult &result, MetricType metric_type) {\n    ASSERT_EQ(result.doc_list_.size(), 2);\n\n    auto inner_produce_score_func = [&](const std::vector<float> &v1,\n                                        const std::vector<float> &v2) {\n      return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * 
v2[2];\n    };\n\n    auto cosine_score_func = [&](const std::vector<float> &v1,\n                                 const std::vector<float> &v2) {\n      return 1 - inner_produce_score_func(v1, v2) /\n                     (std::sqrt(inner_produce_score_func(v1, v1)) *\n                      std::sqrt(inner_produce_score_func(v2, v2)));\n    };\n\n    // SquaredEuclidean\n    auto l2_score_func = [&](const std::vector<float> &v1,\n                             const std::vector<float> &v2) {\n      assert(v1.size() == 3);\n      assert(v2.size() == 3);\n      float ret = 0.0f;\n      for (int i = 0; i < v1.size(); ++i) {\n        ret += (v1[i] - v2[i]) * (v1[i] - v2[i]);\n      }\n      return ret;\n    };\n\n    std::function<float(const std::vector<float> &, const std::vector<float> &)>\n        score_func;\n\n    switch (metric_type) {\n      case MetricType::kInnerProduct:\n        score_func = inner_produce_score_func;\n        break;\n      case MetricType::kCosine:\n        score_func = cosine_score_func;\n        break;\n      case MetricType::kL2sq:\n        score_func = l2_score_func;\n        break;\n      default:\n        ASSERT_TRUE(false);\n    }\n\n    // Iterate over doc_list_ and check scores\n    ASSERT_GE(result.doc_list_.size(), 2);\n    printf(\"result.doc_list_[0].score() top1: %f\\n\",\n           result.doc_list_[0].score());\n    printf(\n        \"score_func(vector_id_map[result.doc_list_[0].key()], query_vector): \"\n        \"%f\\n\",\n        score_func(vector_id_map[result.doc_list_[0].key()], query_vector));\n    ASSERT_TRUE(std::abs(result.doc_list_[0].score() -\n                         score_func(vector_id_map[result.doc_list_[0].key()],\n                                    query_vector)) < 1e-2);\n    printf(\"result.doc_list_[1].score() top2: %f\\n\",\n           result.doc_list_[1].score());\n    printf(\n        \"score_func(vector_id_map[result.doc_list_[1].key()], query_vector): \"\n        \"%f\\n\",\n        
score_func(vector_id_map[result.doc_list_[1].key()], query_vector));\n    ASSERT_TRUE(std::abs(result.doc_list_[1].score() -\n                         score_func(vector_id_map[result.doc_list_[1].key()],\n                                    query_vector)) < 1e-2);\n  };\n\n  auto dense_func = [&](const BaseIndexParam::Pointer &param,\n                        const BaseIndexQueryParam::Pointer query_param,\n                        MetricType metric_type) {\n    system(cmd_buf);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(index_file_path, {StorageOptions::StorageType::kMMAP, true});\n\n    VectorData vector_data1;\n    vector_data1.vector = DenseVector{vector1.data()};\n    ASSERT_EQ(0, index->Add(vector_data1, kDocId1));\n\n    VectorData vector_data2;\n    vector_data2.vector = DenseVector{vector2.data()};\n    ASSERT_EQ(0, index->Add(vector_data2, kDocId2));\n\n    SearchResult result;\n    VectorData query;\n    query.vector = DenseVector{query_vector.data()};\n    index->Search(query, query_param, &result);\n\n    check_score(result, metric_type);\n\n    index->Close();\n    system(cmd_buf);\n  };\n\n  auto sparse_func = [&](const BaseIndexParam::Pointer &param,\n                         const BaseIndexQueryParam::Pointer query_param,\n                         MetricType metric_type) {\n    system(cmd_buf);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(index_file_path, {StorageOptions::StorageType::kMMAP, true});\n\n    VectorData vector_data1;\n    vector_data1.vector =\n        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),\n                     vector1.data()};\n    ASSERT_EQ(0, index->Add(vector_data1, kDocId1));\n\n    VectorData vector_data2;\n    vector_data2.vector =\n        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),\n                     vector2.data()};\n    ASSERT_EQ(0, 
index->Add(vector_data2, kDocId2));\n\n    SearchResult result;\n    VectorData query;\n    query.vector =\n        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),\n                     query_vector.data()};\n    index->Search(query, query_param, &result);\n\n    check_score(result, metric_type);\n\n    index->Close();\n    system(cmd_buf);\n  };\n\n  constexpr uint32_t kDimension = 3;\n\n  LOG_INFO(\"Test DenseVector, MetricType::kInnerProduct\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kInnerProduct)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kInnerProduct);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kInnerProduct)\n                 .WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kInnerProduct);\n\n  LOG_INFO(\"Test DenseVector, MetricType::kInnerProduct, QuantizerType::kFP16\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kInnerProduct)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kInnerProduct);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kInnerProduct)\n                 
.WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kInnerProduct);\n\n  LOG_INFO(\"Test DenseVector, MetricType::kCosine\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kCosine)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kCosine);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kCosine)\n                 .WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kCosine);\n\n  LOG_INFO(\"Test DenseVector, MetricType::kCosine, QuantizerType::kFP16\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kCosine)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kCosine);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kCosine)\n              
   .WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kCosine);\n\n  LOG_INFO(\"Test DenseVector, MetricType::kL2sq\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kL2sq)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kL2sq);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kL2sq)\n                 .WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kL2sq);\n\n  LOG_INFO(\"Test DenseVector, MetricType::kL2sq, QuantizerType::kFP16\");\n  dense_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kL2sq)\n          .WithDataType(DataType::DT_FP32)\n          .WithDimension(kDimension)\n          .WithIsSparse(false)\n          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kL2sq);\n  dense_func(HNSWIndexParamBuilder()\n                 .WithMetricType(MetricType::kL2sq)\n                 
.WithDataType(DataType::DT_FP32)\n                 .WithDimension(kDimension)\n                 .WithIsSparse(false)\n                 .WithEFConstruction(100)\n                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n                 .Build(),\n             HNSWQueryParamBuilder()\n                 .with_topk(kTopk)\n                 .with_fetch_vector(true)\n                 .with_ef_search(20)\n                 .build(),\n             MetricType::kL2sq);\n\n  LOG_INFO(\"Test SparseVector, MetricType::kInnerProduct\");\n  sparse_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kInnerProduct)\n          .WithDataType(DataType::DT_FP32)\n          .WithIsSparse(true)\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kInnerProduct);\n  sparse_func(HNSWIndexParamBuilder()\n                  .WithMetricType(MetricType::kInnerProduct)\n                  .WithDataType(DataType::DT_FP32)\n                  .WithIsSparse(true)\n                  .WithEFConstruction(100)\n                  .Build(),\n              HNSWQueryParamBuilder()\n                  .with_topk(kTopk)\n                  .with_fetch_vector(true)\n                  .with_ef_search(20)\n                  .build(),\n              MetricType::kInnerProduct);\n\n  LOG_INFO(\n      \"Test SparseVector, MetricType::kInnerProduct, QuantizerType::kFP16\");\n  sparse_func(\n      FlatIndexParamBuilder()\n          .WithMetricType(MetricType::kInnerProduct)\n          .WithDataType(DataType::DT_FP32)\n          .WithIsSparse(true)\n          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n          .Build(),\n      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),\n      MetricType::kInnerProduct);\n  sparse_func(HNSWIndexParamBuilder()\n                  .WithMetricType(MetricType::kInnerProduct)\n                  .WithDataType(DataType::DT_FP32)\n              
    .WithIsSparse(true)\n                  .WithEFConstruction(100)\n                  .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))\n                  .Build(),\n              HNSWQueryParamBuilder()\n                  .with_topk(kTopk)\n                  .with_fetch_vector(true)\n                  .with_ef_search(20)\n                  .build(),\n              MetricType::kInnerProduct);\n}\n\n#if RABITQ_SUPPORTED\nTEST(IndexInterface, HNSWRabitqGeneral) {\n  constexpr uint32_t kDimension = 64;\n  const std::string index_name{\"test_rabitq.index\"};\n  char cmd_buf[256];\n  snprintf(cmd_buf, sizeof(cmd_buf), \"rm -f %s*\", index_name.c_str());\n\n  auto func = [&](const BaseIndexParam::Pointer &param,\n                  const BaseIndexQueryParam::Pointer &query_param) {\n    system(cmd_buf);\n    auto index = IndexFactory::CreateAndInitIndex(*param);\n    ASSERT_NE(nullptr, index);\n\n    index->Open(index_name, {StorageOptions::StorageType::kMMAP, true});\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 2.0f;\n    VectorData vector_data;\n    vector_data.vector = DenseVector{vector.data()};\n    ASSERT_TRUE(0 == index->Add(vector_data, 233));\n    ASSERT_TRUE(0 == index->Train());\n\n    SearchResult result;\n    VectorData query;\n    query.vector = DenseVector{vector.data()};\n    index->Search(query, query_param, &result);\n    ASSERT_EQ(1, result.doc_list_.size());\n    ASSERT_EQ(233, result.doc_list_[0].key());\n\n    // Fetch is meaningless for HNSWRabitq\n    index->Close();\n    system(cmd_buf);\n  };\n\n  using namespace zvec::core;\n  using namespace zvec::ailego;\n  auto holder = std::make_shared<\n      zvec::core::MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(\n      kDimension);\n  size_t doc_cnt = 500UL;\n  for (size_t i = 0; i < doc_cnt; i++) {\n    NumericalVector<float> vec(kDimension);\n    for (size_t j = 0; j < kDimension; ++j) {\n      vec[j] = static_cast<float>(i);\n    }\n    
ASSERT_TRUE(holder->emplace(i, vec));\n  }\n  std::shared_ptr<IndexMeta> index_meta_ptr_;\n  index_meta_ptr_.reset(\n      new (std::nothrow) IndexMeta(IndexMeta::DataType::DT_FP32, kDimension));\n  index_meta_ptr_->set_metric(\"SquaredEuclidean\", 0, Params());\n\n  RabitqConverter converter;\n  converter.init(*index_meta_ptr_, Params());\n  ASSERT_EQ(converter.train(holder), 0);\n  std::shared_ptr<IndexReformer> index_reformer;\n  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);\n\n  // HNSWRabitq with default total_bits\n  func(HNSWRabitqIndexParamBuilder()\n           .WithMetricType(MetricType::kL2sq)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .WithProvider(holder)\n           .WithReformer(index_reformer)\n           .Build(),\n       HNSWRabitqQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(false)\n           .with_ef_search(50)\n           .build());\n\n  // HNSWRabitq with InnerProduct metric\n  func(HNSWRabitqIndexParamBuilder()\n           .WithMetricType(MetricType::kInnerProduct)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .WithProvider(holder)\n           .WithReformer(index_reformer)\n           .Build(),\n       HNSWRabitqQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(false)\n           .with_ef_search(50)\n           .build());\n\n  // HNSWRabitq with custom total_bits\n  func(HNSWRabitqIndexParamBuilder()\n           .WithMetricType(MetricType::kL2sq)\n           .WithDataType(DataType::DT_FP32)\n           .WithDimension(kDimension)\n           .WithIsSparse(false)\n           .WithEFConstruction(100)\n           .WithTotalBits(2)\n           .WithProvider(holder)\n           .WithReformer(index_reformer)\n           .Build(),\n       
HNSWRabitqQueryParamBuilder()\n           .with_topk(10)\n           .with_fetch_vector(false)\n           .with_ef_search(50)\n           .build());\n}\n#endif\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/core/metric/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_metric core_quantizer\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core/\n    )\nendforeach()"
  },
  {
    "path": "tests/core/metric/cosine_metric_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <ailego/math/norm_matrix.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include \"zvec/core/framework/index_factory.h\"\n\n\nusing namespace zvec;\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\nstatic void Norm2(std::vector<Float16> &vec, std::string *out) {\n  float norm = 0.0f;\n\n  out->resize(vec.size() * sizeof(Float16) + sizeof(float));\n\n  Norm2Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &norm);\n\n  Float16 *buf = reinterpret_cast<Float16 *>(&(*out)[0]);\n\n  for (uint32_t i = 0; i < vec.size(); ++i) {\n    buf[i] = vec[i] / norm;\n  }\n\n  float *norm_buf =\n      reinterpret_cast<float *>(&(*out)[vec.size() * sizeof(Float16)]);\n\n  memcpy(norm_buf, &norm, sizeof(float));\n}\n\nstatic void Norm2(std::vector<float> &vec, std::string *out) {\n  float norm = 0.0f;\n\n  out->resize((vec.size() + 1) * sizeof(float));\n\n  Norm2Matrix<float, 1>::Compute(vec.data(), vec.size(), &norm);\n\n  float *buf = reinterpret_cast<float *>(&(*out)[0]);\n  for (uint32_t i = 0; i < vec.size(); ++i) {\n    buf[i] = vec[i] / norm;\n  }\n\n  buf[vec.size()] = norm;\n}\n\nstatic size_t ExtraDimension(IndexMeta::DataType type) {\n  // The extra quantized params storage size to save for each vector\n  if (type == IndexMeta::DT_FP32) return 1;\n  if (type == 
IndexMeta::DT_FP16) return 2;\n\n  return 0;\n}\n\nTEST(CosineMeasure_General_Test, General) {\n  auto measure = IndexFactory::CreateMetric(\"Cosine\");\n  EXPECT_TRUE(measure);\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_INT16, 64);\n  ASSERT_NE(0, measure->init(meta, Params()));\n  meta.set_meta(IndexMeta::DT_FP16, 64);\n  ASSERT_EQ(0, measure->init(meta, Params()));\n  meta.set_meta(IndexMeta::DT_FP32, 64);\n  ASSERT_EQ(0, measure->init(meta, Params()));\n  meta.set_meta(IndexMeta::DT_INT8, 64);\n  ASSERT_NE(0, measure->init(meta, Params()));\n\n  meta.set_meta(IndexMeta::DT_BINARY32, 64);\n  ASSERT_NE(0, measure->init(meta, Params()));\n  meta.set_meta(IndexMeta::DT_BINARY64, 64);\n  ASSERT_NE(0, measure->init(meta, Params()));\n  meta.set_meta(IndexMeta::DT_INT4, 64);\n  ASSERT_NE(0, measure->init(meta, Params()));\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DT_BINARY32, 64);\n  EXPECT_FALSE(measure->is_matched(meta2));\n  EXPECT_TRUE(\n      measure->is_matched(meta, IndexQueryMeta(IndexMeta::DT_FP32, 64)));\n  EXPECT_FALSE(\n      measure->is_matched(meta, IndexQueryMeta(IndexMeta::DT_FP32, 63)));\n\n  EXPECT_FALSE(measure->distance_matrix(0, 0));\n  EXPECT_FALSE(measure->distance_matrix(3, 5));\n  EXPECT_FALSE(measure->distance_matrix(31, 65));\n  EXPECT_TRUE(measure->distance_matrix(1, 1));\n  EXPECT_FALSE(measure->distance_matrix(2, 1));\n  EXPECT_FALSE(measure->distance_matrix(2, 2));\n  EXPECT_FALSE(measure->distance_matrix(4, 1));\n  EXPECT_FALSE(measure->distance_matrix(4, 2));\n  EXPECT_FALSE(measure->distance_matrix(4, 4));\n  EXPECT_FALSE(measure->distance_matrix(8, 1));\n  EXPECT_FALSE(measure->distance_matrix(8, 2));\n  EXPECT_FALSE(measure->distance_matrix(8, 4));\n  EXPECT_FALSE(measure->distance_matrix(8, 8));\n  EXPECT_FALSE(measure->distance_matrix(16, 1));\n  EXPECT_FALSE(measure->distance_matrix(16, 2));\n  EXPECT_FALSE(measure->distance_matrix(16, 4));\n  EXPECT_FALSE(measure->distance_matrix(16, 8));\n  
EXPECT_FALSE(measure->distance_matrix(16, 16));\n  EXPECT_FALSE(measure->distance_matrix(32, 1));\n  EXPECT_FALSE(measure->distance_matrix(32, 2));\n  EXPECT_FALSE(measure->distance_matrix(32, 4));\n  EXPECT_FALSE(measure->distance_matrix(32, 8));\n  EXPECT_FALSE(measure->distance_matrix(32, 16));\n  EXPECT_FALSE(measure->distance_matrix(32, 32));\n\n  EXPECT_FALSE(measure->support_normalize());\n  float result = 1.0f;\n  measure->normalize(&result);\n  EXPECT_FLOAT_EQ(1.0f, result);\n}\n\nTEST(CosineMeasure_General_Test, TestDistanceFp32) {\n  {\n    constexpr uint32_t dimension = 2;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP32, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<float> a = {0.2f, 0.9f};\n    std::vector<float> b = {0.3f, 0.5f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    distance(a_out.data(), b_out.data(),\n             dimension + ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.05131668f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.05131668f));\n  }\n\n  {\n    constexpr uint32_t dimension = 3;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP32, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    
Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<float> a = {0.2f, 0.9f, 0.6f};\n    std::vector<float> b = {0.3f, 0.5f, 0.7f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    distance(a_out.data(), b_out.data(),\n             dimension + ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.07199293f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.07199293f));\n  }\n\n  {\n    constexpr uint32_t dimension = 11;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP32, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<float> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                            5.2f, 2.1f, 7.1f, 6.8f, 1.2f};\n    std::vector<float> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,\n                            1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    distance(a_out.data(), b_out.data(),\n             dimension + 
ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.2803060f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP32), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(result - 0.2803060f));\n  }\n}\n\nTEST(CosineMeasure_General_Test, TestDistanceFp16) {\n  {\n    constexpr uint32_t dimension = 2;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP16, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<Float16> a = {0.2f, 0.9f};\n    std::vector<Float16> b = {0.3f, 0.5f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    distance(a_out.data(), b_out.data(),\n             dimension + ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.05131668f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.05131668f));\n  }\n\n  {\n    constexpr uint32_t dimension = 3;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP16, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    
ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<Float16> a = {0.2f, 0.9f, 0.6f};\n    std::vector<Float16> b = {0.3f, 0.5f, 0.7f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    distance(a_out.data(), b_out.data(),\n             dimension + ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.07199293f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.07199293f));\n  }\n\n  {\n    constexpr uint32_t dimension = 11;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP16, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto distance = measure->distance();\n    ASSERT_NE(distance, nullptr);\n    auto dist_matrix = measure->distance_matrix(1, 1);\n    ASSERT_NE(dist_matrix, nullptr);\n\n    std::vector<Float16> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                              5.2f, 2.1f, 7.1f, 6.8f, 1.2f};\n    std::vector<Float16> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,\n                              1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float result = 0.0f;\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + 
ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.2803060f));\n\n    dist_matrix(a_out.data(), b_out.data(),\n                dimension + ExtraDimension(IndexMeta::DT_FP16), &result);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&result);\n    }\n\n    EXPECT_GE(0.001f, std::abs(result - 0.2803060f));\n  }\n}\n\nTEST(CosineMeasure_General_Test, TestDistanceBatchFp16Simple) {\n  {\n    constexpr uint32_t dimension = 2;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP16, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, measure->support_train());\n\n    auto dist_batch = measure->batch_distance();\n    ASSERT_NE(dist_batch, nullptr);\n\n    std::vector<Float16> a = {0.2f, 0.9f};\n    std::vector<Float16> b = {0.3f, 0.5f};\n\n    std::string a_out;\n    std::string b_out;\n\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float results[2] = {0.0f, 0.0f};\n\n    const void *vecs[2];\n    vecs[0] = a_out.data();\n    vecs[1] = b_out.data();\n    dist_batch(vecs, b_out.data(), 2,\n               dimension + ExtraDimension(IndexMeta::DT_FP16), results);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&results[0]);\n      measure->normalize(&results[1]);\n    }\n\n    EXPECT_GE(0.001f, std::abs(results[0] - 0.05131668f));\n    EXPECT_GE(0.001f, std::abs(results[1] - 0.0f));\n  }\n}\n\nTEST(CosineMeasure_General_Test, TestDistanceBatchFp32Simple) {\n  {\n    constexpr uint32_t dimension = 2;\n    IndexMeta meta;\n    meta.set_meta(IndexMeta::DT_FP32, dimension);\n\n    auto measure = IndexFactory::CreateMetric(\"Cosine\");\n    ASSERT_TRUE(measure);\n    Params params;\n    ASSERT_EQ(0, measure->init(meta, params));\n    ASSERT_EQ(false, 
measure->support_train());\n\n    auto dist_batch = measure->batch_distance();\n    ASSERT_NE(dist_batch, nullptr);\n\n    std::vector<float> a = {0.2f, 0.9f};\n    std::vector<float> b = {0.3f, 0.5f};\n\n    std::string a_out;\n    std::string b_out;\n\n    Norm2(a, &a_out);\n    Norm2(b, &b_out);\n\n    float results[2] = {0.0f, 0.0f};\n\n    const void *vecs[2];\n    vecs[0] = a_out.data();\n    vecs[1] = b_out.data();\n    dist_batch(vecs, b_out.data(), 2,\n               dimension + ExtraDimension(IndexMeta::DT_FP32), results);\n\n    if (measure->support_normalize()) {\n      measure->normalize(&results[0]);\n      measure->normalize(&results[1]);\n    }\n\n    EXPECT_GE(0.00001f, std::abs(results[0] - 0.05131668f));\n    EXPECT_GE(0.00001f, std::abs(results[1] - 0.0f));\n  }\n}\n\ntemplate <typename T>\nvoid calculate_distance(std::vector<T> &a, std::vector<T> &b, size_t dimension,\n                        IndexMeta::DataType data_type, size_t batch_size,\n                        float expected_distance, float epsilon = 0.00001f) {\n  IndexMeta meta;\n  meta.set_meta(data_type, dimension);\n\n  auto measure = IndexFactory::CreateMetric(\"Cosine\");\n  ASSERT_TRUE(measure);\n  Params params;\n  ASSERT_EQ(0, measure->init(meta, params));\n  ASSERT_EQ(false, measure->support_train());\n\n  auto dist_batch = measure->batch_distance();\n  ASSERT_NE(dist_batch, nullptr);\n\n  std::string a_out;\n  std::string b_out;\n\n  Norm2(a, &a_out);\n  Norm2(b, &b_out);\n\n  float results[2] = {0.0f, 0.0f};\n\n  const void *vecs[2];\n  vecs[0] = a_out.data();\n  vecs[1] = b_out.data();\n  dist_batch(vecs, b_out.data(), batch_size,\n             dimension + ExtraDimension(data_type), results);\n\n  if (measure->support_normalize()) {\n    measure->normalize(&results[0]);\n    measure->normalize(&results[1]);\n  }\n\n  EXPECT_GE(epsilon, std::abs(results[0] - expected_distance));\n  EXPECT_GE(epsilon, std::abs(results[1] - 0.0f));\n}\n\n\nTEST(CosineMeasure_General_Test, 
TestDistanceBatch) {\n  {\n    constexpr uint32_t dimension = 2;\n\n    {\n      std::vector<float> a = {0.2f, 0.9f};\n      std::vector<float> b = {0.3f, 0.5f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 1, 0.05131668f,\n                         0.00001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 2, 0.05131668f,\n                         0.00001f);\n    }\n    {\n      std::vector<Float16> a = {0.2f, 0.9f};\n      std::vector<Float16> b = {0.3f, 0.5f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 1, 0.05131668f,\n                         0.001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 2, 0.05131668f,\n                         0.001f);\n    }\n  }\n\n  {\n    constexpr uint32_t dimension = 3;\n\n\n    {\n      std::vector<float> a = {0.2f, 0.9f, 0.6f};\n      std::vector<float> b = {0.3f, 0.5f, 0.7f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 1, 0.07199293f,\n                         0.00001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 2, 0.07199293f,\n                         0.00001f);\n    }\n    {\n      std::vector<Float16> a = {0.2f, 0.9f, 0.6f};\n      std::vector<Float16> b = {0.3f, 0.5f, 0.7f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 1, 0.07199293f,\n                         0.001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 2, 0.07199293f,\n                         0.001f);\n    }\n  }\n\n  {\n    constexpr uint32_t dimension = 11;\n\n    {\n      std::vector<float> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                              5.2f, 2.1f, 7.1f, 6.8f, 1.2f};\n      std::vector<float> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,\n                              1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 1, 0.2803060f,\n                         0.00001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP32, 2, 
0.2803060f,\n                         0.00001f);\n    }\n\n    {\n      std::vector<Float16> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,\n                                5.2f, 2.1f, 7.1f, 6.8f, 1.2f};\n      std::vector<Float16> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,\n                                1.0f, 2.3f, 3.4f, 4.5f, 6.4f};\n\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 1, 0.2803060f,\n                         0.001f);\n      calculate_distance(a, b, dimension, IndexMeta::DT_FP16, 2, 0.2803060f,\n                         0.001f);\n    }\n  }\n}\n"
  },
  {
    "path": "tests/core/metric/euclidean_metric_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(SquaredEuclideanMetric, General) {\n  auto metric = IndexFactory::CreateMetric(\"SquaredEuclidean\");\n  EXPECT_TRUE(metric);\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_INT16, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP16, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT4, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  EXPECT_TRUE(metric->is_matched(meta));\n  EXPECT_FALSE(metric->is_matched(meta2));\n  EXPECT_TRUE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));\n  EXPECT_FALSE(metric->is_matched(\n 
     meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));\n\n  EXPECT_FALSE(metric->distance_matrix(0, 0));\n  EXPECT_FALSE(metric->distance_matrix(3, 5));\n  EXPECT_FALSE(metric->distance_matrix(31, 65));\n  EXPECT_TRUE(metric->distance_matrix(1, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 1));\n  EXPECT_TRUE(metric->distance_matrix(4, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 1));\n  EXPECT_TRUE(metric->distance_matrix(8, 2));\n  EXPECT_TRUE(metric->distance_matrix(8, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 8));\n  EXPECT_FALSE(metric->distance_matrix(8, 32));\n  EXPECT_FALSE(metric->distance_matrix(8, 9));\n  EXPECT_TRUE(metric->distance_matrix(16, 1));\n  EXPECT_TRUE(metric->distance_matrix(16, 2));\n  EXPECT_TRUE(metric->distance_matrix(16, 4));\n  EXPECT_TRUE(metric->distance_matrix(16, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 16));\n  EXPECT_FALSE(metric->distance_matrix(16, 17));\n  EXPECT_TRUE(metric->distance_matrix(32, 1));\n  EXPECT_TRUE(metric->distance_matrix(32, 2));\n  EXPECT_TRUE(metric->distance_matrix(32, 4));\n  EXPECT_TRUE(metric->distance_matrix(32, 8));\n  EXPECT_TRUE(metric->distance_matrix(32, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 32));\n\n  EXPECT_FALSE(metric->support_normalize());\n  float result = 1.0f;\n  metric->normalize(&result);\n  EXPECT_FLOAT_EQ(1.0f, result);\n}\n\nTEST(EuclideanMetric, General) {\n  auto metric = IndexFactory::CreateMetric(\"Euclidean\");\n  EXPECT_TRUE(metric);\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_INT16, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  
meta.set_meta(IndexMeta::DataType::DT_FP16, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT4, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  EXPECT_TRUE(metric->is_matched(meta));\n  EXPECT_FALSE(metric->is_matched(meta2));\n  EXPECT_TRUE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));\n  EXPECT_FALSE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));\n\n  EXPECT_FALSE(metric->distance_matrix(0, 0));\n  EXPECT_FALSE(metric->distance_matrix(3, 5));\n  EXPECT_FALSE(metric->distance_matrix(31, 65));\n  EXPECT_TRUE(metric->distance_matrix(1, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 1));\n  EXPECT_TRUE(metric->distance_matrix(4, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 1));\n  EXPECT_TRUE(metric->distance_matrix(8, 2));\n  EXPECT_TRUE(metric->distance_matrix(8, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 1));\n  EXPECT_TRUE(metric->distance_matrix(16, 2));\n  EXPECT_TRUE(metric->distance_matrix(16, 4));\n  EXPECT_TRUE(metric->distance_matrix(16, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 1));\n  EXPECT_TRUE(metric->distance_matrix(32, 2));\n  EXPECT_TRUE(metric->distance_matrix(32, 4));\n  EXPECT_TRUE(metric->distance_matrix(32, 8));\n  EXPECT_TRUE(metric->distance_matrix(32, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 32));\n\n  EXPECT_FALSE(metric->support_normalize());\n  float 
result = 1.0f;\n  metric->normalize(&result);\n  EXPECT_FLOAT_EQ(1.0f, result);\n}\n"
  },
  {
    "path": "tests/core/metric/hamming_metric_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(HammingMetric, General) {\n  auto metric = IndexFactory::CreateMetric(\"Hamming\");\n  ASSERT_TRUE(metric);\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP16, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_INT8, 64);\n  EXPECT_TRUE(metric->is_matched(meta));\n  EXPECT_FALSE(metric->is_matched(meta2));\n  EXPECT_TRUE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_BINARY32, 64)));\n  EXPECT_FALSE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_BINARY32, 63)));\n\n  EXPECT_FALSE(metric->distance_matrix(0, 0));\n  
EXPECT_FALSE(metric->distance_matrix(3, 5));\n  EXPECT_FALSE(metric->distance_matrix(31, 65));\n  EXPECT_TRUE(metric->distance_matrix(1, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 1));\n  EXPECT_TRUE(metric->distance_matrix(4, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 1));\n  EXPECT_TRUE(metric->distance_matrix(8, 2));\n  EXPECT_TRUE(metric->distance_matrix(8, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 1));\n  EXPECT_TRUE(metric->distance_matrix(16, 2));\n  EXPECT_TRUE(metric->distance_matrix(16, 4));\n  EXPECT_TRUE(metric->distance_matrix(16, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 1));\n  EXPECT_TRUE(metric->distance_matrix(32, 2));\n  EXPECT_TRUE(metric->distance_matrix(32, 4));\n  EXPECT_TRUE(metric->distance_matrix(32, 8));\n  EXPECT_TRUE(metric->distance_matrix(32, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 32));\n\n  EXPECT_FALSE(metric->support_normalize());\n  float result = 1.0f;\n  metric->normalize(&result);\n  EXPECT_FLOAT_EQ(1.0f, result);\n}"
  },
  {
    "path": "tests/core/metric/inner_product_metric_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(InnerProductMetric, General) {\n  auto metric = IndexFactory::CreateMetric(\"InnerProduct\");\n  ASSERT_TRUE(metric);\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);\n  ASSERT_NE(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP16, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT4, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);\n  ASSERT_EQ(0, metric->init(meta, ailego::Params()));\n\n  IndexMeta meta2;\n  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, 64);\n  EXPECT_TRUE(metric->is_matched(meta));\n  EXPECT_FALSE(metric->is_matched(meta2));\n  EXPECT_TRUE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));\n  EXPECT_FALSE(metric->is_matched(\n      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));\n\n  EXPECT_FALSE(metric->distance_matrix(0, 0));\n  
EXPECT_FALSE(metric->distance_matrix(3, 5));\n  EXPECT_FALSE(metric->distance_matrix(31, 65));\n  EXPECT_TRUE(metric->distance_matrix(1, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 1));\n  EXPECT_TRUE(metric->distance_matrix(2, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 1));\n  EXPECT_TRUE(metric->distance_matrix(4, 2));\n  EXPECT_TRUE(metric->distance_matrix(4, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 1));\n  EXPECT_TRUE(metric->distance_matrix(8, 2));\n  EXPECT_TRUE(metric->distance_matrix(8, 4));\n  EXPECT_TRUE(metric->distance_matrix(8, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 1));\n  EXPECT_TRUE(metric->distance_matrix(16, 2));\n  EXPECT_TRUE(metric->distance_matrix(16, 4));\n  EXPECT_TRUE(metric->distance_matrix(16, 8));\n  EXPECT_TRUE(metric->distance_matrix(16, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 1));\n  EXPECT_TRUE(metric->distance_matrix(32, 2));\n  EXPECT_TRUE(metric->distance_matrix(32, 4));\n  EXPECT_TRUE(metric->distance_matrix(32, 8));\n  EXPECT_TRUE(metric->distance_matrix(32, 16));\n  EXPECT_TRUE(metric->distance_matrix(32, 32));\n\n  EXPECT_TRUE(metric->support_normalize());\n  float result = 1.0f;\n  metric->normalize(&result);\n  EXPECT_FLOAT_EQ(-1.0f, result);\n}"
  },
  {
    "path": "tests/core/metric/quantized_integer_metric_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <fstream>\n#include <iostream>\n#include <unordered_set>\n#include <ailego/math/distance.h>\n#include <ailego/math/norm_matrix.h>\n#include <ailego/math/normalizer.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_flow.h>\n#include \"core/quantizer/quantizer_params.h\"\n#include \"zvec/core/framework/index_factory.h\"\n\n\nusing namespace zvec;\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\nstatic IndexHolder::Pointer GetHolder(\n    size_t dim, size_t count, std::uniform_real_distribution<float> &dist) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);\n  for (size_t i = 0; i < count; ++i) {\n    ailego::NumericalVector<float> vec(dim);\n    for (size_t j = 0; j < dim; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n  }\n  return holder;\n}\n\nstatic inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,\n                                   size_t N) {\n  for (size_t n = 0; n < N * M; n++) {\n    size_t i = n / N;\n    size_t j = n % N;\n    dst[n] = src[M * j + i];\n  }\n}\n\n//! 
Test whether two floating point numbers are equal\ntemplate <class T>\nstatic inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->\n    typename std::enable_if<std::is_floating_point<T>::value, bool>::type {\n  // the machine epsilon has to be scaled to the magnitude of the values used\n  // and multiplied by the desired precision in ULPs (units in the last place)\n  return ((std::fabs(x - y) <=\n           std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp) ||\n          (std::fabs(x - y) < std::numeric_limits<T>::min()));\n}\n\nTEST(QuantizedIntegerMetric, General) {\n  auto metric = IndexFactory::CreateMetric(\"MipsSquaredEuclidean\");\n  ASSERT_TRUE(metric);\n\n  Params params;\n\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n  const size_t DIMENSION = 21;\n  ailego::NumericalVector<float> x(DIMENSION);\n  ailego::NumericalVector<float> X(DIMENSION);\n  ailego::NumericalVector<float> y(DIMENSION);\n  ailego::NumericalVector<float> Y(DIMENSION);\n  float xa = dist(gen);\n  float xb = dist(gen);\n  float ya = dist(gen);\n  float yb = dist(gen);\n  float x2 = 0, x1 = 0, y2 = 0, y1 = 0;\n  float X2 = 0;\n  float xx2 = 0;\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    x[j] = dist(gen);\n    printf(\"%f \", x[j]);\n    X[j] = x[j] * xa + xb;\n    x1 += x[j];\n    X2 += X[j] * X[j];\n    xx2 += x[j] * x[j];\n  }\n  printf(\"\\n\");\n\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    y[j] = dist(gen);\n    Y[j] = y[j] * ya + yb;\n    y1 += y[j];\n    printf(\"%f \", y[j]);\n  }\n  printf(\"\\n\");\n\n  auto v1 = ailego::Distance::SquaredEuclidean(X.data(), Y.data(), DIMENSION);\n  auto ip = ailego::Distance::InnerProduct(x.data(), y.data(), DIMENSION);\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(x.data(), DIMENSION, &x2);\n  ailego::SquaredNorm2Matrix<float, 1>::Compute(y.data(), DIMENSION, &y2);\n#if 0\n  ailego::Norm1Matrix<float, 1>::Compute(x.data(), DIMENSION, &x1);\n  
ailego::Norm1Matrix<float, 1>::Compute(y.data(), DIMENSION, &y1);\n#endif\n  auto v2 = xa * xa * x2 + ya * ya * y2 - 2 * xa * ya * ip +\n            (xb - yb) * (xb - yb) * DIMENSION +\n            2 * (xb - yb) * (xa * x1 - ya * y1);\n  auto t1 = (xa * x[0] - ya * y[0]) + (xb - yb);\n  auto t2 = (xa * x[1] - ya * y[1]) + (xb - yb);\n  auto v3 = t1 * t1 + t2 * t2;\n  printf(\n      \"x=%f y=%f X=%f Y=%f, xa=%f xb=%f ya=%f yb=%f, x2=%f y2=%f x1=%f y1=%f \"\n      \"ip=%f\\n\",\n      x[0], y[0], X[0], Y[0], xa, xb, ya, yb, x2, y2, x1, y1, ip);\n  printf(\"v1=%f v2=%f v3=%f\\n\", v1, v2, v3);\n\n  auto IP = ailego::Distance::InnerProduct(X.data(), Y.data(), DIMENSION);\n  auto v = xa * ya * ip + xb * ya * y1 + xa * yb * x1 + xb * yb * DIMENSION;\n  printf(\"V=%f %f\\n\", IP, v);\n\n  printf(\"=========\\n\");\n  float mips;\n  ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(\n      X.data(), Y.data(), DIMENSION, 0.0, &mips);\n  printf(\"u2=%f v2=%f\\n\", x2, y2);\n  float uu2 = xa * xa * x2 + 2 * xa * xb * x1 + xb * xb * DIMENSION;\n  float vv2 = ya * ya * y2 + 2 * ya * yb * y1 + yb * yb * DIMENSION;\n  float v7 = 2.0 - 2.0 * v / std::max(uu2, vv2);\n  printf(\"mips=%f v7=%f\\n\", mips, v7);\n  printf(\"X2=%f uu2=%f xx2=%f x2=%f\\n\", X2, uu2, xx2, x2);\n}\n\nTEST(QuantizedIntegerMetric, TestInt8SquaredEuclidean) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 1000;\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  auto converter = IndexFactory::CreateConverter(\"Int8StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, 
holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 =\n        ailego::Distance::SquaredEuclidean(mf, vec.data(), holder->dimension());\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    // printf(\"%f %f\\n\", v1, v2);\n    ASSERT_NEAR(v1, v2, 0.1 * (DIMENSION + 1));\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt8SquaredEuclideanReformer) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n  std::uniform_int_distribution<int> dist2(0, 1);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 
std::uniform_int_distribution<int>(1, 128)(gen);\n  auto reformer = IndexFactory::CreateReformer(\"Int8StreamingReformer\");\n  ASSERT_TRUE(!!reformer);\n  ASSERT_EQ(0u, reformer->init(Params()));\n\n  ailego::NumericalVector<float> vecs(DIMENSION * COUNT);\n  for (size_t j = 0; j < DIMENSION * COUNT; ++j) {\n    vecs[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta1;\n  std::string out;\n  ASSERT_EQ(0,\n            dist2(gen)\n                ? reformer->transform(vecs.data(), qmeta, COUNT, &out, &qmeta1)\n                : reformer->convert(vecs.data(), qmeta, COUNT, &out, &qmeta1));\n\n  std::string out2;\n  IndexQueryMeta qmeta2;\n  for (size_t i = 0; i < COUNT; ++i) {\n    ASSERT_EQ(0,\n              reformer->transform(&vecs[i * DIMENSION], qmeta, &out2, &qmeta2));\n    ASSERT_EQ(qmeta1.element_size(), qmeta2.element_size());\n    ASSERT_EQ(out2.size(), qmeta2.element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), &out[i * qmeta1.element_size()],\n                             out2.size()));\n\n    ASSERT_EQ(0,\n              reformer->convert(&vecs[i * DIMENSION], qmeta, &out2, &qmeta2));\n    ASSERT_EQ(qmeta1.element_size(), qmeta2.element_size());\n    ASSERT_EQ(out2.size(), qmeta2.element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), &out[i * qmeta1.element_size()],\n                             out2.size()));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestDistanceMatrixInt8(const std::string &metric_name) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen)*4;\n  auto holder = GetHolder(dimension, batch_size, dist);\n  IndexMeta meta(IndexMeta::DT_FP32, dimension);\n  meta.set_metric(metric_name, 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int8StreamingConverter\");\n  
ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  auto &meta2 = converter->meta();\n  ASSERT_EQ(dimension + 20, holder2->dimension());\n  size_t matrix_size = batch_size * holder2->dimension();\n  std::vector<int8_t> matrix1(matrix_size);\n  std::vector<int8_t> matrix2(matrix_size);\n  auto iter = holder2->create_iterator();\n  for (size_t i = 0; i < batch_size; ++i, iter->next()) {\n    std::memcpy(&matrix1[i * holder2->dimension()], iter->data(),\n                holder2->element_size());\n  }\n  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),\n                  reinterpret_cast<uint32_t *>(matrix1.data()),\n                  meta2.dimension() / 4, batch_size);\n\n  auto query_holder = GetHolder(dimension, query_size, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, query_holder));\n  auto query_holder2 = converter->result();\n  ASSERT_EQ(dimension + 20, query_holder2->dimension());\n  size_t query_matrix_size = query_size * query_holder2->dimension();\n  std::vector<int8_t> query1(query_matrix_size);\n  std::vector<int8_t> query2(query_matrix_size);\n  auto query_iter = query_holder2->create_iterator();\n  for (size_t i = 0; i < query_size; ++i, query_iter->next()) {\n    std::memcpy(&query1[i * holder2->dimension()], query_iter->data(),\n                query_holder2->element_size());\n  }\n  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),\n                  reinterpret_cast<uint32_t *>(query1.data()),\n                  meta2.dimension() / 4, query_size);\n\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  auto metric = IndexFactory::CreateMetric(\"QuantizedInteger\");\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0,\n            metric->init(converter->meta(), converter->meta().metric_params()));\n  auto compute = 
metric->distance();\n  ASSERT_TRUE(compute);\n  auto matrix_compute = metric->distance_matrix(M, N);\n  ASSERT_TRUE(matrix_compute);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const int8_t *cur_query = &query1[i * meta2.dimension()];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      compute(&matrix1[j * meta2.dimension()], cur_query, meta2.dimension(),\n              &query_result[j]);\n    }\n  }\n  matrix_compute(&matrix2[0], &query2[0], meta2.dimension(), &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    // EXPECT_FLOAT_EQ(result1[i], result2[i]);\n    EXPECT_TRUE(IsAlmostEqual(result1[i], result2[i], 1e4));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt8SquaredEuclideanMetric) {\n  TestDistanceMatrixInt8<1, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<2, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<2, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 16>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 16>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 32>(\"SquaredEuclidean\");\n}\n\nTEST(QuantizedIntegerMetric, 
TestInt4SquaredEuclidean) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;\n  const size_t COUNT = 1000;\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  auto converter = IndexFactory::CreateConverter(\"Int4StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 =\n        ailego::Distance::SquaredEuclidean(mf, vec.data(), holder->dimension());\n    float v2;\n    compute(mi, qi, 
holder2->dimension(), &v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4SquaredEuclideanReformer) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n  std::uniform_int_distribution<int> dist2(0, 1);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;\n  const size_t COUNT = std::uniform_int_distribution<int>(1, 128)(gen);\n  auto reformer = IndexFactory::CreateReformer(\"Int4StreamingReformer\");\n  ASSERT_TRUE(!!reformer);\n  ASSERT_EQ(0u, reformer->init(Params()));\n\n  ailego::NumericalVector<float> vecs(DIMENSION * COUNT);\n  for (size_t j = 0; j < DIMENSION * COUNT; ++j) {\n    vecs[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta1;\n  std::string out;\n  ASSERT_EQ(0,\n            dist2(gen)\n                ? 
reformer->transform(vecs.data(), qmeta, COUNT, &out, &qmeta1)\n                : reformer->convert(vecs.data(), qmeta, COUNT, &out, &qmeta1));\n\n  std::string out2;\n  IndexQueryMeta qmeta2;\n  for (size_t i = 0; i < COUNT; ++i) {\n    ASSERT_EQ(0,\n              reformer->transform(&vecs[i * DIMENSION], qmeta, &out2, &qmeta2));\n    ASSERT_EQ(qmeta1.element_size(), qmeta2.element_size());\n    ASSERT_EQ(out2.size(), qmeta2.element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), &out[i * qmeta1.element_size()],\n                             out2.size()));\n\n    ASSERT_EQ(0,\n              reformer->convert(&vecs[i * DIMENSION], qmeta, &out2, &qmeta2));\n    ASSERT_EQ(qmeta1.element_size(), qmeta2.element_size());\n    ASSERT_EQ(out2.size(), qmeta2.element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), &out[i * qmeta1.element_size()],\n                             out2.size()));\n  }\n}\n\ntemplate <size_t M, size_t N>\nvoid TestDistanceMatrixInt4(const std::string &metric_name) {\n  std::mt19937 gen((std::random_device())());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t batch_size = M;\n  const size_t query_size = N;\n  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen)*8;\n  auto holder = GetHolder(dimension, batch_size, dist);\n  IndexMeta meta(IndexMeta::DT_FP32, dimension);\n  meta.set_metric(metric_name, 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int4StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  auto &meta2 = converter->meta();\n  ASSERT_EQ(dimension + 32, holder2->dimension());\n  size_t matrix_size = batch_size * holder2->element_size();\n  std::vector<uint8_t> matrix1(matrix_size);\n  std::vector<uint8_t> matrix2(matrix_size);\n  auto iter = holder2->create_iterator();\n  for (size_t i = 0; i < batch_size; 
++i, iter->next()) {\n    std::memcpy(&matrix1[i * holder2->element_size()], iter->data(),\n                holder2->element_size());\n  }\n  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),\n                  reinterpret_cast<uint32_t *>(matrix1.data()),\n                  meta2.dimension() / 8, batch_size);\n\n  auto query_holder = GetHolder(dimension, query_size, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, query_holder));\n  auto query_holder2 = converter->result();\n  ASSERT_EQ(dimension + 32, query_holder2->dimension());\n  size_t query_matrix_size = query_size * query_holder2->element_size();\n  std::vector<uint8_t> query1(query_matrix_size);\n  std::vector<uint8_t> query2(query_matrix_size);\n  auto query_iter = query_holder2->create_iterator();\n  for (size_t i = 0; i < query_size; ++i, query_iter->next()) {\n    std::memcpy(&query1[i * holder2->element_size()], query_iter->data(),\n                query_holder2->element_size());\n  }\n  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),\n                  reinterpret_cast<uint32_t *>(query1.data()),\n                  meta2.dimension() / 8, query_size);\n\n  std::vector<float> result1(batch_size * query_size);\n  std::vector<float> result2(batch_size * query_size);\n\n  auto metric = IndexFactory::CreateMetric(\"QuantizedInteger\");\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0,\n            metric->init(converter->meta(), converter->meta().metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n  auto matrix_compute = metric->distance_matrix(M, N);\n  ASSERT_TRUE(matrix_compute);\n\n  for (size_t i = 0; i < query_size; ++i) {\n    const uint8_t *cur_query = &query1[i * meta2.element_size()];\n    float *query_result = &result1[i * batch_size];\n\n    for (size_t j = 0; j < batch_size; ++j) {\n      compute(&matrix1[j * meta2.element_size()], cur_query, meta2.dimension(),\n              &query_result[j]);\n    }\n  }\n  
matrix_compute(&matrix2[0], &query2[0], meta2.dimension(), &result2[0]);\n\n  for (size_t i = 0; i < batch_size * query_size; ++i) {\n    EXPECT_NEAR(result1[i], result2[i], 1e-2 * dimension);\n    EXPECT_TRUE(IsAlmostEqual(result1[i], result2[i], 1e4));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4SquaredEuclideanMetric) {\n  TestDistanceMatrixInt4<1, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<2, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<2, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 16>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 1>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 2>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 4>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 8>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 16>(\"SquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 32>(\"SquaredEuclidean\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt8InnerProduct) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 1000;\n  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"InnerProduct\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int8StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  
ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 = ailego::Distance::MinusInnerProduct(mf, vec.data(),\n                                                   holder->dimension());\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    // printf(\"%f %f\\n\", v1, v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt8InnerProductMetric) {\n 
 TestDistanceMatrixInt8<1, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<2, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<2, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt8<4, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<4, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt8<4, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt8<8, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<8, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt8<8, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt8<8, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt8<16, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<16, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt8<16, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt8<16, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt8<16, 16>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 16>(\"InnerProduct\");\n  TestDistanceMatrixInt8<32, 32>(\"InnerProduct\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt4InnerProduct) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;\n  const size_t COUNT = 1000;\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"InnerProduct\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int4StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = 
IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 = ailego::Distance::MinusInnerProduct(mf, vec.data(),\n                                                   holder->dimension());\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4InnerProductMetric) {\n  TestDistanceMatrixInt4<1, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<2, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<2, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt4<4, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<4, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt4<4, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt4<8, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<8, 
2>(\"InnerProduct\");\n  TestDistanceMatrixInt4<8, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt4<8, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt4<16, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<16, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt4<16, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt4<16, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt4<16, 16>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 1>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 2>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 4>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 8>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 16>(\"InnerProduct\");\n  TestDistanceMatrixInt4<32, 32>(\"InnerProduct\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt8MipsSquaredEuclidean) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 1000;\n  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"MipsSquaredEuclidean\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int8StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, 
reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  auto query_metric = metric->query_metric();\n  ASSERT_TRUE(!!query_metric);\n  ASSERT_EQ(query_metric->name(), \"QuantizedInteger\");\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 = ailego::Distance::MipsSquaredEuclidean(\n        mf, vec.data(), holder->dimension(), 0.0f);\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    // printf(\"%f %f\\n\", v1, v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt8MipsSquaredEuclideanMetric) {\n  TestDistanceMatrixInt8<1, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<2, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<2, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<4, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<8, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 1>(\"MipsSquaredEuclidean\");\n  
TestDistanceMatrixInt8<16, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<16, 16>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 16>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt8<32, 32>(\"MipsSquaredEuclidean\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt4MipsSquaredEuclidean) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;\n  const size_t COUNT = 1000;\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"MipsSquaredEuclidean\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int4StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  ASSERT_EQ(0u, converter->init(meta, Params()));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), 
meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    float v1 = ailego::Distance::MipsSquaredEuclidean(mf, vec.data(),\n                                                      holder->dimension(), 0.0);\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4MipsSquaredEuclideanMetric) {\n  TestDistanceMatrixInt4<1, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<2, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<2, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<4, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<8, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<16, 
16>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 1>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 2>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 4>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 8>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 16>(\"MipsSquaredEuclidean\");\n  TestDistanceMatrixInt4<32, 32>(\"MipsSquaredEuclidean\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt8NormalizedCosine) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 1000;\n  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"NormalizedCosine\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int8StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  Params converter_params;\n  converter_params.set(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE, true);\n  ASSERT_EQ(0u, converter->init(meta, converter_params));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = 
IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n\n    // normalize mf & vec\n    std::vector<float> normalized_mf(DIMENSION);\n    memcpy(normalized_mf.data(), mf, DIMENSION * sizeof(float));\n    float norm_mf = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_mf.data(), DIMENSION,\n                                  &norm_mf);\n    std::vector<float> normalized_vec(DIMENSION);\n    memcpy(normalized_vec.data(), vec.data(), DIMENSION * sizeof(float));\n    float norm_vec = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_vec.data(), DIMENSION,\n                                  &norm_vec);\n\n    float v1 = ailego::Distance::MinusInnerProduct(\n        normalized_mf.data(), normalized_vec.data(), holder->dimension());\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    // printf(\"%f %f\\n\", v1, v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt8NormalizedCosineMetric) {\n  TestDistanceMatrixInt8<1, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<2, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<2, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<4, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<4, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<4, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<8, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<8, 
2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<8, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<8, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<16, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<16, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<16, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<16, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<16, 16>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 16>(\"NormalizedCosine\");\n  TestDistanceMatrixInt8<32, 32>(\"NormalizedCosine\");\n}\n\nTEST(QuantizedIntegerMetric, TestInt8Cosine) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);\n  const size_t COUNT = 1000;\n  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"Cosine\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"CosineInt8Converter\");\n  ASSERT_TRUE(!!converter);\n  Params converter_params;\n  ASSERT_EQ(0u, converter->init(meta, converter_params));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  
std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute_batch = metric->batch_distance();\n  ASSERT_TRUE(compute_batch);\n\n  int8_t *qi = reinterpret_cast<int8_t *>(&out[0]);\n  if (auto query_preprocess_func = metric->get_query_preprocess_func();\n      query_preprocess_func != nullptr) {\n    query_preprocess_func(qi, holder2->dimension());\n  }\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t *)iter2->data();\n\n    // normalize mf & vec\n    std::vector<float> normalized_mf(DIMENSION);\n    memcpy(normalized_mf.data(), mf, DIMENSION * sizeof(float));\n    float norm_mf = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_mf.data(), DIMENSION,\n                                  &norm_mf);\n    std::vector<float> normalized_vec(DIMENSION);\n    memcpy(normalized_vec.data(), vec.data(), DIMENSION * sizeof(float));\n    float norm_vec = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_vec.data(), DIMENSION,\n                                  &norm_vec);\n\n    float v1 = ailego::Distance::MinusInnerProduct(\n        normalized_mf.data(), normalized_vec.data(), holder->dimension());\n    float v2;\n    compute_batch(reinterpret_cast<const void **>(&mi), qi, 1,\n                  holder2->dimension(), &v2);\n    // printf(\"%f %f\\n\", v1, v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), 
out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4NormalizedCosine) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 2.0);\n\n  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;\n  const size_t COUNT = 1000;\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  meta.set_metric(\"NormalizedCosine\", 0, Params());\n  auto converter = IndexFactory::CreateConverter(\"Int4StreamingConverter\");\n  ASSERT_TRUE(!!converter);\n  Params converter_params;\n  converter_params.set(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE, true);\n  ASSERT_EQ(0u, converter->init(meta, converter_params));\n\n  auto holder = GetHolder(DIMENSION, COUNT, dist);\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());\n  auto &meta2 = converter->meta();\n\n  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));\n\n  ailego::NumericalVector<float> vec(DIMENSION);\n  for (size_t j = 0; j < DIMENSION; ++j) {\n    vec[j] = dist(gen);\n  }\n  IndexQueryMeta qmeta;\n  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);\n  IndexQueryMeta qmeta2;\n  std::string out;\n  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));\n  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  auto metric = IndexFactory::CreateMetric(meta2.metric_name());\n  ASSERT_TRUE(!!metric);\n  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));\n  auto compute = metric->distance();\n  ASSERT_TRUE(compute);\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    const float *mf = (const float *)iter->data();\n    const int8_t *mi = (const int8_t 
*)iter2->data();\n    const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);\n    // normalize mf & vec\n    std::vector<float> normalized_mf(DIMENSION);\n    memcpy(normalized_mf.data(), mf, DIMENSION * sizeof(float));\n    float norm_mf = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_mf.data(), DIMENSION,\n                                  &norm_mf);\n    std::vector<float> normalized_vec(DIMENSION);\n    memcpy(normalized_vec.data(), vec.data(), DIMENSION * sizeof(float));\n    float norm_vec = 0.0;\n    ailego::Normalizer<float>::L2((float *)normalized_vec.data(), DIMENSION,\n                                  &norm_vec);\n\n    float v1 = ailego::Distance::MinusInnerProduct(\n        normalized_mf.data(), normalized_vec.data(), holder->dimension());\n    float v2;\n    compute(mi, qi, holder2->dimension(), &v2);\n    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);\n\n    std::string out2;\n    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));\n    ASSERT_EQ(out2.size(), holder2->element_size());\n    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));\n  }\n}\n\nTEST(QuantizedIntegerMetric, TestInt4NormalizedCosineMetric) {\n  TestDistanceMatrixInt4<1, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<2, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<2, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<4, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<4, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<4, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<8, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<8, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<8, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<8, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<16, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<16, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<16, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<16, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<16, 
16>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 1>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 2>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 4>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 8>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 16>(\"NormalizedCosine\");\n  TestDistanceMatrixInt4<32, 32>(\"NormalizedCosine\");\n}\n"
  },
  {
    "path": "tests/core/quantizer/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n      NAME ${CC_TARGET}\n      STRICT\n      LIBS zvec_ailego core_framework core_quantizer\n      SRCS ${CC_SRCS}\n      INCS . ${PROJECT_ROOT_DIR}/src/core/\n    )\nendforeach()"
  },
  {
    "path": "tests/core/quantizer/half_float_reformer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <random>\n\n// #include <zvec/ailego/container/vector.h>\n// #include <zvec/ailego/container/params.h>\n\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_holder.h\"\n\nusing namespace zvec::core;\n\nTEST(HalfFloatReformer, General) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(-1.0, 1.0);\n\n  const size_t COUNT = 1000;\n  const size_t DIMENSION = 128;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"HalfFloatConverter\");\n  ASSERT_TRUE(converter);\n  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));\n\n  auto reformer = IndexFactory::CreateReformer(\"HalfFloatReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(zvec::ailego::Params()));\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  
ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP16, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 2);\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    const float *f32 = (const float *)iter->data();\n    const zvec::ailego::Float16 *f16 =\n        (const zvec::ailego::Float16 *)iter2->data();\n    printf(\"%f %f\\n\", f32[0], (float)f16[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    // Test reformer convert\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    
EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(iter->data(),\n                                   IndexQueryMeta(holder->data_type(),\n                                                  holder->dimension() / 4),\n                                   4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n"
  },
  {
    "path": "tests/core/quantizer/integer_quantizer_reformer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/container/vector.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_holder.h\"\n\nusing namespace zvec::core;\n\nTEST(IntegerReformer, Int8General) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 12;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int8_quantizer.converter.histogram_bins_count\", 10000);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto &stats = converter->stats();\n  
EXPECT_EQ(COUNT, stats.trained_count());\n  EXPECT_EQ(COUNT, stats.transformed_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int8QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    // Test reformer convert\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(\n                     
iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(iter->data(),\n                                   IndexQueryMeta(holder->data_type(),\n                                                  holder->dimension() / 4),\n                                   4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\n\nTEST(IntegerReformer, Int8OnePassHolder) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::normal_distribution<float> dist(5, 2.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  
EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);\n\n  auto iter = holder_mirror->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int8QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\nTEST(IntegerReformer, Int8TrainedParams) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(5, 10.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  
meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto stats = converter->stats();\n  ASSERT_EQ(COUNT, stats.trained_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);\n\n  auto iter = holder_mirror->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int8QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta 
qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\nTEST(IntegerReformer, Int8NonBias) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(5, 10.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int8_quantizer.converter.disable_bias\", true);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, 
IndexConverter::TrainAndTransform(converter, holder));\n  auto stats = converter->stats();\n  ASSERT_EQ(COUNT, stats.trained_count());\n  ASSERT_EQ(converter->meta().reformer_name(), \"Int8QuantizerReformer\");\n  auto reformer_params = converter->meta().reformer_params();\n  ASSERT_EQ(\n      reformer_params.get_as_float(\"proxima.int8_quantizer.reformer.bias\"),\n      0.0f);\n}\n\n//! Test whether two floating point numbers are equal\ntemplate <class T>\nstatic inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->\n    typename std::enable_if<std::is_floating_point<T>::value, bool>::type {\n  // the machine epsilon has to be scaled to the magnitude of the values used\n  // and multiplied by the desired precision in ULPs (units in the last place)\n  return ((std::fabs(x - y) <=\n           std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp) ||\n          (std::fabs(x - y) < std::numeric_limits<T>::min()));\n}\n\nTEST(IntegerReformer, Int8InitConverterWithTrainedParams) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 12;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int8_quantizer.converter.histogram_bins_count\", 10000);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0, 
converter->train(holder));\n  auto reformer_params = converter->meta().reformer_params();\n  auto converter_params = converter->meta().converter_params();\n  converter = IndexFactory::CreateConverter(\"Int8QuantizerConverter\");\n  ASSERT_EQ(0, converter->init(meta, converter_params));\n  ASSERT_EQ(0, converter->transform(holder));\n\n  auto &stats = converter->stats();\n  EXPECT_EQ(0u, stats.trained_count());\n  EXPECT_EQ(COUNT, stats.transformed_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int8QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(reformer_params));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    
holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    // Test reformer convert\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    buffer.clear();\n    EXPECT_EQ(0, reformer->convert(iter->data(),\n                                   IndexQueryMeta(holder->data_type(),\n                                                  holder->dimension() / 4),\n                                   4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\n// Int4 Tests =====\nTEST(IntegerReformer, Int4General) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 12;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int4_quantizer.converter.histogram_bins_count\", 10000);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n      if (i == 0) printf(\" 
%f\", vec[j]);\n    }\n    if (i == 0) printf(\"\\n\");\n    holder->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto &stats = converter->stats();\n  EXPECT_EQ(COUNT, stats.trained_count());\n  EXPECT_EQ(COUNT, stats.transformed_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int4QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 3),\n                             
        3, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 3, qmeta.dimension());\n    ASSERT_EQ(buffer, buffer2);\n\n    // Test reformer convert\n    EXPECT_EQ(0, reformer->convert(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->convert(iter->data(),\n                                   IndexQueryMeta(holder->data_type(),\n                                                  holder->dimension() / 3),\n                                   3, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 3, qmeta.dimension());\n    ASSERT_EQ(buffer, buffer2);\n  }\n}\n\n\nTEST(IntegerReformer, Int4OnePassHolder) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::normal_distribution<float> dist(5, 2.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, 
holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);\n\n  auto iter = holder_mirror->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int4QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\nTEST(IntegerReformer, 
Int4TrainedParams) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(5, 10.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto stats = converter->stats();\n  ASSERT_EQ(COUNT, stats.trained_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);\n\n  auto iter = holder_mirror->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int4QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // 
const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n\nTEST(IntegerReformer, Int4NonBias) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(5, 10.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 512;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int4_quantizer.converter.disable_bias\", true);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  auto holder_mirror =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    
holder->emplace(i + 1, vec);\n    holder_mirror->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));\n  auto stats = converter->stats();\n  ASSERT_EQ(COUNT, stats.trained_count());\n  ASSERT_EQ(converter->meta().reformer_name(), \"Int4QuantizerReformer\");\n  auto reformer_params = converter->meta().reformer_params();\n  ASSERT_EQ(\n      reformer_params.get_as_float(\"proxima.int4_quantizer.reformer.bias\"),\n      0.0f);\n}\n\nTEST(IntegerReformer, Int4InitConverterWithTrainedParams) {\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_real_distribution<float> dist(0.0, 1.0);\n\n  const size_t COUNT = 10000;\n  const size_t DIMENSION = 16;\n\n  IndexMeta meta;\n  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);\n\n  auto converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_TRUE(converter);\n  zvec::ailego::Params params;\n  params.set(\"proxima.int4_quantizer.converter.histogram_bins_count\", 10000);\n  ASSERT_EQ(0u, converter->init(meta, params));\n\n  auto holder =\n      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(\n          DIMENSION);\n  for (size_t i = 0; i < COUNT; ++i) {\n    zvec::ailego::NumericalVector<float> vec(DIMENSION);\n    for (size_t j = 0; j < DIMENSION; ++j) {\n      vec[j] = dist(gen);\n    }\n    holder->emplace(i + 1, vec);\n  }\n  EXPECT_EQ(COUNT, holder->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());\n  ASSERT_EQ(0, converter->train(holder));\n  auto reformer_params = converter->meta().reformer_params();\n  auto converter_params = converter->meta().converter_params();\n  converter = IndexFactory::CreateConverter(\"Int4QuantizerConverter\");\n  ASSERT_EQ(0, converter->init(meta, converter_params));\n  ASSERT_EQ(0, converter->transform(holder));\n\n  auto &stats = converter->stats();\n  
EXPECT_EQ(0u, stats.trained_count());\n  EXPECT_EQ(COUNT, stats.transformed_count());\n\n  auto holder2 = converter->result();\n  EXPECT_EQ(COUNT, holder2->count());\n  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());\n  EXPECT_EQ(holder->dimension(), holder2->dimension());\n  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);\n\n  auto iter = holder->create_iterator();\n  auto iter2 = holder2->create_iterator();\n  std::string buffer;\n\n  auto reformer = IndexFactory::CreateReformer(\"Int4QuantizerReformer\");\n  ASSERT_TRUE(reformer);\n  ASSERT_EQ(0u, reformer->init(reformer_params));\n\n  for (; iter->is_valid(); iter->next(), iter2->next()) {\n    EXPECT_TRUE(iter2->is_valid());\n    EXPECT_TRUE(iter->data());\n    EXPECT_TRUE(iter2->data());\n\n    // const float *f32 = (const float *)iter->data();\n    // const int8_t *i8 = (const int8_t *)iter2->data();\n    // printf(\"%f %d\\n\", f32[0], i8[0]);\n\n    std::string buffer2(\n        std::string((const char *)iter2->data(), holder2->element_size()));\n\n    IndexQueryMeta qmeta;\n    EXPECT_EQ(0, reformer->transform(\n                     iter->data(),\n                     IndexQueryMeta(holder->data_type(), holder->dimension()),\n                     &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension(), qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n\n    EXPECT_EQ(0, reformer->transform(iter->data(),\n                                     IndexQueryMeta(holder->data_type(),\n                                                    holder->dimension() / 4),\n                                     4, &buffer, &qmeta));\n    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());\n    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());\n    EXPECT_EQ(buffer, buffer2);\n  }\n}\n"
  },
  {
    "path": "tests/core/utility/CMakeLists.txt",
    "content": "\ninclude(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nfile(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)\n\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gtest(\n    NAME ${CC_TARGET} STRICT\n    LIBS zvec_ailego core_framework core_utility\n    Arrow::arrow_depends\n    Arrow::parquet_static\n    SRCS ${CC_SRCS}\n    INCS . ${PROJECT_ROOT_DIR}/src/core/\n  )\n  cc_test_suite(zvec_ailego ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/core/utility/buffer_storage_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(BufferStorage, General) {\n  std::string file_path = \"buffer_storage_test_file\";\n  ailego::File::Delete(file_path);\n\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(write_storage);\n  std::cout << file_path << std::endl;\n  EXPECT_NE(0, write_storage->open(file_path, false));\n\n  ailego::Params params;\n  EXPECT_EQ(0, write_storage->init(params));\n  std::cout << file_path << std::endl;\n  EXPECT_EQ(0, write_storage->open(file_path, true));\n\n  IndexMeta meta;\n  meta.set_trainer(\"trainer\", 111, ailego::Params());\n  meta.set_searcher(\"searcher\", 222, ailego::Params());\n  meta.set_builder(\"builder\", 333, ailego::Params());\n\n  EXPECT_EQ(0, IndexHelper::SerializeToStorage(meta, write_storage.get()));\n  EXPECT_EQ(0, write_storage->append(\"AAAA\", 1234));\n  EXPECT_EQ(0, write_storage->append(\"BBBB\", 1234));\n  auto aaaa = write_storage->get(\"AAAA\");\n  ASSERT_TRUE(aaaa);\n  auto aaaa1 = aaaa->clone();\n  ASSERT_TRUE(aaaa1);\n  std::string hello = \"Hello world!!!\";\n  EXPECT_EQ(hello.size(), aaaa1->write(0, hello.data(), 
hello.size()));\n  EXPECT_EQ(0, write_storage->close());\n\n  // Reopen it\n  auto read_storage = IndexFactory::CreateStorage(\"BufferStorage\");\n  EXPECT_EQ(0, read_storage->open(file_path, false));\n\n  IndexMeta meta2;\n  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(read_storage.get(), &meta2));\n  EXPECT_EQ(\"trainer\", meta2.trainer_name());\n  EXPECT_EQ(\"searcher\", meta2.searcher_name());\n  EXPECT_EQ(\"builder\", meta2.builder_name());\n  auto aaaa2 = read_storage->get(\"AAAA\");\n  ASSERT_TRUE(aaaa2);\n  const void *data;\n  EXPECT_EQ(hello.size(), aaaa2->read(0, &data, hello.size()));\n  auto aaaa3 = aaaa2->clone();\n  ASSERT_TRUE(aaaa3);\n  EXPECT_EQ(hello.size(), aaaa3->read(0, &data, hello.size()));\n  EXPECT_EQ(hello, std::string((const char *)data, hello.size()));\n}\n"
  },
  {
    "path": "tests/core/utility/file_dumper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_helper.h\"\n#include \"zvec/core/framework/index_segment_storage.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(FileDumper, General) {\n  std::string file_path = \"file_dumper_test_file\";\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_TRUE(dumper);\n\n  IndexMeta meta1;\n  meta1.set_trainer(\"index_trainer\", 0, ailego::Params());\n  ASSERT_EQ(0, dumper->create(file_path));\n  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));\n\n  for (size_t i = 0; i < 10; ++i) {\n    std::string hello = \"Hello world!!! 
#\" + std::to_string(i);\n    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));\n    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));\n  }\n  ASSERT_EQ(0, dumper->close());\n\n  auto container = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_TRUE(container);\n\n  ailego::Params params;\n  params.set(\"proxima.mmap_file.container.memory_locking\", true);\n  params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  params.set(\"proxima.mmap_file.container.checksum_validation\", true);\n  ASSERT_EQ(0, container->init(params));\n\n  IndexMeta meta2;\n  EXPECT_EQ(\"\", meta2.trainer_name());\n  ASSERT_EQ(0, container->open(file_path, false));\n  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));\n  EXPECT_EQ(\"index_trainer\", meta2.trainer_name());\n\n  for (size_t i = 0; i < 10; ++i) {\n    auto seg = container->get(std::to_string(i));\n    const void *data = nullptr;\n    EXPECT_EQ(seg->data_size(), seg->read(0, &data, seg->data_size()));\n\n    std::string hello = \"Hello world!!! #\" + std::to_string(i);\n    EXPECT_EQ(hello, std::string((const char *)data, seg->data_size()));\n  }\n}\n\nTEST(IndexSegmentDumper, General) {\n  std::string file_path = \"index_segment_dumper_test_file\";\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_TRUE(dumper);\n  ASSERT_EQ(0, dumper->create(file_path));\n\n  {\n    IndexDumper::Pointer dumper2 =\n        std::make_shared<IndexSegmentDumper>(dumper, \"AAAAA\");\n\n    IndexMeta meta1;\n    meta1.set_trainer(\"index_trainer\", 0, ailego::Params());\n    EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper2.get()));\n\n    for (size_t i = 0; i < 10; ++i) {\n      std::string hello = \"A: Hello world!!! 
#\" + std::to_string(i);\n      EXPECT_EQ(hello.size(), dumper2->write(hello.data(), hello.size()));\n      EXPECT_EQ(0, dumper2->append(std::to_string(i), hello.size(), 0, 0));\n    }\n  }\n\n  {\n    IndexDumper::Pointer dumper2 =\n        std::make_shared<IndexSegmentDumper>(dumper, \"BBBBB\");\n\n    IndexMeta meta1;\n    meta1.set_builder(\"index_builder\", 0, ailego::Params());\n    EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper2.get()));\n\n    for (size_t i = 100; i < 110; ++i) {\n      std::string hello = \"B: Hello world!!! #\" + std::to_string(i);\n      EXPECT_EQ(hello.size(), dumper2->write(hello.data(), hello.size()));\n      EXPECT_EQ(0, dumper2->append(std::to_string(i), hello.size(), 0, 0));\n    }\n  }\n\n  {\n    IndexDumper::Pointer dumper2 =\n        std::make_shared<IndexSegmentDumper>(dumper, \"CCCCC\");\n\n    IndexMeta meta1;\n    meta1.set_converter(\"index_converter\", 0, ailego::Params());\n    EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper2.get()));\n\n    for (size_t i = 1000; i < 1010; ++i) {\n      std::string hello = \"C: Hello world!!! #\" + std::to_string(i);\n      EXPECT_EQ(hello.size(), dumper2->write(hello.data(), hello.size()));\n      EXPECT_EQ(0, dumper2->append(std::to_string(i), hello.size(), 0, 0));\n    }\n  }\n  ASSERT_EQ(0, dumper->close());\n\n  ///// Read data with container\n\n  auto container = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_TRUE(container);\n\n  ailego::Params params;\n  params.set(\"proxima.mmap_file.container.memory_locking\", true);\n  params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  params.set(\"proxima.mmap_file.container.checksum_validation\", true);\n  ASSERT_EQ(0, container->init(params));\n  ASSERT_EQ(0, container->open(file_path, false));\n}\n"
  },
  {
    "path": "tests/core/utility/memory_dumper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_helper.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(MemoryDumper, General) {\n  std::string file_path = \"memory_dumper_test_file\";\n\n  auto dumper = IndexFactory::CreateDumper(\"MemoryDumper\");\n  ASSERT_TRUE(dumper);\n\n  IndexMeta meta1;\n  meta1.set_trainer(\"index_trainer\", 0, ailego::Params());\n  ASSERT_EQ(0, dumper->create(file_path));\n  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));\n\n  for (size_t i = 0; i < 10; ++i) {\n    std::string hello = \"Hello world!!! 
#\" + std::to_string(i);\n    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));\n    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));\n  }\n  ASSERT_EQ(0, dumper->close());\n\n  auto container = IndexFactory::CreateStorage(\"MemoryReadStorage\");\n  ASSERT_TRUE(container);\n\n  ailego::Params params;\n  params.set(\"memory.container.checksum_validation\", true);\n  ASSERT_EQ(0, container->init(params));\n\n  IndexMeta meta2;\n  EXPECT_EQ(\"\", meta2.trainer_name());\n  ASSERT_EQ(0, container->open(file_path, false));\n\n  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));\n  EXPECT_EQ(\"index_trainer\", meta2.trainer_name());\n\n  for (size_t i = 0; i < 10; ++i) {\n    auto seg = container->get(std::to_string(i));\n    const void *data = nullptr;\n    EXPECT_EQ(seg->data_size(), seg->read(0, &data, seg->data_size()));\n\n    std::string hello = \"Hello world!!! #\" + std::to_string(i);\n    EXPECT_EQ(hello, std::string((const char *)data, seg->data_size()));\n  }\n}\n"
  },
  {
    "path": "tests/core/utility/mmap_file_container_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <iostream>\n#include <gtest/gtest.h>\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_helper.h\"\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nstatic int GenRandInt(int m, int n) {\n  static std::mt19937 gen((std::random_device())());\n  return std::uniform_int_distribution<int>(m, n)(gen);\n}\n\nstatic void AddRandomPadding(const std::string &in, const std::string &out,\n                             size_t header_padding_size,\n                             size_t footer_padding_size) {\n  ailego::File out_file;\n  out_file.create(out, 0);\n  for (size_t i = 0; i < header_padding_size; ++i) {\n    uint8_t r = GenRandInt(0, 255);\n    out_file.write(&r, 1);\n  }\n\n  ailego::File in_file;\n  ASSERT_TRUE(in_file.open(in, true));\n  std::string buf(in_file.size(), '\\0');\n  ASSERT_EQ(buf.size(), in_file.read(&buf[0], buf.size()));\n  out_file.write(buf.data(), buf.size());\n\n  for (size_t i = 0; i < footer_padding_size; ++i) {\n    uint8_t r = GenRandInt(0, 255);\n    out_file.write(&r, 1);\n  }\n}\n\nTEST(MMapFileReadStorage, General) {\n  std::string file_path = \"mmap_file_container_test_file\";\n  std::string file_path_padding = \"mmap_file_container_test_file_padding\";\n\n  auto dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  ASSERT_TRUE(dumper);\n\n  IndexMeta meta1;\n  
meta1.set_trainer(\"index_trainer\", 0, ailego::Params());\n  ASSERT_EQ(0, dumper->create(file_path));\n  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));\n\n  for (size_t i = 0; i < 21; ++i) {\n    std::string hello = \"Hello world!!! #\" + std::to_string(i);\n    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));\n    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));\n  }\n  ASSERT_EQ(0, dumper->close());\n  size_t header_paddings = GenRandInt(0, 1024);\n  size_t footer_paddings = GenRandInt(0, 1024);\n  AddRandomPadding(file_path, file_path_padding, header_paddings,\n                   footer_paddings);\n  ailego::File file;\n  file.open(file_path_padding, true);\n  int64_t header_offset =\n      GenRandInt(0, 1) ? header_paddings : header_paddings - file.size();\n  int64_t footer_offset =\n      (GenRandInt(0, 1) ? file.size() : 0) - footer_paddings;\n\n  auto container = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  ASSERT_TRUE(container);\n\n  ailego::Params params;\n  params.set(\"proxima.mmap_file.container.memory_locking\", true);\n  params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n  params.set(\"proxima.mmap_file.container.checksum_validation\", true);\n  params.set(\"proxima.mmap_file.container.header_offset\", header_offset);\n  params.set(\"proxima.mmap_file.container.footer_offset\", footer_offset);\n  ASSERT_EQ(0, container->init(params));\n\n  IndexMeta meta2;\n  EXPECT_EQ(0u, container->get_all().size());\n  EXPECT_EQ(\"\", meta2.trainer_name());\n  EXPECT_EQ(\"\", meta2.searcher_name());\n  ASSERT_EQ(0, container->open(file_path_padding, false));\n  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));\n  EXPECT_EQ(23u, container->get_all().size());\n  EXPECT_EQ(\"index_trainer\", meta2.trainer_name());\n  EXPECT_EQ(\"\", meta2.searcher_name());\n\n  for (size_t i = 0; i < 21; ++i) {\n    auto seg = container->get(std::to_string(i));\n    auto 
seg1 = seg->clone();\n\n    const void *data = nullptr;\n    EXPECT_EQ(seg1->data_size(), seg1->read(0, &data, seg1->data_size()));\n    std::string hello = \"Hello world!!! #\" + std::to_string(i);\n    EXPECT_EQ(hello, std::string((const char *)data, seg1->data_size()));\n  }\n  container->cleanup();\n}\n"
  },
  {
    "path": "tests/core/utility/mmap_file_storage_test.cpp",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <fstream>\n#include <iostream>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include <zvec/core/framework/index_factory.h>\n#include <zvec/core/framework/index_helper.h>\n\nusing namespace zvec;\nusing namespace zvec::core;\n\nTEST(MMapFileStorage, TestHugePage) {\n  std::string file_path = \"/mnt/huge/mmap_file_storage_test_file\";\n  // std::string file_path = \"mmap_file_storage_test_file\";\n  ailego::File::Delete(file_path);\n\n  auto write_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  ASSERT_TRUE(write_storage);\n\n  ailego::Params params;\n  params.set(\"proxima.mmap_file.storage.huge_page\", true);\n  EXPECT_EQ(0, write_storage->init(params));\n  EXPECT_EQ(0, write_storage->open(file_path, true));\n\n  IndexMeta meta;\n  meta.set_trainer(\"trainer\", 111, ailego::Params());\n  meta.set_searcher(\"searcher\", 222, ailego::Params());\n  meta.set_builder(\"builder\", 333, ailego::Params());\n\n  EXPECT_EQ(0, IndexHelper::SerializeToStorage(meta, write_storage.get()));\n  EXPECT_EQ(0, write_storage->append(\"AAAA\", 1234));\n  EXPECT_EQ(0, write_storage->append(\"BBBB\", 1234));\n  auto aaaa = write_storage->get(\"AAAA\");\n  ASSERT_TRUE(aaaa);\n  auto aaaa1 = aaaa->clone();\n  ASSERT_TRUE(aaaa1);\n  std::string hello = \"Hello world!!!\";\n  EXPECT_EQ(hello.size(), aaaa1->write(0, 
hello.data(), hello.size()));\n  auto hasHugePageInUse = [&]() {\n    std::ifstream smaps(\"/proc/self/smaps\");\n    if (!smaps.is_open()) {\n      std::cerr << \"Cannot open /proc/self/smaps\\n\";\n      return false;\n    }\n\n    std::string line;\n    while (std::getline(smaps, line)) {\n      // 查找 KernelPageSize 行\n      if (line.find(\"KernelPageSize:\") != std::string::npos) {\n        // 提取页大小（单位 kB）\n        size_t pos = line.find_first_of(\"0123456789\");\n        if (pos != std::string::npos) {\n          uint64_t pageSizeKB = std::stoull(line.substr(pos));\n          // std::cerr << pageSizeKB << std::endl;\n          if (pageSizeKB > 4) {  // 普通页是 4kB，大于即为 HugePage\n            std::cout << \"Found HugePage region with KernelPageSize: \"\n                      << pageSizeKB << \" kB\\n\";\n            return true;\n          }\n        }\n      }\n    }\n    return false;\n  };\n  if (!hasHugePageInUse()) {\n    EXPECT_EQ(0, 1);\n  }\n  EXPECT_EQ(0, write_storage->close());\n  // Reopen it\n  auto read_storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  EXPECT_EQ(0, write_storage->init(params));\n  EXPECT_EQ(0, read_storage->open(file_path, false));\n\n  IndexMeta meta2;\n  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(read_storage.get(), &meta2));\n  EXPECT_EQ(\"trainer\", meta2.trainer_name());\n  EXPECT_EQ(\"searcher\", meta2.searcher_name());\n  EXPECT_EQ(\"builder\", meta2.builder_name());\n  auto aaaa2 = read_storage->get(\"AAAA\");\n  ASSERT_TRUE(aaaa2);\n  const void *data;\n  EXPECT_EQ(hello.size(), aaaa2->read(0, &data, hello.size()));\n  auto aaaa3 = aaaa2->clone();\n  ASSERT_TRUE(aaaa3);\n  EXPECT_EQ(hello.size(), aaaa3->read(0, &data, hello.size()));\n  EXPECT_EQ(hello, std::string((const char *)data, hello.size()));\n}\n"
  },
  {
    "path": "tests/db/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_directory(common)\ncc_directories(crash_recovery)\ncc_directory(sqlengine)\ncc_directories(index)\n\nif(APPLE)\n  set(APPLE_FRAMEWORK_LIBS\n    -framework CoreFoundation\n    -framework CoreGraphics\n    -framework CoreData\n    -framework CoreText\n    -framework Security\n    -framework Foundation\n    -Wl,-U,_MallocExtension_ReleaseFreeMemory\n    -Wl,-U,_ProfilerStart\n    -Wl,-U,_ProfilerStop\n    -Wl,-U,_RegisterThriftProtocol\n  )\nendif()\n\nfile(GLOB ALL_TEST_SRCS *_test.cc)\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gmock(\n    NAME ${CC_TARGET} STRICT\n    LIBS zvec_db\n    zvec_proto\n    core_knn_flat\n    core_knn_flat_sparse\n    core_knn_hnsw\n    core_knn_hnsw_rabitq\n    core_knn_hnsw_sparse\n    core_knn_ivf\n    core_mix_reducer\n    core_metric\n    core_utility\n    core_quantizer\n    ${CMAKE_THREAD_LIBS_INIT}\n    ${CMAKE_DL_LIBS}\n    SRCS ${CC_SRCS} index/utils/utils.cc\n    INCS . .. ../../src\n    LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n  )\n  cc_test_suite(zvec_db ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/db/collection_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/collection.h\"\n#include <algorithm>\n#include <cstddef>\n#include <cstdint>\n#include <iostream>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <utility>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/utility/file_helper.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/type_helper.h\"\n#include \"index/utils/utils.h\"\n#include \"zvec/ailego/utility/float_helper.h\"\n#include \"zvec/db/config.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/options.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/status.h\"\n#include \"zvec/db/type.h\"\n\nusing namespace zvec;\nusing namespace zvec::test;\n\nstd::string col_path = \"test_collection\";\n\nclass CollectionTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    FileHelper::RemoveDirectory(col_path);\n  }\n\n  void TearDown() override {}\n};\n\nTEST_F(CollectionTest, Feature_CreateAndOpen_General) {\n  CollectionOptions options;\n  options.read_only_ = false;\n  options.enable_mmap_ = true;\n\n  std::string path = \"./demo\";\n\n  ailego::FileHelper::RemoveDirectory(path.c_str());\n\n  auto schema = TestHelper::CreateNormalSchema();\n  auto result = 
Collection::CreateAndOpen(path, *schema, options);\n  if (!result.has_value()) {\n    std::cout << result.error().message() << std::endl;\n  }\n  ASSERT_TRUE(result.has_value());\n  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));\n\n  auto col = result.value();\n  ASSERT_EQ(col->Path(), path);\n  ASSERT_EQ(col->Schema(), *schema);\n  ASSERT_EQ(col->Options(), options);\n  auto stats = col->Stats().value();\n  ASSERT_TRUE(stats.doc_count == 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n  // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 1);\n\n  ASSERT_EQ(col->Destroy(), Status::OK());\n\n  // after destroyed, every interface should return error\n  std::vector<Doc> empty_docs;\n  ASSERT_FALSE(col->Insert(empty_docs).has_value());\n  ASSERT_FALSE(col->Update(empty_docs).has_value());\n  ASSERT_FALSE(col->Delete({}).has_value());\n  ASSERT_FALSE(col->DeleteByFilter(\"\").ok());\n  ASSERT_FALSE(col->Fetch({}).has_value());\n  ASSERT_FALSE(col->Query({}).has_value());\n  ASSERT_FALSE(col->GroupByQuery({}).has_value());\n  ASSERT_FALSE(col->CreateIndex(\"\", nullptr).ok());\n  ASSERT_FALSE(col->DropIndex(\"\").ok());\n  ASSERT_FALSE(col->AddColumn(nullptr, \"\").ok());\n  ASSERT_FALSE(col->AlterColumn(\"\", \"\", nullptr).ok());\n  ASSERT_FALSE(col->DropColumn(\"\").ok());\n  ASSERT_FALSE(col->CreateIndex(\"\", nullptr).ok());\n  ASSERT_FALSE(col->Optimize().ok());\n  ASSERT_FALSE(col->Flush().ok());\n  ASSERT_FALSE(col->Destroy().ok());\n  ASSERT_FALSE(col->Options().has_value());\n  ASSERT_FALSE(col->Path().has_value());\n  ASSERT_FALSE(col->Stats().has_value());\n  ASSERT_FALSE(col->Schema().has_value());\n\n  ASSERT_FALSE(ailego::FileHelper::IsExist(path.c_str()));\n\n  // recreate\n  result = Collection::CreateAndOpen(path, *schema, options);\n  
ASSERT_TRUE(result.has_value());\n  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));\n\n  col = std::move(result.value());\n  col.reset();\n  col = nullptr;\n\n  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));\n\n  // reopen\n  result = Collection::Open(path, options);\n  ASSERT_TRUE(result.has_value());\n  col = std::move(result.value());\n  col.reset();\n\n  // reopen with read-only\n  options.read_only_ = true;\n  result = Collection::Open(path, options);\n  if (!result.has_value()) {\n    std::cout << result.error().message() << std::endl;\n  }\n  ASSERT_TRUE(result.has_value());\n  col = result.value();\n\n  ASSERT_EQ(col->Path(), path);\n  ASSERT_EQ(col->Schema(), *schema);\n  ASSERT_EQ(col->Options(), options);\n  stats = col->Stats().value();\n  ASSERT_TRUE(stats.doc_count == 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n  // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n  ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 1);\n\n  // when open with read-only, write operation should fail\n  ASSERT_FALSE(col->Flush().ok());\n  ASSERT_FALSE(col->Destroy().ok());\n  ASSERT_FALSE(col->Insert(empty_docs).has_value());\n  ASSERT_FALSE(col->Update(empty_docs).has_value());\n  ASSERT_FALSE(col->Delete({}).has_value());\n  ASSERT_FALSE(col->DeleteByFilter(\"\").ok());\n  ASSERT_FALSE(col->CreateIndex(\"\", nullptr).ok());\n  ASSERT_FALSE(col->DropIndex(\"\").ok());\n  ASSERT_FALSE(col->AddColumn(nullptr, \"\").ok());\n  ASSERT_FALSE(col->AlterColumn(\"\", \"\", nullptr).ok());\n  ASSERT_FALSE(col->DropColumn(\"\").ok());\n  ASSERT_FALSE(col->CreateIndex(\"\", nullptr).ok());\n  ASSERT_FALSE(col->Optimize().ok());\n\n  // two threads open with read_only\n  result = Collection::Open(path, options);\n  if (!result.has_value()) {\n    std::cout << result.error().message() << std::endl;\n  }\n  
ASSERT_TRUE(result.has_value());\n  col = result.value();\n\n  auto result1 = Collection::Open(path, options);\n  if (!result1.has_value()) {\n    std::cout << result1.error().message() << std::endl;\n  }\n  ASSERT_TRUE(result1.has_value());\n  auto col1 = result1.value();\n}\n\nTEST_F(CollectionTest, Feature_CreateAndOpen_Empty) {\n  int doc_count = 0;\n  int loop_count = 100;\n\n  // create with normal schema\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n\n  // Initial creation and insertion of 1000 docs\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  ASSERT_NE(collection, nullptr);\n\n  // Close and reopen, then insert 1 doc - repeat 100 times\n  for (int i = 0; i < loop_count; i++) {\n    // Close collection\n    collection.reset();\n\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value())\n        << \"Failed to reopen collection at iteration \" << i;\n    collection = std::move(result.value());\n\n    // Verify total doc count\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n  }\n}\n\nTEST_F(CollectionTest, Feature_CreateAndOpen_PathValidate) {\n  CollectionOptions options;\n  options.read_only_ = false;\n  options.enable_mmap_ = true;\n  auto schema = TestHelper::CreateNormalSchema();\n\n  {\n    std::vector<std::string> valid_paths = {\"abc\",\n                                            \"data123\",\n                                            \"my_collection\",\n                                            \"v1.2_alpha-beta\",\n                                            \".hidden\",\n                                            \"file.txt\",\n                                            \"/tmp/absolute/path\",\n                                            \"/tmp/a/b/c\",\n                                            
\"_\",\n                                            \"-\",\n                                            \"./tmp\"};\n    for (auto path : valid_paths) {\n      ailego::FileHelper::RemoveDirectory(path.c_str());\n\n      auto result = Collection::CreateAndOpen(path, *schema, options);\n      if (!result.has_value()) {\n        std::cout << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n    }\n  }\n\n  {\n    std::vector<std::string> inalid_paths = {\n        \" \",         \"\",\n        \"file name\",  // space\n        \"file$name\",  // $\n        \"a&b\",        // &\n        \"a|b\",        // |\n        \"a<b\",        // <\n        \"a>b\",        // >\n        \"a\\\"b\",       // \"\n        \"a'b\",        // '\n        \"a;b\",        // ;\n        \"a?b\",        // ?\n        \"a*b\",        // *\n        \"a[b]\",       // []\n        \"a{b}\",       // {}\n        \"a\\\\b\",       //\n        \"a~b\",        // ~\n        \"a#b\",        // #\n        \"a\\tb\",       // tab\n        \"a\\nb\",       // newline\n        \"a\\rb\",       // carriage return\n    };\n    for (auto path : inalid_paths) {\n      ailego::FileHelper::RemoveDirectory(path.c_str());\n\n      auto result = Collection::CreateAndOpen(path, *schema, options);\n      if (!result.has_value()) {\n        std::cout << result.error().message() << std::endl;\n      }\n      ASSERT_FALSE(result.has_value());\n    }\n  }\n}\n\nTEST_F(CollectionTest, Feature_CreateAndOpen_Repeated) {\n  int doc_count = 1000;\n  int loop_count = 100;\n\n  // create with normal schema\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n\n  // Initial creation and insertion of 1000 docs\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  ASSERT_NE(collection, nullptr);\n\n  // Close and reopen, then insert 1 doc - repeat 100 times\n 
 for (int i = 0; i < loop_count; i++) {\n    // Close collection\n    collection.reset();\n\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value())\n        << \"Failed to reopen collection at iteration \" << i;\n    collection = std::move(result.value());\n\n    // Insert 1 additional doc\n    auto s = TestHelper::CollectionInsertDoc(collection, doc_count + i,\n                                             doc_count + i + 1, false);\n    ASSERT_TRUE(s.ok()) << \"Failed to insert doc at iteration \" << i;\n\n    // Verify total doc count\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count + i + 1)\n        << \"Document count mismatch at iteration \" << i;\n  }\n\n  // Final verification - check all docs are present\n  for (int i = 0; i < doc_count + loop_count; i++) {\n    auto expect_doc = TestHelper::CreateDoc(i, *schema);\n    auto result = collection->Fetch({expect_doc.pk()});\n    ASSERT_TRUE(result.has_value()) << \"Failed to fetch doc \" << i;\n    ASSERT_EQ(result.value().size(), 1);\n    ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n    auto doc = result.value()[expect_doc.pk()];\n    if (doc == nullptr) {\n      std::cout << \"fetch failed, doc_id: \" << i << std::endl;\n    }\n    ASSERT_NE(doc, nullptr);\n    if (*doc != expect_doc) {\n      std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n      std::cout << \"expect_doc:\" << expect_doc.to_detail_string() << std::endl;\n    }\n    ASSERT_EQ(*doc, expect_doc);\n  }\n\n  // Clean up\n  ASSERT_TRUE(collection->Destroy().ok());\n}\n\nTEST_F(CollectionTest, Feature_CreateAndOpen_MultiThread) {\n  int doc_count = 0;\n\n  // create with normal schema\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n\n  // Initial creation and insertion of 1000 docs\n  auto collection = TestHelper::CreateCollectionWithDoc(\n     
 col_path, *schema, options, 0, doc_count, false);\n  ASSERT_NE(collection, nullptr);\n  collection.reset();\n\n  options.read_only_ = true;\n  std::atomic<bool> has_error{false};\n  auto open_readonly = [&]() {\n    auto coll = Collection::Open(col_path, options);\n    if (!coll.has_value()) {\n      LOG_ERROR(\"Failed to reopen collection: %s\", coll.error().c_str());\n      has_error.store(true);\n    }\n    std::this_thread::sleep_for(std::chrono::milliseconds(100));\n  };\n  std::vector<std::thread> threads;\n  for (int i = 0; i < 10; i++) {\n    threads.emplace_back(open_readonly);\n  }\n  for (auto &t : threads) {\n    t.join();\n  }\n  ASSERT_FALSE(has_error.load());\n}\n\nTEST_F(CollectionTest, Feature_Write_Batch_Validate) {\n  FileHelper::RemoveDirectory(col_path);\n\n  // create with normal schema\n  auto schema = TestHelper::CreateNormalSchema(false);\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                        options, 0, 0, false);\n\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n\n  // insert batch docs\n  auto insert_normal_status =\n      TestHelper::CollectionInsertDoc(collection, 0, 1024, false, false, true);\n  ASSERT_TRUE(insert_normal_status.ok());\n\n  auto insert_exceed_status =\n      TestHelper::CollectionInsertDoc(collection, 0, 1025, false, false, true);\n  ASSERT_FALSE(insert_exceed_status.ok());\n\n  // upsert batch docs\n  auto upsert_normal_status =\n      TestHelper::CollectionUpsertDoc(collection, 0, 1024, false, true);\n  ASSERT_TRUE(upsert_normal_status.ok());\n\n  auto upsert_exceed_status =\n      TestHelper::CollectionUpsertDoc(collection, 0, 1025, false, true);\n  ASSERT_FALSE(upsert_exceed_status.ok());\n}\n\nTEST_F(CollectionTest, Feature_Insert_General) {\n  auto func = [&](bool 
schema_nullable, bool doc_nullable,\n                  int doc_count = 1000) {\n    FileHelper::RemoveDirectory(col_path);\n\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema(schema_nullable);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, doc_nullable);\n\n\n    if (!schema_nullable && doc_nullable) {\n      ASSERT_EQ(collection, nullptr);\n      return;\n    } else {\n      ASSERT_NE(collection, nullptr);\n    }\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n    // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 1);\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    collection.reset();\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    // insert another 1000 docs\n    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,\n                                             doc_count * 2, doc_nullable);\n    ASSERT_TRUE(s.ok());\n\n    // validate fetch result\n    for (int i = 0; i < doc_count * 2; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n    // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 1);\n  };\n\n  func(false, false);\n  func(true, true);\n  func(true, false);\n  func(false, true);\n\n  func(false, false, 0);\n  func(false, false, 1);\n  func(false, false, 2);\n}\n\nTEST_F(CollectionTest, Feature_Insert_ScalarIndex) {\n  auto func = [&](bool nullable, bool enable_optimize, bool doc_nullable) {\n    std::cout << \"**** TEST INFO: nullable: \" << nullable\n              << \", enable_optimize: \" << enable_optimize\n              << \", doc_nullable: \" << doc_nullable << std::endl;\n\n    int doc_count = 1000;\n    // create with normal schema\n    auto schema =\n        TestHelper::CreateSchemaWithScalarIndex(nullable, enable_optimize);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, doc_nullable);\n\n    if (!nullable && 
doc_nullable) {\n      ASSERT_EQ(collection, nullptr);\n      return;\n    } else {\n      ASSERT_NE(collection, nullptr);\n    }\n\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    // insert another 1000 docs\n    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,\n                                             doc_count * 2, doc_nullable);\n    ASSERT_TRUE(s.ok());\n    ASSERT_TRUE(collection->Flush().ok());\n\n    // validate fetch result\n    for (int i = 0; i < doc_count * 2; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n  };\n\n  func(false, false, false);\n  func(false, true, false);\n  func(false, false, true);\n  func(true, false, true);\n  func(true, false, false);\n}\n\nTEST_F(CollectionTest, Feature_Insert_VectorIndex) {\n  auto func = [&](MetricType metric_type = MetricType::IP,\n                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {\n    int doc_count = 1000;\n    // create with normal schema\n    auto schema = TestHelper::CreateSchemaWithVectorIndex(\n        false, \"demo\",\n        std::make_shared<HnswIndexParams>(metric_type, 16, 20, quantize_type));\n    std::cout << \"init schema: \" << schema->to_string_formatted() << std::endl;\n\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      
ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (metric_type != MetricType::COSINE) {\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    }\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    collection.reset();\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (metric_type != MetricType::COSINE) {\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    }\n\n    // insert another 1000 docs\n    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,\n                                             doc_count * 2, false);\n    ASSERT_TRUE(s.ok());\n    ASSERT_TRUE(collection->Flush().ok());\n\n    // validate fetch result\n    for (int i = 0; i < doc_count * 2; i++) {\n      auto 
expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (metric_type != MetricType::COSINE) {\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    }\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n  };\n\n  func(MetricType::COSINE);\n  func(MetricType::L2);\n  func(MetricType::IP);\n  func(MetricType::COSINE, QuantizeType::FP16);\n  func(MetricType::IP, QuantizeType::FP16);\n}\n\nTEST_F(CollectionTest, Feature_Insert_SwitchSegment) {\n  auto func = [&](uint64_t segment_doc_count, uint64_t doc_count) {\n    std::cout << \"**** TEST INFO: segment_doc_count: \" << segment_doc_count\n              << \", insert_doc_count: \" << doc_count << std::endl;\n\n    FileHelper::RemoveDirectory(col_path);\n\n    // create with normal schema\n    auto schema = TestHelper::CreateSchemaWithMaxDocCount(segment_doc_count);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    collection.reset();\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    auto stats = collection->Stats().value();\n 
   ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    auto check_doc = [&](int total_doc_count) {\n      // validate fetch result\n      for (int i = 0; i < total_doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc(doc_count);\n    std::cout << \"check success 1\" << std::endl;\n\n    // insert another 1000 docs\n    auto s =\n        TestHelper::CollectionInsertDoc(collection, doc_count, doc_count * 2);\n    ASSERT_TRUE(s.ok());\n    ASSERT_TRUE(collection->Flush().ok());\n\n    // validate fetch result\n    check_doc(doc_count * 2);\n    std::cout << \"check success 2\" << std::endl;\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    collection.reset();\n    // Reopen collection\n    result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    check_doc(doc_count * 2);\n    std::cout << \"check success 3\" << std::endl;\n  };\n\n  func(1000, 499);\n  func(1000, 500);\n  func(1000, 501);\n  func(1000, 999);\n  func(1000, 1000);\n  func(1000, 1001);\n}\n\nTEST_F(CollectionTest, Feature_Insert_Duplicate) {\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 
1024 * 1024};\n  FileHelper::RemoveDirectory(col_path);\n\n  // insert first\n  auto collection =\n      TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 100);\n\n  // update all docs then\n  Result<WriteResults> s;\n  for (int i = 0; i < 100; i++) {\n    Doc new_doc = TestHelper::CreateDoc(i, *schema);\n    std::vector<Doc> docs = {new_doc};\n    s = collection->Insert(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n    if (!s.value()[0].ok()) {\n      std::cout << \"0: \" << s.value()[0].message() << std::endl;\n    }\n    ASSERT_FALSE(s.value()[0].ok());\n    ASSERT_EQ(s.value()[0].code(), StatusCode::ALREADY_EXISTS);\n  }\n\n  Doc new_doc = TestHelper::CreateDoc(101, *schema);\n  std::vector<Doc> docs = {new_doc};\n  s = collection->Insert(docs);\n  ASSERT_TRUE(s.has_value());\n  ASSERT_TRUE(s.value()[0].ok());\n}\n\nTEST_F(CollectionTest, Feature_Upsert_General) {\n  auto func = [&](bool schema_nullable, bool doc_nullable,\n                  int doc_count = 1000) {\n    FileHelper::RemoveDirectory(col_path);\n\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema(schema_nullable);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, doc_nullable, true);\n\n\n    if (!schema_nullable && doc_nullable) {\n      ASSERT_EQ(collection, nullptr);\n      return;\n    } else {\n      ASSERT_NE(collection, nullptr);\n    }\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n    // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 
1);\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    collection.reset();\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    // insert another 1000 docs\n    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,\n                                             doc_count * 2, doc_nullable);\n    ASSERT_TRUE(s.ok());\n\n    // validate fetch result\n    for (int i = 0; i < doc_count * 2; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    ASSERT_EQ(stats.index_completeness[\"dense_fp16\"], 1);\n    // ASSERT_EQ(stats.index_completeness[\"dense_fp64\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp32\"], 1);\n    ASSERT_EQ(stats.index_completeness[\"sparse_fp16\"], 1);\n  };\n\n  func(false, false);\n  func(true, true);\n  func(true, false);\n  func(false, true);\n\n  func(false, false, 0);\n  func(false, false, 1);\n  func(false, false, 2);\n}\n\nTEST_F(CollectionTest, Feature_Upsert_Incremental) {\n  auto func = [&](bool schema_nullable, bool doc_nullable,\n                  int doc_count = 1000) {\n    FileHelper::RemoveDirectory(col_path);\n\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema(schema_nullable);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, doc_nullable, true);\n\n    if (!schema_nullable && doc_nullable) {\n      ASSERT_EQ(collection, nullptr);\n      return;\n    } else {\n      ASSERT_NE(collection, nullptr);\n    }\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n   
   auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    ASSERT_NE(collection, nullptr);\n\n    collection.reset();\n    // Reopen collection\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    // upsert 1000 docs\n    auto s = TestHelper::CollectionInsertDoc(collection, 0, doc_count,\n                                             doc_nullable, true);\n    ASSERT_TRUE(s.ok());\n\n    // validate fetch result\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                     : TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  func(false, false);\n  func(true, true);\n  func(true, false);\n  func(false, true);\n\n  func(false, false, 0);\n  func(false, false, 1);\n  func(false, false, 2);\n}\n\nTEST_F(CollectionTest, Feature_Upsert_Nullable) {\n  auto check_doc = [&](const Collection::Ptr &collection, const std::string &pk,\n                       const Doc &expected_doc) {\n    auto result = collection->Fetch({pk});\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), 1);\n    ASSERT_EQ(result.value().count(pk), 1);\n    auto doc = result.value()[pk];\n    ASSERT_NE(doc, nullptr);\n    if (*doc != expected_doc) {\n      std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n      std::cout << \"expect_doc:\" << expected_doc.to_detail_string()\n                << std::endl;\n    }\n    ASSERT_EQ(*doc, expected_doc);\n  };\n\n  // schema not nulltable\n  {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    auto collection =\n        TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n    // insert one doc\n    auto insert_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    std::vector<Doc> docs = {insert_doc};\n    auto s = 
collection->Insert(docs);\n    ASSERT_TRUE(s.has_value());\n\n    // update doc\n    auto update_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    update_doc.remove(\"int32\");\n    docs = {update_doc};\n    s = collection->Upsert(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_FALSE(s.has_value());\n\n\n    update_doc.set_null(\"int32\");\n    docs = {update_doc};\n    s = collection->Upsert(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_FALSE(s.has_value());\n\n    // check doc\n    check_doc(collection, insert_doc.pk(), insert_doc);\n  }\n\n  // schema nulltable\n  {\n    auto schema = TestHelper::CreateNormalSchema(true);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    auto collection =\n        TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n    // insert one doc\n    auto insert_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    std::vector<Doc> docs = {insert_doc};\n    auto s = collection->Insert(docs);\n    ASSERT_TRUE(s.has_value());\n\n    // update doc\n    auto update_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    update_doc.remove(\"int32\");\n    docs = {update_doc};\n    s = collection->Upsert(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n    if (!s.value()[0].ok()) {\n      std::cout << s.value()[0].message() << std::endl;\n    }\n    ASSERT_TRUE(s.value()[0].ok());\n\n    // check doc\n    check_doc(collection, insert_doc.pk(), update_doc);\n\n    update_doc.set_null(\"int32\");\n    docs = {update_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n\n    // check doc\n    auto pk = 
insert_doc.pk();\n    auto result = collection->Fetch({pk});\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), 1);\n    ASSERT_EQ(result.value().count(pk), 1);\n    auto doc = result.value()[pk];\n    ASSERT_NE(doc, nullptr);\n    auto get_result = doc->get_field<int32_t>(\"int32\");\n    ASSERT_EQ(get_result.status(), Doc::FieldGetStatus::NOT_FOUND);\n  }\n}\n\n\nTEST_F(CollectionTest, Feature_Update_General) {\n  auto func = [&](int doc_count) {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    auto check_doc = [&](int updated_doc_count) {\n      for (int i = 0; i < updated_doc_count; i++) {\n        auto expect_doc =\n            TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n\n      // validate fetch result\n      for (int i = updated_doc_count; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        
ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    // update all docs then\n    Result<WriteResults> s;\n    for (int i = 0; i < doc_count; i++) {\n      Doc new_doc =\n          TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n      std::vector<Doc> docs = {new_doc};\n      s = collection->Update(docs);\n      if (!s.has_value()) {\n        std::cout << s.error().message() << std::endl;\n      }\n      ASSERT_TRUE(s.has_value());\n      if (!s.value()[0].ok()) {\n        std::cout << s.value()[0].message() << std::endl;\n      }\n      ASSERT_TRUE(s.value()[0].ok());\n\n      if (i % 100 == 0 || i == 1) {\n        check_doc(i + 1);\n        collection.reset();\n        auto result = Collection::Open(col_path, options);\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n        }\n        collection = std::move(result.value());\n\n        check_doc(i + 1);\n      }\n    }\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    if (!result.has_value()) {\n      std::cout << result.error().message() << std::endl;\n    }\n    collection = std::move(result.value());\n\n    check_doc(doc_count);\n  };\n\n  func(99);\n  func(100);\n  func(101);\n  func(1000);\n}\n\nTEST_F(CollectionTest, Feature_Update_Incremental) {\n  auto func = [&](int doc_count, bool doc_nullable) {\n    auto schema = TestHelper::CreateNormalSchema(doc_nullable);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, doc_nullable);\n\n    auto 
rewrite_doc = [&](Doc &doc) {\n      // update int32\n      int32_t new_int32 = 9999;\n      doc.set(\"int32\", new_int32);\n\n      // update float\n      float new_float = 9999.0;\n      doc.set(\"float\", new_float);\n\n      // update string\n      std::string new_string = \"string_value\";\n      doc.set(\"string\", new_string);\n    };\n\n    auto check_doc = [&](int updated_doc_count) {\n      for (int i = 0; i < updated_doc_count; i++) {\n        auto expect_doc =\n            TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n        rewrite_doc(expect_doc);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n\n      // validate fetch result\n      for (int i = updated_doc_count; i < doc_count; i++) {\n        auto expect_doc = doc_nullable ? 
TestHelper::CreateDocNull(i, *schema)\n                                       : TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    // update all docs then\n    Result<WriteResults> s;\n    for (int i = 0; i < doc_count; i++) {\n      Doc new_doc =\n          TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n      rewrite_doc(new_doc);\n      std::vector<Doc> docs = {new_doc};\n      s = collection->Update(docs);\n      if (!s.has_value()) {\n        std::cout << s.error().message() << std::endl;\n      }\n      ASSERT_TRUE(s.has_value());\n      if (!s.value()[0].ok()) {\n        std::cout << s.value()[0].message() << std::endl;\n      }\n      ASSERT_TRUE(s.value()[0].ok());\n\n      if (i % 100 == 0 || i == 1) {\n        check_doc(i + 1);\n        collection.reset();\n        auto result = Collection::Open(col_path, options);\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n        }\n        collection = std::move(result.value());\n\n        check_doc(i + 1);\n      }\n    }\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    if (!result.has_value()) {\n      std::cout << result.error().message() << std::endl;\n    }\n    collection = std::move(result.value());\n\n    check_doc(doc_count);\n  };\n\n  func(99, false);\n  func(99, true);\n  func(100, false);\n  func(100, true);\n  func(101, 
false);\n  func(101, true);\n  func(1000, false);\n  func(1000, true);\n}\n\nTEST_F(CollectionTest, Feature_Update_Nullable) {\n  auto check_doc = [&](const Collection::Ptr &collection, const std::string &pk,\n                       const Doc &expected_doc) {\n    auto result = collection->Fetch({pk});\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), 1);\n    ASSERT_EQ(result.value().count(pk), 1);\n    auto doc = result.value()[pk];\n    ASSERT_NE(doc, nullptr);\n    if (*doc != expected_doc) {\n      std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n      std::cout << \"expect_doc:\" << expected_doc.to_detail_string()\n                << std::endl;\n    }\n    ASSERT_EQ(*doc, expected_doc);\n  };\n\n  // schema not nulltable\n  {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    auto collection =\n        TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n    // insert one doc\n    auto insert_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    std::vector<Doc> docs = {insert_doc};\n    auto s = collection->Insert(docs);\n    ASSERT_TRUE(s.has_value());\n\n    // update doc\n    auto update_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    update_doc.remove(\"int32\");\n    docs = {update_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n    if (!s.value()[0].ok()) {\n      std::cout << s.value()[0].message() << std::endl;\n    }\n    ASSERT_TRUE(s.value()[0].ok());\n\n    update_doc.set_null(\"int32\");\n    docs = {update_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_FALSE(s.has_value());\n\n    // check doc\n    
check_doc(collection, insert_doc.pk(), insert_doc);\n  }\n\n  // schema nulltable\n  {\n    auto schema = TestHelper::CreateNormalSchema(true);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    auto collection =\n        TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n    // insert one doc\n    auto insert_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    std::vector<Doc> docs = {insert_doc};\n    auto s = collection->Insert(docs);\n    ASSERT_TRUE(s.has_value());\n\n    // update doc\n    auto update_doc = TestHelper::CreateDoc(0, *schema, TestHelper::MakePK(0));\n    update_doc.remove(\"int32\");\n    docs = {update_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n    if (!s.value()[0].ok()) {\n      std::cout << s.value()[0].message() << std::endl;\n    }\n    ASSERT_TRUE(s.value()[0].ok());\n\n    // check doc\n    check_doc(collection, insert_doc.pk(), insert_doc);\n\n    update_doc.set_null(\"int32\");\n    docs = {update_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n\n    // check doc\n    auto pk = insert_doc.pk();\n    auto result = collection->Fetch({pk});\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), 1);\n    ASSERT_EQ(result.value().count(pk), 1);\n    auto doc = result.value()[pk];\n    ASSERT_NE(doc, nullptr);\n    auto get_result = doc->get_field<int32_t>(\"int32\");\n    ASSERT_EQ(get_result.status(), Doc::FieldGetStatus::NOT_FOUND);\n  }\n}\n\nTEST_F(CollectionTest, Feature_Update_Empty) {\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n  FileHelper::RemoveDirectory(col_path);\n\n  // insert first\n  auto 
collection =\n      TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n  // update all docs then\n  Result<WriteResults> s;\n  for (int i = 0; i < 100; i++) {\n    Doc new_doc = TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n    std::vector<Doc> docs = {new_doc};\n    s = collection->Update(docs);\n    if (!s.has_value()) {\n      std::cout << s.error().message() << std::endl;\n    }\n    ASSERT_TRUE(s.has_value());\n    if (!s.value()[0].ok()) {\n      std::cout << \"0: \" << s.value()[0].message() << std::endl;\n    }\n    ASSERT_FALSE(s.value()[0].ok());\n    ASSERT_EQ(s.value()[0].code(), StatusCode::NOT_FOUND);\n  }\n}\n\nTEST_F(CollectionTest, Feature_Delete_General) {\n  auto func = [&](int doc_count) {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    auto check_doc = [&](int updated_doc_count) {\n      for (int i = 0; i < updated_doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_EQ(doc, nullptr);\n      }\n\n      // validate fetch result\n      for (int i = updated_doc_count; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n       
 if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    Result<WriteResults> s;\n    for (int i = 0; i < doc_count; i++) {\n      s = collection->Delete({TestHelper::MakePK(i)});\n      if (!s.has_value()) {\n        std::cout << s.error().message() << std::endl;\n      }\n      ASSERT_TRUE(s.has_value());\n      if (!s.value()[0].ok()) {\n        std::cout << s.value()[0].message() << std::endl;\n      }\n      ASSERT_TRUE(s.value()[0].ok());\n\n      if (i % 100 == 0 || i == 0) {\n        check_doc(i + 1);\n        collection.reset();\n        auto result = Collection::Open(col_path, options);\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n        }\n        collection = std::move(result.value());\n\n        check_doc(i + 1);\n\n        auto stats = collection->Stats().value();\n        ASSERT_EQ(stats.doc_count, doc_count - i - 1);\n      }\n    }\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    if (!result.has_value()) {\n      std::cout << result.error().message() << std::endl;\n    }\n    collection = std::move(result.value());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n\n    check_doc(doc_count);\n  };\n\n  func(99);\n  func(100);\n  func(101);\n  func(1000);\n}\n\nTEST_F(CollectionTest, Feature_Delete_Repeated) {\n  auto func = [&](int doc_count) {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    auto check_doc = [&](bool deleted) {\n      
for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        if (deleted) {\n          ASSERT_EQ(doc, nullptr);\n        } else {\n          ASSERT_EQ(*doc, expect_doc);\n        }\n      }\n    };\n\n    for (int i = 0; i < 10; i++) {\n      // delete first\n      Result<WriteResults> s;\n      for (int i = 0; i < doc_count; i++) {\n        s = collection->Delete({TestHelper::MakePK(i)});\n        if (!s.has_value()) {\n          std::cout << s.error().message() << std::endl;\n        }\n        ASSERT_TRUE(s.has_value());\n        if (!s.value()[0].ok()) {\n          std::cout << s.value()[0].message() << std::endl;\n        }\n        ASSERT_TRUE(s.value()[0].ok());\n      }\n\n      check_doc(true);\n\n      // insert then\n      auto st = TestHelper::CollectionInsertDoc(collection, 0, doc_count);\n      if (!st.ok()) {\n        std::cout << st.message() << std::endl;\n      }\n      ASSERT_TRUE(st.ok());\n    }\n  };\n\n  func(1);\n  func(100);\n}\n\nTEST_F(CollectionTest, Feature_DeleteByFilter_General) {\n  auto func = [&](int doc_count) {\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto check_doc = [&](int updated_doc_count) {\n      for (int i = 0; i < updated_doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n    
    ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        if (doc != nullptr) {\n          std::cout << \"doc: \" << doc->to_detail_string() << std::endl;\n        }\n        ASSERT_EQ(doc, nullptr);\n      }\n\n      // validate fetch result\n      for (int i = updated_doc_count; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    Status s;\n    for (int i = 0; i < doc_count; i++) {\n      s = collection->DeleteByFilter(\"int32 = \" + std::to_string(i));\n      if (!s.ok()) {\n        std::cout << s.message() << std::endl;\n      }\n      ASSERT_TRUE(s.ok());\n\n      if (i % 100 == 0 || i == 0) {\n        std::cout << \"check begin: \" << i << std::endl;\n\n        check_doc(i + 1);\n        collection.reset();\n        auto result = Collection::Open(col_path, options);\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n        }\n        collection = std::move(result.value());\n\n        check_doc(i + 1);\n\n        auto stats = collection->Stats().value();\n        ASSERT_EQ(stats.doc_count, doc_count - i - 1);\n      }\n    }\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    if (!result.has_value()) {\n      std::cout << result.error().message() << 
std::endl;\n    }\n    collection = std::move(result.value());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n\n    check_doc(doc_count);\n  };\n\n  func(99);\n  func(100);\n  func(101);\n  func(1000);\n}\n\nTEST_F(CollectionTest, Feature_DeleteByFilter_ScalarIndex) {\n  auto func = [&](int doc_count) {\n    auto schema = TestHelper::CreateNormalSchema(\n        false, \"demo\", std::make_shared<InvertIndexParams>(false));\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    FileHelper::RemoveDirectory(col_path);\n\n    // insert first\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto check_doc = [&](int updated_doc_count) {\n      for (int i = 0; i < updated_doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        if (doc != nullptr) {\n          std::cout << \"doc: \" << doc->to_detail_string() << std::endl;\n        }\n        ASSERT_EQ(doc, nullptr);\n      }\n\n      // validate fetch result\n      for (int i = updated_doc_count; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n   
                 << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    Status s;\n    for (int i = 0; i < doc_count; i++) {\n      s = collection->DeleteByFilter(\"int32 = \" + std::to_string(i));\n      if (!s.ok()) {\n        std::cout << s.message() << std::endl;\n      }\n      ASSERT_TRUE(s.ok());\n\n      if (i % 100 == 0 || i == 0) {\n        std::cout << \"check begin: \" << i << std::endl;\n\n        check_doc(i + 1);\n        collection.reset();\n        auto result = Collection::Open(col_path, options);\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n        }\n        collection = std::move(result.value());\n\n        check_doc(i + 1);\n\n        auto stats = collection->Stats().value();\n        ASSERT_EQ(stats.doc_count, doc_count - i - 1);\n      }\n    }\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    if (!result.has_value()) {\n      std::cout << result.error().message() << std::endl;\n    }\n    collection = std::move(result.value());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n\n    check_doc(doc_count);\n  };\n\n  func(1);\n  func(100);\n  func(101);\n  func(1000);\n}\n\nTEST_F(CollectionTest, Feature_MixedWrite_General) {\n  // case1: insert -> upsert -> update -> delete\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n  FileHelper::RemoveDirectory(col_path);\n\n  // insert first\n  auto collection =\n      TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);\n\n  for (int i = 0; i < 100; i++) {\n    // std::cout << \"insert: \" << i << std::endl;\n\n    // insert\n    auto new_doc = TestHelper::CreateDoc(i, *schema);\n    std::vector<Doc> new_docs = {new_doc};\n    auto res = collection->Insert(new_docs);\n    ASSERT_TRUE(res.has_value());\n    ASSERT_TRUE(res.value()[0].ok());\n\n    // 
fetch\n    auto docs = collection->Fetch({TestHelper::MakePK(i)});\n    ASSERT_TRUE(docs.has_value());\n    ASSERT_EQ(docs.value().size(), 1);\n    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);\n    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, i + 1);\n\n    // upsert\n    new_doc = TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));\n    new_docs = {new_doc};\n    res = collection->Upsert(new_docs);\n    ASSERT_TRUE(res.has_value());\n    ASSERT_TRUE(res.value()[0].ok());\n\n    // fetch\n    docs = collection->Fetch({TestHelper::MakePK(i)}).value();\n    ASSERT_TRUE(docs.has_value());\n    ASSERT_EQ(docs.value().size(), 1);\n    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);\n    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, i + 1);\n\n    // update\n    new_doc = TestHelper::CreateDoc(i + 2, *schema, TestHelper::MakePK(i));\n    new_docs = {new_doc};\n    res = collection->Update(new_docs);\n    ASSERT_TRUE(res.has_value());\n    ASSERT_TRUE(res.value()[0].ok());\n\n    // fetch\n    docs = collection->Fetch({TestHelper::MakePK(i)}).value();\n    ASSERT_TRUE(docs.has_value());\n    ASSERT_EQ(docs.value().size(), 1);\n    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);\n    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, i + 1);\n\n    // delete\n    res = collection->Delete({TestHelper::MakePK(i)});\n    ASSERT_TRUE(res.has_value());\n    ASSERT_TRUE(res.value()[0].ok());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, i);\n\n    // insert again\n    new_doc = TestHelper::CreateDoc(i, *schema);\n    new_docs = {new_doc};\n    res = collection->Insert(new_docs);\n    ASSERT_TRUE(res.has_value());\n    
ASSERT_TRUE(res.value()[0].ok());\n\n    // fetch\n    docs = collection->Fetch({TestHelper::MakePK(i)});\n    ASSERT_TRUE(docs.has_value());\n    ASSERT_EQ(docs.value().size(), 1);\n    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);\n    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, i + 1);\n  }\n}\n\nTEST_F(CollectionTest, Feature_CreateIndex_General) {\n  // create empty collection\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                        options, 0, 0, false);\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n\n  auto index_params = std::make_shared<HnswIndexParams>(MetricType::IP);\n  auto s = collection->CreateIndex(\"dense_fp32\", index_params);\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  auto new_index_params = std::make_shared<HnswIndexParams>(MetricType::COSINE);\n  s = collection->CreateIndex(\"dense_fp32\", index_params);\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  s = collection->CreateIndex(\"dense_fp32_invalid\", index_params);\n  ASSERT_FALSE(s.ok());\n}\n\nTEST_F(CollectionTest, Feature_CreateIndex_Vector) {\n  auto func = [&](std::string field_name,\n                  MetricType metric_type = MetricType::IP,\n                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {\n    std::cout << \"**** Test field: \" << field_name\n              << \", metric: \" << MetricTypeCodeBook::AsString(metric_type)\n              << \", quantize: \" << QuantizeTypeCodeBook::AsString(quantize_type)\n              << std::endl;\n\n    
FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 10;\n\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n\n    auto index_params =\n        std::make_shared<HnswIndexParams>(metric_type, 16, 200, quantize_type);\n    auto s = collection->CreateIndex(field_name, index_params);\n    std::cout << \"status: \" << s.message()\n              << \", code: \" << GetDefaultMessage(s.code()) << std::endl;\n    ASSERT_TRUE(s.ok());\n\n    VectorQuery query;\n    query.topk_ = doc_count;\n    query.field_name_ = field_name;\n    query.include_vector_ = true;\n    auto field_scheama = schema->get_vector_field(field_name);\n    ASSERT_NE(field_scheama, nullptr);\n    ASSERT_TRUE(field_scheama->is_vector_field());\n\n    bool is_dense = field_scheama->is_dense_vector();\n\n    std::vector<float> vector;\n    std::vector<ailego::Float16> vector_fp16;\n    std::vector<int8_t> vector_int8;\n    std::pair<std::vector<uint32_t>, std::vector<float>> sparse_vector;\n    std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>\n        sparse_vector_fp16;\n    if (is_dense) {\n      // std::cout << \"vector: \" << vector.size() << std::endl;\n      if (field_scheama->data_type() == DataType::VECTOR_FP16) {\n        vector_fp16 = std::vector<ailego::Float16>(field_scheama->dimension(),\n                                                   ailego::Float16(1.0f));\n        vector_fp16[0] = 0;\n        query.query_vector_.assign(\n            (char *)vector_fp16.data(),\n            vector_fp16.size() * sizeof(ailego::Float16));\n      } else if (field_scheama->data_type() == 
DataType::VECTOR_FP32) {\n        vector = std::vector<float>(field_scheama->dimension(), 1);\n        vector[0] = 0;\n        query.query_vector_.assign((char *)vector.data(),\n                                   vector.size() * sizeof(float));\n      } else {\n        vector_int8 = std::vector<int8_t>(field_scheama->dimension(), 1);\n        vector_int8[0] = 0;\n        query.query_vector_.assign((char *)vector_int8.data(),\n                                   vector_int8.size() * sizeof(int8_t));\n      }\n    } else {\n      if (field_scheama->data_type() == DataType::SPARSE_VECTOR_FP32) {\n        sparse_vector = {{1}, {1}};\n        query.query_sparse_indices_.assign(\n            (char *)sparse_vector.first.data(),\n            sparse_vector.first.size() * sizeof(uint32_t));\n        query.query_sparse_values_.assign(\n            (char *)sparse_vector.second.data(),\n            sparse_vector.second.size() * sizeof(float));\n      } else {\n        sparse_vector_fp16 = {{1}, {ailego::Float16(1.0f)}};\n        query.query_sparse_indices_.assign(\n            (char *)sparse_vector_fp16.first.data(),\n            sparse_vector_fp16.first.size() * sizeof(uint32_t));\n        query.query_sparse_values_.assign(\n            (char *)sparse_vector_fp16.second.data(),\n            sparse_vector_fp16.second.size() * sizeof(ailego::Float16));\n      }\n    }\n    auto query_result = collection->Query(query);\n    if (!query_result.has_value()) {\n      std::cout << \"status: \" << query_result.error().message() << std::endl;\n      ASSERT_TRUE(false);\n    }\n    ASSERT_TRUE(query_result.has_value());\n    ASSERT_EQ(query_result.value().size(), doc_count);\n\n    float last_score;\n    for (size_t i = 0; i < query_result.value().size(); i++) {\n      auto pk = query_result.value()[i]->pk();\n      auto score = query_result.value()[i]->score();\n      std::cout << \"top \" << i << \": \" << pk << \", score: \" << score\n                << std::endl;\n\n      auto 
expect_doc =\n          TestHelper::CreateDoc(TestHelper::ExtractDocId(pk), *schema);\n      float expect_score;\n      if (is_dense) {\n        if (field_scheama->data_type() == DataType::VECTOR_FP16) {\n          auto query_result_vector =\n              expect_doc.get<std::vector<ailego::Float16>>(field_name);\n          ASSERT_TRUE(query_result_vector.has_value());\n          expect_score = distance_dense(\n              vector_fp16, query_result_vector.value(), metric_type);\n        } else if (field_scheama->data_type() == DataType::VECTOR_FP32) {\n          auto query_result_vector =\n              expect_doc.get<std::vector<float>>(field_name);\n          ASSERT_TRUE(query_result_vector.has_value());\n          expect_score =\n              distance_dense(vector, query_result_vector.value(), metric_type);\n        } else {\n          auto query_result_vector =\n              expect_doc.get<std::vector<int8_t>>(field_name);\n          ASSERT_TRUE(query_result_vector.has_value());\n          expect_score = distance_dense(\n              vector_int8, query_result_vector.value(), metric_type);\n        }\n      } else {\n        if (field_scheama->data_type() == DataType::SPARSE_VECTOR_FP32) {\n          auto query_result_vector =\n              expect_doc\n                  .get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                      field_name);\n          ASSERT_TRUE(query_result_vector.has_value());\n          expect_score =\n              distance_sparse(sparse_vector, query_result_vector.value());\n        } else {\n          auto query_result_vector = expect_doc.get<\n              std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n              field_name);\n          ASSERT_TRUE(query_result_vector.has_value());\n          expect_score =\n              distance_sparse(sparse_vector_fp16, query_result_vector.value());\n        }\n      }\n      std::cout.precision(8);\n      std::cout << \"score: \" << score << \", 
expect_score: \" << expect_score\n                << std::endl;\n      // ASSERT_FLOAT_EQ(score, expect_score);\n      if (i > 0) {\n        if (metric_type == MetricType::L2) {\n          ASSERT_GE(score, last_score);\n        } else if (metric_type == MetricType::IP) {\n          ASSERT_LE(score, last_score);\n        }\n      }\n      last_score = score;\n    }\n\n    auto new_schema = std::make_shared<CollectionSchema>(*schema);\n    s = new_schema->add_index(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n\n\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (metric_type != MetricType::COSINE) {\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    }\n\n    collection.reset();\n\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n\n    collection = result.value();\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n    
  if (metric_type != MetricType::COSINE) {\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    }\n\n    // insert another 100 docs\n    s = TestHelper::CollectionInsertDoc(collection, doc_count, doc_count + 100,\n                                        false);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);\n    ASSERT_FLOAT_EQ(collection->Stats().value().index_completeness[field_name],\n                    doc_count * 1.0 / (doc_count + 100));\n\n    s = collection->Flush();\n    ASSERT_TRUE(s.ok());\n\n    s = collection->CreateIndex(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);\n    ASSERT_FLOAT_EQ(collection->Stats().value().index_completeness[field_name],\n                    doc_count * 1.0 / (doc_count + 100));\n  };\n\n  func(\"dense_fp32\", MetricType::L2);\n  func(\"dense_fp32\", MetricType::COSINE);\n  func(\"dense_fp32\", MetricType::IP);\n  func(\"dense_fp32\", MetricType::L2, QuantizeType::FP16);\n  func(\"dense_fp32\", MetricType::COSINE, QuantizeType::FP16);\n  func(\"dense_fp32\", MetricType::IP, QuantizeType::FP16);\n  func(\"dense_fp16\");\n  func(\"dense_int8\");\n  func(\"sparse_fp32\");\n  func(\"sparse_fp16\");\n}\n\nTEST_F(CollectionTest, Feature_CreateIndex_Scalar) {\n  auto func = [&](std::string field_name, bool enable_optimize,\n                  IndexParams::Ptr scalar_index_params = nullptr) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    auto schema =\n        TestHelper::CreateNormalSchema(false, \"demo\", scalar_index_params);\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = 
TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    auto index_params = std::make_shared<InvertIndexParams>(enable_optimize);\n    auto s = collection->CreateIndex(field_name, index_params);\n    std::cout << \"status: \" << s.message()\n              << \", code: \" << GetDefaultMessage(s.code()) << std::endl;\n    ASSERT_TRUE(s.ok());\n\n    auto new_schema = std::make_shared<CollectionSchema>(*schema);\n    s = new_schema->add_index(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    collection.reset();\n\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n\n    collection = result.value();\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      
ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n\n    // insert another 100 docs\n    s = TestHelper::CollectionInsertDoc(collection, doc_count, doc_count + 100,\n                                        false);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);\n    ASSERT_FLOAT_EQ(\n        collection->Stats().value().index_completeness[\"dense_fp32\"], 1);\n\n    s = collection->Flush();\n    ASSERT_TRUE(s.ok());\n\n    s = collection->CreateIndex(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);\n    ASSERT_FLOAT_EQ(\n        collection->Stats().value().index_completeness[\"dense_fp32\"], 1);\n\n    for (int i = 0; i < doc_count + 100; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  func(\"int32\", true);\n  func(\"int32\", false);\n\n  func(\"int32\", false, std::make_shared<InvertIndexParams>(true));\n  func(\"int32\", true, 
std::make_shared<InvertIndexParams>(true));\n}\n\nTEST_F(CollectionTest, Feature_DropIndex_General) {\n  // create empty collection\n  auto schema = TestHelper::CreateSchemaWithVectorIndex();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1204};\n  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                        options, 0, 0, false);\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  ASSERT_EQ(collection->Schema(), *schema);\n\n\n  auto s = collection->DropIndex(\"dense_fp32_invalid\");\n  ASSERT_FALSE(s.ok());\n\n  s = collection->DropIndex(\"dense_fp32\");\n  if (!s.ok()) {\n    std::cout << \"drop index err: \" << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  s = collection->DropIndex(\"dense_fp32\");\n  ASSERT_TRUE(s.ok());\n\n  auto new_schema = std::make_shared<CollectionSchema>(*schema);\n  s = new_schema->drop_index(\"dense_fp32\");\n  ASSERT_TRUE(s.ok());\n  ASSERT_EQ(*new_schema, collection->Schema());\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  ASSERT_EQ(*collection->Schema()\n                 .value()\n                 .get_vector_field(\"dense_fp32\")\n                 ->index_params(),\n            DefaultVectorIndexParams);\n\n  s = collection->DropIndex(\"dense_fp32\");\n  if (!s.ok()) {\n    std::cout << \"drop index err: \" << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  auto schema1 = collection->Schema().value();\n\n  collection.reset();\n\n  auto result = Collection::Open(col_path, options);\n  ASSERT_TRUE(result.has_value());\n\n  collection = std::move(result.value());\n  auto schema2 = collection->Schema().value();\n\n  if (schema1 != schema2) {\n    std::cout << \"schema1: \" << 
schema1.to_string_formatted() << std::endl;\n    std::cout << \"schema2: \" << schema2.to_string_formatted() << std::endl;\n  }\n  ASSERT_EQ(schema1, schema2);\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n}\n\nTEST_F(CollectionTest, Feature_DropIndex_Vector) {\n  auto func = [&](const std::string &field_name, bool add_before_drop = true) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    // create empty collection\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1204};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n    ASSERT_EQ(collection->Schema(), *schema);\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    // create index first\n    auto index_params = std::make_shared<HnswIndexParams>(MetricType::IP);\n    auto s = collection->CreateIndex(field_name, index_params);\n    
ASSERT_TRUE(s.ok());\n    auto new_schema = std::make_shared<CollectionSchema>(*schema);\n    s = new_schema->add_index(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n\n    int new_doc_count = doc_count;\n    if (add_before_drop) {\n      new_doc_count += doc_count;\n      s = TestHelper::CollectionInsertDoc(collection, doc_count, new_doc_count);\n      ASSERT_TRUE(s.ok());\n    }\n\n    // then drop index field_name\n    s = collection->DropIndex(field_name);\n    ASSERT_TRUE(s.ok());\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n    s = new_schema->drop_index(field_name);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, new_doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, new_doc_count);\n    ASSERT_EQ(stats.index_completeness[field_name], 1);\n  };\n\n  func(\"dense_fp32\", true);\n  func(\"dense_fp32\", false);\n  func(\"sparse_fp32\");\n}\n\nTEST_F(CollectionTest, Feature_DropIndex_Scalar) {\n  auto func = [&](std::string field_name, bool enable_optimize) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    auto schema =\n        TestHelper::CreateSchemaWithScalarIndex(false, enable_optimize);\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = 
TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n\n    auto s = collection->DropIndex(field_name);\n    ASSERT_TRUE(s.ok());\n\n    auto new_schema = std::make_shared<CollectionSchema>(*schema);\n    s = new_schema->drop_index(field_name);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n  };\n\n  func(\"int32\", true);\n  func(\"int32\", false);\n}\n\nTEST_F(CollectionTest, Feature_DropIndex_AfterCreate) {\n  auto func = [&](std::string field_name, bool enable_optimize) {\n    
FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n\n    auto index_params = std::make_shared<InvertIndexParams>(enable_optimize);\n    auto s = collection->CreateIndex(field_name, index_params);\n    std::cout << \"status: \" << s.message()\n              << \", code: \" << GetDefaultMessage(s.code()) << std::endl;\n    ASSERT_TRUE(s.ok());\n\n    auto new_schema = std::make_shared<CollectionSchema>(*schema);\n    s = new_schema->add_index(field_name, index_params);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n\n    s = collection->DropIndex(field_name);\n    ASSERT_TRUE(s.ok());\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n    s = 
new_schema->drop_index(field_name);\n    ASSERT_TRUE(s.ok());\n    ASSERT_EQ(*new_schema, collection->Schema());\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n  };\n\n  func(\"int32\", true);\n  func(\"int32\", false);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_General) {\n  auto func = [](int concurrency) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    // create empty collection\n    auto schema = TestHelper::CreateSchemaWithVectorIndex();\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    ASSERT_TRUE(collection->Flush().ok());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n    auto s = collection->Optimize(OptimizeOptions{concurrency});\n    if (!s.ok()) {\n      std::cout << s.message() << std::endl;\n    }\n    ASSERT_TRUE(s.ok());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    
ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n  };\n\n  func(0);\n  func(4);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_Repeated) {\n  int doc_count = 1000;\n\n  // create empty collection\n  auto schema = TestHelper::CreateSchemaWithVectorIndex();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  auto check_doc = [&]() {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      if (doc == nullptr) {\n        std::cout << \"doc is null, pk: \" << expect_doc.pk() << std::endl;\n      }\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  check_doc();\n  std::cout << \"check success 1\" << std::endl;\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n  auto s = collection->Optimize();\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  
ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  int loop_count = 10;\n  uint64_t start_doc_id = doc_count;\n  for (int i = 0; i < loop_count; i++) {\n    std::cout << \"loop: \" << i << \" begin\" << std::endl;\n\n    s = TestHelper::CollectionInsertDoc(collection, start_doc_id,\n                                        start_doc_id + 1);\n    ASSERT_TRUE(s.ok());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count + i + 1);\n    ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"],\n                    1.0 * (doc_count + i) / (doc_count + i + 1));\n\n\n    s = collection->Optimize();\n    if (!s.ok()) {\n      std::cout << \"optimize failed: \" << s.message() << std::endl;\n    }\n    ASSERT_TRUE(s.ok());\n\n    start_doc_id += 1;\n\n    std::cout << \"loop: \" << i << \" end\" << std::endl;\n  }\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count + loop_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  doc_count += loop_count;\n  check_doc();\n  std::cout << \"check success 2\" << std::endl;\n}\n\nTEST_F(CollectionTest, Feature_Optimize_MetricType) {\n  auto func = [&](MetricType metric_type,\n                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    // create empty collection\n    auto schema = TestHelper::CreateSchemaWithVectorIndex(\n        false, \"demo\",\n        std::make_shared<HnswIndexParams>(metric_type, 16, 200, quantize_type));\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n    
    ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (metric_type != MetricType::COSINE) {\n          if (*doc != expect_doc) {\n            std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n            std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                      << std::endl;\n          }\n          ASSERT_EQ(*doc, expect_doc);\n        }\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    ASSERT_TRUE(collection->Flush().ok());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n    auto s = collection->Optimize();\n    ASSERT_TRUE(s.ok());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n\n    for (int i = 1; i < 2; i++) {\n      auto query_doc = TestHelper::CreateDoc(i, *schema);\n      // std::cout << query_doc.to_detail_string() << std::endl;\n\n      VectorQuery query;\n      query.topk_ = 10;\n      query.include_vector_ = true;\n      query.field_name_ = \"dense_fp32\";\n\n      auto vector = query_doc.get<std::vector<float>>(\"dense_fp32\");\n      ASSERT_TRUE(vector.has_value());\n      query.query_vector_.assign((char *)vector.value().data(),\n                                 vector.value().size() * sizeof(float));\n\n\n      auto result = collection->Query(query);\n      if (!result.has_value()) {\n        std::cout << \"err: \" << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));\n    }\n  };\n\n  func(MetricType::L2);\n  
func(MetricType::COSINE);\n  func(MetricType::IP);\n  func(MetricType::L2, QuantizeType::FP16);\n  func(MetricType::COSINE, QuantizeType::FP16);\n  func(MetricType::IP, QuantizeType::FP16);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_Delete) {\n  int doc_count = 1000;\n\n  // create empty collection\n  auto schema = TestHelper::CreateSchemaWithVectorIndex();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  auto check_doc = [&]() {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  check_doc();\n  std::cout << \"check success 1\" << std::endl;\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n  auto s = collection->Optimize();\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  check_doc();\n  std::cout << \"check success 2\" << std::endl;\n\n  // delete by filter\n  s = collection->DeleteByFilter(\"int32 < 10\");\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  
ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count - 10);\n\n  // delete all docs\n  std::vector<std::string> pks;\n  for (int i = 10; i < doc_count; ++i) {\n    pks.push_back(TestHelper::MakePK(i));\n  }\n  auto res = collection->Delete(pks);\n  ASSERT_TRUE(res.has_value());\n  for (auto &r : res.value()) {\n    ASSERT_TRUE(r.ok());\n  }\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  s = collection->Optimize();\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  collection.reset();\n  auto result = Collection::Open(col_path, options);\n  ASSERT_TRUE(result.has_value());\n  collection = std::move(result.value());\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, 0);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_NormalSchema) {\n  int doc_count = 1000;\n\n  // create empty collection\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  auto check_doc = [&]() {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  
check_doc();\n  std::cout << \"check success 1\" << std::endl;\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  auto s = collection->Optimize();\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  check_doc();\n  std::cout << \"check success 2\" << std::endl;\n\n  collection.reset();\n  auto result = Collection::Open(col_path, options);\n  ASSERT_TRUE(result.has_value());\n  collection = std::move(result.value());\n\n  check_doc();\n  std::cout << \"check success 3\" << std::endl;\n}\n\nTEST_F(CollectionTest, Feature_Optimize_ExceedMaxDocCount) {\n  auto func = [&](std::vector<int> segments_count, bool delete_all = false) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int max_doc_per_count = 1000;\n\n    // create empty collection\n    auto schema = TestHelper::CreateNormalSchema(\n        false, \"demo\", nullptr,\n        std::make_shared<HnswIndexParams>(MetricType::IP), max_doc_per_count);\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n    auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                          options, 0, 0, false);\n\n    auto check_doc = [&](int doc_count) {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       
doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    int accu_seg_doc_count = 0;\n    for (auto doc_count : segments_count) {\n      auto s = TestHelper::CollectionInsertDoc(collection, accu_seg_doc_count,\n                                               accu_seg_doc_count + doc_count);\n      // Check the insert status here: `s` is reassigned by Optimize() below,\n      // so a failed insert would otherwise go unnoticed.\n      ASSERT_TRUE(s.ok());\n\n      check_doc(accu_seg_doc_count + doc_count);\n      std::cout << \"check success 1\" << std::endl;\n\n      ASSERT_TRUE(collection->Flush().ok());\n      auto stats = collection->Stats().value();\n      ASSERT_EQ(stats.doc_count, accu_seg_doc_count + doc_count);\n      ASSERT_FLOAT_EQ(\n          stats.index_completeness[\"dense_fp32\"],\n          accu_seg_doc_count * 1.0 / (accu_seg_doc_count + doc_count));\n\n      s = collection->Optimize();\n      if (!s.ok()) {\n        std::cout << s.message() << std::endl;\n      }\n      ASSERT_TRUE(s.ok());\n\n      stats = collection->Stats().value();\n      ASSERT_EQ(stats.doc_count, accu_seg_doc_count + doc_count);\n      ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n      check_doc(accu_seg_doc_count + doc_count);\n      std::cout << \"check success 2\" << std::endl;\n\n      collection.reset();\n      auto result = Collection::Open(col_path, options);\n      ASSERT_TRUE(result.has_value());\n      collection = std::move(result.value());\n\n      check_doc(accu_seg_doc_count + doc_count);\n      std::cout << \"check success 3\" << std::endl;\n\n      accu_seg_doc_count += doc_count;\n    }\n\n    // delete all docs\n    if (delete_all) {\n      std::vector<std::string> pks;\n      for (int i = 0; i < accu_seg_doc_count; ++i) {\n        pks.push_back(TestHelper::MakePK(i));\n      }\n      auto res = collection->Delete(pks);\n      ASSERT_TRUE(res.has_value());\n      for (auto &r : res.value()) {\n        ASSERT_TRUE(r.ok());\n      }\n    
}\n\n    auto s = collection->Optimize();\n    if (!s.ok()) {\n      std::cout << s.message() << std::endl;\n    }\n    ASSERT_TRUE(s.ok());\n\n    if (delete_all) {\n      check_doc(0);\n    } else {\n      check_doc(accu_seg_doc_count);\n    }\n    std::cout << \"check success 3\" << std::endl;\n\n    auto stats = collection->Stats().value();\n    if (delete_all) {\n      ASSERT_EQ(stats.doc_count, 0);\n    } else {\n      ASSERT_EQ(stats.doc_count, accu_seg_doc_count);\n    }\n    ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 1.0);\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    stats = collection->Stats().value();\n    if (delete_all) {\n      ASSERT_EQ(stats.doc_count, 0);\n    } else {\n      ASSERT_EQ(stats.doc_count, accu_seg_doc_count);\n    }\n    ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 1.0);\n  };\n\n  func({600, 600});\n  func({600, 400});\n  func({600, 401});\n\n  func({600, 600}, true);\n  func({600, 400}, true);\n  func({600, 401}, true);\n\n  func(std::vector<int>(100, 1));\n  func(std::vector<int>(100, 1), true);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_Rebuild) {\n  FileHelper::RemoveDirectory(col_path);\n\n  int max_doc_per_count = 1000;\n\n  // create empty collection\n  auto schema = TestHelper::CreateNormalSchema(\n      false, \"demo\", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),\n      max_doc_per_count);\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n  // create seg1\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, max_doc_per_count, false);\n\n  auto check_doc = [&](int doc_count, bool delete_half = false) {\n    for (int i = 0; i < doc_count; i++) {\n      if (delete_half) {\n        if (i % 2 == 0) {\n          continue;\n        }\n      }\n\n      auto expect_doc = TestHelper::CreateDoc(i, 
*schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n  // create seg2\n  auto s = TestHelper::CollectionInsertDoc(\n      collection, max_doc_per_count, max_doc_per_count + max_doc_per_count);\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count + max_doc_per_count);\n  ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n  // create seg3\n  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count * 2,\n                                      max_doc_per_count * 3);\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3);\n  ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n  check_doc(max_doc_per_count * 3);\n  std::cout << \"check success 1\" << std::endl;\n\n  // delete half\n  std::vector<std::string> pks;\n  for (int j = 0; j < 3 * max_doc_per_count; j++) {\n    if (j % 2 == 0) {\n      pks.push_back(TestHelper::MakePK(j));\n    }\n  }\n  auto res = collection->Delete(pks);\n  ASSERT_TRUE(res.has_value());\n  for (auto &r : res.value()) {\n    ASSERT_TRUE(r.ok());\n  }\n\n  s = collection->Optimize();\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  
check_doc(max_doc_per_count * 3, true);\n  std::cout << \"check success 2\" << std::endl;\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 1.5);\n  ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_IndexOperation) {\n  FileHelper::RemoveDirectory(col_path);\n\n  int max_doc_per_count = 1000;\n\n  // create empty collection\n  auto schema = TestHelper::CreateNormalSchema(\n      false, \"demo\", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),\n      max_doc_per_count);\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n  // create seg1\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, max_doc_per_count / 2, false);\n\n  auto check_doc = [&](int doc_count) {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, *schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count / 2);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n  auto s = collection->DropIndex(\"dense_fp32\");\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count / 2);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  // create seg2\n  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count / 2,\n          
                            max_doc_per_count);\n  ASSERT_TRUE(s.ok());\n  s = collection->CreateIndex(\n      \"dense_fp32\", std::make_shared<HnswIndexParams>(MetricType::IP));\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  // create seg3\n  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count,\n                                      max_doc_per_count * 3 / 2);\n  ASSERT_TRUE(s.ok());\n  s = collection->DropIndex(\"dense_fp32\");\n  ASSERT_TRUE(s.ok());\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n  ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  check_doc(max_doc_per_count * 3 / 2);\n  std::cout << \"check success 1\" << std::endl;\n\n  s = collection->Optimize();\n  if (!s.ok()) {\n    std::cout << s.message() << std::endl;\n  }\n  ASSERT_TRUE(s.ok());\n\n  check_doc(max_doc_per_count * 3 / 2);\n  std::cout << \"check success 2\" << std::endl;\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n  ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n  // reset collection\n  collection.reset();\n  auto result = Collection::Open(col_path, options);\n  // Verify the reopen succeeded before dereferencing the result.\n  ASSERT_TRUE(result.has_value());\n  collection = std::move(result.value());\n\n  check_doc(max_doc_per_count * 3 / 2);\n  std::cout << \"check success 3\" << std::endl;\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n  ASSERT_FLOAT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n}\n\nTEST_F(CollectionTest, Feature_Optimize_Temp) {\n  auto schema = TestHelper::CreateTempSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n  auto collection =\n      TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 10);\n\n  auto s = collection->Optimize(OptimizeOptions{1});\n  
ASSERT_TRUE(s.ok());\n}\n\nTEST_F(CollectionTest, Feature_Query_Validate) {\n  FileHelper::RemoveDirectory(col_path);\n\n  int doc_count = 1100;\n  // create with normal schema\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                        options, 0, doc_count);\n\n  ASSERT_NE(collection, nullptr);\n  std::string field_name = \"dense_fp32\";\n  auto query_doc = TestHelper::CreateDoc(1, *schema);\n\n  {\n    VectorQuery query;\n    query.topk_ = 1024;\n    query.field_name_ = field_name;\n\n    auto field_scheama = schema->get_vector_field(field_name);\n    ASSERT_NE(field_scheama, nullptr);\n    ASSERT_TRUE(field_scheama->is_vector_field());\n\n    if (field_scheama->is_dense_vector()) {\n      auto vector = query_doc.get<std::vector<float>>(field_name);\n      ASSERT_TRUE(vector.has_value());\n      query.query_vector_.assign((char *)vector.value().data(),\n                                 vector.value().size() * sizeof(float));\n    } else {\n      auto sparse_vector =\n          query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n              field_name);\n      query.query_sparse_indices_.assign(\n          (char *)sparse_vector.value().first.data(),\n          sparse_vector.value().first.size() * sizeof(uint32_t));\n      query.query_sparse_values_.assign(\n          (char *)sparse_vector.value().second.data(),\n          sparse_vector.value().second.size() * sizeof(float));\n    }\n    query.include_vector_ = true;\n\n    auto result = collection->Query(query);\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), query.topk_);\n  }\n\n  {\n    VectorQuery query;\n    query.topk_ = 1025;\n    query.field_name_ = field_name;\n\n    auto field_scheama = schema->get_vector_field(field_name);\n    ASSERT_NE(field_scheama, nullptr);\n   
 ASSERT_TRUE(field_scheama->is_vector_field());\n\n    if (field_scheama->is_dense_vector()) {\n      auto vector = query_doc.get<std::vector<float>>(field_name);\n      ASSERT_TRUE(vector.has_value());\n      query.query_vector_.assign((char *)vector.value().data(),\n                                 vector.value().size() * sizeof(float));\n    } else {\n      auto sparse_vector =\n          query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n              field_name);\n      query.query_sparse_indices_.assign(\n          (char *)sparse_vector.value().first.data(),\n          sparse_vector.value().first.size() * sizeof(uint32_t));\n      query.query_sparse_values_.assign(\n          (char *)sparse_vector.value().second.data(),\n          sparse_vector.value().second.size() * sizeof(float));\n    }\n    query.include_vector_ = true;\n\n    auto result = collection->Query(query);\n    ASSERT_FALSE(result.has_value());\n    std::cout << result.error().message() << std::endl;\n  }\n\n  {\n    VectorQuery query;\n    query.topk_ = 1024;\n    query.field_name_ = field_name;\n    query.output_fields_ = std::make_optional<std::vector<std::string>>(\n        std::vector<std::string>(1025));\n\n    auto field_scheama = schema->get_vector_field(field_name);\n    ASSERT_NE(field_scheama, nullptr);\n    ASSERT_TRUE(field_scheama->is_vector_field());\n\n    if (field_scheama->is_dense_vector()) {\n      auto vector = query_doc.get<std::vector<float>>(field_name);\n      ASSERT_TRUE(vector.has_value());\n      query.query_vector_.assign((char *)vector.value().data(),\n                                 vector.value().size() * sizeof(float));\n    } else {\n      auto sparse_vector =\n          query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n              field_name);\n      query.query_sparse_indices_.assign(\n          (char *)sparse_vector.value().first.data(),\n          sparse_vector.value().first.size() * sizeof(uint32_t));\n      
query.query_sparse_values_.assign(\n          (char *)sparse_vector.value().second.data(),\n          sparse_vector.value().second.size() * sizeof(float));\n    }\n    query.include_vector_ = true;\n\n    auto result = collection->Query(query);\n    ASSERT_FALSE(result.has_value());\n    std::cout << result.error().message() << std::endl;\n  }\n}\n\nTEST_F(CollectionTest, Feature_Query_General) {\n  auto func = [&](std::string field_name) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_NE(collection, nullptr);\n\n    auto stats = collection->Stats().value();\n    std::cout << stats.to_string_formatted() << std::endl;\n\n    // validate query result\n    for (int i = 1; i < 2; i++) {\n      auto query_doc = TestHelper::CreateDoc(i, *schema);\n      // std::cout << query_doc.to_detail_string() << std::endl;\n\n      VectorQuery query;\n      query.topk_ = 10;\n      query.field_name_ = field_name;\n\n      auto field_scheama = schema->get_vector_field(field_name);\n      ASSERT_NE(field_scheama, nullptr);\n      ASSERT_TRUE(field_scheama->is_vector_field());\n\n      if (field_scheama->is_dense_vector()) {\n        auto vector = query_doc.get<std::vector<float>>(field_name);\n        ASSERT_TRUE(vector.has_value());\n        query.query_vector_.assign((char *)vector.value().data(),\n                                   vector.value().size() * sizeof(float));\n      } else {\n        auto sparse_vector =\n            query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n                field_name);\n        query.query_sparse_indices_.assign(\n            (char *)sparse_vector.value().first.data(),\n            sparse_vector.value().first.size() * 
sizeof(uint32_t));\n        query.query_sparse_values_.assign(\n            (char *)sparse_vector.value().second.data(),\n            sparse_vector.value().second.size() * sizeof(float));\n      }\n      query.include_vector_ = true;\n\n      auto result = collection->Query(query);\n      if (!result.has_value()) {\n        std::cout << \"err: \" << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), query.topk_);\n\n      for (int j = 0; j < query.topk_; j++) {\n        std::cout << \"result[\" << j\n                  << \"]:\" << result.value()[j]->to_detail_string() << std::endl;\n        auto expect_doc = TestHelper::CreateDoc(doc_count - 1 - j, *schema);\n        if (*result.value()[j] != expect_doc) {\n          std::cout << \"       doc:\" << result.value()[j]->to_detail_string()\n                    << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*result.value()[j], expect_doc);\n      }\n    }\n  };\n\n  func(\"dense_fp32\");\n  func(\"sparse_fp32\");\n}\n\nTEST_F(CollectionTest, Feature_Query_Empty) {\n  auto func = [&](int doc_count, int topk) {\n    FileHelper::RemoveDirectory(col_path);\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_NE(collection, nullptr);\n\n    auto stats = collection->Stats().value();\n    std::cout << stats.to_string_formatted() << std::endl;\n\n    // validate query result\n    for (int i = 1; i < 2; i++) {\n      auto query_doc = TestHelper::CreateDoc(i, *schema);\n      // std::cout << query_doc.to_detail_string() << std::endl;\n\n      VectorQuery query;\n      query.topk_ = topk;\n      query.include_vector_ = 
true;\n\n      auto result = collection->Query(query);\n      if (!result.has_value()) {\n        std::cout << \"err: \" << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));\n\n      auto fields_name = schema->all_field_names();\n      for (int j = 0; j < std::min(query.topk_, doc_count); j++) {\n        auto result_doc = result.value()[j];\n        auto doc_fields_names = result_doc->field_names();\n        ASSERT_TRUE(vectors_equal_when_sorted(fields_name, doc_fields_names));\n      }\n    }\n  };\n\n  func(1, 1);\n  func(1, 2);\n  func(1000, 1000);\n  func(1000, 1001);\n}\n\nTEST_F(CollectionTest, Feature_Query_WithoutVector_CreateScalarIndex) {\n  auto func = [&](int doc_count, int topk, std::string field,\n                  IndexParams::Ptr index_params, std::string filter,\n                  int expected_doc_count) {\n    FileHelper::RemoveDirectory(col_path);\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema();\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_NE(collection, nullptr);\n\n    auto stats = collection->Stats().value();\n    std::cout << stats.to_string_formatted() << std::endl;\n\n    // validate query result\n    VectorQuery query;\n    query.topk_ = topk;\n    query.include_vector_ = true;\n    query.filter_ = filter;\n\n    auto result = collection->Query(query);\n    if (!result.has_value()) {\n      std::cout << \"err: \" << result.error().message() << std::endl;\n    }\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), expected_doc_count);\n\n    // create index\n    auto s = collection->CreateIndex(field, index_params);\n    ASSERT_TRUE(s.ok());\n\n    auto result2 = collection->Query(query);\n    if (!result2.has_value()) 
{\n      std::cout << \"err: \" << result2.error().message() << std::endl;\n    }\n\n    ASSERT_TRUE(result2.has_value());\n    ASSERT_EQ(result2.value().size(), expected_doc_count);\n\n    for (int j = 0; j < expected_doc_count; j++) {\n      auto result1_doc = result2.value()[j];\n      auto result2_doc = result2.value()[j];\n      ASSERT_EQ(*result1_doc, *result2_doc);\n    }\n  };\n\n  func(5, 20, \"bool\", std::make_shared<InvertIndexParams>(false), \"bool=true\",\n       1);\n  func(5, 20, \"bool\", std::make_shared<InvertIndexParams>(true), \"bool =true\",\n       1);\n  func(100, 20, \"bool\", std::make_shared<InvertIndexParams>(true),\n       \"bool = true\", 10);\n  func(100, 20, \"int32\", std::make_shared<InvertIndexParams>(true), \"int32 =1\",\n       1);\n  func(100, 20, \"int32\", std::make_shared<InvertIndexParams>(true), \"int32 <1\",\n       1);\n  func(100, 20, \"int32\", std::make_shared<InvertIndexParams>(true),\n       \"int32 >= 1\", 20);\n  func(100, 20, \"string\", std::make_shared<InvertIndexParams>(true),\n       \"string = 'value_1'\", 1);\n  func(5, 20, \"array_bool\", std::make_shared<InvertIndexParams>(true),\n       \"array_bool contain_any (true)\", 1);\n\n  func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (1)\", 1);\n  func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (1,2)\", 2);\n  func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (0,1,2,3,4)\", 5);\n  func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (0,4)\", 2);\n  // func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n  //      \"array_int32 contain_any ()\", 0);\n\n  func(10000, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (0)\", 1);\n  func(10000, 20, \"array_int32\", 
std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (9999)\", 1);\n  func(10000, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (10000)\", 0);\n  func(10000, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (-1)\", 0);\n}\n\nTEST_F(CollectionTest, Feature_Query_WithoutVector_WithScalarIndex) {\n  auto func = [&](int doc_count, int topk, std::string field,\n                  IndexParams::Ptr index_params, std::string filter,\n                  int expected_doc_count) {\n    FileHelper::RemoveDirectory(col_path);\n    // create with normal schema\n    auto schema = TestHelper::CreateNormalSchema(false, \"demo\", index_params);\n    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count);\n\n    ASSERT_NE(collection, nullptr);\n\n    auto stats = collection->Stats().value();\n    std::cout << stats.to_string_formatted() << std::endl;\n\n    // validate query result\n    VectorQuery query;\n    query.topk_ = topk;\n    query.include_vector_ = true;\n    query.filter_ = filter;\n\n    auto result = collection->Query(query);\n    if (!result.has_value()) {\n      std::cout << \"err: \" << result.error().message() << std::endl;\n    }\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), expected_doc_count);\n  };\n\n  func(5, 20, \"bool\", std::make_shared<InvertIndexParams>(false), \"bool=true\",\n       1);\n  func(5, 20, \"bool\", std::make_shared<InvertIndexParams>(true), \"bool =true\",\n       1);\n  func(100, 20, \"bool\", std::make_shared<InvertIndexParams>(true),\n       \"bool = true\", 10);\n  func(100, 20, \"int32\", std::make_shared<InvertIndexParams>(true), \"int32 =1\",\n       1);\n  func(100, 20, \"int32\", std::make_shared<InvertIndexParams>(true), \"int32 <1\",\n       1);\n  func(100, 20, 
\"int32\", std::make_shared<InvertIndexParams>(true),\n       \"int32 >= 1\", 20);\n  func(5, 20, \"array_bool\", std::make_shared<InvertIndexParams>(true),\n       \"array_bool contain_any (true)\", 1);\n  func(5, 20, \"array_int32\", std::make_shared<InvertIndexParams>(true),\n       \"array_int32 contain_any (1)\", 1);\n}\n\nTEST_F(CollectionTest, Feature_GroupByQuery) {}\n\nTEST_F(CollectionTest, Feature_AddColumn_General) {\n  // create collection\n  int doc_count = 1000;\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n  auto field_schema =\n      std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false);\n  auto s = collection->AddColumn(field_schema, \"int32\", AddColumnOptions());\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  auto new_schema = collection->Schema().value();\n  ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n\n  auto check_doc = [&](int doc_count) {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, new_schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n        std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      
ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  check_doc(doc_count);\n\n  // validate query result\n  for (int i = 1; i < 2; i++) {\n    VectorQuery query;\n    query.topk_ = 10;\n    query.include_vector_ = true;\n\n    auto result = collection->Query(query);\n    if (!result.has_value()) {\n      std::cout << \"err: \" << result.error().message() << std::endl;\n    }\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));\n\n    auto fields_name = new_schema.all_field_names();\n    for (int j = 0; j < std::min(query.topk_, doc_count); j++) {\n      auto result_doc = result.value()[j];\n      auto doc_fields_names = result_doc->field_names();\n      ASSERT_TRUE(vectors_equal_when_sorted(fields_name, doc_fields_names));\n    }\n  }\n}\n\nTEST_F(CollectionTest, Feature_AddColumn_CornerCase) {\n  int doc_count = 1000;\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  {\n    // create collection\n    auto schema = TestHelper::CreateNormalSchema();\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n  }\n\n  {\n    // open collection and add invalid column\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto s = collection->AddColumn(nullptr, \"int32\", AddColumnOptions());\n    ASSERT_FALSE(s.ok());\n\n    s = collection->AddColumn(nullptr, \"\", AddColumnOptions());\n    ASSERT_FALSE(s.ok());\n\n    auto field_schema =\n        std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false);\n    s = collection->AddColumn(field_schema, \"non_exist_field\",\n                              AddColumnOptions());\n    ASSERT_FALSE(s.ok());\n  }\n\n  {\n    // open collection and add one column\n    
auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto field_schema =\n        std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false);\n    auto s = collection->AddColumn(field_schema, \"int32\", AddColumnOptions());\n    if (!s.ok()) {\n      std::cout << \"status: \" << s.message() << std::endl;\n      ASSERT_TRUE(false);\n    }\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n  }\n\n  {\n    // open collection and insert more doc\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n\n    for (int i = doc_count; i < doc_count * 2; i++) {\n      auto doc = TestHelper::CreateDoc(i, new_schema);\n      std::vector<Doc> docs = {doc};\n      auto res = collection->Insert(docs);\n      ASSERT_TRUE(res.has_value());\n      ASSERT_TRUE(res.value()[0].ok());\n    }\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n\n    auto check_doc = [&](int doc_count) {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, new_schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc(doc_count * 2);\n  }\n\n  
{\n    // open collection and add one more column\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto field_schema =\n        std::make_shared<FieldSchema>(\"add_int32_dup\", DataType::INT32, false);\n    auto s =\n        collection->AddColumn(field_schema, \"add_int32\", AddColumnOptions());\n    if (!s.ok()) {\n      std::cout << \"status: \" << s.message() << std::endl;\n      ASSERT_TRUE(false);\n    }\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"add_int32_dup\"));\n  }\n}\n\nTEST_F(CollectionTest, Feature_DropColumn_General) {\n  // create collection\n  int doc_count = 1000;\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n\n  auto s = collection->DropColumn(\"int32\");\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  auto new_schema = collection->Schema().value();\n  ASSERT_TRUE(!new_schema.has_field(\"int32\"));\n}\n\nTEST_F(CollectionTest, Feature_AlterColumn_General) {\n  // create collection\n  int doc_count = 1000;\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, doc_count, false);\n\n  ASSERT_TRUE(collection->Flush().ok());\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, doc_count);\n\n  auto field_schema =\n      std::make_shared<FieldSchema>(\"int32\", DataType::INT64, false);\n  auto s = collection->AlterColumn(\"int32\", \"int32\", field_schema,\n   
                                AlterColumnOptions());\n  ASSERT_FALSE(s.ok());\n\n  s = collection->AlterColumn(\"int32\", \"\", field_schema, AlterColumnOptions());\n  ASSERT_TRUE(s.ok());\n\n  auto new_schema = collection->Schema().value();\n  ASSERT_TRUE(new_schema.has_field(\"int32\"));\n  ASSERT_TRUE(new_schema.get_field(\"int32\")->data_type() == DataType::INT64);\n\n  s = collection->AlterColumn(\"int32\", \"rename_in32\", nullptr,\n                              AlterColumnOptions());\n  ASSERT_TRUE(s.ok());\n  new_schema = collection->Schema().value();\n  ASSERT_FALSE(new_schema.has_field(\"int32\"));\n  ASSERT_TRUE(new_schema.has_field(\"rename_in32\"));\n  ASSERT_TRUE(new_schema.get_field(\"rename_in32\")->data_type() ==\n              DataType::INT64);\n\n  // validate query result\n  for (int i = 1; i < 2; i++) {\n    VectorQuery query;\n    query.topk_ = 10;\n    query.include_vector_ = true;\n\n    auto result = collection->Query(query);\n    if (!result.has_value()) {\n      std::cout << \"err: \" << result.error().message() << std::endl;\n    }\n    ASSERT_TRUE(result.has_value());\n    ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));\n\n    auto fields_name = new_schema.all_field_names();\n    for (int j = 0; j < std::min(query.topk_, doc_count); j++) {\n      auto result_doc = result.value()[j];\n      auto doc_fields_names = result_doc->field_names();\n      ASSERT_TRUE(vectors_equal_when_sorted(fields_name, doc_fields_names));\n    }\n  }\n}\n\nTEST_F(CollectionTest, Feature_AlterColumn_CornerCase) {\n  int doc_count = 1000;\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n  {\n    // create collection\n    auto schema = TestHelper::CreateNormalSchema();\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 
doc_count);\n  }\n\n  {\n    // open collection and alter column\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto field_schema =\n        std::make_shared<FieldSchema>(\"int32_to_int64\", DataType::INT64, false);\n    auto s = collection->AlterColumn(\"int32\", \"\", field_schema,\n                                     AlterColumnOptions());\n    ASSERT_TRUE(s.ok());\n\n    auto new_schema = collection->Schema().value();\n    ASSERT_FALSE(new_schema.has_field(\"int32\"));\n    ASSERT_TRUE(new_schema.has_field(\"int32_to_int64\"));\n    ASSERT_TRUE(new_schema.get_field(\"int32_to_int64\")->data_type() ==\n                DataType::INT64);\n  }\n\n  {\n    // open collection and insert more doc\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto new_schema = collection->Schema().value();\n\n    for (int i = doc_count; i < doc_count * 2; i++) {\n      auto doc = TestHelper::CreateDoc(i, new_schema);\n      std::vector<Doc> docs = {doc};\n      auto res = collection->Insert(docs);\n      ASSERT_TRUE(res.has_value());\n      ASSERT_TRUE(res.value()[0].ok());\n    }\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count * 2);\n\n    auto check_doc = [&](int doc_count) {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, new_schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << 
expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc(doc_count * 2);\n\n    // validate query result\n    for (int i = 1; i < 2; i++) {\n      VectorQuery query;\n      query.topk_ = 10;\n      query.include_vector_ = true;\n\n      auto result = collection->Query(query);\n      if (!result.has_value()) {\n        std::cout << \"err: \" << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));\n\n      auto fields_name = new_schema.all_field_names();\n      for (int j = 0; j < std::min(query.topk_, doc_count); j++) {\n        auto result_doc = result.value()[j];\n        auto doc_fields_names = result_doc->field_names();\n        ASSERT_TRUE(vectors_equal_when_sorted(fields_name, doc_fields_names));\n      }\n    }\n  }\n}\n\nTEST_F(CollectionTest, Feature_Column_MixOperation) {\n  int max_doc_per_count = 1000;\n  // create empty collection\n  auto schema = TestHelper::CreateNormalSchema(\n      false, \"demo\", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),\n      max_doc_per_count);\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n\n  // create seg1\n  auto collection = TestHelper::CreateCollectionWithDoc(\n      col_path, *schema, options, 0, max_doc_per_count, false);\n\n  // create seg2\n  auto s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count,\n                                           max_doc_per_count * 3 / 2);\n\n  // add column\n  auto field_schema =\n      std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false);\n  s = collection->AddColumn(field_schema, \"int32\", AddColumnOptions());\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  auto new_schema = collection->Schema().value();\n  
ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n\n  auto stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n\n  // drop column\n  s = collection->DropColumn(\"uint32\");\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  new_schema = collection->Schema().value();\n  ASSERT_TRUE(!new_schema.has_field(\"uint32\"));\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n\n  // alter column\n  s = collection->AlterColumn(\"int32\", \"rename_int32\", nullptr,\n                              AlterColumnOptions());\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  new_schema = collection->Schema().value();\n  ASSERT_TRUE(new_schema.has_field(\"rename_int32\"));\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);\n\n  // create seg3\n  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count * 3 / 2,\n                                      max_doc_per_count * 5 / 2);\n\n  stats = collection->Stats().value();\n  ASSERT_EQ(stats.doc_count, max_doc_per_count * 5 / 2);\n\n  // drop column\n  s = collection->DropColumn(\"rename_int32\");\n  if (!s.ok()) {\n    std::cout << \"status: \" << s.message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n  new_schema = collection->Schema().value();\n  ASSERT_TRUE(!new_schema.has_field(\"rename_int32\"));\n\n\n  auto check_doc = [&](int doc_count) {\n    for (int i = 0; i < doc_count; i++) {\n      auto expect_doc = TestHelper::CreateDoc(i, new_schema);\n      auto result = collection->Fetch({expect_doc.pk()});\n      ASSERT_TRUE(result.has_value());\n      ASSERT_EQ(result.value().size(), 1);\n      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n      auto doc = result.value()[expect_doc.pk()];\n      ASSERT_NE(doc, nullptr);\n      if (*doc != expect_doc) {\n       
 std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n        std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                  << std::endl;\n      }\n      ASSERT_EQ(*doc, expect_doc);\n    }\n  };\n\n  check_doc(max_doc_per_count * 5 / 2);\n}\n\nTEST_F(CollectionTest, Feature_Column_MixOperation_Empty) {\n  int doc_count = 0;\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  {\n    // create empty collection\n    auto schema = TestHelper::CreateNormalSchema();\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    ASSERT_TRUE(collection->Flush().ok());\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n  }\n\n  {\n    // open collection and do mix operation\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    // add column\n    auto field_schema =\n        std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false);\n    auto s = collection->AddColumn(field_schema, \"int32\", AddColumnOptions());\n    ASSERT_TRUE(s.ok());\n\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n  }\n\n  {\n    // open collection and do mix operation\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"add_int32\"));\n\n    // alter column\n    auto s = collection->AlterColumn(\"add_int32\", \"rename_int32\", nullptr,\n                                     AlterColumnOptions());\n    ASSERT_TRUE(s.ok());\n\n    new_schema = collection->Schema().value();\n    ASSERT_FALSE(new_schema.has_field(\"add_int32\"));\n    
ASSERT_TRUE(new_schema.has_field(\"rename_int32\"));\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n  }\n\n  {\n    // open collection and do mix operation\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n\n    auto new_schema = collection->Schema().value();\n    ASSERT_TRUE(new_schema.has_field(\"rename_int32\"));\n\n    // drop column\n    auto s = collection->DropColumn(\"rename_int32\");\n    ASSERT_TRUE(s.ok());\n    new_schema = collection->Schema().value();\n    ASSERT_FALSE(new_schema.has_field(\"rename_int32\"));\n\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, 0);\n  }\n}\n\n#if RABITQ_SUPPORTED\nTEST_F(CollectionTest, Feature_Optimize_HNSW_RABITQ) {\n  auto func = [](MetricType metric_type, int concurrency) {\n    FileHelper::RemoveDirectory(col_path);\n\n    int doc_count = 1000;\n\n    // create simple schema with only FP32 dense vector for HNSW_RABITQ\n    auto schema = std::make_shared<CollectionSchema>(\"demo\");\n    schema->set_max_doc_count_per_segment(MAX_DOC_COUNT_PER_SEGMENT);\n\n    auto hnsw_rabitq_params = std::make_shared<HnswRabitqIndexParams>(\n        metric_type, 7, 256, 16, 200, 0);\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"dense_fp32\", DataType::VECTOR_FP32, 128, false, hnsw_rabitq_params));\n\n    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n    auto collection = TestHelper::CreateCollectionWithDoc(\n        col_path, *schema, options, 0, doc_count, false);\n\n    auto check_doc = [&]() {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = TestHelper::CreateDoc(i, *schema);\n        auto result = collection->Fetch({expect_doc.pk()});\n        ASSERT_TRUE(result.has_value());\n        ASSERT_EQ(result.value().size(), 1);\n        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);\n        auto doc = 
result.value()[expect_doc.pk()];\n        ASSERT_NE(doc, nullptr);\n        if (*doc != expect_doc) {\n          std::cout << \"       doc:\" << doc->to_detail_string() << std::endl;\n          std::cout << \"expect_doc:\" << expect_doc.to_detail_string()\n                    << std::endl;\n        }\n        ASSERT_EQ(*doc, expect_doc);\n      }\n    };\n\n    check_doc();\n    std::cout << \"check success 1\" << std::endl;\n\n    ASSERT_TRUE(collection->Flush().ok());\n    auto stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 0);\n\n    auto s = collection->Optimize(OptimizeOptions{concurrency});\n    if (!s.ok()) {\n      std::cout << s.message() << std::endl;\n    }\n    ASSERT_TRUE(s.ok());\n\n    stats = collection->Stats().value();\n    ASSERT_EQ(stats.doc_count, doc_count);\n    ASSERT_EQ(stats.index_completeness[\"dense_fp32\"], 1);\n\n    check_doc();\n    std::cout << \"check success 2\" << std::endl;\n\n    collection.reset();\n    auto result = Collection::Open(col_path, options);\n    ASSERT_TRUE(result.has_value());\n    collection = std::move(result.value());\n\n    check_doc();\n    std::cout << \"check success 3\" << std::endl;\n  };\n\n  func(MetricType::L2, 0);\n  func(MetricType::L2, 4);\n  func(MetricType::IP, 0);\n  func(MetricType::IP, 4);\n  // TODO: cosine dense not match, may be accuracy issue\n  // func(MetricType::COSINE, 0);\n  // func(MetricType::COSINE, 4);\n}\n#endif\n\n// **** CORNER CASES **** //\nTEST_F(CollectionTest, CornerCase_CreateAndOpen) {\n  // Collection::CreateAndOpen\n  {\n    {\n      std::cout << \"Collection::CreateAndOpen case 1\" << std::endl;\n      // create collection with non-exist path with read-only mode\n      auto schema = TestHelper::CreateNormalSchema();\n      auto result = Collection::CreateAndOpen(\"non-exist-path\", *schema,\n                                              CollectionOptions{true, false});\n      
ASSERT_FALSE(result.has_value());\n    }\n\n    {\n      std::cout << \"Collection::CreateAndOpen case 2\" << std::endl;\n      // create collection with exist path\n      auto schema = TestHelper::CreateNormalSchema();\n      FileHelper::CreateDirectory(\"invalid_path\");\n      auto result = Collection::CreateAndOpen(\"invalid_path\", *schema,\n                                              CollectionOptions{true, true});\n      ASSERT_FALSE(result.has_value());\n      FileHelper::RemoveDirectory(\"invalid_path\");\n    }\n\n    {\n      std::cout << \"Collection::CreateAndOpen case 3\" << std::endl;\n      FileHelper::RemoveDirectory(\"invalid_path\");\n      // create collection with exist path\n      auto schema = TestHelper::CreateNormalSchema();\n\n      auto result = Collection::CreateAndOpen(\"invalid_path\", *schema,\n                                              CollectionOptions{false, true});\n      if (!result.has_value()) {\n        std::cout << result.error().message() << std::endl;\n      }\n      ASSERT_TRUE(result.has_value());\n\n      std::cout << \"Collection::Open again\" << std::endl;\n      auto new_result = Collection::Open(\"invalid_path\", CollectionOptions{});\n      ASSERT_FALSE(new_result.has_value());\n\n      result.value().reset();\n      // FileHelper::RemoveDirectory(\"invalid_path\");\n    }\n\n    {\n      std::cout << \"Collection::CreateAndOpen case 4\" << std::endl;\n      FileHelper::RemoveDirectory(col_path);\n      // abnormal schema\n      auto schema = TestHelper::CreateNormalSchema(\n          false, \"demo\", std::make_shared<FlatIndexParams>(MetricType::IP));\n      auto result = Collection::CreateAndOpen(col_path, *schema,\n                                              CollectionOptions{false, true});\n      ASSERT_FALSE(result.has_value());\n      ASSERT_EQ(result.error().code(), StatusCode::INVALID_ARGUMENT);\n      std::cout << result.error().message() << std::endl;\n    }\n\n    {\n      std::cout << 
\"Collection::CreateAndOpen case 5\" << std::endl;\n      FileHelper::RemoveDirectory(col_path);\n      // abnormal schema\n      auto schema = TestHelper::CreateScalarSchema();\n      auto result = Collection::CreateAndOpen(col_path, *schema,\n                                              CollectionOptions{false, true});\n      ASSERT_FALSE(result.has_value());\n      ASSERT_EQ(result.error().code(), StatusCode::INVALID_ARGUMENT);\n      std::cout << result.error().message() << std::endl;\n    }\n  }\n\n  {\n    std::cout << \"Collection::CreateAndOpen case 6\" << std::endl;\n    FileHelper::RemoveDirectory(col_path);\n    auto schema = TestHelper::CreateNormalSchema();\n\n    // start N threas to create_and_open collection\n    std::vector<std::thread> threads;\n    std::mutex mtx;\n    std::vector<Status> statuses;\n    for (int i = 0; i < 10; i++) {\n      threads.emplace_back([&]() {\n        auto result = Collection::CreateAndOpen(col_path, *schema,\n                                                CollectionOptions{false, true});\n        if (!result.has_value()) {\n          std::cout << result.error().message() << std::endl;\n          std::lock_guard<std::mutex> lck(mtx);\n          statuses.emplace_back(result.error());\n        }\n      });\n    }\n\n    for (auto &t : threads) {\n      t.join();\n    }\n\n    ASSERT_EQ(statuses.size(), 9);\n  }\n\n  // Collection::Open\n  {\n    {\n      std::cout << \"Collection::Open case 1\" << std::endl;\n      // open collection with non-exist path\n      auto result = Collection::Open(\"non-exist-path\", CollectionOptions{});\n      ASSERT_FALSE(result.has_value());\n    }\n\n    {\n      std::cout << \"Collection::Open case 2\" << std::endl;\n      // open collection with invalid path which contains no manifest\n      FileHelper::RemoveDirectory(\"invalid_path\");\n      FileHelper::CreateDirectory(\"invalid_path\");\n      auto result = Collection::Open(\"invalid_path\", CollectionOptions{});\n      
ASSERT_FALSE(result.has_value());\n      FileHelper::RemoveDirectory(\"invalid_path\");\n    }\n  }\n}\n\nTEST_F(CollectionTest, CornerCase_CreateIndex) {\n  auto schema = TestHelper::CreateNormalSchema();\n  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};\n  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,\n                                                        options, 0, 0, false);\n\n  // create index on non-exist field\n  auto s = collection->CreateIndex(\n      \"non-exist\", std::make_shared<FlatIndexParams>(MetricType::IP));\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::NOT_FOUND);\n\n  s = collection->DropIndex(\"non-exist\");\n  ASSERT_EQ(s.code(), StatusCode::NOT_FOUND);\n\n  // create vector index on scalar field\n  s = collection->CreateIndex(\n      \"uint32\", std::make_shared<FlatIndexParams>(MetricType::IP));\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  // create scalar index on vector field\n  s = collection->CreateIndex(\"dense_fp32\",\n                              std::make_shared<InvertIndexParams>(true));\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  // create scalar index on sparse vector field\n  s = collection->CreateIndex(\"sparse_fp32\",\n                              std::make_shared<InvertIndexParams>(true));\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  // create Ivf index on vector field\n  s = collection->CreateIndex(\"sparse_fp32\",\n                              std::make_shared<IVFIndexParams>(MetricType::IP));\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n}"
  },
  {
    "path": "tests/db/common/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(APPLE)\n    set(APPLE_FRAMEWORK_LIBS\n        -framework CoreFoundation\n        -framework CoreGraphics\n        -framework CoreData\n        -framework CoreText\n        -framework Security\n        -framework Foundation\n        -Wl,-U,_MallocExtension_ReleaseFreeMemory\n        -Wl,-U,_ProfilerStart\n        -Wl,-U,_ProfilerStop\n        -Wl,-U,_RegisterThriftProtocol\n    )\nendif()\n\nfile(GLOB ALL_TEST_SRCS *_test.cc)\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n    get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n    cc_gmock(\n        NAME ${CC_TARGET} STRICT\n        LIBS zvec_common\n        ${CMAKE_THREAD_LIBS_INIT}\n        ${CMAKE_DL_LIBS}\n        SRCS ${CC_SRCS}\n        INCS .. ../../src\n        LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n    )\n    cc_test_suite(zvec_common ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/db/common/config_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/config.h\"\n#include <gtest/gtest.h>\n#include \"zvec/db/status.h\"\n\nusing namespace zvec;\n\nclass ConfigTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    // Reset GlobalConfig for each test\n    // Note: Since GlobalConfig is a singleton and uses atomic flag,\n    // we cannot easily reset it. In a real test environment, you might\n    // need to use a testing framework that supports fixture reset or\n    // modify the GlobalConfig to support reset for testing purposes.\n  }\n};\n\nTEST_F(ConfigTest, InitializeWithDefaultConfig) {\n  GlobalConfig::ConfigData config;\n\n  // Test initialization with default config\n  auto status = GlobalConfig::Instance().Initialize(config);\n  ASSERT_TRUE(status.ok()) << \"Initialization failed: \" << status.message();\n\n  // Verify default values\n  ASSERT_GT(GlobalConfig::Instance().memory_limit_bytes(), 0);\n  ASSERT_EQ(GlobalConfig::Instance().log_level(), GlobalConfig::LogLevel::WARN);\n  ASSERT_EQ(GlobalConfig::Instance().log_type(), \"ConsoleLogger\");\n  ASSERT_GT(GlobalConfig::Instance().query_thread_count(), 0);\n  ASSERT_EQ(GlobalConfig::Instance().invert_to_forward_scan_ratio(), 0.9f);\n  ASSERT_EQ(GlobalConfig::Instance().brute_force_by_keys_ratio(), 0.1f);\n  ASSERT_GT(GlobalConfig::Instance().optimize_thread_count(), 
0);\n}\n\nTEST_F(ConfigTest, InitializeWithCustomConsoleLogConfig) {\n  GlobalConfig::ConfigData config;\n  config.log_config = std::make_shared<GlobalConfig::ConsoleLogConfig>(\n      GlobalConfig::LogLevel::DEBUG);\n  config.memory_limit_bytes = 1024 * 1024 * 1024;  // 1GB\n  config.query_thread_count = 4;\n  config.optimize_thread_count = 2;\n\n  auto status = GlobalConfig::Instance().Initialize(config);\n  // First initialization should succeed\n  if (status.code() == StatusCode::INVALID_ARGUMENT &&\n      status.message().find(\"already initialized\") != std::string::npos) {\n    // If already initialized, skip this test\n    GTEST_SKIP() << \"GlobalConfig already initialized\";\n  }\n}\n\nTEST_F(ConfigTest, InitializeWithCustomFileLogConfig) {\n  GlobalConfig::ConfigData config;\n  auto file_config = std::make_shared<GlobalConfig::FileLogConfig>(\n      GlobalConfig::LogLevel::INFO, \"/tmp/logs\", \"test.log\", 1024, 14);\n  config.log_config = file_config;\n  config.memory_limit_bytes = 2 * 1024 * 1024 * 1024ULL;  // 2GB\n  config.query_thread_count = 8;\n  config.optimize_thread_count = 4;\n\n  auto status = GlobalConfig::Instance().Initialize(config);\n  // First initialization should succeed\n  if (status.code() == StatusCode::INVALID_ARGUMENT &&\n      status.message().find(\"already initialized\") != std::string::npos) {\n    // If already initialized, skip this test\n    GTEST_SKIP() << \"GlobalConfig already initialized\";\n  }\n}\n\nTEST_F(ConfigTest, DoubleInitializationSilentlyFails) {\n  GlobalConfig::ConfigData config;\n\n  auto status1 = GlobalConfig::Instance().Initialize(config);\n  // If first initialization failed due to already being initialized\n  if (status1.code() == StatusCode::INVALID_ARGUMENT &&\n      status1.message().find(\"already initialized\") != std::string::npos) {\n    // Try again with a fresh config\n    auto status2 = GlobalConfig::Instance().Initialize(config);\n    ASSERT_FALSE(status2.ok());\n    
ASSERT_EQ(status2.code(), StatusCode::INVALID_ARGUMENT);\n    ASSERT_NE(status2.message().find(\"already initialized\"), std::string::npos);\n  } else {\n    // First initialization succeeded, second should fail\n    ASSERT_TRUE(status1.ok());\n\n    // The second initialization is allowed but becomes a no-op\n    auto status2 = GlobalConfig::Instance().Initialize(config);\n    ASSERT_TRUE(status2.ok());\n  }\n}\n\nTEST_F(ConfigTest, ValidateConfigWithInvalidMemoryLimit) {\n  GlobalConfig::ConfigData config;\n  config.memory_limit_bytes = 0;  // Invalid value\n\n  GlobalConfig\n      config_instance;  // Create a local instance for testing validation\n  auto status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"memory_limit_bytes must be greater than\"),\n            std::string::npos);\n}\n\nTEST_F(ConfigTest, ValidateConfigWithInvalidQueryThreadCount) {\n  GlobalConfig::ConfigData config;\n  config.query_thread_count = 0;  // Invalid value\n\n  GlobalConfig config_instance;\n  auto status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"query_thread_count must be greater than 0\"),\n            std::string::npos);\n}\n\nTEST_F(ConfigTest, ValidateConfigWithInvalidRatios) {\n  GlobalConfig::ConfigData config;\n\n  // Test invalid invert_to_forward_scan_ratio\n  config.invert_to_forward_scan_ratio = -0.1f;\n  GlobalConfig config_instance;\n  auto status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\n                \"invert_to_forward_scan_ratio must be between 0 and 1\"),\n            std::string::npos);\n\n  // Test invalid brute_force_by_keys_ratio\n  config.invert_to_forward_scan_ratio = 0.9f;  // Reset to valid value\n  
config.brute_force_by_keys_ratio = 1.5f;     // Invalid value\n  status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\n                \"brute_force_by_keys_ratio must be between 0 and 1\"),\n            std::string::npos);\n}\n\nTEST_F(ConfigTest, ValidateConfigWithInvalidFileLogSettings) {\n  GlobalConfig::ConfigData config;\n\n  // Test with empty log directory\n  auto file_config = std::make_shared<GlobalConfig::FileLogConfig>();\n  file_config->dir = \"\";\n  config.log_config = file_config;\n\n  GlobalConfig config_instance;\n  auto status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"log_dir cannot be empty\"),\n            std::string::npos);\n\n  // Test with empty basename\n  file_config->dir = \"/tmp/logs\";\n  file_config->basename = \"\";\n  status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"log_file basename cannot be empty\"),\n            std::string::npos);\n\n  // Test with invalid file size\n  file_config->basename = \"test.log\";\n  file_config->file_size = 0;\n  status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"log file_size must be greater than\"),\n            std::string::npos);\n\n  // Test with invalid overdue days\n  file_config->file_size = 1024;\n  file_config->overdue_days = 0;\n  status = config_instance.Validate(config);\n  ASSERT_FALSE(status.ok());\n  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  ASSERT_NE(status.message().find(\"log_overdue_days must be greater than 0\"),\n            std::string::npos);\n}\n\nTEST_F(ConfigTest, LogLevelEnumValues) {\n  
ASSERT_EQ(static_cast<int>(GlobalConfig::LogLevel::DEBUG), 0);\n  ASSERT_EQ(static_cast<int>(GlobalConfig::LogLevel::INFO), 1);\n  ASSERT_EQ(static_cast<int>(GlobalConfig::LogLevel::WARN), 2);\n  ASSERT_EQ(static_cast<int>(GlobalConfig::LogLevel::ERROR), 3);\n  ASSERT_EQ(static_cast<int>(GlobalConfig::LogLevel::FATAL), 4);\n}\n\nTEST_F(ConfigTest, LogConfigPolymorphism) {\n  auto console_config = std::make_shared<GlobalConfig::ConsoleLogConfig>();\n  auto file_config = std::make_shared<GlobalConfig::FileLogConfig>();\n\n  ASSERT_EQ(console_config->GetLoggerType(), CONSOLE_LOG_TYPE_NAME);\n  ASSERT_EQ(file_config->GetLoggerType(), FILE_LOG_TYPE_NAME);\n}"
  },
  {
    "path": "tests/db/common/status_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/status.h\"\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(StatusTest, DefaultConstructor) {\n  Status status;\n  EXPECT_TRUE(status.ok());\n  EXPECT_EQ(status.code(), StatusCode::OK);\n  EXPECT_EQ(status.message(), \"\");\n}\n\nTEST(StatusTest, ConstructorWithCodeAndMessage) {\n  std::string msg = \"Test error message\";\n  Status status(StatusCode::INVALID_ARGUMENT, msg);\n\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  EXPECT_EQ(status.message(), msg);\n}\n\nTEST(StatusTest, ConstructorWithRvalueMessage) {\n  std::string msg = \"Test error message\";\n  Status status(StatusCode::NOT_FOUND, std::move(msg));\n\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), StatusCode::NOT_FOUND);\n  EXPECT_EQ(status.message(), \"Test error message\");\n}\n\nTEST(StatusTest, CopyConstructor) {\n  Status original(StatusCode::INTERNAL_ERROR, \"Copy test\");\n  Status copy(original);\n\n  EXPECT_FALSE(copy.ok());\n  EXPECT_EQ(copy.code(), StatusCode::INTERNAL_ERROR);\n  EXPECT_EQ(copy.message(), \"Copy test\");\n  EXPECT_EQ(original.code(), copy.code());\n  EXPECT_EQ(original.message(), copy.message());\n}\n\nTEST(StatusTest, CopyAssignment) {\n  Status original(StatusCode::PERMISSION_DENIED, \"Assignment test\");\n  Status assigned;\n  assigned = original;\n\n  
EXPECT_FALSE(assigned.ok());\n  EXPECT_EQ(assigned.code(), StatusCode::PERMISSION_DENIED);\n  EXPECT_EQ(assigned.message(), \"Assignment test\");\n}\n\nTEST(StatusTest, MoveConstructor) {\n  Status original(StatusCode::RESOURCE_EXHAUSTED, \"Move test\");\n  Status moved(std::move(original));\n\n  EXPECT_FALSE(moved.ok());\n  EXPECT_EQ(moved.code(), StatusCode::RESOURCE_EXHAUSTED);\n  EXPECT_EQ(moved.message(), \"Move test\");\n}\n\nTEST(StatusTest, MoveAssignment) {\n  Status original(StatusCode::UNAVAILABLE, \"Move assignment test\");\n  Status moved;\n  moved = std::move(original);\n\n  EXPECT_FALSE(moved.ok());\n  EXPECT_EQ(moved.code(), StatusCode::UNAVAILABLE);\n  EXPECT_EQ(moved.message(), \"Move assignment test\");\n}\n\nTEST(StatusTest, ComparisonOperators) {\n  Status status1(StatusCode::INVALID_ARGUMENT, \"Error 1\");\n  Status status2(StatusCode::INVALID_ARGUMENT, \"Error 1\");\n  Status status3(StatusCode::NOT_FOUND, \"Error 2\");\n  Status ok1;\n  Status ok2;\n\n  EXPECT_TRUE(status1 == status2);\n  EXPECT_FALSE(status1 == status3);\n  EXPECT_TRUE(ok1 == ok2);\n  EXPECT_FALSE(status1 == ok1);\n\n  EXPECT_FALSE(status1 != status2);\n  EXPECT_TRUE(status1 != status3);\n  EXPECT_FALSE(ok1 != ok2);\n  EXPECT_TRUE(status1 != ok1);\n}\n\nTEST(StatusTest, FactoryMethods) {\n  auto invalid_arg = Status::InvalidArgument(\"Invalid arg: \", 42);\n  EXPECT_FALSE(invalid_arg.ok());\n  EXPECT_EQ(invalid_arg.code(), StatusCode::INVALID_ARGUMENT);\n  EXPECT_FALSE(invalid_arg.message().empty());\n\n  auto not_found = Status::NotFound(\"Not found: \", \"key\");\n  EXPECT_FALSE(not_found.ok());\n  EXPECT_EQ(not_found.code(), StatusCode::NOT_FOUND);\n  EXPECT_FALSE(not_found.message().empty());\n\n  auto already_exists = Status::AlreadyExists(\"Already exists: \", \"item\");\n  EXPECT_FALSE(already_exists.ok());\n  EXPECT_EQ(already_exists.code(), StatusCode::ALREADY_EXISTS);\n  EXPECT_FALSE(already_exists.message().empty());\n\n  auto internal_error = 
Status::InternalError(\"Internal error: \", \"details\");\n  EXPECT_FALSE(internal_error.ok());\n  EXPECT_EQ(internal_error.code(), StatusCode::INTERNAL_ERROR);\n  EXPECT_FALSE(internal_error.message().empty());\n\n  auto permission_denied =\n      Status::PermissionDenied(\"Permission denied for: \", \"resource\");\n  EXPECT_FALSE(permission_denied.ok());\n  EXPECT_EQ(permission_denied.code(), StatusCode::PERMISSION_DENIED);\n  EXPECT_FALSE(permission_denied.message().empty());\n}\n\nTEST(StatusTest, OKFactory) {\n  auto ok = Status::OK();\n  EXPECT_TRUE(ok.ok());\n  EXPECT_EQ(ok.code(), StatusCode::OK);\n  EXPECT_EQ(ok.message(), \"\");\n}\n\nTEST(StatusTest, CStringConversion) {\n  Status status(StatusCode::UNKNOWN, \"C string test\");\n  EXPECT_STREQ(status.c_str(), \"C string test\");\n\n  Status ok_status;\n  EXPECT_STREQ(ok_status.c_str(), \"\");\n}\n\nTEST(StatusTest, OutputStreamOperator) {\n  Status status(StatusCode::INVALID_ARGUMENT, \"Stream test\");\n  std::ostringstream oss;\n  oss << status;\n  EXPECT_FALSE(oss.str().empty());\n  EXPECT_NE(oss.str().find(GetDefaultMessage(StatusCode::INVALID_ARGUMENT)),\n            std::string::npos);\n  EXPECT_NE(oss.str().find(\"Stream test\"), std::string::npos);\n\n  Status ok_status;\n  std::ostringstream oss2;\n  oss2 << ok_status;\n  EXPECT_FALSE(oss2.str().empty());\n  EXPECT_NE(oss2.str().find(\"OK\"), std::string::npos);\n}"
  },
  {
    "path": "tests/db/crash_recovery/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(APPLE)\n  set(APPLE_FRAMEWORK_LIBS\n    -framework CoreFoundation\n    -framework CoreGraphics\n    -framework CoreData\n    -framework CoreText\n    -framework Security\n    -framework Foundation\n    -Wl,-U,_MallocExtension_ReleaseFreeMemory\n    -Wl,-U,_ProfilerStart\n    -Wl,-U,_ProfilerStop\n    -Wl,-U,_RegisterThriftProtocol\n  )\nendif()\n\n\n# Build data_generator executable\ncc_binary(\n    NAME data_generator\n    LIBS zvec_db\n    zvec_proto\n    core_knn_flat\n    core_knn_flat_sparse\n    core_knn_hnsw\n    core_knn_hnsw_sparse\n    core_knn_ivf\n    core_knn_hnsw_rabitq\n    core_mix_reducer\n    core_metric\n    core_utility\n    core_quantizer\n    ${CMAKE_THREAD_LIBS_INIT}\n    ${CMAKE_DL_LIBS}\n    SRCS data_generator.cc\n    INCS .. ../../src\n    LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n)\n\n\n# Build test executables\nfile(GLOB ALL_TEST_SRCS *_test.cc)\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n    get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n    cc_gmock(\n        NAME ${CC_TARGET} STRICT\n        LIBS zvec_db\n        zvec_proto\n        core_knn_flat\n        core_knn_flat_sparse\n        core_knn_hnsw\n        core_knn_hnsw_sparse\n        core_knn_ivf\n        core_knn_hnsw_rabitq\n        core_mix_reducer\n        core_metric\n        core_utility\n        core_quantizer\n        ${CMAKE_THREAD_LIBS_INIT}\n        ${CMAKE_DL_LIBS}\n        SRCS ${CC_SRCS}\n        INCS .. ../../src\n        LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n    )\n    add_dependencies(${CC_TARGET} data_generator)\n    cc_test_suite(zvec_crash_recovery ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/db/crash_recovery/data_generator.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <unistd.h>\n#include <filesystem>\n#include <thread>\n#include <zvec/db/collection.h>\n#include \"zvec/ailego/logger/logger.h\"\n#include \"utility.h\"\n\n\nconstexpr int kBatchSize = 20;\nconstexpr int kBatchDelayMs = 10;\n\n\nstruct Config {\n  std::string path;\n  int start_id = 0;\n  int end_id = 0;\n  std::string operation;  // \"insert\", \"upsert\", \"update\", \"delete\"\n  int version = 999999;\n};\n\n\nbool ParseArgs(int argc, char **argv, Config &config) {\n  for (int i = 1; i < argc; i++) {\n    std::string arg = argv[i];\n\n    if (arg == \"--path\" && i + 1 < argc) {\n      config.path = argv[++i];\n    } else if (arg == \"--start\" && i + 1 < argc) {\n      config.start_id = std::stoi(argv[++i]);\n    } else if (arg == \"--end\" && i + 1 < argc) {\n      config.end_id = std::stoi(argv[++i]);\n    } else if (arg == \"--op\" && i + 1 < argc) {\n      config.operation = argv[++i];\n    } else if (arg == \"--version\" && i + 1 < argc) {\n      config.version = std::stoi(argv[++i]);\n    } else if (arg == \"--help\" || arg == \"-h\") {\n      return false;\n    }\n  }\n\n  // Validate required arguments\n  if (config.path.empty() || config.operation.empty() ||\n      config.start_id >= config.end_id || config.version == 999999) {\n    return false;\n  }\n\n  // Validate operation\n  if (config.operation != \"insert\" && 
config.operation != \"upsert\" &&\n      config.operation != \"update\" && config.operation != \"delete\") {\n    std::cerr << \"Error: Invalid operation '\" << config.operation\n              << \"'. Must be 'insert', 'upsert', 'update', or 'delete'.\"\n              << std::endl;\n    return false;\n  }\n\n  return true;\n}\n\n\nvoid PrintUsage(const char *program) {\n  std::cout << \"Usage: \" << program\n            << \" --path <collection_path> --start <start_id> --end <end_id> \"\n               \"--op <operation>\"\n            << std::endl;\n  std::cout << std::endl;\n  std::cout << \"Arguments:\" << std::endl;\n  std::cout << \"  --path      Path to the collection (required)\" << std::endl;\n  std::cout << \"  --start     Starting document ID (inclusive, required)\"\n            << std::endl;\n  std::cout << \"  --end       Ending document ID (exclusive, required)\"\n            << std::endl;\n  std::cout\n      << \"  --op        Operation: insert, upsert, update, or delete (required)\"\n      << std::endl;\n  std::cout << \"  --version   Operation: version (required)\" << std::endl;\n  std::cout << std::endl;\n  std::cout << \"Examples:\" << std::endl;\n  std::cout << \"  # Insert 1000 documents (pk_0 to pk_999)\" << std::endl;\n  std::cout << \"  \" << program\n            << \" --path ./test_db --start 0 --end 1000 --op insert --version 0\"\n            << std::endl;\n  std::cout << std::endl;\n  std::cout << \"  # Update documents 1000-1999\" << std::endl;\n  std::cout\n      << \"  \" << program\n      << \" --path ./test_db --start 1000 --end 2000 --op update --version 1\"\n      << std::endl;\n  std::cout << std::endl;\n  std::cout << \"  # Upsert documents 0-499\" << std::endl;\n  std::cout << \"  \" << program\n            << \" --path ./test_db --start 0 --end 500 --op upsert --version 2\"\n            << std::endl;\n}\n\n\nint main(int argc, char **argv) {\n  Config config;\n\n  // Parse arguments\n  if (!ParseArgs(argc, argv, config)) {\n    
PrintUsage(argv[0]);\n    return 1;\n  }\n\n  try {\n    std::filesystem::path cwd = std::filesystem::current_path();\n    std::cout << \"[data_generator] Current Working Directory: \" << cwd.string()\n              << std::endl;\n  } catch (const std::filesystem::filesystem_error &e) {\n    std::cout\n        << \"[data_generator] Failed to get the current working directory: \"\n        << e.what() << std::endl;\n  }\n\n  std::cout << \"Configuration:\" << std::endl;\n  std::cout << \"  Path:      \" << config.path << std::endl;\n  std::cout << \"  Range:     [\" << config.start_id << \", \" << config.end_id\n            << \")\" << std::endl;\n  std::cout << \"  Operation: \" << config.operation << std::endl;\n  std::cout << \"  BatchSize: \" << kBatchSize << std::endl;\n  std::cout << \"  BatchDelay: \" << kBatchDelayMs << \"ms\" << std::endl;\n  std::cout << std::endl;\n\n  auto result = zvec::Collection::Open(\n      config.path, zvec::CollectionOptions{false, true, 4 * 1024 * 1024});\n  if (!result) {\n    LOG_ERROR(\"Failed to open collection[%s]: %s\", config.path.c_str(),\n              result.error().c_str());\n    return -1;\n  }\n\n  auto collection = result.value();\n  LOG_INFO(\"Collection[%s] opened successfully\", config.path.c_str());\n\n  // Process documents in batches\n  int total_docs = config.end_id - config.start_id;\n  int processed = 0;\n  int batch_num = 0;\n  int next_progress_threshold = total_docs / 10;  // 10% increments\n  int progress_percent = 0;\n\n  while (config.start_id < config.end_id) {\n    int batch_end = std::min(config.start_id + kBatchSize, config.end_id);\n    int batch_count = batch_end - config.start_id;\n\n    std::vector<zvec::Doc> docs;\n    docs.reserve(batch_count);\n    for (int i = config.start_id; i < batch_end; i++) {\n      docs.push_back(zvec::CreateTestDoc(i, config.version));\n    }\n\n    zvec::Result<zvec::WriteResults> results;\n    if (config.operation == \"insert\") {\n      results = 
collection->Insert(docs);\n    } else if (config.operation == \"upsert\") {\n      results = collection->Upsert(docs);\n    } else if (config.operation == \"update\") {\n      results = collection->Update(docs);\n    } else if (config.operation == \"delete\") {\n      std::vector<std::string> pks{};\n      for (const auto &doc : docs) {\n        pks.emplace_back(doc.pk());\n      }\n      results = collection->Delete(pks);\n    }\n    if (!results) {\n      LOG_ERROR(\"Failed to perform operation[%s], reason: %s\",\n                config.operation.c_str(), results.error().message().c_str());\n      return 1;\n    }\n    for (auto &s : results.value()) {\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed to perform operation[%s], reason: %s\",\n                  config.operation.c_str(), s.message().c_str());\n        return 1;\n      }\n    }\n\n    processed += batch_count;\n    config.start_id = batch_end;\n    batch_num++;\n\n    // Print progress every 10%\n    if (processed >= next_progress_threshold) {\n      progress_percent++;\n      LOG_INFO(\"Progress: %d (%d/%d documents)\", progress_percent * 10,\n               processed, total_docs);\n      next_progress_threshold = (progress_percent + 1) * total_docs / 10;\n    }\n\n    // Sleep between batches\n    if (config.start_id < config.end_id) {\n      std::this_thread::sleep_for(std::chrono::milliseconds(kBatchDelayMs));\n    }\n  }\n\n  std::cout << std::endl;\n  std::cout << \"Success! Processed \" << processed << \" documents in \"\n            << batch_num << \" batches.\" << std::endl;\n\n  return 0;\n}\n"
  },
  {
    "path": "tests/db/crash_recovery/utility.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#pragma once\n\n\n#include <zvec/db/collection.h>\n#include <zvec/db/doc.h>\n\n\nnamespace zvec {\n\n/**\n * @brief Create a test schema with deterministic field definitions.\n *\n * @param name The collection name (default: \"crash_recovery_test\")\n * @return CollectionSchema::Ptr The test schema\n */\ninline CollectionSchema::Ptr CreateTestSchema(\n    const std::string &name = \"crash_recovery_test\") {\n  auto schema = std::make_shared<CollectionSchema>(name);\n  schema->set_max_doc_count_per_segment(10000);\n\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"int32_field\", DataType::INT32, false));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"int64_field\", DataType::INT64, true));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"float_field\", DataType::FLOAT, true));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"string_field\", DataType::STRING, false));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"bool_field\", DataType::BOOL, false));\n  schema->add_field(std::make_shared<FieldSchema>(\"array_int32_field\",\n                                                  DataType::ARRAY_INT32, true));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_string_field\", DataType::ARRAY_STRING, false));\n  schema->add_field(std::make_shared<FieldSchema>(\n      
\"dense_fp32_field\", DataType::VECTOR_FP32, 128, false,\n      std::make_shared<HnswIndexParams>(MetricType::COSINE)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"sparse_fp32_field\", DataType::SPARSE_VECTOR_FP32, 0, false,\n      std::make_shared<HnswIndexParams>(MetricType::IP)));\n\n  return schema;\n}\n\n\n/**\n * @brief Create a test document with deterministic values based on doc_id.\n *\n * Document pattern:\n * - pk: \"pk_{doc_id}\"\n * - int32_field: doc_id (cast to int32)\n * - int64_field: doc_id, null if doc_id % 60 == 0\n * - float_field: doc_id / 1000.0, null if doc_id % 70 == 0\n * - string_field: \"{version}_{doc_id}\"\n * - bool_field: doc_id % 2 == 0 or flipped if version % 2 !=0\n * - array_int32_field: [doc_id, doc_id+1, doc_id+2], null if doc_id % 100 == 0\n * - array_string_field: [\"str_{version}_0\", ...]\n * - dense_fp32_field: vector where dense[i] = (doc_id + i) / 1000.0f\n * - sparse_fp32_field: sparse vector with indices [0, 10, ...]\n *\n * @param doc_id The document ID (determines all field values)\n * @param version The version of the document\n * @return Doc The created document\n */\ninline Doc CreateTestDoc(uint64_t doc_id, int version) {\n  Doc doc;\n\n  // Set primary key\n  std::string pk = \"pk_\" + std::to_string(doc_id);\n  doc.set_pk(pk);\n\n  // Set scalar fields\n  doc.set<int32_t>(\"int32_field\", static_cast<int32_t>(doc_id));\n\n  // int64_field: nullable, null if doc_id % 60 == 0\n  if (doc_id % 60 != 0) {\n    doc.set<int64_t>(\"int64_field\", static_cast<int64_t>(doc_id));\n  }\n\n  // float_field: nullable, null if doc_id % 70 == 0\n  if (doc_id % 70 != 0) {\n    doc.set<float>(\"float_field\", static_cast<float>(doc_id) / 1000.0f);\n  }\n\n  // string_field: \"value_{id}\" or \"updated_value_{id}\"\n  std::string string_value =\n      std::to_string(version) + \"_\" + std::to_string(doc_id);\n  doc.set<std::string>(\"string_field\", string_value);\n\n  // bool_field: alternating based on doc_id, 
flipped if version is odd\n  bool bool_value = (doc_id % 2 == 0);\n  if (version % 2 != 0) {\n    bool_value = !bool_value;\n  }\n  doc.set<bool>(\"bool_field\", bool_value);\n\n  // array_int32_field: nullable, null if doc_id % 100 == 0\n  if (doc_id % 100 != 0) {\n    std::vector<int32_t> array_int32;\n    for (int i = 0; i < 3; i++) {\n      array_int32.push_back(static_cast<int32_t>(doc_id + i));\n    }\n    doc.set<std::vector<int32_t>>(\"array_int32_field\", array_int32);\n  }\n\n  // array_string_field: [\"str_{version}_0\", \"str_{version}_1\", ...]\n  std::vector<std::string> array_string;\n  size_t array_size = doc_id % 5 + 1;  // 1 to 5 elements\n  for (size_t i = 0; i < array_size; i++) {\n    array_string.push_back(\"str_\" + std::to_string(version) + \"_\" +\n                           std::to_string(i));\n  }\n  doc.set<std::vector<std::string>>(\"array_string_field\", array_string);\n\n  // dense_fp32_field: deterministic pattern\n  std::vector<float> dense(128);\n  for (int i = 0; i < 128; i++) {\n    dense[i] = static_cast<float>(doc_id + i) / 1000.0f;\n  }\n  doc.set<std::vector<float>>(\"dense_fp32_field\", dense);\n\n  // sparse_fp32_field: sparse vector with indices [0, 10, 20, ..., 100]\n  // Values based on doc_id: value = (doc_id + index) / 1000.0\n  std::vector<uint32_t> sparse_indices;\n  std::vector<float> sparse_values;\n  for (uint32_t idx = 0; idx <= 100; idx += 10) {\n    sparse_indices.push_back(idx);\n    sparse_values.push_back(static_cast<float>(doc_id + idx) / 1000.0f);\n  }\n  doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n      \"sparse_fp32_field\", std::make_pair(sparse_indices, sparse_values));\n\n  return doc;\n}\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "tests/db/crash_recovery/write_recovery_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <sys/wait.h>\n#include <unistd.h>\n#include <chrono>\n#include <csignal>\n#include <cstdio>\n#include <filesystem>\n#include <stdexcept>\n#include <string>\n#include <system_error>\n#include <thread>\n#include <vector>\n#include <gtest/gtest.h>\n#include <zvec/db/collection.h>\n#include <zvec/db/doc.h>\n#include <zvec/db/schema.h>\n#include \"utility.h\"\n\n\nnamespace zvec {\n\n\n// Path of the data_generator binary; resolved by the fixture's SetUp().\nstatic std::string data_generator_bin_;\nconst std::string collection_name_{\"crash_test\"};\n// Directory holding the collection under test (relative to the working dir).\nconst std::string dir_path_{\"crash_test_db\"};\nconst zvec::CollectionOptions options_{false, true};\n\n\n// Returns the canonical path of the first candidate location at which the\n// data_generator binary exists. Throws std::runtime_error if none exists.\nstatic std::string LocateDataGenerator() {\n  namespace fs = std::filesystem;\n  const std::vector<std::string> candidates{\"./data_generator\",\n                                            \"./bin/data_generator\"};\n  for (const auto &p : candidates) {\n    if (fs::exists(p)) {\n      return fs::canonical(p).string();\n    }\n  }\n  throw std::runtime_error(\"data_generator binary not found\");\n}\n\n\n// Removes the collection directory and everything below it. Best effort:\n// errors are deliberately ignored, and a missing directory is not an error.\nstatic void RemoveTestDir() {\n  std::error_code ec;\n  std::filesystem::remove_all(dir_path_, ec);\n}\n\n\n// Forks and replaces the child with data_generator, run against dir_path_\n// with the given --start/--end/--op/--version values. Returns the child's pid\n// to the parent, or -1 if fork() failed. Never returns in the child.\nstatic pid_t SpawnGenerator(const std::string &start, const std::string &end,\n                            const std::string &op,\n                            const std::string &version) {\n  const pid_t pid = fork();\n  if (pid != 0) {\n    return pid;  // Parent process, or fork() failure\n  }\n\n  // Child process. execvp() takes `char *const argv[]`, hence the local\n  // arrays for the flags and the const_casts for the values.\n  char arg_path[] = \"--path\";\n  char arg_start[] = \"--start\";\n  char arg_end[] = \"--end\";\n  char arg_op[] = \"--op\";\n  char arg_version[] = \"--version\";\n  char *args[] = {const_cast<char *>(data_generator_bin_.c_str()),\n                  arg_path,\n                  const_cast<char *>(dir_path_.c_str()),\n                  arg_start,\n                  const_cast<char *>(start.c_str()),\n                  arg_end,\n                  const_cast<char *>(end.c_str()),\n                  arg_op,\n                  const_cast<char *>(op.c_str()),\n                  arg_version,\n                  const_cast<char *>(version.c_str()),\n                  nullptr};\n  execvp(args[0], args);\n  perror(\"execvp failed\");  // Only reached if execvp() failed\n  _exit(1);\n}\n\n\n// Runs data_generator to completion and asserts that it exited with code 0.\n// Call through ASSERT_NO_FATAL_FAILURE() so that a failure stops the test.\nvoid RunGenerator(const std::string &start, const std::string &end,\n                  const std::string &op, const std::string &version) {\n  const pid_t pid = SpawnGenerator(start, end, op, version);\n  ASSERT_GE(pid, 0) << \"fork() failed\";\n\n  int status = 0;\n  ASSERT_EQ(waitpid(pid, &status, 0), pid) << \"waitpid() failed\";\n  ASSERT_TRUE(WIFEXITED(status))\n      << \"Child process did not exit normally. Terminated by signal?\";\n  const int exit_code = WEXITSTATUS(status);\n  ASSERT_EQ(exit_code, 0) << \"data_generator failed with exit code: \"\n                          << exit_code;\n}\n\n\n// Starts data_generator, lets it run for `seconds` seconds and then sends it\n// SIGKILL to simulate a crash. Asserts that the child was terminated by a\n// signal, i.e. that it had not already finished on its own.\n// Call through ASSERT_NO_FATAL_FAILURE() so that a failure stops the test.\nvoid RunGeneratorAndCrash(const std::string &start, const std::string &end,\n                          const std::string &op, const std::string &version,\n                          int seconds) {\n  const pid_t pid = SpawnGenerator(start, end, op, version);\n  ASSERT_GE(pid, 0) << \"fork() failed\";\n\n  std::this_thread::sleep_for(std::chrono::seconds(seconds));\n  if (kill(pid, 0) == 0) {\n    kill(pid, SIGKILL);\n  }\n  int status = 0;\n  ASSERT_EQ(waitpid(pid, &status, 0), pid) << \"waitpid() failed\";\n  ASSERT_TRUE(WIFSIGNALED(status))\n      << \"Child process was not killed by a signal. It exited normally?\";\n}\n\n\n// Fetches the doc with the primary key of `expected_doc` from `collection`\n// and asserts that it is present, non-null and equal to `expected_doc`.\n// The null check matters: Fetch() reports a missing doc as a nullptr entry,\n// which must fail the test instead of being dereferenced.\ntemplate <typename CollectionPtr>\nstatic void ExpectDocStored(const CollectionPtr &collection,\n                            const Doc &expected_doc) {\n  std::vector<std::string> pks{};\n  pks.emplace_back(expected_doc.pk());\n  auto res = collection->Fetch(pks);\n  if (!res) {\n    FAIL() << \"Failed to fetch doc[\" << expected_doc.pk() << \"]\";\n  }\n  auto map = res.value();\n  auto it = map.find(expected_doc.pk());\n  ASSERT_NE(it, map.end())\n      << \"Returned map does not contain doc[\" << expected_doc.pk() << \"]\";\n  ASSERT_NE(it->second, nullptr)\n      << \"Doc[\" << expected_doc.pk() << \"] is missing after recovery\";\n  ASSERT_EQ(*it->second, expected_doc)\n      << \"Data mismatch for doc[\" << expected_doc.pk() << \"]\";\n}\n\n\n// Fixture that starts every test from a clean collection directory and\n// removes that directory again afterwards.\nclass CrashRecoveryTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    RemoveTestDir();\n    ASSERT_NO_THROW(data_generator_bin_ = LocateDataGenerator());\n  }\n\n  void TearDown() override {\n    RemoveTestDir();\n  }\n\n  // Creates an empty collection in dir_path_ and immediately releases the\n  // handle again.\n  static void CreateEmptyCollection() {\n    auto schema = CreateTestSchema(collection_name_);\n    auto result = Collection::CreateAndOpen(dir_path_, *schema, options_);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n    collection.reset();\n  }\n};\n\n\nTEST_F(CrashRecoveryTest, BasicInsertAndReopen) {\n  ASSERT_NO_FATAL_FAILURE(CreateEmptyCollection());\n\n  ASSERT_NO_FATAL_FAILURE(RunGenerator(\"0\", \"5000\", \"insert\", \"0\"));\n  auto result = Collection::Open(dir_path_, options_);\n  ASSERT_TRUE(result.has_value());\n  auto collection = result.value();\n  ASSERT_EQ(collection->Stats().value().doc_count, 5000)\n      << \"Document count mismatch\";\n}\n\n\nTEST_F(CrashRecoveryTest, CrashRecoveryDuringInsertion) {\n  ASSERT_NO_FATAL_FAILURE(CreateEmptyCollection());\n\n  ASSERT_NO_FATAL_FAILURE(\n      RunGeneratorAndCrash(\"0\", \"10000\", \"insert\", \"0\", 3));\n\n  auto result = Collection::Open(dir_path_, options_);\n  ASSERT_TRUE(result.has_value()) << \"Failed to reopen collection after crash. \"\n                                     \"Recovery mechanism may be broken.\";\n  auto collection = result.value();\n  uint64_t doc_count{collection->Stats().value().doc_count};\n  ASSERT_GT(doc_count, 800)\n      << \"Document count is too low after 3s of insertion and recovery\";\n\n  // Every doc counted after recovery must hold its version-0 content.\n  for (uint64_t doc_id = 0; doc_id < doc_count; doc_id++) {\n    ASSERT_NO_FATAL_FAILURE(\n        ExpectDocStored(collection, CreateTestDoc(doc_id, 0)));\n  }\n}\n\n\nTEST_F(CrashRecoveryTest, CrashRecoveryDuringUpsert) {\n  ASSERT_NO_FATAL_FAILURE(CreateEmptyCollection());\n\n  ASSERT_NO_FATAL_FAILURE(RunGenerator(\"0\", \"5000\", \"insert\", \"0\"));\n  {\n    auto result = Collection::Open(dir_path_, options_);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n    ASSERT_EQ(collection->Stats().value().doc_count, 5000)\n        << \"Document count mismatch\";\n  }\n\n  ASSERT_NO_FATAL_FAILURE(\n      RunGeneratorAndCrash(\"4500\", \"20000\", \"upsert\", \"1\", 5));\n\n  auto result = Collection::Open(dir_path_, options_);\n  ASSERT_TRUE(result.has_value()) << \"Failed to reopen collection after crash. \"\n                                     \"Recovery mechanism may be broken.\";\n  auto collection = result.value();\n  uint64_t doc_count{collection->Stats().value().doc_count};\n  ASSERT_GT(doc_count, 6000)\n      << \"Document count is too low after 5s of insertion and recovery\";\n\n  for (uint64_t doc_id = 0; doc_id < doc_count; doc_id++) {\n    // Docs below 4500 are outside the upserted range and keep version 0.\n    const int version = doc_id < 4500 ? 0 : 1;\n    ASSERT_NO_FATAL_FAILURE(\n        ExpectDocStored(collection, CreateTestDoc(doc_id, version)));\n  }\n}\n\n\nTEST_F(CrashRecoveryTest, CrashRecoveryDuringUpdate) {\n  ASSERT_NO_FATAL_FAILURE(CreateEmptyCollection());\n\n  ASSERT_NO_FATAL_FAILURE(RunGenerator(\"0\", \"18000\", \"upsert\", \"0\"));\n  {\n    auto result = Collection::Open(dir_path_, options_);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n    ASSERT_EQ(collection->Stats().value().doc_count, 18000)\n        << \"Document count mismatch\";\n  }\n\n  ASSERT_NO_FATAL_FAILURE(\n      RunGeneratorAndCrash(\"3000\", \"15000\", \"update\", \"3\", 4));\n\n  auto result = Collection::Open(dir_path_, options_);\n  ASSERT_TRUE(result.has_value()) << \"Failed to reopen collection after crash. \"\n                                     \"Recovery mechanism may be broken.\";\n  auto collection = result.value();\n  uint64_t doc_count{collection->Stats().value().doc_count};\n  ASSERT_EQ(doc_count, 18000) << \"Document count mismatch after crash recovery\";\n\n  // Only the first 500 docs of the updated range are checked for version 3.\n  for (int doc_id = 0; doc_id < 3500; doc_id++) {\n    const int version = doc_id < 3000 ? 0 : 3;\n    ASSERT_NO_FATAL_FAILURE(\n        ExpectDocStored(collection, CreateTestDoc(doc_id, version)));\n  }\n}\n\n\nTEST_F(CrashRecoveryTest, CrashRecoveryDuringDelete) {\n  ASSERT_NO_FATAL_FAILURE(CreateEmptyCollection());\n\n  ASSERT_NO_FATAL_FAILURE(RunGenerator(\"0\", \"18000\", \"insert\", \"0\"));\n  {\n    auto result = Collection::Open(dir_path_, options_);\n    ASSERT_TRUE(result.has_value());\n    auto collection = result.value();\n    ASSERT_EQ(collection->Stats().value().doc_count, 18000)\n        << \"Document count mismatch\";\n  }\n\n  ASSERT_NO_FATAL_FAILURE(\n      RunGeneratorAndCrash(\"3000\", \"15000\", \"delete\", \"0\", 4));\n\n  auto result = Collection::Open(dir_path_, options_);\n  ASSERT_TRUE(result.has_value()) << \"Failed to reopen collection after crash. \"\n                                     \"Recovery mechanism may be broken.\";\n  auto collection = result.value();\n  uint64_t doc_count{collection->Stats().value().doc_count};\n  ASSERT_LT(doc_count, 18000)\n      << \"No deletes appear to have been applied before the crash\";\n  ASSERT_GT(doc_count, 6000)\n      << \"Too many documents deleted, recovery likely lost data\";\n\n  for (int doc_id = 0; doc_id < 3500; doc_id++) {\n    const auto expected_doc = CreateTestDoc(doc_id, 0);\n    if (doc_id < 3000) {\n      // Outside the deleted range: the doc must still be intact.\n      ASSERT_NO_FATAL_FAILURE(ExpectDocStored(collection, expected_doc));\n      continue;\n    }\n\n    // Start of the deleted range: the pk must be reported with a nullptr doc.\n    std::vector<std::string> pks{};\n    pks.emplace_back(expected_doc.pk());\n    auto res = collection->Fetch(pks);\n    if (!res) {\n      FAIL() << \"Failed to fetch doc[\" << expected_doc.pk() << \"]\";\n    }\n    auto map = res.value();\n    auto it = map.find(expected_doc.pk());\n    ASSERT_NE(it, map.end())\n        << \"Fetch result missing requested pk[\" << expected_doc.pk() << \"]\";\n    ASSERT_EQ(it->second, nullptr)\n        << \"Returned doc for deleted pk[\" << expected_doc.pk() << \"]\";\n  }\n}\n\n\n}  // namespace zvec\n"
  },
  {
    "path": "tests/db/index/CMakeLists.txt",
    "content": "\n# Project-wide CMake helper modules.\ninclude(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\n# Extra linker flags that are only set on Apple platforms: system frameworks\n# plus linker -U options for four symbols.\nif(APPLE)\n  set(\n    APPLE_FRAMEWORK_LIBS\n      -framework CoreFoundation\n      -framework CoreGraphics\n      -framework CoreData\n      -framework CoreText\n      -framework Security\n      -framework Foundation\n      -Wl,-U,_MallocExtension_ReleaseFreeMemory\n      -Wl,-U,_ProfilerStart\n      -Wl,-U,_ProfilerStop\n      -Wl,-U,_RegisterThriftProtocol\n  )\nendif()\n\n# One gmock target per *_test.cc file found below this directory; each target\n# is registered in the zvec_index test suite.\nfile(\n  GLOB_RECURSE ALL_TEST_SRCS\n  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}\n  *_test.cc\n)\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  # Target name: the source file name without directory and extension.\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gmock(\n    NAME ${CC_TARGET}\n    STRICT\n    LIBS\n      zvec_db\n      zvec_proto\n      core_metric_static\n      core_utility_static\n      core_quantizer_static\n      core_knn_hnsw\n      core_knn_hnsw_sparse\n      sparsehash\n      core_knn_flat\n      core_knn_flat_sparse\n      core_knn_ivf\n      core_knn_hnsw_rabitq\n      core_mix_reducer\n      Arrow::arrow_dataset\n      ${CMAKE_THREAD_LIBS_INIT}\n      ${CMAKE_DL_LIBS}\n    SRCS\n      ${CC_SRCS}\n      utils/utils.cc\n    INCS\n      .\n      ..\n      ../../src\n    LDFLAGS\n      ${APPLE_FRAMEWORK_LIBS}\n  )\n  cc_test_suite(zvec_index ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_column_indexer_array_numbers_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <cstdint>\n#include <cstdlib>\n#include <memory>\n#include <string>\n#include <thread>\n#include <vector>\n#include <gtest/gtest.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\"./inverted_column_indexer_array_numbers_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\n/**\n * @brief A helper class for testing the InvertedColumnIndexer implementation.\n *\n * This class generates test data with specific patterns to verify the\n * correctness of the inverted index implementation. It provides various methods\n * to populate an InvertedColumnIndexer with predictable data patterns and\n * verify that the indexing and search operations work correctly.\n *\n * Data pattern: doc i holds the array [i, i + 1, ..., i + 4]; docs with an id\n * above 999 hold a sixth element i + 5; every 100th doc is inserted as null.\n *\n */\nclass TestHelper {\n public:\n  /**\n   * @param num_docs Number of docs to generate, rounded down to a multiple\n   *                 of 100.\n   * @param num_write_threads Number of concurrent writer threads; 0 is\n   *                          treated as 1.\n   */\n  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)\n      : num_docs_(num_docs / 100 * 100),\n        num_write_threads_(num_write_threads == 0 ? 1 : num_write_threads) {}\n\n\n  /**\n   * @brief Inserts the generated array (or null) of every doc into the\n   * indexer, using num_write_threads_ concurrent writer threads.\n   */\n  template <typename T>\n  void insert_arrays(InvertedColumnIndexer::Ptr indexer) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        auto arr = generate_array<T>(i);\n        if (i % 100 == 0) {  // Null value for every 100th doc\n          s = indexer->insert_null(i);\n        } else {\n          s = indexer->insert(\n              i, std::string(reinterpret_cast<const char *>(arr.data()),\n                             sizeof(T) * arr.size()));\n        }\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    const uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      const uint32_t start = t * num_docs_per_thread;\n      // The last thread also takes the remainder so that no doc is skipped\n      // when num_docs_ is not a multiple of num_write_threads_.\n      const uint32_t end = (t + 1 == num_write_threads_)\n                               ? num_docs_\n                               : start + num_docs_per_thread;\n      threads.emplace_back(insert_func, start, end);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  /**\n   * @brief Checks multi_search() and search_array_len() results against the\n   * data pattern written by insert_arrays().\n   */\n  template <typename T>\n  void verify_arrays(InvertedColumnIndexer::Ptr indexer) {\n    std::vector<std::string> values;\n    InvertedSearchResult::Ptr res;\n\n    // Search for a non-existent value\n    T v = num_docs_ + 100;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n\n    // Search for docs containing value \"2\"\n    values.clear();\n    v = 2;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    // doc1 and doc2 contain value \"2\", doc0 is null\n    ASSERT_EQ(res->count(), 2);\n    ASSERT_TRUE(res->contains(1));\n    ASSERT_TRUE(res->contains(2));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 2);\n    ASSERT_TRUE(res->contains(1));\n    ASSERT_TRUE(res->contains(2));\n\n    // Search for docs containing values of \"2\", \"3\" and \"10\"\n    values.clear();\n    v = 2;\n    values.emplace_back(encode(v));\n    v = 3;\n    values.emplace_back(encode(v));\n    v = 10;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 8);\n    ASSERT_TRUE(res->contains(1));\n    ASSERT_TRUE(res->contains(2));\n    ASSERT_TRUE(res->contains(3));\n    ASSERT_TRUE(res->contains(6));\n    ASSERT_TRUE(res->contains(7));\n    ASSERT_TRUE(res->contains(8));\n    ASSERT_TRUE(res->contains(9));\n    ASSERT_TRUE(res->contains(10));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n\n    // Search for docs containing values of \"3\" and \"6\"\n    values.clear();\n    v = 3;\n    values.emplace_back(encode(v));\n    v = 6;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 6);\n    ASSERT_TRUE(res->contains(1));\n    ASSERT_TRUE(res->contains(2));\n    ASSERT_TRUE(res->contains(3));\n    ASSERT_TRUE(res->contains(4));\n    ASSERT_TRUE(res->contains(5));\n    ASSERT_TRUE(res->contains(6));\n    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 2);\n    ASSERT_TRUE(res->contains(2));\n    ASSERT_TRUE(res->contains(3));\n\n    // Search for docs not containing value \"1\"\n    values.clear();\n    v = 1;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 1);\n    ASSERT_FALSE(res->contains(1));\n    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 1);\n    ASSERT_FALSE(res->contains(1));\n\n    // Search for docs not containing value \"10\" and \"14\"\n    values.clear();\n    v = 10;\n    values.emplace_back(encode(v));\n    v = 14;\n    values.emplace_back(encode(v));\n    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 9);\n    for (uint32_t id = 6; id <= 14; ++id) {\n      ASSERT_FALSE(res->contains(id));\n    }\n    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 1);\n    ASSERT_FALSE(res->contains(10));\n\n    // Search for docs with array length of 5: docs 0..999 minus their 10 nulls\n    res = indexer->search_array_len(5, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 1000 - (1000 / 100));\n    res = indexer->search_array_len(5, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 990);\n    res = indexer->search_array_len(6, CompareOp::LT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 1000 - (1000 / 100));\n    res = indexer->search_array_len(6, CompareOp::LE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100));\n    res = indexer->search_array_len(6, CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search_array_len(6, CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 990);\n  }\n\n\n private:\n  // Returns the raw in-memory bytes of `v`, the form in which this test\n  // passes search values to multi_search().\n  template <typename T>\n  static std::string encode(const T &v) {\n    return std::string(reinterpret_cast<const char *>(&v), sizeof(T));\n  }\n\n\n  // Returns [doc_id, ..., doc_id + 4], plus doc_id + 5 for doc ids above 999.\n  template <typename T>\n  std::vector<T> generate_array(uint32_t doc_id) {\n    std::vector<T> nums;\n    for (uint32_t i = 0; i < 5; ++i) {\n      T v = doc_id + i;\n      nums.push_back(v);\n    }\n    if (doc_id > 999) {\n      T v = doc_id + 5;\n      nums.push_back(v);\n    }\n    return nums;\n  }\n\n\n private:\n  const uint32_t num_docs_;\n  const uint32_t num_write_threads_;\n};\n\n\n/**\n *\n * @brief Unit tests for the InvertedColumnIndexer implementation.\n *\n */\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    remove_working_dir();\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    remove_working_dir();\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  /*****  Per-test initialization and cleanup - End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static TestHelper test_helper_;\n  static IndexParams::Ptr params_;\n\n\n private:\n  // Deletes the working directory. The command is built as a std::string so\n  // that it cannot be truncated by a fixed-size buffer.\n  static void remove_working_dir() {\n    const std::string cmd = \"rm -rf \" + working_dir;\n    system(cmd.c_str());\n  }\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nTestHelper InvertedIndexTest::test_helper_{100000, 10};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\n/*\n *\n * Test Cases\n *\n */\nTEST_F(InvertedIndexTest, ARRAY_INT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema array_int32{\"array_int32\", DataType::ARRAY_INT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(array_int32).ok());\n  auto indexer_int32 = (*indexer_)[\"array_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.insert_arrays<int32_t>(indexer_int32);\n  test_helper_.verify_arrays<int32_t>(indexer_int32);\n}\n\n\nTEST_F(InvertedIndexTest, ARRAY_INT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema array_int64{\"array_int64\", DataType::ARRAY_INT64, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(array_int64).ok());\n  auto indexer_int64 = (*indexer_)[\"array_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.insert_arrays<int64_t>(indexer_int64);\n  test_helper_.verify_arrays<int64_t>(indexer_int64);\n}\n\n\nTEST_F(InvertedIndexTest, ARRAY_UINT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema array_uint32{\"array_uint32\", DataType::ARRAY_UINT32, true,\n                           params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(array_uint32).ok());\n  auto indexer_uint32 = (*indexer_)[\"array_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.insert_arrays<uint32_t>(indexer_uint32);\n  test_helper_.verify_arrays<uint32_t>(indexer_uint32);\n}\n\n\nTEST_F(InvertedIndexTest, ARRAY_UINT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema array_uint64{\"array_uint64\", DataType::ARRAY_UINT64, true,\n                           params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(array_uint64).ok());\n  auto indexer_uint64 = (*indexer_)[\"array_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.insert_arrays<uint64_t>(indexer_uint64);\n  test_helper_.verify_arrays<uint64_t>(indexer_uint64);\n}\n\n\n// Re-runs all verifications after sealing the indexer; relies on the columns\n// created and populated by the tests above.\nTEST_F(InvertedIndexTest, SEALED) {\n  ASSERT_TRUE(indexer_);\n\n  ASSERT_TRUE(indexer_->seal().ok());\n\n  auto indexer_int32 = (*indexer_)[\"array_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.verify_arrays<int32_t>(indexer_int32);\n\n  auto indexer_int64 = (*indexer_)[\"array_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.verify_arrays<int64_t>(indexer_int64);\n\n  auto indexer_uint32 = (*indexer_)[\"array_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.verify_arrays<uint32_t>(indexer_uint32);\n\n  auto indexer_uint64 = (*indexer_)[\"array_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.verify_arrays<uint64_t>(indexer_uint64);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_column_indexer_bool_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <cstdint>\n#include <cstdlib>\n#include <memory>\n#include <string>\n#include <thread>\n#include <vector>\n#include <gtest/gtest.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\"./inverted_column_indexer_bool_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\n/**\n * @brief A helper class for testing the InvertedColumnIndexer implementation.\n *\n * This class generates test data with specific patterns to verify the\n * correctness of the inverted index implementation. It provides various methods\n * to populate an InvertedColumnIndexer with predictable data patterns and\n * verify that the indexing and search operations work correctly.\n *\n * Data patterns: the scalar column holds true for even doc ids and false for\n * odd ones; the array column cycles through ten fixed bool arrays selected by\n * doc_id % 10 (see generate_bool_array()).\n *\n */\nclass TestHelper {\n public:\n  /**\n   * @param num_docs Number of docs to generate, rounded down to a multiple\n   *                 of 100.\n   * @param num_write_threads Number of concurrent writer threads; 0 is\n   *                          treated as 1.\n   */\n  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)\n      : num_docs_(num_docs / 100 * 100),\n        num_write_threads_(num_write_threads == 0 ? 1 : num_write_threads) {}\n\n\n  /** @brief Inserts the generated bool of every doc into the indexer. */\n  void insert_bools(InvertedColumnIndexer::Ptr indexer) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        bool v = generate_bool(i);\n        s = indexer->insert(i, v);\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    run_writers(insert_func);\n  }\n\n\n  /**\n   * @brief Checks that both `== \"true\"` and `!= \"false\"` return exactly the\n   * docs with an even id.\n   */\n  void verify_bools(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n    res = indexer->search(\"true\", CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 2);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 2 == 0) {\n        ASSERT_TRUE(res->contains(i));\n      } else {\n        ASSERT_FALSE(res->contains(i));\n      }\n    }\n\n    res = indexer->search(\"false\", CompareOp::NE);\n    // Fail the test instead of dereferencing an empty result pointer.\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 2);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 2 == 0) {\n        ASSERT_TRUE(res->contains(i));\n      } else {\n        ASSERT_FALSE(res->contains(i));\n      }\n    }\n  }\n\n\n  /** @brief Inserts the generated bool array of every doc into the indexer. */\n  void insert_bool_arrays(InvertedColumnIndexer::Ptr indexer) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        auto v = generate_bool_array(i);\n        s = indexer->insert(i, v);\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    run_writers(insert_func);\n  }\n\n\n  /**\n   * @brief Checks multi_search() and search_array_len() results against the\n   * ten-array cycle written by insert_bool_arrays().\n   */\n  void verify_bool_arrays(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n    // Only patterns 4 and 7 contain no true value at all.\n    res = indexer->multi_search({\"true\"}, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 8);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 10 == 4 || i % 10 == 7) {\n        ASSERT_FALSE(res->contains(i));\n      } else {\n        ASSERT_TRUE(res->contains(i));\n      }\n    }\n\n    res = indexer->multi_search({\"true\"}, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 8);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 10 == 4 || i % 10 == 7) {\n        ASSERT_FALSE(res->contains(i));\n      } else {\n        ASSERT_TRUE(res->contains(i));\n      }\n    }\n\n    // Patterns 2, 5, 8 and 9 mix true and false values.\n    res = indexer->multi_search({\"true\", \"false\"}, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 10 == 2 || i % 10 == 5 || i % 10 == 8 || i % 10 == 9) {\n        ASSERT_TRUE(res->contains(i));\n      } else {\n        ASSERT_FALSE(res->contains(i));\n      }\n    }\n\n    res = indexer->multi_search({\"true\", \"false\"}, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n\n    // Per cycle of ten docs: 1 array of length 1, 2 of length 2, 3 of\n    // length 3 and 4 of length 4.\n    res = indexer->search_array_len(1, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10);\n    res = indexer->search_array_len(2, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 2);\n    res = indexer->search_array_len(3, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 3);\n    res = indexer->search_array_len(4, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);\n\n    res = indexer->search_array_len(5, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n    res = indexer->search_array_len(3, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 7);\n\n    res = indexer->search_array_len(1, CompareOp::LT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search_array_len(1, CompareOp::LE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10);\n    res = indexer->search_array_len(4, CompareOp::LT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 6);\n    res = indexer->search_array_len(4, CompareOp::LE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n\n    res = indexer->search_array_len(1, CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 9);\n    res = indexer->search_array_len(1, CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n    res = indexer->search_array_len(4, CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search_array_len(4, CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);\n  }\n\n\n private:\n  // Splits [0, num_docs_) into one contiguous range per writer thread, runs\n  // insert_func(start, end) on every range concurrently and waits for all\n  // threads to finish.\n  template <typename Func>\n  void run_writers(const Func &insert_func) {\n    const uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      const uint32_t start = t * num_docs_per_thread;\n      // The last thread also takes the remainder so that no doc is skipped\n      // when num_docs_ is not a multiple of num_write_threads_.\n      const uint32_t end = (t + 1 == num_write_threads_)\n                               ? num_docs_\n                               : start + num_docs_per_thread;\n      threads.emplace_back(insert_func, start, end);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  // True for even doc ids, false for odd ones.\n  bool generate_bool(uint32_t doc_id) {\n    return doc_id % 2 == 0;\n  }\n\n\n  // Returns one of ten fixed bool arrays, selected by doc_id % 10.\n  std::vector<bool> generate_bool_array(uint32_t doc_id) {\n    switch (doc_id % 10) {\n      case 0:\n        return {true};\n      case 1:\n        return {true, true};\n      case 2:\n        return {true, false};\n      case 3:\n        return {true, true, true};\n      case 4:\n        return {false, false, false};\n      case 5:\n        return {false, true, false};\n      case 6:\n        return {true, true, true, true};\n      case 7:\n        return {false, false, false, false};\n      case 8:\n        return {true, false, true, false};\n      case 9:\n        return {false, true, false, true};\n      default:\n        return {};\n    }\n  }\n\n\n private:\n  const uint32_t num_docs_;\n  const uint32_t num_write_threads_;\n};\n\n\n/**\n *\n * @brief Unit tests for the InvertedColumnIndexer implementation.\n *\n */\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    remove_working_dir();\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    remove_working_dir();\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  /*****  Per-test initialization and cleanup - End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static TestHelper test_helper_;\n  static IndexParams::Ptr params_;\n\n\n private:\n  // Deletes the working directory. The command is built as a std::string so\n  // that it cannot be truncated by a fixed-size buffer.\n  static void remove_working_dir() {\n    const std::string cmd = \"rm -rf \" + working_dir;\n    system(cmd.c_str());\n  }\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nTestHelper InvertedIndexTest::test_helper_{100000, 10};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\n/*\n *\n * Test Cases\n *\n */\nTEST_F(InvertedIndexTest, BOOLS) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema test_bool{\"test_bool\", DataType::BOOL, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(test_bool).ok());\n  auto indexer_bool = (*indexer_)[\"test_bool\"];\n  ASSERT_TRUE(indexer_bool);\n  test_helper_.insert_bools(indexer_bool);\n  test_helper_.verify_bools(indexer_bool);\n}\n\n\nTEST_F(InvertedIndexTest, BOOL_ARRAYS) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema test_bool_array{\"test_bool_array\", DataType::ARRAY_BOOL, true,\n                              params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(test_bool_array).ok());\n  auto indexer_bool_array = (*indexer_)[\"test_bool_array\"];\n  ASSERT_TRUE(indexer_bool_array);\n  test_helper_.insert_bool_arrays(indexer_bool_array);\n  test_helper_.verify_bool_arrays(indexer_bool_array);\n}\n\n\n// Re-runs the verifications after sealing the indexer; relies on the columns\n// created and populated by the tests above.\nTEST_F(InvertedIndexTest, SEALED) {\n  ASSERT_TRUE(indexer_);\n  ASSERT_TRUE(indexer_->seal().ok());\n\n  auto indexer_bool = (*indexer_)[\"test_bool\"];\n  ASSERT_TRUE(indexer_bool);\n  test_helper_.verify_bools(indexer_bool);\n\n  auto indexer_bool_array = (*indexer_)[\"test_bool_array\"];\n  ASSERT_TRUE(indexer_bool_array);\n  test_helper_.verify_bool_arrays(indexer_bool_array);\n}\n\n\n// Re-runs the verifications against an indexer opened on a snapshot of the\n// working directory.\nTEST_F(InvertedIndexTest, SNAPSHOT) {\n  ASSERT_TRUE(indexer_);\n\n  ASSERT_TRUE(indexer_->create_snapshot(working_dir + \"snapshot\").ok());\n\n  FieldSchema test_bool{\"test_bool\", DataType::BOOL, true, params_};\n  FieldSchema test_bool_array{\"test_bool_array\", DataType::ARRAY_BOOL, true,\n                              params_};\n\n  auto snapshot_indexer =\n      InvertedIndexer::CreateAndOpen(collection_name, working_dir + \"snapshot\",\n                                     false, {test_bool, test_bool_array}, true);\n  ASSERT_TRUE(snapshot_indexer);\n\n  auto indexer_bool = (*snapshot_indexer)[\"test_bool\"];\n  ASSERT_TRUE(indexer_bool);\n  test_helper_.verify_bools(indexer_bool);\n\n  auto indexer_bool_array = (*snapshot_indexer)[\"test_bool_array\"];\n  ASSERT_TRUE(indexer_bool_array);\n  test_helper_.verify_bool_arrays(indexer_bool_array);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_column_indexer_cyclic_numbers_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <random>\n#include <gtest/gtest.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\"./inverted_column_indexer_cyclic_numbers_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\n/**\n * @brief A helper class for testing the InvertedColumnIndexer implementation.\n *\n * This class generates test data with specific patterns to verify the\n * correctness of the inverted index implementation. 
It provides various methods\n * to populate an InvertedColumnIndexer with predictable data patterns and\n * verify that the indexing and search operations work correctly.\n *\n */\nclass TestHelper {\n public:\n  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)\n      : num_docs_(num_docs / 100 * 100),\n        num_write_threads_(num_write_threads) {};\n\n\n  template <typename T>\n  void insert_cyclic_numbers(InvertedColumnIndexer::Ptr indexer,\n                             bool include_nulls) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        T v = generate_cyclic_number<T>(i);\n        if (include_nulls && i % 100 == 0) {  // Null value for every 100th doc\n          s = indexer->insert_null(i);\n        } else {\n          s = indexer->insert(i, std::string((char *)&v, sizeof(T)));\n        }\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      threads.emplace_back(insert_func, t * num_docs_per_thread,\n                           (t + 1) * num_docs_per_thread);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  template <typename T>\n  void verify_cyclic_numbers(InvertedColumnIndexer::Ptr indexer,\n                             bool include_nulls) {\n    verify_cyclic_numbers_eq_ne<T>(indexer, include_nulls);\n    verify_cyclic_numbers_range<T>(indexer, include_nulls);\n    if (include_nulls) {\n      verify_cyclic_numbers_null<T>(indexer);\n    }\n  }\n\n\n  template <typename T>\n  void verify_cyclic_numbers_eq_ne(InvertedColumnIndexer::Ptr indexer,\n                                   bool include_nulls) {\n    InvertedSearchResult::Ptr res;\n    // Test EQ operator\n    for (uint32_t i = 0; i < num_docs_ / 100; ++i) {\n      uint32_t first_doc_in_cycle = i * 100;\n      // 
Search for the first value in this 100-doc cycle\n      T v = generate_cyclic_number<T>(first_doc_in_cycle);\n      res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::EQ);\n      ASSERT_TRUE(res);\n      if (include_nulls) {\n        ASSERT_EQ(res->count(), 9);\n        for (uint32_t j = 1; j < 10; ++j) {\n          ASSERT_TRUE(res->contains(first_doc_in_cycle + j * 10));\n        }\n      } else {\n        ASSERT_EQ(res->count(), 10);\n        for (uint32_t j = 0; j < 10; ++j) {\n          ASSERT_TRUE(res->contains(first_doc_in_cycle + j * 10));\n        }\n      }\n      // Search for the 4th value in this 100-doc cycle\n      v = generate_cyclic_number<T>(first_doc_in_cycle + 3);\n      res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::EQ);\n      ASSERT_TRUE(res);\n      ASSERT_EQ(res->count(), 10);\n      for (uint32_t j = 0; j < 10; ++j) {\n        ASSERT_TRUE(res->contains(first_doc_in_cycle + 3 + j * 10));\n      }\n      // Search for an non-existent value\n      v = first_doc_in_cycle + 11;\n      res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::EQ);\n      ASSERT_TRUE(res);\n      ASSERT_EQ(res->count(), 0);\n    }\n\n    // Test NE operator with a non-existent value\n    T v = generate_cyclic_number<T>(num_docs_);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::NE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n\n    // Test NE operator with a random value\n    static std::random_device rd;\n    static std::mt19937 gen(rd());\n    std::uniform_int_distribution<uint32_t> dis(0, num_docs_ / 100 - 1);\n    uint32_t random_cycle = dis(gen);\n    v = generate_cyclic_number<T>(random_cycle * 100 + 1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::NE);\n    
ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < random_cycle * 100; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    for (uint32_t id = random_cycle * 100; id < (random_cycle + 1) * 100;\n         ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if (id % 10 == 1) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    for (uint32_t id = (random_cycle + 1) * 100; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n  }\n\n\n  template <typename T>\n  void verify_cyclic_numbers_range(InvertedColumnIndexer::Ptr indexer,\n                                   bool include_nulls) {\n    InvertedSearchResult::Ptr res;\n    T v = generate_cyclic_number<T>(0);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), 9);\n    } else {\n      ASSERT_EQ(res->count(), 10);\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 9);\n    } else {\n      ASSERT_EQ(res->count(), num_docs_ - 10);\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100));\n    } else {\n      ASSERT_EQ(res->count(), num_docs_);\n    }\n\n\n    uint32_t middle_cycle = num_docs_ / 100 / 2;\n    v = 
generate_cyclic_number<T>(middle_cycle * 100 + 1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < middle_cycle * 100; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    for (uint32_t id = middle_cycle * 100; id < (middle_cycle + 1) * 100;\n         ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if (id % 10 < 1) {\n        ASSERT_TRUE(res->contains(id));\n      } else {\n        ASSERT_FALSE(res->contains(id));\n      }\n    }\n    for (uint32_t id = (middle_cycle + 1) * 100; id < num_docs_; ++id) {\n      ASSERT_FALSE(res->contains(id));\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < middle_cycle * 100; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    for (uint32_t id = middle_cycle * 100; id < (middle_cycle + 1) * 100;\n         ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if (id % 10 <= 1) {\n        ASSERT_TRUE(res->contains(id));\n      } else {\n        ASSERT_FALSE(res->contains(id));\n      }\n    }\n    for (uint32_t id = (middle_cycle + 1) * 100; id < num_docs_; ++id) {\n      ASSERT_FALSE(res->contains(id));\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < middle_cycle * 100; ++id) {\n      ASSERT_FALSE(res->contains(id));\n    }\n    for (uint32_t id = middle_cycle * 100; id < (middle_cycle + 1) * 100;\n         ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if 
(id % 10 > 1) {\n        ASSERT_TRUE(res->contains(id));\n      } else {\n        ASSERT_FALSE(res->contains(id));\n      }\n    }\n    for (uint32_t id = (middle_cycle + 1) * 100; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < middle_cycle * 100; ++id) {\n      ASSERT_FALSE(res->contains(id));\n    }\n    for (uint32_t id = middle_cycle * 100; id < (middle_cycle + 1) * 100;\n         ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if (id % 10 >= 1) {\n        ASSERT_TRUE(res->contains(id));\n      } else {\n        ASSERT_FALSE(res->contains(id));\n      }\n    }\n    for (uint32_t id = (middle_cycle + 1) * 100; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n\n\n    v = generate_cyclic_number<T>(num_docs_ - 1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100) - 10);\n    } else {\n      ASSERT_EQ(res->count(), num_docs_ - 10);\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), num_docs_ - (num_docs_ / 100));\n    } else {\n      ASSERT_EQ(res->count(), num_docs_);\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 10);\n  }\n\n\n  
template <typename T>\n  void verify_cyclic_numbers_null(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res = indexer->search_null();\n    ASSERT_TRUE(res);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 100 == 0) {\n        ASSERT_TRUE(res->contains(i));\n      } else {\n        ASSERT_FALSE(res->contains(i));\n      }\n    }\n\n    res = indexer->search_non_null();\n    ASSERT_TRUE(res);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 100 == 0) {\n        ASSERT_FALSE(res->contains(i));\n      } else {\n        ASSERT_TRUE(res->contains(i));\n      }\n    }\n  }\n\n\n private:\n  template <typename T>\n  T generate_cyclic_number(uint32_t doc_id) {\n    // Creates a pattern where every 100 consecutive document IDs share a cycle\n    // of 10 distinct values.\n    // E.g., for int32_t,[id: 304, value: 304], [id: 315, value: 305];\n    // for float, [id: 101, value: 101.666], [id: 112, value: 102.666]\n    double num_double = (uint32_t)(doc_id / 100) * 100 + doc_id % 10 + 0.666;\n    T num = num_double;\n    return num;\n  }\n\n\n private:\n  const uint32_t num_docs_;\n  const uint32_t num_write_threads_;\n};\n\n\n/**\n *\n * @brief Unit tests for the InvertedColumnIndexer implementation.\n *\n */\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  
Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  /*****  Per-test initialization and cleanup - End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static TestHelper test_helper_;\n  static IndexParams::Ptr params_;\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nTestHelper InvertedIndexTest::test_helper_{100000, 10};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\n/*\n *\n * Test Cases\n *\n */\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_INT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_int32{\"cyclic_int32\", DataType::INT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_int32).ok());\n  auto indexer_int32 = (*indexer_)[\"cyclic_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.insert_cyclic_numbers<int32_t>(indexer_int32, false);\n  test_helper_.verify_cyclic_numbers<int32_t>(indexer_int32, false);\n\n  FieldSchema cyclic_int32_w_null{\"cyclic_int32_w_null\", DataType::INT32, true,\n                                  params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_int32_w_null).ok());\n  auto indexer_int32_w_null = (*indexer_)[\"cyclic_int32_w_null\"];\n  ASSERT_TRUE(indexer_int32_w_null);\n  test_helper_.insert_cyclic_numbers<int32_t>(indexer_int32_w_null, true);\n  test_helper_.verify_cyclic_numbers<int32_t>(indexer_int32_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_INT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_int64{\"cyclic_int64\", DataType::INT64, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_int64).ok());\n  auto indexer_int64 = (*indexer_)[\"cyclic_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.insert_cyclic_numbers<int64_t>(indexer_int64, false);\n  test_helper_.verify_cyclic_numbers<int64_t>(indexer_int64, false);\n\n  FieldSchema cyclic_int64_w_null{\"cyclic_int64_w_null\", DataType::INT64, true,\n       
                           params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_int64_w_null).ok());\n  auto indexer_int64_w_null = (*indexer_)[\"cyclic_int64_w_null\"];\n  ASSERT_TRUE(indexer_int64_w_null);\n  test_helper_.insert_cyclic_numbers<int64_t>(indexer_int64_w_null, true);\n  test_helper_.verify_cyclic_numbers<int64_t>(indexer_int64_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_UINT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_uint32{\"cyclic_uint32\", DataType::UINT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_uint32).ok());\n  auto indexer_uint32 = (*indexer_)[\"cyclic_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.insert_cyclic_numbers<uint32_t>(indexer_uint32, false);\n  test_helper_.verify_cyclic_numbers<uint32_t>(indexer_uint32, false);\n\n  FieldSchema cyclic_uint32_w_null{\"cyclic_uint32_w_null\", DataType::UINT32,\n                                   true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_uint32_w_null).ok());\n  auto indexer_uint32_w_null = (*indexer_)[\"cyclic_uint32_w_null\"];\n  ASSERT_TRUE(indexer_uint32_w_null);\n  test_helper_.insert_cyclic_numbers<uint32_t>(indexer_uint32_w_null, true);\n  test_helper_.verify_cyclic_numbers<uint32_t>(indexer_uint32_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_UINT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_uint64{\"cyclic_uint64\", DataType::UINT64, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_uint64).ok());\n  auto indexer_uint64 = (*indexer_)[\"cyclic_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.insert_cyclic_numbers<uint64_t>(indexer_uint64, false);\n  test_helper_.verify_cyclic_numbers<uint64_t>(indexer_uint64, false);\n\n  FieldSchema cyclic_uint64_w_null{\"cyclic_uint64_w_null\", DataType::UINT64,\n                                   true, params_};\n  
ASSERT_TRUE(indexer_->create_column_indexer(cyclic_uint64_w_null).ok());\n  auto indexer_uint64_w_null = (*indexer_)[\"cyclic_uint64_w_null\"];\n  ASSERT_TRUE(indexer_uint64_w_null);\n  test_helper_.insert_cyclic_numbers<uint64_t>(indexer_uint64_w_null, true);\n  test_helper_.verify_cyclic_numbers<uint64_t>(indexer_uint64_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_FLOAT) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_float{\"cyclic_float\", DataType::FLOAT, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_float).ok());\n  auto indexer_float = (*indexer_)[\"cyclic_float\"];\n  ASSERT_TRUE(indexer_float);\n  test_helper_.insert_cyclic_numbers<float>(indexer_float, false);\n  test_helper_.verify_cyclic_numbers<float>(indexer_float, false);\n\n  FieldSchema cyclic_float_w_null{\"cyclic_float_w_null\", DataType::FLOAT, true,\n                                  params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_float_w_null).ok());\n  auto indexer_float_w_null = (*indexer_)[\"cyclic_float_w_null\"];\n  ASSERT_TRUE(indexer_float_w_null);\n  test_helper_.insert_cyclic_numbers<float>(indexer_float_w_null, true);\n  test_helper_.verify_cyclic_numbers<float>(indexer_float_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CYCLIC_NUMBERS_DOUBLE) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema cyclic_double{\"cyclic_double\", DataType::DOUBLE, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_double).ok());\n  auto indexer_double = (*indexer_)[\"cyclic_double\"];\n  ASSERT_TRUE(indexer_double);\n  test_helper_.insert_cyclic_numbers<double>(indexer_double, false);\n  test_helper_.verify_cyclic_numbers<double>(indexer_double, false);\n\n  FieldSchema cyclic_double_w_null{\"cyclic_double_w_null\", DataType::DOUBLE,\n                                   true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(cyclic_double_w_null).ok());\n  auto indexer_double_w_null = 
(*indexer_)[\"cyclic_double_w_null\"];\n  ASSERT_TRUE(indexer_double_w_null);\n  test_helper_.insert_cyclic_numbers<double>(indexer_double_w_null, true);\n  test_helper_.verify_cyclic_numbers<double>(indexer_double_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEALED) {\n  ASSERT_TRUE(indexer_);\n\n  ASSERT_TRUE(indexer_->seal().ok());\n\n  auto indexer_int32 = (*indexer_)[\"cyclic_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.verify_cyclic_numbers<int32_t>(indexer_int32, false);\n\n  auto indexer_int32_w_null = (*indexer_)[\"cyclic_int32_w_null\"];\n  ASSERT_TRUE(indexer_int32_w_null);\n  test_helper_.verify_cyclic_numbers<int32_t>(indexer_int32_w_null, true);\n\n  auto indexer_int64 = (*indexer_)[\"cyclic_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.verify_cyclic_numbers<int64_t>(indexer_int64, false);\n\n  auto indexer_int64_w_null = (*indexer_)[\"cyclic_int64_w_null\"];\n  ASSERT_TRUE(indexer_int64_w_null);\n  test_helper_.verify_cyclic_numbers<int64_t>(indexer_int64_w_null, true);\n\n  auto indexer_uint32 = (*indexer_)[\"cyclic_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.verify_cyclic_numbers<uint32_t>(indexer_uint32, false);\n\n  auto indexer_uint32_w_null = (*indexer_)[\"cyclic_uint32_w_null\"];\n  ASSERT_TRUE(indexer_uint32_w_null);\n  test_helper_.verify_cyclic_numbers<uint32_t>(indexer_uint32_w_null, true);\n\n  auto indexer_uint64 = (*indexer_)[\"cyclic_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.verify_cyclic_numbers<uint64_t>(indexer_uint64, false);\n\n  auto indexer_uint64_w_null = (*indexer_)[\"cyclic_uint64_w_null\"];\n  ASSERT_TRUE(indexer_uint64_w_null);\n  test_helper_.verify_cyclic_numbers<uint64_t>(indexer_uint64_w_null, true);\n\n  auto indexer_float = (*indexer_)[\"cyclic_float\"];\n  ASSERT_TRUE(indexer_float);\n  test_helper_.verify_cyclic_numbers<float>(indexer_float, false);\n\n  auto indexer_float_w_null = (*indexer_)[\"cyclic_float_w_null\"];\n  ASSERT_TRUE(indexer_float_w_null);\n 
 test_helper_.verify_cyclic_numbers<float>(indexer_float_w_null, true);\n\n  auto indexer_double = (*indexer_)[\"cyclic_double\"];\n  ASSERT_TRUE(indexer_double);\n  test_helper_.verify_cyclic_numbers<double>(indexer_double, false);\n\n  auto indexer_double_w_null = (*indexer_)[\"cyclic_double_w_null\"];\n  ASSERT_TRUE(indexer_double_w_null);\n  test_helper_.verify_cyclic_numbers<double>(indexer_double_w_null, true);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_column_indexer_sequential_numbers_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <random>\n#include <gtest/gtest.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\n    \"./inverted_column_indexer_sequential_numbers_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\n/**\n * @brief A helper class for testing the InvertedColumnIndexer implementation.\n *\n * This class generates test data with specific patterns to verify the\n * correctness of the inverted index implementation. 
It provides various methods\n * to populate an InvertedColumnIndexer with predictable data patterns and\n * verify that the indexing and search operations work correctly.\n *\n */\nclass TestHelper {\n public:\n  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)\n      : num_docs_(num_docs / 100 * 100),\n        num_write_threads_(num_write_threads) {};\n\n\n  template <typename T>\n  void insert_sequential_numbers(InvertedColumnIndexer::Ptr indexer,\n                                 bool include_nulls) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        T v = generate_sequential_number<T>(i);\n        if (include_nulls && i % 100 == 0) {  // Null value for every 100th doc\n          s = indexer->insert_null(i);\n        } else {\n          s = indexer->insert(i, std::string((char *)&v, sizeof(T)));\n        }\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      threads.emplace_back(insert_func, t * num_docs_per_thread,\n                           (t + 1) * num_docs_per_thread);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers(InvertedColumnIndexer::Ptr indexer,\n                                 bool include_nulls) {\n    verify_sequential_numbers_eq_ne<T>(indexer, include_nulls);\n    verify_sequential_numbers_range_less<T>(indexer, include_nulls);\n    verify_sequential_numbers_range_greater<T>(indexer, include_nulls);\n    if (include_nulls) {\n      verify_sequential_numbers_null<T>(indexer);\n    }\n    if (indexer->is_sealed()) {\n      verify_sequential_numbers_range_ratio<T>(indexer, include_nulls);\n    }\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers_eq_ne(InvertedColumnIndexer::Ptr indexer,\n  
                                     bool include_nulls) {\n    InvertedSearchResult::Ptr res;\n    // Test EQ operator\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      T v = generate_sequential_number<T>(id);\n      res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::EQ);\n      ASSERT_TRUE(res);\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_EQ(res->count(), 0);\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_EQ(res->count(), 1);\n        ASSERT_TRUE(res->contains(id));\n        auto it = res->create_iterator();\n        ASSERT_EQ(it->doc_id(), id);\n        it->next();\n        ASSERT_FALSE(it->valid());\n      }\n    }\n\n    // Test NE operator with a non-existent value\n    T v = generate_sequential_number<T>(num_docs_);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::NE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      for (uint32_t id = 0; id < num_docs_; ++id) {\n        if (id % 100 == 0) {\n          ASSERT_FALSE(res->contains(id));\n        } else {\n          ASSERT_TRUE(res->contains(id));\n        }\n      }\n    } else {\n      ASSERT_EQ(res->count(), num_docs_);\n      auto it = res->create_iterator();\n      for (uint32_t id = 0; id < num_docs_; ++id) {\n        ASSERT_TRUE(res->contains(id));\n        ASSERT_EQ(it->doc_id(), id);\n        it->next();\n      }\n      ASSERT_FALSE(it->valid());\n    }\n\n    // Test NE operator with a random value\n    static std::random_device rd;\n    static std::mt19937 gen(rd());\n    std::uniform_int_distribution<uint32_t> dis(0, num_docs_ - 1);\n    uint32_t num_random = dis(gen);\n    v = generate_sequential_number<T>(num_random);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::NE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else if (id == num_random) {\n      
  ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers_range_less(InvertedColumnIndexer::Ptr indexer,\n                                            bool include_nulls) {\n    T v = generate_sequential_number<T>(0);\n    auto res =\n        indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    ASSERT_FALSE(res->contains(0));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), 0);\n      ASSERT_FALSE(res->contains(0));\n    } else {\n      ASSERT_EQ(res->count(), 1);\n      ASSERT_TRUE(res->contains(0));\n      ASSERT_FALSE(res->contains(1));\n    }\n\n    v = generate_sequential_number<T>(1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), 0);\n      ASSERT_FALSE(res->contains(0));\n    } else {\n      ASSERT_EQ(res->count(), 1);\n      ASSERT_TRUE(res->contains(0));\n      ASSERT_FALSE(res->contains(1));\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    if (include_nulls) {\n      ASSERT_EQ(res->count(), 1);\n      ASSERT_FALSE(res->contains(0));\n      ASSERT_TRUE(res->contains(1));\n      ASSERT_FALSE(res->contains(2));\n    } else {\n      ASSERT_EQ(res->count(), 2);\n      ASSERT_TRUE(res->contains(0));\n      ASSERT_TRUE(res->contains(1));\n      ASSERT_FALSE(res->contains(2));\n    }\n\n    v = generate_sequential_number<T>(num_docs_ / 10);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_ / 10; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } 
else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 10));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_ / 10 + 1; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 10 + 1));\n\n    v = generate_sequential_number<T>(num_docs_ / 2);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_ / 2; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 2));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_ / 2 + 1; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 2 + 1));\n\n    v = generate_sequential_number<T>(num_docs_ - 1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_ - 1; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ - 1));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        
ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_));\n\n    v = generate_sequential_number<T>(num_docs_);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::LE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_));\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers_range_greater(\n      InvertedColumnIndexer::Ptr indexer, bool include_nulls) {\n    T v = generate_sequential_number<T>(0);\n    auto res =\n        indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_FALSE(res->contains(0));\n    for (uint32_t id = 1; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = 0; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n\n    v = generate_sequential_number<T>(1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_FALSE(res->contains(0));\n    ASSERT_FALSE(res->contains(1));\n    for (uint32_t id = 2; id < num_docs_; 
++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_FALSE(res->contains(0));\n    for (uint32_t id = 1; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n\n    v = generate_sequential_number<T>(num_docs_ / 10);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = num_docs_ / 10 + 1; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 10));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = num_docs_ / 10; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 10 - 1));\n\n    v = generate_sequential_number<T>(num_docs_ / 2);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    for (uint32_t id = num_docs_ / 2 + 1; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 2));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    for (uint32_t id = num_docs_ / 2; id < num_docs_; ++id) {\n      if (include_nulls && id % 100 == 0) {\n        
ASSERT_FALSE(res->contains(id));\n      } else {\n        ASSERT_TRUE(res->contains(id));\n      }\n    }\n    ASSERT_FALSE(res->contains(num_docs_ / 2 - 1));\n\n    v = generate_sequential_number<T>(num_docs_ - 1);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_FALSE(res->contains(num_docs_ - 1));\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_TRUE(res->contains(num_docs_ - 1));\n    ASSERT_FALSE(res->contains(num_docs_));\n\n    v = generate_sequential_number<T>(num_docs_);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers_null(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res = indexer->search_null();\n    ASSERT_TRUE(res);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 100 == 0) {\n        ASSERT_TRUE(res->contains(i));\n      } else {\n        ASSERT_FALSE(res->contains(i));\n      }\n    }\n\n    res = indexer->search_non_null();\n    ASSERT_TRUE(res);\n    for (uint32_t i = 0; i < num_docs_; ++i) {\n      if (i % 100 == 0) {\n        ASSERT_FALSE(res->contains(i));\n      } else {\n        ASSERT_TRUE(res->contains(i));\n      }\n    }\n  }\n\n\n  template <typename T>\n  void verify_sequential_numbers_range_ratio(InvertedColumnIndexer::Ptr indexer,\n                                             bool include_nulls) {\n    uint64_t total_size, range_size;\n    T v = generate_sequential_number<T>(num_docs_ / 10);\n    auto s = indexer->evaluate_ratio(std::string((char *)&v, sizeof(T)),\n                                     CompareOp::LT, &total_size, &range_size);\n    ASSERT_TRUE(s.ok());\n    if 
(include_nulls) {\n      ASSERT_EQ(total_size, num_docs_ - num_docs_ / 100);\n      ASSERT_LE(range_size, num_docs_ / 10 * 2);\n    } else {\n      ASSERT_EQ(total_size, num_docs_);\n      ASSERT_LE(range_size, num_docs_ / 10 * 2);\n    }\n\n    s = indexer->evaluate_ratio(std::string((char *)&v, sizeof(T)),\n                                CompareOp::GT, &total_size, &range_size);\n    ASSERT_TRUE(s.ok());\n    if (include_nulls) {\n      ASSERT_EQ(total_size, num_docs_ - num_docs_ / 100);\n      ASSERT_GE(range_size, num_docs_ / 10 * 8);\n    } else {\n      ASSERT_EQ(total_size, num_docs_);\n      ASSERT_GE(range_size, num_docs_ / 10 * 8);\n    }\n  }\n\n\n private:\n  template <typename T>\n  T generate_sequential_number(uint32_t doc_id) {\n    // E.g., for int32_t, [id: 5, value: 5]; for float, [id: 5, value: 5.333]\n    double num_double = doc_id + 0.333;\n    T num = num_double;\n    return num;\n  }\n\n\n private:\n  const uint32_t num_docs_;\n  const uint32_t num_write_threads_;\n};\n\n\n/**\n *\n * @brief Unit tests for the InvertedColumnIndexer implementation.\n *\n */\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  /*****  Per-test initialization and cleanup - 
End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static TestHelper test_helper_;\n  static IndexParams::Ptr params_;\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nTestHelper InvertedIndexTest::test_helper_{100000, 10};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\n/*\n *\n * Test Cases\n *\n */\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_INT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_int32{\"seq_int32\", DataType::INT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_int32).ok());\n  auto indexer_int32 = (*indexer_)[\"seq_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.insert_sequential_numbers<int32_t>(indexer_int32, false);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32, false);\n\n  FieldSchema seq_int32_w_null{\"seq_int32_w_null\", DataType::INT32, true,\n                               params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_int32_w_null).ok());\n  auto indexer_int32_w_null = (*indexer_)[\"seq_int32_w_null\"];\n  ASSERT_TRUE(indexer_int32_w_null);\n  test_helper_.insert_sequential_numbers<int32_t>(indexer_int32_w_null, true);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_INT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_int64{\"seq_int64\", DataType::INT64, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_int64).ok());\n  auto indexer_int64 = (*indexer_)[\"seq_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.insert_sequential_numbers<int64_t>(indexer_int64, false);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64, false);\n\n  FieldSchema seq_int64_w_null{\"seq_int64_w_null\", DataType::INT64, true,\n                               params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_int64_w_null).ok());\n  auto indexer_int64_w_null = (*indexer_)[\"seq_int64_w_null\"];\n  
ASSERT_TRUE(indexer_int64_w_null);\n  test_helper_.insert_sequential_numbers<int64_t>(indexer_int64_w_null, true);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_UINT32) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_uint32{\"seq_uint32\", DataType::UINT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_uint32).ok());\n  auto indexer_uint32 = (*indexer_)[\"seq_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.insert_sequential_numbers<uint32_t>(indexer_uint32, false);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32, false);\n\n  FieldSchema seq_uint32_w_null{\"seq_uint32_w_null\", DataType::UINT32, true,\n                                params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_uint32_w_null).ok());\n  auto indexer_uint32_w_null = (*indexer_)[\"seq_uint32_w_null\"];\n  ASSERT_TRUE(indexer_uint32_w_null);\n  test_helper_.insert_sequential_numbers<uint32_t>(indexer_uint32_w_null, true);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_UINT64) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_uint64{\"seq_uint64\", DataType::UINT64, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_uint64).ok());\n  auto indexer_uint64 = (*indexer_)[\"seq_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.insert_sequential_numbers<uint64_t>(indexer_uint64, false);\n  test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64, false);\n\n  FieldSchema seq_uint64_w_null{\"seq_uint64_w_null\", DataType::UINT64, true,\n                                params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_uint64_w_null).ok());\n  auto indexer_uint64_w_null = (*indexer_)[\"seq_uint64_w_null\"];\n  ASSERT_TRUE(indexer_uint64_w_null);\n  test_helper_.insert_sequential_numbers<uint64_t>(indexer_uint64_w_null, true);\n  
test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_FLOAT) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_float{\"seq_float\", DataType::FLOAT, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_float).ok());\n  auto indexer_float = (*indexer_)[\"seq_float\"];\n  ASSERT_TRUE(indexer_float);\n  test_helper_.insert_sequential_numbers<float>(indexer_float, false);\n  test_helper_.verify_sequential_numbers<float>(indexer_float, false);\n\n  FieldSchema seq_float_w_null{\"seq_float_w_null\", DataType::FLOAT, true,\n                               params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_float_w_null).ok());\n  auto indexer_float_w_null = (*indexer_)[\"seq_float_w_null\"];\n  ASSERT_TRUE(indexer_float_w_null);\n  test_helper_.insert_sequential_numbers<float>(indexer_float_w_null, true);\n  test_helper_.verify_sequential_numbers<float>(indexer_float_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_DOUBLE) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema seq_double{\"seq_double\", DataType::DOUBLE, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_double).ok());\n  auto indexer_double = (*indexer_)[\"seq_double\"];\n  ASSERT_TRUE(indexer_double);\n  test_helper_.insert_sequential_numbers<double>(indexer_double, false);\n  test_helper_.verify_sequential_numbers<double>(indexer_double, false);\n\n  FieldSchema seq_double_w_null{\"seq_double_w_null\", DataType::DOUBLE, true,\n                                params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(seq_double_w_null).ok());\n  auto indexer_double_w_null = (*indexer_)[\"seq_double_w_null\"];\n  ASSERT_TRUE(indexer_double_w_null);\n  test_helper_.insert_sequential_numbers<double>(indexer_double_w_null, true);\n  test_helper_.verify_sequential_numbers<double>(indexer_double_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, SEALED) {\n  ASSERT_TRUE(indexer_);\n\n  
ASSERT_TRUE(indexer_->seal().ok());\n\n  auto indexer_int32 = (*indexer_)[\"seq_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32, false);\n\n  auto indexer_int32_w_null = (*indexer_)[\"seq_int32_w_null\"];\n  ASSERT_TRUE(indexer_int32_w_null);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32_w_null, true);\n\n  auto indexer_int64 = (*indexer_)[\"seq_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64, false);\n\n  auto indexer_int64_w_null = (*indexer_)[\"seq_int64_w_null\"];\n  ASSERT_TRUE(indexer_int64_w_null);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64_w_null, true);\n\n  auto indexer_uint32 = (*indexer_)[\"seq_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32, false);\n\n  auto indexer_uint32_w_null = (*indexer_)[\"seq_uint32_w_null\"];\n  ASSERT_TRUE(indexer_uint32_w_null);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32_w_null, true);\n\n  auto indexer_uint64 = (*indexer_)[\"seq_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64, false);\n\n  auto indexer_uint64_w_null = (*indexer_)[\"seq_uint64_w_null\"];\n  ASSERT_TRUE(indexer_uint64_w_null);\n  test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64_w_null, true);\n\n  auto indexer_float = (*indexer_)[\"seq_float\"];\n  ASSERT_TRUE(indexer_float);\n  test_helper_.verify_sequential_numbers<float>(indexer_float, false);\n\n  auto indexer_float_w_null = (*indexer_)[\"seq_float_w_null\"];\n  ASSERT_TRUE(indexer_float_w_null);\n  test_helper_.verify_sequential_numbers<float>(indexer_float_w_null, true);\n\n  auto indexer_double = (*indexer_)[\"seq_double\"];\n  ASSERT_TRUE(indexer_double);\n  test_helper_.verify_sequential_numbers<double>(indexer_double, false);\n\n  auto indexer_double_w_null = 
(*indexer_)[\"seq_double_w_null\"];\n  ASSERT_TRUE(indexer_double_w_null);\n  test_helper_.verify_sequential_numbers<double>(indexer_double_w_null, true);\n}\n\n\nTEST_F(InvertedIndexTest, CREATE_SNAPSHOT) {\n  ASSERT_TRUE(indexer_);\n\n  std::string snapshot_dir = working_dir + \"snapshot\";\n  ASSERT_TRUE(indexer_->create_snapshot(snapshot_dir).ok());\n\n  std::vector<FieldSchema> fields = {\n      FieldSchema(\"seq_int32\", DataType::INT32, true, params_),\n      FieldSchema(\"seq_int32_w_null\", DataType::INT32, true, params_),\n      FieldSchema(\"seq_int64\", DataType::INT64, true, params_),\n      FieldSchema(\"seq_int64_w_null\", DataType::INT64, true, params_),\n      FieldSchema(\"seq_uint32\", DataType::UINT32, true, params_),\n      FieldSchema(\"seq_uint32_w_null\", DataType::UINT32, true, params_),\n      FieldSchema(\"seq_uint64\", DataType::UINT64, true, params_),\n      FieldSchema(\"seq_uint64_w_null\", DataType::UINT64, true, params_),\n      FieldSchema(\"seq_float\", DataType::FLOAT, true, params_),\n      FieldSchema(\"seq_float_w_null\", DataType::FLOAT, true, params_),\n      FieldSchema(\"seq_double\", DataType::DOUBLE, true, params_),\n      FieldSchema(\"seq_double_w_null\", DataType::DOUBLE, true, params_)};\n\n  auto snapshot_indexer = InvertedIndexer::CreateAndOpen(\n      \"snapshot\", snapshot_dir, false, fields, false);\n  ASSERT_TRUE(snapshot_indexer);\n\n  auto indexer_int32 = (*snapshot_indexer)[\"seq_int32\"];\n  ASSERT_TRUE(indexer_int32);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32, false);\n\n  auto indexer_int32_w_null = (*snapshot_indexer)[\"seq_int32_w_null\"];\n  ASSERT_TRUE(indexer_int32_w_null);\n  test_helper_.verify_sequential_numbers<int32_t>(indexer_int32_w_null, true);\n\n  auto indexer_int64 = (*snapshot_indexer)[\"seq_int64\"];\n  ASSERT_TRUE(indexer_int64);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64, false);\n\n  auto indexer_int64_w_null = 
(*snapshot_indexer)[\"seq_int64_w_null\"];\n  ASSERT_TRUE(indexer_int64_w_null);\n  test_helper_.verify_sequential_numbers<int64_t>(indexer_int64_w_null, true);\n\n  auto indexer_uint32 = (*snapshot_indexer)[\"seq_uint32\"];\n  ASSERT_TRUE(indexer_uint32);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32, false);\n\n  auto indexer_uint32_w_null = (*snapshot_indexer)[\"seq_uint32_w_null\"];\n  ASSERT_TRUE(indexer_uint32_w_null);\n  test_helper_.verify_sequential_numbers<uint32_t>(indexer_uint32_w_null, true);\n\n  auto indexer_uint64 = (*snapshot_indexer)[\"seq_uint64\"];\n  ASSERT_TRUE(indexer_uint64);\n  test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64, false);\n\n  auto indexer_uint64_w_null = (*snapshot_indexer)[\"seq_uint64_w_null\"];\n  ASSERT_TRUE(indexer_uint64_w_null);\n  test_helper_.verify_sequential_numbers<uint64_t>(indexer_uint64_w_null, true);\n\n  auto indexer_float = (*snapshot_indexer)[\"seq_float\"];\n  ASSERT_TRUE(indexer_float);\n  test_helper_.verify_sequential_numbers<float>(indexer_float, false);\n\n  auto indexer_float_w_null = (*snapshot_indexer)[\"seq_float_w_null\"];\n  ASSERT_TRUE(indexer_float_w_null);\n  test_helper_.verify_sequential_numbers<float>(indexer_float_w_null, true);\n\n  auto indexer_double = (*snapshot_indexer)[\"seq_double\"];\n  ASSERT_TRUE(indexer_double);\n  test_helper_.verify_sequential_numbers<double>(indexer_double, false);\n\n  auto indexer_double_w_null = (*snapshot_indexer)[\"seq_double_w_null\"];\n  ASSERT_TRUE(indexer_double_w_null);\n  test_helper_.verify_sequential_numbers<double>(indexer_double_w_null, true);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_column_indexer_string_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <random>\n#include <gtest/gtest.h>\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\"./inverted_column_indexer_string_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\n/**\n * @brief A helper class for testing the InvertedColumnIndexer implementation.\n *\n * This class generates test data with specific patterns to verify the\n * correctness of the inverted index implementation. 
It provides various methods\n * to populate an InvertedColumnIndexer with predictable data patterns and\n * verify that the indexing and search operations work correctly.\n *\n */\nclass TestHelper {\n public:\n  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)\n      : num_docs_(num_docs / 100 * 100),\n        num_write_threads_(num_write_threads) {};\n\n\n  void insert_strings(InvertedColumnIndexer::Ptr indexer) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        auto v = generate_string(i);\n        s = indexer->insert(i, v);\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      threads.emplace_back(insert_func, t * num_docs_per_thread,\n                           (t + 1) * num_docs_per_thread);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  void verify_strings(InvertedColumnIndexer::Ptr indexer) {\n    verify_strings_eq_ne(indexer);\n    verify_strings_like(indexer);\n    verify_strings_range(indexer);\n  }\n\n\n  void verify_strings_eq_ne(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n    // Test EQ operator\n    for (uint32_t i = 0; i < 20; i++) {\n      auto v = generate_string(i);\n      res = indexer->search(v, CompareOp::EQ);\n      ASSERT_TRUE(res);\n      ASSERT_EQ(res->count(), num_docs_ / 20);\n      for (uint32_t j = 0; j < num_docs_ / 20; ++j) {\n        ASSERT_TRUE(res->contains(i + j * 20));\n      }\n    }\n\n    // Test NE operator with a non-existent value\n    std::string v = \"NotExist\";\n    res = indexer->search(v, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n\n    // Test NE operator with a random value\n    static std::random_device rd;\n    static std::mt19937 gen(rd());\n    
std::uniform_int_distribution<uint32_t> dis(0, 19);\n    uint32_t random_num = dis(gen);\n    v = generate_string(random_num);\n    res = indexer->search(v, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - num_docs_ / 20);\n    for (uint32_t j = 0; j < num_docs_; ++j) {\n      if (j % 20 == random_num) {\n        ASSERT_FALSE(res->contains(j));\n      } else {\n        ASSERT_TRUE(res->contains(j));\n      }\n    }\n  }\n\n\n  void verify_strings_like(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n\n    std::string v = \"Three\";\n    res = indexer->search(v, CompareOp::HAS_PREFIX);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 4);\n    for (uint32_t j = 0; j < num_docs_; ++j) {\n      if (j % 4 == 2) {\n        ASSERT_TRUE(res->contains(j));\n      } else {\n        ASSERT_FALSE(res->contains(j));\n      }\n    }\n\n    v = \"06\";\n    res = indexer->search(v, CompareOp::HAS_SUFFIX);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 20);\n    for (uint32_t j = 0; j < num_docs_; ++j) {\n      if (j % 20 == 6) {\n        ASSERT_TRUE(res->contains(j));\n      } else {\n        ASSERT_FALSE(res->contains(j));\n      }\n    }\n\n    v = \"6\";\n    res = indexer->search(v, CompareOp::HAS_SUFFIX);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ / 10);\n    for (uint32_t j = 0; j < num_docs_; ++j) {\n      if (j % 20 == 6 || j % 20 == 16) {\n        ASSERT_TRUE(res->contains(j));\n      } else {\n        ASSERT_FALSE(res->contains(j));\n      }\n    }\n\n    v = \"21\";\n    res = indexer->search(v, CompareOp::HAS_SUFFIX);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n  }\n\n\n  void verify_strings_range(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n    std::string v = \"Two\";\n    res = indexer->search(v, CompareOp::LT);\n    ASSERT_TRUE(res);\n    // \"One\", \"Three\", and \"Four\" are less than \"Two\" in string sense\n    
ASSERT_EQ(res->count(), num_docs_ / 4 * 3);\n    for (uint32_t j = 0; j < num_docs_; ++j) {\n      if (j % 4 == 1) {\n        ASSERT_FALSE(res->contains(j));\n      } else {\n        ASSERT_TRUE(res->contains(j));\n      }\n    }\n  }\n\n\n  void insert_string_arrays(InvertedColumnIndexer::Ptr indexer) {\n    auto insert_func = [&](uint32_t start, uint32_t end) {\n      Status s;\n      for (uint32_t i = start; i < end; ++i) {\n        auto v = generate_string_array(i);\n        s = indexer->insert(i, v);\n        ASSERT_TRUE(s.ok());\n      }\n    };\n\n    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;\n    std::vector<std::thread> threads{};\n    for (uint32_t t = 0; t < num_write_threads_; ++t) {\n      threads.emplace_back(insert_func, t * num_docs_per_thread,\n                           (t + 1) * num_docs_per_thread);\n    }\n    for (auto &t : threads) {\n      t.join();\n    }\n  }\n\n\n  void verify_string_arrays(InvertedColumnIndexer::Ptr indexer) {\n    InvertedSearchResult::Ptr res;\n    auto v = generate_string_array(100);\n    res = indexer->multi_search(v, CompareOp::CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 1);\n    ASSERT_TRUE(res->contains(100));\n\n    res = indexer->multi_search(v, CompareOp::CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 5);\n    ASSERT_TRUE(res->contains(98));\n    ASSERT_TRUE(res->contains(99));\n    ASSERT_TRUE(res->contains(100));\n    ASSERT_TRUE(res->contains(101));\n    ASSERT_TRUE(res->contains(102));\n\n    res = indexer->multi_search(v, CompareOp::NOT_CONTAIN_ALL);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - 1);\n    ASSERT_FALSE(res->contains(100));\n\n    res = indexer->multi_search(v, CompareOp::NOT_CONTAIN_ANY);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_ - 5);\n    ASSERT_FALSE(res->contains(98));\n    ASSERT_FALSE(res->contains(99));\n    ASSERT_FALSE(res->contains(100));\n    ASSERT_FALSE(res->contains(101));\n    
ASSERT_FALSE(res->contains(102));\n\n    res = indexer->search_array_len(3, CompareOp::EQ);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), num_docs_);\n    res = indexer->search_array_len(3, CompareOp::NE);\n    ASSERT_TRUE(res);\n    ASSERT_EQ(res->count(), 0);\n  }\n\n\n private:\n  std::string generate_string(uint32_t doc_id) {\n    std::string prefix;\n    switch (doc_id % 4) {\n      case 0:\n        prefix = \"One\";\n        break;\n      case 1:\n        prefix = \"Two\";\n        break;\n      case 2:\n        prefix = \"Three\";\n        break;\n      case 3:\n        prefix = \"Four\";\n        break;\n    }\n    std::stringstream suffix;\n    suffix << std::setfill('0') << std::setw(2) << doc_id % 20;\n\n    return prefix + \"_\" + suffix.str();\n  }\n\n\n  std::vector<std::string> generate_string_array(uint32_t doc_id) {\n    std::vector<std::string> ret;\n    std::stringstream ss1;\n    ss1 << std::setfill('0') << std::setw(10) << doc_id;\n    ret.emplace_back(ss1.str());\n    std::stringstream ss2;\n    ss2 << std::setfill('0') << std::setw(10) << doc_id + 1;\n    ret.emplace_back(ss2.str());\n    std::stringstream ss3;\n    ss3 << std::setfill('0') << std::setw(10) << doc_id + 2;\n    ret.emplace_back(ss3.str());\n    return ret;\n  }\n\n\n private:\n  const uint32_t num_docs_;\n  const uint32_t num_write_threads_;\n};\n\n\n/**\n *\n * @brief Unit tests for the InvertedColumnIndexer implementation.\n *\n */\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true, true);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    
char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  /*****  Per-test initialization and cleanup - End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static TestHelper test_helper_;\n  static IndexParams::Ptr params_;\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nTestHelper InvertedIndexTest::test_helper_{100000, 10};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\n/*\n *\n * Test Cases\n *\n */\nTEST_F(InvertedIndexTest, STRINGS) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema test_string{\"test_string\", DataType::STRING, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(test_string).ok());\n  auto indexer_string = (*indexer_)[\"test_string\"];\n  ASSERT_TRUE(indexer_string);\n  test_helper_.insert_strings(indexer_string);\n  test_helper_.verify_strings(indexer_string);\n}\n\n\nTEST_F(InvertedIndexTest, STRING_ARRAYS) {\n  ASSERT_TRUE(indexer_);\n\n  FieldSchema test_string_array{\"test_string_array\", DataType::ARRAY_STRING,\n                                true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(test_string_array).ok());\n  auto indexer_string_array = (*indexer_)[\"test_string_array\"];\n  ASSERT_TRUE(indexer_string_array);\n  test_helper_.insert_string_arrays(indexer_string_array);\n  test_helper_.verify_string_arrays(indexer_string_array);\n}\n\n\nTEST_F(InvertedIndexTest, SEALED) {\n  ASSERT_TRUE(indexer_);\n  ASSERT_TRUE(indexer_->seal().ok());\n\n  auto indexer_string = (*indexer_)[\"test_string\"];\n  ASSERT_TRUE(indexer_string);\n  test_helper_.verify_strings(indexer_string);\n\n\n  auto indexer_string_array = (*indexer_)[\"test_string_array\"];\n  ASSERT_TRUE(indexer_string_array);\n  
test_helper_.verify_string_arrays(indexer_string_array);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/column/inverted_column/inverted_indexer_util_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include <gtest/gtest.h>\n#define private public\n#define protected public\n#include \"db/index/column/inverted_column/inverted_indexer.h\"\n#undef private\n#undef protected\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\n\nusing namespace zvec;\nusing File = ailego::File;\n\n\nconst std::string working_dir{\"./inverted_indexer_util_dir/\"};\nconst std::string collection_name{\"test_collection\"};\n\n\nclass InvertedIndexTest : public testing::Test {\n  /*****  Global initialization and cleanup - Start  *****/\n public:\n  static void SetUpTestCase() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n\n    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,\n                                              true, {}, false);\n\n    params_ = std::make_shared<InvertIndexParams>(true, false);\n  }\n\n  static void TearDownTestCase() {\n    indexer_.reset();\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf %s\", working_dir.c_str());\n    system(cmd_buf);\n  }\n  /*****  Global initialization and cleanup - End  *****/\n\n\n  /*****  Per-test initialization and cleanup - Start  *****/\n protected:\n  void SetUp() override {}\n\n  void TearDown() override {}\n  
/*****  Per-test initialization and cleanup - End  *****/\n\n\n protected:\n  static InvertedIndexer::Ptr indexer_;\n  static IndexParams::Ptr params_;\n};\n\n\nInvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};\nIndexParams::Ptr InvertedIndexTest::params_{nullptr};\n\n\nTEST_F(InvertedIndexTest, COLLECTION_NAME) {\n  ASSERT_TRUE(indexer_);\n  ASSERT_EQ(indexer_->collection(), collection_name);\n}\n\n\nTEST_F(InvertedIndexTest, WORKING_DIR) {\n  ASSERT_TRUE(indexer_);\n  ASSERT_EQ(indexer_->working_dir(), working_dir);\n}\n\n\nTEST_F(InvertedIndexTest, COLUMN_MANIPULATION_EDGE_CASE) {\n  ASSERT_FALSE(indexer_->remove_column_indexer(\"Non-exist\").ok());\n\n  FieldSchema field{\"field_int32\", DataType::INT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());\n  auto indexer_int32 = (*indexer_)[\"field_int32\"];\n  ASSERT_NE(indexer_int32, nullptr);\n\n  FieldSchema field_duplicate{\"field_int32\", DataType::INT32, false, params_};\n  ASSERT_FALSE(indexer_->create_column_indexer(field_duplicate).ok());\n\n  ASSERT_TRUE(indexer_->remove_column_indexer(\"field_int32\").ok());\n}\n\n\nTEST_F(InvertedIndexTest, COLUMN_MANIPULATION_INT32) {\n  ASSERT_TRUE(indexer_);\n\n  // Create column indexer\n  FieldSchema field{\"field_int32\", DataType::INT32, true, params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());\n  auto indexer_int32 = (*indexer_)[\"field_int32\"];\n  ASSERT_NE(indexer_int32, nullptr);\n\n  // Insert some data\n  int32_t i;\n  for (i = 0; i < 3000; i++) {\n    auto s = indexer_int32->insert(i, std::string((char *)&i, sizeof(int32_t)));\n    ASSERT_TRUE(s.ok());\n  }\n\n  // Store variable names for later retrieval\n  auto cf_name_terms = indexer_int32->cf_name_terms();\n  auto cf_name_ranges = indexer_int32->cf_name_ranges();\n  auto cf_name_cdf = indexer_int32->cf_name_cdf();\n  auto key_max_id = indexer_int32->key_max_id();\n  auto key_null = indexer_int32->key_null();\n  auto key_sealed = 
indexer_int32->key_sealed();\n\n  ASSERT_TRUE(indexer_int32->seal().ok());\n  auto s = indexer_int32->insert(i, std::string((char *)&i, sizeof(int32_t)));\n  ASSERT_FALSE(s.ok());\n  std::string value;\n  ASSERT_TRUE(indexer_->rocksdb_context_.db_->Get({}, key_max_id, &value).ok());\n  ASSERT_TRUE(indexer_->rocksdb_context_.db_->Get({}, key_sealed, &value).ok());\n\n  // Remove column indexer\n  ASSERT_TRUE(indexer_->remove_column_indexer(\"field_int32\").ok());\n  indexer_int32 = (*indexer_)[\"field_int32\"];\n  ASSERT_EQ(indexer_int32, nullptr);\n\n  // No garbage left\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_terms), nullptr);\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_ranges), nullptr);\n  auto cdf = indexer_->rocksdb_context_.get_cf(cf_name_cdf);\n  ASSERT_NE(cdf, nullptr);\n  ASSERT_EQ(\n      indexer_->rocksdb_context_.db_->Get({}, cdf, field.name(), &value).code(),\n      rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_max_id, &value).code(),\n            rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_null, &value).code(),\n            rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_sealed, &value).code(),\n            rocksdb::Status::kNotFound);\n}\n\n\nTEST_F(InvertedIndexTest, COLUMN_MANIPULATION_ARRAY_STRING) {\n  ASSERT_TRUE(indexer_);\n\n  // Create column indexer\n  FieldSchema field{\"field_string_array\", DataType::ARRAY_STRING, true,\n                    params_};\n  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());\n  auto indexer_string_array = (*indexer_)[\"field_string_array\"];\n  ASSERT_NE(indexer_string_array, nullptr);\n\n  // Insert some data\n  for (uint32_t i = 0; i < 1500; i++) {\n    std::vector<std::string> values;\n    for (uint32_t j = 0; j < 5; j++) {\n      values.emplace_back(\"Number_\" + std::to_string(i));\n    }\n    auto s = indexer_string_array->insert(i, values);\n    
ASSERT_TRUE(s.ok());\n  }\n\n  // Store variable names for later retrieval\n  auto cf_name_terms = indexer_string_array->cf_name_terms();\n  auto cf_name_array_len = indexer_string_array->cf_name_array_len();\n  auto cf_name_ranges = indexer_string_array->cf_name_ranges();\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_ranges), nullptr);\n  auto cf_name_cdf = indexer_string_array->cf_name_cdf();\n  auto key_max_id = indexer_string_array->key_max_id();\n  auto key_null = indexer_string_array->key_null();\n  auto key_sealed = indexer_string_array->key_sealed();\n\n  // Remove column indexer\n  ASSERT_TRUE(indexer_->remove_column_indexer(\"field_string_array\").ok());\n  indexer_string_array = (*indexer_)[\"field_string_array\"];\n  ASSERT_EQ(indexer_string_array, nullptr);\n\n  // No garbage left\n  std::string value;\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_terms), nullptr);\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_array_len), nullptr);\n  ASSERT_EQ(indexer_->rocksdb_context_.get_cf(cf_name_ranges), nullptr);\n  auto cdf = indexer_->rocksdb_context_.get_cf(cf_name_cdf);\n  ASSERT_NE(cdf, nullptr);\n  ASSERT_EQ(\n      indexer_->rocksdb_context_.db_->Get({}, cdf, field.name(), &value).code(),\n      rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_max_id, &value).code(),\n            rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_null, &value).code(),\n            rocksdb::Status::kNotFound);\n  ASSERT_EQ(indexer_->rocksdb_context_.db_->Get({}, key_sealed, &value).code(),\n            rocksdb::Status::kNotFound);\n}\n\n\nTEST_F(InvertedIndexTest, INVERTED_SEARCH_RESULT) {\n  roaring_bitmap_t *bitmap1 = roaring_bitmap_create();\n  roaring_bitmap_add(bitmap1, 1);\n  roaring_bitmap_add(bitmap1, 2);\n  roaring_bitmap_add(bitmap1, 3);\n  auto res1 = std::make_shared<InvertedSearchResult>(bitmap1);\n\n  std::vector<uint32_t> ids;\n  res1->extract_ids(&ids);\n  
ASSERT_EQ(ids.size(), 3);\n  ASSERT_EQ(ids[0], 1);\n  ASSERT_EQ(ids[1], 2);\n  ASSERT_EQ(ids[2], 3);\n\n  roaring_bitmap_t *bitmap2 = roaring_bitmap_create();\n  roaring_bitmap_add(bitmap2, 3);\n  roaring_bitmap_add(bitmap2, 4);\n  roaring_bitmap_add(bitmap2, 5);\n  auto res2 = std::make_shared<InvertedSearchResult>(bitmap2);\n\n  res1->AND(*res2);\n  ASSERT_EQ(res1->count(), 1);\n  auto filter = res1->make_filter();\n  ASSERT_TRUE(filter);\n  ASSERT_FALSE(filter->is_filtered(3));\n\n  roaring_bitmap_t *bitmap3 = roaring_bitmap_create();\n  roaring_bitmap_add(bitmap3, 1);\n  roaring_bitmap_add(bitmap3, 3);\n  roaring_bitmap_add(bitmap3, 9);\n  roaring_bitmap_add(bitmap3, 11);\n  auto res3 = std::make_shared<InvertedSearchResult>(bitmap3);\n\n  res2->OR(*res3);\n  ASSERT_EQ(res2->count(), 6);\n  filter = res2->make_filter();\n  ASSERT_FALSE(filter->is_filtered(1));\n  ASSERT_FALSE(filter->is_filtered(3));\n  ASSERT_FALSE(filter->is_filtered(4));\n  ASSERT_FALSE(filter->is_filtered(5));\n  ASSERT_FALSE(filter->is_filtered(9));\n  ASSERT_FALSE(filter->is_filtered(11));\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/column/vector_column_indexer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// #include \"db/doc.h\"\n#include \"db/index/column/vector_column/vector_column_indexer.h\"\n#include <cassert>\n#include <cstdint>\n#include <gtest/gtest.h>\n#include \"db/index/column/vector_column/vector_column_params.h\"\n#include \"zvec/ailego/utility/float_helper.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/index_params.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\n\n\nstd::string print_dense_vector(const void *vector, size_t dim,\n                               DataType data_type) {\n  std::stringstream ss;\n  switch (data_type) {\n    case DataType::VECTOR_FP32: {\n      const float *data = reinterpret_cast<const float *>(vector);\n\n      for (size_t i = 0; i < dim; ++i) {\n        ss << data[i] << \" \";\n      }\n    } break;\n    case DataType::VECTOR_FP16: {\n      const zvec::float16_t *data =\n          reinterpret_cast<const zvec::float16_t *>(vector);\n      for (size_t i = 0; i < dim; ++i) {\n        ss << data[i] << \" \";\n      }\n    } break;\n    default:\n      LOG_ERROR(\"Unsupported data type: %d\", static_cast<int>(data_type));\n      break;\n  }\n  return ss.str();\n}\n\nTEST(VectorColumnIndexerTest, General) {\n  auto func = [&](const IndexParams::Ptr index_params,\n                  
const QueryParams::Ptr query_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n    constexpr idx_t kDocId = 2345;\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // 1. create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 4, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // 2. open\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    {\n      // can't use `DenseVector{std::vector<float>{1.0f, 2.0f, 3.0f}.data()}}`,\n      // which will be destroyed immediately\n      auto vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};\n\n      // 3. add data\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{vector.data()}};\n      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());\n    }\n\n    {\n      auto vector = std::vector<float>{1.0f, 2000.0f, 3.0f, 0};\n      // 1 * 1 + 2 * 2000 + 3 * 3 = 12006\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n\n    {  // add_with_id() won't check duplication, overwrite last one\n      auto vector = std::vector<float>{1.0f, 0, 3.0f, 0};\n      // 1 * 1 + 2 * 0 + 3 * 3 = 10\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n\n    // 5. 
fetch\n    auto fetched_data = indexer->Fetch(kDocId);\n    ASSERT_TRUE(fetched_data);\n    const float *dense_vector = reinterpret_cast<const float *>(\n        std::get<vector_column_params::DenseVectorBuffer>(\n            fetched_data->vector_buffer)\n            .data.data());\n    ASSERT_NEAR(dense_vector[0], 1.0, 0.1);\n    ASSERT_NEAR(dense_vector[1], 2.0, 0.1);\n    ASSERT_NEAR(dense_vector[2], 3.0, 0.1);\n    ASSERT_NEAR(dense_vector[3], 0, 0.1);\n\n    // 4. search\n    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi\n    auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = query_vector.data()}};\n    auto indexer_query_params =\n        vector_column_params::QueryParams{.topk = 10,\n                                          .filter = nullptr,\n                                          .fetch_vector = true,\n                                          .query_params = query_params};\n    auto results = indexer->Search(query, indexer_query_params);\n    ASSERT_TRUE(results.has_value());\n\n    auto vector_results =\n        dynamic_cast<VectorIndexResults *>(results.value().get());\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 2);\n\n    {\n      int count = 0;\n      auto iter = vector_results->create_iterator();\n      while (iter->valid()) {\n        count++;\n        iter->next();\n      }\n      ASSERT_EQ(count, 2);\n    }\n\n    {  // top1 doc\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId);\n      if (iter->score() > 14) {\n        ASSERT_NEAR(iter->score(), 14.0, 0.1);\n      }\n\n      // top2\n      iter->next();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId + 10);\n      ASSERT_NEAR(iter->score(), 10.0, 0.1);\n    }\n\n    
auto vector_index_params =\n        reinterpret_cast<VectorIndexParams *>(index_params.get());\n    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n      ASSERT_TRUE(vector_results->docs().size() == 2);\n      ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);\n      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),\n       std::make_shared<HnswQueryParams>(300));\n  func(std::make_shared<IVFIndexParams>(MetricType::IP),\n       std::make_shared<IVFQueryParams>(10));\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                         QuantizeType::FP16),\n       std::make_shared<HnswQueryParams>(300));\n  func(std::make_shared<IVFIndexParams>(MetricType::IP, 1024, 10, false,\n                                        QuantizeType::FP16),\n       std::make_shared<IVFQueryParams>(10));\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::INT8),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                         QuantizeType::INT8),\n       std::make_shared<HnswQueryParams>(300));\n  func(std::make_shared<IVFIndexParams>(MetricType::IP, 1024, 10, false,\n                                        QuantizeType::INT8),\n       std::make_shared<IVFQueryParams>(10));\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::INT4),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                  
                       QuantizeType::INT4),\n       std::make_shared<HnswQueryParams>(300));\n}\n\nTEST(VectorColumnIndexerTest, DenseDataTypeFP16) {\n  auto func = [&](const IndexParams::Ptr index_params,\n                  const QueryParams::Ptr query_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n    constexpr idx_t kDocId = 2345;\n    constexpr int dimension = 4;\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // 1. create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path, FieldSchema(\"test\", DataType::VECTOR_FP16, dimension,\n                                     false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // 2. open\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    {\n      // can't use `DenseVector{std::vector<float>{1.0f, 2.0f, 3.0f}.data()}}`,\n      // which will be destroyed immediately\n      auto origin_vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};\n      std::vector<uint16_t> buffer(dimension);\n      ailego::FloatHelper::ToFP16((float *)origin_vector.data(), dimension,\n                                  buffer.data());\n      auto vector = buffer;\n\n      // 3. 
add data\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{vector.data()}};\n      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());\n    }\n\n    {\n      auto origin_vector = std::vector<float>{1.0f, 2000.0f, 3.0f, 0};\n      std::vector<uint16_t> buffer(dimension);\n      ailego::FloatHelper::ToFP16((float *)origin_vector.data(), dimension,\n                                  buffer.data());\n      auto vector = buffer;\n      // 1 * 1 + 2 * 2000 + 3 * 3 = 12006\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n\n    {  // add_with_id() won't check duplication, overwrite last one\n      auto origin_vector = std::vector<float>{1.0f, 0, 3.0f, 0};\n      std::vector<uint16_t> buffer(dimension);\n      ailego::FloatHelper::ToFP16((float *)origin_vector.data(), dimension,\n                                  buffer.data());\n      auto vector = buffer;\n      // 1 * 1 + 2 * 0 + 3 * 3 = 10\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n    // 5. 
fetch\n    {\n      auto fetched_data = indexer->Fetch(kDocId);\n      ASSERT_TRUE(fetched_data);\n      const uint16_t *dense_vector = reinterpret_cast<const uint16_t *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[0]), 1.0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[1]), 2.0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[2]), 3.0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[3]), 0, 0.1);\n    }\n    {\n      auto fetched_data = indexer->Fetch(kDocId + 10);\n      ASSERT_TRUE(fetched_data);\n      const uint16_t *dense_vector = reinterpret_cast<const uint16_t *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[0]), 1.0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[1]), 0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[2]), 3.0, 0.1);\n      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[3]), 0, 0.1);\n    }\n\n    // 4. 
search\n    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi\n    auto origin_query_vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};\n    std::vector<uint16_t> buffer(dimension);\n    ailego::FloatHelper::ToFP16((float *)origin_query_vector.data(), dimension,\n                                buffer.data());\n    auto query_vector = buffer;\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = query_vector.data()}};\n    auto indexer_query_params =\n        vector_column_params::QueryParams{.topk = 10,\n                                          .filter = nullptr,\n                                          .fetch_vector = true,\n                                          .query_params = query_params};\n    auto results = indexer->Search(query, indexer_query_params);\n    ASSERT_TRUE(results.has_value());\n\n    auto vector_results =\n        dynamic_cast<VectorIndexResults *>(results.value().get());\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 2);\n\n    {\n      int count = 0;\n      auto iter = vector_results->create_iterator();\n      while (iter->valid()) {\n        count++;\n        iter->next();\n      }\n      ASSERT_EQ(count, 2);\n    }\n\n    {  // top1 doc\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId);\n      if (iter->score() > 14) {\n        ASSERT_NEAR(iter->score(), 14.0, 0.1);\n      }\n\n      // top2\n      iter->next();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId + 10);\n      ASSERT_NEAR(iter->score(), 10.0, 0.1);\n    }\n\n    auto vector_index_params =\n        reinterpret_cast<VectorIndexParams *>(index_params.get());\n    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n      ASSERT_TRUE(vector_results->docs().size() == 2);\n      
ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);\n      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),\n       std::make_shared<HnswQueryParams>(300));\n}\n\nTEST(VectorColumnIndexerTest, DenseDataTypeINT8) {\n  auto func = [&](const IndexParams::Ptr index_params,\n                  const QueryParams::Ptr query_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n    constexpr idx_t kDocId = 2345;\n    constexpr int dimension = 4;\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // 1. create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path, FieldSchema(\"test\", DataType::VECTOR_INT8, dimension,\n                                     false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // 2. open\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    {\n      // can't use `DenseVector{std::vector<float>{1.0f, 2.0f, 3.0f}.data()}}`,\n      // which will be destroyed immediately\n      auto vector = std::vector<uint8_t>{1, 2, 3, 0};\n\n      // 3. 
add data\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{vector.data()}};\n      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());\n    }\n\n    {\n      auto vector = std::vector<uint8_t>{1, 200, 3, 0};\n      // 1 * 1 + 2 * 2000 + 3 * 3 = 12006\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n\n    {  // add_with_id() won't check duplication, overwrite last one\n      auto vector = std::vector<uint8_t>{1, 0, 3, 0};\n      // 1 * 1 + 2 * 0 + 3 * 3 = 10\n      ASSERT_TRUE(indexer\n                      ->Insert(\n                          vector_column_params::VectorData{\n                              vector_column_params::DenseVector{vector.data()}},\n                          kDocId + 10)\n                      .ok());\n    }\n    // 5. 
fetch\n    {\n      auto fetched_data = indexer->Fetch(kDocId);\n      ASSERT_TRUE(fetched_data);\n      const uint8_t *dense_vector = reinterpret_cast<const uint8_t *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(dense_vector[0], 1.0, 0.1);\n      ASSERT_NEAR(dense_vector[1], 2.0, 0.1);\n      ASSERT_NEAR(dense_vector[2], 3.0, 0.1);\n      ASSERT_NEAR(dense_vector[3], 0, 0.1);\n    }\n    {\n      auto fetched_data = indexer->Fetch(kDocId + 10);\n      ASSERT_TRUE(fetched_data);\n      const uint8_t *dense_vector = reinterpret_cast<const uint8_t *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(dense_vector[0], 1.0, 0.1);\n      ASSERT_NEAR(dense_vector[1], 0, 0.1);\n      ASSERT_NEAR(dense_vector[2], 3.0, 0.1);\n      ASSERT_NEAR(dense_vector[3], 0, 0.1);\n    }\n\n    // 4. 
search\n    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi\n    auto query_vector = std::vector<uint8_t>{1, 2, 3, 0};\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = query_vector.data()}};\n    auto indexer_query_params =\n        vector_column_params::QueryParams{.topk = 10,\n                                          .filter = nullptr,\n                                          .fetch_vector = true,\n                                          .query_params = query_params};\n    auto results = indexer->Search(query, indexer_query_params);\n    ASSERT_TRUE(results.has_value());\n\n    auto vector_results =\n        dynamic_cast<VectorIndexResults *>(results.value().get());\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 2);\n\n    {\n      int count = 0;\n      auto iter = vector_results->create_iterator();\n      while (iter->valid()) {\n        count++;\n        iter->next();\n      }\n      ASSERT_EQ(count, 2);\n    }\n\n    {  // top1 doc\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId);\n      if (iter->score() > 14) {\n        ASSERT_NEAR(iter->score(), 14.0, 0.1);\n      }\n\n      // top2\n      iter->next();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId + 10);\n      ASSERT_NEAR(iter->score(), 10.0, 0.1);\n    }\n\n    auto vector_index_params =\n        reinterpret_cast<VectorIndexParams *>(index_params.get());\n    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n      ASSERT_TRUE(vector_results->docs().size() == 2);\n      ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);\n      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  
func(std::make_shared<FlatIndexParams>(MetricType::IP),\n       std::make_shared<QueryParams>(IndexType::FLAT));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),\n       std::make_shared<HnswQueryParams>(300));\n}\n\n\nTEST(VectorColumnIndexerTest, SparseGeneral) {\n  constexpr uint32_t kSparseCount = 3;\n  auto func = [&](const IndexParams::Ptr index_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n    constexpr idx_t kDocId = 2345;\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::SPARSE_VECTOR_FP32, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // open\n    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});\n        !ret.ok()) {\n      std::cout << ret.message() << std::endl;\n      ASSERT_TRUE(false);\n    }\n\n    std::vector<uint32_t> indices(kSparseCount);\n    std::vector<float> values(kSparseCount);\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      indices[i] = i;\n      values[i] = i;\n    }\n    vector_column_params::SparseVector vector{kSparseCount, indices.data(),\n                                              values.data()};\n    ASSERT_TRUE(\n        indexer->Insert(vector_column_params::VectorData{vector}, kDocId).ok());\n\n    // fetch\n    auto fetched_data = indexer->Fetch(kDocId);\n    ASSERT_TRUE(fetched_data.has_value());\n    auto fetched_sparse_vector =\n        std::get<vector_column_params::SparseVectorBuffer>(\n            fetched_data.value().vector_buffer);\n    auto fetched_indices = reinterpret_cast<const uint32_t *>(\n        fetched_sparse_vector.indices.data());\n    auto fetched_values =\n        reinterpret_cast<const float *>(fetched_sparse_vector.values.data());\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      
ASSERT_EQ(i, fetched_indices[i]);\n      ASSERT_FLOAT_EQ(i, fetched_values[i]);\n    }\n\n    // search\n    auto query =\n        vector_column_params::VectorData{vector_column_params::SparseVector{\n            kSparseCount, indices.data(), values.data()}};\n    auto query_params = vector_column_params::QueryParams{\n        .topk = 10, .filter = nullptr, .fetch_vector = true};\n    auto results = indexer->Search(query, query_params);\n    ASSERT_TRUE(results.has_value());\n\n    auto vector_results =\n        dynamic_cast<VectorIndexResults *>(results.value().get());\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 1);\n\n    {\n      int count = 0;\n      auto iter = vector_results->create_iterator();\n      while (iter->valid()) {\n        count++;\n        iter->next();\n      }\n      ASSERT_EQ(count, 1);\n    }\n\n    {\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId);\n      ASSERT_FLOAT_EQ(iter->score(), 5.0);\n\n      auto vector = iter->vector();\n      auto sparse_vector =\n          std::get<vector_column_params::SparseVector>(vector.vector);\n      auto indices = reinterpret_cast<const uint32_t *>(sparse_vector.indices);\n      auto values = reinterpret_cast<const float *>(sparse_vector.values);\n      ASSERT_EQ(sparse_vector.count, kSparseCount);\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, indices[i]);\n        ASSERT_FLOAT_EQ(i, values[i]);\n      }\n      auto vector_index_params =\n          reinterpret_cast<VectorIndexParams *>(index_params.get());\n      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n        ASSERT_TRUE(vector_results->docs().size() == 1);\n        ASSERT_TRUE(vector_results->reverted_sparse_values_list().size() == 1);\n        ASSERT_TRUE(vector_results->reverted_vector_list().empty());\n      }\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  
func(std::make_shared<FlatIndexParams>(MetricType::IP));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));\n  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                         QuantizeType::FP16));\n}\n\nTEST(VectorColumnIndexerTest, SparseDataTypeFP16) {\n  constexpr uint32_t kSparseCount = 3;\n  auto func = [&](const IndexParams::Ptr index_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n    constexpr idx_t kDocId = 2345;\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::SPARSE_VECTOR_FP16, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // open\n    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});\n        !ret.ok()) {\n      std::cout << ret.message() << std::endl;\n      ASSERT_TRUE(false);\n    }\n\n    std::vector<uint32_t> indices(kSparseCount);\n    std::vector<float> origin_values(kSparseCount);\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      indices[i] = i;\n      origin_values[i] = i;\n    }\n    std::vector<uint16_t> buffer1(kSparseCount);\n    ailego::FloatHelper::ToFP16((float *)origin_values.data(), kSparseCount,\n                                buffer1.data());\n    auto values = buffer1;\n    vector_column_params::SparseVector vector{kSparseCount, indices.data(),\n                                              values.data()};\n    ASSERT_TRUE(\n        indexer->Insert(vector_column_params::VectorData{vector}, kDocId).ok());\n\n    // fetch\n    auto fetched_data = indexer->Fetch(kDocId);\n    ASSERT_TRUE(fetched_data.has_value());\n    auto fetched_sparse_vector =\n        std::get<vector_column_params::SparseVectorBuffer>(\n  
          fetched_data.value().vector_buffer);\n    auto fetched_indices = reinterpret_cast<const uint32_t *>(\n        fetched_sparse_vector.indices.data());\n    auto fetched_values =\n        reinterpret_cast<const uint16_t *>(fetched_sparse_vector.values.data());\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      ASSERT_EQ(i, fetched_indices[i]);\n      ASSERT_FLOAT_EQ(i, ailego::FloatHelper::ToFP32(fetched_values[i]));\n    }\n\n    // search\n    auto query =\n        vector_column_params::VectorData{vector_column_params::SparseVector{\n            kSparseCount, indices.data(), values.data()}};\n    auto query_params = vector_column_params::QueryParams{\n        .topk = 10, .filter = nullptr, .fetch_vector = true};\n    auto results = indexer->Search(query, query_params);\n    ASSERT_TRUE(results.has_value());\n\n    auto vector_results =\n        dynamic_cast<VectorIndexResults *>(results.value().get());\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 1);\n\n    {\n      int count = 0;\n      auto iter = vector_results->create_iterator();\n      while (iter->valid()) {\n        count++;\n        iter->next();\n      }\n      ASSERT_EQ(count, 1);\n    }\n\n    {\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), kDocId);\n      ASSERT_FLOAT_EQ(iter->score(), 5.0);\n\n      auto vector = iter->vector();\n      auto sparse_vector =\n          std::get<vector_column_params::SparseVector>(vector.vector);\n      auto indices = reinterpret_cast<const uint32_t *>(sparse_vector.indices);\n      auto values = reinterpret_cast<const uint16_t *>(sparse_vector.values);\n      ASSERT_EQ(sparse_vector.count, kSparseCount);\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, indices[i]);\n        ASSERT_FLOAT_EQ(i, ailego::FloatHelper::ToFP32(values[i]));\n      }\n      auto vector_index_params =\n          reinterpret_cast<VectorIndexParams 
*>(index_params.get());\n      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {\n        ASSERT_TRUE(vector_results->docs().size() == 1);\n        ASSERT_TRUE(vector_results->reverted_sparse_values_list().size() == 1);\n        ASSERT_TRUE(vector_results->reverted_vector_list().empty());\n      }\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP));\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));\n}\n\nTEST(VectorColumnIndexerTest, Merge) {\n  constexpr uint32_t kDimension = 64;\n  const std::string index_name{\"test_indexer.index\"};\n\n  auto del_index_file_func = [&](const std::string file_name) {\n    auto cmd_buf = \"rm -f \" + file_name;\n    system(cmd_buf.c_str());\n  };\n\n  auto create_indexer_func =\n      [&](const IndexParams::Ptr &index_params,\n          const std::string &index_name) -> VectorColumnIndexer::Ptr {\n    del_index_file_func(index_name);\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_name, FieldSchema(\"test\", DataType::VECTOR_FP32, kDimension,\n                                false, index_params));\n    if (indexer == nullptr ||\n        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {\n      return nullptr;\n    }\n    return indexer;\n  };\n\n  auto func = [&](const IndexParams::Ptr &param1,\n                  const IndexParams::Ptr &param2,\n                  const IndexParams::Ptr &param3) {\n    auto indexer1 = create_indexer_func(param1, index_name + \"1\");\n    ASSERT_NE(nullptr, indexer1);\n    auto indexer2 = create_indexer_func(param2, index_name + \"2\");\n    ASSERT_NE(nullptr, indexer2);\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 123.0f;\n    auto vector_data = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector.data()}};\n    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());\n\n    
vector[1] = 2.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());\n    vector[1] = 3.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());\n\n    {\n      auto fetched_data = indexer1->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(1.0f, fetched_vector[1], 0.1);\n      ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);\n      ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(1);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      ASSERT_NEAR(3.0f, fetched_vector[1], 0.1);\n      ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);\n    }\n\n    {  // test reduce\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        const float *fetched_vector = reinterpret_cast<const float *>(\n            std::get<vector_column_params::DenseVectorBuffer>(\n                fetched_data->vector_buffer)\n                .data.data());\n        ASSERT_NEAR(1.0f, fetched_vector[1], 0.1);\n        
ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n      }\n      {\n        auto fetched_data = indexer3->Fetch(1);\n        ASSERT_TRUE(fetched_data.has_value());\n        const float *fetched_vector = reinterpret_cast<const float *>(\n            std::get<vector_column_params::DenseVectorBuffer>(\n                fetched_data->vector_buffer)\n                .data.data());\n        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);\n        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n      }\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    {  // test reduce with filter\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      auto filter = std::make_shared<EasyIndexFilter>(\n          [](uint64_t key) { return key == 0; });\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());\n      // 0.0 -> x ; 1.0 -> 0 ; 1.1 -> 1\n      ASSERT_TRUE(indexer3->doc_count() == 2);\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        const float *fetched_vector = reinterpret_cast<const float *>(\n            std::get<vector_column_params::DenseVectorBuffer>(\n                fetched_data->vector_buffer)\n                .data.data());\n        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);\n        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n      }\n\n      {\n        // search with fetch vector\n        auto query = vector_column_params::VectorData{\n            vector_column_params::DenseVector{vector.data()}};\n        auto query_params = vector_column_params::QueryParams{\n            .topk = 10, .filter = nullptr, .fetch_vector = true};\n        auto results = indexer2->Search(query, query_params);\n        ASSERT_TRUE(results.has_value());\n        auto vector_results =\n            dynamic_cast<VectorIndexResults *>(results.value().get());\n        ASSERT_TRUE(vector_results);\n        
ASSERT_EQ(vector_results->count(), 2);\n        auto iter = vector_results->create_iterator();\n        ASSERT_TRUE(iter->valid());\n\n        {\n          ASSERT_TRUE(iter->valid());\n          auto doc_id = iter->doc_id();\n          LOG_DEBUG(\"topk1 pk: %zu\", (size_t)doc_id);\n          LOG_DEBUG(\"topk1 score: %.10f\", iter->score());\n\n          LOG_DEBUG(\n              \"topk1 fetched_vector:%s\",\n              print_dense_vector(std::get<vector_column_params::DenseVector>(\n                                     iter->vector().vector)\n                                     .data,\n                                 3, DataType::VECTOR_FP32)\n                  .c_str());\n          {\n            auto fetched_vector = vector_results->docs()[0].vector();\n\n            LOG_DEBUG(\n                \"topk1 fetched_vector - original:%s\",\n                print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP16)\n                    .c_str());\n          }\n          if (!vector_results->reverted_vector_list().empty()) {\n            auto fetched_vector =\n                vector_results->reverted_vector_list()[0].data();\n\n            LOG_DEBUG(\n                \"topk1 fetched_vector - reverted:%s\",\n                print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32)\n                    .c_str());\n          }\n          // ASSERT_TRUE(iter->score() < 2.01);\n          // ASSERT_TRUE(iter->score() > -0.01);\n        }\n      }\n\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    {  // test reduce with filter in parallel\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      auto filter = std::make_shared<EasyIndexFilter>(\n          [](uint64_t key) { return key == 0; });\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());\n\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        
ASSERT_TRUE(fetched_data.has_value());\n        const float *fetched_vector = reinterpret_cast<const float *>(\n            std::get<vector_column_params::DenseVectorBuffer>(\n                fetched_data->vector_buffer)\n                .data.data());\n        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);\n        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);\n      }\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n\n    indexer1->Close();\n    indexer2->Close();\n    del_index_file_func(index_name + \"1\");\n    del_index_file_func(index_name + \"2\");\n  };\n\n  // same index with different quantize type\n  auto test_different_quantize_type = [&](MetricType metric_type,\n                                          QuantizeType quantize_type) {\n    LOG_INFO(\n        \"Merge test_different_quantize_type(): with metric type %s and \"\n        \"quantize type %s\",\n        MetricTypeCodeBook::AsString(metric_type).c_str(),\n        QuantizeTypeCodeBook::AsString(quantize_type).c_str());\n\n    auto param_flat = std::make_shared<FlatIndexParams>(metric_type);\n    auto param_flat_fp16 =\n        std::make_shared<FlatIndexParams>(metric_type, quantize_type);\n    auto param_hnsw = std::make_shared<HnswIndexParams>(metric_type, 10, 100);\n    auto param_hnsw_fp16 =\n        std::make_shared<HnswIndexParams>(metric_type, 10, 100, quantize_type);\n\n    func(param_flat, param_flat, param_hnsw_fp16);\n\n    std::vector<IndexParams::Ptr> fp32_params = {param_flat, param_hnsw};\n    std::vector<IndexParams::Ptr> fp16_params = {param_flat_fp16,\n                                                 param_hnsw_fp16};\n    // can't mix\n    for (auto param_target : fp32_params) {\n      func(param_flat_fp16, param_hnsw_fp16, param_target);\n      // for (auto param1 : fp16_params) {\n      //   for (auto param2 : fp16_params) {\n      //     func(param1, param2, param_target);\n      //   }\n      // }\n      func(param_hnsw, param_flat, 
param_target);\n      // for (auto param1 : fp32_params) {\n      //   for (auto param2 : fp32_params) {\n      //     func(param1, param2, param_target);\n      //   }\n      // }\n    }\n\n    for (auto param_target : fp16_params) {\n      func(param_flat_fp16, param_hnsw_fp16, param_target);\n      // for (auto param1 : fp16_params) {\n      //   for (auto param2 : fp16_params) {\n      //     func(param1, param2, param_target);\n      //   }\n      // }\n      func(param_hnsw, param_flat, param_target);\n      // for (auto param1 : fp32_params) {\n      //   for (auto param2 : fp32_params) {\n      //     func(param1, param2, param_target);\n      //   }\n      // }\n    }\n  };\n  test_different_quantize_type(MetricType::L2, QuantizeType::UNDEFINED);\n  test_different_quantize_type(MetricType::L2, QuantizeType::FP16);\n  test_different_quantize_type(MetricType::IP, QuantizeType::FP16);\n  test_different_quantize_type(MetricType::L2, QuantizeType::INT8);\n  // test_different_quantize_type(MetricType::IP, QuantizeType::INT8);\n  // The quantization error is toooooo large for INT4 =_=\n  // test_different_quantize_type(MetricType::L2, QuantizeType::INT4);\n  // test_different_quantize_type(MetricType::IP, QuantizeType::INT4);\n  // test_different_quantize_type(MetricType::COSINE);\n}\n\nTEST(VectorColumnIndexerTest, SparseMerge) {\n  constexpr uint32_t kSparseCount = 3;\n  constexpr uint32_t kUnitSize = sizeof(float);  // VECTOR_FP32\n  const std::string index_name{\"test_indexer.index\"};\n\n  auto del_index_file_func = [&](const std::string file_name) {\n    auto cmd_buf = \"rm -f \" + file_name;\n    system(cmd_buf.c_str());\n  };\n\n  auto create_indexer_func =\n      [&](const IndexParams::Ptr &index_params,\n          const std::string &index_name) -> VectorColumnIndexer::Ptr {\n    del_index_file_func(index_name);\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_name,\n        FieldSchema(\"test\", DataType::SPARSE_VECTOR_FP32, 
false, index_params));\n    if (indexer == nullptr ||\n        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {\n      return nullptr;\n    }\n    return indexer;\n  };\n\n  auto func = [&](const IndexParams::Ptr &param1,\n                  const IndexParams::Ptr &param2,\n                  const IndexParams::Ptr &param3) {\n    auto indexer1 = create_indexer_func(param1, index_name + \"1\");\n    ASSERT_NE(nullptr, indexer1);\n    auto indexer2 = create_indexer_func(param2, index_name + \"2\");\n    ASSERT_NE(nullptr, indexer2);\n\n    std::vector<uint32_t> indices(kSparseCount);\n    std::vector<float> values(kSparseCount);\n    for (uint32_t i = 0; i < kSparseCount; ++i) {\n      indices[i] = i;\n      values[i] = (float)i;\n    }\n    vector_column_params::SparseVector vector{kSparseCount, indices.data(),\n                                              values.data()};\n    auto vector_data = vector_column_params::VectorData{vector};\n    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());\n\n    values[1] = 2.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());\n    values[1] = 3.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());\n\n    {\n      auto fetched_data = indexer1->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      auto fetched_sparse_vector =\n          std::get<vector_column_params::SparseVectorBuffer>(\n              fetched_data->vector_buffer);\n      ASSERT_EQ(kSparseCount,\n                fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);\n\n      auto fetched_indices = reinterpret_cast<const uint32_t *>(\n          fetched_sparse_vector.indices.data());\n      auto fetched_values =\n          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, fetched_indices[i]);\n      }\n      ASSERT_EQ(0.0f, 
fetched_values[0]);\n      ASSERT_EQ(1.0f, fetched_values[1]);\n      ASSERT_EQ(2.0f, fetched_values[2]);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      auto fetched_sparse_vector =\n          std::get<vector_column_params::SparseVectorBuffer>(\n              fetched_data->vector_buffer);\n      ASSERT_EQ(kSparseCount,\n                fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);\n\n      auto fetched_indices = reinterpret_cast<const uint32_t *>(\n          fetched_sparse_vector.indices.data());\n      auto fetched_values =\n          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, fetched_indices[i]);\n      }\n      ASSERT_EQ(0.0f, fetched_values[0]);\n      ASSERT_EQ(2.0f, fetched_values[1]);\n      ASSERT_EQ(2.0f, fetched_values[2]);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(1);\n      ASSERT_TRUE(fetched_data.has_value());\n      auto fetched_sparse_vector =\n          std::get<vector_column_params::SparseVectorBuffer>(\n              fetched_data->vector_buffer);\n      ASSERT_EQ(kSparseCount,\n                fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);\n\n      auto fetched_indices = reinterpret_cast<const uint32_t *>(\n          fetched_sparse_vector.indices.data());\n      auto fetched_values =\n          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());\n      for (uint32_t i = 0; i < kSparseCount; ++i) {\n        ASSERT_EQ(i, fetched_indices[i]);\n      }\n      ASSERT_EQ(0.0f, fetched_values[0]);\n      ASSERT_EQ(3.0f, fetched_values[1]);\n      ASSERT_EQ(2.0f, fetched_values[2]);\n    }\n\n    {  // test reduce\n      auto indexer3 = create_indexer_func(param3, index_name 
+ \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        auto fetched_sparse_vector =\n            std::get<vector_column_params::SparseVectorBuffer>(\n                fetched_data->vector_buffer);\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.values.size() / kUnitSize);\n        auto fetched_indices = reinterpret_cast<const uint32_t *>(\n            fetched_sparse_vector.indices.data());\n        auto fetched_values = reinterpret_cast<const float *>(\n            fetched_sparse_vector.values.data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, fetched_indices[i]);\n        }\n        ASSERT_EQ(0.0f, fetched_values[0]);\n        ASSERT_EQ(1.0f, fetched_values[1]);\n        ASSERT_EQ(2.0f, fetched_values[2]);\n      }\n      {\n        auto fetched_data = indexer3->Fetch(1);\n        ASSERT_TRUE(fetched_data.has_value());\n        auto fetched_sparse_vector =\n            std::get<vector_column_params::SparseVectorBuffer>(\n                fetched_data->vector_buffer);\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.values.size() / kUnitSize);\n        auto fetched_indices = reinterpret_cast<const uint32_t *>(\n            fetched_sparse_vector.indices.data());\n        auto fetched_values = reinterpret_cast<const float *>(\n            fetched_sparse_vector.values.data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, fetched_indices[i]);\n        }\n        ASSERT_EQ(0.0f, fetched_values[0]);\n        ASSERT_EQ(2.0f, fetched_values[1]);\n        
ASSERT_EQ(2.0f, fetched_values[2]);\n      }\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    {  // test reduce with filter\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      auto filter = std::make_shared<EasyIndexFilter>(\n          [](uint64_t key) { return key == 0; });\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        auto fetched_sparse_vector =\n            std::get<vector_column_params::SparseVectorBuffer>(\n                fetched_data->vector_buffer);\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.values.size() / kUnitSize);\n        auto fetched_indices = reinterpret_cast<const uint32_t *>(\n            fetched_sparse_vector.indices.data());\n        auto fetched_values = reinterpret_cast<const float *>(\n            fetched_sparse_vector.values.data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, fetched_indices[i]);\n        }\n        ASSERT_EQ(0.0f, fetched_values[0]);\n        ASSERT_EQ(2.0f, fetched_values[1]);\n        ASSERT_EQ(2.0f, fetched_values[2]);\n      }\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n    {  // test reduce with filter in parallel\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      auto filter = std::make_shared<EasyIndexFilter>(\n          [](uint64_t key) { return key == 0; });\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        auto fetched_sparse_vector =\n            
std::get<vector_column_params::SparseVectorBuffer>(\n                fetched_data->vector_buffer);\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));\n        ASSERT_EQ(kSparseCount,\n                  fetched_sparse_vector.values.size() / kUnitSize);\n        auto fetched_indices = reinterpret_cast<const uint32_t *>(\n            fetched_sparse_vector.indices.data());\n        auto fetched_values = reinterpret_cast<const float *>(\n            fetched_sparse_vector.values.data());\n        for (uint32_t i = 0; i < kSparseCount; ++i) {\n          ASSERT_EQ(i, fetched_indices[i]);\n        }\n        ASSERT_EQ(0.0f, fetched_values[0]);\n        ASSERT_EQ(2.0f, fetched_values[1]);\n        ASSERT_EQ(2.0f, fetched_values[2]);\n      }\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n\n\n    indexer1->Close();\n    indexer2->Close();\n    del_index_file_func(index_name + \"1\");\n    del_index_file_func(index_name + \"2\");\n  };\n\n\n  //===============================================\n  // Fp32\n  //===============================================\n  {\n    auto param_flat = std::make_shared<FlatIndexParams>(MetricType::IP);\n    auto param_hnsw =\n        std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100);\n    LOG_INFO(\"SparseMerge: param_flat, param_flat, param_flat\");\n    func(param_flat, param_flat, param_flat);\n\n    LOG_INFO(\"SparseMerge: param_hnsw, param_hnsw, param_hnsw\");\n    func(param_hnsw, param_hnsw, param_hnsw);\n\n    LOG_INFO(\"SparseMerge: param_flat, param_hnsw, param_hnsw\");\n    func(param_flat, param_hnsw, param_hnsw);\n\n    LOG_INFO(\"SparseMerge: param_hnsw, param_flat, param_flat\");\n    func(param_hnsw, param_flat, param_flat);\n    LOG_INFO(\"SparseMerge: param_flat, param_hnsw, param_flat\");\n    func(param_flat, param_hnsw, param_flat);\n\n    LOG_INFO(\"SparseMerge: param_hnsw, param_flat, param_hnsw\");\n    func(param_hnsw, 
param_flat, param_hnsw);\n  }\n\n  //===============================================\n  // Fp16 fp32\n  //===============================================\n  {\n    auto param_flat = std::make_shared<FlatIndexParams>(MetricType::IP);\n    auto param_hnsw = std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                                        QuantizeType::FP16);\n    LOG_INFO(\"SparseMerge - fp16: param_flat, param_flat -> param_flat\");\n    func(param_flat, param_flat, param_flat);\n\n    LOG_INFO(\"SparseMerge - fp16: param_hnsw, param_hnsw -> param_hnsw\");\n    func(param_hnsw, param_hnsw, param_hnsw);\n\n    LOG_INFO(\"SparseMerge - fp16: param_hnsw, param_hnsw -> param_flat\");\n    func(param_hnsw, param_hnsw, param_flat);\n\n    LOG_INFO(\"SparseMerge - fp16: param_flat, param_flat -> param_hnsw\");\n    func(param_flat, param_flat, param_hnsw);\n  }\n}\n\n\nTEST(VectorColumnIndexerTest, BfPks) {\n  auto func = [&](const IndexParams::Ptr index_params) {\n    const std::string index_file_path = \"test_indexer.index\";\n\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n    system(cmd_buf);\n\n    // 1. create indexer\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    // 2. open\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    auto vector1 = std::vector<float>{1.0f, 2.0f, 3.0f};\n    auto vector2 = std::vector<float>{4.0f, 5.0f, 6.0f};\n\n    // 3. 
add data\n    auto data1 = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector1.data()}};\n    ASSERT_TRUE(indexer->Insert(data1, 1).ok());\n\n    auto data2 = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector2.data()}};\n    ASSERT_TRUE(indexer->Insert(data2, 2).ok());\n\n    {\n      auto bf_pks = std::vector<uint64_t>{1};\n      auto query =\n          vector_column_params::VectorData{vector_column_params::DenseVector{\n              .data = std::vector<float>{1.0f, 2.0f, 3.0f}.data()}};\n      auto query_params =\n          vector_column_params::QueryParams{.topk = 10,\n                                            .filter = nullptr,\n                                            .fetch_vector = true,\n                                            .bf_pks = {bf_pks}};\n      auto results = indexer->Search(query, query_params);\n      ASSERT_TRUE(results.has_value());\n\n      auto vector_results =\n          dynamic_cast<VectorIndexResults *>(results.value().get());\n      ASSERT_TRUE(vector_results);\n      ASSERT_EQ(vector_results->count(), 1);\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), 1);\n      auto fetched_vector =\n          std::get<vector_column_params::DenseVector>(iter->vector().vector);\n      const float *fetched_vector_data =\n          reinterpret_cast<const float *>(fetched_vector.data);\n      for (int i = 0; i < 3; ++i) {\n        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector1[i]);\n      }\n    }\n\n    {\n      auto bf_pks = std::vector<uint64_t>{1, 2};\n      auto query =\n          vector_column_params::VectorData{vector_column_params::DenseVector{\n              .data = std::vector<float>{1.0f, 2.0f, 3.0f}.data()}};\n      auto query_params =\n          vector_column_params::QueryParams{.topk = 10,\n                                            .filter = nullptr,\n                                        
    .fetch_vector = true,\n                                            .bf_pks = {bf_pks}};\n      auto results = indexer->Search(query, query_params);\n      ASSERT_TRUE(results.has_value());\n\n      auto vector_results =\n          dynamic_cast<VectorIndexResults *>(results.value().get());\n      ASSERT_TRUE(vector_results);\n      ASSERT_EQ(vector_results->count(), 2);\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), 1);\n      auto fetched_vector =\n          std::get<vector_column_params::DenseVector>(iter->vector().vector);\n      const float *fetched_vector_data =\n          reinterpret_cast<const float *>(fetched_vector.data);\n      for (int i = 0; i < 3; ++i) {\n        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector1[i]);\n      }\n    }\n\n    {\n      auto bf_pks = std::vector<uint64_t>{2};\n      auto query =\n          vector_column_params::VectorData{vector_column_params::DenseVector{\n              .data = std::vector<float>{1.0f, 2.0f, 3.0f}.data()}};\n      auto query_params =\n          vector_column_params::QueryParams{.topk = 10,\n                                            .filter = nullptr,\n                                            .fetch_vector = true,\n                                            .bf_pks = {bf_pks}};\n      auto results = indexer->Search(query, query_params);\n      ASSERT_TRUE(results.has_value());\n\n      auto vector_results =\n          dynamic_cast<VectorIndexResults *>(results.value().get());\n      ASSERT_TRUE(vector_results);\n      ASSERT_EQ(vector_results->count(), 1);\n      auto iter = vector_results->create_iterator();\n      ASSERT_TRUE(iter->valid());\n      ASSERT_EQ(iter->doc_id(), 2);\n      auto fetched_vector =\n          std::get<vector_column_params::DenseVector>(iter->vector().vector);\n      const float *fetched_vector_data =\n          reinterpret_cast<const float *>(fetched_vector.data);\n      for (int i = 0; i < 3; ++i) 
{\n        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector2[i]);\n      }\n    }\n\n    indexer->Close();\n\n    system(cmd_buf);\n  };\n\n  func(std::make_shared<FlatIndexParams>(MetricType::COSINE));\n  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100));\n}\n\n\nusing DenseVectorDataBuffer = vector_column_params::DenseVectorBuffer;\nusing SparseVectorBuffer = vector_column_params::SparseVectorBuffer;\n\nDenseVectorDataBuffer create_dense_vector(int dim, DataType data_type, int pk,\n                                          size_t count,\n                                          float float_offset = 0.1f) {\n  count += 1;\n  switch (data_type) {\n    case DataType::VECTOR_FP32: {\n      std::string ret;\n      ret.resize(dim * sizeof(float));\n      float *data = reinterpret_cast<float *>(ret.data());\n      for (int i = 0; i < dim; ++i) {\n        data[i] = pk + i + float_offset;\n      }\n      return DenseVectorDataBuffer{std::move(ret)};\n    }\n    case DataType::VECTOR_FP16: {\n      std::string ret;\n      ret.resize(dim * sizeof(zvec::float16_t));\n      zvec::float16_t *data = reinterpret_cast<zvec::float16_t *>(ret.data());\n      for (int i = 0; i < dim; ++i) {\n        data[i] = pk + i + float_offset;\n      }\n      return DenseVectorDataBuffer{std::move(ret)};\n    }\n    case DataType::VECTOR_INT8: {\n      std::string ret;\n      ret.resize(dim * sizeof(int8_t));\n      int8_t *data = reinterpret_cast<int8_t *>(ret.data());\n      for (int i = 0; i < dim; ++i) {\n        data[i] = pk + i;\n      }\n      return DenseVectorDataBuffer{std::move(ret)};\n    }\n    case DataType::VECTOR_INT16: {\n      std::string ret;\n      ret.resize(dim * sizeof(int16_t));\n      int16_t *data = reinterpret_cast<int16_t *>(ret.data());\n      for (int i = 0; i < dim; ++i) {\n        data[i] = pk + i;\n      }\n      return DenseVectorDataBuffer{std::move(ret)};\n    }\n    case DataType::VECTOR_BINARY32:\n    case DataType::VECTOR_BINARY64: {\n    
  std::string ret;\n      ret.resize(dim / 8);\n      uint8_t *data = reinterpret_cast<uint8_t *>(ret.data());\n      for (int i = 0; i < dim; ++i) {\n        data[i / 8] |= ((pk + i) % 2) << (i % 8);\n      }\n      return DenseVectorDataBuffer{std::move(ret)};\n    }\n    default:\n      LOG_ERROR(\"Unsupported data type: %d\", static_cast<int>(data_type));\n      return DenseVectorDataBuffer{};\n  }\n}\n\n\nSparseVectorBuffer create_sparse_vector(int dim, DataType data_type, int pk,\n                                        float float_offset = 0.1f) {\n  SparseVectorBuffer ret;\n  switch (data_type) {\n    case DataType::SPARSE_VECTOR_FP32: {\n      std::vector<float> values(dim);\n      for (int i = 0; i < dim; ++i) {\n        values[i] = pk * 100 + i + float_offset;\n      }\n      ret.values = std::string(reinterpret_cast<char *>(values.data()),\n                               values.size() * sizeof(float));\n    } break;\n    case DataType::SPARSE_VECTOR_FP16: {\n      std::vector<zvec::float16_t> values(dim);\n      for (int i = 0; i < dim; ++i) {\n        values[i] = pk * 100 + i + float_offset;\n      }\n      ret.values = std::string(reinterpret_cast<char *>(values.data()),\n                               values.size() * sizeof(zvec::float16_t));\n    } break;\n    default:\n      LOG_ERROR(\"Unsupported data type: %d\", static_cast<int>(data_type));\n      return SparseVectorBuffer{};\n  }\n  std::vector<uint32_t> indices(dim);\n  for (int i = 0; i < dim; ++i) {\n    indices[i] = i;\n  }\n  ret.indices = std::string(reinterpret_cast<char *>(indices.data()),\n                            indices.size() * sizeof(uint32_t));\n  return ret;\n}\n\nbool compare_dense_vector(const DenseVectorDataBuffer &lhs, const void *rhs,\n                          DataType data_type) {\n  switch (data_type) {\n    case DataType::VECTOR_FP32: {\n      size_t dim = lhs.data.size() / sizeof(float);\n      auto rhs_data = reinterpret_cast<const float *>(rhs);\n      auto 
lhs_data = reinterpret_cast<const float *>(lhs.data.data());\n      for (size_t i = 0; i < dim; ++i) {\n        if (std::abs(lhs_data[i] - rhs_data[i]) > 1) {  // reformer\n          LOG_ERROR(\"lhs_data[%zu] = %f, rhs_data[%zu] = %f\", i,\n                    (float)lhs_data[i], i, (float)rhs_data[i]);\n          return false;\n        }\n      }\n      return true;\n    };\n    case DataType::VECTOR_FP16: {\n      size_t dim = lhs.data.size() / sizeof(zvec::float16_t);\n      auto rhs_data = reinterpret_cast<const zvec::float16_t *>(rhs);\n      auto lhs_data =\n          reinterpret_cast<const zvec::float16_t *>(lhs.data.data());\n      for (size_t i = 0; i < dim; ++i) {\n        if (std::abs(lhs_data[i] - rhs_data[i]) > 1e-2) {  // reformer\n          LOG_ERROR(\"lhs_data[%zu] = %f, rhs_data[%zu] = %f\", i,\n                    (float)lhs_data[i], i, (float)rhs_data[i]);\n          return false;\n        }\n      }\n      return true;\n    }\n    default:\n      return memcmp(lhs.data.data(), rhs, lhs.data.size()) == 0;\n  }\n}\n\n\nbool compare_sparse_vector(const SparseVectorBuffer &lhs,\n                           const void *rhs_indices, const void *rhs_values,\n                           DataType data_type) {\n  if (memcmp(lhs.indices.data(), rhs_indices, lhs.indices.size()) != 0) {\n    return false;\n  }\n  size_t dim = lhs.indices.size() / sizeof(uint32_t);\n  switch (data_type) {\n    case DataType::SPARSE_VECTOR_FP32: {\n      auto rhs_values_data = reinterpret_cast<const float *>(rhs_values);\n      auto lhs_values_data = reinterpret_cast<const float *>(lhs.values.data());\n      for (size_t i = 0; i < dim; ++i) {\n        if (std::abs(lhs_values_data[i] - rhs_values_data[i]) >\n            1e-2) {  // reformer\n          LOG_ERROR(\"lhs_values_data[%zu] = %f, rhs_values_data[%zu] = %f\", i,\n                    (float)lhs_values_data[i], i, (float)rhs_values_data[i]);\n          return false;\n        }\n      }\n      return true;\n    }\n    case 
DataType::SPARSE_VECTOR_FP16: {\n      auto rhs_values_data =\n          reinterpret_cast<const zvec::float16_t *>(rhs_values);\n      auto lhs_values_data =\n          reinterpret_cast<const zvec::float16_t *>(lhs.values.data());\n      for (size_t i = 0; i < dim; ++i) {\n        if (std::abs(lhs_values_data[i] - rhs_values_data[i]) >\n            1e-2) {  // reformer\n          LOG_ERROR(\"lhs_values_data[%zu] = %f, rhs_values_data[%zu] = %f\", i,\n                    (float)lhs_values_data[i], i, (float)rhs_values_data[i]);\n          return false;\n        }\n      }\n      return true;\n    }\n    default:\n      return memcmp(lhs.values.data(), rhs_values, lhs.values.size()) == 0;\n  }\n}\n\n\nTEST(VectorColumnIndexerTest, CosineGeneral) {\n  const std::string index_file_path = \"test_indexer.index\";\n  const int kDim = 20;\n  const int kCount = 20;  // can't set too large, or the quantization error\n                          // will be too large due to float's precision\n  const int kTopk = 10;\n\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n  system(cmd_buf);\n\n  auto func = [&](const IndexParams::Ptr index_params, DataType data_type) {\n    system(cmd_buf);\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", data_type, kDim, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});\n        !ret.ok()) {\n      LOG_ERROR(\"Failed to open indexer: %s\", ret.message().c_str());\n      return;\n    }\n\n    // insert\n    for (int i = 0; i < kCount; ++i) {\n      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.1f);\n      // print_dense_vector(buffer.data.data(), kDim, data_type);\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{buffer.data.data()}};\n      ASSERT_TRUE(indexer->Insert(data, i).ok());\n    }\n\n    
// fetch\n    for (int i = 0; i < kCount; ++i) {\n      auto fetched_data = indexer->Fetch(i);\n      ASSERT_TRUE(fetched_data);\n      ASSERT_TRUE(compare_dense_vector(\n          create_dense_vector(kDim, data_type, i, kCount, 0.1f),\n          std::get<DenseVectorDataBuffer>(fetched_data->vector_buffer)\n              .data.data(),\n          data_type));\n    }\n\n    // query\n    for (int i = 0; i < kCount; ++i) {\n      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.3f);\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{buffer.data.data()}};\n      auto _t = std::make_shared<zvec::HnswQueryParams>(100);\n      _t->set_is_linear(true);\n      auto query_params =\n          vector_column_params::QueryParams{.topk = kTopk,\n                                            .filter = nullptr,\n                                            .fetch_vector = true,\n                                            .query_params = _t};\n      auto results = indexer->Search(data, query_params);\n      ASSERT_TRUE(results.has_value());\n      auto vector_results =\n          dynamic_cast<VectorIndexResults *>(results.value().get());\n      ASSERT_TRUE(vector_results);\n      ASSERT_EQ(vector_results->count(), kTopk);\n      auto iter = vector_results->create_iterator();\n      LOG_INFO(\"===query pk: %d\", i);\n      LOG_INFO(\"query_vector:%s\",\n               print_dense_vector(buffer.data.data(), kDim, data_type).c_str());\n      {  // topk1\n        ASSERT_TRUE(iter->valid());\n        LOG_INFO(\"topk1 pk:%zu\", (size_t)iter->doc_id());\n        LOG_INFO(\"topk1 score:%.10f\", iter->score());\n\n        if (!(iter->score() > -0.01 && iter->score() < 2.01)) {\n          ASSERT_TRUE(iter->score() < 2.01);\n        }\n\n        ASSERT_TRUE(iter->score() < 2.01);\n        ASSERT_TRUE(iter->score() > -0.01);\n\n        auto fetched_vector =\n            
std::get<vector_column_params::DenseVector>(iter->vector().vector);\n        LOG_INFO(\n            \"topk1 fetched_vector:%s\",\n            print_dense_vector(fetched_vector.data, kDim, data_type).c_str());\n\n        // ASSERT_EQ(iter->doc_id(), i);\n        ASSERT_TRUE(compare_dense_vector(\n            create_dense_vector(kDim, data_type, iter->doc_id(), kCount, 0.1f),\n            fetched_vector.data, data_type));\n      }\n    }\n    indexer->Destroy();\n  };\n\n  LOG_INFO(\"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32\");\n  func(std::make_shared<FlatIndexParams>(MetricType::COSINE),\n       DataType::VECTOR_FP32);\n  LOG_INFO(\"Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32\");\n  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100),\n       DataType::VECTOR_FP32);\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::FP16\");\n  func(\n      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::FP16),\n      DataType::VECTOR_FP32);\n  LOG_INFO(\n      \"Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::FP16\");\n  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,\n                                         QuantizeType::FP16),\n       DataType::VECTOR_FP32);\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::INT8\");\n  func(\n      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::INT8),\n      DataType::VECTOR_FP32);\n  LOG_INFO(\n      \"Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::INT8\");\n  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,\n                                         QuantizeType::INT8),\n       DataType::VECTOR_FP32);\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::INT4\");\n  func(\n      
std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::INT4),\n      DataType::VECTOR_FP32);\n  LOG_INFO(\n      \"Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::INT4\");\n  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,\n                                         QuantizeType::INT4),\n       DataType::VECTOR_FP32);\n\n  // cosine doesn't support int8/int4 datatype, but support int8/int4 quantizer\n\n  // LOG_INFO(\"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP16\");\n  // func(\n  //     std::make_shared<FlatIndexParams>(MetricType::COSINE,\n  //     QuantizeType::FP16), DataType::VECTOR_FP16);\n  // LOG_INFO(\"Test HnswIndexParams(MetricType::COSINE), VECTOR_FP16\");\n  // func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,\n  //                                        QuantizeType::FP16),\n  //      DataType::VECTOR_FP16);\n}\n\n\nTEST(VectorColumnIndexerTest, Score) {\n  const std::string index_file_path = \"test_indexer.index\";\n  const int kTopk = 10;\n  constexpr idx_t kDocId1 = 2345;\n  constexpr idx_t kDocId2 = 5432;\n  auto vector1 = std::vector<float>{3.0f, 4.0f, 5.0f};\n  auto vector2 = std::vector<float>{1.0f, 20.0f, 3.0f};\n  auto vector_id_map = std::unordered_map<idx_t, std::vector<float>>{\n      {kDocId1, vector1},\n      {kDocId2, vector2},\n  };\n  auto sparse_indices = std::vector<uint32_t>{0, 1, 2};\n  auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f};\n\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n  system(cmd_buf);\n\n\n  auto check_score = [&](VectorIndexResults *vector_results,\n                         MetricType metric_type) {\n    ASSERT_TRUE(vector_results);\n    ASSERT_EQ(vector_results->count(), 2);\n\n    auto inner_produce_score_func = [&](const std::vector<float> &v1,\n                                        const std::vector<float> &v2) {\n      return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * 
v2[2];\n    };\n\n    auto cosine_score_func = [&](const std::vector<float> &v1,\n                                 const std::vector<float> &v2) {\n      return 1 - inner_produce_score_func(v1, v2) /\n                     (std::sqrt(inner_produce_score_func(v1, v1)) *\n                      std::sqrt(inner_produce_score_func(v2, v2)));\n    };\n\n    // SquaredEuclidean\n    auto l2_score_func = [&](const std::vector<float> &v1,\n                             const std::vector<float> &v2) {\n      assert(v1.size() == 3);\n      assert(v2.size() == 3);\n      float ret = 0.0f;\n      for (size_t i = 0; i < v1.size(); ++i) {\n        ret += (v1[i] - v2[i]) * (v1[i] - v2[i]);\n      }\n      return ret;\n    };\n\n    std::function<float(const std::vector<float> &, const std::vector<float> &)>\n        score_func;\n\n    switch (metric_type) {\n      case MetricType::IP:\n        score_func = inner_produce_score_func;\n        break;\n      case MetricType::COSINE:\n        score_func = cosine_score_func;\n        break;\n      case MetricType::L2:\n        score_func = l2_score_func;\n        break;\n      default:\n        ASSERT_TRUE(false);\n    }\n    auto iter = vector_results->create_iterator();\n    ASSERT_TRUE(iter->valid());\n    printf(\"iter->score() top1: %f\\n\", iter->score());\n    printf(\"score_func(vector_id_map[iter->doc_id()], query_vector): %f\\n\",\n           score_func(vector_id_map[iter->doc_id()], query_vector));\n    ASSERT_TRUE(\n        std::abs(iter->score() - score_func(vector_id_map[iter->doc_id()],\n                                            query_vector)) < 1e-2);\n    iter->next();\n    ASSERT_TRUE(iter->valid());\n    printf(\"iter->score() top2: %f\\n\", iter->score());\n    printf(\"score_func(vector_id_map[iter->doc_id()], query_vector): %f\\n\",\n           score_func(vector_id_map[iter->doc_id()], query_vector));\n    ASSERT_TRUE(\n        std::abs(iter->score() - score_func(vector_id_map[iter->doc_id()],\n                     
                       query_vector)) < 1e-2);\n  };\n\n  auto dense_func = [&](const std::shared_ptr<VectorIndexParams>\n                            &index_params) {\n    auto metric_type = index_params->metric_type();\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false, index_params));\n    ASSERT_TRUE(indexer);\n\n    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});\n        !ret.ok()) {\n      LOG_ERROR(\"Failed to open indexer: %s\", ret.message().c_str());\n      ASSERT_TRUE(false);\n    }\n\n    ASSERT_TRUE(indexer\n                    ->Insert(\n                        vector_column_params::VectorData{\n                            vector_column_params::DenseVector{vector1.data()}},\n                        kDocId1)\n                    .ok());\n    ASSERT_TRUE(indexer\n                    ->Insert(\n                        vector_column_params::VectorData{\n                            vector_column_params::DenseVector{vector2.data()}},\n                        kDocId2)\n                    .ok());\n\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = query_vector.data()}};\n    auto query_params = vector_column_params::QueryParams{\n        .topk = kTopk, .filter = nullptr, .fetch_vector = true};\n    auto results = indexer->Search(query, query_params);\n    ASSERT_TRUE(results.has_value());\n\n    check_score(dynamic_cast<VectorIndexResults *>(results.value().get()),\n                metric_type);\n\n    indexer->Destroy();\n  };\n\n  auto sparse_func = [&](const std::shared_ptr<VectorIndexParams>\n                             &index_params) {\n    auto metric_type = index_params->metric_type();\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::SPARSE_VECTOR_FP32, false, index_params));\n    
ASSERT_TRUE(indexer);\n\n    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});\n        !ret.ok()) {\n      LOG_ERROR(\"Failed to open indexer: %s\", ret.message().c_str());\n      ASSERT_TRUE(false);\n    }\n\n    ASSERT_TRUE(\n        indexer\n            ->Insert(\n                vector_column_params::VectorData{\n                    vector_column_params::SparseVector{\n                        3,\n                        reinterpret_cast<const void *>(sparse_indices.data()),\n                        vector1.data()}},\n                kDocId1)\n            .ok());\n    ASSERT_TRUE(\n        indexer\n            ->Insert(\n                vector_column_params::VectorData{\n                    vector_column_params::SparseVector{\n                        3,\n                        reinterpret_cast<const void *>(sparse_indices.data()),\n                        vector2.data()}},\n                kDocId2)\n            .ok());\n\n    auto query =\n        vector_column_params::VectorData{vector_column_params::SparseVector{\n            3, reinterpret_cast<const void *>(sparse_indices.data()),\n            query_vector.data()}};\n    auto query_params = vector_column_params::QueryParams{\n        .topk = 10, .filter = nullptr, .fetch_vector = true};\n    auto results = indexer->Search(query, query_params);\n    ASSERT_TRUE(results.has_value());\n\n    check_score(dynamic_cast<VectorIndexResults *>(results.value().get()),\n                metric_type);\n    indexer->Destroy();\n  };\n\n  LOG_INFO(\"Test DenseVector, MetricType::IP\");\n  dense_func(std::make_shared<FlatIndexParams>(MetricType::IP));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));\n  LOG_INFO(\"Test DenseVector, MetricType::IP, QuantizeType::FP16\");\n  dense_func(\n      std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                     
          QuantizeType::FP16));\n\n  LOG_INFO(\"Test DenseVector, MetricType::COSINE\");\n  dense_func(std::make_shared<FlatIndexParams>(MetricType::COSINE));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100));\n\n  LOG_INFO(\"Test DenseVector, MetricType::COSINE, QuantizeType::FP16\");\n  dense_func(std::make_shared<FlatIndexParams>(MetricType::COSINE,\n                                               QuantizeType::FP16));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,\n                                               QuantizeType::FP16));\n\n  LOG_INFO(\"Test DenseVector, MetricType::L2\");\n  dense_func(std::make_shared<FlatIndexParams>(MetricType::L2));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::L2, 10, 100));\n  LOG_INFO(\"Test DenseVector, MetricType::L2, QuantizeType::FP16\");\n  dense_func(\n      std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::FP16));\n  dense_func(std::make_shared<HnswIndexParams>(MetricType::L2, 10, 100,\n                                               QuantizeType::FP16));\n\n  LOG_INFO(\"Test SparseVector, MetricType::IP\");\n  sparse_func(std::make_shared<FlatIndexParams>(MetricType::IP));\n  sparse_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));\n  LOG_INFO(\"Test SparseVector, MetricType::IP, QuantizeType::FP16\");\n  sparse_func(\n      std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));\n  sparse_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                                QuantizeType::FP16));\n}\n\nTEST(VectorColumnIndexerTest, Failure) {\n  const std::string index_file_path = \"test_indexer_failure.index\";\n  constexpr idx_t kDocId = 1234;\n  auto vector = std::vector<float>{1.0f, 2.0f, 3.0f};\n\n  char cmd_buf[100];\n  snprintf(cmd_buf, 100, \"rm -f %s\", index_file_path.c_str());\n  system(cmd_buf);\n\n  // Test case 1: Operations on unopened indexer\n  {\n    auto 
indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    // Test Flush on unopened indexer\n    auto flush_result = indexer->Flush();\n    ASSERT_FALSE(flush_result.ok());\n    ASSERT_EQ(flush_result.message(), \"Index not opened\");\n\n    // Test Close on unopened indexer\n    auto close_result = indexer->Close();\n    ASSERT_FALSE(close_result.ok());\n    ASSERT_EQ(close_result.message(), \"Index not opened\");\n\n    // Test Destroy on unopened indexer\n    auto destroy_result = indexer->Destroy();\n    ASSERT_FALSE(destroy_result.ok());\n    ASSERT_EQ(destroy_result.message(), \"Index not opened\");\n\n    // Test Insert on unopened indexer\n    auto data = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector.data()}};\n    auto insert_result = indexer->Insert(data, kDocId);\n    ASSERT_FALSE(insert_result.ok());\n    ASSERT_EQ(insert_result.message(), \"Index not opened\");\n\n    // Test Fetch on unopened indexer\n    auto fetch_result = indexer->Fetch(kDocId);\n    ASSERT_FALSE(fetch_result.has_value());\n    ASSERT_EQ(fetch_result.error().message(), \"Index not opened\");\n\n    // Test Search on unopened indexer\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = vector.data()}};\n    auto query_params = vector_column_params::QueryParams{\n        .topk = 10, .filter = nullptr, .fetch_vector = false};\n    auto search_result = indexer->Search(query, query_params);\n    ASSERT_FALSE(search_result.has_value());\n    ASSERT_EQ(search_result.error().message(), \"Index not opened\");\n\n    // Test Merge on unopened indexer\n    auto merge_result = indexer->Merge({}, nullptr);\n    ASSERT_FALSE(merge_result.ok());\n    ASSERT_EQ(merge_result.message(), \"Index not opened\");\n  }\n\n  // Test 
case 2: Unsupported engine name\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)),\n        \"unsupported_engine\");\n    ASSERT_TRUE(indexer);\n\n    auto open_result =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result.ok());\n    ASSERT_EQ(open_result.message(), \"Engine name not supported\");\n  }\n\n  // Test case 3: Invalid field schema (nullptr index_params)\n  {\n    FieldSchema invalid_schema(\"test\", DataType::VECTOR_FP32, 3, false,\n                               nullptr);\n    auto indexer =\n        std::make_shared<VectorColumnIndexer>(index_file_path, invalid_schema);\n    ASSERT_TRUE(indexer);\n\n    auto open_result =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result.ok());\n    ASSERT_EQ(open_result.message(), \"field_schema.index_params nullptr\");\n  }\n\n  // Test case 4: Unsupported data type in engine helper\n  {\n    // Create a mock index params with unsupported data type\n    // We'll use a data type that's not supported by convert_to_engine_data_type\n    FieldSchema unsupported_schema(\n        \"test\", DataType::UNDEFINED, 3, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP));\n    auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,\n                                                         unsupported_schema);\n    ASSERT_TRUE(indexer);\n\n    auto open_result =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result.ok());\n    ASSERT_EQ(open_result.message(),\n              \"failed to build index param: unsupported data type\");\n  }\n\n  // Test case 5: Unsupported metric type in engine helper\n  {\n    FieldSchema unsupported_schema(\n        \"test\", DataType::VECTOR_FP32, 3, 
false,\n        std::make_shared<FlatIndexParams>(MetricType::UNDEFINED));\n    auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,\n                                                         unsupported_schema);\n    ASSERT_TRUE(indexer);\n\n    auto open_result =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result.ok());\n    ASSERT_EQ(open_result.message(),\n              \"failed to build index param: unsupported metric type\");\n  }\n\n  // Test case 6: Unsupported quantize type in engine helper\n  {\n    auto index_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    index_params->set_quantize_type(static_cast<QuantizeType>(999));\n\n\n    FieldSchema unsupported_schema(\"test\", DataType::VECTOR_FP32, 3, false,\n                                   index_params);\n    auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,\n                                                         unsupported_schema);\n    ASSERT_TRUE(indexer);\n\n    auto open_result =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result.ok());\n    ASSERT_EQ(open_result.message(),\n              \"failed to build index param: unsupported quantize type\");\n  }\n\n  // // Test case 7: Unsupported index type in engine helper\n  // {\n  //   // Create a custom index params with unsupported index type\n  //   class UnsupportedIndexTypeParams : public FlatIndexParams {\n  //    public:\n  //     UnsupportedIndexTypeParams() : FlatIndexParams(MetricType::IP) {}\n  //     void mock() {\n  //       type_ = static_cast<IndexType>(999);\n  //     }\n  //   };\n  //   auto index_params = std::make_shared<UnsupportedIndexTypeParams>();\n  //   index_params->mock();\n  //   FieldSchema unsupported_schema(\"test\", DataType::VECTOR_FP32, 3, false,\n  //                                  index_params);\n  //   auto indexer = 
std::make_shared<VectorColumnIndexer>(index_file_path,\n  //                                                        unsupported_schema);\n  //   ASSERT_TRUE(indexer);\n  //\n  //   auto open_result =\n  //       indexer->Open(vector_column_params::ReadOptions{true, true});\n  //   ASSERT_FALSE(open_result.ok());\n  //   ASSERT_EQ(open_result.message(), \"not supported\");\n  // }\n\n  // Test case 8: bf_pks size > 1 error\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    // Insert some data first\n    auto data = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector.data()}};\n    ASSERT_TRUE(indexer->Insert(data, kDocId).ok());\n\n    // Test search with bf_pks size > 1\n    auto query = vector_column_params::VectorData{\n        vector_column_params::DenseVector{.data = vector.data()}};\n    auto bf_pks1 = std::vector<uint64_t>{1, 2};\n    auto bf_pks2 = std::vector<uint64_t>{3, 4};\n    auto query_params =\n        vector_column_params::QueryParams{.topk = 10,\n                                          .filter = nullptr,\n                                          .fetch_vector = false,\n                                          .bf_pks = {bf_pks1, bf_pks2}};\n\n    auto search_result = indexer->Search(query, query_params);\n    ASSERT_FALSE(search_result.has_value());\n    ASSERT_EQ(search_result.error().message(),\n              \"bf_pks size > 1 is not supported\");\n\n    indexer->Destroy();\n  }\n\n  // Test case 9: Invalid field schema for query param conversion\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false, 
nullptr));\n    ASSERT_TRUE(indexer);\n\n    ASSERT_FALSE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n  }\n\n  // Test case 10: use_mmap = false\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true, false})\n            .ok());\n    // Insert some data first\n    auto data = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector.data()}};\n    ASSERT_TRUE(indexer->Insert(data, kDocId).ok());\n    ASSERT_TRUE(indexer->Flush().ok());\n    ASSERT_TRUE(indexer->Close().ok());\n    {\n      auto indexer = std::make_shared<VectorColumnIndexer>(\n          index_file_path,\n          FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                      std::make_shared<FlatIndexParams>(MetricType::IP)));\n      ASSERT_TRUE(indexer);\n      auto open_result =\n          indexer->Open(vector_column_params::ReadOptions{false, false, true});\n      ASSERT_TRUE(open_result.ok());\n      indexer->Destroy();\n    }\n  }\n\n  // Test case 11: Index already opened error\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    // First open should succeed\n    auto open_result1 =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_TRUE(open_result1.ok());\n\n    // Second open should fail\n    auto open_result2 =\n        indexer->Open(vector_column_params::ReadOptions{true, true});\n    ASSERT_FALSE(open_result2.ok());\n    ASSERT_EQ(open_result2.message(), \"Index already opened\");\n\n    
indexer->Destroy();\n  }\n\n  // Test case 12: Test doc_count() on unopened indexer\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    // doc_count() should return -1 for unopened indexer\n    ASSERT_EQ(indexer->doc_count(), static_cast<size_t>(-1));\n  }\n\n  // Test case 13: Test Merge with empty indexers list\n  {\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    // Merge with empty indexers list should succeed\n    auto merge_result = indexer->Merge({}, nullptr);\n    ASSERT_TRUE(merge_result.ok());\n\n    indexer->Destroy();\n  }\n\n  // Test case 14: Test Merge with same index file path (should be skipped)\n  {\n    auto indexer1 = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer1);\n\n    ASSERT_TRUE(\n        indexer1->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    // Insert some data\n    auto data = vector_column_params::VectorData{\n        vector_column_params::DenseVector{vector.data()}};\n    ASSERT_TRUE(indexer1->Insert(data, kDocId).ok());\n\n    // Merge with itself (same index file path) should succeed (skipped)\n    auto merge_result = indexer1->Merge({indexer1}, nullptr);\n    ASSERT_TRUE(merge_result.ok());\n\n    indexer1->Destroy();\n  }\n\n  // Test case 15: Test Fetch with non-existent doc_id\n  {\n    auto indexer = 
std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", DataType::VECTOR_FP32, 3, false,\n                    std::make_shared<FlatIndexParams>(MetricType::IP)));\n    ASSERT_TRUE(indexer);\n\n    ASSERT_TRUE(\n        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n\n    // Fetch non-existent doc_id should fail\n    auto fetch_result = indexer->Fetch(99999);\n    ASSERT_FALSE(fetch_result.has_value());\n    ASSERT_EQ(fetch_result.error().message(),\n              \"Failed to fetch vector from index\");\n\n    indexer->Destroy();\n  }\n\n  // // Test case 16: Test Search with invalid query params (unsupported index\n  // // type)\n  // {\n  //   // Create a custom index params with unsupported index type for query\n  //   class UnsupportedQueryIndexParams : public IndexParams {\n  //    public:\n  //     IndexType type() const override {\n  //       return static_cast<IndexType>(999);\n  //     }\n  //     MetricType metric_type() const override {\n  //       return MetricType::IP;\n  //     }\n  //     QuantizeType quantize_type() const override {\n  //       return QuantizeType::UNDEFINED;\n  //     }\n  //     IndexParams::Ptr clone() const override {\n  //       return std::make_shared<UnsupportedQueryIndexParams>();\n  //     }\n  //   };\n  //\n  //   FieldSchema unsupported_schema(\n  //       \"test\", DataType::VECTOR_FP32, 3, false,\n  //       std::make_shared<UnsupportedQueryIndexParams>());\n  //   auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,\n  //                                                        unsupported_schema);\n  //   ASSERT_TRUE(indexer);\n  //\n  //   ASSERT_TRUE(\n  //       indexer->Open(vector_column_params::ReadOptions{true, true}).ok());\n  //\n  //   // Insert some data first\n  //   auto data = vector_column_params::VectorData{\n  //       vector_column_params::DenseVector{vector.data()}};\n  //   ASSERT_TRUE(indexer->Insert(data, 
kDocId).ok());\n  //\n  //   // Test search with unsupported index type\n  //   auto query = vector_column_params::VectorData{\n  //       vector_column_params::DenseVector{.data = vector.data()}};\n  //   auto query_params = vector_column_params::QueryParams{\n  //       .topk = 10, .filter = nullptr, .fetch_vector = false};\n  //\n  //   auto search_result = indexer->Search(query, query_params);\n  //   ASSERT_FALSE(search_result.has_value());\n  //   ASSERT_EQ(search_result.error().message(), \"not supported\");\n  //\n  //   indexer->Close();\n  // }\n\n  system(cmd_buf);\n}\n\nTEST(VectorColumnIndexerTest, CosineMerge) {\n  constexpr uint32_t kDimension = 64;\n  const std::string index_name{\"test_indexer.index\"};\n\n  auto del_index_file_func = [&](const std::string file_name) {\n    auto cmd_buf = \"rm -f \" + file_name;\n    system(cmd_buf.c_str());\n  };\n\n  auto create_indexer_func =\n      [&](const IndexParams::Ptr &index_params,\n          const std::string &index_name) -> VectorColumnIndexer::Ptr {\n    del_index_file_func(index_name);\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_name, FieldSchema(\"test\", DataType::VECTOR_FP32, kDimension,\n                                false, index_params));\n    if (indexer == nullptr ||\n        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {\n      return nullptr;\n    }\n    return indexer;\n  };\n\n  auto func = [&](const IndexParams::Ptr &param1,\n                  const IndexParams::Ptr &param2,\n                  const IndexParams::Ptr &param3) {\n    auto indexer1 = create_indexer_func(param1, index_name + \"1\");\n    ASSERT_NE(nullptr, indexer1);\n    auto indexer2 = create_indexer_func(param2, index_name + \"2\");\n    ASSERT_NE(nullptr, indexer2);\n\n    std::vector<float> vector(kDimension);\n    vector[1] = 1.0f;\n    vector[2] = 123.0f;\n    auto vector_data = vector_column_params::VectorData{\n        
vector_column_params::DenseVector{vector.data()}};\n    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());\n\n    vector[1] = 2.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());\n    vector[1] = 3.0f;\n    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());\n\n    {\n      auto fetched_data = indexer1->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      LOG_INFO(\n          \"indexer1 fetched_vector doc_id:0:%s\",\n          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());\n      ASSERT_TRUE(fetched_vector[1] - 1.0f < 1e-2);\n      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(0);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      LOG_INFO(\n          \"indexer2 fetched_vector doc_id:0:%s\",\n          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());\n      ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);\n      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    }\n    {\n      auto fetched_data = indexer2->Fetch(1);\n      ASSERT_TRUE(fetched_data.has_value());\n      const float *fetched_vector = reinterpret_cast<const float *>(\n          std::get<vector_column_params::DenseVectorBuffer>(\n              fetched_data->vector_buffer)\n              .data.data());\n      LOG_INFO(\n          \"indexer2 fetched_vector doc_id:1:%s\",\n          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());\n      ASSERT_TRUE(fetched_vector[1] - 3.0f < 1e-2);\n      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    }\n\n    // {  // test 
reduce\n    //   auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n    //   ASSERT_NE(nullptr, indexer3);\n    //   ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());\n    //   {\n    //     auto fetched_data = indexer3->Fetch(0);\n    //     ASSERT_TRUE(fetched_data.has_value());\n    //     const float *fetched_vector = reinterpret_cast<const float *>(\n    //         std::get<vector_column_params::DenseVectorBuffer>(\n    //             fetched_data->vector_buffer)\n    //             .data.data());\n    //     LOG_INFO(\"indexer3 fetched_vector doc_id:0:%s\",\n    //              print_dense_vector(fetched_vector, 3,\n    //              DataType::VECTOR_FP32)\n    //                  .c_str());\n    //     ASSERT_TRUE(fetched_vector[1] - 1.0f < 1e-2);\n    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    //   }\n    //   {\n    //     auto fetched_data = indexer3->Fetch(1);\n    //     ASSERT_TRUE(fetched_data.has_value());\n    //     const float *fetched_vector = reinterpret_cast<const float *>(\n    //         std::get<vector_column_params::DenseVectorBuffer>(\n    //             fetched_data->vector_buffer)\n    //             .data.data());\n    //     LOG_INFO(\"indexer3 fetched_vector doc_id:1:%s\",\n    //              print_dense_vector(fetched_vector, 3,\n    //              DataType::VECTOR_FP32)\n    //                  .c_str());\n    //     ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);\n    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    //   }\n    //   indexer3->Close();\n    //   del_index_file_func(index_name + \"3\");\n    // }\n    //\n    {  // test reduce with filter\n      auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n      ASSERT_NE(nullptr, indexer3);\n      auto filter = std::make_shared<EasyIndexFilter>(\n          [](uint64_t key) { return key == 0; });\n      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());\n      // 0.0 -> x ; 1.0 -> 0 ; 1.1 -> 1\n 
     ASSERT_TRUE(indexer3->doc_count() == 2);\n      {\n        auto fetched_data = indexer3->Fetch(0);\n        ASSERT_TRUE(fetched_data.has_value());\n        const float *fetched_vector = reinterpret_cast<const float *>(\n            std::get<vector_column_params::DenseVectorBuffer>(\n                fetched_data->vector_buffer)\n                .data.data());\n        LOG_INFO(\"indexer3 fetched_vector doc_id:0:%s\",\n                 print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32)\n                     .c_str());\n        ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);\n        ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n      }\n\n      {\n        vector[1] = 3.0f;\n        // search with fetch vector\n        auto query = vector_column_params::VectorData{\n            vector_column_params::DenseVector{vector.data()}};\n        auto query_params = vector_column_params::QueryParams{\n            .topk = 10, .filter = nullptr, .fetch_vector = true};\n        auto results = indexer2->Search(query, query_params);\n        ASSERT_TRUE(results.has_value());\n        auto vector_results =\n            dynamic_cast<VectorIndexResults *>(results.value().get());\n        ASSERT_TRUE(vector_results);\n        ASSERT_EQ(vector_results->count(), 2);\n        auto iter = vector_results->create_iterator();\n        ASSERT_TRUE(iter->valid());\n\n        {\n          int doc_idx = 0;\n          auto query_results_doc = vector_results->docs()[doc_idx];\n          LOG_INFO(\"topk%d pk: %zu\", doc_idx, (size_t)query_results_doc.key());\n          LOG_INFO(\"topk%d score: %.10f\", doc_idx, query_results_doc.score());\n          LOG_INFO(\"topk%d fetched_vector - reverted:%s\", doc_idx,\n                   print_dense_vector(\n                       vector_results->reverted_vector_list()[doc_idx].data(),\n                       kDimension, DataType::VECTOR_FP32)\n                       .c_str());\n          LOG_INFO(\"topk%d fetched_vector - original:%s\", doc_idx,\n 
                  print_dense_vector(query_results_doc.vector(), kDimension,\n                                      DataType::VECTOR_FP16)\n                       .c_str());\n          ASSERT_TRUE(query_results_doc.score() < 2.01);\n          ASSERT_TRUE(query_results_doc.score() > -0.01);\n        }\n        {\n          int doc_idx = 1;\n          auto query_results_doc = vector_results->docs()[doc_idx];\n          LOG_INFO(\"topk%d pk: %zu\", doc_idx, (size_t)query_results_doc.key());\n          LOG_INFO(\"topk%d score: %.10f\", doc_idx, query_results_doc.score());\n          LOG_INFO(\"topk%d fetched_vector - reverted:%s\", doc_idx,\n                   print_dense_vector(\n                       vector_results->reverted_vector_list()[doc_idx].data(),\n                       kDimension, DataType::VECTOR_FP32)\n                       .c_str());\n          LOG_INFO(\"topk%d fetched_vector - original:%s\", doc_idx,\n                   print_dense_vector(query_results_doc.vector(), kDimension,\n                                      DataType::VECTOR_FP16)\n                       .c_str());\n          ASSERT_TRUE(query_results_doc.score() < 2.01);\n          ASSERT_TRUE(query_results_doc.score() > -0.01);\n        }\n        // ASSERT_TRUE(vector_results->docs()[0].key() == 1);\n      }\n\n      indexer3->Close();\n      del_index_file_func(index_name + \"3\");\n    }\n    //\n    // {  // test reduce with filter in parallel\n    //   auto indexer3 = create_indexer_func(param3, index_name + \"3\");\n    //   ASSERT_NE(nullptr, indexer3);\n    //   auto filter = std::make_shared<EasyIndexFilter>(\n    //       [](uint64_t key) { return key == 0; });\n    //   ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());\n    //\n    //   {\n    //     auto fetched_data = indexer3->Fetch(0);\n    //     ASSERT_TRUE(fetched_data.has_value());\n    //     const float *fetched_vector = reinterpret_cast<const float *>(\n    //         
std::get<vector_column_params::DenseVectorBuffer>(\n    //             fetched_data->vector_buffer)\n    //             .data.data());\n    //     LOG_INFO(\"indexer3 fetched_vector doc_id:0:%s\",\n    //              print_dense_vector(fetched_vector, 3,\n    //              DataType::VECTOR_FP32)\n    //                  .c_str());\n    //     ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);\n    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);\n    //   }\n    //   indexer3->Close();\n    //   del_index_file_func(index_name + \"3\");\n    // }\n\n\n    indexer1->Close();\n    indexer2->Close();\n    del_index_file_func(index_name + \"1\");\n    del_index_file_func(index_name + \"2\");\n  };\n\n  // same index with different quantize type\n  {\n    LOG_INFO(\"Merge: same index - FlatIndex with different quantize type\");\n    auto metric_type = MetricType::COSINE;\n    auto param_flat = std::make_shared<FlatIndexParams>(metric_type);\n    auto param_flat_fp16 =\n        std::make_shared<FlatIndexParams>(metric_type, QuantizeType::FP16);\n    auto param_hnsw = std::make_shared<HnswIndexParams>(metric_type, 10, 100);\n    auto param_hnsw_fp16 = std::make_shared<HnswIndexParams>(\n        metric_type, 10, 100, QuantizeType::FP16);\n    // func(param, param_fp16, param_fp16);\n    // func(param, param_fp16, param);\n    // func(param_fp16, param, param_fp16);\n    // func(param_fp16, param, param);\n    // func(param_fp16, param_fp16, param_fp16);\n    func(param_hnsw_fp16, param_flat_fp16, param_flat_fp16);\n  }\n}\n\nTEST(VectorColumnIndexerTest, Refiner) {\n  const std::string kIndexFilePath = \"test_indexer.index\";\n  const int kDim = 20;\n  const int kCount = 20;  // can't set too large, or the quantization error\n                          // will be too large due to float's precision\n  const int kTopk = 10;\n\n  auto del_index_file_func = [&](const std::string &file_name) {\n    auto cmd_buf = \"rm -f \" + file_name;\n    system(cmd_buf.c_str());\n  };\n\n  
auto create_indexer_func =\n      [&](const IndexParams::Ptr &index_params,\n          const std::string &index_file_path,\n          DataType data_type) -> VectorColumnIndexer::Ptr {\n    del_index_file_func(index_file_path);\n    auto indexer = std::make_shared<VectorColumnIndexer>(\n        index_file_path,\n        FieldSchema(\"test\", data_type, kDim, false, index_params));\n    if (indexer == nullptr ||\n        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {\n      return nullptr;\n    }\n    return indexer;\n  };\n\n  auto func = [&](const IndexParams::Ptr &index_params,\n                  const IndexParams::Ptr &reference_index_params,\n                  DataType data_type) {\n    auto indexer = create_indexer_func(index_params, kIndexFilePath, data_type);\n    if (indexer == nullptr) {\n      return;\n    }\n    auto reference_indexer = create_indexer_func(\n        reference_index_params, kIndexFilePath + \"_reference\", data_type);\n    if (reference_indexer == nullptr) {\n      return;\n    }\n\n    // insert\n    for (int i = 0; i < kCount; ++i) {\n      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.1f);\n      // print_dense_vector(buffer.data.data(), kDim, data_type);\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{buffer.data.data()}};\n      ASSERT_TRUE(indexer->Insert(data, i).ok());\n      ASSERT_TRUE(reference_indexer->Insert(data, i).ok());\n    }\n\n    // query\n    for (int i = 0; i < kCount; ++i) {\n      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.3f);\n      auto data = vector_column_params::VectorData{\n          vector_column_params::DenseVector{buffer.data.data()}};\n      ;\n      auto query_params = vector_column_params::QueryParams{\n          .topk = kTopk,\n          .filter = nullptr,\n          .fetch_vector = true,\n          .query_params = std::make_shared<zvec::HnswQueryParams>(100),\n          .refiner_param = 
std::make_shared<vector_column_params::RefinerParam>(\n              vector_column_params::RefinerParam{\n                  .scale_factor_ = 10,\n                  .reference_indexer = reference_indexer})};\n      auto results = indexer->Search(data, query_params);\n      ASSERT_TRUE(results.has_value());\n      auto vector_results =\n          dynamic_cast<VectorIndexResults *>(results.value().get());\n      ASSERT_TRUE(vector_results);\n      ASSERT_EQ(vector_results->count(), kTopk);\n      auto iter = vector_results->create_iterator();\n      LOG_INFO(\"===query pk: %d\", i);\n      LOG_INFO(\"query_vector:%s\",\n               print_dense_vector(buffer.data.data(), kDim, data_type).c_str());\n    }\n    indexer->Destroy();\n  };\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::IP), VECTOR_FP32, \"\n      \"QuantizeType::FP16\");\n\n  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,\n                                         QuantizeType::FP16),\n       std::make_shared<FlatIndexParams>(MetricType::IP),\n       DataType::VECTOR_FP32);\n\n  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16),\n       std::make_shared<FlatIndexParams>(MetricType::IP),\n       DataType::VECTOR_FP32);\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::MIPSL2), VECTOR_FP32, \"\n      \"QuantizeType::FP16\");\n\n  func(std::make_shared<HnswIndexParams>(MetricType::MIPSL2, 10, 100,\n                                         QuantizeType::FP16),\n       std::make_shared<FlatIndexParams>(MetricType::IP),\n       DataType::VECTOR_FP32);\n\n  func(\n      std::make_shared<FlatIndexParams>(MetricType::MIPSL2, QuantizeType::FP16),\n      std::make_shared<FlatIndexParams>(MetricType::IP), DataType::VECTOR_FP32);\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, \"\n      \"QuantizeType::FP16\");\n  func(\n      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::FP16),\n      
std::make_shared<FlatIndexParams>(MetricType::COSINE),\n      DataType::VECTOR_FP32);\n\n  LOG_INFO(\n      \"Test FlatIndexParams(MetricType::L2), VECTOR_FP32, \"\n      \"QuantizeType::Int8\");\n  func(std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::INT8),\n       std::make_shared<FlatIndexParams>(MetricType::L2),\n       DataType::VECTOR_FP32);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/common/db_proto_converter_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include \"db/index/common/proto_converter.h\"\n#include \"db/index/common/type_helper.h\"\n\nusing namespace zvec;\n\nTEST(ConverterTest, InvertIndexParamsConversion) {\n  // Test conversion from protobuf to C++ InvertIndexParams\n  proto::InvertIndexParams invert_pb;\n  invert_pb.set_enable_range_optimization(true);\n\n  auto invert_params = ProtoConverter::FromPb(invert_pb);\n  ASSERT_NE(invert_params, nullptr);\n  EXPECT_TRUE(invert_params->enable_range_optimization());\n  EXPECT_EQ(invert_params->type(), IndexType::INVERT);\n\n  // Test with false value\n  proto::InvertIndexParams invert_pb2;\n  invert_pb2.set_enable_range_optimization(false);\n\n  auto invert_params2 = ProtoConverter::FromPb(invert_pb2);\n  ASSERT_NE(invert_params2, nullptr);\n  EXPECT_FALSE(invert_params2->enable_range_optimization());\n\n  // Test conversion from C++ to protobuf\n  InvertIndexParams original_params(true);\n  auto pb_result = ProtoConverter::ToPb(&original_params);\n  EXPECT_TRUE(pb_result.enable_range_optimization());\n}\n\nTEST(ConverterTest, HnswIndexParamsConversion) {\n  // Test conversion from protobuf to C++ HnswIndexParams\n  proto::HnswIndexParams hnsw_pb;\n  auto *base_params = hnsw_pb.mutable_base();\n  base_params->set_metric_type(proto::MT_L2);\n  base_params->set_quantize_type(proto::QT_FP16);\n  
hnsw_pb.set_m(16);\n  hnsw_pb.set_ef_construction(100);\n\n  auto hnsw_params = ProtoConverter::FromPb(hnsw_pb);\n  ASSERT_NE(hnsw_params, nullptr);\n  EXPECT_EQ(hnsw_params->metric_type(), MetricType::L2);\n  EXPECT_EQ(hnsw_params->m(), 16);\n  EXPECT_EQ(hnsw_params->ef_construction(), 100);\n  EXPECT_EQ(hnsw_params->quantize_type(), QuantizeType::FP16);\n  EXPECT_EQ(hnsw_params->type(), IndexType::HNSW);\n\n  // Test conversion from C++ to protobuf\n  HnswIndexParams original_params(MetricType::IP, 32, 200, QuantizeType::INT8);\n  auto pb_result = ProtoConverter::ToPb(&original_params);\n  EXPECT_EQ(pb_result.base().metric_type(), proto::MT_IP);\n  EXPECT_EQ(pb_result.m(), 32);\n  EXPECT_EQ(pb_result.ef_construction(), 200);\n  EXPECT_EQ(pb_result.base().quantize_type(), proto::QT_INT8);\n}\n\nTEST(ConverterTest, FlatIndexParamsConversion) {\n  // Test conversion from protobuf to C++ FlatIndexParams\n  proto::FlatIndexParams flat_pb;\n  auto *base_params = flat_pb.mutable_base();\n  base_params->set_metric_type(proto::MT_COSINE);\n  base_params->set_quantize_type(proto::QT_INT4);\n\n  auto flat_params = ProtoConverter::FromPb(flat_pb);\n  ASSERT_NE(flat_params, nullptr);\n  EXPECT_EQ(flat_params->metric_type(), MetricType::COSINE);\n  EXPECT_EQ(flat_params->quantize_type(), QuantizeType::INT4);\n  EXPECT_EQ(flat_params->type(), IndexType::FLAT);\n\n  // Test conversion from C++ to protobuf\n  FlatIndexParams original_params(MetricType::L2, QuantizeType::FP16);\n  auto pb_result = ProtoConverter::ToPb(&original_params);\n  EXPECT_EQ(pb_result.base().metric_type(), proto::MT_L2);\n  EXPECT_EQ(pb_result.base().quantize_type(), proto::QT_FP16);\n}\n\nTEST(ConverterTest, IVFIndexParamsConversion) {\n  // Test conversion from protobuf to C++ IVFIndexParams\n  proto::IVFIndexParams ivf_pb;\n  auto *base_params = ivf_pb.mutable_base();\n  base_params->set_metric_type(proto::MT_IP);\n  base_params->set_quantize_type(proto::QT_INT8);\n  ivf_pb.set_n_list(128);\n\n  auto 
ivf_params = ProtoConverter::FromPb(ivf_pb);\n  ASSERT_NE(ivf_params, nullptr);\n  EXPECT_EQ(ivf_params->metric_type(), MetricType::IP);\n  EXPECT_EQ(ivf_params->n_list(), 128);\n  EXPECT_EQ(ivf_params->quantize_type(), QuantizeType::INT8);\n  EXPECT_EQ(ivf_params->type(), IndexType::IVF);\n\n  // Test conversion from C++ to protobuf\n  IVFIndexParams original_params(MetricType::COSINE, 256, 10, false,\n                                 QuantizeType::INT4);\n  auto pb_result = ProtoConverter::ToPb(&original_params);\n  EXPECT_EQ(pb_result.base().metric_type(), proto::MT_COSINE);\n  EXPECT_EQ(pb_result.n_list(), 256);\n  EXPECT_EQ(pb_result.n_iters(), 10);\n  EXPECT_FALSE(pb_result.use_soar());\n  EXPECT_EQ(pb_result.base().quantize_type(), proto::QT_INT4);\n}\n\nTEST(ConverterTest, IndexParamsConversion) {\n  // Test conversion from protobuf to C++ IndexParams for HNSW\n  proto::IndexParams index_pb;\n  auto *hnsw_pb = index_pb.mutable_hnsw();\n  auto *base_params = hnsw_pb->mutable_base();\n  base_params->set_metric_type(proto::MT_L2);\n  base_params->set_quantize_type(proto::QT_FP16);\n  hnsw_pb->set_m(16);\n  hnsw_pb->set_ef_construction(100);\n\n  auto index_params = ProtoConverter::FromPb(index_pb);\n  ASSERT_NE(index_params, nullptr);\n  EXPECT_EQ(index_params->type(), IndexType::HNSW);\n  auto hnsw_cast = std::dynamic_pointer_cast<HnswIndexParams>(index_params);\n  ASSERT_NE(hnsw_cast, nullptr);\n  EXPECT_EQ(hnsw_cast->metric_type(), MetricType::L2);\n  EXPECT_EQ(hnsw_cast->m(), 16);\n  EXPECT_EQ(hnsw_cast->ef_construction(), 100);\n  EXPECT_EQ(hnsw_cast->quantize_type(), QuantizeType::FP16);\n\n  // Test conversion from C++ HnswIndexParams to protobuf IndexParams\n  HnswIndexParams hnsw_original(MetricType::IP, 32, 200);\n  auto pb_result = ProtoConverter::ToPb(&hnsw_original);\n  EXPECT_EQ(pb_result.base().metric_type(), proto::MT_IP);\n  EXPECT_EQ(pb_result.m(), 32);\n  EXPECT_EQ(pb_result.ef_construction(), 200);\n\n  // Test conversion from protobuf to 
C++ IndexParams for FLAT\n  proto::IndexParams index_pb2;\n  auto *flat_pb = index_pb2.mutable_flat();\n  auto *base_params2 = flat_pb->mutable_base();\n  base_params2->set_metric_type(proto::MT_COSINE);\n  base_params2->set_quantize_type(proto::QT_INT8);\n\n  auto index_params2 = ProtoConverter::FromPb(index_pb2);\n  ASSERT_NE(index_params2, nullptr);\n  EXPECT_EQ(index_params2->type(), IndexType::FLAT);\n  auto flat_cast = std::dynamic_pointer_cast<FlatIndexParams>(index_params2);\n  ASSERT_NE(flat_cast, nullptr);\n  EXPECT_EQ(flat_cast->metric_type(), MetricType::COSINE);\n  EXPECT_EQ(flat_cast->quantize_type(), QuantizeType::INT8);\n\n  // Test conversion from C++ FlatIndexParams to protobuf IndexParams\n  FlatIndexParams flat_original(MetricType::L2);\n  auto pb_result2 = ProtoConverter::ToPb(&flat_original);\n  EXPECT_EQ(pb_result2.base().metric_type(), proto::MT_L2);\n\n  // Test conversion from protobuf to C++ IndexParams for IVF\n  proto::IndexParams index_pb3;\n  auto *ivf_pb = index_pb3.mutable_ivf();\n  auto *base_params3 = ivf_pb->mutable_base();\n  base_params3->set_metric_type(proto::MT_IP);\n  base_params3->set_quantize_type(proto::QT_INT4);\n  ivf_pb->set_n_list(128);\n\n  auto index_params3 = ProtoConverter::FromPb(index_pb3);\n  ASSERT_NE(index_params3, nullptr);\n  EXPECT_EQ(index_params3->type(), IndexType::IVF);\n  auto ivf_cast = std::dynamic_pointer_cast<IVFIndexParams>(index_params3);\n  ASSERT_NE(ivf_cast, nullptr);\n  EXPECT_EQ(ivf_cast->metric_type(), MetricType::IP);\n  EXPECT_EQ(ivf_cast->n_list(), 128);\n  EXPECT_EQ(ivf_cast->quantize_type(), QuantizeType::INT4);\n\n  // Test conversion from C++ IVFIndexParams to protobuf IndexParams\n  IVFIndexParams ivf_original(MetricType::COSINE, 256);\n  auto pb_result3 = ProtoConverter::ToPb(&ivf_original);\n  EXPECT_EQ(pb_result3.base().metric_type(), proto::MT_COSINE);\n  EXPECT_EQ(pb_result3.n_list(), 256);\n\n  // Test conversion from protobuf to C++ IndexParams for INVERT\n  
proto::IndexParams index_pb4;\n  auto *invert_pb = index_pb4.mutable_invert();\n  invert_pb->set_enable_range_optimization(true);\n\n  auto index_params4 = ProtoConverter::FromPb(index_pb4);\n  ASSERT_NE(index_params4, nullptr);\n  EXPECT_EQ(index_params4->type(), IndexType::INVERT);\n  auto invert_cast =\n      std::dynamic_pointer_cast<InvertIndexParams>(index_params4);\n  ASSERT_NE(invert_cast, nullptr);\n  EXPECT_TRUE(invert_cast->enable_range_optimization());\n\n  // Test conversion from C++ InvertIndexParams to protobuf IndexParams\n  InvertIndexParams invert_original(false);\n  auto pb_result4 = ProtoConverter::ToPb(&invert_original);\n  EXPECT_FALSE(pb_result4.enable_range_optimization());\n}\n\nTEST(ConverterTest, FieldSchemaConversion) {\n  // Test conversion from protobuf to C++ FieldSchema\n  proto::FieldSchema field_pb;\n  field_pb.set_name(\"test_field\");\n  field_pb.set_data_type(proto::DT_VECTOR_FP32);\n  field_pb.set_dimension(128);\n  field_pb.set_nullable(true);\n\n  // Add index params\n  auto *index_params_pb = field_pb.mutable_index_params();\n  auto *hnsw_pb = index_params_pb->mutable_hnsw();\n  auto *base_params = hnsw_pb->mutable_base();\n  base_params->set_metric_type(proto::MT_L2);\n  base_params->set_quantize_type(proto::QT_FP16);\n  hnsw_pb->set_m(16);\n  hnsw_pb->set_ef_construction(100);\n\n  auto field_schema = ProtoConverter::FromPb(field_pb);\n  ASSERT_NE(field_schema, nullptr);\n  EXPECT_EQ(field_schema->name(), \"test_field\");\n  EXPECT_EQ(field_schema->data_type(), DataType::VECTOR_FP32);\n  EXPECT_TRUE(field_schema->nullable());\n  EXPECT_EQ(field_schema->dimension(), 128u);\n  ASSERT_NE(field_schema->index_params(), nullptr);\n  EXPECT_EQ(field_schema->index_params()->type(), IndexType::HNSW);\n\n  // Test conversion from C++ to protobuf\n  FieldSchema original_field(\"another_field\", DataType::ARRAY_INT32, 64, false,\n                             nullptr);\n  auto pb_result = ProtoConverter::ToPb(original_field);\n  
EXPECT_EQ(pb_result.name(), \"another_field\");\n  EXPECT_EQ(pb_result.data_type(), proto::DT_ARRAY_INT32);\n  EXPECT_FALSE(pb_result.nullable());\n  EXPECT_EQ(pb_result.dimension(), 64u);\n}\n\nTEST(ConverterTest, CollectionSchemaConversion) {\n  // Test conversion from protobuf to C++ CollectionSchema\n  proto::CollectionSchema schema_pb;\n  schema_pb.set_name(\"test_collection\");\n  schema_pb.set_max_doc_count_per_segment(1000000);\n\n  auto *field1_pb = schema_pb.add_fields();\n  field1_pb->set_name(\"field1\");\n  field1_pb->set_data_type(proto::DT_STRING);\n\n  auto *field2_pb = schema_pb.add_fields();\n  field2_pb->set_name(\"field2\");\n  field2_pb->set_data_type(proto::DT_VECTOR_FP32);\n  field2_pb->set_dimension(128);\n\n  auto collection_schema = ProtoConverter::FromPb(schema_pb);\n  ASSERT_NE(collection_schema, nullptr);\n  EXPECT_EQ(collection_schema->name(), \"test_collection\");\n  EXPECT_EQ(collection_schema->fields().size(), 2);\n  EXPECT_EQ(collection_schema->max_doc_count_per_segment(), 1000000u);\n\n  // Test conversion from C++ to protobuf\n  CollectionSchema original_schema;\n  original_schema.set_name(\"original_collection\");\n\n  auto pb_result = ProtoConverter::ToPb(original_schema);\n  EXPECT_EQ(pb_result.name(), \"original_collection\");\n}\n\nTEST(ConverterTest, BlockMetaConversion) {\n  // Test conversion from protobuf to C++ BlockMeta\n  proto::BlockMeta meta_pb;\n  meta_pb.set_block_id(1);\n  meta_pb.set_block_type(proto::BT_SCALAR);\n  meta_pb.set_min_doc_id(100);\n  meta_pb.set_max_doc_id(200);\n  meta_pb.set_doc_count(50);\n  meta_pb.add_columns(\"col1\");\n  meta_pb.add_columns(\"col2\");\n\n  auto block_meta = ProtoConverter::FromPb(meta_pb);\n  ASSERT_NE(block_meta, nullptr);\n  EXPECT_EQ(block_meta->id(), 1u);\n  EXPECT_EQ(block_meta->type(), BlockType::SCALAR);\n  EXPECT_EQ(block_meta->min_doc_id(), 100u);\n  EXPECT_EQ(block_meta->max_doc_id(), 200u);\n  EXPECT_EQ(block_meta->doc_count(), 50u);\n  
EXPECT_EQ(block_meta->columns().size(), 2);\n  EXPECT_EQ(block_meta->columns()[0], \"col1\");\n  EXPECT_EQ(block_meta->columns()[1], \"col2\");\n\n  // Test conversion from C++ to protobuf\n  BlockMeta original_meta(2, BlockType::VECTOR_INDEX, 300, 400);\n  original_meta.set_doc_count(75);\n  original_meta.add_column(\"col3\");\n  original_meta.add_column(\"col4\");\n\n  auto pb_result = ProtoConverter::ToPb(original_meta);\n  EXPECT_EQ(pb_result.block_id(), 2u);\n  EXPECT_EQ(pb_result.block_type(), proto::BT_VECTOR_INDEX);\n  EXPECT_EQ(pb_result.min_doc_id(), 300u);\n  EXPECT_EQ(pb_result.max_doc_id(), 400u);\n  EXPECT_EQ(pb_result.doc_count(), 75u);\n  EXPECT_EQ(pb_result.columns_size(), 2);\n  EXPECT_EQ(pb_result.columns(0), \"col3\");\n  EXPECT_EQ(pb_result.columns(1), \"col4\");\n}\n\nTEST(ConverterTest, SegmentMetaConversion) {\n  // Test conversion from protobuf to C++ SegmentMeta\n  proto::SegmentMeta segment_pb;\n  segment_pb.set_segment_id(10);\n\n  // Add persisted blocks\n  auto *block1_pb = segment_pb.add_persisted_blocks();\n  block1_pb->set_block_id(1);\n  block1_pb->set_block_type(proto::BT_SCALAR);\n  block1_pb->set_min_doc_id(0);\n  block1_pb->set_max_doc_id(100);\n  block1_pb->set_doc_count(50);\n  block1_pb->add_columns(\"col1\");\n  block1_pb->add_columns(\"col2\");\n\n  auto *block2_pb = segment_pb.add_persisted_blocks();\n  block2_pb->set_block_id(2);\n  block2_pb->set_block_type(proto::BT_VECTOR_INDEX);\n  block2_pb->set_min_doc_id(101);\n  block2_pb->set_max_doc_id(200);\n  block2_pb->set_doc_count(75);\n  block2_pb->add_columns(\"vec_col\");\n\n  // Add writing forward block\n  auto *writing_block_pb = segment_pb.mutable_writing_forward_block();\n  writing_block_pb->set_block_id(3);\n  writing_block_pb->set_block_type(proto::BT_SCALAR);\n  writing_block_pb->set_min_doc_id(201);\n  writing_block_pb->set_max_doc_id(300);\n  writing_block_pb->set_doc_count(25);\n  writing_block_pb->add_columns(\"col3\");\n\n  // Add indexed vector fields\n  
segment_pb.add_indexed_vector_fields(\"vec_col1\");\n  segment_pb.add_indexed_vector_fields(\"vec_col2\");\n\n  auto segment_meta = ProtoConverter::FromPb(segment_pb);\n  ASSERT_NE(segment_meta, nullptr);\n  EXPECT_EQ(segment_meta->id(), 10u);\n  EXPECT_EQ(segment_meta->persisted_blocks().size(), 2);\n  EXPECT_TRUE(segment_meta->has_writing_forward_block());\n\n  // Check first persisted block\n  const auto &block1 = segment_meta->persisted_blocks()[0];\n  EXPECT_EQ(block1.id(), 1u);\n  EXPECT_EQ(block1.type(), BlockType::SCALAR);\n  EXPECT_EQ(block1.min_doc_id(), 0u);\n  EXPECT_EQ(block1.max_doc_id(), 100u);\n  EXPECT_EQ(block1.doc_count(), 50u);\n  EXPECT_EQ(block1.columns().size(), 2);\n  EXPECT_EQ(block1.columns()[0], \"col1\");\n  EXPECT_EQ(block1.columns()[1], \"col2\");\n\n  // Check second persisted block\n  const auto &block2 = segment_meta->persisted_blocks()[1];\n  EXPECT_EQ(block2.id(), 2u);\n  EXPECT_EQ(block2.type(), BlockType::VECTOR_INDEX);\n  EXPECT_EQ(block2.min_doc_id(), 101u);\n  EXPECT_EQ(block2.max_doc_id(), 200u);\n  EXPECT_EQ(block2.doc_count(), 75u);\n  EXPECT_EQ(block2.columns().size(), 1);\n  EXPECT_EQ(block2.columns()[0], \"vec_col\");\n\n  // Check writing forward block\n  const auto &writing_block = segment_meta->writing_forward_block();\n  EXPECT_EQ(writing_block.value().id(), 3u);\n  EXPECT_EQ(writing_block.value().type(), BlockType::SCALAR);\n  EXPECT_EQ(writing_block.value().min_doc_id(), 201u);\n  EXPECT_EQ(writing_block.value().max_doc_id(), 300u);\n  EXPECT_EQ(writing_block.value().doc_count(), 25u);\n  EXPECT_EQ(writing_block.value().columns().size(), 1);\n  EXPECT_EQ(writing_block.value().columns()[0], \"col3\");\n\n  // Check indexed vector fields\n  EXPECT_TRUE(segment_meta->vector_indexed(\"vec_col1\"));\n  EXPECT_TRUE(segment_meta->vector_indexed(\"vec_col2\"));\n  EXPECT_FALSE(segment_meta->vector_indexed(\"non_existent_field\"));\n\n  // Test conversion from C++ to protobuf\n  SegmentMeta original_meta(20);\n\n  // Add 
persisted blocks\n  BlockMeta block1_meta(1, BlockType::SCALAR_INDEX, 0, 50);\n  block1_meta.set_doc_count(25);\n  block1_meta.add_column(\"col3\");\n  block1_meta.add_column(\"col4\");\n  original_meta.add_persisted_block(block1_meta);\n\n  BlockMeta block2_meta(2, BlockType::VECTOR_INDEX_QUANTIZE, 51, 100);\n  block2_meta.set_doc_count(30);\n  block2_meta.add_column(\"vec_col2\");\n  original_meta.add_persisted_block(block2_meta);\n\n  // Set writing forward block\n  BlockMeta writing_block_meta(3, BlockType::SCALAR, 101, 150);\n  writing_block_meta.set_doc_count(40);\n  writing_block_meta.add_column(\"col5\");\n  original_meta.set_writing_forward_block(writing_block_meta);\n\n  // Add indexed vector fields\n  original_meta.add_indexed_vector_field(\"vec_field1\");\n  original_meta.add_indexed_vector_field(\"vec_field2\");\n\n  auto pb_result = ProtoConverter::ToPb(original_meta);\n  EXPECT_EQ(pb_result.segment_id(), 20u);\n  EXPECT_EQ(pb_result.persisted_blocks_size(), 2);\n\n  // Check first persisted block\n  const auto &pb_block1 = pb_result.persisted_blocks(0);\n  EXPECT_EQ(pb_block1.block_id(), 1u);\n  EXPECT_EQ(pb_block1.block_type(), proto::BT_SCALAR_INDEX);\n  EXPECT_EQ(pb_block1.min_doc_id(), 0u);\n  EXPECT_EQ(pb_block1.max_doc_id(), 50u);\n  EXPECT_EQ(pb_block1.doc_count(), 25u);\n  EXPECT_EQ(pb_block1.columns_size(), 2);\n  EXPECT_EQ(pb_block1.columns(0), \"col3\");\n  EXPECT_EQ(pb_block1.columns(1), \"col4\");\n\n  // Check second persisted block\n  const auto &pb_block2 = pb_result.persisted_blocks(1);\n  EXPECT_EQ(pb_block2.block_id(), 2u);\n  EXPECT_EQ(pb_block2.block_type(), proto::BT_VECTOR_INDEX_QUANTIZE);\n  EXPECT_EQ(pb_block2.min_doc_id(), 51u);\n  EXPECT_EQ(pb_block2.max_doc_id(), 100u);\n  EXPECT_EQ(pb_block2.doc_count(), 30u);\n  EXPECT_EQ(pb_block2.columns_size(), 1);\n  EXPECT_EQ(pb_block2.columns(0), \"vec_col2\");\n\n  // Check writing forward block\n  const auto &pb_writing_block = pb_result.writing_forward_block();\n  
EXPECT_EQ(pb_writing_block.block_id(), 3u);\n  EXPECT_EQ(pb_writing_block.block_type(), proto::BT_SCALAR);\n  EXPECT_EQ(pb_writing_block.min_doc_id(), 101u);\n  EXPECT_EQ(pb_writing_block.max_doc_id(), 150u);\n  EXPECT_EQ(pb_writing_block.doc_count(), 40u);\n  EXPECT_EQ(pb_writing_block.columns_size(), 1);\n  EXPECT_EQ(pb_writing_block.columns(0), \"col5\");\n\n  // Check indexed vector fields\n  EXPECT_EQ(pb_result.indexed_vector_fields_size(), 2);\n  EXPECT_EQ(pb_result.indexed_vector_fields(0), \"vec_field1\");\n  EXPECT_EQ(pb_result.indexed_vector_fields(1), \"vec_field2\");\n}\n\nTEST(ConverterTest, SegmentMetaWithEmptyFields) {\n  // Test conversion with minimal data\n  proto::SegmentMeta segment_pb;\n  segment_pb.set_segment_id(1);\n\n  auto segment_meta = ProtoConverter::FromPb(segment_pb);\n  ASSERT_NE(segment_meta, nullptr);\n  EXPECT_EQ(segment_meta->id(), 1u);\n  EXPECT_EQ(segment_meta->persisted_blocks().size(), 0);\n  EXPECT_FALSE(segment_meta->has_writing_forward_block());\n  EXPECT_EQ(segment_meta->indexed_vector_fields().size(), 0);\n\n  // Test conversion from C++ to protobuf with minimal data\n  SegmentMeta original_meta(5);\n  auto pb_result = ProtoConverter::ToPb(original_meta);\n  EXPECT_EQ(pb_result.segment_id(), 5u);\n  EXPECT_EQ(pb_result.persisted_blocks_size(), 0);\n  EXPECT_FALSE(pb_result.has_writing_forward_block());\n  EXPECT_EQ(pb_result.indexed_vector_fields_size(), 0);\n}"
  },
  {
    "path": "tests/db/index/common/db_type_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include \"db/index/common/type_helper.h\"\n\nusing namespace zvec;\n\nTEST(IndexTypeCodeBookTest, ProtoToCppConversion) {\n  // Test conversion from protobuf to C++ IndexType\n  EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_HNSW), IndexType::HNSW);\n  EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_FLAT), IndexType::FLAT);\n  EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_IVF), IndexType::IVF);\n  EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_INVERT), IndexType::INVERT);\n  EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_UNDEFINED), IndexType::UNDEFINED);\n  EXPECT_EQ(IndexTypeCodeBook::Get(static_cast<proto::IndexType>(999)),\n            IndexType::UNDEFINED);\n}\n\nTEST(IndexTypeCodeBookTest, CppToProtoConversion) {\n  // Test conversion from C++ IndexType to protobuf IndexType\n  EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::HNSW), proto::IT_HNSW);\n  EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::FLAT), proto::IT_FLAT);\n  EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::IVF), proto::IT_IVF);\n  EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::INVERT), proto::IT_INVERT);\n  EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::UNDEFINED), proto::IT_UNDEFINED);\n  EXPECT_EQ(IndexTypeCodeBook::Get(static_cast<IndexType>(999)),\n            proto::IT_UNDEFINED);\n}\n\nTEST(IndexTypeCodeBookTest, CppToStringConversion) {\n  // Test conversion from C++ 
IndexType to string\n  EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::HNSW), \"HNSW\");\n  EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::INVERT), \"INVERT\");\n  EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::UNDEFINED), \"UNDEFINED\");\n  EXPECT_EQ(IndexTypeCodeBook::AsString(static_cast<IndexType>(999)),\n            \"UNDEFINED\");\n}\n\nTEST(DataTypeCodeBookTest, IsArrayType) {\n  // Test array type detection\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_BINARY));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_STRING));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_BOOL));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_INT32));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_INT64));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_UINT32));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_UINT64));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_FLOAT));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_DOUBLE));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_BINARY32));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_BINARY64));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP16));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP32));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP64));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT4));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT8));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT16));\n  EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_SPARSE_VECTOR_FP32));\n\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_BINARY));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_STRING));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_BOOL));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_INT32));\n  
EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_INT64));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_UINT32));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_UINT64));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_FLOAT));\n  EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_DOUBLE));\n}\n\nTEST(DataTypeCodeBookTest, ProtoToCppConversion) {\n  // Test conversion from protobuf to C++ DataType\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_BINARY), DataType::BINARY);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_STRING), DataType::STRING);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_BOOL), DataType::BOOL);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_INT32), DataType::INT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_INT64), DataType::INT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UINT32), DataType::UINT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UINT64), DataType::UINT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_FLOAT), DataType::FLOAT);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_DOUBLE), DataType::DOUBLE);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_BINARY32),\n            DataType::VECTOR_BINARY32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_BINARY64),\n            DataType::VECTOR_BINARY64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP16),\n            DataType::VECTOR_FP16);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP32),\n            DataType::VECTOR_FP32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP64),\n            DataType::VECTOR_FP64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT4),\n            DataType::VECTOR_INT4);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT8),\n            DataType::VECTOR_INT8);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT16),\n            DataType::VECTOR_INT16);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_SPARSE_VECTOR_FP32),\n            
DataType::SPARSE_VECTOR_FP32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_BINARY),\n            DataType::ARRAY_BINARY);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_STRING),\n            DataType::ARRAY_STRING);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_BOOL), DataType::ARRAY_BOOL);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_INT32),\n            DataType::ARRAY_INT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_INT64),\n            DataType::ARRAY_INT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_UINT32),\n            DataType::ARRAY_UINT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_UINT64),\n            DataType::ARRAY_UINT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_FLOAT),\n            DataType::ARRAY_FLOAT);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_DOUBLE),\n            DataType::ARRAY_DOUBLE);\n  EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UNDEFINED), DataType::UNDEFINED);\n  EXPECT_EQ(DataTypeCodeBook::Get(static_cast<proto::DataType>(999)),\n            DataType::UNDEFINED);\n}\n\nTEST(DataTypeCodeBookTest, CppToProtoConversion) {\n  // Test conversion from C++ DataType to protobuf DataType\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::BINARY), proto::DT_BINARY);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::STRING), proto::DT_STRING);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::BOOL), proto::DT_BOOL);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::INT32), proto::DT_INT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::INT64), proto::DT_INT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::UINT32), proto::DT_UINT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::UINT64), proto::DT_UINT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::FLOAT), proto::DT_FLOAT);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::DOUBLE), proto::DT_DOUBLE);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_BINARY32),\n            proto::DT_VECTOR_BINARY32);\n  
EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_BINARY64),\n            proto::DT_VECTOR_BINARY64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP16),\n            proto::DT_VECTOR_FP16);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP32),\n            proto::DT_VECTOR_FP32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP64),\n            proto::DT_VECTOR_FP64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT4),\n            proto::DT_VECTOR_INT4);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT8),\n            proto::DT_VECTOR_INT8);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT16),\n            proto::DT_VECTOR_INT16);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::SPARSE_VECTOR_FP16),\n            proto::DT_SPARSE_VECTOR_FP16);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::SPARSE_VECTOR_FP32),\n            proto::DT_SPARSE_VECTOR_FP32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_BINARY),\n            proto::DT_ARRAY_BINARY);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_STRING),\n            proto::DT_ARRAY_STRING);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_BOOL), proto::DT_ARRAY_BOOL);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_INT32),\n            proto::DT_ARRAY_INT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_INT64),\n            proto::DT_ARRAY_INT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_UINT32),\n            proto::DT_ARRAY_UINT32);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_UINT64),\n            proto::DT_ARRAY_UINT64);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_FLOAT),\n            proto::DT_ARRAY_FLOAT);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_DOUBLE),\n            proto::DT_ARRAY_DOUBLE);\n  EXPECT_EQ(DataTypeCodeBook::Get(DataType::UNDEFINED), proto::DT_UNDEFINED);\n  EXPECT_EQ(DataTypeCodeBook::Get(static_cast<DataType>(999)),\n            proto::DT_UNDEFINED);\n}\n\nTEST(DataTypeCodeBookTest, CppToStringConversion) {\n  // 
Test conversion from C++ DataType to string\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::BINARY), \"BINARY\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::STRING), \"STRING\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::BOOL), \"BOOL\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::INT32), \"INT32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::INT64), \"INT64\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::UINT32), \"UINT32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::UINT64), \"UINT64\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::FLOAT), \"FLOAT\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::DOUBLE), \"DOUBLE\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_BINARY32),\n            \"VECTOR_BINARY32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_BINARY64),\n            \"VECTOR_BINARY64\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_FP16), \"VECTOR_FP16\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_FP32), \"VECTOR_FP32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_FP64), \"VECTOR_FP64\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT4), \"VECTOR_INT4\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT8), \"VECTOR_INT8\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT16), \"VECTOR_INT16\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_BINARY), \"ARRAY_BINARY\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_STRING), \"ARRAY_STRING\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_BOOL), \"ARRAY_BOOL\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_INT32), \"ARRAY_INT32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_INT64), \"ARRAY_INT64\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_UINT32), \"ARRAY_UINT32\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_UINT64), \"ARRAY_UINT64\");\n  
EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_FLOAT), \"ARRAY_FLOAT\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_DOUBLE), \"ARRAY_DOUBLE\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(DataType::UNDEFINED), \"\");\n  EXPECT_EQ(DataTypeCodeBook::AsString(static_cast<DataType>(999)), \"\");\n}\n\nTEST(MetricTypeCodeBookTest, ProtoToCppConversion) {\n  // Test conversion from protobuf to C++ MetricType\n  EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_IP), MetricType::IP);\n  EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_L2), MetricType::L2);\n  EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_COSINE), MetricType::COSINE);\n  EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_UNDEFINED),\n            MetricType::UNDEFINED);\n  EXPECT_EQ(MetricTypeCodeBook::Get(static_cast<proto::MetricType>(999)),\n            MetricType::UNDEFINED);\n}\n\nTEST(MetricTypeCodeBookTest, CppToProtoConversion) {\n  // Test conversion from C++ MetricType to protobuf MetricType\n  EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::IP), proto::MT_IP);\n  EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::L2), proto::MT_L2);\n  EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::COSINE), proto::MT_COSINE);\n  EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::UNDEFINED),\n            proto::MT_UNDEFINED);\n  EXPECT_EQ(MetricTypeCodeBook::Get(static_cast<MetricType>(999)),\n            proto::MT_UNDEFINED);\n}\n\nTEST(QuantizeTypeCodeBookTest, ProtoToCppConversion) {\n  // Test conversion from protobuf to C++ QuantizeType\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_FP16), QuantizeType::FP16);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_INT4), QuantizeType::INT4);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_INT8), QuantizeType::INT8);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_UNDEFINED),\n            QuantizeType::UNDEFINED);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(static_cast<proto::QuantizeType>(999)),\n            QuantizeType::UNDEFINED);\n}\n\nTEST(QuantizeTypeCodeBookTest, 
CppToProtoConversion) {\n  // Test conversion from C++ QuantizeType to protobuf QuantizeType\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::FP16), proto::QT_FP16);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::INT4), proto::QT_INT4);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::INT8), proto::QT_INT8);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::UNDEFINED),\n            proto::QT_UNDEFINED);\n  EXPECT_EQ(QuantizeTypeCodeBook::Get(static_cast<QuantizeType>(999)),\n            proto::QT_UNDEFINED);\n}\n\nTEST(BlockTypeCodeBookTest, ProtoToCppConversion) {\n  // Test conversion from protobuf to C++ BlockType\n  EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_SCALAR), BlockType::SCALAR);\n  EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_SCALAR_INDEX),\n            BlockType::SCALAR_INDEX);\n  EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_VECTOR_INDEX),\n            BlockType::VECTOR_INDEX);\n  EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_VECTOR_INDEX_QUANTIZE),\n            BlockType::VECTOR_INDEX_QUANTIZE);\n  EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_UNDEFINED), BlockType::UNDEFINED);\n  EXPECT_EQ(BlockTypeCodeBook::Get(static_cast<proto::BlockType>(999)),\n            BlockType::UNDEFINED);\n}\n\nTEST(BlockTypeCodeBookTest, CppToProtoConversion) {\n  // Test conversion from C++ BlockType to protobuf BlockType\n  EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::SCALAR), proto::BT_SCALAR);\n  EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::SCALAR_INDEX),\n            proto::BT_SCALAR_INDEX);\n  EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::VECTOR_INDEX),\n            proto::BT_VECTOR_INDEX);\n  EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::VECTOR_INDEX_QUANTIZE),\n            proto::BT_VECTOR_INDEX_QUANTIZE);\n  EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::UNDEFINED), proto::BT_UNDEFINED);\n  EXPECT_EQ(BlockTypeCodeBook::Get(static_cast<BlockType>(999)),\n            proto::BT_UNDEFINED);\n}"
  },
  {
    "path": "tests/db/index/common/doc_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/doc.h\"\n#include <cstdint>\n#include <limits>\n#include <random>\n#include <gtest/gtest.h>\n#include <zvec/ailego/utility/float_helper.h>\n#include \"utils/utils.h\"\n#include \"zvec/db/status.h\"\n#include \"zvec/db/type.h\"\n\n\nusing namespace zvec;\n\nclass DocDetailedTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    test_doc_ = std::make_shared<Doc>();\n    test_doc_->set_pk(\"test_pk\");\n    test_doc_->set_doc_id(12345);\n    test_doc_->set_score(0.95f);\n    test_doc_->set_operator(Operator::INSERT);\n  }\n\n  Doc::Ptr test_doc_;\n};\n\n// Test serialization and deserialization of basic data types\nTEST_F(DocDetailedTest, BasicTypeSerializationDeserialization) {\n  // Test boundary values\n  test_doc_->set(\"bool_true\", true);\n  test_doc_->set(\"bool_false\", false);\n  test_doc_->set(\"int32_min\", std::numeric_limits<int32_t>::min());\n  test_doc_->set(\"int32_max\", std::numeric_limits<int32_t>::max());\n  test_doc_->set(\"uint32_min\", std::numeric_limits<uint32_t>::min());\n  test_doc_->set(\"uint32_max\", std::numeric_limits<uint32_t>::max());\n  test_doc_->set(\"int64_min\", std::numeric_limits<int64_t>::min());\n  test_doc_->set(\"int64_max\", std::numeric_limits<int64_t>::max());\n  test_doc_->set(\"uint64_min\", std::numeric_limits<uint64_t>::min());\n  
test_doc_->set(\"uint64_max\", std::numeric_limits<uint64_t>::max());\n  test_doc_->set(\"float_min\", std::numeric_limits<float>::min());\n  test_doc_->set(\"float_max\", std::numeric_limits<float>::max());\n  test_doc_->set(\"float_lowest\", std::numeric_limits<float>::lowest());\n  test_doc_->set(\"double_min\", std::numeric_limits<double>::min());\n  test_doc_->set(\"double_max\", std::numeric_limits<double>::max());\n  test_doc_->set(\"double_lowest\", std::numeric_limits<double>::lowest());\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<bool>(\"bool_true\").value(), true);\n  EXPECT_EQ(deserialized_doc->get<bool>(\"bool_false\").value(), false);\n  EXPECT_EQ(deserialized_doc->get<int32_t>(\"int32_min\").value(),\n            std::numeric_limits<int32_t>::min());\n  EXPECT_EQ(deserialized_doc->get<int32_t>(\"int32_max\").value(),\n            std::numeric_limits<int32_t>::max());\n  EXPECT_EQ(deserialized_doc->get<uint32_t>(\"uint32_min\").value(),\n            std::numeric_limits<uint32_t>::min());\n  EXPECT_EQ(deserialized_doc->get<uint32_t>(\"uint32_max\").value(),\n            std::numeric_limits<uint32_t>::max());\n  EXPECT_EQ(deserialized_doc->get<int64_t>(\"int64_min\").value(),\n            std::numeric_limits<int64_t>::min());\n  EXPECT_EQ(deserialized_doc->get<int64_t>(\"int64_max\").value(),\n            std::numeric_limits<int64_t>::max());\n  EXPECT_EQ(deserialized_doc->get<uint64_t>(\"uint64_min\").value(),\n            std::numeric_limits<uint64_t>::min());\n  EXPECT_EQ(deserialized_doc->get<uint64_t>(\"uint64_max\").value(),\n            std::numeric_limits<uint64_t>::max());\n\n  // For floating point numbers, use approximate comparison\n  EXPECT_FLOAT_EQ(deserialized_doc->get<float>(\"float_min\").value(),\n                  
std::numeric_limits<float>::min());\n  EXPECT_FLOAT_EQ(deserialized_doc->get<float>(\"float_max\").value(),\n                  std::numeric_limits<float>::max());\n  EXPECT_FLOAT_EQ(deserialized_doc->get<float>(\"float_lowest\").value(),\n                  std::numeric_limits<float>::lowest());\n  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>(\"double_min\").value(),\n                   std::numeric_limits<double>::min());\n  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>(\"double_max\").value(),\n                   std::numeric_limits<double>::max());\n  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>(\"double_lowest\").value(),\n                   std::numeric_limits<double>::lowest());\n}\n\n// Test various cases of string types\nTEST_F(DocDetailedTest, StringTypeSerializationDeserialization) {\n  // Test empty string\n  test_doc_->set(\"empty_string\", std::string(\"\"));\n\n  // Test long string\n  std::string long_string(10000, 'a');\n  test_doc_->set(\"long_string\", long_string);\n\n  // Test string with special characters. The length is taken from the\n  // literal itself so the embedded tab, newline, carriage return and NUL are\n  // actually stored; a hand-counted length that is too small would silently\n  // truncate the value before the special characters.\n  const char special_literal[] = \"Special characters\\t\\n\\r\\0included\";\n  std::string special_chars(special_literal, sizeof(special_literal) - 1);\n  test_doc_->set(\"special_chars\", special_chars);\n\n  // Test string with binary data\n  std::string binary_string;\n  for (int i = 0; i < 256; ++i) {\n    binary_string.push_back(static_cast<char>(i));\n  }\n  test_doc_->set(\"binary_string\", binary_string);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"empty_string\").value(), \"\");\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"long_string\").value(),\n            long_string);\n  // Compare against the full value, including the bytes after the NUL.\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"special_chars\").value(),\n            special_chars);\n  
EXPECT_EQ(deserialized_doc->get<std::string>(\"binary_string\").value(),\n            binary_string);\n}\n\n\n// Test vector<bool> type\nTEST_F(DocDetailedTest, VectorBoolSerializationDeserialization) {\n  std::vector<bool> bool_vec;\n  // Create a vector<bool> with a large number of elements\n  for (int i = 0; i < 1000; ++i) {\n    bool_vec.push_back(i % 2 == 0);\n  }\n  test_doc_->set(\"bool_vec\", bool_vec);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  auto deserialized_vec =\n      deserialized_doc->get<std::vector<bool>>(\"bool_vec\").value();\n\n  ASSERT_EQ(deserialized_vec.size(), bool_vec.size());\n  for (size_t i = 0; i < bool_vec.size(); ++i) {\n    EXPECT_EQ(deserialized_vec[i], bool_vec[i]) << \"Mismatch at index \" << i;\n  }\n}\n\n// Test numeric vector types\nTEST_F(DocDetailedTest, NumericVectorSerializationDeserialization) {\n  // Test int8_t vector\n  std::vector<int8_t> int8_vec = {std::numeric_limits<int8_t>::min(), -1, 0, 1,\n                                  std::numeric_limits<int8_t>::max()};\n  test_doc_->set(\"int8_vec\", int8_vec);\n\n  // Test int16_t vector\n  std::vector<int16_t> int16_vec = {std::numeric_limits<int16_t>::min(), -1, 0,\n                                    1, std::numeric_limits<int16_t>::max()};\n  test_doc_->set(\"int16_vec\", int16_vec);\n\n  // Test int32_t vector\n  std::vector<int32_t> int32_vec = {std::numeric_limits<int32_t>::min(), -1, 0,\n                                    1, std::numeric_limits<int32_t>::max()};\n  test_doc_->set(\"int32_vec\", int32_vec);\n\n  // Test int64_t vector\n  std::vector<int64_t> int64_vec = {std::numeric_limits<int64_t>::min(), -1, 0,\n                                    1, std::numeric_limits<int64_t>::max()};\n  test_doc_->set(\"int64_vec\", int64_vec);\n\n  // Test uint32_t vector\n  
std::vector<uint32_t> uint32_vec = {std::numeric_limits<uint32_t>::min(), 1,\n                                      100,\n                                      std::numeric_limits<uint32_t>::max()};\n  test_doc_->set(\"uint32_vec\", uint32_vec);\n\n  // Test uint64_t vector\n  std::vector<uint64_t> uint64_vec = {std::numeric_limits<uint64_t>::min(), 1,\n                                      100,\n                                      std::numeric_limits<uint64_t>::max()};\n  test_doc_->set(\"uint64_vec\", uint64_vec);\n\n  // Test float vector\n  std::vector<float> float_vec = {std::numeric_limits<float>::min(), -1.0f,\n                                  0.0f, 1.0f,\n                                  std::numeric_limits<float>::max()};\n  test_doc_->set(\"float_vec\", float_vec);\n\n  // Test double vector\n  std::vector<double> double_vec = {std::numeric_limits<double>::min(), -1.0,\n                                    0.0, 1.0,\n                                    std::numeric_limits<double>::max()};\n  test_doc_->set(\"double_vec\", double_vec);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<std::vector<int8_t>>(\"int8_vec\").value(),\n            int8_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<int16_t>>(\"int16_vec\").value(),\n            int16_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<int32_t>>(\"int32_vec\").value(),\n            int32_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<int64_t>>(\"int64_vec\").value(),\n            int64_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<uint32_t>>(\"uint32_vec\").value(),\n            uint32_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<uint64_t>>(\"uint64_vec\").value(),\n            uint64_vec);\n\n\n  // Floating point numbers use approximate comparison\n  auto 
deserialized_float_vec =\n      deserialized_doc->get<std::vector<float>>(\"float_vec\").value();\n\n  ASSERT_EQ(deserialized_float_vec.size(), float_vec.size());\n  for (size_t i = 0; i < float_vec.size(); ++i) {\n    EXPECT_FLOAT_EQ(deserialized_float_vec[i], float_vec[i])\n        << \"Mismatch at index \" << i;\n  }\n\n  auto deserialized_double_vec =\n      deserialized_doc->get<std::vector<double>>(\"double_vec\").value();\n  ASSERT_EQ(deserialized_double_vec.size(), double_vec.size());\n  for (size_t i = 0; i < double_vec.size(); ++i) {\n    EXPECT_DOUBLE_EQ(deserialized_double_vec[i], double_vec[i])\n        << \"Mismatch at index \" << i;\n  }\n}\n\n// Test string vector types\nTEST_F(DocDetailedTest, StringVectorSerializationDeserialization) {\n  std::vector<std::string> string_vec;\n  string_vec.push_back(\"\");  // Empty string\n  string_vec.push_back(\"normal string\");\n  string_vec.push_back(std::string(1000, 'x'));  // Long string\n  string_vec.push_back(\"Special character test\");\n  string_vec.push_back(\n      std::string(\"binary\\0data\", 11));  // Contains binary data\n\n  test_doc_->set(\"string_vec\", string_vec);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  auto deserialized_vec =\n      deserialized_doc->get<std::vector<std::string>>(\"string_vec\").value();\n  ASSERT_EQ(deserialized_vec.size(), string_vec.size());\n  for (size_t i = 0; i < string_vec.size(); ++i) {\n    EXPECT_EQ(deserialized_vec[i], string_vec[i]) << \"Mismatch at index \" << i;\n  }\n}\n\n// Test sparse vector types\nTEST_F(DocDetailedTest, SparseVectorSerializationDeserialization) {\n  // Test float type sparse vector\n  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec;\n  sparse_float_vec.first = {0, 100, 1000, 10000};\n  sparse_float_vec.second = {0.1f, 100.5f, -200.7f,\n 
                            std::numeric_limits<float>::max()};\n\n  test_doc_->set(\"sparse_float_vec\", sparse_float_vec);\n\n  // Test ailego::Float16 type sparse vector\n  std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>\n      sparse_float16_vec;\n  sparse_float16_vec.first = {1, 50, 500};\n  sparse_float16_vec.second = {ailego::Float16(0.5f), ailego::Float16(-10.25f),\n                               ailego::Float16(1000.0f)};\n\n  test_doc_->set(\"sparse_float16_vec\", sparse_float16_vec);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  // Verify float sparse vector\n  auto deserialized_float_vec =\n      deserialized_doc\n          ->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n              \"sparse_float_vec\")\n          .value();\n\n  EXPECT_EQ(deserialized_float_vec.first, sparse_float_vec.first);\n  ASSERT_EQ(deserialized_float_vec.second.size(),\n            sparse_float_vec.second.size());\n  for (size_t i = 0; i < sparse_float_vec.second.size(); ++i) {\n    EXPECT_FLOAT_EQ(deserialized_float_vec.second[i],\n                    sparse_float_vec.second[i])\n        << \"Mismatch at index \" << i;\n  }\n\n  // Verify float16 sparse vector\n  auto deserialized_float16_vec =\n      deserialized_doc\n          ->get<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n              \"sparse_float16_vec\")\n          .value();\n\n  EXPECT_EQ(deserialized_float16_vec.first, sparse_float16_vec.first);\n  EXPECT_EQ(deserialized_float16_vec.second, sparse_float16_vec.second);\n}\n\n// Test case with many fields\nTEST_F(DocDetailedTest, ManyFieldsSerializationDeserialization) {\n  const int field_count = 1000;\n  for (int i = 0; i < field_count; ++i) {\n    test_doc_->set(\"field_\" + std::to_string(i), i);\n  }\n\n  auto serialized = 
test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  for (int i = 0; i < field_count; ++i) {\n    std::string field_name = \"field_\" + std::to_string(i);\n    EXPECT_EQ(deserialized_doc->get<int32_t>(field_name).value(), i);\n  }\n}\n\n// Test empty document\nTEST_F(DocDetailedTest, EmptyDocSerializationDeserialization) {\n  Doc::Ptr empty_doc = std::make_shared<Doc>();\n  empty_doc->set_pk(\"\");  // Empty primary key\n\n  auto serialized = empty_doc->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n  EXPECT_EQ(deserialized_doc->pk(), \"\");\n}\n\n// Test large document\nTEST_F(DocDetailedTest, LargeDocSerializationDeserialization) {\n  // Create a document with a large amount of data\n  std::string large_string(100000, 'A');\n  test_doc_->set(\"large_string\", large_string);\n\n  std::vector<int32_t> large_vector(50000);\n  std::iota(large_vector.begin(), large_vector.end(), 0);\n  test_doc_->set(\"large_vector\", large_vector);\n\n  auto serialized = test_doc_->serialize();\n  EXPECT_GT(serialized.size(), 100000);  // Should be a large document\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"large_string\").value(),\n            large_string);\n  EXPECT_EQ(deserialized_doc->get<std::vector<int32_t>>(\"large_vector\").value(),\n            large_vector);\n}\n\n// Test memory usage calculation\nTEST_F(DocDetailedTest, MemoryUsageCalculation) {\n  size_t initial_usage = test_doc_->memory_usage();\n\n  // Add some fields\n  test_doc_->set(\"small_string\", std::string(\"small\"));\n  test_doc_->set(\"int_field\", int32_t(42));\n  
test_doc_->set(\"float_field\", 3.14f);\n\n  size_t usage_with_fields = test_doc_->memory_usage();\n  EXPECT_GT(usage_with_fields, initial_usage);\n\n  // Add a large field\n  std::string large_string(10000, 'B');\n  test_doc_->set(\"large_string\", large_string);\n\n  size_t usage_with_large_field = test_doc_->memory_usage();\n  EXPECT_GT(usage_with_large_field, usage_with_fields);\n}\n\n// Test detailed string representation\nTEST_F(DocDetailedTest, DetailStringRepresentation) {\n  test_doc_->set(\"test_bool\", true);\n  test_doc_->set(\"test_int\", int32_t(-42));\n  test_doc_->set(\"test_string\", std::string(\"hello\"));\n\n  std::vector<float> float_vec = {1.1f, 2.2f, 3.3f};\n  test_doc_->set(\"test_float_vec\", float_vec);\n\n  std::string detail_str = test_doc_->to_detail_string();\n  EXPECT_FALSE(detail_str.empty());\n  EXPECT_NE(detail_str.find(\"test_pk\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"test_bool\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"test_int\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"test_string\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"test_float_vec\"), std::string::npos);\n}\n\n// Test operator types\nTEST_F(DocDetailedTest, OperatorTypes) {\n  test_doc_->set_operator(Operator::INSERT);\n  EXPECT_EQ(test_doc_->get_operator(), Operator::INSERT);\n\n  test_doc_->set_operator(Operator::DELETE);\n  EXPECT_EQ(test_doc_->get_operator(), Operator::DELETE);\n\n  test_doc_->set_operator(Operator::UPDATE);\n  EXPECT_EQ(test_doc_->get_operator(), Operator::UPDATE);\n}\n\n// Test document ID and score\nTEST_F(DocDetailedTest, DocIdAndScore) {\n  test_doc_->set_doc_id(0);\n  EXPECT_EQ(test_doc_->doc_id(), 0);\n\n  test_doc_->set_doc_id(std::numeric_limits<uint64_t>::max());\n  EXPECT_EQ(test_doc_->doc_id(), std::numeric_limits<uint64_t>::max());\n\n  test_doc_->set_score(0.0f);\n  EXPECT_FLOAT_EQ(test_doc_->score(), 0.0f);\n\n  test_doc_->set_score(1.0f);\n  EXPECT_FLOAT_EQ(test_doc_->score(), 
1.0f);\n\n  test_doc_->set_score(-1.0f);\n  EXPECT_FLOAT_EQ(test_doc_->score(), -1.0f);\n\n  test_doc_->set_score(std::numeric_limits<float>::max());\n  EXPECT_FLOAT_EQ(test_doc_->score(), std::numeric_limits<float>::max());\n}\n\n// Test primary key\nTEST_F(DocDetailedTest, PrimaryKey) {\n  test_doc_->set_pk(\"\");\n  EXPECT_EQ(test_doc_->pk(), \"\");\n\n  std::string long_pk(10000, 'X');\n  test_doc_->set_pk(long_pk);\n  EXPECT_EQ(test_doc_->pk(), long_pk);\n\n  test_doc_->set_pk(\"normal_pk\");\n  EXPECT_EQ(test_doc_->pk(), \"normal_pk\");\n}\n\n// Test duplicate field names (should overwrite old values)\nTEST_F(DocDetailedTest, DuplicateFieldNames) {\n  test_doc_->set(\"duplicate_field\", int32_t(1));\n  test_doc_->set(\"duplicate_field\", int32_t(2));  // Overwrite old value\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  // Stop here on a failed round trip rather than dereferencing a null doc.\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<int32_t>(\"duplicate_field\").value(), 2);\n}\n\n// Test combination of various data types\nTEST_F(DocDetailedTest, MixedDataTypes) {\n  test_doc_->set(\"bool_field\", true);\n  test_doc_->set(\"int_field\", int32_t(-1000));\n  test_doc_->set(\"uint_field\", uint32_t(2000));\n  test_doc_->set(\"float_field\", 3.14159f);\n  test_doc_->set(\"double_field\", 2.718281828459045);\n  test_doc_->set(\"string_field\", std::string(\"Hello, World!\"));\n\n  std::vector<int32_t> int_vec = {1, 2, 3, 4, 5};\n  test_doc_->set(\"int_vec\", int_vec);\n\n  std::vector<float> float_vec = {1.1f, 2.2f, 3.3f};\n  test_doc_->set(\"float_vec\", float_vec);\n\n  std::vector<std::string> string_vec = {\"apple\", \"banana\", \"cherry\"};\n  test_doc_->set(\"string_vec\", string_vec);\n\n  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_vec;\n  sparse_vec.first = {1, 10, 100};\n  sparse_vec.second = {0.1f, 1.0f, 10.0f};\n  test_doc_->set(\"sparse_vec\", sparse_vec);\n\n  auto serialized = test_doc_->serialize();\n  ASSERT_FALSE(serialized.empty());\n\n  auto deserialized_doc =\n      Doc::deserialize(serialized.data(), serialized.size());\n  // Stop here on a failed round trip rather than dereferencing a null doc.\n  ASSERT_NE(deserialized_doc, nullptr);\n\n  EXPECT_EQ(deserialized_doc->get<bool>(\"bool_field\").value(), true);\n  EXPECT_EQ(deserialized_doc->get<int32_t>(\"int_field\").value(), -1000);\n  EXPECT_EQ(deserialized_doc->get<uint32_t>(\"uint_field\").value(), 2000);\n  EXPECT_FLOAT_EQ(deserialized_doc->get<float>(\"float_field\").value(),\n                  3.14159f);\n  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>(\"double_field\").value(),\n                   2.718281828459045);\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"string_field\").value(),\n            \"Hello, World!\");\n  EXPECT_EQ(deserialized_doc->get<std::vector<int32_t>>(\"int_vec\").value(),\n            int_vec);\n  EXPECT_EQ(deserialized_doc->get<std::vector<float>>(\"float_vec\").value(),\n            float_vec);\n  EXPECT_EQ(\n      deserialized_doc->get<std::vector<std::string>>(\"string_vec\").value(),\n      string_vec);\n\n  auto deserialized_sparse =\n      deserialized_doc\n          ->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n              \"sparse_vec\")\n          .value();\n  EXPECT_EQ(deserialized_sparse.first, sparse_vec.first);\n  EXPECT_EQ(deserialized_sparse.second, sparse_vec.second);\n}\n\n// Test doc validate with schema\nTEST_F(DocDetailedTest, Validate) {\n  // test schema nullable=false, but doc's field is null\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    doc = test::TestHelper::CreateDocNull(1, *schema);\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(true);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    doc = test::TestHelper::CreateDocNull(1, 
*schema);\n    s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n  }\n\n  // doc contained another field which not contained in schema\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    doc.set(\"another_field\", 1);\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc contained a mismatch scalar field\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    doc.set(\"int32\", std::string(\"1\"));\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc contained a mismatch type vector field\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    std::string field = \"dense_fp32\";\n    auto field_schema = schema->get_field(field);\n    ASSERT_NE(field_schema, nullptr);\n\n    doc.set(field, std::vector<int16_t>(field_schema->dimension(), 1));\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc contained a vector field with invalid dimension\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    std::string field = \"dense_fp32\";\n    auto field_schema = schema->get_field(field);\n    ASSERT_NE(field_schema, nullptr);\n\n    doc.set(field, std::vector<float>(field_schema->dimension() - 1, 1.0));\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), 
StatusCode::INVALID_ARGUMENT);\n\n    doc.set(field, std::vector<float>());\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc contained a sparse vector field with mismatch type\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    std::string field = \"sparse_fp32\";\n    auto field_schema = schema->get_field(field);\n    ASSERT_NE(field_schema, nullptr);\n\n    doc.set(field, std::vector<int16_t>(field_schema->dimension(), 1));\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc contained a sparse vector field with indices/values size mismatch\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n\n    std::string field = \"sparse_fp32\";\n    auto field_schema = schema->get_field(field);\n    ASSERT_NE(field_schema, nullptr);\n\n    std::vector<uint32_t> indices;\n    std::vector<float> values;\n    for (uint32_t i = 0; i < 100; i++) {\n      indices.push_back(i);\n      values.push_back(float(0.1));\n    }\n    values.push_back(float(0.1));\n    std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec{\n        indices, values};\n    doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n        field, sparse_float_vec);\n    s = doc.validate(schema);\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc validate error\n  {\n    Doc doc;\n    // schema is null\n    auto s = doc.validate(nullptr);\n    EXPECT_EQ(s.code(), StatusCode::INTERNAL_ERROR);\n\n    // pk is null\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    s = doc.validate(schema);\n    
EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n    // field type is undefined\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"undefined\", DataType::UNDEFINED, true));\n    s = doc.validate(schema);\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // doc validate more data type\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"binary\", DataType::BINARY, false));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"array_binary\", DataType::ARRAY_BINARY, false));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"vector_binary32\", DataType::VECTOR_BINARY32, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"vector_binary64\", DataType::VECTOR_BINARY64, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"vector_int8\", DataType::VECTOR_INT8, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"vector_int8\", DataType::VECTOR_INT8, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"vector_int16\", DataType::VECTOR_INT16, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"dense_fp16\", DataType::VECTOR_FP16, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"dense_fp64\", DataType::VECTOR_FP64, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"sparse_fp16\", DataType::SPARSE_VECTOR_FP16, 128, false,\n   
     std::make_shared<FlatIndexParams>(MetricType::IP)));\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"sparse_fp32\", DataType::SPARSE_VECTOR_FP32, 128, false,\n        std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n    auto doc = test::TestHelper::CreateDoc(1, *schema);\n\n    auto s = doc.validate(schema);\n    ASSERT_TRUE(s.ok());\n  }\n  // doc validate pk\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    std::vector<std::string> valid_names = {\n        // Min length = 1\n        \"a\",\n        \"Z\",\n        \"0\",\n        \"_\",\n        \"-\",\n        \"!\",\n        \"@\",\n        \"#\",\n        \"$\",\n        \"%\",\n        \"+\",\n        \"=\",\n        \".\",\n\n        // Mixed\n        \"a1_\",\n        \"user.name\",\n        \"test@example\",\n        \"log_2025!@#\",\n        \"metric+=value\",\n        \"score%change\",\n\n        \"user.name\",        // '.' allowed\n        \"test@example\",     // '@' allowed\n        \"log_2025!@#\",      // !@# allowed\n        \"metric+=value\",    // + = allowed\n        \"score%change\",     // % allowed\n        \"file-name_v1.2\",   // -, _, . 
allowed\n        \"a-b_c.d!@#$%+=.\",  // all specials in one\n\n        // Max length = 64\n        std::string(64, 'a'),\n        std::string(63, 'a') + \"_\",\n        \"_\" + std::string(62, 'x') + \".\",\n        \"!\" + std::string(62, '0') + \"@\",\n    };\n    for (auto pk : valid_names) {\n      auto doc = test::TestHelper::CreateDoc(1, *schema, pk);\n      auto s = doc.validate(schema);\n      ASSERT_TRUE(s.ok());\n    }\n  }\n  {\n    auto schema = test::TestHelper::CreateNormalSchema(false);\n    std::vector<std::string> invalid_names = {\n        // Too long (>64)\n        std::string(65, 'a'), std::string(64, 'a') + \"_\",\n\n        // Illegal characters\n        \"a b\",   // space\n        \"a&b\",   // & not in set\n        \"a*b\",   // *\n        \"a(b)\",  // ( )\n        \"a:b\",   // :\n        \"a;b\",   // ;\n        \"a/b\",   // /\n        \"a\\\\b\",  // backslash\n        \"a\\\"b\",  // \"\n        \"a'b\",   // '\n        \"a<b\",\n        \"a>b\",  // < >\n        \"a?b\",  // ?\n        \"a~b\",  // ~\n        \"a`b\",  // `\n        \"a[b\",\n        \"a]b\",  // [ ]\n        \"a{b\",\n        \"a}b\",     // { }\n        \"a|b\",     // |\n        \"a^b\",     // ^\n        \"a,b\",     // ,\n        \"用户\",    // non-ASCII (Chinese)\n        \"αβγ\",     // Greek\n        \"résumé\",  // accented chars (é not in [a-zA-Z])\n    };\n    for (auto pk : invalid_names) {\n      auto doc = test::TestHelper::CreateDoc(1, *schema, pk);\n      auto s = doc.validate(schema);\n      if (s.ok()) std::cout << \"pk:\" << pk << std::endl;\n      ASSERT_FALSE(s.ok());\n    }\n  }\n}\n\nTEST_F(DocDetailedTest, GetValueTypeNameCoverage) {\n  Doc::Value bool_val = true;\n  EXPECT_EQ(get_value_type_name(bool_val, false), \"BOOL\");\n\n  Doc::Value int32_val = int32_t(42);\n  EXPECT_EQ(get_value_type_name(int32_val, false), \"INT32\");\n\n  Doc::Value uint32_val = uint32_t(42);\n  EXPECT_EQ(get_value_type_name(uint32_val, false), \"UINT32\");\n\n  
Doc::Value int64_val = int64_t(42);\n  EXPECT_EQ(get_value_type_name(int64_val, false), \"INT64\");\n\n  Doc::Value uint64_val = uint64_t(42);\n  EXPECT_EQ(get_value_type_name(uint64_val, false), \"UINT64\");\n\n  Doc::Value float_val = 3.14f;\n  EXPECT_EQ(get_value_type_name(float_val, false), \"FLOAT\");\n\n  Doc::Value double_val = 3.14;\n  EXPECT_EQ(get_value_type_name(double_val, false), \"DOUBLE\");\n\n  Doc::Value string_val = std::string(\"test\");\n  EXPECT_EQ(get_value_type_name(string_val, false), \"STRING\");\n\n  Doc::Value vector_bool_val = std::vector<bool>{true, false};\n  EXPECT_EQ(get_value_type_name(vector_bool_val, false), \"ARRAY_BOOL\");\n\n  Doc::Value vector_int8_val = std::vector<int8_t>{1, 2, 3};\n  EXPECT_EQ(get_value_type_name(vector_int8_val, true), \"VECTOR_INT8\");\n\n  Doc::Value vector_int16_val = std::vector<int16_t>{10, 20, 30};\n  EXPECT_EQ(get_value_type_name(vector_int16_val, true), \"VECTOR_INT16\");\n\n  Doc::Value vector_int32_val = std::vector<int32_t>{100, 200, 300};\n  EXPECT_EQ(get_value_type_name(vector_int32_val, true), \"VECTOR_INT32\");\n\n  Doc::Value vector_int64_val = std::vector<int64_t>{1000, 2000, 3000};\n  EXPECT_EQ(get_value_type_name(vector_int64_val, true), \"VECTOR_INT64\");\n\n  Doc::Value vector_uint32_val = std::vector<uint32_t>{10, 20, 30};\n  EXPECT_EQ(get_value_type_name(vector_uint32_val, true), \"VECTOR_UINT32\");\n\n  Doc::Value vector_uint64_val = std::vector<uint64_t>{100, 200, 300};\n  EXPECT_EQ(get_value_type_name(vector_uint64_val, true), \"VECTOR_UINT64\");\n\n  Doc::Value vector_float_val = std::vector<float>{1.1f, 2.2f, 3.3f};\n  EXPECT_EQ(get_value_type_name(vector_float_val, true), \"VECTOR_FP32\");\n\n  Doc::Value vector_double_val = std::vector<double>{1.1, 2.2, 3.3};\n  EXPECT_EQ(get_value_type_name(vector_double_val, true), \"VECTOR_FP64\");\n\n  Doc::Value vector_float16_val = std::vector<ailego::Float16>{\n      ailego::Float16(1.1f), ailego::Float16(2.2f), 
ailego::Float16(3.3f)};\n  EXPECT_EQ(get_value_type_name(vector_float16_val, true), \"VECTOR_FP16\");\n\n  Doc::Value vector_string_val = std::vector<std::string>{\"a\", \"b\", \"c\"};\n  EXPECT_EQ(get_value_type_name(vector_string_val, false), \"ARRAY_STRING\");\n\n  Doc::Value sparse_fp32_val =\n      std::pair<std::vector<uint32_t>, std::vector<float>>(\n          std::vector<uint32_t>{1, 2, 3}, std::vector<float>{1.1f, 2.2f, 3.3f});\n  EXPECT_EQ(get_value_type_name(sparse_fp32_val, true), \"SPARSE_VECTOR_FP32\");\n\n  Doc::Value sparse_fp16_val =\n      std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>(\n          std::vector<uint32_t>{1, 2, 3},\n          std::vector<ailego::Float16>{ailego::Float16(1.1f),\n                                       ailego::Float16(2.2f),\n                                       ailego::Float16(3.3f)});\n  EXPECT_EQ(get_value_type_name(sparse_fp16_val, true), \"SPARSE_VECTOR_FP16\");\n\n  // Test monostate (null) value\n  Doc::Value null_val = std::monostate{};\n  EXPECT_EQ(get_value_type_name(null_val, false), \"EMPTY\");\n}\n\nTEST_F(DocDetailedTest, SerializeValueCoverage) {\n  Doc doc;\n\n  doc.set<bool>(\"bool_field\", true);\n  doc.set<int32_t>(\"int32_field\", 42);\n  doc.set<uint32_t>(\"uint32_field\", 42);\n  doc.set<int64_t>(\"int64_field\", 42);\n  doc.set<uint64_t>(\"uint64_field\", 42);\n  doc.set<float>(\"float_field\", 3.14f);\n  doc.set<double>(\"double_field\", 3.14);\n  doc.set<std::string>(\"string_field\", \"test\");\n\n  std::vector<bool> bool_vec = {true, false};\n  doc.set<std::vector<bool>>(\"vector_bool_field\", bool_vec);\n\n  std::vector<int8_t> int8_vec = {1, 2, 3};\n  doc.set<std::vector<int8_t>>(\"vector_int8_field\", int8_vec);\n\n  std::vector<int16_t> int16_vec = {10, 20, 30};\n  doc.set<std::vector<int16_t>>(\"vector_int16_field\", int16_vec);\n\n  std::vector<int32_t> int32_vec = {100, 200, 300};\n  doc.set<std::vector<int32_t>>(\"vector_int32_field\", int32_vec);\n\n  
std::vector<int64_t> int64_vec = {1000, 2000, 3000};\n  doc.set<std::vector<int64_t>>(\"vector_int64_field\", int64_vec);\n\n  std::vector<uint32_t> uint32_vec = {10, 20, 30};\n  doc.set<std::vector<uint32_t>>(\"vector_uint32_field\", uint32_vec);\n\n  std::vector<uint64_t> uint64_vec = {100, 200, 300};\n  doc.set<std::vector<uint64_t>>(\"vector_uint64_field\", uint64_vec);\n\n  std::vector<float> float_vec = {1.1f, 2.2f, 3.3f};\n  doc.set<std::vector<float>>(\"vector_float_field\", float_vec);\n\n  std::vector<double> double_vec = {1.1, 2.2, 3.3};\n  doc.set<std::vector<double>>(\"vector_double_field\", double_vec);\n\n  std::vector<ailego::Float16> float16_vec = {\n      ailego::Float16(1.1f), ailego::Float16(2.2f), ailego::Float16(3.3f)};\n  doc.set<std::vector<ailego::Float16>>(\"vector_float16_field\", float16_vec);\n\n  std::vector<std::string> string_vec = {\"a\", \"b\", \"c\"};\n  doc.set<std::vector<std::string>>(\"vector_string_field\", string_vec);\n\n  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_fp32(\n      std::vector<uint32_t>{1, 2, 3}, std::vector<float>{1.1f, 2.2f, 3.3f});\n  doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n      \"sparse_fp32_field\", sparse_fp32);\n\n  std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>> sparse_fp16(\n      std::vector<uint32_t>{1, 2, 3},\n      std::vector<ailego::Float16>{ailego::Float16(1.1f), ailego::Float16(2.2f),\n                                   ailego::Float16(3.3f)});\n  doc.set<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n      \"sparse_fp16_field\", sparse_fp16);\n\n  // Test null value\n  doc.set_null(\"null_field\");\n\n  // for code coverage\n  EXPECT_GT(doc.to_detail_string().size(), doc.to_string().size());\n\n  auto buffer = doc.serialize();\n  EXPECT_FALSE(buffer.empty());\n\n  auto deserialized_doc = Doc::deserialize(buffer.data(), buffer.size());\n  EXPECT_NE(deserialized_doc, nullptr);\n\n  
EXPECT_EQ(deserialized_doc->get<bool>(\"bool_field\"), true);\n  EXPECT_EQ(deserialized_doc->get<int32_t>(\"int32_field\"), 42);\n  EXPECT_EQ(deserialized_doc->get<uint32_t>(\"uint32_field\"), 42u);\n  EXPECT_EQ(deserialized_doc->get<int64_t>(\"int64_field\"), 42);\n  EXPECT_EQ(deserialized_doc->get<uint64_t>(\"uint64_field\"), 42u);\n  EXPECT_FLOAT_EQ(deserialized_doc->get<float>(\"float_field\").value(), 3.14f);\n  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>(\"double_field\").value(), 3.14);\n  EXPECT_EQ(deserialized_doc->get<std::string>(\"string_field\"), \"test\");\n\n  // Test null value deserialization\n  EXPECT_TRUE(deserialized_doc->is_null(\"null_field\"));\n  EXPECT_FALSE(deserialized_doc->has_value(\"null_field\"));\n  EXPECT_TRUE(deserialized_doc->has(\"null_field\"));\n}\n\nTEST_F(DocDetailedTest, ToDetailStringCoverage) {\n  Doc doc;\n  doc.set_pk(\"test_pk\");\n  doc.set_doc_id(1);\n  doc.set_score(0.95f);\n\n  doc.set<bool>(\"bool_field\", true);\n  doc.set<int32_t>(\"int32_field\", 42);\n  doc.set<uint32_t>(\"uint32_field\", 42);\n  doc.set<int64_t>(\"int64_field\", 42);\n  doc.set<uint64_t>(\"uint64_field\", 42);\n  doc.set<float>(\"float_field\", 3.14f);\n  doc.set<double>(\"double_field\", 3.14);\n  doc.set<std::string>(\"string_field\", \"test\");\n\n  std::vector<bool> bool_vec = {true, false};\n  doc.set<std::vector<bool>>(\"vector_bool_field\", bool_vec);\n\n  std::vector<int8_t> int8_vec = {1, 2};\n  doc.set<std::vector<int8_t>>(\"vector_int8_field\", int8_vec);\n\n  std::vector<int16_t> int16_vec = {10, 20};\n  doc.set<std::vector<int16_t>>(\"vector_int16_field\", int16_vec);\n\n  std::vector<int32_t> int32_vec = {100, 200};\n  doc.set<std::vector<int32_t>>(\"vector_int32_field\", int32_vec);\n\n  std::vector<int64_t> int64_vec = {1000, 2000};\n  doc.set<std::vector<int64_t>>(\"vector_int64_field\", int64_vec);\n\n  std::vector<uint32_t> uint32_vec = {10, 20};\n  doc.set<std::vector<uint32_t>>(\"vector_uint32_field\", uint32_vec);\n\n  
std::vector<uint64_t> uint64_vec = {100, 200};\n  doc.set<std::vector<uint64_t>>(\"vector_uint64_field\", uint64_vec);\n\n  std::vector<float> float_vec = {1.1f, 2.2f};\n  doc.set<std::vector<float>>(\"vector_float_field\", float_vec);\n\n  std::vector<double> double_vec = {1.1, 2.2};\n  doc.set<std::vector<double>>(\"vector_double_field\", double_vec);\n\n  std::vector<ailego::Float16> float16_vec = {ailego::Float16(1.1f),\n                                              ailego::Float16(2.2f)};\n  doc.set<std::vector<ailego::Float16>>(\"vector_float16_field\", float16_vec);\n\n  std::vector<std::string> string_vec = {\"a\", \"b\"};\n  doc.set<std::vector<std::string>>(\"vector_string_field\", string_vec);\n\n  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_fp32(\n      std::vector<uint32_t>{1, 2}, std::vector<float>{1.1f, 2.2f});\n  doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n      \"sparse_fp32_field\", sparse_fp32);\n\n  std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>> sparse_fp16(\n      std::vector<uint32_t>{1, 2},\n      std::vector<ailego::Float16>{ailego::Float16(1.1f),\n                                   ailego::Float16(2.2f)});\n  doc.set<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n      \"sparse_fp16_field\", sparse_fp16);\n\n  // Test null value in detail string\n  doc.set_null(\"null_field\");\n\n  std::string detail_str = doc.to_detail_string();\n  EXPECT_FALSE(detail_str.empty());\n  EXPECT_NE(detail_str.find(\"bool_field\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"int32_field\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"vector_float_field\"), std::string::npos);\n  EXPECT_NE(detail_str.find(\"null\"),\n            std::string::npos);  // Should contain \"null\" for null field\n}\n\nTEST_F(DocDetailedTest, EqualityOperatorCoverage) {\n  Doc doc1, doc2;\n  doc1.set_pk(\"test_pk\");\n  doc2.set_pk(\"test_pk\");\n\n  doc1.set_doc_id(1);\n  doc2.set_doc_id(1);\n\n  
doc1.set<bool>(\"bool_field\", true);\n  doc2.set<bool>(\"bool_field\", true);\n\n  doc1.set<int32_t>(\"int32_field\", 42);\n  doc2.set<int32_t>(\"int32_field\", 42);\n\n  doc1.set<uint32_t>(\"uint32_field\", 42);\n  doc2.set<uint32_t>(\"uint32_field\", 42);\n\n  doc1.set<int64_t>(\"int64_field\", 42);\n  doc2.set<int64_t>(\"int64_field\", 42);\n\n  doc1.set<uint64_t>(\"uint64_field\", 42);\n  doc2.set<uint64_t>(\"uint64_field\", 42);\n\n  doc1.set<float>(\"float_field\", 3.14f);\n  doc2.set<float>(\"float_field\", 3.14f);\n\n  doc1.set<double>(\"double_field\", 3.14);\n  doc2.set<double>(\"double_field\", 3.14);\n\n  doc1.set<std::string>(\"string_field\", \"test\");\n  doc2.set<std::string>(\"string_field\", \"test\");\n\n  std::vector<bool> bool_vec = {true, false};\n  doc1.set<std::vector<bool>>(\"vector_bool_field\", bool_vec);\n  doc2.set<std::vector<bool>>(\"vector_bool_field\", bool_vec);\n\n  std::vector<int8_t> int8_vec = {1, 2};\n  doc1.set<std::vector<int8_t>>(\"vector_int8_field\", int8_vec);\n  doc2.set<std::vector<int8_t>>(\"vector_int8_field\", int8_vec);\n\n  std::vector<int16_t> int16_vec = {10, 20};\n  doc1.set<std::vector<int16_t>>(\"vector_int16_field\", int16_vec);\n  doc2.set<std::vector<int16_t>>(\"vector_int16_field\", int16_vec);\n\n  std::vector<int32_t> int32_vec = {100, 200};\n  doc1.set<std::vector<int32_t>>(\"vector_int32_field\", int32_vec);\n  doc2.set<std::vector<int32_t>>(\"vector_int32_field\", int32_vec);\n\n  std::vector<int64_t> int64_vec = {1000, 2000};\n  doc1.set<std::vector<int64_t>>(\"vector_int64_field\", int64_vec);\n  doc2.set<std::vector<int64_t>>(\"vector_int64_field\", int64_vec);\n\n  std::vector<uint32_t> uint32_vec = {10, 20};\n  doc1.set<std::vector<uint32_t>>(\"vector_uint32_field\", uint32_vec);\n  doc2.set<std::vector<uint32_t>>(\"vector_uint32_field\", uint32_vec);\n\n  std::vector<uint64_t> uint64_vec = {100, 200};\n  doc1.set<std::vector<uint64_t>>(\"vector_uint64_field\", uint64_vec);\n  
doc2.set<std::vector<uint64_t>>(\"vector_uint64_field\", uint64_vec);\n\n  std::vector<float> float_vec = {1.1f, 2.2f};\n  doc1.set<std::vector<float>>(\"vector_float_field\", float_vec);\n  doc2.set<std::vector<float>>(\"vector_float_field\", float_vec);\n\n  std::vector<double> double_vec = {1.1, 2.2};\n  doc1.set<std::vector<double>>(\"vector_double_field\", double_vec);\n  doc2.set<std::vector<double>>(\"vector_double_field\", double_vec);\n\n  std::vector<ailego::Float16> float16_vec = {ailego::Float16(1.1f),\n                                              ailego::Float16(2.2f)};\n  doc1.set<std::vector<ailego::Float16>>(\"vector_float16_field\", float16_vec);\n  doc2.set<std::vector<ailego::Float16>>(\"vector_float16_field\", float16_vec);\n\n  std::vector<std::string> string_vec = {\"a\", \"b\"};\n  doc1.set<std::vector<std::string>>(\"vector_string_field\", string_vec);\n  doc2.set<std::vector<std::string>>(\"vector_string_field\", string_vec);\n\n  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_fp32(\n      std::vector<uint32_t>{1, 2}, std::vector<float>{1.1f, 2.2f});\n  doc1.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n      \"sparse_fp32_field\", sparse_fp32);\n  doc2.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n      \"sparse_fp32_field\", sparse_fp32);\n\n  std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>> sparse_fp16(\n      std::vector<uint32_t>{1, 2},\n      std::vector<ailego::Float16>{ailego::Float16(1.1f),\n                                   ailego::Float16(2.2f)});\n  doc1.set<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n      \"sparse_fp16_field\", sparse_fp16);\n  doc2.set<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(\n      \"sparse_fp16_field\", sparse_fp16);\n\n  // Test equality with null values\n  doc1.set_null(\"null_field\");\n  doc2.set_null(\"null_field\");\n\n  EXPECT_TRUE(doc1 == doc2);\n\n  doc2.set<int32_t>(\"int32_field\", 43);\n  
EXPECT_FALSE(doc1 == doc2);\n\n  doc1.set_pk(\"test_pk1\");\n  EXPECT_FALSE(doc1 == doc2);\n\n  doc1.set_pk(\"test_pk\");\n  doc1.set<uint32_t>(\"int32_field\", 42);\n  EXPECT_FALSE(doc1 == doc2);\n\n  doc1.set<int32_t>(\"int32_field\", 42);\n  doc1.set<int32_t>(\"rename_int32_field\", 42);\n  EXPECT_FALSE(doc1 == doc2);\n\n  // Test inequality with different null values\n  Doc doc3, doc4;\n  doc3.set_pk(\"test\");\n  doc4.set_pk(\"test\");\n  doc3.set_null(\"null_field\");\n  doc4.set<int32_t>(\"null_field\", 42);\n  EXPECT_FALSE(doc3 == doc4);\n}\n\n\nTEST(VectorQuery, Validate) {\n  // field schema is null when query without vector\n  {\n    VectorQuery query;\n    query.topk_ = 10;\n    query.field_name_ = \"field_name\";\n    auto s = query.validate(nullptr);\n    EXPECT_TRUE(s.ok());\n  }\n\n  // field schema is null when query with vector\n  {\n    VectorQuery query;\n    query.topk_ = 10;\n    query.field_name_ = \"field_name\";\n    std::vector<float> query_vector = {1.0f, 2.0f, 3.0f, 4.0f};\n    std::string query_vector_str =\n        std::string(reinterpret_cast<char *>(query_vector.data()),\n                    query_vector.size() * sizeof(float));\n    query.query_vector_ = query_vector_str;\n    auto s = query.validate(nullptr);\n    EXPECT_FALSE(s.ok());\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n  // vector_query exceed topk\n  {\n    VectorQuery query;\n    query.field_name_ = \"field_name\";\n    query.topk_ = 1000;\n    FieldSchema schema =\n        FieldSchema(\"field_name\", DataType::VECTOR_FP32, 128, true);\n    auto s = query.validate(&schema);\n    EXPECT_FALSE(s.ok());\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n  // vector_query output_fields size exceed\n  {\n    VectorQuery query;\n    query.field_name_ = \"field_name\";\n    query.topk_ = 10;\n    query.output_fields_ = std::vector<std::string>(1025);\n    FieldSchema schema = FieldSchema(\"field_name\", DataType::INT32);\n    auto s = 
query.validate(&schema);\n    EXPECT_FALSE(s.ok());\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // validate dense vector dimension\n  {\n    VectorQuery query;\n    query.field_name_ = \"field_name\";\n    query.topk_ = 100;\n    std::vector<float> query_vector = {1.0f, 2.0f, 3.0f, 4.0f};\n    std::string query_vector_str =\n        std::string(reinterpret_cast<char *>(query_vector.data()),\n                    query_vector.size() * sizeof(float));\n    query.query_vector_ = query_vector_str;\n    FieldSchema schema =\n        FieldSchema(\"field_name\", DataType::VECTOR_FP32, 4, true);\n    auto s = query.validate(&schema);\n    EXPECT_TRUE(s.ok());\n\n    query.query_vector_ = query_vector_str.substr(0, 3);\n    s = query.validate(&schema);\n    EXPECT_FALSE(s.ok());\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  // validate sparse indices\n  {\n    VectorQuery query;\n    query.field_name_ = \"field_name\";\n    query.topk_ = 100;\n    std::vector<uint32_t> query_indices = std::vector<uint32_t>(16385);\n    std::string query_indices_str =\n        std::string(reinterpret_cast<char *>(query_indices.data()),\n                    query_indices.size() * sizeof(uint32_t));\n    query.query_sparse_indices_ = query_indices_str;\n    FieldSchema schema =\n        FieldSchema(\"field_name\", DataType::SPARSE_VECTOR_FP32);\n    auto s = query.validate(&schema);\n    EXPECT_FALSE(s.ok());\n    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n    query.query_sparse_indices_ = query_indices_str.substr(0, 3);\n    s = query.validate(&schema);\n    EXPECT_TRUE(s.ok());\n  }\n}\n\n// Test null value\nTEST_F(DocDetailedTest, NullValue) {\n  Doc doc;\n\n  // Test setting null value\n  doc.set_null(\"null_field\");\n  EXPECT_TRUE(doc.is_null(\"null_field\"));\n  EXPECT_FALSE(doc.has_value(\"null_field\"));\n  EXPECT_TRUE(doc.has(\"null_field\"));\n\n  // Test get_field with null field\n  auto result = 
doc.get_field<int32_t>(\"null_field\");\n  EXPECT_EQ(result.status(), Doc::FieldGetStatus::IS_NULL);\n  EXPECT_FALSE(result.ok());\n\n  // Test get with null field\n  auto opt_result = doc.get<int32_t>(\"null_field\");\n  EXPECT_FALSE(opt_result.has_value());\n\n  // Test overwriting null with actual value\n  doc.set<int32_t>(\"null_field\", 42);\n  EXPECT_FALSE(doc.is_null(\"null_field\"));\n  EXPECT_TRUE(doc.has_value(\"null_field\"));\n  EXPECT_TRUE(doc.has(\"null_field\"));\n  EXPECT_EQ(doc.get<int32_t>(\"null_field\").value(), 42);\n\n  // Test overwriting value with null\n  doc.set_null(\"null_field\");\n  EXPECT_TRUE(doc.is_null(\"null_field\"));\n  EXPECT_FALSE(doc.has_value(\"null_field\"));\n  EXPECT_TRUE(doc.has(\"null_field\"));\n\n  // Test serialization/deserialization of null values\n  auto buffer = doc.serialize();\n  auto deserialized_doc = Doc::deserialize(buffer.data(), buffer.size());\n  EXPECT_NE(deserialized_doc, nullptr);\n  EXPECT_TRUE(deserialized_doc->is_null(\"null_field\"));\n  EXPECT_FALSE(deserialized_doc->has_value(\"null_field\"));\n  EXPECT_TRUE(deserialized_doc->has(\"null_field\"));\n}\n\n// Test field existence checks\nTEST_F(DocDetailedTest, FieldExistenceChecks) {\n  Doc doc;\n\n  // Test non-existent field\n  EXPECT_FALSE(doc.has(\"nonexistent\"));\n  EXPECT_FALSE(doc.has_value(\"nonexistent\"));\n  EXPECT_FALSE(doc.is_null(\"nonexistent\"));\n\n  // Test get_field with non-existent field\n  auto result = doc.get_field<int32_t>(\"nonexistent\");\n  EXPECT_EQ(result.status(), Doc::FieldGetStatus::NOT_FOUND);\n  EXPECT_FALSE(result.ok());\n\n  // Test get with non-existent field\n  auto opt_result = doc.get<int32_t>(\"nonexistent\");\n  EXPECT_FALSE(opt_result.has_value());\n\n  // Add a field and test again\n  doc.set<int32_t>(\"existent\", 123);\n  EXPECT_TRUE(doc.has(\"existent\"));\n  EXPECT_TRUE(doc.has_value(\"existent\"));\n  EXPECT_FALSE(doc.is_null(\"existent\"));\n\n  // Test type mismatch\n  auto type_mismatch_result 
= doc.get_field<std::string>(\"existent\");\n  EXPECT_EQ(type_mismatch_result.status(), Doc::FieldGetStatus::TYPE_MISMATCH);\n  EXPECT_FALSE(type_mismatch_result.ok());\n\n  auto type_mismatch_opt = doc.get<std::string>(\"existent\");\n  EXPECT_FALSE(type_mismatch_opt.has_value());\n}"
  },
  {
    "path": "tests/db/index/common/index_params_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/index_params.h\"\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(IndexParamsTest, IndexParamsBaseClass) {\n  // Test that IndexParams is abstract and can't be instantiated directly\n  // This is more of a compile-time check - we can't directly instantiate an\n  // abstract class\n\n  // Test is_vector_index_type method\n  HnswIndexParams hnsw_params(MetricType::L2, 16, 100);\n  EXPECT_TRUE(hnsw_params.is_vector_index_type());\n\n  FlatIndexParams flat_params(MetricType::IP);\n  EXPECT_TRUE(flat_params.is_vector_index_type());\n\n  IVFIndexParams ivf_params(MetricType::COSINE, 100);\n  EXPECT_TRUE(ivf_params.is_vector_index_type());\n\n  InvertIndexParams invert_params(true);\n  EXPECT_FALSE(invert_params.is_vector_index_type());\n}\n\nTEST(IndexParamsTest, InvertIndexParams) {\n  // Test constructor\n  InvertIndexParams params(true);\n  EXPECT_EQ(params.type(), IndexType::INVERT);\n  EXPECT_TRUE(params.enable_range_optimization());\n\n  InvertIndexParams params2(false);\n  EXPECT_FALSE(params2.enable_range_optimization());\n\n  // Test clone method\n  auto cloned = params.clone();\n  EXPECT_NE(cloned.get(), &params);  // Should be different objects\n  EXPECT_EQ(cloned->type(), IndexType::INVERT);\n\n  // Test comparison operators\n  InvertIndexParams params3(true);\n  InvertIndexParams params4(false);\n\n  
EXPECT_TRUE(params == params3);\n  EXPECT_FALSE(params == params4);\n  EXPECT_TRUE(params != params4);\n\n  // Test setter\n  params2.set_enable_range_optimization(true);\n  EXPECT_TRUE(params2.enable_range_optimization());\n  EXPECT_TRUE(params2 == params);\n}\n\nTEST(IndexParamsTest, VectorIndexParamsBase) {\n  // Test constructor and basic methods\n  FlatIndexParams flat_params(MetricType::L2, QuantizeType::FP16);\n  EXPECT_EQ(flat_params.type(), IndexType::FLAT);\n  EXPECT_EQ(flat_params.metric_type(), MetricType::L2);\n  EXPECT_EQ(flat_params.quantize_type(), QuantizeType::FP16);\n\n  // Test setters\n  flat_params.set_metric_type(MetricType::IP);\n  EXPECT_EQ(flat_params.metric_type(), MetricType::IP);\n\n  flat_params.set_quantize_type(QuantizeType::INT8);\n  EXPECT_EQ(flat_params.quantize_type(), QuantizeType::INT8);\n}\n\nTEST(IndexParamsTest, HnswIndexParams) {\n  // Test constructor\n  HnswIndexParams params(MetricType::COSINE, 20, 150, QuantizeType::INT4);\n  EXPECT_EQ(params.type(), IndexType::HNSW);\n  EXPECT_EQ(params.metric_type(), MetricType::COSINE);\n  EXPECT_EQ(params.m(), 20);\n  EXPECT_EQ(params.ef_construction(), 150);\n  EXPECT_EQ(params.quantize_type(), QuantizeType::INT4);\n\n  // Test clone\n  auto cloned = params.clone();\n  EXPECT_NE(cloned.get(), &params);\n  EXPECT_EQ(cloned->type(), IndexType::HNSW);\n\n  // Test comparison\n  HnswIndexParams params2(MetricType::COSINE, 20, 150, QuantizeType::INT4);\n  HnswIndexParams params3(MetricType::L2, 20, 150, QuantizeType::INT4);\n  HnswIndexParams params4(MetricType::COSINE, 16, 150, QuantizeType::INT4);\n  HnswIndexParams params5(MetricType::COSINE, 20, 200, QuantizeType::INT4);\n\n  EXPECT_TRUE(params == params2);\n  EXPECT_FALSE(params == params3);\n  EXPECT_FALSE(params == params4);\n  EXPECT_FALSE(params == params5);\n\n  // Test setters\n  params.set_m(10);\n  EXPECT_EQ(params.m(), 10);\n\n  params.set_ef_construction(75);\n  EXPECT_EQ(params.ef_construction(), 
75);\n}\n\nTEST(IndexParamsTest, FlatIndexParams) {\n  // Test constructor\n  FlatIndexParams params(MetricType::IP, QuantizeType::FP16);\n  EXPECT_EQ(params.type(), IndexType::FLAT);\n  EXPECT_EQ(params.metric_type(), MetricType::IP);\n  EXPECT_EQ(params.quantize_type(), QuantizeType::FP16);\n\n  // Test clone\n  auto cloned = params.clone();\n  EXPECT_NE(cloned.get(), &params);\n  EXPECT_EQ(cloned->type(), IndexType::FLAT);\n\n  // Test comparison\n  FlatIndexParams params2(MetricType::IP, QuantizeType::FP16);\n  FlatIndexParams params3(MetricType::L2, QuantizeType::FP16);\n  FlatIndexParams params4(MetricType::IP, QuantizeType::INT8);\n\n  EXPECT_TRUE(params == params2);\n  EXPECT_FALSE(params == params3);\n  EXPECT_FALSE(params == params4);\n}\n\nTEST(IndexParamsTest, IVFIndexParams) {\n  // Test constructor\n  IVFIndexParams params(MetricType::L2, 128, 10, false, QuantizeType::INT8);\n  EXPECT_EQ(params.type(), IndexType::IVF);\n  EXPECT_EQ(params.metric_type(), MetricType::L2);\n  EXPECT_EQ(params.n_list(), 128);\n  EXPECT_EQ(params.quantize_type(), QuantizeType::INT8);\n\n  // Test clone\n  auto cloned = params.clone();\n  EXPECT_NE(cloned.get(), &params);\n  EXPECT_EQ(cloned->type(), IndexType::IVF);\n\n  // Test comparison\n  IVFIndexParams params2(MetricType::L2, 128, 10, false, QuantizeType::INT8);\n  IVFIndexParams params3(MetricType::IP, 128, 10, false, QuantizeType::INT8);\n  IVFIndexParams params4(MetricType::L2, 256, 10, false, QuantizeType::INT8);\n  IVFIndexParams params5(MetricType::L2, 128, 10, false, QuantizeType::FP16);\n\n  EXPECT_TRUE(params == params2);\n  EXPECT_FALSE(params == params3);\n  EXPECT_FALSE(params == params4);\n  EXPECT_FALSE(params == params5);\n\n  // Test setter\n  params.set_n_list(64);\n  EXPECT_EQ(params.n_list(), 64);\n}\n\nTEST(IndexParamsTest, DefaultVectorIndexParams) {\n  // Test default vector index params\n  EXPECT_EQ(DefaultVectorIndexParams.type(), IndexType::FLAT);\n  
EXPECT_EQ(DefaultVectorIndexParams.metric_type(), MetricType::IP);\n  EXPECT_EQ(DefaultVectorIndexParams.quantize_type(), QuantizeType::UNDEFINED);\n}\n\nTEST(IndexParamsTest, DynamicPointerCast) {\n  // Test dynamic_pointer_cast functionality with IndexParams\n  IndexParams::Ptr base_ptr =\n      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n  auto hnsw_ptr = std::dynamic_pointer_cast<HnswIndexParams>(base_ptr);\n  EXPECT_NE(hnsw_ptr, nullptr);\n  EXPECT_EQ(hnsw_ptr->type(), IndexType::HNSW);\n\n  // Test casting to wrong type\n  auto flat_ptr = std::dynamic_pointer_cast<FlatIndexParams>(base_ptr);\n  EXPECT_EQ(flat_ptr, nullptr);\n\n  // Test casting from base class reference\n  IndexParams &base_ref = *base_ptr;\n  auto &hnsw_ref = dynamic_cast<HnswIndexParams &>(base_ref);\n  EXPECT_EQ(hnsw_ref.type(), IndexType::HNSW);\n}"
  },
  {
    "path": "tests/db/index/common/meta_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/common/meta.h\"\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(SegmentMetaTest, DefaultConstruction) {\n  SegmentMeta segment_meta;\n  EXPECT_EQ(segment_meta.id(), 0u);\n  EXPECT_TRUE(segment_meta.persisted_blocks().empty());\n  EXPECT_FALSE(segment_meta.has_writing_forward_block());\n  EXPECT_EQ(segment_meta.min_doc_id(), 0u);\n  EXPECT_EQ(segment_meta.max_doc_id(), 0u);\n  EXPECT_EQ(segment_meta.doc_count(), 0u);\n  EXPECT_TRUE(segment_meta.indexed_vector_fields().empty());\n}\n\nTEST(SegmentMetaTest, ConstructionWithID) {\n  SegmentMeta segment_meta(42);\n  EXPECT_EQ(segment_meta.id(), 42u);\n  EXPECT_TRUE(segment_meta.persisted_blocks().empty());\n  EXPECT_FALSE(segment_meta.has_writing_forward_block());\n  EXPECT_EQ(segment_meta.min_doc_id(), 0u);\n  EXPECT_EQ(segment_meta.max_doc_id(), 0u);\n  EXPECT_EQ(segment_meta.doc_count(), 0u);\n  EXPECT_TRUE(segment_meta.indexed_vector_fields().empty());\n}\n\nTEST(SegmentMetaTest, PersistedBlocksOperations) {\n  SegmentMeta segment_meta(1);\n\n  // Add persisted blocks\n  BlockMeta block1(1, BlockType::SCALAR, 0, 100);\n  block1.set_doc_count(50);\n  block1.add_column(\"col1\");\n  block1.add_column(\"col2\");\n\n  BlockMeta block2(2, BlockType::VECTOR_INDEX, 101, 200);\n  block2.set_doc_count(75);\n  block2.add_column(\"vec_col\");\n\n  
segment_meta.add_persisted_block(block1);\n  segment_meta.add_persisted_block(block2);\n\n  EXPECT_EQ(segment_meta.persisted_blocks().size(), 2u);\n\n  const auto &blocks = segment_meta.persisted_blocks();\n  EXPECT_EQ(blocks[0].id(), 1u);\n  EXPECT_EQ(blocks[0].type(), BlockType::SCALAR);\n  EXPECT_EQ(blocks[0].min_doc_id(), 0u);\n  EXPECT_EQ(blocks[0].max_doc_id(), 100u);\n  EXPECT_EQ(blocks[0].doc_count(), 50u);\n  EXPECT_EQ(blocks[0].columns().size(), 2u);\n\n  EXPECT_EQ(blocks[1].id(), 2u);\n  EXPECT_EQ(blocks[1].type(), BlockType::VECTOR_INDEX);\n  EXPECT_EQ(blocks[1].min_doc_id(), 101u);\n  EXPECT_EQ(blocks[1].max_doc_id(), 200u);\n  EXPECT_EQ(blocks[1].doc_count(), 75u);\n  EXPECT_EQ(blocks[1].columns().size(), 1u);\n}\n\nTEST(SegmentMetaTest, WritingForwardBlockOperations) {\n  SegmentMeta segment_meta(1);\n\n  // Initially no writing forward block\n  EXPECT_FALSE(segment_meta.has_writing_forward_block());\n\n  // Set writing forward block\n  BlockMeta writing_block(3, BlockType::SCALAR, 201, 300);\n  writing_block.set_doc_count(25);\n  writing_block.add_column(\"col3\");\n\n  segment_meta.set_writing_forward_block(writing_block);\n\n  // Now should have writing forward block\n  EXPECT_TRUE(segment_meta.has_writing_forward_block());\n\n  const auto &wfb = segment_meta.writing_forward_block();\n  EXPECT_EQ(wfb.value().id(), 3u);\n  EXPECT_EQ(wfb.value().type(), BlockType::SCALAR);\n  EXPECT_EQ(wfb.value().min_doc_id(), 201u);\n  EXPECT_EQ(wfb.value().max_doc_id(), 300u);\n  EXPECT_EQ(wfb.value().doc_count(), 25u);\n  EXPECT_EQ(wfb.value().columns().size(), 1u);\n  EXPECT_EQ(wfb.value().columns()[0], \"col3\");\n}\n\nTEST(SegmentMetaTest, MinDocIDCalculation) {\n  SegmentMeta segment_meta(1);\n\n  // Case 1: No persisted blocks, no writing forward block\n  EXPECT_EQ(segment_meta.min_doc_id(), 0u);\n\n  // Case 2: No persisted blocks, but has writing forward block\n  BlockMeta writing_block(1, BlockType::SCALAR, 100, 200);\n  
segment_meta.set_writing_forward_block(writing_block);\n  EXPECT_EQ(segment_meta.min_doc_id(), 100u);\n\n  // Case 3: Has persisted blocks (should take precedence)\n  BlockMeta persisted_block(1, BlockType::SCALAR, 50, 150);\n  segment_meta.add_persisted_block(persisted_block);\n  EXPECT_EQ(segment_meta.min_doc_id(), 50u);\n}\n\nTEST(SegmentMetaTest, MaxDocIDCalculation) {\n  SegmentMeta segment_meta(1);\n\n  // Case 1: No blocks at all\n  EXPECT_EQ(segment_meta.max_doc_id(), 0u);\n\n  // Case 2: Only persisted blocks\n  BlockMeta persisted_block(1, BlockType::SCALAR, 0, 100);\n  segment_meta.add_persisted_block(persisted_block);\n  EXPECT_EQ(segment_meta.max_doc_id(), 100u);\n\n  // Case 3: Both persisted and writing forward blocks (writing forward takes\n  // precedence)\n  BlockMeta writing_block(2, BlockType::SCALAR, 101, 200);\n  segment_meta.set_writing_forward_block(writing_block);\n  EXPECT_EQ(segment_meta.max_doc_id(), 100u);\n\n  // Case 4: Only writing forward block\n  SegmentMeta segment_meta2(2);\n  segment_meta2.set_writing_forward_block(writing_block);\n  EXPECT_EQ(segment_meta2.max_doc_id(), 0u);\n}\n\nTEST(SegmentMetaTest, DocCountCalculation) {\n  SegmentMeta segment_meta(1);\n\n  // Initially 0\n  EXPECT_EQ(segment_meta.doc_count(), 0u);\n\n  // Add persisted blocks\n  BlockMeta block1(1, BlockType::SCALAR, 0, 100);\n  block1.set_doc_count(50);\n  segment_meta.add_persisted_block(block1);\n\n  EXPECT_EQ(segment_meta.doc_count(), 50u);\n\n  // Add another persisted block\n  BlockMeta block2(2, BlockType::VECTOR_INDEX, 101, 200);\n  block2.set_doc_count(75);\n  segment_meta.add_persisted_block(block2);\n\n  EXPECT_EQ(segment_meta.doc_count(), 50u);\n\n  // Add writing forward block\n  BlockMeta writing_block(3, BlockType::SCALAR, 201, 300);\n  writing_block.set_doc_count(25);\n  segment_meta.set_writing_forward_block(writing_block);\n\n  EXPECT_EQ(segment_meta.doc_count(), 75);\n}\n\nTEST(SegmentMetaTest, IndexedVectorFieldsOperations) {\n  
SegmentMeta segment_meta(1);\n\n  // Initially empty\n  EXPECT_FALSE(segment_meta.vector_indexed(\"field1\"));\n  EXPECT_TRUE(segment_meta.indexed_vector_fields().empty());\n\n  // Add indexed fields\n  segment_meta.add_indexed_vector_field(\"field1\");\n  segment_meta.add_indexed_vector_field(\"field2\");\n\n  EXPECT_TRUE(segment_meta.vector_indexed(\"field1\"));\n  EXPECT_TRUE(segment_meta.vector_indexed(\"field2\"));\n  EXPECT_FALSE(segment_meta.vector_indexed(\"field3\"));\n\n  EXPECT_EQ(segment_meta.indexed_vector_fields().size(), 2u);\n\n  // Check set operation\n  std::set<std::string> fields = {\"field3\", \"field4\"};\n  segment_meta.set_indexed_vector_fields(fields);\n\n  EXPECT_FALSE(segment_meta.vector_indexed(\"field1\"));\n  EXPECT_FALSE(segment_meta.vector_indexed(\"field2\"));\n  EXPECT_TRUE(segment_meta.vector_indexed(\"field3\"));\n  EXPECT_TRUE(segment_meta.vector_indexed(\"field4\"));\n  EXPECT_EQ(segment_meta.indexed_vector_fields().size(), 2u);\n}\n\nTEST(SegmentMetaTest, UpdateMaxDocId) {\n  SegmentMeta segment_meta(1);\n\n  // Try to update when no writing forward block - should not crash\n  segment_meta.update_max_doc_id(100);\n\n  // Set writing forward block and update\n  BlockMeta writing_block(1, BlockType::SCALAR, 0, 50);\n  segment_meta.set_writing_forward_block(writing_block);\n  EXPECT_EQ(segment_meta.writing_forward_block().value().max_doc_id(), 50u);\n\n  segment_meta.update_max_doc_id(100);\n  EXPECT_EQ(segment_meta.writing_forward_block().value().max_doc_id(), 100u);\n}\n\nTEST(SegmentMetaTest, EqualityOperators) {\n  SegmentMeta segment1(1);\n  SegmentMeta segment2(1);\n  SegmentMeta segment3(2);\n\n  // Same empty segments\n  EXPECT_TRUE(segment1 == segment2);\n  EXPECT_FALSE(segment1 != segment2);\n\n  // Different IDs\n  EXPECT_FALSE(segment1 == segment3);\n  EXPECT_TRUE(segment1 != segment3);\n\n  // Add same persisted block to both\n  BlockMeta block(1, BlockType::SCALAR, 0, 100);\n  block.set_doc_count(50);\n  
segment1.add_persisted_block(block);\n  segment2.add_persisted_block(block);\n\n  EXPECT_TRUE(segment1 == segment2);\n\n  // Add writing forward block\n  BlockMeta wfb(2, BlockType::VECTOR_INDEX, 101, 200);\n  segment1.set_writing_forward_block(wfb);\n  segment2.set_writing_forward_block(wfb);\n\n  EXPECT_TRUE(segment1 == segment2);\n\n  // Add indexed fields\n  segment1.add_indexed_vector_field(\"vec_field\");\n  segment2.add_indexed_vector_field(\"vec_field\");\n\n  EXPECT_TRUE(segment1 == segment2);\n\n  // Make them different again\n  segment1.add_indexed_vector_field(\"vec_field2\");\n\n  EXPECT_FALSE(segment1 == segment2);\n  EXPECT_TRUE(segment1 != segment2);\n}"
  },
  {
    "path": "tests/db/index/common/query_params_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/query_params.h\"\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nTEST(QueryParamsTest, QueryParamsBaseClass) {\n  // Test constructor\n  QueryParams params(IndexType::HNSW);\n  EXPECT_EQ(params.type(), IndexType::HNSW);\n\n  // Test setter\n  params.set_type(IndexType::IVF);\n  EXPECT_EQ(params.type(), IndexType::IVF);\n}\n\nTEST(QueryParamsTest, HnswQueryParams) {\n  // Test constructor\n  HnswQueryParams params(100);\n  EXPECT_EQ(params.type(), IndexType::HNSW);\n  EXPECT_EQ(params.ef(), 100);\n\n  // Test setter\n  params.set_ef(200);\n  EXPECT_EQ(params.ef(), 200);\n}\n\nTEST(QueryParamsTest, IVFQueryParams) {\n  // Test constructor\n  IVFQueryParams params(50);\n  EXPECT_EQ(params.type(), IndexType::IVF);\n  EXPECT_EQ(params.nprobe(), 50);\n\n  // Test setter\n  params.set_nprobe(75);\n  EXPECT_EQ(params.nprobe(), 75);\n}\n\nTEST(QueryParamsTest, Polymorphism) {\n  // Test polymorphic behavior\n  QueryParams::Ptr hnsw_ptr = std::make_shared<HnswQueryParams>(100);\n  QueryParams::Ptr ivf_ptr = std::make_shared<IVFQueryParams>(50);\n\n  // Verify types\n  EXPECT_EQ(hnsw_ptr->type(), IndexType::HNSW);\n  EXPECT_EQ(ivf_ptr->type(), IndexType::IVF);\n\n  // Test dynamic casting\n  auto hnsw_cast = std::dynamic_pointer_cast<HnswQueryParams>(hnsw_ptr);\n  auto ivf_cast = 
std::dynamic_pointer_cast<IVFQueryParams>(ivf_ptr);\n  auto invalid_cast = std::dynamic_pointer_cast<HnswQueryParams>(ivf_ptr);\n\n  EXPECT_NE(hnsw_cast, nullptr);\n  EXPECT_NE(ivf_cast, nullptr);\n  EXPECT_EQ(invalid_cast, nullptr);\n\n  // Verify values after casting\n  EXPECT_EQ(hnsw_cast->ef(), 100);\n  EXPECT_EQ(ivf_cast->nprobe(), 50);\n}\n\nTEST(QueryParamsTest, VirtualDestructor) {\n  // Test that virtual destructor allows proper deletion\n  QueryParams *hnsw_ptr = new HnswQueryParams(100);\n  QueryParams *ivf_ptr = new IVFQueryParams(50);\n\n  // This should not cause memory issues\n  delete hnsw_ptr;\n  delete ivf_ptr;\n}"
  },
  {
    "path": "tests/db/index/common/schema_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/schema.h\"\n#include <gtest/gtest.h>\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/status.h\"\n\nusing namespace zvec;\n\nTEST(FieldSchemaTest, DefaultConstructor) {\n  FieldSchema field;\n  EXPECT_EQ(field.name(), \"\");\n  EXPECT_EQ(field.data_type(), DataType::UNDEFINED);\n  EXPECT_FALSE(field.nullable());\n  EXPECT_EQ(field.dimension(), 0u);\n  EXPECT_EQ(field.index_params(), nullptr);\n}\n\nTEST(FieldSchemaTest, ConstructorWithParameters) {\n  auto index_params =\n      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n  FieldSchema field(\"test_field\", DataType::VECTOR_FP32, 128, true,\n                    index_params);\n\n  EXPECT_EQ(field.name(), \"test_field\");\n  EXPECT_EQ(field.data_type(), DataType::VECTOR_FP32);\n  EXPECT_TRUE(field.nullable());\n  EXPECT_EQ(field.dimension(), 128u);\n  EXPECT_NE(field.index_params(), nullptr);\n  EXPECT_EQ(field.index_params()->type(), IndexType::HNSW);\n}\n\nTEST(FieldSchemaTest, SettersAndGetters) {\n  FieldSchema field;\n\n  field.set_name(\"new_field\");\n  EXPECT_EQ(field.name(), \"new_field\");\n\n  field.set_data_type(DataType::STRING);\n  EXPECT_EQ(field.data_type(), DataType::STRING);\n\n  field.set_nullable(true);\n  EXPECT_TRUE(field.nullable());\n\n  field.set_dimension(256);\n  EXPECT_EQ(field.dimension(), 
256u);\n}\n\nTEST(FieldSchemaTest, ElementDataType) {\n  FieldSchema array_field;\n  array_field.set_data_type(DataType::ARRAY_BINARY);\n  EXPECT_EQ(array_field.element_data_type(), DataType::BINARY);\n\n  array_field.set_data_type(DataType::ARRAY_STRING);\n  EXPECT_EQ(array_field.element_data_type(), DataType::STRING);\n\n  array_field.set_data_type(DataType::ARRAY_BOOL);\n  EXPECT_EQ(array_field.element_data_type(), DataType::BOOL);\n\n  array_field.set_data_type(DataType::ARRAY_INT32);\n  EXPECT_EQ(array_field.element_data_type(), DataType::INT32);\n\n  array_field.set_data_type(DataType::ARRAY_INT64);\n  EXPECT_EQ(array_field.element_data_type(), DataType::INT64);\n\n  array_field.set_data_type(DataType::ARRAY_UINT32);\n  EXPECT_EQ(array_field.element_data_type(), DataType::UINT32);\n\n  array_field.set_data_type(DataType::ARRAY_UINT64);\n  EXPECT_EQ(array_field.element_data_type(), DataType::UINT64);\n\n  array_field.set_data_type(DataType::ARRAY_FLOAT);\n  EXPECT_EQ(array_field.element_data_type(), DataType::FLOAT);\n\n  array_field.set_data_type(DataType::ARRAY_DOUBLE);\n  EXPECT_EQ(array_field.element_data_type(), DataType::DOUBLE);\n\n  // Non-array types should return the same type\n  FieldSchema non_array_field;\n  non_array_field.set_data_type(DataType::STRING);\n  EXPECT_EQ(non_array_field.element_data_type(), DataType::STRING);\n}\n\nTEST(FieldSchemaTest, VectorFieldDetection) {\n  FieldSchema field;\n\n  // Test dense vector field detection\n  field.set_data_type(DataType::VECTOR_BINARY32);\n  EXPECT_TRUE(field.is_vector_field());\n  EXPECT_TRUE(field.is_dense_vector());\n  EXPECT_FALSE(field.is_sparse_vector());\n\n  field.set_data_type(DataType::VECTOR_FP32);\n  EXPECT_TRUE(field.is_vector_field());\n  EXPECT_TRUE(field.is_dense_vector());\n  EXPECT_FALSE(field.is_sparse_vector());\n\n  field.set_data_type(DataType::VECTOR_INT16);\n  EXPECT_TRUE(field.is_vector_field());\n  EXPECT_TRUE(field.is_dense_vector());\n  
EXPECT_FALSE(field.is_sparse_vector());\n\n  // Test sparse vector field detection\n  field.set_data_type(DataType::SPARSE_VECTOR_FP32);\n  EXPECT_TRUE(field.is_vector_field());\n  EXPECT_FALSE(field.is_dense_vector());\n  EXPECT_TRUE(field.is_sparse_vector());\n\n  // Test non-vector field\n  field.set_data_type(DataType::STRING);\n  EXPECT_FALSE(field.is_vector_field());\n  EXPECT_FALSE(field.is_dense_vector());\n  EXPECT_FALSE(field.is_sparse_vector());\n\n  // Test static methods\n  EXPECT_TRUE(FieldSchema::is_dense_vector_field(DataType::VECTOR_FP32));\n  EXPECT_FALSE(FieldSchema::is_dense_vector_field(DataType::STRING));\n\n  EXPECT_TRUE(\n      FieldSchema::is_sparse_vector_field(DataType::SPARSE_VECTOR_FP32));\n  EXPECT_FALSE(FieldSchema::is_sparse_vector_field(DataType::VECTOR_FP32));\n\n  EXPECT_TRUE(FieldSchema::is_vector_field(DataType::VECTOR_FP32));\n  EXPECT_TRUE(FieldSchema::is_vector_field(DataType::SPARSE_VECTOR_FP32));\n  EXPECT_FALSE(FieldSchema::is_vector_field(DataType::STRING));\n}\n\nTEST(FieldSchemaTest, ArrayTypeDetection) {\n  FieldSchema field;\n\n  field.set_data_type(DataType::ARRAY_BINARY);\n  EXPECT_TRUE(field.is_array_type());\n\n  field.set_data_type(DataType::ARRAY_STRING);\n  EXPECT_TRUE(field.is_array_type());\n\n  field.set_data_type(DataType::ARRAY_DOUBLE);\n  EXPECT_TRUE(field.is_array_type());\n\n  field.set_data_type(DataType::STRING);\n  EXPECT_FALSE(field.is_array_type());\n\n  field.set_data_type(DataType::VECTOR_FP32);\n  EXPECT_FALSE(field.is_array_type());\n}\n\nTEST(FieldSchemaTest, IndexTypeAndParams) {\n  FieldSchema field;\n  EXPECT_EQ(field.index_type(), IndexType::UNDEFINED);\n  EXPECT_EQ(field.index_params(), nullptr);\n\n  auto hnsw_params = std::make_shared<HnswIndexParams>(MetricType::IP, 32, 200);\n  field.set_index_params(hnsw_params);\n  EXPECT_EQ(field.index_type(), IndexType::HNSW);\n  EXPECT_NE(field.index_params(), nullptr);\n\n  // Test setting with nullptr\n  field.set_index_params(nullptr);\n  
EXPECT_EQ(field.index_type(), IndexType::UNDEFINED);\n  EXPECT_EQ(field.index_params(), nullptr);\n}\n\nTEST(FieldSchemaTest, CopyConstructorAndAssignment) {\n  auto index_params = std::make_shared<FlatIndexParams>(MetricType::L2);\n  FieldSchema original(\"original\", DataType::STRING, 100, true, index_params);\n\n  // Test copy constructor\n  FieldSchema copy(original);\n  EXPECT_EQ(copy.name(), \"original\");\n  EXPECT_EQ(copy.data_type(), DataType::STRING);\n  EXPECT_TRUE(copy.nullable());\n  EXPECT_EQ(copy.dimension(), 100u);\n  EXPECT_NE(copy.index_params(), nullptr);\n  EXPECT_EQ(copy.index_params()->type(), IndexType::FLAT);\n\n  // Test copy assignment\n  FieldSchema assigned;\n  assigned = original;\n  EXPECT_EQ(assigned.name(), \"original\");\n  EXPECT_EQ(assigned.data_type(), DataType::STRING);\n  EXPECT_TRUE(assigned.nullable());\n  EXPECT_EQ(assigned.dimension(), 100u);\n  EXPECT_NE(assigned.index_params(), nullptr);\n  EXPECT_EQ(assigned.index_params()->type(), IndexType::FLAT);\n\n  // Verify deep copy - modifying original shouldn't affect copy\n  original.set_name(\"modified\");\n  EXPECT_EQ(copy.name(), \"original\");      // Copy should be unchanged\n  EXPECT_EQ(assigned.name(), \"original\");  // Assigned should be unchanged\n}\n\nTEST(FieldSchemaTest, MoveConstructorAndAssignment) {\n  auto index_params = std::make_shared<IVFIndexParams>(MetricType::COSINE, 128);\n  FieldSchema original(\"move_test\", DataType::VECTOR_FP32, 256, false,\n                       index_params);\n\n  // Test move constructor\n  FieldSchema moved(std::move(original));\n  EXPECT_EQ(moved.name(), \"move_test\");\n  EXPECT_EQ(moved.data_type(), DataType::VECTOR_FP32);\n  EXPECT_FALSE(moved.nullable());\n  EXPECT_EQ(moved.dimension(), 256u);\n  EXPECT_NE(moved.index_params(), nullptr);\n  EXPECT_EQ(moved.index_params()->type(), IndexType::IVF);\n\n  // After move, original should be in valid but unspecified state\n  // Note: In practice, the name would likely be moved, 
but we don't test that\n  // as it's implementation-dependent\n}\n\nTEST(FieldSchemaTest, ComparisonOperators) {\n  auto index_params1 =\n      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n  auto index_params2 =\n      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n  auto index_params3 = std::make_shared<FlatIndexParams>(MetricType::IP);\n\n  FieldSchema field1(\"field\", DataType::STRING, 100, false, index_params1);\n  FieldSchema field2(\"field\", DataType::STRING, 100, false, index_params2);\n  FieldSchema field3(\"field\", DataType::STRING, 100, false, index_params3);\n  FieldSchema field4(\"field\", DataType::STRING, 100, true, index_params1);\n  FieldSchema field5(\"different\", DataType::STRING, 100, false, index_params1);\n\n  // Equal fields\n  EXPECT_TRUE(field1 == field2);\n  EXPECT_FALSE(field1 != field2);\n\n  // Different index params\n  EXPECT_FALSE(field1 == field3);\n  EXPECT_TRUE(field1 != field3);\n\n  // Different nullable\n  EXPECT_FALSE(field1 == field4);\n  EXPECT_TRUE(field1 != field4);\n\n  // Different name\n  EXPECT_FALSE(field1 == field5);\n  EXPECT_TRUE(field1 != field5);\n}\n\nTEST(FieldSchemaTest, Validate) {\n  {\n    FieldSchema field(\"\", DataType::UNDEFINED);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    FieldSchema field(\"\", DataType::STRING);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 0,\n                      false);  // Zero dimension\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    FieldSchema field(\"dense_vector\", DataType::VECTOR_FP32, 20001,\n                      false);  // Dimension too large\n    auto status = field.validate();\n    
EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto ivf_params = std::make_shared<IVFIndexParams>(MetricType::IP, 128);\n    FieldSchema field(\"sparse_field\", DataType::SPARSE_VECTOR_FP32, 0, false,\n                      ivf_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto hnsw_params =\n        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n    FieldSchema field(\"sparse_field\", DataType::SPARSE_VECTOR_FP32, 0, false,\n                      hnsw_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto invalid_params = std::make_shared<InvertIndexParams>(false);\n    FieldSchema field(\"dense_field\", DataType::VECTOR_FP32, 128, false,\n                      invalid_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto hnsw_params =\n        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n    FieldSchema field(\"scalar_field\", DataType::STRING, 0, false, hnsw_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n  }\n\n  {\n    auto hnsw_params =\n        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 128, false,\n                      hnsw_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    auto flat_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    FieldSchema field(\"sparse_field\", DataType::SPARSE_VECTOR_FP32, 0, false,\n                      flat_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    
auto invert_params = std::make_shared<InvertIndexParams>(false);\n    FieldSchema field(\"scalar_field\", DataType::STRING, 0, false,\n                      invert_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    FieldSchema field(\"simple_field\", DataType::STRING);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());  // Scalar fields without index params are valid\n\n    FieldSchema vector_field(\"vector_field\", DataType::VECTOR_FP32, 128, false);\n    status = vector_field.validate();\n    EXPECT_TRUE(\n        status.ok());  // Vector fields without index params are also valid\n  }\n\n  {\n    // Test that VECTOR_FP32 with FP16 quantize type is valid\n    auto hnsw_params = std::make_shared<HnswIndexParams>(\n        MetricType::L2, 16, 100, QuantizeType::FP16);\n    FieldSchema field(\"fp32_vector\", DataType::VECTOR_FP32, 128, false,\n                      hnsw_params);\n    auto status = field.validate();\n    if (!status.ok()) {\n      std::cout << \"status: \" << status.message() << std::endl;\n    }\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    // Test that VECTOR_FP32 with UNDEFINED quantize type is valid\n    auto hnsw_params = std::make_shared<HnswIndexParams>(\n        MetricType::L2, 16, 100, QuantizeType::UNDEFINED);\n    FieldSchema field(\"fp32_vector_no_quantize\", DataType::VECTOR_FP32, 128,\n                      false, hnsw_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    // Test that SPARSE_VECTOR_FP32 with FP16 quantize type is valid\n    auto hnsw_params = std::make_shared<HnswIndexParams>(\n        MetricType::IP, 16, 100, QuantizeType::FP16);\n    FieldSchema field(\"sparse_fp32_vector\", DataType::SPARSE_VECTOR_FP32, 0,\n                      false, hnsw_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n  }\n\n  {\n    // Test that VECTOR_FP64 with FP16 quantize type is invalid\n    auto hnsw_params = 
std::make_shared<HnswIndexParams>(\n        MetricType::L2, 16, 100, QuantizeType::FP16);\n    FieldSchema field(\"fp64_vector\", DataType::VECTOR_FP64, 128, false,\n                      hnsw_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok());\n  }\n\n  {\n    // INT8/INT4 quantizers are already supported\n    // Test that VECTOR_FP32 with INT8 quantize type should succeed\n    auto hnsw_params = std::make_shared<HnswIndexParams>(\n        MetricType::L2, 16, 100, QuantizeType::INT8);\n    FieldSchema field(\"fp32_vector_int8_quantize\", DataType::VECTOR_FP32, 128,\n                      false, hnsw_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok());\n\n    auto flat_params =\n        std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::INT4);\n    FieldSchema flat_field(\"fp32_vector_int4_quantize\", DataType::VECTOR_FP32,\n                           128, false, flat_params);\n    // NOTE(review): `field` is re-validated here; confirm whether\n    // flat_field.validate() was intended.\n    EXPECT_TRUE(field.validate().ok());\n  }\n\n  {\n    std::vector<std::string> valid_names = {\n        \"a\",  // min len = 1\n        \"A\",\n        \"0\",\n        \"_\",\n        \"-\",  // single allowed char\n        \"abc\",\n        \"ABC\",\n        \"a1_\",\n        \"user_name\",\n        \"test-123\",\n        \"aBc123_-\",\n        std::string(32, 'a'),  // max len = 32\n        \"a_b-c1\",\n        \"__test__\",\n        \"123_test\"};\n    for (auto name : valid_names) {\n      FieldSchema field(name, DataType::STRING);\n      auto status = field.validate();\n      if (!status.ok()) {\n        std::cout << \"status: \" << status.message() << std::endl;\n      }\n      EXPECT_TRUE(status.ok());\n    }\n  }\n\n  {\n    std::vector<std::string> invalid_names = {\n        \"\",                    // empty — len < 1\n        std::string(33, 'a'),  // len > 32\n        \"a b\",                 // space\n        \"a.b\",\n        \"a@b\",\n        \"a#b\",  // illegal chars: . 
@ #\n        \"a$b\",\n        \"a%\",\n        \"a&\",  // $ % & etc.\n        \"中文\",\n        \"用户\",  // non-ASCII\n        \"a..b\",\n        \"a__b?\",  // ? not allowed\n    };\n    for (auto name : invalid_names) {\n      FieldSchema field(name, DataType::STRING);\n      auto status = field.validate();\n      EXPECT_FALSE(status.ok());\n      EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);\n    }\n  }\n}\n\nTEST(CollectionSchemaTest, DefaultConstructor) {\n  CollectionSchema schema;\n  EXPECT_EQ(schema.name(), \"\");\n  EXPECT_EQ(schema.fields().size(), 0);\n  EXPECT_EQ(schema.max_doc_count_per_segment(), MAX_DOC_COUNT_PER_SEGMENT);\n}\n\nTEST(CollectionSchemaTest, ConstructorWithParameters) {\n  FieldSchemaPtrList fields;\n  auto field1 = std::make_shared<FieldSchema>(\"field1\", DataType::STRING);\n  auto field2 = std::make_shared<FieldSchema>(\"field2\", DataType::VECTOR_FP32);\n  fields.push_back(field1);\n  fields.push_back(field2);\n\n  CollectionSchema schema(\"test_collection\", fields);\n  EXPECT_EQ(schema.name(), \"test_collection\");\n  EXPECT_EQ(schema.fields().size(), 2);\n  EXPECT_TRUE(schema.has_field(\"field1\"));\n  EXPECT_TRUE(schema.has_field(\"field2\"));\n}\n\nTEST(CollectionSchemaTest, NameManagement) {\n  CollectionSchema schema;\n  EXPECT_EQ(schema.name(), \"\");\n\n  schema.set_name(\"new_name\");\n  EXPECT_EQ(schema.name(), \"new_name\");\n}\n\nTEST(CollectionSchemaTest, MaxDocCountPerSegment) {\n  CollectionSchema schema;\n  EXPECT_EQ(schema.max_doc_count_per_segment(), MAX_DOC_COUNT_PER_SEGMENT);\n\n  schema.set_max_doc_count_per_segment(500000);\n  EXPECT_EQ(schema.max_doc_count_per_segment(), 500000u);\n}\n\nTEST(CollectionSchemaTest, AddField) {\n  CollectionSchema schema;\n  auto field = std::make_shared<FieldSchema>(\"test_field\", DataType::STRING);\n\n  auto status = schema.add_field(field);\n  EXPECT_TRUE(status.ok());\n  EXPECT_TRUE(schema.has_field(\"test_field\"));\n  EXPECT_EQ(schema.fields().size(), 1);\n\n  
// Try to add the same field again\n  auto status2 = schema.add_field(field);\n  EXPECT_FALSE(status2.ok());\n  EXPECT_EQ(status2.code(), StatusCode::ALREADY_EXISTS);\n}\n\nTEST(CollectionSchemaTest, DropField) {\n  CollectionSchema schema;\n  auto field1 = std::make_shared<FieldSchema>(\"field1\", DataType::STRING);\n  auto field2 = std::make_shared<FieldSchema>(\"field2\", DataType::VECTOR_FP32);\n\n  schema.add_field(field1);\n  schema.add_field(field2);\n  EXPECT_EQ(schema.fields().size(), 2);\n\n  // Drop existing field\n  auto status = schema.drop_field(\"field1\");\n  EXPECT_TRUE(status.ok());\n  EXPECT_FALSE(schema.has_field(\"field1\"));\n  EXPECT_TRUE(schema.has_field(\"field2\"));\n  EXPECT_EQ(schema.fields().size(), 1);\n\n  // Try to drop non-existing field\n  auto status2 = schema.drop_field(\"nonexistent\");\n  EXPECT_FALSE(status2.ok());\n  EXPECT_EQ(status2.code(), StatusCode::NOT_FOUND);\n}\n\nTEST(CollectionSchemaTest, AlterField) {\n  CollectionSchema schema;\n  auto original_field =\n      std::make_shared<FieldSchema>(\"field\", DataType::STRING);\n  schema.add_field(original_field);\n\n  auto new_field =\n      std::make_shared<FieldSchema>(\"field\", DataType::VECTOR_FP32);\n  auto status = schema.alter_field(\"field\", new_field);\n  EXPECT_TRUE(status.ok());\n\n  auto *field = schema.get_field(\"field\");\n  EXPECT_NE(field, nullptr);\n  EXPECT_EQ(field->data_type(), DataType::VECTOR_FP32);\n\n  // Try to alter non-existing field\n  auto status2 = schema.alter_field(\"nonexistent\", new_field);\n  EXPECT_FALSE(status2.ok());\n  EXPECT_EQ(status2.code(), StatusCode::NOT_FOUND);\n}\n\nTEST(CollectionSchemaTest, FieldRetrieval) {\n  CollectionSchema schema;\n  auto string_field =\n      std::make_shared<FieldSchema>(\"string_field\", DataType::STRING);\n  auto vector_field =\n      std::make_shared<FieldSchema>(\"vector_field\", DataType::VECTOR_FP32);\n\n  schema.add_field(string_field);\n  schema.add_field(vector_field);\n\n  // Test 
get_field\n  const auto *const_string_field = schema.get_field(\"string_field\");\n  EXPECT_NE(const_string_field, nullptr);\n  EXPECT_EQ(const_string_field->data_type(), DataType::STRING);\n\n  auto *mutable_string_field = schema.get_field(\"string_field\");\n  EXPECT_NE(mutable_string_field, nullptr);\n  EXPECT_EQ(mutable_string_field->data_type(), DataType::STRING);\n\n  // Test get_forward_field\n  const auto *const_forward_field = schema.get_forward_field(\"string_field\");\n  EXPECT_NE(const_forward_field, nullptr);\n  EXPECT_EQ(const_forward_field->data_type(), DataType::STRING);\n\n  auto *mutable_forward_field = schema.get_forward_field(\"string_field\");\n  EXPECT_NE(mutable_forward_field, nullptr);\n  EXPECT_EQ(mutable_forward_field->data_type(), DataType::STRING);\n\n  // Forward field should return nullptr for vector fields\n  EXPECT_EQ(schema.get_forward_field(\"vector_field\"), nullptr);\n\n  // Test get_vector_field\n  const auto *const_vector_field = schema.get_vector_field(\"vector_field\");\n  EXPECT_NE(const_vector_field, nullptr);\n  EXPECT_EQ(const_vector_field->data_type(), DataType::VECTOR_FP32);\n\n  auto *mutable_vector_field = schema.get_vector_field(\"vector_field\");\n  EXPECT_NE(mutable_vector_field, nullptr);\n  EXPECT_EQ(mutable_vector_field->data_type(), DataType::VECTOR_FP32);\n\n  // Vector field should return nullptr for string fields\n  EXPECT_EQ(schema.get_vector_field(\"string_field\"), nullptr);\n\n  // Test non-existing field\n  EXPECT_EQ(schema.get_field(\"nonexistent\"), nullptr);\n  EXPECT_EQ(schema.get_forward_field(\"nonexistent\"), nullptr);\n  EXPECT_EQ(schema.get_vector_field(\"nonexistent\"), nullptr);\n}\n\nTEST(CollectionSchemaTest, FieldLists) {\n  CollectionSchema schema;\n  auto string_field =\n      std::make_shared<FieldSchema>(\"string_field\", DataType::STRING);\n  auto vector_field =\n      std::make_shared<FieldSchema>(\"vector_field\", DataType::VECTOR_FP32);\n  auto array_field =\n      
std::make_shared<FieldSchema>(\"array_field\", DataType::ARRAY_INT32);\n\n  schema.add_field(string_field);\n  schema.add_field(vector_field);\n  schema.add_field(array_field);\n\n  // Test fields()\n  auto all_fields = schema.fields();\n  EXPECT_EQ(all_fields.size(), 3);\n\n  // Test forward_fields()\n  auto forward_fields = schema.forward_fields();\n  EXPECT_EQ(forward_fields.size(), 2);  // string_field and array_field\n\n  // Test forward_field_names()\n  auto forward_field_names = schema.forward_field_names();\n  EXPECT_EQ(forward_field_names.size(), 2);\n  EXPECT_TRUE(std::find(forward_field_names.begin(), forward_field_names.end(),\n                        \"string_field\") != forward_field_names.end());\n  EXPECT_TRUE(std::find(forward_field_names.begin(), forward_field_names.end(),\n                        \"array_field\") != forward_field_names.end());\n\n  // Test vector_fields()\n  auto vector_fields = schema.vector_fields();\n  EXPECT_EQ(vector_fields.size(), 1);\n  EXPECT_EQ(vector_fields[0]->name(), \"vector_field\");\n}\n\nTEST(CollectionSchemaTest, IndexManagement) {\n  CollectionSchema schema;\n  auto field =\n      std::make_shared<FieldSchema>(\"indexed_field\", DataType::VECTOR_FP32);\n  schema.add_field(field);\n\n  auto forward_field =\n      std::make_shared<FieldSchema>(\"forward_field\", DataType::STRING);\n  schema.add_field(forward_field);\n\n  // Test has_index on field without index\n  EXPECT_FALSE(schema.has_index(\"indexed_field\"));\n  EXPECT_FALSE(schema.has_index(\"forward_field\"));\n\n  // Add index\n  auto index_params =\n      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);\n  auto status = schema.add_index(\"indexed_field\", index_params);\n  EXPECT_TRUE(status.ok());\n  EXPECT_TRUE(schema.has_index(\"indexed_field\"));\n\n  // Try to add index to non-existing field\n  auto status2 = schema.add_index(\"nonexistent\", index_params);\n  EXPECT_FALSE(status2.ok());\n  EXPECT_EQ(status2.code(), 
StatusCode::NOT_FOUND);\n\n  // Drop index\n  auto status3 = schema.drop_index(\"indexed_field\");\n  EXPECT_TRUE(status3.ok());\n  EXPECT_FALSE(schema.has_index(\"indexed_field\"));\n\n  // Try to drop index from non-existing field\n  auto status4 = schema.drop_index(\"nonexistent\");\n  EXPECT_FALSE(status4.ok());\n  EXPECT_EQ(status4.code(), StatusCode::NOT_FOUND);\n\n  auto forward_index_params = std::make_shared<InvertIndexParams>(false);\n  auto status5 = schema.add_index(\"forward_field\", forward_index_params);\n  EXPECT_TRUE(status5.ok());\n  EXPECT_TRUE(schema.has_index(\"forward_field\"));\n\n  auto status6 = schema.drop_index(\"forward_field\");\n  EXPECT_TRUE(status6.ok());\n  EXPECT_FALSE(schema.has_index(\"forward_field\"));\n}\n\nTEST(CollectionSchemaTest, CopyConstructor) {\n  CollectionSchema original(\"original_schema\", {});\n  auto field = std::make_shared<FieldSchema>(\"field\", DataType::STRING);\n  original.add_field(field);\n  original.set_max_doc_count_per_segment(100000);\n\n  CollectionSchema copy(original);\n  EXPECT_EQ(copy.name(), \"original_schema\");\n  EXPECT_EQ(copy.fields().size(), 1);\n  EXPECT_TRUE(copy.has_field(\"field\"));\n  EXPECT_EQ(copy.max_doc_count_per_segment(), 100000u);\n}\n\nTEST(CollectionSchemaTest, Validate) {\n  CollectionSchema original(\"original_schema\", {});\n  auto field =\n      std::make_shared<FieldSchema>(\"sparse\", DataType::SPARSE_VECTOR_FP32);\n  original.add_field(field);\n  original.set_max_doc_count_per_segment(100000);\n\n  ASSERT_TRUE(original.validate().ok());\n\n  CollectionSchema c1;\n  auto s = c1.validate();\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  CollectionSchema c2(\"c2\", {});\n  s = c2.validate();\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  auto f1 = std::make_shared<FieldSchema>();\n  CollectionSchema c3(\"c3\", {f1});\n  s = c3.validate();\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), 
StatusCode::INVALID_ARGUMENT);\n\n  auto f2 = std::make_shared<FieldSchema>(\"f2\", DataType::INT32);\n  CollectionSchema c4(\"c4\", {f2});\n  s = c4.validate();\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  auto f3 = std::make_shared<FieldSchema>(\"f3\", DataType::VECTOR_FP16);\n  CollectionSchema c5(\"c5\", {f3});\n  s = c5.validate();\n  ASSERT_FALSE(s.ok());\n  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n  // validate collection name regex \"^[a-zA-Z0-9_-]{3,64}$\"\n  {\n    std::vector<std::string> invalid_names = {\n        \"\",                    // empty\n        \"ab\",                  // too short (<3)\n        std::string(65, 'a'),  // too long (>64)\n        \"a b\",                 // space not allowed\n        \"a.b\",                 // dot not allowed\n        \"a$b\",                 // $ not allowed\n        \"中文\",                // non-ASCII\n        \"a\\nb\",                // newline not allowed\n        \"a\\tb\",                // tab not allowed\n        \"a\\rb\",                // carriage return not allowed\n    };\n\n    for (const auto &name : invalid_names) {\n      CollectionSchema c(name, {field});\n      s = c.validate();\n      if (!s.ok()) {\n        std::cout << \"Invalid name: \" << name << std::endl;\n      }\n      ASSERT_FALSE(s.ok());\n      ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n    }\n\n    std::vector<std::string> valid_names = {\n        \"test_collection_supported_vectors\",\n        std::string(64, 'a'),\n        \"a_b\",     // underscore allowed\n        \"a-b\",     // dash allowed\n        \"a_1\",     // underscore and digit allowed\n        \"a-1\",     // dash and digit allowed\n        \"a_1b\",    // underscore, digit and letter allowed\n        \"a-1b\",    // dash, digit and letter allowed\n        \"-start\",  // allowed! 
(regex permits leading -/_)\n        \"_start\",  // also allowed\n        \"end-\",\n        \"end_\",  // trailing -/_ allowed\n        \"a--b\",\n        \"__b\",\n        \"a__b\"  // consecutive allowed\n    };\n    for (const auto &name : valid_names) {\n      CollectionSchema c(name, {field});\n      s = c.validate();\n      ASSERT_TRUE(s.ok());\n    }\n  }\n\n  // validate vector/scalar field size\n  {\n    std::vector<FieldSchema::Ptr> fields;\n    for (int i = 0; i < 1025; ++i) {\n      auto f = std::make_shared<FieldSchema>(\"f\" + std::to_string(i),\n                                             DataType::VECTOR_FP32, 1024);\n      fields.emplace_back(f);\n    }\n    CollectionSchema c5(\"c5\", fields);\n    s = c5.validate();\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n\n    std::vector<FieldSchema::Ptr> vectors;\n    for (int i = 0; i < 5; ++i) {\n      auto f = std::make_shared<FieldSchema>(\n          \"f\" + std::to_string(i), DataType::VECTOR_FP32, 1024, false);\n      fields.emplace_back(f);\n    }\n    CollectionSchema c6(\"c6\", fields);\n    s = c6.validate();\n    ASSERT_FALSE(s.ok());\n    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);\n  }\n}\n\n#if RABITQ_SUPPORTED\nTEST(FieldSchemaTest, HnswRabitqIndexValidationMetricTypes) {\n  // Test supported combinations: FP32 + (L2/IP/COSINE)\n\n  // FP32 + L2\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok())\n        << \"FP32 + L2 should be supported, but got error: \" << status.message();\n  }\n\n  // FP32 + IP\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::IP, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 128, false,\n          
            index_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok())\n        << \"FP32 + IP should be supported, but got error: \" << status.message();\n  }\n\n  // FP32 + COSINE\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::COSINE, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok())\n        << \"FP32 + COSINE should be supported, but got error: \"\n        << status.message();\n  }\n\n  // FP32 + MIPSL2\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::MIPSL2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"FP32 + MIPSL2 should not be supported, but got error: \"\n        << status.message();\n  }\n}\n\n\nTEST(FieldSchemaTest, HnswRabitqIndexValidation_Dimension) {\n  // Dimension less than 64 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 63, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"Dimension 63 should not be supported with HNSW_RABITQ\";\n    EXPECT_NE(\n        status.message().find(\"HNSW_RABITQ index only support dimension in\"),\n        std::string::npos)\n        << \"Error message should mention dimension range, got: \"\n        << status.message();\n  }\n\n  // Dimension equal to 1 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 1, false,\n          
            index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"Dimension 1 should not be supported with HNSW_RABITQ\";\n  }\n\n  // Dimension greater than 4095 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 4096, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"Dimension 4096 should not be supported with HNSW_RABITQ\";\n    EXPECT_NE(\n        status.message().find(\"HNSW_RABITQ index only support dimension in\"),\n        std::string::npos)\n        << \"Error message should mention dimension range, got: \"\n        << status.message();\n  }\n\n  // Boundary: dimension 64 should be supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 64, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok())\n        << \"Dimension 64 should be supported, but got error: \"\n        << status.message();\n  }\n\n  // Boundary: dimension 4095 should be supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP32, 4095, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_TRUE(status.ok())\n        << \"Dimension 4095 should be supported, but got error: \"\n        << status.message();\n  }\n}\n#endif\n\nTEST(FieldSchemaTest, HnswRabitqIndexValidation_UnsupportedDataTypes) {\n  // Test unsupported data types with HNSW_RABITQ index\n\n  // FP16 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 
256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP16, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"FP16 should not be supported with HNSW_RABITQ\";\n    EXPECT_NE(\n        status.message().find(\"HNSW_RABITQ index only support FP32 data type\"),\n        std::string::npos)\n        << \"Error message should mention FP32 support only, got: \"\n        << status.message();\n  }\n\n  // INT8 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_INT8, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"INT8 should not be supported with HNSW_RABITQ\";\n    EXPECT_NE(\n        status.message().find(\"HNSW_RABITQ index only support FP32 data type\"),\n        std::string::npos)\n        << \"Error message should mention FP32 support only, got: \"\n        << status.message();\n  }\n\n  // FP64 is not supported\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::L2, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::VECTOR_FP64, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"FP64 should not be supported with HNSW_RABITQ\";\n  }\n\n  // Sparse vector is not supported with HNSW_RABITQ\n  {\n    auto index_params = std::make_shared<HnswRabitqIndexParams>(\n        MetricType::IP, 7, 256, 16, 200, 0);\n    FieldSchema field(\"vector_field\", DataType::SPARSE_VECTOR_FP32, 128, false,\n                      index_params);\n    auto status = field.validate();\n    EXPECT_FALSE(status.ok())\n        << \"Sparse vector should not be supported with HNSW_RABITQ\";\n    EXPECT_NE(\n        
status.message().find(\"sparse_vector's index_params only support\"),\n        std::string::npos)\n        << \"Error message should mention sparse vector index support, got: \"\n        << status.message();\n  }\n}\n"
  },
  {
    "path": "tests/db/index/common/version_manager_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/common/version_manager.h\"\n#include <filesystem>\n#include <memory>\n#include <gtest/gtest.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/meta.h\"\n#include \"proto/zvec.pb.h\"\n#include \"zvec/db/schema.h\"\n\nnamespace zvec {\n\nclass VersionManagerTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    // Create a temporary directory for testing\n    test_path_ = \"./version_manager_test\";\n    FileHelper::RemoveDirectory(test_path_);\n    FileHelper::CreateDirectory(test_path_);\n  }\n\n  void TearDown() override {\n    // Clean up temporary files\n    FileHelper::RemoveDirectory(test_path_);\n  }\n\n  std::string test_path_;\n};\n\n// Test basic Version functionality\nTEST_F(VersionManagerTest, VersionBasicOperations) {\n  Version version;\n\n  // Create a sample schema\n  CollectionSchema schema;\n  schema.set_name(\"test_collection\");\n\n  version.set_schema(schema);\n\n  // Verify schema is set correctly\n  EXPECT_EQ(version.schema().name(), \"test_collection\");\n\n  // Test segment meta operations\n  auto segment_meta = std::make_shared<SegmentMeta>(1);\n  segment_meta->set_id(1);\n\n  // Add segment meta\n  EXPECT_TRUE(version.add_persisted_segment_meta(segment_meta).ok());\n\n  // Try to add duplicate - should fail\n  
EXPECT_FALSE(version.add_persisted_segment_meta(segment_meta).ok());\n\n  // Get segment metas\n  auto segment_metas = version.persisted_segment_metas();\n  EXPECT_EQ(segment_metas.size(), 1);\n  EXPECT_EQ(segment_metas[0]->id(), 1);\n\n  // Remove segment meta\n  EXPECT_TRUE(version.remove_persisted_segment_meta(1).ok());\n  EXPECT_EQ(version.persisted_segment_metas().size(), 0);\n\n  // Try to remove non-existent segment - should fail\n  EXPECT_FALSE(version.remove_persisted_segment_meta(1).ok());\n\n  std::cout << version.to_string() << std::endl;\n  std::cout << version.to_string_formatted() << std::endl;\n}\n\n// Test Version Load/Save operations\nTEST_F(VersionManagerTest, VersionLoadSave) {\n  std::string manifest_path = test_path_ + \"/manifest\";\n\n  // Create and populate a version\n  Version version;\n\n  CollectionSchema schema;\n  schema.set_name(\"test_collection\");\n  version.set_schema(schema);\n\n  auto segment_meta = std::make_shared<SegmentMeta>(1);\n  segment_meta->set_id(1);\n  version.add_persisted_segment_meta(segment_meta);\n\n  version.set_id_map_path_suffix(100);\n  version.set_delete_snapshot_path_suffix(200);\n  version.set_next_segment_id(2);\n\n  // Save version\n  EXPECT_TRUE(Version::Save(manifest_path, version).ok());\n\n  // Load version\n  Version loaded_version;\n  EXPECT_TRUE(Version::Load(manifest_path, &loaded_version).ok());\n\n  // Verify loaded version matches original\n  EXPECT_EQ(loaded_version.schema().name(), \"test_collection\");\n  EXPECT_EQ(loaded_version.persisted_segment_metas().size(), 1);\n  EXPECT_EQ(loaded_version.id_map_path_suffix(), 100);\n  EXPECT_EQ(loaded_version.delete_snapshot_path_suffix(), 200);\n  EXPECT_EQ(loaded_version.next_segment_id(), 2);\n}\n\n// Test VersionManager creation and recovery\nTEST_F(VersionManagerTest, VersionManagerCreateAndRecover) {\n  std::string version_path = test_path_ + \"/version\";\n\n  std::filesystem::create_directories(version_path);\n\n  // Create initial version\n 
 Version initial_version;\n  CollectionSchema schema;\n  schema.set_name(\"initial_collection\");\n  initial_version.set_schema(schema);\n\n  auto segment_meta = std::make_shared<SegmentMeta>(1);\n  segment_meta->set_id(1);\n  initial_version.add_persisted_segment_meta(segment_meta);\n\n  // Create VersionManager\n  auto create_result = VersionManager::Create(version_path, initial_version);\n  EXPECT_TRUE(create_result.has_value());\n\n  auto version_manager = create_result.value();\n\n  // Get current version and verify\n  auto current_version = version_manager->get_current_version();\n  EXPECT_EQ(current_version.schema().name(), \"initial_collection\");\n\n  // Modify version\n  auto new_segment = std::make_shared<SegmentMeta>(2);\n  new_segment->set_id(2);\n  EXPECT_TRUE(version_manager->add_persisted_segment_meta(new_segment).ok());\n\n  // Flush changes\n  ASSERT_TRUE(version_manager->flush().ok());\n\n  // Recover VersionManager\n  auto recover_result = VersionManager::Recovery(version_path);\n  EXPECT_TRUE(recover_result.has_value());\n\n  auto recovered_manager = recover_result.value();\n  auto recovered_version = recovered_manager->get_current_version();\n\n  // Verify recovered version matches modified version\n  EXPECT_EQ(recovered_version.schema().name(), \"initial_collection\");\n  EXPECT_EQ(recovered_version.persisted_segment_metas().size(), 2);\n}\n\n// Test VersionManager operations\nTEST_F(VersionManagerTest, VersionManagerOperations) {\n  std::string version_path = test_path_ + \"/version_ops\";\n\n  std::filesystem::create_directories(version_path);\n\n  // Create initial version\n  Version initial_version;\n  CollectionSchema schema;\n  schema.set_name(\"test_collection\");\n  initial_version.set_schema(schema);\n\n  auto create_result = VersionManager::Create(version_path, initial_version);\n  auto version_manager = create_result.value();\n\n  // Test segment meta operations through VersionManager\n  auto segment_meta = 
std::make_shared<SegmentMeta>(1);\n  segment_meta->set_id(1);\n  EXPECT_TRUE(version_manager->add_persisted_segment_meta(segment_meta).ok());\n\n  // Test reset writing segment meta\n  auto writing_segment = std::make_shared<SegmentMeta>(100);\n  writing_segment->set_id(100);\n  EXPECT_TRUE(\n      version_manager->reset_writing_segment_meta(writing_segment).ok());\n\n  // Test suffix setters\n  version_manager->set_id_map_path_suffix(50);\n  version_manager->set_delete_snapshot_path_suffix(60);\n  version_manager->set_next_segment_id(3);\n\n  // Flush and verify\n  EXPECT_TRUE(version_manager->flush().ok());\n\n  auto current_version = version_manager->get_current_version();\n  EXPECT_EQ(current_version.id_map_path_suffix(), 50);\n  EXPECT_EQ(current_version.delete_snapshot_path_suffix(), 60);\n  EXPECT_EQ(current_version.next_segment_id(), 3);\n  EXPECT_EQ(current_version.writing_segment_meta()->id(), 100);\n}\n\n// Test Version equality operator\nTEST_F(VersionManagerTest, VersionEquality) {\n  Version version1, version2;\n\n  CollectionSchema schema1, schema2;\n  schema1.set_name(\"collection1\");\n  schema2.set_name(\"collection1\");\n\n  version1.set_schema(schema1);\n  version2.set_schema(schema2);\n\n  auto segment_meta1 = std::make_shared<SegmentMeta>(1);\n  segment_meta1->set_id(1);\n  version1.add_persisted_segment_meta(segment_meta1);\n\n  auto segment_meta2 = std::make_shared<SegmentMeta>(1);\n  segment_meta2->set_id(1);\n  version2.add_persisted_segment_meta(segment_meta2);\n\n  // Versions should be equal\n  EXPECT_TRUE(version1 == version2);\n\n  // Make them different\n  auto segment_meta3 = std::make_shared<SegmentMeta>(2);\n  segment_meta3->set_id(2);\n  version2.add_persisted_segment_meta(segment_meta3);\n\n  // Versions should not be equal now\n  EXPECT_FALSE(version1 == version2);\n}\n\n// Test error conditions\nTEST_F(VersionManagerTest, ErrorConditions) {\n  std::string version_path = test_path_ + \"/error_test\";\n\n  
std::filesystem::create_directories(version_path);\n\n  // Create initial version\n  Version initial_version;\n  CollectionSchema schema;\n  schema.set_name(\"test\");\n  initial_version.set_schema(schema);\n\n  auto create_result = VersionManager::Create(version_path, initial_version);\n  auto version_manager = create_result.value();\n\n  // Test operations with null segment meta\n  EXPECT_FALSE(version_manager->add_persisted_segment_meta(nullptr).ok());\n\n  // Test operations with non-existent segment ID\n  EXPECT_FALSE(version_manager->remove_persisted_segment_meta(999).ok());\n}\n\n// Test conversion between protobuf and internal schema\nTEST_F(VersionManagerTest, SchemaConversion) {\n  // Create protobuf schema\n  zvec::proto::CollectionSchema pb_schema;\n  pb_schema.set_name(\"test_collection\");\n\n  auto pb_field = pb_schema.add_fields();\n  pb_field->set_name(\"vector_field\");\n  pb_field->set_data_type(zvec::proto::DataType::DT_VECTOR_FP32);\n  pb_field->set_dimension(128);\n\n  // Convert to internal schema (this would be done in the Load method)\n  CollectionSchema internal_schema;\n  internal_schema.set_name(pb_schema.name());\n  // In a real implementation, fields would be converted here\n\n  // Test that we can set and retrieve the schema\n  Version version;\n  version.set_schema(internal_schema);\n\n  EXPECT_EQ(version.schema().name(), \"test_collection\");\n}\n\n// Test SegmentMeta functionality\nTEST_F(VersionManagerTest, SegmentMetaOperations) {\n  SegmentMeta segment_meta(10);\n\n  EXPECT_EQ(segment_meta.id(), 10);\n\n  // Test block operations\n  BlockMeta block(1, BlockType::SCALAR, 0, 100);\n  segment_meta.add_persisted_block(block);\n\n  EXPECT_EQ(segment_meta.persisted_blocks().size(), 1);\n  EXPECT_EQ(segment_meta.persisted_blocks()[0].id(), 1);\n\n  // Test indexed vector fields\n  EXPECT_FALSE(segment_meta.vector_indexed(\"field1\"));\n  segment_meta.add_indexed_vector_field(\"field1\");\n  
EXPECT_TRUE(segment_meta.vector_indexed(\"field1\"));\n\n  // Test min/max doc id\n  EXPECT_EQ(segment_meta.min_doc_id(), 0);\n  EXPECT_EQ(segment_meta.max_doc_id(), 100);\n}\n\n}  // namespace zvec"
  },
  {
    "path": "tests/db/index/segment/column_merging_reader_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/segment/column_merging_reader.h\"\n#include <memory>\n#include <vector>\n#include <arrow/api.h>\n#include <arrow/builder.h>\n#include <arrow/ipc/writer.h>\n#include <arrow/testing/gtest_util.h>\n#include <arrow/testing/util.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\narrow::Result<std::shared_ptr<arrow::Array>> MakeInt32Array(\n    const std::vector<int32_t> &values) {\n  arrow::Int32Builder builder;\n  ARROW_RETURN_NOT_OK(builder.AppendValues(values));\n  std::shared_ptr<arrow::Array> array;\n  ARROW_RETURN_NOT_OK(builder.Finish(&array));\n  return array;\n}\n\narrow::Result<std::shared_ptr<arrow::RecordBatch>> MakeInt32RecordBatch(\n    const std::string &column_name, const std::vector<int32_t> &values) {\n  ARROW_ASSIGN_OR_RAISE(auto array, MakeInt32Array(values));\n  auto schema = arrow::schema({arrow::field(column_name, arrow::int32())});\n  return arrow::RecordBatch::Make(schema, values.size(), {array});\n}\n\n// Mock RecordBatchReader for testing error conditions\nclass MockErrorRecordBatchReader : public arrow::ipc::RecordBatchReader {\n public:\n  explicit MockErrorRecordBatchReader(arrow::StatusCode error_code)\n      : error_code_(error_code) {}\n\n  std::shared_ptr<arrow::Schema> schema() const override {\n    return arrow::schema({arrow::field(\"dummy\", arrow::int32())});\n  }\n\n  arrow::Status 
ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n    *out = nullptr;\n    return arrow::Status(error_code_, \"Mock error\");\n  }\n\n private:\n  arrow::StatusCode error_code_;\n};\n\n// Test fixture\nclass ColumnMergingReaderTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    // Create test schemas\n    schema1_ = arrow::schema({arrow::field(\"col1\", arrow::int32()),\n                              arrow::field(\"col2\", arrow::int32())});\n\n    schema2_ = arrow::schema({arrow::field(\"col3\", arrow::int32()),\n                              arrow::field(\"col4\", arrow::int32())});\n\n    target_schema_ = arrow::schema({arrow::field(\"col1\", arrow::int32()),\n                                    arrow::field(\"col2\", arrow::int32()),\n                                    arrow::field(\"col3\", arrow::int32()),\n                                    arrow::field(\"col4\", arrow::int32())});\n  }\n\n  std::shared_ptr<arrow::Schema> schema1_;\n  std::shared_ptr<arrow::Schema> schema2_;\n  std::shared_ptr<arrow::Schema> target_schema_;\n};\n\n// Test Make factory method\nTEST_F(ColumnMergingReaderTest, Make) {\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  auto reader = ColumnMergingReader::Make(target_schema_, std::move(readers));\n  ASSERT_NE(reader, nullptr);\n  EXPECT_EQ(reader->schema(), target_schema_);\n}\n\n// Test constructor and schema method\nTEST_F(ColumnMergingReaderTest, ConstructorAndSchema) {\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  auto reader =\n      std::make_shared<ColumnMergingReader>(target_schema_, std::move(readers));\n  EXPECT_EQ(reader->schema(), target_schema_);\n}\n\n// Test normal operation with two readers\nTEST_F(ColumnMergingReaderTest, NormalOperation) {\n  // Create first batch with col1 and col2\n  auto array1 = MakeInt32Array({1, 2, 3}).ValueOrDie();\n  auto array2 = MakeInt32Array({4, 5, 6}).ValueOrDie();\n  auto batch1 = 
arrow::RecordBatch::Make(schema1_, 3, {array1, array2});\n\n  // Create second batch with col3 and col4\n  auto array3 = MakeInt32Array({7, 8, 9}).ValueOrDie();\n  auto array4 = MakeInt32Array({10, 11, 12}).ValueOrDie();\n  auto batch2 = arrow::RecordBatch::Make(schema2_, 3, {array3, array4});\n\n  // Create mock readers\n  class MockRecordBatchReader : public arrow::ipc::RecordBatchReader {\n   public:\n    explicit MockRecordBatchReader(std::shared_ptr<arrow::RecordBatch> batch)\n        : batch_(batch), returned_(false) {}\n\n    std::shared_ptr<arrow::Schema> schema() const override {\n      return batch_->schema();\n    }\n\n    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n      if (!returned_) {\n        *out = batch_;\n        returned_ = true;\n      } else {\n        *out = nullptr;\n      }\n      return arrow::Status::OK();\n    }\n\n   private:\n    std::shared_ptr<arrow::RecordBatch> batch_;\n    bool returned_;\n  };\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch1));\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch2));\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema_, std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  ASSERT_NE(result_batch, nullptr);\n  EXPECT_EQ(result_batch->num_rows(), 3);\n  EXPECT_EQ(result_batch->num_columns(), 4);\n\n  // Check column values\n  auto col1 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(0));\n  auto col2 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(1));\n  auto col3 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(2));\n  auto col4 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(3));\n\n  EXPECT_EQ(col1->Value(0), 1);\n  EXPECT_EQ(col1->Value(1), 2);\n  
EXPECT_EQ(col1->Value(2), 3);\n\n  EXPECT_EQ(col2->Value(0), 4);\n  EXPECT_EQ(col2->Value(1), 5);\n  EXPECT_EQ(col2->Value(2), 6);\n\n  EXPECT_EQ(col3->Value(0), 7);\n  EXPECT_EQ(col3->Value(1), 8);\n  EXPECT_EQ(col3->Value(2), 9);\n\n  EXPECT_EQ(col4->Value(0), 10);\n  EXPECT_EQ(col4->Value(1), 11);\n  EXPECT_EQ(col4->Value(2), 12);\n\n  // Second read should return nullptr (EOF)\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  EXPECT_EQ(result_batch, nullptr);\n}\n\n// Test with empty readers\nTEST_F(ColumnMergingReaderTest, EmptyReaders) {\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema_, std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  EXPECT_EQ(result_batch, nullptr);\n}\n\n// Test with inconsistent row counts\nTEST_F(ColumnMergingReaderTest, InconsistentRowCounts) {\n  // Create first batch with 3 rows\n  auto array1 = MakeInt32Array({1, 2, 3}).ValueOrDie();\n  auto batch1 = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col1\", arrow::int32())}), 3, {array1});\n\n  // Create second batch with 2 rows\n  auto array2 = MakeInt32Array({4, 5}).ValueOrDie();\n  auto batch2 = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col2\", arrow::int32())}), 2, {array2});\n\n  class MockRecordBatchReader : public arrow::ipc::RecordBatchReader {\n   public:\n    explicit MockRecordBatchReader(std::shared_ptr<arrow::RecordBatch> batch)\n        : batch_(batch), returned_(false) {}\n\n    std::shared_ptr<arrow::Schema> schema() const override {\n      return batch_->schema();\n    }\n\n    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n      if (!returned_) {\n        *out = batch_;\n        returned_ = true;\n      } else {\n        *out = nullptr;\n      }\n      return arrow::Status::OK();\n    }\n\n   private:\n    
std::shared_ptr<arrow::RecordBatch> batch_;\n    bool returned_;\n  };\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch1));\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch2));\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema_, std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  arrow::Status status = merging_reader->ReadNext(&result_batch);\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), arrow::StatusCode::Invalid);\n}\n\n// Test missing column\nTEST_F(ColumnMergingReaderTest, MissingColumn) {\n  // Create batch with only col1\n  auto array1 = MakeInt32Array({1, 2, 3}).ValueOrDie();\n  auto batch1 = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col1\", arrow::int32())}), 3, {array1});\n\n  class MockRecordBatchReader : public arrow::ipc::RecordBatchReader {\n   public:\n    explicit MockRecordBatchReader(std::shared_ptr<arrow::RecordBatch> batch)\n        : batch_(batch), returned_(false) {}\n\n    std::shared_ptr<arrow::Schema> schema() const override {\n      return batch_->schema();\n    }\n\n    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n      if (!returned_) {\n        *out = batch_;\n        returned_ = true;\n      } else {\n        *out = nullptr;\n      }\n      return arrow::Status::OK();\n    }\n\n   private:\n    std::shared_ptr<arrow::RecordBatch> batch_;\n    bool returned_;\n  };\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch1));\n\n  // Target schema requires col1 and col2 but we only provide col1\n  auto target_schema = arrow::schema({arrow::field(\"col1\", arrow::int32()),\n                                      arrow::field(\"col2\", arrow::int32())});\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema, 
std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  arrow::Status status = merging_reader->ReadNext(&result_batch);\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), arrow::StatusCode::Invalid);\n}\n\n// Test read error\nTEST_F(ColumnMergingReaderTest, ReadError) {\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(\n      std::make_shared<MockErrorRecordBatchReader>(arrow::StatusCode::IOError));\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema_, std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  arrow::Status status = merging_reader->ReadNext(&result_batch);\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), arrow::StatusCode::IOError);\n}\n\n// Test multiple reads\nTEST_F(ColumnMergingReaderTest, MultipleReads) {\n  // Create batches\n  auto array1a = MakeInt32Array({1, 2}).ValueOrDie();\n  auto batch1a = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col1\", arrow::int32())}), 2, {array1a});\n\n  auto array1b = MakeInt32Array({3, 4}).ValueOrDie();\n  auto batch1b = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col1\", arrow::int32())}), 2, {array1b});\n\n  auto array2a = MakeInt32Array({5, 6}).ValueOrDie();\n  auto batch2a = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col2\", arrow::int32())}), 2, {array2a});\n\n  auto array2b = MakeInt32Array({7, 8}).ValueOrDie();\n  auto batch2b = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col2\", arrow::int32())}), 2, {array2b});\n\n  class MultiBatchRecordBatchReader : public arrow::ipc::RecordBatchReader {\n   public:\n    explicit MultiBatchRecordBatchReader(\n        std::vector<std::shared_ptr<arrow::RecordBatch>> batches)\n        : batches_(std::move(batches)), index_(0) {}\n\n    std::shared_ptr<arrow::Schema> schema() const override {\n      return batches_.empty() ? 
arrow::schema({}) : batches_[0]->schema();\n    }\n\n    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n      if (index_ < batches_.size()) {\n        *out = batches_[index_++];\n      } else {\n        *out = nullptr;\n      }\n      return arrow::Status::OK();\n    }\n\n   private:\n    std::vector<std::shared_ptr<arrow::RecordBatch>> batches_;\n    size_t index_;\n  };\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(std::make_shared<MultiBatchRecordBatchReader>(\n      std::vector<std::shared_ptr<arrow::RecordBatch>>{batch1a, batch1b}));\n  readers.push_back(std::make_shared<MultiBatchRecordBatchReader>(\n      std::vector<std::shared_ptr<arrow::RecordBatch>>{batch2a, batch2b}));\n\n  auto target_schema = arrow::schema({arrow::field(\"col1\", arrow::int32()),\n                                      arrow::field(\"col2\", arrow::int32())});\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema, std::move(readers));\n\n  // First read\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  ASSERT_NE(result_batch, nullptr);\n  EXPECT_EQ(result_batch->num_rows(), 2);\n\n  auto col1 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(0));\n  auto col2 =\n      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(1));\n  EXPECT_EQ(col1->Value(0), 1);\n  EXPECT_EQ(col1->Value(1), 2);\n  EXPECT_EQ(col2->Value(0), 5);\n  EXPECT_EQ(col2->Value(1), 6);\n\n  // Second read\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  ASSERT_NE(result_batch, nullptr);\n  EXPECT_EQ(result_batch->num_rows(), 2);\n\n  col1 = std::static_pointer_cast<arrow::Int32Array>(result_batch->column(0));\n  col2 = std::static_pointer_cast<arrow::Int32Array>(result_batch->column(1));\n  EXPECT_EQ(col1->Value(0), 3);\n  EXPECT_EQ(col1->Value(1), 4);\n  EXPECT_EQ(col2->Value(0), 7);\n  EXPECT_EQ(col2->Value(1), 
8);\n\n  // Third read - should be EOF\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  EXPECT_EQ(result_batch, nullptr);\n}\n\n// Test zero row batches\nTEST_F(ColumnMergingReaderTest, ZeroRowBatches) {\n  auto array1 = MakeInt32Array({}).ValueOrDie();\n  auto batch1 = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col1\", arrow::int32())}), 0, {array1});\n\n  auto array2 = MakeInt32Array({}).ValueOrDie();\n  auto batch2 = arrow::RecordBatch::Make(\n      arrow::schema({arrow::field(\"col2\", arrow::int32())}), 0, {array2});\n\n  class MockRecordBatchReader : public arrow::ipc::RecordBatchReader {\n   public:\n    explicit MockRecordBatchReader(std::shared_ptr<arrow::RecordBatch> batch)\n        : batch_(batch), returned_(false) {}\n\n    std::shared_ptr<arrow::Schema> schema() const override {\n      return batch_->schema();\n    }\n\n    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {\n      if (!returned_) {\n        *out = batch_;\n        returned_ = true;\n      } else {\n        *out = nullptr;\n      }\n      return arrow::Status::OK();\n    }\n\n   private:\n    std::shared_ptr<arrow::RecordBatch> batch_;\n    bool returned_;\n  };\n\n  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch1));\n  readers.push_back(std::make_shared<MockRecordBatchReader>(batch2));\n\n  auto target_schema = arrow::schema({arrow::field(\"col1\", arrow::int32()),\n                                      arrow::field(\"col2\", arrow::int32())});\n\n  auto merging_reader =\n      ColumnMergingReader::Make(target_schema, std::move(readers));\n\n  std::shared_ptr<arrow::RecordBatch> result_batch;\n  ASSERT_OK(merging_reader->ReadNext(&result_batch));\n  EXPECT_EQ(result_batch, nullptr);\n}"
  },
  {
    "path": "tests/db/index/segment/segment_helper_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/segment/segment_helper.h\"\n#include <cstdint>\n#include <filesystem>\n#include <iostream>\n#include <memory>\n#include <thread>\n#include <variant>\n#include <arrow/array/array_binary.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/pretty_print.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include \"db/common/constants.h\"\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/delete_store.h\"\n#include \"db/index/common/id_map.h\"\n#include \"db/index/common/meta.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/segment/segment.h\"\n#include \"utils/utils.h\"\n#include \"zvec/db/options.h\"\n#include \"zvec/db/schema.h\"\n\nusing namespace zvec;\n\nclass SegmentHelperTest : public testing::Test {\n protected:\n  void SetUp() override {\n    ailego::LoggerBroker::SetLevel(ailego::Logger::LEVEL_INFO);\n\n    FileHelper::RemoveDirectory(col_path);\n    FileHelper::CreateDirectory(col_path);\n\n    std::string idmap_path =\n        FileHelper::MakeFilePath(col_path, FileID::ID_FILE, 0);\n    id_map = IDMap::CreateAndOpen(col_name, idmap_path, true, false);\n    if (id_map == nullptr) {\n      throw std::runtime_error(\"Failed to create id map\");\n    }\n\n    std::string delete_store_path =\n        
FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n    delete_store = std::make_shared<DeleteStore>(col_name);\n  }\n\n  void TearDown() override {\n    id_map.reset();\n    delete_store.reset();\n\n    // FileHelper::RemoveDirectory(col_path);\n  }\n\n public:\n  std::string GetColPath() {\n    return col_path;\n  }\n\n protected:\n  std::string col_name = \"test_segment_helper\";\n  std::string col_path = \"./test_collection\";\n  IDMap::Ptr id_map;\n  DeleteStore::Ptr delete_store;\n};\n\nTEST_F(SegmentHelperTest, CompactTask_General) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  // Create segments\n  auto seg1 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      seg_options, 0, 1000);\n  ASSERT_TRUE(seg1 != nullptr);\n  ASSERT_TRUE(seg1->flush().ok());\n\n  auto seg2 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,\n      seg_options, 1000, 1000);\n  ASSERT_TRUE(seg2 != nullptr);\n  ASSERT_TRUE(seg2->flush().ok());\n  std::cout << \"seg2: \" << seg2->meta()->to_string_formatted() << std::endl;\n\n  // Prepare segments for compaction\n  std::vector<Segment::Ptr> segments = {seg1, seg2};\n\n  // Create compact task\n  SegmentID output_segment_id = 2;\n  CompactTask task(GetColPath(), schema, segments,\n                   output_segment_id,    // output_segment_id\n                   nullptr,              // filter\n                   
forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output segment\n  auto output_segment_meta = segment_compact_task.output_segment_meta_;\n  ASSERT_EQ(output_segment_meta->id(), output_segment_id);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  // Move segment directory\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  auto new_segment_path =\n      FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);\n  FileHelper::MoveDirectory(tmp_segment_path, new_segment_path);\n\n  seg_options.read_only_ = true;\n  version_manager->set_enable_mmap(!forward_use_parquet);\n  auto seg3_ret = Segment::Open(\n      GetColPath(), *schema, *segment_compact_task.output_segment_meta_, id_map,\n      delete_store, version_manager, seg_options);\n  if (!seg3_ret.has_value()) {\n    std::cout << seg3_ret.error().message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  auto seg3 = std::move(seg3_ret.value());\n  ASSERT_EQ(seg3->id(), output_segment_id);\n\n  std::cout << seg3->meta()->to_string_formatted() << std::endl;\n  ASSERT_EQ(seg3->doc_count(), seg1->doc_count() + seg2->doc_count());\n\n  for (uint64_t i = 0; i < seg3->doc_count(); i++) {\n    auto doc = seg3->Fetch(i);\n    ASSERT_NE(doc, nullptr);\n    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);\n    ASSERT_EQ(*doc, expect_doc);\n  }\n\n  ASSERT_TRUE(seg1->destroy().ok());\n  ASSERT_TRUE(seg2->destroy().ok());\n}\n\nTEST_F(SegmentHelperTest, 
CompactTask_ScalarIndex) {\n  auto schema = test::TestHelper::CreateSchemaWithScalarIndex(false);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  // Create segments\n  auto seg1 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      seg_options, 0, 1000);\n  ASSERT_TRUE(seg1 != nullptr);\n  ASSERT_TRUE(seg1->flush().ok());\n\n  auto seg2 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,\n      seg_options, 1000, 1000);\n  ASSERT_TRUE(seg2 != nullptr);\n  ASSERT_TRUE(seg2->flush().ok());\n  std::cout << \"seg2: \" << seg2->meta()->to_string_formatted() << std::endl;\n\n  // Prepare segments for compaction\n  std::vector<Segment::Ptr> segments = {seg1, seg2};\n\n  // Create compact task\n  SegmentID output_segment_id = 2;\n  CompactTask task(GetColPath(), schema, segments,\n                   output_segment_id,    // output_segment_id\n                   nullptr,              // filter\n                   forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output 
segment\n  auto output_segment_meta = segment_compact_task.output_segment_meta_;\n  ASSERT_EQ(output_segment_meta->id(), output_segment_id);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  // Move segment directory\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  auto new_segment_path =\n      FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);\n  FileHelper::MoveDirectory(tmp_segment_path, new_segment_path);\n\n  seg_options.read_only_ = true;\n  version_manager->set_enable_mmap(!forward_use_parquet);\n  auto seg3_ret = Segment::Open(\n      GetColPath(), *schema, *segment_compact_task.output_segment_meta_, id_map,\n      delete_store, version_manager, seg_options);\n  if (!seg3_ret.has_value()) {\n    std::cout << seg3_ret.error().message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  auto seg3 = std::move(seg3_ret.value());\n  ASSERT_EQ(seg3->id(), output_segment_id);\n\n  std::cout << seg3->meta()->to_string_formatted() << std::endl;\n  ASSERT_EQ(seg3->doc_count(), seg1->doc_count() + seg2->doc_count());\n\n  for (uint64_t i = 0; i < seg3->doc_count(); i++) {\n    auto doc = seg3->Fetch(i);\n    ASSERT_NE(doc, nullptr);\n    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);\n    ASSERT_EQ(*doc, expect_doc);\n  }\n\n  ASSERT_TRUE(seg1->destroy().ok());\n  ASSERT_TRUE(seg2->destroy().ok());\n}\n\nTEST_F(SegmentHelperTest, CompactTask_VectorIndex) {\n  auto schema = test::TestHelper::CreateSchemaWithVectorIndex();\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  // 
Create segments\n  auto seg1 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      seg_options, 0, 1000);\n  ASSERT_TRUE(seg1 != nullptr);\n  ASSERT_TRUE(seg1->flush().ok());\n\n  auto seg2 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,\n      seg_options, 1000, 1000);\n  ASSERT_TRUE(seg2 != nullptr);\n  ASSERT_TRUE(seg2->flush().ok());\n  std::cout << \"seg2: \" << seg2->meta()->to_string_formatted() << std::endl;\n\n  // Prepare segments for compaction\n  std::vector<Segment::Ptr> segments = {seg1, seg2};\n\n  // Create compact task\n  SegmentID output_segment_id = 2;\n  CompactTask task(GetColPath(), schema, segments,\n                   output_segment_id,    // output_segment_id\n                   nullptr,              // filter\n                   forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output segment\n  auto output_segment_meta = segment_compact_task.output_segment_meta_;\n  ASSERT_EQ(output_segment_meta->id(), output_segment_id);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  // Move segment directory\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  auto new_segment_path =\n      FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);\n  FileHelper::MoveDirectory(tmp_segment_path, new_segment_path);\n\n  seg_options.read_only_ = true;\n  
version_manager->set_enable_mmap(!forward_use_parquet);\n  auto seg3_ret = Segment::Open(\n      GetColPath(), *schema, *segment_compact_task.output_segment_meta_, id_map,\n      delete_store, version_manager, seg_options);\n  if (!seg3_ret.has_value()) {\n    std::cout << seg3_ret.error().message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  auto seg3 = std::move(seg3_ret.value());\n  ASSERT_EQ(seg3->id(), output_segment_id);\n\n  std::cout << seg3->meta()->to_string_formatted() << std::endl;\n  ASSERT_EQ(seg3->doc_count(), seg1->doc_count() + seg2->doc_count());\n\n  for (uint64_t i = 0; i < seg3->doc_count(); i++) {\n    auto doc = seg3->Fetch(i);\n    ASSERT_NE(doc, nullptr);\n    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);\n    ASSERT_EQ(*doc, expect_doc);\n  }\n\n  ASSERT_TRUE(seg1->destroy().ok());\n  ASSERT_TRUE(seg2->destroy().ok());\n}\n\nTEST_F(SegmentHelperTest, CompactTask_MultipleSegments) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  std::vector<Segment::Ptr> input_segs;\n  int seg_count = 10;\n  int doc_count_per_seg = 100;\n  for (int i = 0; i < seg_count; i++) {\n    auto seg = test::TestHelper::CreateSegmentWithDoc(\n        GetColPath(), *schema, i, i * doc_count_per_seg, id_map, delete_store,\n        version_manager, seg_options, i * doc_count_per_seg, doc_count_per_seg);\n    ASSERT_TRUE(seg != nullptr);\n    ASSERT_TRUE(seg->flush().ok());\n    input_segs.push_back(seg);\n  }\n\n  // Create compact task\n  SegmentID output_segment_id = seg_count;\n  
CompactTask task(GetColPath(), schema, input_segs,\n                   output_segment_id,    // output_segment_id\n                   nullptr,              // filter\n                   forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output segment\n  auto output_segment_meta = segment_compact_task.output_segment_meta_;\n  ASSERT_EQ(output_segment_meta->id(), output_segment_id);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  // Move segment directory\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  auto new_segment_path =\n      FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);\n  FileHelper::MoveDirectory(tmp_segment_path, new_segment_path);\n\n  seg_options.read_only_ = true;\n  version_manager->set_enable_mmap(!forward_use_parquet);\n  auto seg3_ret = Segment::Open(\n      GetColPath(), *schema, *segment_compact_task.output_segment_meta_, id_map,\n      delete_store, version_manager, seg_options);\n  if (!seg3_ret.has_value()) {\n    std::cout << seg3_ret.error().message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  auto seg3 = std::move(seg3_ret.value());\n  ASSERT_EQ(seg3->id(), output_segment_id);\n\n  std::cout << seg3->meta()->to_string_formatted() << std::endl;\n  ASSERT_EQ(seg3->doc_count(), seg_count * doc_count_per_seg);\n\n  for (uint64_t i = 0; i < seg3->doc_count(); i++) {\n    auto doc = seg3->Fetch(i);\n    if (doc == nullptr) {\n      std::cout << \"doc is null: \" 
<< i << std::endl;\n    }\n    ASSERT_NE(doc, nullptr);\n    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);\n    ASSERT_EQ(*doc, expect_doc);\n  }\n}\n\nTEST_F(SegmentHelperTest, CompactTask_Filter) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  // Create segments\n  auto seg1 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      seg_options, 0, 1000);\n  ASSERT_TRUE(seg1 != nullptr);\n  ASSERT_TRUE(seg1->flush().ok());\n\n  // Create a simple filter\n  auto filter = std::make_shared<EasyIndexFilter>(\n      [&](uint64_t id) -> bool { return id < 10; });\n  // Note: Actual filter configuration would depend on the IndexFilter\n  // implementation\n\n  // Create compact task with filter\n  SegmentID output_segment_id = 1;\n  CompactTask task(GetColPath(), schema, {seg1},  // Single segment with filter\n                   output_segment_id,             // output_segment_id\n                   filter,\n                   forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create and execute task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n  ASSERT_TRUE(segment_task != nullptr);\n\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output segment\n  auto 
output_segment_meta = segment_compact_task.output_segment_meta_;\n  std::cout << output_segment_meta->to_string_formatted() << std::endl;\n  ASSERT_EQ(output_segment_meta->id(), output_segment_id);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  // Move segment directory\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  auto new_segment_path =\n      FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);\n  FileHelper::MoveDirectory(tmp_segment_path, new_segment_path);\n\n  seg_options.read_only_ = true;\n  version_manager->set_enable_mmap(!forward_use_parquet);\n  auto seg2_ret = Segment::Open(\n      GetColPath(), *schema, *segment_compact_task.output_segment_meta_, id_map,\n      delete_store, version_manager, seg_options);\n  if (!seg2_ret.has_value()) {\n    std::cout << seg2_ret.error().message() << std::endl;\n    ASSERT_TRUE(false);\n  }\n\n  auto seg2 = std::move(seg2_ret.value());\n  ASSERT_EQ(seg2->id(), output_segment_id);\n\n  std::cout << seg2->meta()->to_string_formatted() << std::endl;\n  ASSERT_EQ(seg2->doc_count(), seg1->doc_count() - 10);\n\n  ASSERT_TRUE(seg1->destroy().ok());\n}\n\nTEST_F(SegmentHelperTest, CompactTask_FilterAll) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  bool forward_use_parquet = false;\n  auto seg_options =\n      SegmentOptions{false, !forward_use_parquet, DEFAULT_MAX_BUFFER_SIZE};\n\n  // Create segments\n  auto seg1 = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      seg_options, 0, 1000);\n  ASSERT_TRUE(seg1 != nullptr);\n  
ASSERT_TRUE(seg1->flush().ok());\n\n  // Create a simple filter\n  auto filter = std::make_shared<EasyIndexFilter>(\n      [&](uint64_t id) -> bool { return true; });\n  // Note: Actual filter configuration would depend on the IndexFilter\n  // implementation\n\n  // Create compact task with filter\n  SegmentID output_segment_id = 1;\n  CompactTask task(GetColPath(), schema, {seg1},  // Single segment with filter\n                   output_segment_id,             // output_segment_id\n                   filter,\n                   forward_use_parquet,  // forward_use_parquet\n                   1                     // concurrency\n  );\n\n  // Create and execute task\n  auto segment_task = SegmentTask::CreateComapctTask(task);\n  ASSERT_TRUE(segment_task != nullptr);\n\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  ASSERT_TRUE(status.ok());\n\n  auto segment_compact_task = std::get<CompactTask>(segment_task->task_info());\n  // Verify output segment\n  auto output_segment_meta = segment_compact_task.output_segment_meta_;\n  ASSERT_EQ(output_segment_meta, nullptr);\n\n  auto tmp_segment_path =\n      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);\n  ASSERT_FALSE(FileHelper::DirectoryExists(tmp_segment_path));\n}\n\nTEST_F(SegmentHelperTest, CreateVectorIndexTask_AllFields) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  // Create a segment\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      SegmentOptions{false, true, DEFAULT_MAX_BUFFER_SIZE}, 0, 1000);\n  
ASSERT_TRUE(segment != nullptr);\n  ASSERT_TRUE(segment->dump().ok());\n\n  // Create index params\n  auto index_params =\n      std::make_shared<HnswIndexParams>(MetricType::L2,  // metric_type\n                                        16,              // m\n                                        100              // ef_construction\n      );\n\n  // Create create index task\n  CreateVectorIndexTask task(\n      segment,\n      \"\",  // column_to_build_vector_index (empty means all vector columns)\n      index_params,\n      1  // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateCreateVectorIndexTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  EXPECT_TRUE(status.ok());\n\n  // Verify output segment meta\n  auto index_task = std::get<CreateVectorIndexTask>(segment_task->task_info());\n  auto output_segment_meta = index_task.output_segment_meta_;\n  std::cout << \"output_segment_meta: \"\n            << output_segment_meta->to_string_formatted() << std::endl;\n  ASSERT_EQ(output_segment_meta->id(), 0);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n\n  auto segment_meta = std::make_shared<SegmentMeta>(*segment->meta());\n  segment_meta->remove_writing_forward_block();\n  // create all vector index will not change segment meta\n  ASSERT_EQ(*output_segment_meta, *segment_meta);\n}\n\nTEST_F(SegmentHelperTest, CreateVectorIndexTask_SingleField) {\n  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);\n\n  Version version;\n  version.set_schema(*schema);\n  auto version_manager_tmp = VersionManager::Create(col_path, version);\n  if (!version_manager_tmp.has_value()) {\n    throw std::runtime_error(\"Failed to create version manager\");\n  }\n\n  auto version_manager = version_manager_tmp.value();\n\n  // 
Create a segment\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,\n      SegmentOptions{false, true, DEFAULT_MAX_BUFFER_SIZE}, 0, 1000);\n  ASSERT_TRUE(segment != nullptr);\n  ASSERT_TRUE(segment->dump().ok());\n\n  // Create index params\n  auto index_params =\n      std::make_shared<HnswIndexParams>(MetricType::IP,  // metric_type\n                                        16,              // m\n                                        100              // ef_construction\n      );\n\n  // Create create index task\n  CreateVectorIndexTask task(segment,\n                             \"dense_fp32\",  // column_to_build_vector_index\n                                            // (empty means all vector columns)\n                             index_params,\n                             1  // concurrency\n  );\n\n  // Create segment task\n  auto segment_task = SegmentTask::CreateCreateVectorIndexTask(task);\n\n  // Verify task creation\n  ASSERT_TRUE(segment_task != nullptr);\n\n  // Execute the task\n  Status status = SegmentHelper::Execute(segment_task);\n  std::cout << \"status: \" << status.message() << std::endl;\n  EXPECT_TRUE(status.ok());\n\n  // Verify output segment meta\n  auto index_task = std::get<CreateVectorIndexTask>(segment_task->task_info());\n  auto output_segment_meta = index_task.output_segment_meta_;\n  std::cout << \"output_segment_meta: \"\n            << output_segment_meta->to_string_formatted() << std::endl;\n  ASSERT_EQ(output_segment_meta->id(), 0);\n  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());\n}"
  },
  {
    "path": "tests/db/index/segment/segment_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <filesystem>\n#include <iostream>\n#define private public\n#define protected public\n#include \"db/index/segment/segment.h\"\n#undef private\n#undef protected\n#include <cstdint>\n#include <memory>\n#include <thread>\n#include <arrow/array/array_binary.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/pretty_print.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/buffer/buffer_manager.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/delete_store.h\"\n#include \"db/index/common/id_map.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/storage/store_helper.h\"\n#include \"db/index/storage/wal/wal_file.h\"\n#include \"utils/utils.h\"\n#include \"zvec/db/options.h\"\n\nusing namespace zvec;\n\nclass SegmentTest : public testing::TestWithParam<bool> {\n protected:\n  void SetUp() override {\n    ailego::LoggerBroker::SetLevel(ailego::Logger::LEVEL_INFO);\n\n    FileHelper::RemoveDirectory(col_path);\n    FileHelper::CreateDirectory(col_path);\n\n    ailego::BufferManager::Instance().init(MIN_MEMORY_LIMIT_BYTES, 1);\n\n    std::string idmap_path =\n        FileHelper::MakeFilePath(col_path, FileID::ID_FILE, 0);\n    id_map = IDMap::CreateAndOpen(col_name, idmap_path, true, false);\n    if (id_map == nullptr) {\n    
  throw std::runtime_error(\"Failed to create id map\");\n    }\n\n    std::string delete_store_path =\n        FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n    delete_store = std::make_shared<DeleteStore>(col_name);\n\n    schema =\n        test::TestHelper::CreateSchemaWithScalarIndex(false, false, col_name);\n\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"id\", DataType::INT32, false));\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"name\", DataType::STRING, false));\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"age\", DataType::UINT32, false));\n\n    schema->add_field(\n        std::make_shared<FieldSchema>(\"binary\", DataType::BINARY, false));\n\n    schema->add_field(std::make_shared<FieldSchema>(\n        \"array_binary\", DataType::ARRAY_BINARY, false));\n\n    bool enable_mmap = GetParam();\n\n    Version version;\n    version.set_schema(*schema);\n    version.set_enable_mmap(enable_mmap);\n    auto version_manager_tmp = VersionManager::Create(col_path, version);\n    if (!version_manager_tmp.has_value()) {\n      throw std::runtime_error(\"Failed to create version manager\");\n    }\n\n    version_manager = version_manager_tmp.value();\n\n    // default options\n    options.read_only_ = false;\n    options.enable_mmap_ = enable_mmap;\n    options.max_buffer_size_ = 64 * 1024 * 1024;\n  }\n\n  void TearDown() override {\n    id_map.reset();\n    delete_store.reset();\n    version_manager.reset();\n\n    // FileHelper::RemoveDirectory(col_path);\n  }\n\n public:\n  std::string GetColPath() {\n    return col_path;\n  }\n\n protected:\n  std::string col_name = \"test_segment\";\n  std::string col_path = \"./test_collection\";\n  IDMap::Ptr id_map;\n  DeleteStore::Ptr delete_store;\n  VersionManager::Ptr version_manager;\n  CollectionSchema::Ptr schema;\n  SegmentOptions options;\n};\n\nTEST_P(SegmentTest, EmptySchema) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      
col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n  EXPECT_EQ(segment->id(), 0);\n\n  segment.reset();\n}\n\n\nTEST_P(SegmentTest, General) {\n  options.max_buffer_size_ = 1 * 1024;\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 25);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto combined_reader = segment->scan({LOCAL_ROW_ID, \"id\", \"name\", \"age\"});\n  ASSERT_TRUE(combined_reader != nullptr);\n  EXPECT_TRUE(combined_reader->schema() != nullptr);\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  uint32_t total_doc = 0;\n  while (true) {\n    auto status = combined_reader->ReadNext(&batch);\n    if (status.ok() == false) break;\n    if (batch == nullptr) break;\n\n    EXPECT_EQ(batch->num_columns(), 4);\n\n    total_doc += batch->num_rows();\n  }\n  EXPECT_EQ(total_doc, 25);\n\n  std::vector<int> indices = {0, 3, 6, 1, 0, 14, 12, 21};\n  auto combined_table = segment->fetch(\n      {LOCAL_ROW_ID, \"id\", \"name\", \"age\", \"binary\", \"array_binary\"}, indices);\n  ASSERT_TRUE(combined_table != nullptr);\n  EXPECT_EQ(combined_table->num_columns(), 6);\n  EXPECT_EQ(combined_table->num_rows(), 8);\n\n  auto field = combined_table->schema()->field(0);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the LOCAL_ROW_ID column for each row\n  auto id_column = combined_table->column(0);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> &expected_ids = indices;\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_P(SegmentTest, InsertMoreData) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      
col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n  uint64_t MAX_DOC = 1000;\n  auto start = std::chrono::system_clock::now();\n  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, MAX_DOC);\n  auto end = std::chrono::system_clock::now();\n  auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n                  .count();\n  std::cout << \"insert cost \" << cost << \"ms\" << std::endl;\n\n  auto combined_reader = segment->scan({\"id\", \"name\", \"age\"});\n  std::shared_ptr<arrow::RecordBatch> batch;\n  uint32_t total_doc = 0;\n  while (true) {\n    auto status = combined_reader->ReadNext(&batch);\n    if (status.ok() == false) break;\n    if (batch == nullptr) break;\n    total_doc += batch->num_rows();\n  }\n\n  EXPECT_EQ(total_doc, MAX_DOC);\n}\n\nTEST_P(SegmentTest, InsertScalarTypes) {\n  auto tmp_schema =\n      test::TestHelper::CreateSchemaWithScalarIndex(true, true, col_name);\n\n  auto invert_params = std::make_shared<InvertIndexParams>(false);\n  schema->add_field(std::make_shared<FieldSchema>(\"binary\", DataType::BINARY,\n                                                  false, invert_params));\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_binary\", DataType::ARRAY_BINARY, false, invert_params));\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n}\n\nTEST_P(SegmentTest, InsertVectorTypes) {\n  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(\n      false, col_name,\n      std::make_shared<HnswIndexParams>(MetricType::IP, 16, 20,\n                                        QuantizeType::FP16));\n\n  // first insert 100 doc\n  int doc_count = 100;\n  {\n    auto segment = test::TestHelper::CreateSegmentWithDoc(\n        col_path, *tmp_schema, 0, 0, id_map, delete_store, 
version_manager,\n        options, 0, doc_count);\n    ASSERT_TRUE(segment != nullptr);\n  }\n\n  // Open\n  {\n    Version v = version_manager->get_current_version();\n    auto result =\n        Segment::Open(col_path, *tmp_schema, *v.writing_segment_meta(), id_map,\n                      delete_store, version_manager, options);\n    ASSERT_TRUE(result.has_value());\n    auto segment = result.value();\n\n    EXPECT_GT(segment->get_vector_indexer(\"dense_fp32\").size(), 0);\n    EXPECT_GT(segment->get_quant_vector_indexer(\"dense_fp32\").size(), 0);\n  }\n}\n\nTEST_P(SegmentTest, FetchByGlobalDocID) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 1);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto ret_doc = segment->Fetch(0);\n  EXPECT_TRUE(ret_doc != nullptr);\n  EXPECT_EQ(ret_doc->doc_id(), 0);\n  EXPECT_EQ(ret_doc->pk(), \"pk_0\");\n}\n\nTEST_P(SegmentTest, FetchSingleRow) {\n  int doc_count = 10;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto func = [&](int index) -> void {\n    ExecBatchPtr batch = segment->fetch({\"id\", \"name\", \"age\"}, index);\n    ASSERT_TRUE(batch != nullptr);\n    EXPECT_EQ(batch->length, 1);\n    EXPECT_EQ(batch->values.size(), 3);\n\n    auto id_scalar = batch->values[0].scalar();\n    ASSERT_TRUE(id_scalar != nullptr);\n    auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n    ASSERT_TRUE(id_value != nullptr);\n    EXPECT_EQ(id_value->value, index);\n  };\n\n  for (int i = 0; i < doc_count; ++i) {\n    func(i);\n  }\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithPersistStore) {\n  // first insert 1000 doc\n  int doc_count = 1000;\n  {\n    auto segment = test::TestHelper::CreateSegmentWithDoc(\n        col_path, *schema, 0, 0, id_map, delete_store, 
version_manager, options,\n        0, doc_count);\n    ASSERT_TRUE(segment != nullptr);\n  }\n\n  // Open\n  {\n    Version v = version_manager->get_current_version();\n    SegmentOptions options;\n    options.read_only_ = false;\n    auto result = Segment::Open(col_path, *schema, *v.writing_segment_meta(),\n                                id_map, delete_store, version_manager, options);\n    ASSERT_TRUE(result.has_value());\n    auto segment = result.value();\n\n    test::TestHelper::SegmentInsertDoc(segment, *schema, doc_count,\n                                       doc_count * 2);\n\n    auto func = [&](int index) -> void {\n      ExecBatchPtr batch = segment->fetch({\"id\", \"name\", \"age\"}, index);\n      ASSERT_TRUE(batch != nullptr);\n      EXPECT_EQ(batch->length, 1);\n      EXPECT_EQ(batch->values.size(), 3);\n\n      auto id_scalar = batch->values[0].scalar();\n      ASSERT_TRUE(id_scalar != nullptr);\n      auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n      ASSERT_TRUE(id_value != nullptr);\n      EXPECT_EQ(id_value->value, index);\n    };\n\n    for (int i = 0; i < doc_count * 2; ++i) {\n      func(i);\n    }\n  }\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithUserID) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({USER_ID, \"id\", \"name\"}, 2);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto user_id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(user_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::StringScalar>(user_id_scalar) !=\n              nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithGlobalDocID) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, 
options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({GLOBAL_DOC_ID, \"id\", \"name\"}, 4);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto global_doc_id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(global_doc_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(\n                  global_doc_id_scalar) != nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithLocalRowID) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({LOCAL_ROW_ID, \"id\", \"name\"}, 4);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto local_doc_id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(local_doc_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(\n                  local_doc_id_scalar) != nullptr);\n  auto local_doc_id_value =\n      std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);\n  EXPECT_EQ(local_doc_id_value->value, 4);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithLocalRowIDMiddle) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", LOCAL_ROW_ID, \"name\"}, 4);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto local_doc_id_scalar = batch->values[1].scalar();\n  ASSERT_TRUE(local_doc_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(\n                  local_doc_id_scalar) != nullptr);\n  auto local_doc_id_value =\n      
std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);\n  EXPECT_EQ(local_doc_id_value->value, 4);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithLocalRowIDEnd) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", \"name\", LOCAL_ROW_ID}, 4);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto local_doc_id_scalar = batch->values[2].scalar();\n  ASSERT_TRUE(local_doc_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(\n                  local_doc_id_scalar) != nullptr);\n  auto local_doc_id_value =\n      std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);\n  EXPECT_EQ(local_doc_id_value->value, 4);\n}\n\nTEST_P(SegmentTest, CheckOrderWithLocalRowID) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto combined_reader = segment->scan({LOCAL_ROW_ID, \"id\", \"name\"});\n  ASSERT_TRUE(combined_reader != nullptr);\n  EXPECT_TRUE(combined_reader->schema() != nullptr);\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  uint32_t total_doc = 0;\n  while (true) {\n    auto status = combined_reader->ReadNext(&batch);\n    if (status.ok() == false) break;\n    if (batch == nullptr) break;\n    EXPECT_EQ(batch->num_columns(), 3);\n    EXPECT_EQ(batch->column(0)->type()->id(), arrow::Type::UINT64);\n    EXPECT_EQ(batch->column_name(0), LOCAL_ROW_ID);\n    total_doc += batch->num_rows();\n  }\n  EXPECT_EQ(total_doc, 10);\n\n\n  std::vector<int> indices = {0, 3, 6, 1, 0};\n  auto combined_table = segment->fetch({LOCAL_ROW_ID, \"id\", \"name\"}, indices);\n  ASSERT_TRUE(combined_table != nullptr);\n  
EXPECT_EQ(combined_table->num_columns(), 3);\n  EXPECT_EQ(combined_table->num_rows(), 5);\n\n  auto field = combined_table->schema()->field(0);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the LOCAL_ROW_ID column for each row\n  auto id_column = combined_table->column(0);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> &expected_ids = indices;\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_P(SegmentTest, CheckOrderWithLocalRowIDMiddle) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto combined_reader = segment->scan({\"id\", LOCAL_ROW_ID, \"name\"});\n  ASSERT_TRUE(combined_reader != nullptr);\n  EXPECT_TRUE(combined_reader->schema() != nullptr);\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  uint32_t total_doc = 0;\n  while (true) {\n    auto status = combined_reader->ReadNext(&batch);\n    if (status.ok() == false) break;\n    if (batch == nullptr) break;\n\n    EXPECT_EQ(batch->num_columns(), 3);\n    EXPECT_EQ(batch->column(1)->type()->id(), arrow::Type::UINT64);\n    EXPECT_EQ(batch->column_name(1), LOCAL_ROW_ID);\n\n    total_doc += batch->num_rows();\n  }\n  EXPECT_EQ(total_doc, 10);\n\n  std::vector<int> indices = {0, 3, 6, 1, 0};\n  auto combined_table = segment->fetch({\"id\", LOCAL_ROW_ID, \"name\"}, indices);\n  ASSERT_TRUE(combined_table != nullptr);\n  EXPECT_EQ(combined_table->num_columns(), 3);\n  EXPECT_EQ(combined_table->num_rows(), 5);\n\n  auto field = combined_table->schema()->field(1);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the LOCAL_ROW_ID column for each row\n  auto 
id_column = combined_table->column(1);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> &expected_ids = indices;\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_P(SegmentTest, CheckOrderWithLocalRowIDEnd) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto combined_reader = segment->scan({\"id\", \"name\", LOCAL_ROW_ID});\n  ASSERT_TRUE(combined_reader != nullptr);\n  EXPECT_TRUE(combined_reader->schema() != nullptr);\n\n  std::shared_ptr<arrow::RecordBatch> batch;\n  uint32_t total_doc = 0;\n  while (true) {\n    auto status = combined_reader->ReadNext(&batch);\n    if (status.ok() == false) break;\n    if (batch == nullptr) break;\n\n    EXPECT_EQ(batch->num_columns(), 3);\n    EXPECT_EQ(batch->column(2)->type()->id(), arrow::Type::UINT64);\n    EXPECT_EQ(batch->column_name(2), LOCAL_ROW_ID);\n\n    total_doc += batch->num_rows();\n  }\n  EXPECT_EQ(total_doc, 10);\n\n  std::vector<int> indices = {0, 3, 6, 1, 0};\n  auto combined_table = segment->fetch({\"id\", \"name\", LOCAL_ROW_ID}, indices);\n  ASSERT_TRUE(combined_table != nullptr);\n  EXPECT_EQ(combined_table->num_columns(), 3);\n  EXPECT_EQ(combined_table->num_rows(), 5);\n\n  auto field = combined_table->schema()->field(2);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the LOCAL_ROW_ID column for each row\n  auto id_column = combined_table->column(2);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> &expected_ids = indices;\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); 
++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithNegativeIndex) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", \"name\"}, -1);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithOutOfRangeIndex) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", \"name\"}, 15);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithInvalidColumn) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", \"invalid_column\"}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithEmptyColumns) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowFromEmptySegment) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"id\", \"name\"}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_P(SegmentTest, FetchSingleRowWithBinaryFields) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n  
    col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  ExecBatchPtr batch = segment->fetch({\"binary\", \"array_binary\"}, 1);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 2);\n\n  auto binary_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(binary_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::BinaryScalar>(binary_scalar) !=\n              nullptr);\n\n  auto array_binary_scalar = batch->values[1].scalar();\n  ASSERT_TRUE(array_binary_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::ListScalar>(\n                  array_binary_scalar) != nullptr);\n}\n\nTEST_P(SegmentTest, Recover) {\n  // first insert 100 doc\n  int doc_count = 100;\n  {\n    auto segment = test::TestHelper::CreateSegmentWithDoc(\n        col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n        0, doc_count);\n    ASSERT_TRUE(segment != nullptr);\n  }\n\n  // simulate wal file\n  {\n    Version v = version_manager->get_current_version();\n    auto writing_block_id =\n        v.writing_segment_meta()->writing_forward_block_->id();\n    auto wal_file = FileHelper::MakeWalPath(col_path, 0, writing_block_id);\n    WalOptions wal_option{0, true};\n    WalFilePtr wal_file_;\n    WalFile::CreateAndOpen(wal_file, wal_option, &wal_file_);\n    ASSERT_TRUE(wal_file_ != nullptr);\n\n    for (int i = doc_count; i < doc_count + 100; i++) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      doc.set_operator(Operator::INSERT);\n      std::vector<uint8_t> buf = doc.serialize();\n      auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));\n      ASSERT_EQ(ret, 0);\n    }\n\n    for (int i = 0; i < doc_count; i++) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      doc.set_doc_id(i);  // global doc id\n      doc.set_operator(Operator::UPDATE);\n      std::vector<uint8_t> buf 
= doc.serialize();\n      auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));\n      ASSERT_EQ(ret, 0);\n    }\n\n    for (int i = 0; i < doc_count; i++) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      doc.set_operator(Operator::UPSERT);\n      std::vector<uint8_t> buf = doc.serialize();\n      auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));\n      ASSERT_EQ(ret, 0);\n    }\n\n    for (int i = 0; i < doc_count; i++) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      doc.set_doc_id(i + 300);  // global doc id\n      doc.set_operator(Operator::DELETE);\n      std::vector<uint8_t> buf = doc.serialize();\n      auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));\n      ASSERT_EQ(ret, 0);\n    }\n  }\n\n  // recover\n  {\n    Version v = version_manager->get_current_version();\n    SegmentOptions options;\n    options.read_only_ = false;\n    auto result = Segment::Open(col_path, *schema, *v.writing_segment_meta(),\n                                id_map, delete_store, version_manager, options);\n    ASSERT_TRUE(result.has_value());\n    auto segment = result.value();\n\n    auto combined_reader = segment->scan({\"id\"});\n    std::shared_ptr<arrow::RecordBatch> batch;\n    uint32_t total_doc = 0;\n    while (true) {\n      auto status = combined_reader->ReadNext(&batch);\n      if (status.ok() == false) break;\n      if (batch == nullptr) break;\n\n      total_doc += batch->num_rows();\n      EXPECT_EQ(batch->num_columns(), 1);\n    }\n    // Why 400 ? 
because in segment we just mark deleted doc\n    EXPECT_EQ(total_doc, 400);\n\n    // auto filter = segment->get_filter();\n    auto filter = delete_store->make_filter();\n    auto actual_doc_count = segment->doc_count(filter);\n    EXPECT_EQ(actual_doc_count, 100);\n  }\n}\n\nTEST_P(SegmentTest, UpdateDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 10);\n\n  // Create a new document to update\n  Doc update_doc = test::TestHelper::CreateDoc(5, *schema);\n  update_doc.set<std::string>(\"name\", \"updated_name\");\n  update_doc.set<uint32_t>(\"age\", 99);\n\n  // Update the document\n  auto status = segment->Update(update_doc);\n  EXPECT_TRUE(status.ok()) << \"Update failed: \" << status.message();\n\n  // after update\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 10);\n\n  // Fetch the updated document and verify changes\n  // Note: The parameter here is the internal global_doc_id, not user-specified\n  auto ret_doc = segment->Fetch(10);\n  EXPECT_TRUE(ret_doc != nullptr);\n  EXPECT_EQ(ret_doc->get<std::string>(\"name\"), \"updated_name\");\n  EXPECT_EQ(ret_doc->get<uint32_t>(\"age\"), 99);\n}\n\nTEST_P(SegmentTest, UpdateDocBatch) {\n  int doc_count = 10;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, doc_count);\n\n  // Create a new document to update\n  for (int i = 0; i < doc_count; i++) {\n    Doc update_doc = test::TestHelper::CreateDoc(i, *schema);\n    // Update the document\n    auto status = segment->Update(update_doc);\n    
EXPECT_TRUE(status.ok()) << \"Update failed: \" << status.message();\n  }\n\n  // after update\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, doc_count);\n\n  // Fetch the updated document and verify changes\n  // Note: The parameter here is the internal global_doc_id, not user-specified\n  auto ret_doc = segment->Fetch(doc_count * 2 - 1);\n  EXPECT_TRUE(ret_doc != nullptr);\n  EXPECT_EQ(ret_doc->get<std::string>(\"name\"),\n            \"value_\" + std::to_string(doc_count - 1));\n}\n\nTEST_P(SegmentTest, DeleteDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 10);\n\n  // Delete a document by primary key\n  auto status = segment->Delete(\"pk_5\");\n  EXPECT_TRUE(status.ok()) << \"Delete by pk failed: \" << status.message();\n\n  // after delete\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 9);\n\n  // Delete a document by global doc id\n  status = segment->Delete(3);\n  EXPECT_TRUE(status.ok()) << \"Delete by global doc id failed: \"\n                           << status.message();\n\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 8);\n}\n\nTEST_P(SegmentTest, DeleteBatch) {\n  int doc_count = 10;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, doc_count);\n\n  for (int i = 0; i < doc_count; i++) {\n    auto status = segment->Delete(\"pk_\" + std::to_string(i));\n    EXPECT_TRUE(status.ok()) << \"Delete by pk failed: \" << status.message();\n  }\n\n  // after delete\n  count = 
segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 0);\n}\n\n\nTEST_P(SegmentTest, UpsertDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 5);\n\n  // Upsert an existing document\n  Doc upsert_doc1 = test::TestHelper::CreateDoc(3, *schema);\n  upsert_doc1.set<std::string>(\"name\", \"upserted_name\");\n  auto status = segment->Upsert(upsert_doc1);\n  EXPECT_TRUE(status.ok()) << \"Upsert existing doc failed: \"\n                           << status.message();\n\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 5);\n\n  // Verify the update\n  auto ret_doc = segment->Fetch(5);\n  EXPECT_TRUE(ret_doc != nullptr);\n  EXPECT_EQ(ret_doc->get<std::string>(\"name\"), \"upserted_name\");\n\n  // Upsert a new document\n  Doc upsert_doc2 = test::TestHelper::CreateDoc(6, *schema);\n  upsert_doc2.set<std::string>(\"name\", \"new_upserted_doc\");\n  status = segment->Upsert(upsert_doc2);\n  EXPECT_TRUE(status.ok()) << \"Upsert new doc failed: \" << status.message();\n\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 6);\n\n  // Verify the new document was inserted\n  ret_doc = segment->Fetch(6);\n  EXPECT_TRUE(ret_doc != nullptr);\n  EXPECT_EQ(ret_doc->get<std::string>(\"name\"), \"new_upserted_doc\");\n}\n\nTEST_P(SegmentTest, UpsertDocBatch) {\n  int doc_count = 10;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  // before update\n  uint64_t count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, doc_count);\n\n  for (int i = 0; i < doc_count; i++) {\n    // Upsert existing document\n    Doc upsert_doc1 = 
test::TestHelper::CreateDoc(i, *schema);\n    upsert_doc1.set<std::string>(\"name\", \"upserted_name\" + std::to_string(i));\n    auto status = segment->Upsert(upsert_doc1);\n    EXPECT_TRUE(status.ok())\n        << \"Upsert existing doc failed: \" << status.message();\n\n    // Upsert new document\n    Doc upsert_doc2 = test::TestHelper::CreateDoc(doc_count + i, *schema);\n    upsert_doc2.set<std::string>(\"name\",\n                                 \"new_upserted_doc\" + std::to_string(i));\n    status = segment->Upsert(upsert_doc2);\n    EXPECT_TRUE(status.ok()) << \"Upsert new doc failed: \" << status.message();\n  }\n\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, doc_count * 2);\n\n  int incr_idx = 0;\n  for (int i = doc_count; i < doc_count + doc_count * 2; i += 2) {\n    // Verify the update\n    auto ret_doc = segment->Fetch(i);\n    EXPECT_TRUE(ret_doc != nullptr);\n    EXPECT_EQ(ret_doc->get<std::string>(\"name\"),\n              \"upserted_name\" + std::to_string(incr_idx));\n\n    // Verify the new document was inserted\n    ret_doc = segment->Fetch(i + 1);\n    EXPECT_EQ(ret_doc->get<std::string>(\"name\"),\n              \"new_upserted_doc\" + std::to_string(incr_idx));\n    incr_idx++;\n  }\n}\n\nTEST_P(SegmentTest, Flush) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 100);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Flush the segment\n  auto status = segment->flush();\n  EXPECT_TRUE(status.ok()) << \"Flush failed: \" << status.message();\n}\n\nTEST_P(SegmentTest, FlushAfterInsert) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 100);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Flush the segment\n  auto status = segment->flush();\n  EXPECT_TRUE(status.ok()) << \"Flush failed: \" << status.message();\n\n  
test::TestHelper::SegmentInsertDoc(segment, *schema, 100, 150);\n\n  ASSERT_EQ(segment->doc_count(), 150);\n\n  for (int i = 0; i < 150; i++) {\n    auto ret_doc = segment->Fetch(i);\n    EXPECT_TRUE(ret_doc != nullptr);\n\n    Doc verify_doc = test::TestHelper::CreateDoc(i, *schema);\n    auto vv = verify_doc.get<std::vector<float>>(\"dense_fp32\").value();\n    auto v = ret_doc->get<std::vector<float>>(\"dense_fp32\").value();\n    for (uint32_t j = 0; j < vv.size(); j++) {\n      ASSERT_FLOAT_EQ(v[j], vv[j]);\n    }\n  }\n}\n\nTEST_P(SegmentTest, Dump) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 100);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Dump the segment\n  auto status = segment->dump();\n  EXPECT_TRUE(status.ok()) << \"Flush failed: \" << status.message();\n\n  status = segment->dump();\n  EXPECT_FALSE(status.ok());\n  EXPECT_EQ(status.code(), StatusCode::NOT_SUPPORTED);\n}\n\nTEST_P(SegmentTest, DocCount) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 50);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Get document count\n  uint64_t count = segment->doc_count();\n  EXPECT_EQ(count, 50);\n\n  // Delete some documents\n  segment->Delete(\"pk_10\");\n  segment->Delete(\"pk_20\");\n  segment->Delete(\"pk_30\");\n\n  // Get document count again\n  count = segment->doc_count(segment->get_filter());\n  EXPECT_EQ(count, 47);\n}\n\n// TEST_P(SegmentTest, Insert100WData) {\n//   options.max_buffer_size_ = 8 * 1024 * 1024;\n\n//   auto segment = test::TestHelper::CreateSegmentWithDoc(\n//       col_path, *schema, 0, 0, id_map, delete_store, version_manager,\n//       options, 0, 0);\n//   ASSERT_TRUE(segment != nullptr);\n\n//   uint64_t MAX_DOC = 1000000;\n//   auto start = std::chrono::system_clock::now();\n//   test::TestHelper::SegmentInsertDoc(segment, 
*schema, 0, MAX_DOC);\n//   auto end = std::chrono::system_clock::now();\n//   auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(end -\n//   start)\n//                   .count();\n//   std::cout << \"insert cost \" << cost << \"ms\" << std::endl;\n\n//   start = std::chrono::system_clock::now();\n//   ;\n//   auto combined_reader = segment->scan(\n//       {\"id\", \"name\", \"age\", USER_ID, GLOBAL_DOC_ID, LOCAL_ROW_ID});\n//   std::shared_ptr<arrow::RecordBatch> batch;\n//   uint32_t total_doc = 0;\n//   while (true) {\n//     auto status = combined_reader->ReadNext(&batch);\n//     if (status.ok() == false) break;\n//     if (batch == nullptr) break;\n//     total_doc += batch->num_rows();\n//   }\n//   end = std::chrono::system_clock::now();\n//   cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n//              .count();\n//   std::cout << \"scan cost \" << cost << \"ms\" << std::endl;\n\n//   EXPECT_EQ(total_doc, MAX_DOC);\n// }\n\nTEST_P(SegmentTest, CombinedVectorColumnIndexer) {\n  options.max_buffer_size_ = 10 * 1024;\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n\n  uint64_t MAX_DOC = 1000;\n  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, MAX_DOC);\n\n  Doc new_doc = test::TestHelper::CreateDoc(1000, *schema);\n  auto status = segment->Insert(new_doc);\n  ASSERT_TRUE(status.ok());\n\n  auto combined_indexer = segment->get_combined_vector_indexer(\"dense_fp32\");\n  ASSERT_TRUE(combined_indexer != nullptr);\n\n  // fetch\n  auto fetched_data = combined_indexer->Fetch(1000);\n  ASSERT_TRUE(fetched_data);\n  const float *dense_vector = reinterpret_cast<const float *>(\n      std::get<vector_column_params::DenseVectorBuffer>(\n          fetched_data->vector_buffer)\n          .data.data());\n\n  auto vv = 
new_doc.get<std::vector<float>>(\"dense_fp32\").value();\n\n  for (uint32_t i = 0; i < vv.size(); i++) {\n    ASSERT_FLOAT_EQ(dense_vector[i], vv[i]);\n  }\n\n  // query\n  auto dense_fp32_field = schema->get_field(\"dense_fp32\");\n  auto query_vector = new_doc.get<std::vector<float>>(\"dense_fp32\").value();\n  auto query = vector_column_params::VectorData{\n      vector_column_params::DenseVector{.data = query_vector.data()}};\n  auto query_params = vector_column_params::QueryParams{\n      .dimension = dense_fp32_field->dimension(),\n      .topk = 10,\n      .filter = nullptr,\n      .fetch_vector = false};\n  auto results = combined_indexer->Search(query, query_params);\n  ASSERT_TRUE(results.has_value());\n\n  auto vector_results =\n      dynamic_cast<VectorIndexResults *>(results.value().get());\n  ASSERT_TRUE(vector_results);\n  ASSERT_EQ(vector_results->count(), 10);\n\n  int count = 0;\n  auto iter = vector_results->create_iterator();\n  while (iter->valid()) {\n    count++;\n    iter->next();\n  }\n  ASSERT_EQ(count, 10);\n}\n\nTEST_P(SegmentTest, CombinedVectorColumnIndexerWithQuantVectorIndex) {\n  options.max_buffer_size_ = 10 * 1024;\n\n  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(\n      false, \"demo\",\n      std::make_shared<HnswIndexParams>(MetricType::IP, 16, 20,\n                                        QuantizeType::FP16));\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *tmp_schema, 0, 0, id_map, delete_store, version_manager,\n      options, 0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n\n  uint64_t MAX_DOC = 1000;\n  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, MAX_DOC);\n\n  Doc new_doc = test::TestHelper::CreateDoc(1000, *schema);\n  auto status = segment->Insert(new_doc);\n  ASSERT_TRUE(status.ok());\n\n  auto combined_indexer =\n      segment->get_quant_combined_vector_indexer(\"dense_fp32\");\n  ASSERT_TRUE(combined_indexer != nullptr);\n\n  // fetch\n  auto fetched_data = 
combined_indexer->Fetch(1000);\n  ASSERT_TRUE(fetched_data);\n  const float *dense_vector = reinterpret_cast<const float *>(\n      std::get<vector_column_params::DenseVectorBuffer>(\n          fetched_data->vector_buffer)\n          .data.data());\n\n  auto vv = new_doc.get<std::vector<float>>(\"dense_fp32\").value();\n\n  for (uint32_t i = 0; i < vv.size(); i++) {\n    EXPECT_NEAR(dense_vector[i], vv[i], 0.1);\n  }\n\n  // query\n  auto dense_fp32_field = schema->get_field(\"dense_fp32\");\n  auto query_vector = new_doc.get<std::vector<float>>(\"dense_fp32\").value();\n  auto query = vector_column_params::VectorData{\n      vector_column_params::DenseVector{.data = query_vector.data()}};\n  auto query_params = vector_column_params::QueryParams{\n      .dimension = dense_fp32_field->dimension(),\n      .topk = 10,\n      .filter = nullptr,\n      .fetch_vector = false,\n      .query_params = std::make_shared<zvec::QueryParams>(IndexType::HNSW)};\n  query_params.query_params->set_is_using_refiner(true);\n\n  auto results = combined_indexer->Search(query, query_params);\n  ASSERT_TRUE(results.has_value());\n\n  auto vector_results =\n      dynamic_cast<VectorIndexResults *>(results.value().get());\n  ASSERT_TRUE(vector_results);\n  ASSERT_EQ(vector_results->count(), 10);\n\n  int count = 0;\n  auto iter = vector_results->create_iterator();\n  while (iter->valid()) {\n    count++;\n    iter->next();\n  }\n  ASSERT_EQ(count, 10);\n}\n\nTEST_P(SegmentTest, CombinedVectorColumnIndexerQueryWithPks) {\n  options.max_buffer_size_ = 10 * 1024;\n\n  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(\n      false, \"demo\", std::make_shared<HnswIndexParams>(MetricType::IP));\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *tmp_schema, 0, 0, id_map, delete_store, version_manager,\n      options, 0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n\n  uint64_t MAX_DOC = 1000;\n  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, 
MAX_DOC);\n\n  auto combined_indexer = segment->get_combined_vector_indexer(\"dense_fp32\");\n  ASSERT_TRUE(combined_indexer != nullptr);\n\n  Doc verify_doc = test::TestHelper::CreateDoc(999, *schema);\n  std::vector<std::vector<uint64_t>> bf_pks = {\n      {10, 20, 30, 40, 50, 60, 70, 80, 90, 999}};\n  // query\n  auto dense_fp32_field = schema->get_field(\"dense_fp32\");\n  auto query_vector = verify_doc.get<std::vector<float>>(\"dense_fp32\").value();\n  auto query = vector_column_params::VectorData{\n      vector_column_params::DenseVector{.data = query_vector.data()}};\n  auto query_params = vector_column_params::QueryParams{\n      .data_type = dense_fp32_field->data_type(),\n      .dimension = dense_fp32_field->dimension(),\n      .topk = 10,\n      .filter = nullptr,\n      .fetch_vector = false,\n      .query_params = std::make_shared<zvec::QueryParams>(IndexType::HNSW),\n      .group_by = nullptr,\n      .bf_pks = bf_pks,\n      .refiner_param = nullptr,\n      .extra_params = {}};\n\n  auto results = combined_indexer->Search(query, query_params);\n  ASSERT_TRUE(results.has_value());\n\n  auto vector_results =\n      dynamic_cast<VectorIndexResults *>(results.value().get());\n  ASSERT_TRUE(vector_results);\n  ASSERT_EQ(vector_results->count(), 10);\n\n  int count = 0;\n  std::vector<uint64_t> result_doc_ids;\n  auto iter = vector_results->create_iterator();\n  while (iter->valid()) {\n    count++;\n    result_doc_ids.push_back(iter->doc_id());\n    iter->next();\n  }\n  ASSERT_EQ(count, 10);\n  // need reverse result_doc_ids\n  std::reverse(result_doc_ids.begin(), result_doc_ids.end());\n  ASSERT_EQ(result_doc_ids, bf_pks[0]);\n}\n\n\nTEST_P(SegmentTest, ConcurrentInsertOperations) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n  const int num_threads = 4;\n  const int docs_per_thread = 50;\n  
std::vector<std::thread> threads;\n\n  // Launch multiple threads to insert documents concurrently\n  for (int t = 0; t < num_threads; ++t) {\n    threads.emplace_back([&, t]() {\n      for (int i = 0; i < docs_per_thread; ++i) {\n        int doc_id = t * docs_per_thread + i;\n        Doc doc = test::TestHelper::CreateDoc(doc_id, *schema);\n        auto status = segment->Insert(doc);\n        EXPECT_TRUE(status.ok())\n            << \"Thread \" << t << \" insert failed for doc \" << doc_id;\n      }\n    });\n  }\n\n  // Wait for all threads to complete\n  for (auto &thread : threads) {\n    thread.join();\n  }\n\n  // Verify total document count\n  uint64_t count = segment->doc_count();\n  EXPECT_EQ(count, num_threads * docs_per_thread);\n}\n\nTEST_P(SegmentTest, ConcurrentMixedOperations) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 100);\n  ASSERT_TRUE(segment != nullptr);\n\n  std::vector<std::thread> threads;\n\n  // Thread 1: Insert new documents\n  threads.emplace_back([&]() {\n    for (int i = 100; i < 120; ++i) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      auto status = segment->Insert(doc);\n      EXPECT_TRUE(status.ok() || status.code() == StatusCode::ALREADY_EXISTS);\n    }\n  });\n\n  // Thread 2: Update existing documents\n  threads.emplace_back([&]() {\n    for (int i = 0; i < 50; i += 5) {\n      Doc doc = test::TestHelper::CreateDoc(i, *schema);\n      doc.set<std::string>(\"name\", \"updated_concurrent_\" + std::to_string(i));\n      auto status = segment->Update(doc);\n      EXPECT_TRUE(status.ok() || status.code() == StatusCode::NOT_FOUND);\n    }\n  });\n\n  // Thread 3: Delete documents\n  threads.emplace_back([&]() {\n    for (int i = 50; i < 100; i += 10) {\n      auto status = segment->Delete(\"pk_\" + std::to_string(i));\n      EXPECT_TRUE(status.ok() || status.code() == StatusCode::NOT_FOUND);\n    }\n  });\n\n  // 
Wait for all threads to complete\n  for (auto &thread : threads) {\n    thread.join();\n  }\n}\n\n// corner cases\nTEST_P(SegmentTest, DuplicateInsert) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n  Doc doc1 = test::TestHelper::CreateDoc(0, *schema);\n  auto status1 = segment->Insert(doc1);\n  EXPECT_TRUE(status1.ok()) << \"First insert failed: \" << status1.message();\n\n  auto meta = segment->meta();\n  ASSERT_TRUE(meta != nullptr);\n  auto &mem_block = meta->writing_forward_block().value();\n  EXPECT_EQ(mem_block.doc_count_, 1);\n  EXPECT_EQ(mem_block.min_doc_id_, 0);\n  EXPECT_EQ(mem_block.max_doc_id_, 0);\n\n  auto doc = segment->Fetch(0);\n  EXPECT_TRUE(doc != nullptr);\n  EXPECT_EQ(*doc, doc1);\n\n  auto status2 = segment->Insert(doc1);\n  EXPECT_FALSE(status2.ok()) << \"Duplicate insert should fail\";\n\n  auto fetched_doc = segment->Fetch(0);\n  ASSERT_TRUE(fetched_doc != nullptr);\n  EXPECT_NE(fetched_doc->get<std::string>(\"name\").value(), \"duplicate_name\");\n}\n\nTEST_P(SegmentTest, DuplicateDelete) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto status1 = segment->Delete(\"pk_2\");\n  EXPECT_TRUE(status1.ok()) << \"First delete failed: \" << status1.message();\n\n  auto status2 = segment->Delete(\"pk_2\");\n  EXPECT_FALSE(status2.ok()) << \"Duplicate delete should fail\";\n\n  auto status3 = segment->Delete(2);\n  EXPECT_FALSE(status3.ok())\n      << \"Delete by doc_id of already deleted doc should fail\";\n}\n\nTEST_P(SegmentTest, DeleteNonExistentDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto 
status1 = segment->Delete(\"pk_999\");\n  EXPECT_FALSE(status1.ok()) << \"Delete non-existent pk should fail\";\n}\n\nTEST_P(SegmentTest, UpdateNonExistentDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  Doc doc = test::TestHelper::CreateDoc(999, *schema);\n  doc.set<std::string>(\"name\", \"non_existent_doc\");\n\n  auto status = segment->Update(doc);\n  EXPECT_FALSE(status.ok()) << \"Update non-existent doc should fail\";\n}\n\nTEST_P(SegmentTest, UpsertNonExistentDoc) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  Doc doc = test::TestHelper::CreateDoc(999, *schema);\n  doc.set<std::string>(\"name\", \"new_upserted_doc\");\n\n  auto status = segment->Upsert(doc);\n  EXPECT_TRUE(status.ok()) << \"Upsert non-existent doc should succeed: \"\n                           << status.message();\n\n  auto filter = segment->get_filter();\n  uint64_t count = segment->doc_count(filter);\n  EXPECT_EQ(count, 6);\n}\n\nTEST_P(SegmentTest, ScanWithEmptyColumns) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto reader = segment->scan({});\n  ASSERT_TRUE(reader == nullptr);\n}\n\nTEST_P(SegmentTest, ScanWithInvalidColumns) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Try to scan with invalid column name\n  auto reader = segment->scan({\"invalid_column\"});\n  EXPECT_TRUE(reader == nullptr);\n}\n\nTEST_P(SegmentTest, FetchNonExistentDoc) {\n  auto segment = 
test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto doc = segment->Fetch(999);\n  EXPECT_TRUE(doc == nullptr) << \"Fetch non-existent doc should return nullptr\";\n}\n\nTEST_P(SegmentTest, FetchWithInvalidIndices) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  std::vector<int> invalid_indices = {999, 1000};\n  auto table = segment->fetch({\"id\", \"name\"}, invalid_indices);\n\n  ASSERT_TRUE(table == nullptr);\n}\n\nTEST_P(SegmentTest, FetchWithInvalidColumns) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 10);\n  ASSERT_TRUE(segment != nullptr);\n\n  // Try to fetch with invalid column name\n  std::vector<int> indices = {0, 1, 2};\n  auto table = segment->fetch({\"invalid_column\"}, indices);\n  EXPECT_TRUE(table == nullptr);\n}\n\nTEST_P(SegmentTest, InsertEmptyDocWithNullableSchema) {\n  auto nullable_schema = test::TestHelper::CreateNormalSchema(true, col_name);\n\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *nullable_schema, 0, 0, id_map, delete_store, version_manager,\n      options, 0, 0);\n  ASSERT_TRUE(segment != nullptr);\n\n  Doc empty_doc;\n  empty_doc.set_pk(\"pk_empty\");\n  auto status = segment->Insert(empty_doc);\n  EXPECT_TRUE(status.ok());\n}\n\nTEST_P(SegmentTest, MultipleDuplicateDeletes) {\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, 5);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto status1 = segment->Delete(\"pk_1\");\n  EXPECT_TRUE(status1.ok());\n\n  for (int i = 0; i < 10; ++i) {\n    auto status = segment->Delete(\"pk_1\");\n    
EXPECT_FALSE(status.ok()) << \"Delete iteration \" << i << \" should fail\";\n  }\n\n  auto filter = segment->get_filter();\n  uint64_t count = segment->doc_count(filter);\n  EXPECT_EQ(count, 4);\n}\n\nTEST_P(SegmentTest, FetchWithTwoVectorFields) {\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense2_fp32\", DataType::VECTOR_FP32, 128, false,\n      std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n  int doc_count = 1000;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n  segment.reset();\n  version_manager.reset();\n  id_map->flush();\n  id_map.reset();\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n  delete_store->flush(delete_store_path);\n  delete_store.reset();\n\n  auto recover_version_manager = VersionManager::Recovery(col_path);\n  auto recover_version_mgr = recover_version_manager.value();\n  ASSERT_TRUE(recover_version_mgr != nullptr);\n\n  auto v = recover_version_mgr->get_current_version();\n\n  // idmap\n  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,\n                                                    v.id_map_path_suffix());\n  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);\n  auto status = recover_id_map->open(idmap_path, false, false);\n  ASSERT_TRUE(status.ok());\n\n  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,\n                                               v.delete_snapshot_path_suffix());\n  auto recover_delete_store =\n      DeleteStore::CreateAndLoad(col_name, delete_store_path);\n  ASSERT_TRUE(recover_delete_store != nullptr);\n\n  int incr_doc_count = 1000;\n  auto result = Segment::Open(col_path, *schema, *v.writing_segment_meta(),\n                              recover_id_map, recover_delete_store,\n                              
recover_version_mgr, options);\n  ASSERT_TRUE(result.has_value());\n  segment = std::move(result).value();\n  ASSERT_TRUE(segment != nullptr);\n\n  auto s = test::TestHelper::SegmentInsertDoc(\n      segment, *schema, doc_count, doc_count + incr_doc_count, false);\n  ASSERT_TRUE(s.ok());\n\n  for (int i = 0; i < doc_count + incr_doc_count; i++) {\n    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);\n    auto ret_doc = segment->Fetch(i);\n    if (*ret_doc != expect_doc) {\n      std::cout << \"   ret_doc: \" << ret_doc->to_string() << std::endl;\n      std::cout << \"expect_doc: \" << expect_doc.to_string() << std::endl;\n    }\n    ASSERT_EQ(*ret_doc, expect_doc);\n  }\n}\n\nTEST_P(SegmentTest, FetchPerf) {\n  // create segment\n  int doc_count = 1000;\n  options.max_buffer_size_ = 100 * 1024;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  segment->dump();\n  auto writing_segment_meta = segment->meta();\n\n  // convert writing segment meta to persisted segment meta\n  Version version = version_manager->get_current_version();\n  writing_segment_meta->remove_writing_forward_block();\n  auto s = version.add_persisted_segment_meta(writing_segment_meta);\n  ASSERT_TRUE(s.ok());\n\n  s = version_manager->apply(version);\n  ASSERT_TRUE(s.ok());\n  s = version_manager->flush();\n  ASSERT_TRUE(s.ok());\n\n  segment.reset();\n  version_manager.reset();\n  id_map->flush();\n  id_map.reset();\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n  delete_store->flush(delete_store_path);\n  delete_store.reset();\n\n  auto recover_version_manager = VersionManager::Recovery(col_path);\n  auto recover_version_mgr = recover_version_manager.value();\n  ASSERT_TRUE(recover_version_mgr != nullptr);\n\n  Version v = recover_version_mgr->get_current_version();\n  const auto 
&persist_metas = v.persisted_segment_metas();\n  // idmap\n  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,\n                                                    v.id_map_path_suffix());\n  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);\n  auto status = recover_id_map->open(idmap_path, false, false);\n  ASSERT_TRUE(status.ok());\n\n  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,\n                                               v.delete_snapshot_path_suffix());\n  auto recover_delete_store =\n      DeleteStore::CreateAndLoad(col_name, delete_store_path);\n  ASSERT_TRUE(recover_delete_store != nullptr);\n\n  // open persist segment\n  options.read_only_ = true;\n  auto result =\n      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,\n                    recover_delete_store, recover_version_mgr, options);\n  ASSERT_TRUE(result.has_value());\n  segment = std::move(result).value();\n  ASSERT_TRUE(segment != nullptr);\n\n  s = segment->add_column(\n      std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false),\n      \"int32 + 1\", AddColumnOptions());\n  EXPECT_TRUE(s.ok());\n\n  std::vector<int> indices = {0, 3, 6, 1, 0, 501, 999};\n  auto func = [&](const std::vector<std::string> columns,\n                  int local_row_id_idx) -> void {\n    auto combined_table = segment->fetch(columns, indices);\n    ASSERT_TRUE(combined_table != nullptr);\n    EXPECT_EQ(combined_table->num_columns(), columns.size());\n    EXPECT_EQ(combined_table->num_rows(), indices.size());\n\n    auto field = combined_table->schema()->field(local_row_id_idx);\n    EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n    // Get data from the LOCAL_ROW_ID column for each row\n    auto id_column = combined_table->column(local_row_id_idx);\n    auto id_array =\n        std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n    std::vector<int32_t> &expected_ids = indices;\n    
std::vector<int32_t> actual_ids;\n\n    for (int i = 0; i < id_array->length(); ++i) {\n      actual_ids.push_back(id_array->Value(i));\n    }\n\n    EXPECT_EQ(actual_ids, expected_ids)\n        << \"ID column values don't match expected order\";\n  };\n\n  func({LOCAL_ROW_ID, \"id\", \"name\", \"add_int32\"}, 0);\n  func(\n      {\n          \"id\",\n          LOCAL_ROW_ID,\n          \"name\",\n          \"add_int32\",\n      },\n      1);\n  func({\"id\", \"name\", \"add_int32\", LOCAL_ROW_ID}, 3);\n}\n\nTEST_P(SegmentTest, AddColumn) {\n  // create segment\n  options.max_buffer_size_ = 10 * 1024 * 1024;\n  int doc_count = 1000;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto s = segment->add_column(\n      std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false),\n      \"int32 + 1\", AddColumnOptions());\n  EXPECT_FALSE(s.ok());\n\n  segment->dump();\n  auto writing_segment_meta = segment->meta();\n\n  // convert writing segment meta to persisted segment meta\n  Version version = version_manager->get_current_version();\n  writing_segment_meta->remove_writing_forward_block();\n  s = version.add_persisted_segment_meta(writing_segment_meta);\n  ASSERT_TRUE(s.ok());\n\n  s = version_manager->apply(version);\n  ASSERT_TRUE(s.ok());\n  s = version_manager->flush();\n  ASSERT_TRUE(s.ok());\n\n  segment.reset();\n  version_manager.reset();\n  id_map->flush();\n  id_map.reset();\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n  delete_store->flush(delete_store_path);\n  delete_store.reset();\n\n  auto recover_version_manager = VersionManager::Recovery(col_path);\n  auto recover_version_mgr = recover_version_manager.value();\n  ASSERT_TRUE(recover_version_mgr != nullptr);\n\n  Version v = recover_version_mgr->get_current_version();\n  const auto 
&persist_metas = v.persisted_segment_metas();\n  // idmap\n  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,\n                                                    v.id_map_path_suffix());\n  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);\n  auto status = recover_id_map->open(idmap_path, false, false);\n  ASSERT_TRUE(status.ok());\n\n  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,\n                                               v.delete_snapshot_path_suffix());\n  auto recover_delete_store =\n      DeleteStore::CreateAndLoad(col_name, delete_store_path);\n  ASSERT_TRUE(recover_delete_store != nullptr);\n\n  // open persist segment\n  options.read_only_ = true;\n  auto result =\n      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,\n                    recover_delete_store, recover_version_mgr, options);\n  ASSERT_TRUE(result.has_value());\n  segment = std::move(result).value();\n  ASSERT_TRUE(segment != nullptr);\n\n  s = segment->add_column(\n      std::make_shared<FieldSchema>(\"add_int32\", DataType::INT32, false), \"\",\n      AddColumnOptions());\n  EXPECT_FALSE(s.ok());\n\n  s = segment->add_column(std::make_shared<FieldSchema>(\n                              \"add_undefined\", DataType::UNDEFINED, false),\n                          \"\", AddColumnOptions());\n  EXPECT_FALSE(s.ok());\n\n  // before add column\n  auto meta = segment->meta();\n  auto &persist_blocks = meta->persisted_blocks();\n  int old_scalar_blocks_cnt = 0;\n  for (auto &block : persist_blocks) {\n    if (block.type() == BlockType::SCALAR) {\n      old_scalar_blocks_cnt++;\n    }\n  }\n\n  int add_column_cnt = 0;\n  auto func = [&](const std::shared_ptr<FieldSchema> &field_schema,\n                  const std::string &expression) {\n    auto &column_name = field_schema->name();\n    AddColumnOptions add_options;\n    status = segment->add_column(field_schema, expression, add_options);\n    
EXPECT_TRUE(status.ok());\n\n    // after add column\n    int new_scalar_blocks_cnt = 0;\n    for (auto &block : persist_blocks) {\n      if (block.type() == BlockType::SCALAR) {\n        new_scalar_blocks_cnt++;\n      }\n    }\n    EXPECT_EQ(\n        new_scalar_blocks_cnt,\n        old_scalar_blocks_cnt + old_scalar_blocks_cnt * (++add_column_cnt));\n    auto combined_reader = segment->scan({\"id\", \"name\", \"age\", column_name});\n    ASSERT_TRUE(combined_reader != nullptr);\n    std::shared_ptr<arrow::RecordBatch> batch;\n    uint32_t total_doc = 0;\n    while (true) {\n      auto status = combined_reader->ReadNext(&batch);\n      if (status.ok() == false) break;\n      if (batch == nullptr) break;\n\n      EXPECT_EQ(batch->num_columns(), 4);\n\n      total_doc += batch->num_rows();\n    }\n    EXPECT_EQ(total_doc, doc_count);\n\n    auto new_schema = *schema;\n    new_schema.add_field(field_schema);\n\n    auto check_doc = [&](int doc_count) {\n      for (int i = 0; i < doc_count; i++) {\n        auto expect_doc = test::TestHelper::CreateDoc(i, new_schema);\n        auto doc = segment->Fetch(i);\n        ASSERT_EQ(doc->pk(), expect_doc.pk());\n\n        // column in same persist block\n        {\n          ExecBatchPtr exec_batch = segment->fetch({\"id\", \"name\", \"age\"}, i);\n          ASSERT_TRUE(exec_batch != nullptr);\n          EXPECT_EQ(exec_batch->length, 1);\n          EXPECT_EQ(exec_batch->values.size(), 3);\n\n          auto id_scalar = exec_batch->values[0].scalar();\n          ASSERT_TRUE(id_scalar != nullptr);\n          auto id_value =\n              std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n          ASSERT_TRUE(id_value != nullptr);\n          EXPECT_EQ(id_value->value, i);\n        }\n\n        {\n          ExecBatchPtr exec_batch = segment->fetch({column_name}, i);\n          ASSERT_TRUE(exec_batch != nullptr);\n          EXPECT_EQ(exec_batch->length, 1);\n          EXPECT_EQ(exec_batch->values.size(), 1);\n\n          
auto id_scalar = exec_batch->values[0].scalar();\n          ASSERT_TRUE(id_scalar != nullptr);\n        }\n\n        // column in different persist block\n        {\n          ExecBatchPtr exec_batch =\n              segment->fetch({\"id\", \"name\", \"age\", column_name}, i);\n          ASSERT_TRUE(exec_batch != nullptr);\n          EXPECT_EQ(exec_batch->length, 1);\n          EXPECT_EQ(exec_batch->values.size(), 4);\n\n          auto id_scalar = exec_batch->values[0].scalar();\n          ASSERT_TRUE(id_scalar != nullptr);\n          auto id_value =\n              std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n          ASSERT_TRUE(id_value != nullptr);\n          EXPECT_EQ(id_value->value, i);\n        }\n      }\n    };\n    check_doc(doc_count);\n  };\n\n  auto index_param = std::make_shared<InvertIndexParams>();\n  std::vector<std::pair<std::string, std::shared_ptr<FieldSchema>>>\n      test_column_schemas = {\n          {\"add_int32\", std::make_shared<FieldSchema>(\"\", DataType::INT32,\n                                                      false, index_param)},\n          {\"add_int64\", std::make_shared<FieldSchema>(\"\", DataType::INT64,\n                                                      false, index_param)},\n          {\"add_uint32\", std::make_shared<FieldSchema>(\"\", DataType::UINT32,\n                                                       false, index_param)},\n          {\"add_uint64\", std::make_shared<FieldSchema>(\"\", DataType::UINT64,\n                                                       false, index_param)},\n          {\"add_float\", std::make_shared<FieldSchema>(\"\", DataType::FLOAT,\n                                                      false, index_param)},\n          {\"add_double\", std::make_shared<FieldSchema>(\"\", DataType::DOUBLE,\n                                                       false, index_param)},\n          {\"add_int32_nullable\", std::make_shared<FieldSchema>(\n                                     
\"\", DataType::INT32, true, index_param)},\n          {\"add_int64_nullable\", std::make_shared<FieldSchema>(\n                                     \"\", DataType::INT64, true, index_param)},\n          {\"add_uint32_nullable\", std::make_shared<FieldSchema>(\n                                      \"\", DataType::UINT32, true, index_param)},\n          {\"add_uint64_nullable\", std::make_shared<FieldSchema>(\n                                      \"\", DataType::UINT64, true, index_param)},\n          {\"add_float_nullable\", std::make_shared<FieldSchema>(\n                                     \"\", DataType::FLOAT, true, index_param)},\n          {\"add_double_nullable\", std::make_shared<FieldSchema>(\n                                      \"\", DataType::DOUBLE, true, index_param)},\n      };\n\n  std::unordered_map<std::string, std::vector<std::string>> test_expressions = {\n      {\"add_int32\", {\"int32 + 1\", \"-int32\", \"+int32\", \"1\", \"-1\"}},\n      {\"add_int64\", {\"int64 + 1\", \"-int64\", \"+int64\", \"1\", \"-1\"}},\n      {\"add_uint32\", {\"uint32 + 1\", \"-uint32\", \"+int32\", \"1\", \"0\"}},\n      {\"add_uint64\", {\"uint64 + 1\", \"-uint64\", \"+uint64\", \"1\", \"0\"}},\n      {\"add_float\", {\"float + 1.0\", \"-float\", \"+float\", \"0.1\", \"-0.1\"}},\n      {\"add_double\", {\"double + 1.0\", \"-double\", \"+double\", \"0.1\", \"-0.1\"}},\n      {\"add_int32_nullable\", {\"\"}},\n      {\"add_int64_nullable\", {\"\"}},\n      {\"add_uint32_nullable\", {\"\"}},\n      {\"add_uint64_nullable\", {\"\"}},\n      {\"add_float_nullable\", {\"\"}},\n      {\"add_double_nullable\", {\"\"}},\n  };\n\n  for (auto &[column_name, field_schema] : test_column_schemas) {\n    auto expressions = test_expressions[column_name];\n    for (auto &expression : expressions) {\n      std::string col_name = column_name + \"_\" +\n                             std::to_string(ailego::Crc32c::Hash(\n                                 expression.data(), 
expression.size()));\n      auto new_field_schema = std::make_shared<FieldSchema>(\n          field_schema->name(), field_schema->data_type(),\n          field_schema->nullable(), field_schema->index_params());\n      new_field_schema->set_name(col_name);\n      func(new_field_schema, expression);\n    }\n  }\n}\n\nTEST_P(SegmentTest, AlterColumn) {\n  // create segment\n  int doc_count = 1000;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto s = segment->alter_column(\n      \"alter_int32\",\n      std::make_shared<FieldSchema>(\"alter_int32\", DataType::INT32, false),\n      AlterColumnOptions());\n  EXPECT_FALSE(s.ok());\n\n  segment->dump();\n  auto writing_segment_meta = segment->meta();\n\n  // convert writing segment meta to persisted segment meta\n  Version version = version_manager->get_current_version();\n  writing_segment_meta->remove_writing_forward_block();\n  s = version.add_persisted_segment_meta(writing_segment_meta);\n  ASSERT_TRUE(s.ok());\n\n  s = version_manager->apply(version);\n  ASSERT_TRUE(s.ok());\n  s = version_manager->flush();\n  ASSERT_TRUE(s.ok());\n\n  segment.reset();\n  version_manager.reset();\n  id_map->flush();\n  id_map.reset();\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n  delete_store->flush(delete_store_path);\n  delete_store.reset();\n\n  auto recover_version_manager = VersionManager::Recovery(col_path);\n  auto recover_version_mgr = recover_version_manager.value();\n  ASSERT_TRUE(recover_version_mgr != nullptr);\n\n  Version v = recover_version_mgr->get_current_version();\n  const auto &persist_metas = v.persisted_segment_metas();\n\n  // idmap\n  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,\n                                                    v.id_map_path_suffix());\n  
IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);\n  auto status = recover_id_map->open(idmap_path, false, false);\n  ASSERT_TRUE(status.ok());\n\n  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,\n                                               v.delete_snapshot_path_suffix());\n  auto recover_delete_store =\n      DeleteStore::CreateAndLoad(col_name, delete_store_path);\n  ASSERT_TRUE(recover_delete_store != nullptr);\n\n  // open persist segment\n  options.read_only_ = true;\n  auto result =\n      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,\n                    recover_delete_store, recover_version_mgr, options);\n  ASSERT_TRUE(result.has_value());\n  segment = std::move(result).value();\n  ASSERT_TRUE(segment != nullptr);\n\n  s = segment->alter_column(\n      \"alter_int32\",\n      std::make_shared<FieldSchema>(\"alter_int32\", DataType::INT32, false),\n      AlterColumnOptions());\n  EXPECT_FALSE(s.ok());  // not found\n\n  s = segment->alter_column(\n      \"int32\",\n      std::make_shared<FieldSchema>(\"int32\", DataType::UNDEFINED, false),\n      AlterColumnOptions());\n  EXPECT_FALSE(s.ok());  // undefined type\n\n  auto func = [&](const std::string &column_name,\n                  const std::shared_ptr<FieldSchema> &field_schema) {\n    AlterColumnOptions alter_options;\n    status = segment->alter_column(column_name, field_schema, alter_options);\n    EXPECT_TRUE(status.ok());\n\n    auto combined_reader = segment->scan({\"id\", \"name\", \"age\", column_name});\n    ASSERT_TRUE(combined_reader != nullptr);\n    std::shared_ptr<arrow::RecordBatch> batch;\n    uint32_t total_doc = 0;\n    while (true) {\n      auto status = combined_reader->ReadNext(&batch);\n      if (status.ok() == false) break;\n      if (batch == nullptr) break;\n\n      EXPECT_EQ(batch->num_columns(), 4);\n\n      total_doc += batch->num_rows();\n    }\n    EXPECT_EQ(total_doc, doc_count);\n  };\n\n  
std::vector<std::string> test_alter_columns = {\"int32\",  \"int64\", \"uint32\",\n                                                 \"uint64\", \"float\", \"double\"};\n\n  for (auto &column_name : test_alter_columns) {\n    // std::string column_name = \"int32\";\n    for (auto &dest_column : test_alter_columns) {\n      if (column_name == dest_column) continue;\n      auto field_schema = schema->get_field(dest_column);\n      auto new_field_schema = std::make_shared<FieldSchema>(*field_schema);\n      new_field_schema->set_name(column_name);\n      func(column_name, new_field_schema);\n    }\n  }\n}\n\nTEST_P(SegmentTest, DropColumn) {\n  // create segment\n  int doc_count = 1000;\n  auto segment = test::TestHelper::CreateSegmentWithDoc(\n      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,\n      0, doc_count);\n  ASSERT_TRUE(segment != nullptr);\n\n  auto s = segment->drop_column(\"int32\");\n  EXPECT_FALSE(s.ok());\n\n  segment->dump();\n  auto writing_segment_meta = segment->meta();\n\n  // convert writing segment meta to persisted segment meta\n  Version version = version_manager->get_current_version();\n  writing_segment_meta->remove_writing_forward_block();\n  s = version.add_persisted_segment_meta(writing_segment_meta);\n  ASSERT_TRUE(s.ok());\n\n  s = version_manager->apply(version);\n  ASSERT_TRUE(s.ok());\n  s = version_manager->flush();\n  ASSERT_TRUE(s.ok());\n\n  segment.reset();\n  version_manager.reset();\n  id_map->flush();\n  id_map.reset();\n\n  std::string delete_store_path =\n      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);\n  delete_store->flush(delete_store_path);\n  delete_store.reset();\n\n  auto recover_version_manager = VersionManager::Recovery(col_path);\n  auto recover_version_mgr = recover_version_manager.value();\n  ASSERT_TRUE(recover_version_mgr != nullptr);\n\n  Version v = recover_version_mgr->get_current_version();\n  const auto &persist_metas = v.persisted_segment_metas();\n  // 
idmap\n  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,\n                                                    v.id_map_path_suffix());\n  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);\n  auto status = recover_id_map->open(idmap_path, false, false);\n  ASSERT_TRUE(status.ok());\n\n  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,\n                                               v.delete_snapshot_path_suffix());\n  auto recover_delete_store =\n      DeleteStore::CreateAndLoad(col_name, delete_store_path);\n  ASSERT_TRUE(recover_delete_store != nullptr);\n\n  // open persist segment\n  options.read_only_ = true;\n  auto result =\n      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,\n                    recover_delete_store, recover_version_mgr, options);\n  ASSERT_TRUE(result.has_value());\n  segment = std::move(result).value();\n  ASSERT_TRUE(segment != nullptr);\n\n  auto meta = segment->meta();\n  auto &persist_blocks = meta->persisted_blocks();\n\n  auto func = [&](const std::string &column_name) {\n    status = segment->drop_column(column_name);\n    EXPECT_TRUE(status.ok());\n\n    // after drop column\n    bool col_exit = false;\n    for (auto &block : persist_blocks) {\n      if (block.type() == BlockType::SCALAR) {\n        if (block.contain_column(column_name)) {\n          col_exit = true;\n          break;\n        }\n      }\n    }\n\n    EXPECT_EQ(col_exit, false);\n\n    auto combined_reader = segment->scan({column_name});\n    ASSERT_TRUE(combined_reader == nullptr);\n  };\n\n  std::vector<std::string> test_drop_columns = {\"int32\",  \"int64\", \"uint32\",\n                                                \"uint64\", \"float\", \"double\"};\n\n  for (auto &column_name : test_drop_columns) {\n    func(column_name);\n  }\n}\n\n\nINSTANTIATE_TEST_SUITE_P(MMapTest, SegmentTest, testing::Values(true, false));\n"
  },
  {
    "path": "tests/db/index/segment/sql_expr_parser_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n\n#include \"db/index/segment/sql_expr_parser.h\"\n#include <arrow/array.h>\n#include <arrow/compute/api.h>\n#include <arrow/dataset/api.h>\n#include <arrow/dataset/discovery.h>\n#include <arrow/memory_pool.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <arrow/testing/gtest_util.h>\n#include <gtest/gtest.h>\n#include \"utils/utils.h\"\n\nusing namespace arrow;\nusing namespace arrow::dataset;\nusing namespace zvec;\n\nclass SqlExprParserTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    // Setup code if needed\n  }\n\n  void TearDown() override {\n    // Cleanup code if needed\n  }\n};\n\nTEST_F(SqlExprParserTest, ParseAllSupportedTypes) {\n  auto schema = arrow::schema({arrow::field(\"int32\", arrow::int32()),\n                               arrow::field(\"uint32\", arrow::uint32()),\n                               arrow::field(\"float\", arrow::float32()),\n                               arrow::field(\"double\", arrow::float64()),\n                               arrow::field(\"int64\", arrow::int64()),\n                               arrow::field(\"uint64\", arrow::uint64()),\n\n                               arrow::field(\"string\", arrow::utf8()),\n\n                               arrow::field(\"bool\", arrow::boolean())});\n\n  EXPECT_TRUE(ParseToExpression(\"int32 + uint32\", 
schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"float * double\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"int64 - uint64\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"int32 / float\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"double + int64\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"uint32 * int32\", schema).ok());\n\n  EXPECT_TRUE(ParseToExpression(\"int32 + float - double\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"int64 * uint32 / float\", schema).ok());\n\n  EXPECT_TRUE(ParseToExpression(\"(int32 + float) * double\", schema).ok());\n  EXPECT_TRUE(\n      ParseToExpression(\"int32 + (float - double) * int64\", schema).ok());\n\n  EXPECT_TRUE(\n      ParseToExpression(\"((int32 + uint32) * float) - (double / int64)\", schema)\n          .ok());\n\n  EXPECT_TRUE(ParseToExpression(\"int32 + 100\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"float * 3.14\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"double - 2.5\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"(int64 + 10) * (uint32 - 5)\", schema).ok());\n\n  EXPECT_TRUE(ParseToExpression(\"-int32\", schema).ok());\n  EXPECT_TRUE(ParseToExpression(\"-(float + double)\", schema).ok());\n}\n\nTEST_F(SqlExprParserTest, ParseStringExpression) {\n  auto schema = arrow::schema({arrow::field(\"name\", arrow::utf8()),\n                               arrow::field(\"age\", arrow::int32())});\n\n  auto result = ParseToExpression(\"name = 'John'\", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseBooleanExpression) {\n  auto schema = arrow::schema({arrow::field(\"active\", arrow::boolean()),\n                               arrow::field(\"age\", arrow::int32())});\n\n  auto result = ParseToExpression(\"active AND age > 18\", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseListExpression) {\n  auto schema = arrow::schema(\n      {arrow::field(\"int32_list\", arrow::list(arrow::int32())),\n       
arrow::field(\"float64_list\", arrow::list(arrow::float64())),\n       arrow::field(\"int32\", arrow::int32()),\n       arrow::field(\"float64\", arrow::float64())});\n\n  auto result = ParseToExpression(\"int32 + int32_list\", schema);\n  EXPECT_FALSE(result.ok());\n  result = ParseToExpression(\"float64 + float64_list\", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseComplexExpression) {\n  auto schema = arrow::schema({arrow::field(\"price\", arrow::float64()),\n                               arrow::field(\"quantity\", arrow::int32()),\n                               arrow::field(\"discount\", arrow::float64())});\n\n  auto result = ParseToExpression(\"price * quantity * (1 - discount)\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n}\n\nTEST_F(SqlExprParserTest, ParseInvalidExpression) {\n  auto schema = arrow::schema({arrow::field(\"a\", arrow::int32())});\n\n  auto result = ParseToExpression(\"a + \", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseNonExistentField) {\n  auto schema = arrow::schema({arrow::field(\"a\", arrow::int32())});\n\n  auto result = ParseToExpression(\"b + 1\", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseFunctionCall) {\n  auto schema = arrow::schema({arrow::field(\"value\", arrow::float64())});\n\n  auto result = ParseToExpression(\"sqrt(value)\", schema);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(SqlExprParserTest, ParseComplexCombinations) {\n  auto schema = arrow::schema(\n      {arrow::field(\"a\", arrow::int32()), arrow::field(\"b\", arrow::float64()),\n       arrow::field(\"c\", arrow::int64()), arrow::field(\"d\", arrow::float32())});\n\n  // Deeply nested expressions\n  auto result = ParseToExpression(\"((a + b) * (c - d)) / (a + 1)\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                          
 << result.status().ToString();\n\n  // Multi-level parentheses expressions\n  result = ParseToExpression(\"(((a + b) - c) * d) + (a / b)\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n\n  // Mixed constants and variables\n  result = ParseToExpression(\"(a + 10) * (b - 2.5) / (c + 100)\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n}\n\n// Test negative number expressions\nTEST_F(SqlExprParserTest, ParseNegativeNumbers) {\n  auto schema = arrow::schema({arrow::field(\"id\", arrow::int32()),\n                               arrow::field(\"value\", arrow::float64())});\n\n  // Test negative fields\n  auto result = ParseToExpression(\"-id\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n\n  // Test negative numbers combined with other operators\n  result = ParseToExpression(\"-id + value\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n\n  // Test nested negative expressions\n  result = ParseToExpression(\"-(-id)\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n\n  // Test complex negative expressions\n  result = ParseToExpression(\"-(id + value) * 2\", schema);\n  EXPECT_TRUE(result.ok()) << \"Failed to parse SQL expression status:\"\n                           << result.status().ToString();\n}\n\n\n// Create a simple Table\nstd::shared_ptr<arrow::Table> MakeTestTable() {\n  // Create integer column\n  arrow::Int32Builder int_builder;\n  ARROW_EXPECT_OK(int_builder.AppendValues({1, 2, 3, 4, 5}));\n  std::shared_ptr<arrow::Array> int_array;\n  
ARROW_EXPECT_OK(int_builder.Finish(&int_array));\n\n  // Create double column\n  arrow::DoubleBuilder double_builder;\n  ARROW_EXPECT_OK(double_builder.AppendValues({1.1, 2.2, 3.3, 4.4, 5.5}));\n  std::shared_ptr<arrow::Array> double_array;\n  ARROW_EXPECT_OK(double_builder.Finish(&double_array));\n\n  // Create string column\n  arrow::StringBuilder string_builder;\n  ARROW_EXPECT_OK(string_builder.Append(\"a\"));\n  ARROW_EXPECT_OK(string_builder.Append(\"b\"));\n  ARROW_EXPECT_OK(string_builder.Append(\"c\"));\n  ARROW_EXPECT_OK(string_builder.Append(\"d\"));\n  ARROW_EXPECT_OK(string_builder.Append(\"e\"));\n  std::shared_ptr<arrow::Array> string_array;\n  ARROW_EXPECT_OK(string_builder.Finish(&string_array));\n\n  // Create boolean column\n  arrow::BooleanBuilder bool_builder;\n  ARROW_EXPECT_OK(bool_builder.Append(true));\n  ARROW_EXPECT_OK(bool_builder.Append(false));\n  ARROW_EXPECT_OK(bool_builder.Append(true));\n  ARROW_EXPECT_OK(bool_builder.Append(false));\n  ARROW_EXPECT_OK(bool_builder.Append(true));\n  std::shared_ptr<arrow::Array> bool_array;\n  ARROW_EXPECT_OK(bool_builder.Finish(&bool_array));\n\n  // Build table\n  auto schema = arrow::schema({arrow::field(\"int_col\", arrow::int32()),\n                               arrow::field(\"double_col\", arrow::float64()),\n                               arrow::field(\"string_col\", arrow::utf8()),\n                               arrow::field(\"bool_col\", arrow::boolean())});\n\n  auto int_chunked = std::make_shared<arrow::ChunkedArray>(int_array);\n  auto double_chunked = std::make_shared<arrow::ChunkedArray>(double_array);\n  auto string_chunked = std::make_shared<arrow::ChunkedArray>(string_array);\n  auto bool_chunked = std::make_shared<arrow::ChunkedArray>(bool_array);\n\n  return arrow::Table::Make(\n      schema, {int_chunked, double_chunked, string_chunked, bool_chunked});\n}\n\n\n// Convert Table to Dataset (for testing)\narrow::Result<std::shared_ptr<arrow::dataset::Dataset>> MakeTestDataset(\n  
  const std::shared_ptr<arrow::Table> &table) {\n  return std::make_shared<arrow::dataset::InMemoryDataset>(table);\n}\n\nTEST_F(SqlExprParserTest, ParseAndScanDataSet) {\n  auto status = arrow::compute::Initialize();\n\n  auto schema = arrow::schema({arrow::field(\"int_col\", arrow::int32()),\n                               arrow::field(\"double_col\", arrow::float64()),\n                               arrow::field(\"string_col\", arrow::utf8()),\n                               arrow::field(\"bool_col\", arrow::boolean())});\n\n  // Step 1: Create test table\n  auto table = MakeTestTable();\n\n  // Step 2: Convert to Dataset\n  auto dataset = MakeTestDataset(table).ValueOrDie();\n\n  // Step 3: Create scanner and project expression A + B\n  auto scanner_builder = dataset->NewScan().ValueOrDie();\n\n  auto expr = ParseToExpression(\"int_col + double_col\", schema).ValueOrDie();\n  status = scanner_builder->Project({expr}, {\"sum\"});\n\n  auto scanner = scanner_builder->Finish().ValueOrDie();\n\n  // Step 4: Execute and get results\n  auto result_table = scanner->ToTable().ValueOrDie();\n  ASSERT_TRUE(result_table != nullptr);\n  ASSERT_EQ(result_table->num_rows(), 5);\n\n  auto int_col = table->column(0);         // int_col\n  auto double_col = table->column(1);      // double_col\n  auto sum_col = result_table->column(0);  // sum column\n\n  for (int64_t i = 0; i < table->num_rows(); ++i) {\n    auto int_value =\n        std::static_pointer_cast<arrow::Int32Array>(int_col->chunk(0))\n            ->Value(i);\n    auto double_value =\n        std::static_pointer_cast<arrow::DoubleArray>(double_col->chunk(0))\n            ->Value(i);\n    auto sum_value =\n        std::static_pointer_cast<arrow::DoubleArray>(sum_col->chunk(0))\n            ->Value(i);\n\n    ASSERT_NEAR(int_value + double_value, sum_value, 1e-10);\n  }\n}"
  },
  {
    "path": "tests/db/index/segment/sql_expr_validator_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <arrow/array.h>\n#include <arrow/builder.h>\n#include <arrow/dataset/api.h>\n#include <arrow/table.h>\n#include <arrow/type.h>\n#include <gmock/gmock-matchers.h>\n#include <gtest/gtest.h>\n#include \"db/index/segment/sql_expr_parser.h\"\n\nusing arrow::Status;\nusing arrow::compute::Expression;\nnamespace compute = arrow::compute;\nusing namespace zvec;\n\narrow::Result<Expression> ParseAndValidate(\n    const std::string &expr, const std::shared_ptr<arrow::Schema> &schema) {\n  ARROW_ASSIGN_OR_RAISE(auto parsed, ParseToExpression(expr, schema));\n  return CheckSupportedArithmeticExpression(parsed, *schema);\n}\n\nclass ExprValidatorTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    schema_ = arrow::schema({arrow::field(\"int32_col\", arrow::int32()),\n                             arrow::field(\"double_col\", arrow::float64()),\n                             arrow::field(\"str_col\", arrow::utf8())});\n\n    std::vector<std::shared_ptr<arrow::Array>> arrays;\n    for (const auto &field : schema_->fields()) {\n      std::unique_ptr<arrow::ArrayBuilder> builder;\n      ASSERT_TRUE(arrow::MakeBuilder(arrow::default_memory_pool(),\n                                     field->type(), &builder)\n                      .ok());\n      std::shared_ptr<arrow::Array> array;\n      
ASSERT_TRUE(builder->Finish(&array).ok());\n      arrays.push_back(array);\n    }\n\n    auto table = arrow::Table::Make(schema_, arrays);\n    dataset_ = std::make_shared<arrow::dataset::InMemoryDataset>(table);\n  }\n\n  std::shared_ptr<arrow::Schema> schema_;\n  std::shared_ptr<arrow::dataset::Dataset> dataset_;\n};\n\nTEST_F(ExprValidatorTest, SingleNumericColumn_Valid) {\n  auto result = ParseAndValidate(\"int32_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n\n  result = ParseAndValidate(\"double_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n}\n\nTEST_F(ExprValidatorTest, UnaryPositive_Supported) {\n  auto result = ParseAndValidate(\"+int32_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n\n  result = ParseAndValidate(\"+double_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n}\n\nTEST_F(ExprValidatorTest, UnaryNegative_Supported) {\n  auto result = ParseAndValidate(\"-int32_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n\n  result = ParseAndValidate(\"-double_col\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n}\n\nTEST_F(ExprValidatorTest, Binary_Op_With_Literal_Valid) {\n  auto result = ParseAndValidate(\"int32_col + 1\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n\n  result = ParseAndValidate(\"int32_col - 100\", schema_);\n  EXPECT_TRUE(result.ok());\n\n  result = ParseAndValidate(\"1.5 * double_col\", schema_);\n  EXPECT_TRUE(result.ok());\n\n  result = ParseAndValidate(\"double_col / 2.0\", schema_);\n  EXPECT_TRUE(result.ok());\n\n  result = ParseAndValidate(\"100 - int32_col\", schema_);\n  EXPECT_TRUE(result.ok());\n}\n\nTEST_F(ExprValidatorTest, NonNumericColumn_Rejected) {\n  auto result = ParseAndValidate(\"str_col\", schema_);\n  EXPECT_FALSE(result.ok());\n  EXPECT_THAT(result.status().ToString(), ::testing::HasSubstr(\"not numeric\"));\n\n  result = 
ParseAndValidate(\"+str_col\", schema_);\n  EXPECT_FALSE(result.ok());\n  EXPECT_THAT(result.status().ToString(), ::testing::HasSubstr(\"not numeric\"));\n\n  result = ParseAndValidate(\"-str_col\", schema_);\n  EXPECT_FALSE(result.ok());\n  EXPECT_THAT(result.status().ToString(), ::testing::HasSubstr(\"not numeric\"));\n}\n\nTEST_F(ExprValidatorTest, TwoColumns_Operations_Rejected) {\n  auto result = ParseAndValidate(\"int32_col + double_col\", schema_);\n  EXPECT_FALSE(result.ok());\n  result = ParseAndValidate(\"int32_col + int32_col\", schema_);\n  EXPECT_FALSE(result.ok());\n}\n\nTEST_F(ExprValidatorTest, PureLiteral_Rejected) {\n  auto result = ParseAndValidate(\"123\", schema_);\n  EXPECT_TRUE(result.ok());\n\n  result = ParseAndValidate(\"+123\", schema_);\n  EXPECT_TRUE(result.ok());\n\n  result = ParseAndValidate(\"-456\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n}\n\nTEST_F(ExprValidatorTest, NestedExpression_Rejected) {\n  auto result = ParseAndValidate(\"(int32_col + 1)\", schema_);\n  EXPECT_TRUE(result.ok()) << result.status().ToString();\n}\n\nTEST_F(ExprValidatorTest, InvalidFunctionOrSyntax) {\n  auto result = ParseAndValidate(\"int32_col || 'abc'\", schema_);\n  EXPECT_FALSE(result.ok());\n\n  result = ParseAndValidate(\"sqrt(int32_col)\", schema_);\n  EXPECT_FALSE(result.ok());\n}\n"
  },
  {
    "path": "tests/db/index/storage/arrow_ipc_writer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/storage/arrow_ipc_writer.h\"\n#include <iostream>\n#include <arrow/array/builder_primitive.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/record_batch.h>\n#include <arrow/status.h>\n#include <gtest/gtest.h>\n#include \"db/index/storage/store_helper.h\"\n\nusing namespace zvec;\n\nauto schema = arrow::schema(\n    {arrow::field(\"id\", arrow::int32()), arrow::field(\"name\", arrow::utf8())});\n\nstd::shared_ptr<arrow::RecordBatchReader> CreateTestReader(int start_id,\n                                                           int count) {\n  arrow::Int32Builder id_builder;\n  arrow::StringBuilder name_builder;\n\n  arrow::Status s;\n\n  for (int i = 0; i < count; ++i) {\n    s = id_builder.Append(start_id + i);\n    if (!s.ok()) {\n      return nullptr;\n    }\n    s = name_builder.Append(\"User\" + std::to_string(start_id + i));\n    if (!s.ok()) {\n      return nullptr;\n    }\n  }\n\n  std::shared_ptr<arrow::Array> id_array, name_array;\n  s = id_builder.Finish(&id_array);\n  if (!s.ok()) {\n    return nullptr;\n  }\n  s = name_builder.Finish(&name_array);\n  if (!s.ok()) {\n    return nullptr;\n  }\n\n  auto batch = arrow::RecordBatch::Make(schema, count, {id_array, name_array});\n  auto maybe_reader = arrow::RecordBatchReader::Make({batch}, schema);\n  if (!maybe_reader.ok()) {\n    return nullptr;\n  }\n  return 
*maybe_reader;\n}\n\nTEST(ArrowIpcWriter, General) {\n  std::string output_file_path = \"output.ipc\";\n\n  ArrowIpcWriter writer(output_file_path);\n  // writer.SetMaxRowsPerGroup(1000); // optional: control the number of rows per group\n\n  // First insert\n  {\n    auto reader1 = CreateTestReader(1, 3);\n    ASSERT_NE(reader1, nullptr);\n    auto status = writer.insert(reader1);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 1\" << std::endl;\n  }\n\n  // Second insert\n  {\n    auto reader2 = CreateTestReader(4, 2);\n    ASSERT_NE(reader2, nullptr);\n    auto status = writer.insert(reader2);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 2\" << std::endl;\n  }\n\n  // Third insert\n  {\n    auto reader3 = CreateTestReader(6, 4);\n    ASSERT_NE(reader3, nullptr);\n    auto status = writer.insert(reader3);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 3\" << std::endl;\n  }\n\n  // Finally, close the file\n  auto status = writer.finalize();\n  if (!status.ok()) {\n    std::cerr << \"Finalize failed: \" << status.ToString() << std::endl;\n  }\n\n  std::cout << \"IPC file written successfully to output.ipc\"\n            << std::endl;\n\n  // Read the file back\n  std::shared_ptr<arrow::io::RandomAccessFile> output_file_;\n  std::string output_file_path_cp;\n  auto as = CreateRandomAccessFileByUri(output_file_path, &output_file_,\n                                        &output_file_path_cp);\n  ASSERT_TRUE(as.ok());\n\n  auto result = arrow::ipc::RecordBatchFileReader::Open(output_file_);\n  ASSERT_TRUE(result.ok());\n\n  auto reader = std::move(result).ValueOrDie();\n  ASSERT_EQ(reader->num_record_batches(), 3);\n\n  int num_rows = 0;\n  for (int i = 0; i < reader->num_record_batches(); i++) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto res = reader->ReadRecordBatch(i);\n    ASSERT_TRUE(res.ok());\n    batch = std::move(res).ValueOrDie();\n    num_rows += batch->num_rows();\n  }\n\n  ASSERT_EQ(num_rows, 9);\n}\n"
  },
  {
    "path": "tests/db/index/storage/bufferpool_store_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#include <cstdint>\n#include <cstdlib>\n#include <filesystem>\n#include <iostream>\n#include <memory>\n#include <thread>\n#include <arrow/api.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include \"db/index/storage/bufferpool_forward_store.h\"\n#include \"utils/utils.h\"\n\nusing namespace zvec;\n\nclass BufferPoolStoreTest : public testing::Test {\n protected:\n  void SetUp() override {\n    auto s = test::TestHelper::WriteTestFile(parquet_path, FileFormat::PARQUET);\n    if (!s.ok()) {\n      std::cout << \"err: \" << s.message() << std::endl;\n      exit(1);\n    }\n    ailego::BufferManager::Instance().init(10 * 1024 * 1024, 1);\n  }\n\n  void TearDown() override {\n    if (std::filesystem::exists(parquet_path)) {\n      std::filesystem::remove(parquet_path);\n    }\n  }\n  std::string parquet_path = \"test.parquet\";\n};\n\n\nTEST_F(BufferPoolStoreTest, ParquetFetch) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table = store->fetch({\"id\", \"name\", \"score\"}, {0, 1, 2});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n}\n\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWithSelectColumns) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  
TablePtr table = store->fetch({\"id\", \"name\"}, {0, 1, 2});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWithUID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto table = store->fetch({USER_ID, \"id\", \"name\"}, {0, 1, 2});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 3);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWithGlobalDocID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto table = store->fetch({GLOBAL_DOC_ID, \"id\", \"name\"}, {0, 1, 2});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 3);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWitEmptyColumns) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table = store->fetch({}, std::vector<int>{});\n  EXPECT_EQ(table, nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWitEmptyIndices) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table = store->fetch({\"id\", \"name\"}, std::vector<int>{});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 0);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWithMoreIndices) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table = store->fetch({\"id\"}, {0, 1, 2, 3, 6, 2, 1, 7});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 8);\n  EXPECT_EQ(table->num_columns(), 1);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchWithInvalidIndices) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  
EXPECT_TRUE(store->Open().ok());\n  TablePtr table = store->fetch({\"id\"}, {0, 1, 30});\n  ASSERT_TRUE(table == nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchCheckOrderWithLocalRowIDMiddle) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table =\n      store->fetch({\"id\", \"name\", LOCAL_ROW_ID, \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 5);\n  EXPECT_EQ(table->num_columns(), 4);\n  auto field = table->schema()->field(2);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = table->column(2);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\n\nTEST_F(BufferPoolStoreTest, ParquetFetchCheckOrderWithLocalRowIDEnd) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  TablePtr table =\n      store->fetch({\"id\", \"name\", \"score\", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(table != nullptr);\n  EXPECT_EQ(table->num_rows(), 5);\n  EXPECT_EQ(table->num_columns(), 4);\n  auto field = table->schema()->field(3);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = table->column(3);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  
EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\n\nTEST_F(BufferPoolStoreTest, ParquetScan) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto reader = store->scan({\"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 3);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetScanWithSelectColumns) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto reader = store->scan({\"id\", \"name\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 2);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetScanWithInvalidColumn) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto reader = store->scan({\"id\", \"unknown_column\"});\n  ASSERT_TRUE(reader == nullptr);\n}\n\n\nTEST_F(BufferPoolStoreTest, ParquetScanWithUserID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto reader = store->scan({USER_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    
std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetScanWithGlobalDocID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n  auto reader = store->scan({GLOBAL_DOC_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRow) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"name\", \"score\"}, 0);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(id_scalar != nullptr);\n  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n  ASSERT_TRUE(id_value != nullptr);\n  EXPECT_EQ(id_value->value, 1);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSpecificRow) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"name\", \"score\"}, 3);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto 
id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(id_scalar != nullptr);\n  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n  ASSERT_TRUE(id_value != nullptr);\n  EXPECT_EQ(id_value->value, 4);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithUserID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({USER_ID, \"id\", \"name\"}, 1);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto user_id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(user_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::StringScalar>(user_id_scalar) !=\n              nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithGlobalDocID) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({GLOBAL_DOC_ID, \"id\", \"name\"}, 4);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n\n  auto global_doc_id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(global_doc_id_scalar != nullptr);\n  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(\n                  global_doc_id_scalar) != nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithNegativeIndex) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"name\"}, -1);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithOutOfRangeIndex) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"name\"}, 15);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, 
ParquetFetchSingleRowWithInvalidColumn) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"invalid_column\"}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithEmptyColumns) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(BufferPoolStoreTest, AllDataTypeFetchSingleRow) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  EXPECT_TRUE(store->Open().ok());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"list_int32\"}, 2);\n  ASSERT_TRUE(batch != nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 2);\n\n  auto id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(id_scalar != nullptr);\n  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);\n  ASSERT_TRUE(id_value != nullptr);\n  EXPECT_EQ(id_value->value, 3);\n\n  auto list_scalar = batch->values[1].scalar();\n  ASSERT_TRUE(list_scalar != nullptr);\n  auto list_value = std::dynamic_pointer_cast<arrow::ListScalar>(list_scalar);\n  ASSERT_TRUE(list_value != nullptr);\n  EXPECT_EQ(list_value->value->length(), 128);\n\n  auto list_array =\n      std::dynamic_pointer_cast<arrow::Int32Array>(list_value->value);\n  ASSERT_TRUE(list_array != nullptr);\n  for (int i = 0; i < 10 && i < list_array->length(); ++i) {\n    EXPECT_EQ(list_array->Value(i), 2 * 10 + i);\n  }\n}\n\nTEST_F(BufferPoolStoreTest, AllDataType) {\n  auto mmap_store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n\n  std::vector<std::string> columns = {\"id\", \"list_int32\"};\n  std::vector<int> indices = {0, 3, 6, 1, 0};\n\n  TablePtr mmap_table = mmap_store->fetch(columns, indices);\n  ASSERT_TRUE(mmap_table != nullptr);\n  
EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 2);\n\n  for (size_t j = 0; j < columns.size(); ++j) {\n    auto column = mmap_table->column(j);\n    for (int k = 0; k < column->num_chunks(); ++k) {\n      auto array = column->chunk(k);\n      if (array->type()->id() == arrow::Type::INT32) {\n        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          int32_t value = int_array->Value(i);\n          EXPECT_EQ(value, indices[i] + 1);\n        }\n      } else if (array->type()->id() == arrow::Type::LIST) {\n        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          auto list_value = list_array->value_slice(i);\n          auto list_value_array =\n              std::static_pointer_cast<arrow::Int32Array>(list_value);\n          EXPECT_EQ(list_value_array->length(), 128);\n          for (int m = 0; m < list_value_array->length(); ++m) {\n            int32_t value = list_value_array->Value(m);\n            EXPECT_EQ(value, indices[i] * 10 + m);\n          }\n        }\n      }\n    }\n  }\n}\n\nTEST_F(BufferPoolStoreTest, DeleteDestructs) {\n  BufferPoolForwardStore *store = new BufferPoolForwardStore(parquet_path);\n  delete store;\n}\n\nTEST_F(BufferPoolStoreTest, PhysicSchema) {\n  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);\n  ASSERT_NE(store, nullptr);\n  EXPECT_TRUE(store->Open().ok());\n  EXPECT_NE(store->physic_schema(), nullptr);\n}\n"
  },
  {
    "path": "tests/db/index/storage/mem_store_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"zvec/db/doc.h\"\n#define private public\n#define protected public\n#include \"db/index/storage/memory_forward_store.h\"\n#undef private\n#undef protected\n#include <cstdint>\n#include <filesystem>\n#include <future>\n#include <memory>\n#include <string>\n#include <thread>\n#include <vector>\n#include <arrow/array.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include \"utils/utils.h\"\n\nusing namespace zvec;\n\n// Helper function\nCollectionSchema::Ptr GetCollectionSchema() {\n  auto collection_schema = std::make_shared<CollectionSchema>(\n      \"test_collection\",\n      std::vector<FieldSchema::Ptr>{\n          std::make_shared<FieldSchema>(\"id\", DataType::UINT64, false, nullptr),\n          std::make_shared<FieldSchema>(\"name\", DataType::STRING, false,\n                                        nullptr),\n          std::make_shared<FieldSchema>(\"age\", DataType::INT32, false, nullptr),\n          std::make_shared<FieldSchema>(\"score\", DataType::DOUBLE, false,\n                                        nullptr),\n      });\n\n  return collection_schema;\n}\n\nDoc CreateDoc(const uint64_t doc_id) {\n  Doc new_doc;\n  new_doc.set_pk(\"pk_\" + std::to_string(doc_id));\n  new_doc.set_doc_id(doc_id);\n\n  new_doc.set<uint64_t>(\"id\", 
doc_id);\n  new_doc.set<int32_t>(\"age\", rand() % 100 + 1);\n  new_doc.set<std::string>(\n      \"name\", std::string(\"user_\") + std::to_string(rand() % 1000));\n  new_doc.set<double>(\"score\", static_cast<double>(rand() % 1000) / 10.0);\n  return new_doc;\n}\n\nvoid InsertDoc(const MemForwardStore::Ptr &store, const uint64_t start_doc_id,\n               const uint64_t end_doc_id) {\n  srand(time(NULL));\n  for (auto doc_id = start_doc_id; doc_id < end_doc_id; doc_id++) {\n    if (store) {\n      Doc new_doc = CreateDoc(doc_id);\n      store->insert(new_doc);\n    }\n  }\n}\n\nclass MemStoreTest : public testing::Test {\n protected:\n  void SetUp() override {\n    schema_ = GetCollectionSchema();\n    store_ = std::make_shared<MemForwardStore>(schema_, \"./scalar.block.0\",\n                                               FileFormat::IPC);\n    EXPECT_TRUE(store_->Open().ok());\n  }\n\n  void TearDown() override {\n    auto path = store_->path();\n    if (std::filesystem::exists(path)) {\n      std::filesystem::remove(path);\n    }\n    store_.reset();\n  }\n\n  std::shared_ptr<CollectionSchema> schema_;\n  std::shared_ptr<MemForwardStore> store_;\n};\n\n// Test constructor\nTEST_F(MemStoreTest, ConstructorTest) {\n  auto schema = GetCollectionSchema();\n  MemForwardStore store(schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store.Open().ok());\n}\n\n// Test open method\nTEST_F(MemStoreTest, OpenTest) {\n  EXPECT_TRUE(store_->Open().ok());\n}\n\n// Test insert method with valid data\nTEST_F(MemStoreTest, InsertValidData) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n  EXPECT_EQ(store_->num_rows(), 1);\n}\n\n// Test insert method with multiple documents\nTEST_F(MemStoreTest, InsertMultipleDoc) {\n  // Insert multiple documents\n  for (uint64_t i = 0; i < 5; ++i) {\n    Doc doc = CreateDoc(i);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n  EXPECT_EQ(store_->num_rows(), 5);\n  
auto table = store_->fetch({\"id\"}, std::vector<int>{});\n  EXPECT_EQ(table->num_rows(), 0);\n}\n\n// Test insert method with nullable data\nTEST_F(MemStoreTest, InsertNullableData) {\n  auto schema = GetCollectionSchema();\n  std::string id = \"id\";\n  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(\n                              \"id\", DataType::UINT64, true, nullptr)));\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  doc.remove(\"id\");\n  EXPECT_EQ(store->insert(doc), Status::OK());\n  EXPECT_EQ(store->num_rows(), 1);\n  auto table = store->fetch({\"id\"}, std::vector<int>{});\n  EXPECT_EQ(table->num_rows(), 0);\n}\n\n\n// Test flush method with empty cache\nTEST_F(MemStoreTest, FlushEmptyCache) {\n  EXPECT_EQ(store_->flush(), Status::OK());\n}\n\n// Test convertToBuilder method\nTEST_F(MemStoreTest, convertToBuilder) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n  auto rb_builder = store_->createBuilder();\n  auto result = store_->convertToBuilder(rb_builder);\n  EXPECT_TRUE(result.ok());\n  EXPECT_EQ(store_->num_rows(), 1);\n\n  // re convert to builder\n  result = store_->convertToBuilder(rb_builder);\n  EXPECT_TRUE(result.ok());\n  EXPECT_EQ(store_->num_rows(), 1);\n}\n\n// Test convertToBuilder method with nullable data\nTEST_F(MemStoreTest, convertToBuilderWithNullableData) {\n  auto schema = GetCollectionSchema();\n  std::string id = \"id\";\n  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(\n                              \"id\", DataType::UINT64, true, nullptr)));\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = 
CreateDoc(doc_id);\n    if (i % 2 == 0) {\n      doc.remove(\"id\");\n    }\n    EXPECT_EQ(store->insert(doc), Status::OK());\n  }\n\n  auto rb_builder = store_->createBuilder();\n  auto result = store_->convertToBuilder(rb_builder);\n  EXPECT_TRUE(result.ok());\n\n  EXPECT_EQ(store->num_rows(), 10);\n}\n\n// Test convertToRecordBatch method\nTEST_F(MemStoreTest, ConvertToRecordBatch) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  auto result = store_->convertToRecordBatch();\n  EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  auto rb = result.ValueOrDie();\n  EXPECT_EQ(rb->num_rows(), 1);\n\n  // re convert to record batch\n  result = store_->convertToRecordBatch();\n  EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  rb = result.ValueOrDie();\n  EXPECT_EQ(rb->num_rows(), 1);\n}\n\n// Test convertToTable method\nTEST_F(MemStoreTest, ConvertToTable) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {};\n\n  auto result = store_->convertToTable(columns, {});\n  EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  auto table = result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 2 + 4);\n\n  // re convert to table\n  result = store_->convertToTable(columns, {});\n  EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  table = result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 2 + 4);\n}\n\n// Test convertToTable method  with column filtering\nTEST_F(MemStoreTest, ConvertToTableWithColumnFiltering) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\", \"name\"};\n\n  auto result = store_->convertToTable(columns, {});\n  
EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  auto table = result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 2);\n\n  // re convert to table\n  result = store_->convertToTable(columns, {});\n  EXPECT_TRUE(result.ok());\n  EXPECT_NE(result.ValueOrDie(), nullptr);\n  table = result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\n// Test convertToTable with index filtering\nTEST_F(MemStoreTest, ConvertToTableWithIndexFiltering) {\n  // Insert multiple documents\n  for (size_t i = 0; i < 200; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  std::vector<std::string> columns = {};\n  std::vector<int> indices = {0, 2, 4};  // Select specific rows\n\n  auto result = store_->convertToTable(columns, indices);\n  EXPECT_TRUE(result.ok());\n\n  auto table = result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), 3);  // Only selected rows\n}\n\n// Test fetch method\nTEST_F(MemStoreTest, Fetch) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", \"age\"};\n  std::vector<int> indices = {};\n\n  auto table = store_->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 0);\n  EXPECT_EQ(table->num_columns(), 4);\n\n  // re fetch\n  table = store_->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 0);\n  EXPECT_EQ(table->num_columns(), 4);\n}\n\n\n// Test fetch method more data\nTEST_F(MemStoreTest, FetchWithMoreData) {\n  auto schema = GetCollectionSchema();\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  for (size_t i = 0; i < 200; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = 
CreateDoc(doc_id);\n    EXPECT_EQ(store->insert(doc), Status::OK());\n  }\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", \"age\"};\n  std::vector<int> indices = {0, 1, 2};\n\n  auto table = store->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 4);\n\n  // re fetch\n  table = store->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 4);\n}\n\n// Test fetch method\nTEST_F(MemStoreTest, FetchOneField) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\"};\n  std::vector<int> indices = {0};\n\n  auto table = store_->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 1);\n\n  // re fetch\n  table = store_->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 1);\n}\n\nTEST_F(MemStoreTest, FetchOneFieldWithNullable) {\n  auto schema = GetCollectionSchema();\n  std::string id = \"id\";\n  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(\n                              \"id\", DataType::UINT64, true, nullptr)));\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    if (i % 2 == 0) {\n      doc.remove(\"id\");\n    }\n    EXPECT_EQ(store->insert(doc), Status::OK());\n  }\n\n  std::vector<std::string> columns = {\"id\"};\n  std::vector<int> indices = {0};\n\n  auto table = store->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 1);\n\n  // re fetch\n  table = 
store->fetch(columns, indices);\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 1);\n  EXPECT_EQ(table->num_columns(), 1);\n}\n\n// Test fetch method with empty columns\nTEST_F(MemStoreTest, FetchWithEmptyColumns) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {};\n\n  auto table = store_->fetch(columns, std::vector<int>{});\n  EXPECT_EQ(table, nullptr);\n}\n\n// Test fetch method with empty data\nTEST_F(MemStoreTest, FetchWithEmptyData) {\n  std::vector<std::string> columns = {\"id\"};\n  auto table = store_->fetch(columns, std::vector<int>{});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 0);\n  EXPECT_EQ(table->num_columns(), 1);\n}\n\n// Test fetch method with invalid column names\nTEST_F(MemStoreTest, FetchWithInvalidColumns) {\n  std::vector<std::string> columns = {\"invalid_column\"};\n  auto table_reader = store_->fetch(columns, std::vector<int>{});\n  EXPECT_EQ(table_reader, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchWithLocalRowID) {\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  auto table = store_->fetch({LOCAL_ROW_ID, \"id\"}, {0, 1, 2});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\nTEST_F(MemStoreTest, FetchWithUID) {\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  auto table = store_->fetch({USER_ID, \"id\"}, {0, 1, 2});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\nTEST_F(MemStoreTest, FetchWithGlobalDocID) {\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  auto table = 
store_->fetch({GLOBAL_DOC_ID, \"id\"}, {0, 1, 2});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 2);\n}\n\nTEST_F(MemStoreTest, FetchCheckOrderWithLocalRowIDMiddle) {\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  auto table =\n      store_->fetch({\"id\", \"name\", LOCAL_ROW_ID, \"score\"}, {0, 3, 6, 1, 0});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 5);\n  EXPECT_EQ(table->num_columns(), 4);\n  auto field = table->schema()->field(2);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = table->column(2);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MemStoreTest, FetchCheckOrderWithLocalRowIDEnd) {\n  for (size_t i = 0; i < 10; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  auto table =\n      store_->fetch({\"id\", \"name\", \"score\", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 5);\n  EXPECT_EQ(table->num_columns(), 4);\n  auto field = table->schema()->field(3);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = table->column(3);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); 
++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MemStoreTest, FetchSingleRow) {\n  for (uint64_t i = 0; i < 5; ++i) {\n    Doc doc = CreateDoc(i);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  ExecBatchPtr batch = store_->fetch({\"id\", \"name\", \"age\", \"score\"}, 0);\n  ASSERT_NE(batch, nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 4);\n\n  auto id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(id_scalar != nullptr);\n  auto id_value = std::dynamic_pointer_cast<arrow::UInt64Scalar>(id_scalar);\n  ASSERT_NE(id_value, nullptr);\n  EXPECT_EQ(id_value->value, 0);\n}\n\nTEST_F(MemStoreTest, FetchSpecificRowIndex) {\n  for (uint64_t i = 0; i < 10; ++i) {\n    Doc doc = CreateDoc(i);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  ExecBatchPtr batch = store_->fetch({\"id\", \"name\", \"age\", \"score\"}, 5);\n  ASSERT_NE(batch, nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 4);\n\n  auto id_scalar = batch->values[0].scalar();\n  ASSERT_TRUE(id_scalar != nullptr);\n  auto id_value = std::dynamic_pointer_cast<arrow::UInt64Scalar>(id_scalar);\n  ASSERT_NE(id_value, nullptr);\n  EXPECT_EQ(id_value->value, 5);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowWithNegativeIndex) {\n  Doc doc = CreateDoc(0);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  ExecBatchPtr batch = store_->fetch({\"id\", \"name\"}, -1);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowWithOutOfRangeIndex) {\n  for (uint64_t i = 0; i < 5; ++i) {\n    Doc doc = CreateDoc(i);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  ExecBatchPtr batch = store_->fetch({\"id\", \"name\"}, 100);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowWithInvalidColumn) {\n  Doc doc = CreateDoc(0);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  
ExecBatchPtr batch = store_->fetch({\"id\", \"invalid_column\"}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowWithEmptyColumns) {\n  Doc doc = CreateDoc(0);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  ExecBatchPtr batch = store_->fetch({}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowFromEmptyStore) {\n  ExecBatchPtr batch = store_->fetch({\"id\", \"name\"}, 0);\n  EXPECT_EQ(batch, nullptr);\n}\n\nTEST_F(MemStoreTest, FetchSingleRowWithNullableData) {\n  auto schema = GetCollectionSchema();\n  std::string id = \"id\";\n  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(\n                              \"id\", DataType::UINT64, true, nullptr)));\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  doc.remove(\"id\");\n  EXPECT_EQ(store->insert(doc), Status::OK());\n\n  ExecBatchPtr batch = store->fetch({\"id\", \"name\", \"age\"}, 0);\n  ASSERT_NE(batch, nullptr);\n  EXPECT_EQ(batch->length, 1);\n  EXPECT_EQ(batch->values.size(), 3);\n}\n\n\n// Test scan method\nTEST_F(MemStoreTest, Scan) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", \"age\"};\n\n  auto table_reader = store_->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n\n  int batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 1);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  }\n  EXPECT_EQ(batch_count, 1);\n\n  // re scan\n  table_reader = store_->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n  batch_count = 0;\n  while 
(true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 1);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  }\n  EXPECT_EQ(batch_count, 1);\n}\n\n// Test scan method more data\nTEST_F(MemStoreTest, ScanWithMoreData) {\n  auto schema = GetCollectionSchema();\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  for (size_t i = 0; i < 200; i++) {\n    uint64_t doc_id = 0;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store->insert(doc), Status::OK());\n  }\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", \"age\"};\n\n  auto table_reader = store->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n\n  int batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 200);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  }\n  EXPECT_EQ(batch_count, 1);\n\n  // re scan\n  table_reader = store->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n  batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 200);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  }\n  EXPECT_EQ(batch_count, 1);\n}\n\n// Test scan method with empty columns\nTEST_F(MemStoreTest, ScanWithEmptyColumns) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {};\n\n  auto table_reader = 
store_->scan(columns);\n  EXPECT_EQ(table_reader, nullptr);\n}\n\n// Test scan method with empty data\nTEST_F(MemStoreTest, ScanWithEmptyData) {\n  std::vector<std::string> columns = {\"id\"};\n  auto table_reader = store_->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n  std::shared_ptr<arrow::RecordBatch> batch;\n  auto status = table_reader->ReadNext(&batch);\n  EXPECT_TRUE(status.ok());\n  EXPECT_EQ(batch, nullptr);\n}\n\n// Test scan method with invalid column names\nTEST_F(MemStoreTest, ScanWithInvalidColumns) {\n  std::vector<std::string> columns = {\"invalid_column\"};\n  auto table_reader = store_->scan(columns);\n  EXPECT_EQ(table_reader, nullptr);\n}\n\nTEST_F(MemStoreTest, ScanWithWithUID) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", USER_ID};\n\n  auto table_reader = store_->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n\n  int batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 1);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  }\n  EXPECT_EQ(batch_count, 1);\n}\n\nTEST_F(MemStoreTest, ScanWithGlobalDocID) {\n  uint64_t doc_id = 0;\n  Doc doc = CreateDoc(doc_id);\n  EXPECT_EQ(store_->insert(doc), Status::OK());\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", GLOBAL_DOC_ID};\n\n  auto table_reader = store_->scan(columns);\n  EXPECT_NE(table_reader, nullptr);\n\n  int batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_EQ(batch->num_rows(), 1);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n  
}\n  EXPECT_EQ(batch_count, 1);\n}\n\n// Test flush method with data\nTEST_F(MemStoreTest, FlushWithData) {\n  for (int i = 0; i < 100; i++) {\n    uint64_t doc_id = i;\n    Doc doc = CreateDoc(doc_id);\n    EXPECT_EQ(store_->insert(doc), Status::OK());\n  }\n\n  EXPECT_EQ(store_->flush(), Status::OK());\n\n  // check file exists\n  auto path = store_->path();\n  EXPECT_EQ(std::filesystem::exists(path), true);\n}\n\n// Test thread safety\nTEST_F(MemStoreTest, ThreadSafety) {\n  const int num_threads = 4;\n  const int inserts_per_thread = 100;\n\n  std::vector<std::future<void>> futures;\n\n  for (int t = 0; t < num_threads; ++t) {\n    futures.push_back(std::async(std::launch::async, [this, t]() {\n      for (int i = 0; i < inserts_per_thread; ++i) {\n        uint64_t doc_id = t * inserts_per_thread + i;\n        store_->insert(CreateDoc(doc_id));\n      }\n    }));\n  }\n\n  // Wait for all threads to complete\n  for (auto &future : futures) {\n    future.wait();\n  }\n\n  // Check that all documents were inserted\n  EXPECT_EQ(store_->num_rows(), num_threads * inserts_per_thread);\n}\n\n// Test edge case with empty schema\nTEST_F(MemStoreTest, EmptySchema) {\n  auto empty_schema = std::make_shared<CollectionSchema>();\n  auto empty_store = std::make_unique<MemForwardStore>(\n      empty_schema, \"./scalar.block.0\", FileFormat::IPC);\n\n  EXPECT_TRUE(empty_store->Open().ok());\n}\n\narrow::Result<std::shared_ptr<arrow::Table>> ReadArrowIPCFile(\n    const std::string &filename) {\n  std::shared_ptr<arrow::io::ReadableFile> input_file;\n  ARROW_ASSIGN_OR_RAISE(input_file, arrow::io::ReadableFile::Open(filename));\n\n  std::shared_ptr<arrow::ipc::RecordBatchFileReader> file_reader;\n  ARROW_ASSIGN_OR_RAISE(file_reader,\n                        arrow::ipc::RecordBatchFileReader::Open(input_file));\n\n  std::vector<std::shared_ptr<arrow::RecordBatch>> batches;\n  auto num_record_batches = file_reader->num_record_batches();\n\n  for (int i = 0; i < num_record_batches; 
++i) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    ARROW_ASSIGN_OR_RAISE(batch, file_reader->ReadRecordBatch(i));\n    batches.push_back(batch);\n  }\n\n  std::shared_ptr<arrow::Table> table;\n  ARROW_ASSIGN_OR_RAISE(table, arrow::Table::FromRecordBatches(batches));\n\n  return table;\n}\n\nTEST_F(MemStoreTest, Flush) {\n  size_t MAX_DOC = 10010;\n  for (size_t i = 0; i < MAX_DOC; i++) {\n    EXPECT_EQ(store_->insert(CreateDoc(i)), Status::OK());\n  }\n  EXPECT_EQ(store_->flush(), Status::OK());\n  EXPECT_EQ(store_->close(), Status::OK());\n\n  auto read_result = ReadArrowIPCFile(store_->path());\n  ASSERT_TRUE(read_result.ok())\n      << \"Failed to read Arrow IPC file: \" << read_result.status().ToString();\n\n  auto table = read_result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), MAX_DOC);\n  EXPECT_EQ(table->num_columns(), 2 + 4);\n\n  auto column_names = table->ColumnNames();\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"id\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"name\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"age\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"score\"),\n            column_names.end());\n}\n\n\nTEST_F(MemStoreTest, ReFlush) {\n  size_t MAX_DOC = 10010;\n  for (size_t i = 0; i < MAX_DOC; i++) {\n    EXPECT_EQ(store_->insert(CreateDoc(i)), Status::OK());\n  }\n  EXPECT_EQ(store_->flush(), Status::OK());\n\n  for (size_t i = MAX_DOC; i < MAX_DOC + 10; i++) {\n    EXPECT_EQ(store_->insert(CreateDoc(i)), Status::OK());\n  }\n  EXPECT_EQ(store_->flush(), Status::OK());\n\n  for (size_t i = MAX_DOC + 10; i < MAX_DOC + 20; i++) {\n    EXPECT_EQ(store_->insert(CreateDoc(i)), Status::OK());\n  }\n  EXPECT_EQ(store_->flush(), Status::OK());\n\n  EXPECT_EQ(store_->close(), Status::OK());\n\n  auto read_result = 
ReadArrowIPCFile(store_->path());\n  ASSERT_TRUE(read_result.ok())\n      << \"Failed to read Arrow IPC file: \" << read_result.status().ToString();\n\n  auto table = read_result.ValueOrDie();\n  EXPECT_EQ(table->num_rows(), MAX_DOC + 20);\n  EXPECT_EQ(table->num_columns(), 2 + 4);\n\n  auto column_names = table->ColumnNames();\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"id\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"name\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"age\"),\n            column_names.end());\n  EXPECT_NE(std::find(column_names.begin(), column_names.end(), \"score\"),\n            column_names.end());\n}\n\n// Test with max cache bytes limit\nTEST_F(MemStoreTest, MaxCacheBytesLimit) {\n  uint32_t max_cache_rows = 105;\n  uint32_t max_buffer_size = 260 * 100 * 100;\n  uint32_t max_cache_size_ = max_buffer_size / 100;\n  std::vector<int> batch_num_rows;\n\n  auto schema = GetCollectionSchema();\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      schema, \"./scalar.block.0\", FileFormat::IPC, max_buffer_size);\n  EXPECT_TRUE(store->Open().ok());\n\n  // Insert more documents than cache limit\n  uint32_t cur_doc_total_bytes = 0;\n  int cur_batch_num_row = 0;\n  for (uint64_t i = 0; i < max_cache_rows; ++i) {\n    Doc doc = CreateDoc(i);\n    EXPECT_EQ(store->insert(doc), Status::OK());\n    cur_doc_total_bytes += doc.memory_usage();\n    cur_batch_num_row++;\n    if (cur_doc_total_bytes >= max_cache_size_) {\n      batch_num_rows.push_back(cur_batch_num_row);\n      cur_doc_total_bytes = 0;\n      cur_batch_num_row = 0;\n    }\n  }\n  if (cur_batch_num_row > 0) {\n    batch_num_rows.push_back(cur_batch_num_row);\n  }\n\n  EXPECT_EQ(store->num_rows(), max_cache_rows);\n\n  std::vector<std::string> columns = {\"id\", \"name\", \"score\", \"age\"};\n  auto table_reader = store->scan(columns);\n 
 EXPECT_NE(table_reader, nullptr);\n  std::shared_ptr<arrow::RecordBatch> batch;\n\n  int total_doc_cnt = 0;\n  int cur_batch_idx = 0;\n  while (true) {\n    auto status = table_reader->ReadNext(&batch);\n    EXPECT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_NE(batch, nullptr);\n    EXPECT_EQ(batch->num_columns(), 4);\n    total_doc_cnt += batch->num_rows();\n    EXPECT_EQ(batch->num_rows(), batch_num_rows[cur_batch_idx++]);\n  }\n  EXPECT_EQ(total_doc_cnt, max_cache_rows);\n}\n\n\nTEST_F(MemStoreTest, AllDataType) {\n  uint32_t max_cache_rows = 100;\n  auto all_type_schema =\n      test::TestHelper::CreateNormalSchema(false, \"test_collection\");\n\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      all_type_schema, \"./scalar.block.0\", FileFormat::IPC, 64 * 1024 * 1024);\n  EXPECT_TRUE(store->Open().ok());\n\n  // Insert more documents than cache limit\n  for (uint64_t i = 0; i < max_cache_rows; ++i) {\n    Doc doc = test::TestHelper::CreateDoc(i, *all_type_schema);\n    EXPECT_EQ(store->insert(std::move(doc)), Status::OK());\n  }\n  EXPECT_EQ(store->num_rows(), max_cache_rows);\n\n  std::vector<std::string> columns = {\"int32\", \"array_int32\"};\n\n  auto table = store->fetch(columns, {1, 2, 3});\n  EXPECT_NE(table, nullptr);\n  EXPECT_EQ(table->num_rows(), 3);\n  EXPECT_EQ(table->num_columns(), 2);\n\n  for (size_t j = 0; j < columns.size(); ++j) {\n    auto column = table->column(j);\n    for (int k = 0; k < column->num_chunks(); ++k) {\n      auto array = column->chunk(k);\n      if (array->type()->id() == arrow::Type::INT32) {\n        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          int32_t value = int_array->Value(i);\n          EXPECT_EQ(value, i + 1);\n        }\n      } else if (array->type()->id() == arrow::Type::LIST) {\n        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);\n        
for (int i = 0; i < array->length(); ++i) {\n          auto list_value = list_array->value_slice(i);\n          auto list_value_array =\n              std::static_pointer_cast<arrow::Int32Array>(list_value);\n          EXPECT_EQ(list_value_array->length(), 10);\n          for (int m = 0; m < list_value_array->length(); ++m) {\n            int32_t value = list_value_array->Value(m);\n            EXPECT_EQ(value, i + 1);\n          }\n        }\n      }\n    }\n  }\n}\n\nTEST_F(MemStoreTest, PhysicSchema) {\n  ASSERT_NE(store_, nullptr);\n  EXPECT_NE(store_->physic_schema(), nullptr);\n}\n\nTEST_F(MemStoreTest, IsFull) {\n  ASSERT_NE(store_, nullptr);\n  EXPECT_EQ(store_->is_full(), false);\n  EXPECT_EQ(store_->total_bytes(), 0);\n}\n\nTEST_F(MemStoreTest, TotalBytes) {\n  ASSERT_NE(store_, nullptr);\n  EXPECT_EQ(store_->total_bytes(), 0);\n}\n\n// =========================== performance test ===============================\n#ifdef PERFORMANCE_TEST\nTEST_F(MemStoreTest, General) {\n  auto collection_schema = GetCollectionSchema();\n  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(\n      collection_schema, \"./scalar.block.0\", FileFormat::IPC);\n  EXPECT_TRUE(store->Open().ok());\n\n  size_t MAX_DOC = 1000000;\n\n  auto start = std::chrono::system_clock::now();\n  for (int i = 0; i < MAX_DOC; i++) {\n    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());\n  }\n  auto end = std::chrono::system_clock::now();\n  auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n                  .count();\n  std::cout << \"insert cost \" << cost << \"ms\" << std::endl;\n\n  start = std::chrono::system_clock::now();\n  auto table = store->fetch({\"age\", \"name\", \"score\"}, {});\n  end = std::chrono::system_clock::now();\n  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n             .count();\n  std::cout << \"fetch cost \" << cost << \"ms\" << std::endl;\n\n  int64_t num_rows = table->num_rows();\n  int64_t 
num_cols = table->num_columns();\n  std::cout << \"num_cols: \" << num_rows << \" num_cols:\" << num_cols\n            << std::endl;\n\n  for (int i = MAX_DOC; i < MAX_DOC + 100; i++) {\n    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());\n  }\n\n  start = std::chrono::system_clock::now();\n  table = store->fetch({\"age\", \"name\", \"score\"}, {});\n  end = std::chrono::system_clock::now();\n  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n             .count();\n  std::cout << \"re fetch cost \" << cost << \"ms\" << std::endl;\n\n  num_rows = table->num_rows();\n  num_cols = table->num_columns();\n  std::cout << \"num_cols: \" << num_rows << \" num_cols:\" << num_cols\n            << std::endl;\n\n  for (int i = MAX_DOC + 100; i < MAX_DOC + 200; i++) {\n    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());\n  }\n\n  start = std::chrono::system_clock::now();\n  table = store->fetch({\"age\", \"name\", \"score\"}, {});\n  end = std::chrono::system_clock::now();\n  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n             .count();\n  std::cout << \"re re fetch cost \" << cost << \"ms\" << std::endl;\n\n  num_rows = table->num_rows();\n  num_cols = table->num_columns();\n  std::cout << \"num_cols: \" << num_rows << \" num_cols:\" << num_cols\n            << std::endl;\n\n\n  std::vector<std::string> column_names = table->ColumnNames();\n  std::shared_ptr<arrow::ChunkedArray> column = table->column(0);\n\n  std::shared_ptr<arrow::ChunkedArray> named_column =\n      table->GetColumnByName(\"age\");\n\n  std::shared_ptr<arrow::Schema> schema = table->schema();\n  auto num_fields = schema->num_fields();\n  std::cout << \"num_fields: \" << num_fields << std::endl;\n\n  start = std::chrono::system_clock::now();\n\n  for (int j = 0; j < schema->num_fields(); ++j) {\n    auto column = table->column(j);\n    for (int k = 0; k < column->num_chunks(); ++k) {\n      auto array = column->chunk(k);\n      if 
(array->type()->id() == arrow::Type::INT32) {\n        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          int32_t value = int_array->Value(i);\n        }\n        // std::cout << \"Row \" << i << \",Column \" << j << \": \" << value\n        //           << std::endl;\n      }\n    }\n    // if (j > 10) {\n    //   break;\n    // }\n  }\n  end = std::chrono::system_clock::now();\n  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)\n             .count();\n  std::cout << \"scan all cost \" << cost << \"ms\" << std::endl;\n\n  auto first_column = table->column(0);\n  if (first_column->num_chunks() > 0) {\n    auto array = first_column->chunk(0);\n    if (array->type()->id() == arrow::Type::INT32) {\n      auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);\n      int32_t value = int_array->Value(0);\n      std::cout << \"Value at [0,0]: \" << value << std::endl;\n    }\n  }\n\n  EXPECT_EQ(store->is_full(), true);\n}\n\n#endif\n"
  },
  {
    "path": "tests/db/index/storage/mmap_store_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstdint>\n#include <filesystem>\n#include <iostream>\n#include <memory>\n#include <thread>\n#include <arrow/api.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include \"db/common/constants.h\"\n#define private public\n#define protected public\n#include \"db/index/storage/mmap_forward_store.h\"\n#undef private\n#undef protected\n#include \"utils/utils.h\"\n\nusing namespace zvec;\n\nclass MmapStoreTest : public testing::Test {\n protected:\n  void SetUp() override {\n    auto s = test::TestHelper::WriteTestFile(ipc_path, FileFormat::IPC);\n    if (!s.ok()) {\n      std::cout << s.message() << std::endl;\n      exit(1);\n    }\n    s = test::TestHelper::WriteTestFile(parquet_path, FileFormat::PARQUET);\n    if (!s.ok()) {\n      std::cout << s.message() << std::endl;\n      exit(1);\n    }\n  }\n\n  void TearDown() override {\n    if (std::filesystem::exists(ipc_path)) {\n      std::filesystem::remove(ipc_path);\n    }\n    if (std::filesystem::exists(parquet_path)) {\n      std::filesystem::remove(parquet_path);\n    }\n  }\n\n  std::string ipc_path = \"test.ipc\";\n  std::string parquet_path = \"test.parquet\";\n};\n\n\nTEST_F(MmapStoreTest, GeneralIPC) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      
ipc_store->fetch({\"id\", \"name\", \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(ipc_table != nullptr);\n  EXPECT_EQ(ipc_table->num_rows(), 5);\n\n  auto table_reader = ipc_store->scan({\"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    ASSERT_GT(batch->num_rows(), 0);\n    batch_count++;\n  }\n  ASSERT_EQ(batch_count, 4);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithLocalRowID) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({LOCAL_ROW_ID, \"id\", \"name\", \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(ipc_table != nullptr);\n  EXPECT_EQ(ipc_table->num_columns(), 4);\n  EXPECT_EQ(ipc_table->num_rows(), 5);\n}\n\nTEST_F(MmapStoreTest, IPCCheckOrderWithLocalRowIDMiddle) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr mmap_table =\n      ipc_store->fetch({\"id\", \"name\", LOCAL_ROW_ID, \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 4);\n  auto field = mmap_table->schema()->field(2);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = mmap_table->column(2);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MmapStoreTest, IPCCheckOrderWithLocalRowIDEnd) {\n  auto ipc_store = 
std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr mmap_table =\n      ipc_store->fetch({\"id\", \"name\", \"score\", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 4);\n  auto field = mmap_table->schema()->field(3);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = mmap_table->column(3);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\n\nTEST_F(MmapStoreTest, IPCFetchWithUID) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({USER_ID, \"id\", \"name\", \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(ipc_table != nullptr);\n  EXPECT_EQ(ipc_table->num_columns(), 4);\n  EXPECT_EQ(ipc_table->num_rows(), 5);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithGlobalDocID) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({GLOBAL_DOC_ID, \"id\", \"name\", \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(ipc_table != nullptr);\n  EXPECT_EQ(ipc_table->num_columns(), 4);\n  EXPECT_EQ(ipc_table->num_rows(), 5);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithEmptyColumns) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table = ipc_store->fetch({}, std::vector<int>{});\n  EXPECT_EQ(ipc_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, 
IPCFetchWithInvalidColumns) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({\"id\", \"unknown_column\"}, std::vector<int>{});\n  EXPECT_EQ(ipc_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithEmptyIndices) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({\"id\", \"name\", \"score\"}, std::vector<int>{});\n  ASSERT_TRUE(ipc_table != nullptr);\n  EXPECT_EQ(ipc_table->num_rows(), 0);\n  EXPECT_EQ(ipc_table->num_columns(), 3);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithInvalidIndices) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table =\n      ipc_store->fetch({\"id\"}, std::vector<int>{-1});  // Negative index\n  EXPECT_EQ(ipc_table, nullptr);\n\n  ipc_table =\n      ipc_store->fetch({\"id\"}, std::vector<int>{100});  // Out of range index\n  EXPECT_EQ(ipc_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCFetchWithEmptyColumnsValidIndices) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  TablePtr ipc_table = ipc_store->fetch({}, {0, 1});\n  EXPECT_EQ(ipc_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCScan) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  auto table_reader = ipc_store->scan({\"id\", \"name\", \"score\"});\n  ASSERT_TRUE(table_reader != nullptr);\n  EXPECT_NE(table_reader->schema(), nullptr);\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 3);\n    batch_count++;\n    
total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, IPCScanWithSelectColumns) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  auto table_reader = ipc_store->scan({\"id\", \"name\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 2);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, IPCScanWithInvalidColumn) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  auto table_reader = ipc_store->scan({\"id\", \"unknown_column\"});\n  ASSERT_TRUE(table_reader == nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCScanWithUserID) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  auto table_reader = ipc_store->scan({USER_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, IPCScanWithGlobalDocID) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  auto table_reader = ipc_store->scan({GLOBAL_DOC_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int 
total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\n\nTEST_F(MmapStoreTest, GeneralParquet) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  TablePtr mmap_table = mmap_store->fetch({\"id\", \"name\", \"score\"}, {0, 1, 2});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 3);\n  EXPECT_EQ(mmap_table->num_columns(), 3);\n}\n\nTEST_F(MmapStoreTest, ParquetFetchWitEmptyColumns) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  TablePtr mmap_table = mmap_store->fetch({}, std::vector<int>{});\n  EXPECT_EQ(mmap_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, ParquetFetchWithInvalidIndices) {\n  auto parquet_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(parquet_store->Open().ok());\n  TablePtr parquet_table =\n      parquet_store->fetch({\"id\"}, std::vector<int>{-1});  // Negative index\n  EXPECT_EQ(parquet_table, nullptr);\n\n  parquet_table = parquet_store->fetch(\n      {\"id\"}, std::vector<int>{100});  // Out of range index\n  EXPECT_EQ(parquet_table, nullptr);\n}\n\nTEST_F(MmapStoreTest, ParquetCheckOrder) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  TablePtr mmap_table =\n      mmap_store->fetch({\"id\", \"name\", \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 3);\n\n  // Get data from the id column for each row\n  auto id_column = 
mmap_table->column(0);  // id column is the first column\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::Int32Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {\n      1, 4, 7, 2, 1};  // Corresponding to indices 0, 3, 6, 1, 0\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MmapStoreTest, ParquetCheckOrderWithLocalRowIDMiddle) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  TablePtr mmap_table =\n      mmap_store->fetch({\"id\", \"name\", LOCAL_ROW_ID, \"score\"}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 4);\n  auto field = mmap_table->schema()->field(2);\n  EXPECT_EQ(field->name(), LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = mmap_table->column(2);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MmapStoreTest, ParquetCheckOrderWithLocalRowIDEnd) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  TablePtr mmap_table =\n      mmap_store->fetch({\"id\", \"name\", \"score\", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 4);\n  auto field = mmap_table->schema()->field(3);\n  EXPECT_EQ(field->name(), 
LOCAL_ROW_ID);\n\n  // Get data from the _zvec_row_id_ column for each row\n  auto id_column = mmap_table->column(3);\n  auto id_array =\n      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));\n\n  std::vector<int32_t> expected_ids = {0, 3, 6, 1, 0};\n  std::vector<int32_t> actual_ids;\n\n  for (int i = 0; i < id_array->length(); ++i) {\n    actual_ids.push_back(id_array->Value(i));\n  }\n\n  EXPECT_EQ(actual_ids, expected_ids)\n      << \"ID column values don't match expected order\";\n}\n\nTEST_F(MmapStoreTest, ParquetScan) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  auto table_reader = mmap_store->scan({\"id\", \"name\", \"score\"});\n  ASSERT_TRUE(table_reader != nullptr);\n  EXPECT_NE(table_reader->schema(), nullptr);\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 3);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, ParquetScanWithInvalidColumn) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  auto table_reader = mmap_store->scan({\"id\", \"unknown_column\"});\n  ASSERT_TRUE(table_reader == nullptr);\n}\n\nTEST_F(MmapStoreTest, ParquetScanWithUserID) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  auto table_reader = mmap_store->scan({USER_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n  
  if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, ParquetScanWithGlobalDocID) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n  auto table_reader = mmap_store->scan({GLOBAL_DOC_ID, \"id\", \"name\", \"score\"});\n  int batch_count = 0;\n  int total_rows = 0;\n  while (true) {\n    std::shared_ptr<arrow::RecordBatch> batch;\n    auto status = table_reader->ReadNext(&batch);\n    ASSERT_TRUE(status.ok());\n    if (batch == nullptr) {\n      break;\n    }\n    EXPECT_GT(batch->num_rows(), 0);\n    EXPECT_EQ(batch->num_columns(), 4);\n    batch_count++;\n    total_rows += batch->num_rows();\n  }\n  EXPECT_GT(batch_count, 0);\n  EXPECT_EQ(total_rows, 10);\n}\n\nTEST_F(MmapStoreTest, IPCFetchSingleRow) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n\n  auto func = [&](int index) -> void {\n    ExecBatchPtr ipc_batch = ipc_store->fetch({\"id\", \"name\", \"score\"}, index);\n    ASSERT_TRUE(ipc_batch != nullptr);\n    EXPECT_EQ(ipc_batch->length, 1);\n    EXPECT_EQ(ipc_batch->values.size(), 3);\n\n    auto id_scalar = ipc_batch->values[0].scalar();\n    auto name_scalar = ipc_batch->values[1].scalar();\n    auto score_scalar = ipc_batch->values[2].scalar();\n\n    EXPECT_EQ(std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar)->value,\n              index + 1);\n  };\n\n  for (size_t i = 0; i < 10; i++) {\n    func(i);\n  }\n}\n\nTEST_F(MmapStoreTest, ParquetFetchSingleRow) {\n  auto parquet_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(parquet_store->Open().ok());\n\n  auto func = [&](int index) -> void {\n    ExecBatchPtr parquet_batch =\n        parquet_store->fetch({\"id\", \"name\", 
\"score\"}, index);\n    ASSERT_TRUE(parquet_batch != nullptr);\n    EXPECT_EQ(parquet_batch->length, 1);\n    EXPECT_EQ(parquet_batch->values.size(), 3);\n\n    auto id_scalar = parquet_batch->values[0].scalar();\n    auto name_scalar = parquet_batch->values[1].scalar();\n    auto score_scalar = parquet_batch->values[2].scalar();\n\n    EXPECT_EQ(std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar)->value,\n              index + 1);\n  };\n\n  for (size_t i = 0; i < 10; i++) {\n    func(i);\n  }\n}\n\nTEST_F(MmapStoreTest, IPCFetchSingleRowWithInvalidIndex) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n\n  ExecBatchPtr ipc_batch = ipc_store->fetch({\"id\", \"name\"}, -1);\n  EXPECT_EQ(ipc_batch, nullptr);\n\n  ipc_batch = ipc_store->fetch({\"id\", \"name\"}, 100);\n  EXPECT_EQ(ipc_batch, nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCFetchSingleRowWithInvalidColumn) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n\n  ExecBatchPtr ipc_batch = ipc_store->fetch({\"id\", \"invalid_column\"}, 0);\n  EXPECT_EQ(ipc_batch, nullptr);\n}\n\nTEST_F(MmapStoreTest, IPCFetchSingleRowWithEmptyColumns) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n\n  ExecBatchPtr ipc_batch = ipc_store->fetch({}, 0);\n  EXPECT_EQ(ipc_batch, nullptr);\n}\n\nTEST_F(MmapStoreTest, ParquetFetchSingleRowWithInvalidIndex) {\n  auto parquet_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(parquet_store->Open().ok());\n\n  ExecBatchPtr parquet_batch = parquet_store->fetch({\"id\", \"name\"}, -1);\n  EXPECT_EQ(parquet_batch, nullptr);\n\n  parquet_batch = parquet_store->fetch({\"id\", \"name\"}, 100);\n  EXPECT_EQ(parquet_batch, nullptr);\n}\n\nTEST_F(MmapStoreTest, AllDataType) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n\n  
std::vector<std::string> columns = {\"id\", \"list_int32\"};\n  std::vector<int> indices = {0, 3, 6, 1, 0};\n\n  TablePtr mmap_table = mmap_store->fetch(columns, indices);\n  ASSERT_TRUE(mmap_table != nullptr);\n  EXPECT_EQ(mmap_table->num_rows(), 5);\n  EXPECT_EQ(mmap_table->num_columns(), 2);\n\n  for (size_t j = 0; j < columns.size(); ++j) {\n    auto column = mmap_table->column(j);\n    for (int k = 0; k < column->num_chunks(); ++k) {\n      auto array = column->chunk(k);\n      if (array->type()->id() == arrow::Type::INT32) {\n        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          int32_t value = int_array->Value(i);\n          EXPECT_EQ(value, indices[i] + 1);\n        }\n      } else if (array->type()->id() == arrow::Type::LIST) {\n        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);\n        for (int i = 0; i < array->length(); ++i) {\n          auto list_value = list_array->value_slice(i);\n          auto list_value_array =\n              std::static_pointer_cast<arrow::Int32Array>(list_value);\n          EXPECT_EQ(list_value_array->length(), 128);\n          for (int m = 0; m < list_value_array->length(); ++m) {\n            int32_t value = list_value_array->Value(m);\n            EXPECT_EQ(value, indices[i] * 10 + m);\n          }\n        }\n      }\n    }\n  }\n}\n\nTEST_F(MmapStoreTest, FindRowGroupForRow) {\n  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(0), 0);\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(1), 0);\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(2), 0);\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(3), 1);\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(6), 2);\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(9), 3);\n\n  EXPECT_EQ(mmap_store->FindRowGroupForRow(100), 3);\n}\n\nTEST_F(MmapStoreTest, GetRowGroupOffset) {\n  auto 
mmap_store = std::make_shared<MmapForwardStore>(parquet_path);\n  ASSERT_TRUE(mmap_store->Open().ok());\n\n  EXPECT_EQ(mmap_store->GetRowGroupOffset(0), 0);\n  EXPECT_EQ(mmap_store->GetRowGroupOffset(1), 3);\n  EXPECT_EQ(mmap_store->GetRowGroupOffset(2), 6);\n  EXPECT_EQ(mmap_store->GetRowGroupOffset(3), 9);\n}\n\nTEST_F(MmapStoreTest, InvalidPath) {\n  std::vector<std::string> err_path = {\n      \"err_path\",\n      \"err_\" + ipc_path,\n      \"err_\" + parquet_path,\n      ipc_path + \".unknown_file_type\",\n  };\n  for (const auto &path : err_path) {\n    auto ipc_store = std::make_shared<MmapForwardStore>(path);\n    ASSERT_FALSE(ipc_store->Open().ok());\n  }\n}\n\nTEST_F(MmapStoreTest, InvalidFileFormat) {\n  std::string err_path = ipc_path + \".unknown_file_format\";\n  EXPECT_EQ(InferFileFormat(err_path), FileFormat::UNKNOWN);\n}\n\nTEST_F(MmapStoreTest, ValidateEmptyColumns) {\n  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);\n  ASSERT_TRUE(ipc_store->Open().ok());\n  EXPECT_FALSE(ipc_store->validate({}));\n}\n\nTEST_F(MmapStoreTest, ConstructorAndPhysicSchema) {\n  MmapForwardStore store(ipc_path);\n  EXPECT_EQ(store.physic_schema(), nullptr);\n}\n\nTEST_F(MmapStoreTest, DeleteDestructs) {\n  MmapForwardStore *store = new MmapForwardStore(ipc_path);\n  delete store;\n}"
  },
  {
    "path": "tests/db/index/storage/parquet_writer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/index/storage/parquet_writer.h\"\n#include <iostream>\n#include <arrow/array/builder_primitive.h>\n#include <arrow/record_batch.h>\n#include <arrow/status.h>\n#include <gtest/gtest.h>\n\nusing namespace zvec;\n\nstd::shared_ptr<arrow::RecordBatchReader> CreateTestReader(int start_id,\n                                                           int count) {\n  auto schema = arrow::schema({arrow::field(\"id\", arrow::int32()),\n                               arrow::field(\"name\", arrow::utf8())});\n\n  arrow::Int32Builder id_builder;\n  arrow::StringBuilder name_builder;\n\n  arrow::Status s;\n\n  for (int i = 0; i < count; ++i) {\n    s = id_builder.Append(start_id + i);\n    if (!s.ok()) {\n      return nullptr;\n    }\n    s = name_builder.Append(\"User\" + std::to_string(start_id + i));\n    if (!s.ok()) {\n      return nullptr;\n    }\n  }\n\n  std::shared_ptr<arrow::Array> id_array, name_array;\n  s = id_builder.Finish(&id_array);\n  if (!s.ok()) {\n    return nullptr;\n  }\n  s = name_builder.Finish(&name_array);\n  if (!s.ok()) {\n    return nullptr;\n  }\n\n  auto batch = arrow::RecordBatch::Make(schema, count, {id_array, name_array});\n  auto maybe_reader = arrow::RecordBatchReader::Make({batch}, schema);\n  if (!maybe_reader.ok()) {\n    return nullptr;\n  }\n  return *maybe_reader;\n}\n\nTEST(ParquetWriter, General) 
{\n  ParquetWriter writer(\"output.parquet\");\n  // writer.SetMaxRowsPerGroup(1000); // 可选：控制每组行数\n\n  // 第一次插入\n  {\n    auto reader1 = CreateTestReader(1, 3);\n    ASSERT_NE(reader1, nullptr);\n    auto status = writer.insert(reader1);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 1\" << std::endl;\n  }\n\n  // 第二次插入\n  {\n    auto reader2 = CreateTestReader(4, 2);\n    ASSERT_NE(reader2, nullptr);\n    auto status = writer.insert(reader2);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 2\" << std::endl;\n  }\n\n  // 第三次插入\n  {\n    auto reader3 = CreateTestReader(6, 4);\n    ASSERT_NE(reader3, nullptr);\n    auto status = writer.insert(reader3);\n    ASSERT_TRUE(status.ok());\n    std::cout << \"Inserted batch 3\" << std::endl;\n  }\n\n  // 最后关闭文件\n  auto status = writer.finalize();\n  if (!status.ok()) {\n    std::cerr << \"Finalize failed: \" << status.ToString() << std::endl;\n  }\n\n  std::cout << \"Parquet file written successfully to output.parquet\"\n            << std::endl;\n}\n"
  },
  {
    "path": "tests/db/index/storage/wal_file_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#define private public\n#define protected public\n#include \"db/index/storage/wal/wal_file.h\"\n#undef private\n#undef protected\n\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <fcntl.h>\n#include <stdio.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"db/common/file_helper.h\"\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wunused-result\"\n#endif\n\nusing namespace zvec;\nusing SegmentID = uint32_t;\n\nclass WalFileTest : public testing::Test {\n protected:\n  void SetUp() {\n    char cmd_buf[100];\n    snprintf(cmd_buf, 100, \"rm -rf ./data.wal.*\");\n    system(cmd_buf);\n  }\n\n  void TearDown() {}\n};\n\nTEST_F(WalFileTest, TestGeneral) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 0;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 100 same record\n  for (size_t i = 0; i < 100; i++) {\n    ret = 
wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // add 100-200 record\n  wal_option.create_new = false;\n  wal_option.max_docs_wal_flush = 1;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 100; i < 200; i++) {\n    std::string record = \"hello\";\n    ret = wal_file->append(record + std::to_string(i));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // reopen and add next 100 record\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 200; i < 300; i++) {\n    std::string record = \"hello\";\n    ret = wal_file->append(record + std::to_string(i));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // reopen and add batch model 100 record\n  wal_option.max_docs_wal_flush = 10;\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 300; i < 400; i++) {\n    std::string record = \"hello\";\n    ret = wal_file->append(record + std::to_string(i));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    if (idx < 100) {\n      ASSERT_EQ(record, \"hello\");\n    } else {\n      ASSERT_EQ(record, std::string(\"hello\") + std::to_string(idx));\n    }\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 400);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // 
remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\nvoid do_append(WalFile *wal_file, size_t number) {\n  std::string record = \"hello\" + std::to_string(number);\n  int ret = wal_file->append(std::move(record));\n  ASSERT_EQ(ret, 0);\n}\n\nTEST_F(WalFileTest, TestMultiThread) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  ailego::ThreadPool pool(10, false);\n  for (size_t i = 0; i < 10000; i++) {\n    pool.execute(do_append, wal_file.get(), i);\n  }\n  pool.wait_finish();\n  wal_file->flush();\n  wal_file->close();\n\n  // reopen for batch model\n  wal_option.create_new = false;\n  wal_option.max_docs_wal_flush = 1000;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 10000; i < 20000; i++) {\n    pool.execute(do_append, wal_file.get(), i);\n  }\n  pool.wait_finish();\n  wal_file->flush();\n  wal_file->close();\n\n  // reopen for batch model\n  wal_option.create_new = false;\n  wal_option.max_docs_wal_flush = 0;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 20000; i < 30000; i++) {\n    pool.execute(do_append, wal_file.get(), i);\n  }\n  pool.wait_finish();\n  wal_file->flush();\n  wal_file->close();\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 30000);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // remove\n  
ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestBoundaryCondition) {\n  // read empty file\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  uint32_t idx = 0;\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // write and read binary struct\n  std::vector<uint8_t> bin_v{0, 1, 2, 3};\n  std::string str(bin_v.begin(), bin_v.end());\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->append(std::move(str));\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record.size(), 4);\n    for (size_t i = 0; i < 4; i++) {\n      ASSERT_EQ(record[i], i);\n    }\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 1);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n\n\n  // write very large record 4Mb\n  size_t BIG_DATA_SIZE = 4 * 1024 * 1024;\n  std::vector<uint8_t> big_data(BIG_DATA_SIZE);\n  for 
(size_t i = 0; i < BIG_DATA_SIZE; i++) {\n    big_data[i] = i % 256;\n  }\n  str.clear();\n  str.assign((const char *)big_data.data(), BIG_DATA_SIZE);\n  wal_option.create_new = true;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->append(std::move(str));\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record.size(), BIG_DATA_SIZE);\n    for (size_t i = 0; i < BIG_DATA_SIZE; i++) {\n      ASSERT_EQ((uint8_t)record[i], i % 256);\n    }\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 1);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n\n  // batch model 100, just add 99 record and close\n  wal_option.max_docs_wal_flush = 100;\n  wal_option.create_new = true;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  for (size_t i = 0; i < 99; i++) {\n    std::string record = \"hello\";\n    ret = wal_file->append(record + std::to_string(i));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n  idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, std::string(\"hello\") + std::to_string(idx));\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 99);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\nTEST_F(WalFileTest, TestNotExistErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID 
segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  // reopen for read\n  WalOptions wal_option;\n  wal_option.create_new = false;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, -1);\n}\n\n\nTEST_F(WalFileTest, TestFirstErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for (size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  // destory first record\n  lseek(wal_fd, 64 + 8, SEEK_SET);\n  // write err data\n  char buf[6] = \"nihao\";\n  write(wal_fd, buf, 5);\n  close(wal_fd);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 0);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestMiddleErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID 
segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for (size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  // destory middle record\n  lseek(wal_fd, 64 + 13 * 5 + 8, SEEK_SET);\n  // write err data\n  char buf[6] = \"nihao\";\n  write(wal_fd, buf, 5);\n  close(wal_fd);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 5);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestLastErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for 
(size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // destory last record\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  off_t fsize = lseek(wal_fd, 0, SEEK_END);\n  close(wal_fd);\n  truncate(wal_path.c_str(), (fsize - 4));\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 9);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestLengthSmallErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for (size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // write error length\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  
uint32_t err_length = 2;\n  lseek(wal_fd, 64, SEEK_SET);\n  write(wal_fd, (const void *)&err_length, 4);\n  close(wal_fd);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 0);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestLengthBigErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for (size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // write error length\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  uint32_t err_length = 200;  // exceed file size 130\n\n  lseek(wal_fd, 64, SEEK_SET);\n  write(wal_fd, (const void *)&err_length, 4);\n  close(wal_fd);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) 
{\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 0);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n\nTEST_F(WalFileTest, TestCRCErrorCase) {\n  std::string dir_path = \"./\";\n  SegmentID segment_id = 0;\n  std::string wal_file_path =\n      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);\n  WalFilePtr wal_file = WalFile::Create(wal_file_path);\n  ASSERT_TRUE(wal_file != nullptr);\n\n  WalOptions wal_option;\n  wal_option.create_new = true;\n  wal_option.max_docs_wal_flush = 1;\n  int ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  // add 10 same record\n  for (size_t i = 0; i < 10; i++) {\n    ret = wal_file->append(std::string(\"hello\"));\n    ASSERT_EQ(ret, 0);\n  }\n  ret = wal_file->flush();\n  ASSERT_EQ(ret, 0);\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n\n  // write error crc\n  std::string wal_path = ailego::StringHelper::Concat(\n      dir_path, \"data.wal.\", std::to_string(segment_id));\n  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);\n  ASSERT_GT(wal_fd, 0);\n  // second record crc 64+(4+4+len(hello))+4\n  lseek(wal_fd, 64 + 17, SEEK_SET);\n  uint32_t err_crc = 123;\n  write(wal_fd, (const void *)&err_crc, 4);\n  close(wal_fd);\n\n  // reopen for read\n  wal_option.create_new = false;\n  ret = wal_file->open(wal_option);\n  ASSERT_EQ(ret, 0);\n\n  uint32_t idx = 0;\n  ret = wal_file->prepare_for_read();\n  ASSERT_EQ(ret, 0);\n  std::string record = wal_file->next();\n  while (!record.empty()) {\n    ASSERT_EQ(record, \"hello\");\n    record = wal_file->next();\n    idx++;\n  }\n  ASSERT_EQ(idx, 1);\n  // close\n  ret = wal_file->close();\n  ASSERT_EQ(ret, 0);\n  // remove\n  ret = wal_file->remove();\n  ASSERT_EQ(ret, 0);\n}\n\n#if defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif"
  },
  {
    "path": "tests/db/index/utils/utils.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"utils.h\"\n#include <cstdint>\n#include <memory>\n#include <vector>\n#include <zvec/ailego/logger/logger.h>\n#include \"zvec/db/collection.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/status.h\"\n#include \"zvec/db/type.h\"\n\nusing namespace zvec;\nusing namespace zvec::test;\n\nCollectionSchema::Ptr TestHelper::CreateTempSchema() {\n  auto schema = std::make_shared<CollectionSchema>(\"demo\");\n  schema->set_max_doc_count_per_segment(1000);\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"id\", DataType::INT64, false, std::make_shared<InvertIndexParams>(true)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"name\", DataType::STRING, false,\n      std::make_shared<InvertIndexParams>(false)));\n  schema->add_field(\n      std::make_shared<FieldSchema>(\"weight\", DataType::FLOAT, true));\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense\", DataType::VECTOR_FP32, 128, false,\n      std::make_shared<HnswIndexParams>(MetricType::IP)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"sparse\", DataType::SPARSE_VECTOR_FP32, 0, false,\n      std::make_shared<HnswIndexParams>(MetricType::IP)));\n  return schema;\n}\n\nCollectionSchema::Ptr TestHelper::CreateScalarSchema() {\n  auto schema = 
std::make_shared<CollectionSchema>(\"demo\");\n\n  // scalar\n  schema->add_field(std::make_shared<FieldSchema>(\"int32\", DataType::INT32));\n  schema->add_field(std::make_shared<FieldSchema>(\"string\", DataType::STRING));\n\n  return schema;\n}\n\n// Helper function\nCollectionSchema::Ptr TestHelper::CreateNormalSchema(\n    bool nullable, std::string name, IndexParams::Ptr scalar_index_params,\n    IndexParams::Ptr vector_index_params, uint64_t max_doc_count) {\n  auto schema = std::make_shared<CollectionSchema>(name);\n  schema->set_max_doc_count_per_segment(max_doc_count);\n\n  // scalar\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"int32\", DataType::INT32, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"string\", DataType::STRING, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"uint32\", DataType::UINT32, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"bool\", DataType::BOOL, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"float\", DataType::FLOAT, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"double\", DataType::DOUBLE, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"int64\", DataType::INT64, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"uint64\", DataType::UINT64, nullable, scalar_index_params));\n\n  // array\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_int32\", DataType::ARRAY_INT32, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_string\", DataType::ARRAY_STRING, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_uint32\", DataType::ARRAY_UINT32, nullable, scalar_index_params));\n  
schema->add_field(std::make_shared<FieldSchema>(\n      \"array_bool\", DataType::ARRAY_BOOL, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_float\", DataType::ARRAY_FLOAT, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_double\", DataType::ARRAY_DOUBLE, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_int64\", DataType::ARRAY_INT64, nullable, scalar_index_params));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"array_uint64\", DataType::ARRAY_UINT64, nullable, scalar_index_params));\n\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense_fp32\", DataType::VECTOR_FP32, 128, false,\n      vector_index_params ? vector_index_params\n                          : std::make_shared<FlatIndexParams>(MetricType::IP)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense_fp16\", DataType::VECTOR_FP16, 128, false,\n      std::make_shared<FlatIndexParams>(MetricType::IP)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"dense_int8\", DataType::VECTOR_INT8, 128, false,\n      std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n  IndexParams::Ptr sparse_index_params;\n  if (vector_index_params) {\n    sparse_index_params = vector_index_params->clone();\n    auto v = std::dynamic_pointer_cast<VectorIndexParams>(sparse_index_params);\n    // sparse always use IP\n    v->set_metric_type(MetricType::IP);\n  }\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"sparse_fp32\", DataType::SPARSE_VECTOR_FP32, 128, false,\n      sparse_index_params ? 
sparse_index_params\n                          : std::make_shared<FlatIndexParams>(MetricType::IP)));\n  schema->add_field(std::make_shared<FieldSchema>(\n      \"sparse_fp16\", DataType::SPARSE_VECTOR_FP16, 128, false,\n      std::make_shared<FlatIndexParams>(MetricType::IP)));\n\n  return schema;\n}\n\nCollectionSchema::Ptr TestHelper::CreateSchemaWithScalarIndex(\n    bool nullable, bool enable_optimize, std::string name) {\n  return CreateNormalSchema(\n      nullable, name, std::make_shared<InvertIndexParams>(enable_optimize));\n}\n\nCollectionSchema::Ptr TestHelper::CreateSchemaWithVectorIndex(\n    bool nullable, std::string name, IndexParams::Ptr vector_index_params) {\n  return CreateNormalSchema(\n      nullable, name, nullptr,\n      vector_index_params ? vector_index_params\n                          : std::make_shared<HnswIndexParams>(MetricType::IP));\n}\n\nCollectionSchema::Ptr TestHelper::CreateSchemaWithMaxDocCount(\n    uint64_t doc_count) {\n  return CreateNormalSchema(false, \"demo\", nullptr, nullptr, doc_count);\n}\n\nstd::string TestHelper::MakePK(const uint64_t doc_id) {\n  return \"pk_\" + std::to_string(doc_id);\n}\n\nuint64_t TestHelper::ExtractDocId(const std::string &pk) {\n  return std::stoull(pk.substr(3));\n}\n\nDoc TestHelper::CreateDoc(const uint64_t doc_id, const CollectionSchema &schema,\n                          std::string pk) {\n  Doc new_doc;\n  if (pk.empty()) {\n    pk = MakePK(doc_id);\n  }\n  new_doc.set_pk(pk);\n\n  for (auto &field : schema.fields()) {\n    switch (field->data_type()) {\n      case DataType::BINARY: {\n        std::string binary_str(\"binary_\" + std::to_string(doc_id));\n        new_doc.set<std::string>(field->name(), binary_str);\n        break;\n      }\n      case DataType::BOOL:\n        new_doc.set<bool>(field->name(), doc_id % 10 == 0);\n        break;\n      case DataType::INT32:\n        new_doc.set<int32_t>(field->name(), (int32_t)doc_id);\n        break;\n      case DataType::INT64:\n        
new_doc.set<int64_t>(field->name(), (int64_t)doc_id);\n        break;\n      case DataType::UINT32:\n        new_doc.set<uint32_t>(field->name(), (uint32_t)doc_id);\n        break;\n      case DataType::UINT64:\n        new_doc.set<uint64_t>(field->name(), (uint64_t)doc_id);\n        break;\n      case DataType::FLOAT:\n        new_doc.set<float>(field->name(), (float)doc_id);\n        break;\n      case DataType::DOUBLE:\n        new_doc.set<double>(field->name(), (double)doc_id);\n        break;\n      case DataType::STRING:\n        new_doc.set<std::string>(field->name(),\n                                 \"value_\" + std::to_string(doc_id));\n        break;\n      case DataType::ARRAY_BINARY: {\n        std::vector<std::string> bin_vec;\n        for (size_t i = 0; i < (doc_id % 10); i++) {\n          bin_vec.push_back(\"bin_\" + std::to_string(i));\n        }\n        new_doc.set<std::vector<std::string>>(field->name(), bin_vec);\n        break;\n      }\n      case DataType::ARRAY_BOOL:\n        new_doc.set<std::vector<bool>>(field->name(),\n                                       std::vector<bool>(10, doc_id % 10 == 0));\n        break;\n      case DataType::ARRAY_INT32:\n        new_doc.set<std::vector<int32_t>>(\n            field->name(), std::vector<int32_t>(10, (int32_t)doc_id));\n        break;\n      case DataType::ARRAY_INT64:\n        new_doc.set<std::vector<int64_t>>(\n            field->name(), std::vector<int64_t>(10, (int64_t)doc_id));\n        break;\n      case DataType::ARRAY_UINT32:\n        new_doc.set<std::vector<uint32_t>>(\n            field->name(), std::vector<uint32_t>(10, (uint32_t)doc_id));\n        break;\n      case DataType::ARRAY_UINT64:\n        new_doc.set<std::vector<uint64_t>>(\n            field->name(), std::vector<uint64_t>(10, (uint64_t)doc_id));\n        break;\n      case DataType::ARRAY_FLOAT:\n        new_doc.set<std::vector<float>>(field->name(),\n                                        std::vector<float>(10, 
(float)doc_id));\n        break;\n      case DataType::ARRAY_DOUBLE:\n        new_doc.set<std::vector<double>>(\n            field->name(), std::vector<double>(10, (double)doc_id));\n        break;\n      case DataType::ARRAY_STRING:\n        new_doc.set<std::vector<std::string>>(\n            field->name(),\n            std::vector<std::string>(10, \"value_\" + std::to_string(doc_id)));\n        break;\n      case DataType::VECTOR_BINARY32:\n        new_doc.set<std::vector<uint32_t>>(\n            field->name(),\n            std::vector<uint32_t>(field->dimension(), uint32_t(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_BINARY64:\n        new_doc.set<std::vector<uint64_t>>(\n            field->name(),\n            std::vector<uint64_t>(field->dimension(), uint64_t(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP32:\n        new_doc.set<std::vector<float>>(\n            field->name(),\n            std::vector<float>(field->dimension(), float(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP64:\n        new_doc.set<std::vector<double>>(\n            field->name(),\n            std::vector<double>(field->dimension(), double(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP16:\n        new_doc.set<std::vector<float16_t>>(\n            field->name(), std::vector<float16_t>(\n                               field->dimension(),\n                               static_cast<float16_t>(float(doc_id + 0.1))));\n        break;\n      case DataType::VECTOR_INT8:\n        new_doc.set<std::vector<int8_t>>(\n            field->name(),\n            std::vector<int8_t>(field->dimension(), (int8_t)doc_id));\n        break;\n      case DataType::VECTOR_INT16:\n        new_doc.set<std::vector<int16_t>>(\n            field->name(),\n            std::vector<int16_t>(field->dimension(), (int16_t)doc_id));\n        break;\n      case DataType::SPARSE_VECTOR_FP16: {\n        std::vector<uint32_t> indices;\n        
std::vector<float16_t> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(float16_t(float(doc_id + 0.1)));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<float16_t>>\n            sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP32: {\n        std::vector<uint32_t> indices;\n        std::vector<float> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(float(doc_id + 0.1));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      default:\n        std::cout << \"Unsupported data type: \" << field->name() << std::endl;\n        throw std::runtime_error(\"Unsupported vector data type\");\n    }\n  }\n\n  return new_doc;\n}\n\nDoc TestHelper::CreateDocNull(const uint64_t doc_id,\n                              const CollectionSchema &schema, std::string pk) {\n  Doc new_doc;\n  if (pk.empty()) {\n    pk = \"pk_\" + std::to_string(doc_id);\n  }\n  new_doc.set_pk(pk);\n\n  for (auto &field : schema.fields()) {\n    switch (field->data_type()) {\n      case DataType::BINARY:\n      case DataType::BOOL:\n      case DataType::INT32:\n      case DataType::INT64:\n      case DataType::UINT32:\n      case DataType::UINT64:\n      case DataType::FLOAT:\n      case DataType::DOUBLE:\n      case DataType::STRING:\n      case DataType::ARRAY_BINARY:\n      case DataType::ARRAY_BOOL:\n      case DataType::ARRAY_INT32:\n      case 
DataType::ARRAY_INT64:\n      case DataType::ARRAY_UINT32:\n      case DataType::ARRAY_UINT64:\n      case DataType::ARRAY_FLOAT:\n      case DataType::ARRAY_DOUBLE:\n      case DataType::ARRAY_STRING:\n        break;\n      case DataType::VECTOR_FP32:\n        new_doc.set<std::vector<float>>(\n            field->name(),\n            std::vector<float>(field->dimension(), float(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP64:\n        new_doc.set<std::vector<double>>(\n            field->name(),\n            std::vector<double>(field->dimension(), double(doc_id + 0.1)));\n        break;\n      case DataType::VECTOR_FP16:\n        new_doc.set<std::vector<float16_t>>(\n            field->name(), std::vector<float16_t>(\n                               field->dimension(),\n                               static_cast<float16_t>(float(doc_id + 0.1))));\n        break;\n      case DataType::VECTOR_INT8:\n        new_doc.set<std::vector<int8_t>>(\n            field->name(),\n            std::vector<int8_t>(field->dimension(), (int8_t)doc_id));\n        break;\n      case DataType::VECTOR_INT16:\n        new_doc.set<std::vector<int16_t>>(\n            field->name(),\n            std::vector<int16_t>(field->dimension(), (int16_t)doc_id));\n        break;\n      case DataType::SPARSE_VECTOR_FP16: {\n        std::vector<uint32_t> indices;\n        std::vector<float16_t> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(float16_t(float(doc_id + 0.1)));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<float16_t>>\n            sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      case DataType::SPARSE_VECTOR_FP32: {\n        std::vector<uint32_t> indices;\n        
std::vector<float> values;\n        for (uint32_t i = 0; i < 100; i++) {\n          indices.push_back(i);\n          values.push_back(float(doc_id + 0.1));\n        }\n        std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec;\n        sparse_float_vec.first = indices;\n        sparse_float_vec.second = values;\n        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            field->name(), sparse_float_vec);\n        break;\n      }\n      default:\n        throw std::runtime_error(\"Unsupported vector data type\");\n    }\n  }\n\n  return new_doc;\n}\n\nStatus TestHelper::SegmentInsertDoc(const Segment::Ptr &segment,\n                                    const CollectionSchema &schema,\n                                    const uint64_t start_doc_id,\n                                    const uint64_t end_doc_id, bool nullable,\n                                    bool upsert, bool batch) {\n  for (auto doc_id = start_doc_id; doc_id < end_doc_id; doc_id++) {\n    if (segment) {\n      Doc new_doc;\n      if (nullable) {\n        new_doc = CreateDocNull(doc_id, schema);\n      } else {\n        new_doc = CreateDoc(doc_id, schema);\n      }\n\n      Status s;\n      if (upsert) {\n        s = segment->Upsert(new_doc);\n        CHECK_RETURN_STATUS(s);\n      } else {\n        s = segment->Insert(new_doc);\n        CHECK_RETURN_STATUS(s);\n      }\n    }\n  }\n  return Status::OK();\n}\n\nStatus TestHelper::CollectionInsertDoc(const Collection::Ptr &collection,\n                                       const uint64_t start_doc_id,\n                                       const uint64_t end_doc_id, bool nullable,\n                                       bool upsert, bool batch) {\n  if (!collection) {\n    return Status::InvalidArgument(\"collection is nullptr\");\n  }\n  auto schema = collection->Schema().value();\n  auto make_doc = [&](uint64_t doc_id) -> Doc {\n    return nullable ? 
CreateDocNull(doc_id, schema) : CreateDoc(doc_id, schema);\n  };\n  auto exec_write = [&](std::vector<Doc> &docs) -> Status {\n    Result<WriteResults> result =\n        upsert ? collection->Upsert(docs) : collection->Insert(docs);\n\n    if (!result.has_value()) {\n      LOG_ERROR(\"Failed to %s docs (count=%zu), error: %s.\",\n                upsert ? \"upsert\" : \"insert\", docs.size(),\n                result.error().message().c_str());\n      return result.error();\n    }\n\n    const auto &write_results = result.value();\n    if (write_results.empty()) {\n      return Status::InternalError(\"WriteResults is unexpectedly empty\");\n    }\n\n    for (const auto &wr : write_results) {\n      if (!wr.ok()) {\n        return wr;\n      }\n    }\n    return Status::OK();\n  };\n\n  if (batch) {\n    std::vector<Doc> docs;\n    docs.reserve(end_doc_id - start_doc_id);\n    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {\n      docs.emplace_back(make_doc(doc_id));\n    }\n    return exec_write(docs);\n  } else {\n    std::vector<Doc> single_doc;\n    single_doc.reserve(1);  // 可选优化\n\n    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {\n      single_doc.clear();\n      single_doc.push_back(make_doc(doc_id));\n      Status s = exec_write(single_doc);\n      if (!s.ok()) {\n        LOG_ERROR(\"Failed at doc_id=%\" PRIu64 \", doc: %s\", doc_id,\n                  single_doc[0].to_detail_string().c_str());\n        return s;\n      }\n    }\n  }\n  return Status::OK();\n}\n\nStatus TestHelper::CollectionUpsertDoc(const Collection::Ptr &collection,\n                                       const uint64_t start_doc_id,\n                                       const uint64_t end_doc_id, bool nullable,\n                                       bool batch) {\n  return CollectionInsertDoc(collection, start_doc_id, end_doc_id, nullable,\n                             true, batch);\n}\n\nSegment::Ptr TestHelper::CreateSegmentWithDoc(\n    
const std::string &col_path, const CollectionSchema &schema,\n    SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,\n    const DeleteStore::Ptr &delete_store,\n    const VersionManager::Ptr &version_manager, const SegmentOptions &options,\n    uint64_t start_doc_id, uint32_t doc_count, bool nullable, bool upsert) {\n  auto result =\n      Segment::CreateAndOpen(col_path, schema, segment_id, min_doc_id, id_map,\n                             delete_store, version_manager, options);\n\n  if (!result.has_value()) {\n    return nullptr;\n  }\n\n  auto segment = std::move(result).value();\n\n  auto s = SegmentInsertDoc(segment, schema, start_doc_id,\n                            start_doc_id + doc_count, nullable, upsert);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to insert doc, err: %s\", s.message().c_str());\n    return nullptr;\n  }\n\n  return segment;\n}\n\nCollection::Ptr TestHelper::CreateCollectionWithDoc(\n    const std::string &path, const CollectionSchema &schema,\n    const CollectionOptions &options, uint64_t start_doc_id, uint32_t doc_count,\n    bool nullable, bool upsert) {\n  auto result = Collection::CreateAndOpen(path, schema, options);\n\n  if (!result.has_value()) {\n    LOG_ERROR(\"Failed to create collection, err: %s\",\n              result.error().message().c_str());\n    return nullptr;\n  }\n\n  auto collection = std::move(result).value();\n\n  auto s = CollectionInsertDoc(collection, start_doc_id,\n                               start_doc_id + doc_count, nullable, upsert);\n  if (!s.ok()) {\n    LOG_ERROR(\"Failed to insert doc, err: %s\", s.message().c_str());\n    return nullptr;\n  }\n\n  return collection;\n}\n\narrow::Status TestHelper::WriteTestFile(const std::string &filepath,\n                                        FileFormat format,\n                                        uint32_t start_doc_id,\n                                        uint32_t end_doc_id,\n                                        uint32_t 
batch_size) {\n  // Define schema with additional list types\n  auto schema = arrow::schema(\n      {arrow::field(GLOBAL_DOC_ID, arrow::uint64()),\n       arrow::field(USER_ID, arrow::utf8()), arrow::field(\"id\", arrow::int32()),\n       arrow::field(\"name\", arrow::utf8()),\n       arrow::field(\"score\", arrow::float64()),\n       arrow::field(\"list_binary\", arrow::list(arrow::binary())),\n       arrow::field(\"list_utf8\", arrow::list(arrow::utf8())),\n       arrow::field(\"list_boolean\", arrow::list(arrow::boolean())),\n       arrow::field(\"list_int32\", arrow::list(arrow::int32())),\n       arrow::field(\"list_int64\", arrow::list(arrow::int64())),\n       arrow::field(\"list_uint32\", arrow::list(arrow::uint32())),\n       arrow::field(\"list_uint64\", arrow::list(arrow::uint64())),\n       arrow::field(\"list_float32\", arrow::list(arrow::float32())),\n       arrow::field(\"list_float64\", arrow::list(arrow::float64()))});\n\n  // Create builders\n  auto g_doc_id_builder = std::make_shared<arrow::UInt64Builder>();\n  auto uid_builder = std::make_shared<arrow::StringBuilder>();\n  auto id_builder = std::make_shared<arrow::Int32Builder>();\n  auto name_builder = std::make_shared<arrow::StringBuilder>();\n  auto score_builder = std::make_shared<arrow::DoubleBuilder>();\n\n  // Array field builders\n  auto list_binary_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::BinaryBuilder>());\n  auto list_utf8_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::StringBuilder>());\n  auto list_boolean_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::BooleanBuilder>());\n  auto list_int32_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::Int32Builder>());\n  auto list_int64_builder = std::make_shared<arrow::ListBuilder>(\n      
arrow::default_memory_pool(), std::make_shared<arrow::Int64Builder>());\n  auto list_uint32_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::UInt32Builder>());\n  auto list_uint64_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::UInt64Builder>());\n  auto list_float32_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::FloatBuilder>());\n  auto list_float64_builder = std::make_shared<arrow::ListBuilder>(\n      arrow::default_memory_pool(), std::make_shared<arrow::DoubleBuilder>());\n\n  // Cast child builders for easier access\n  auto binary_builder =\n      static_cast<arrow::BinaryBuilder *>(list_binary_builder->value_builder());\n  auto utf8_child_builder =\n      static_cast<arrow::StringBuilder *>(list_utf8_builder->value_builder());\n  auto boolean_child_builder = static_cast<arrow::BooleanBuilder *>(\n      list_boolean_builder->value_builder());\n  auto int32_child_builder =\n      static_cast<arrow::Int32Builder *>(list_int32_builder->value_builder());\n  auto int64_child_builder =\n      static_cast<arrow::Int64Builder *>(list_int64_builder->value_builder());\n  auto uint32_child_builder =\n      static_cast<arrow::UInt32Builder *>(list_uint32_builder->value_builder());\n  auto uint64_child_builder =\n      static_cast<arrow::UInt64Builder *>(list_uint64_builder->value_builder());\n  auto float32_child_builder =\n      static_cast<arrow::FloatBuilder *>(list_float32_builder->value_builder());\n  auto float64_child_builder = static_cast<arrow::DoubleBuilder *>(\n      list_float64_builder->value_builder());\n\n  // Fill data\n  for (uint32_t i = start_doc_id; i < end_doc_id; ++i) {\n    ARROW_RETURN_NOT_OK(g_doc_id_builder->Append(i + 1));\n    ARROW_RETURN_NOT_OK(uid_builder->Append(\"user_\" + std::to_string(i + 1)));\n    ARROW_RETURN_NOT_OK(id_builder->Append(i + 1));\n    
ARROW_RETURN_NOT_OK(name_builder->Append(\"Name\" + std::to_string(i)));\n    ARROW_RETURN_NOT_OK(score_builder->Append(80.0 + i));\n\n    const int dim = 128;\n    // Append list_binary data\n    ARROW_RETURN_NOT_OK(list_binary_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      std::string binary_data =\n          \"binary_\" + std::to_string(i) + \"_\" + std::to_string(j);\n      ARROW_RETURN_NOT_OK(binary_builder->Append(binary_data));\n    }\n\n    // Append list_utf8 data\n    ARROW_RETURN_NOT_OK(list_utf8_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(utf8_child_builder->Append(\n          \"string_\" + std::to_string(i) + \"_\" + std::to_string(j)));\n    }\n\n    // Append list_boolean data\n    ARROW_RETURN_NOT_OK(list_boolean_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(boolean_child_builder->Append((i + j) % 2 == 0));\n    }\n\n    // Append list_int32 data\n    ARROW_RETURN_NOT_OK(list_int32_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(int32_child_builder->Append(i * 10 + j));\n    }\n\n    // Append list_int64 data\n    ARROW_RETURN_NOT_OK(list_int64_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(\n          int64_child_builder->Append(static_cast<int64_t>(i) * 100 + j));\n    }\n\n    // Append list_uint32 data\n    ARROW_RETURN_NOT_OK(list_uint32_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(\n          uint32_child_builder->Append(static_cast<uint32_t>(i) * 10 + j));\n    }\n\n    // Append list_uint64 data\n    ARROW_RETURN_NOT_OK(list_uint64_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(\n          uint64_child_builder->Append(static_cast<uint64_t>(i) * 100 + j));\n    }\n\n    // Append list_float32 data\n    ARROW_RETURN_NOT_OK(list_float32_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      
ARROW_RETURN_NOT_OK(\n          float32_child_builder->Append(static_cast<float>(i) + j * 0.1f));\n    }\n\n    // Append list_float64 data\n    ARROW_RETURN_NOT_OK(list_float64_builder->Append());\n    for (int j = 0; j < dim; ++j) {\n      ARROW_RETURN_NOT_OK(\n          float64_child_builder->Append(static_cast<double>(i) + j * 0.01));\n    }\n  }\n\n  // Construct arrays\n  std::shared_ptr<arrow::Array> g_doc_id_array, uid_array, id_array, name_array,\n      score_array, list_binary_array, list_utf8_array, list_boolean_array,\n      list_int32_array, list_int64_array, list_uint32_array, list_uint64_array,\n      list_float32_array, list_float64_array;\n\n  ARROW_RETURN_NOT_OK(g_doc_id_builder->Finish(&g_doc_id_array));\n  ARROW_RETURN_NOT_OK(uid_builder->Finish(&uid_array));\n  ARROW_RETURN_NOT_OK(id_builder->Finish(&id_array));\n  ARROW_RETURN_NOT_OK(name_builder->Finish(&name_array));\n  ARROW_RETURN_NOT_OK(score_builder->Finish(&score_array));\n  ARROW_RETURN_NOT_OK(list_binary_builder->Finish(&list_binary_array));\n  ARROW_RETURN_NOT_OK(list_utf8_builder->Finish(&list_utf8_array));\n  ARROW_RETURN_NOT_OK(list_boolean_builder->Finish(&list_boolean_array));\n  ARROW_RETURN_NOT_OK(list_int32_builder->Finish(&list_int32_array));\n  ARROW_RETURN_NOT_OK(list_int64_builder->Finish(&list_int64_array));\n  ARROW_RETURN_NOT_OK(list_uint32_builder->Finish(&list_uint32_array));\n  ARROW_RETURN_NOT_OK(list_uint64_builder->Finish(&list_uint64_array));\n  ARROW_RETURN_NOT_OK(list_float32_builder->Finish(&list_float32_array));\n  ARROW_RETURN_NOT_OK(list_float64_builder->Finish(&list_float64_array));\n\n  // Set rows per batch\n  std::vector<std::shared_ptr<arrow::RecordBatch>> batches;\n\n  // Split data into multiple batches\n  auto doc_count = (int)(end_doc_id - start_doc_id);\n  for (int start = 0; start < doc_count; start += batch_size) {\n    int current_batch_size = std::min((int)batch_size, doc_count - start);\n\n    auto g_doc_id_slice = 
g_doc_id_array->Slice(start, current_batch_size);\n    auto uid_slice = uid_array->Slice(start, current_batch_size);\n    auto id_slice = id_array->Slice(start, current_batch_size);\n    auto name_slice = name_array->Slice(start, current_batch_size);\n    auto score_slice = score_array->Slice(start, current_batch_size);\n    auto list_binary_slice =\n        list_binary_array->Slice(start, current_batch_size);\n    auto list_utf8_slice = list_utf8_array->Slice(start, current_batch_size);\n    auto list_boolean_slice =\n        list_boolean_array->Slice(start, current_batch_size);\n    auto list_int32_slice = list_int32_array->Slice(start, current_batch_size);\n    auto list_int64_slice = list_int64_array->Slice(start, current_batch_size);\n    auto list_uint32_slice =\n        list_uint32_array->Slice(start, current_batch_size);\n    auto list_uint64_slice =\n        list_uint64_array->Slice(start, current_batch_size);\n    auto list_float32_slice =\n        list_float32_array->Slice(start, current_batch_size);\n    auto list_float64_slice =\n        list_float64_array->Slice(start, current_batch_size);\n\n    auto batch = arrow::RecordBatch::Make(\n        schema, current_batch_size,\n        {g_doc_id_slice, uid_slice, id_slice, name_slice, score_slice,\n         list_binary_slice, list_utf8_slice, list_boolean_slice,\n         list_int32_slice, list_int64_slice, list_uint32_slice,\n         list_uint64_slice, list_float32_slice, list_float64_slice});\n    batches.push_back(batch);\n  }\n\n  // Open output stream\n  ARROW_ASSIGN_OR_RAISE(auto out, arrow::io::FileOutputStream::Open(filepath));\n\n  if (format == FileFormat::PARQUET) {\n    // Parquet write logic - create table with multiple record batches\n    auto table = arrow::Table::Make(\n        schema, {g_doc_id_array, uid_array, id_array, name_array, score_array,\n                 list_binary_array, list_utf8_array, list_boolean_array,\n                 list_int32_array, list_int64_array, 
list_uint32_array,\n                 list_uint64_array, list_float32_array, list_float64_array});\n\n    parquet::WriterProperties::Builder builder;\n    builder.data_pagesize(1024);\n    // 3 rows per row group\n    builder.max_row_group_length(batch_size);\n    auto props = builder.build();\n\n    auto status = parquet::arrow::WriteTable(\n        *table, arrow::default_memory_pool(), out, batch_size, props);\n    if (!status.ok()) {\n      std::cerr << \"Write failed: \" << status.ToString() << std::endl;\n      return status;\n    }\n\n    std::cout << \"Wrote test Parquet file with multiple row groups: \"\n              << filepath << std::endl;\n  } else if (format == FileFormat::IPC) {\n    // IPC write logic - write multiple record batches\n    auto writer_result = arrow::ipc::MakeFileWriter(out, schema);\n    ARROW_RETURN_NOT_OK(writer_result.status());\n    auto writer = std::move(writer_result).ValueOrDie();\n\n    // Write multiple batches\n    for (const auto &batch : batches) {\n      ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch));\n    }\n\n    ARROW_RETURN_NOT_OK(writer->Close());\n\n    std::cout << \"Wrote test IPC file with \" << batches.size()\n              << \" batches: \" << filepath << std::endl;\n  }\n\n  ARROW_RETURN_NOT_OK(out->Close());\n  return arrow::Status::OK();\n}"
  },
  {
    "path": "tests/db/index/utils/utils.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include <cstdint>\n#include <iostream>\n#include <memory>\n#include <string>\n#include <arrow/array/array_binary.h>\n#include <arrow/io/file.h>\n#include <arrow/ipc/reader.h>\n#include <arrow/ipc/writer.h>\n#include <arrow/pretty_print.h>\n#include <arrow/result.h>\n#include <arrow/table.h>\n#include <gtest/gtest.h>\n#include <parquet/arrow/writer.h>\n#include \"db/common/constants.h\"\n#include \"db/common/typedef.h\"\n#include \"db/index/common/meta.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/index/storage/store_helper.h\"\n#include \"zvec/db/collection.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/type.h\"\n\nnamespace zvec::test {\n\ntemplate <typename T>\nbool vectors_equal_when_sorted(std::vector<T> a, std::vector<T> b) {\n  if (a.size() != b.size()) {\n    return false;\n  }\n  std::sort(a.begin(), a.end());\n  std::sort(b.begin(), b.end());\n  return a == b;\n}\n\ntemplate <typename T>\ndouble inner_produce_double(const std::vector<T> &vec1,\n                            const std::vector<T> &vec2) {\n  double result = 0.0;\n  for (size_t i = 0; i < vec1.size(); ++i) {\n    result += vec1[i] * vec2[i];\n  }\n  return result;\n}\n\n\ntemplate <typename T>\ninline float cosine_distance_dense(const std::vector<T> &vec1,\n                                   const 
std::vector<T> &vec2) {\n  const auto dot = inner_produce_double(vec1, vec2);\n  const auto norm1 = std::sqrt((inner_produce_double(vec1, vec1)));\n  const auto norm2 = std::sqrt((inner_produce_double(vec2, vec2)));\n\n  if (norm1 == 0.0f || norm2 == 0.0f) return 0.0f;\n  return 1.0f - dot / (norm1 * norm2);\n}\n\ntemplate <typename T>\ninline float dp_distance_dense(const std::vector<T> &vec1,\n                               const std::vector<T> &vec2) {\n  double result = 0.0;\n  for (size_t i = 0; i < vec1.size(); ++i) {\n    result += vec1[i] * vec2[i];\n  }\n  return result;\n}\n\ntemplate <typename T>\ninline float euclidean_distance_dense(const std::vector<T> &vec1,\n                                      const std::vector<T> &vec2) {\n  double sum = 0.0f;\n  for (size_t i = 0; i < vec1.size(); ++i) {\n    const float diff =\n        static_cast<float>(vec1[i]) - static_cast<float>(vec2[i]);\n    sum += diff * diff;\n  }\n  return sum;\n}\n\ntemplate <typename T>\ninline float distance_dense(const std::vector<T> &vec1,\n                            const std::vector<T> &vec2, MetricType metric) {\n  switch (metric) {\n    case MetricType::COSINE:\n      return cosine_distance_dense(vec1, vec2);\n    case MetricType::L2:\n      return euclidean_distance_dense(vec1, vec2);\n    case MetricType::IP:\n      return dp_distance_dense(vec1, vec2);\n    default:\n      throw std::invalid_argument(\"Unsupported metric for FP32\");\n  }\n}\n\nusing SparseVecFP32 = std::pair<std::vector<uint32_t>, std::vector<float>>;\nusing SparseVecFP16 = std::pair<std::vector<uint32_t>, std::vector<float16_t>>;\nusing SparseVec = SparseVecFP32;\n\ntemplate <typename T>\ninline float sparse_dot_product(const std::vector<uint32_t> &idx1,\n                                const std::vector<T> &val1,\n                                const std::vector<uint32_t> &idx2,\n                                const std::vector<T> &val2) {\n  double dot = 0.0f;\n  size_t i = 0, j = 0;\n\n  while (i < 
idx1.size() && j < idx2.size()) {\n    if (idx1[i] == idx2[j]) {\n      dot += static_cast<float>(val1[i]) * static_cast<float>(val2[j]);\n      ++i;\n      ++j;\n    } else if (idx1[i] < idx2[j]) {\n      ++i;\n    } else {\n      ++j;\n    }\n  }\n  return dot;\n}\n\ninline float distance_sparse(const SparseVecFP32 &vec1,\n                             const SparseVecFP32 &vec2) {\n  return sparse_dot_product(vec1.first, vec1.second, vec2.first, vec2.second);\n}\n\ninline float distance_sparse(const SparseVecFP16 &vec1,\n                             const SparseVecFP16 &vec2) {\n  return sparse_dot_product(vec1.first, vec1.second, vec2.first, vec2.second);\n}\n\n\nclass TestHelper {\n public:\n  static CollectionSchema::Ptr CreateTempSchema();\n\n  static CollectionSchema::Ptr CreateScalarSchema();\n\n  static CollectionSchema::Ptr CreateNormalSchema(\n      bool nullable = false, std::string name = \"demo\",\n      IndexParams::Ptr scalar_index_params = nullptr,\n      IndexParams::Ptr vector_index_params = nullptr,\n      uint64_t max_doc_count = MAX_DOC_COUNT_PER_SEGMENT);\n\n  static CollectionSchema::Ptr CreateSchemaWithScalarIndex(\n      bool nullable = false, bool enable_optimize = false,\n      std::string name = \"demo\");\n\n  static CollectionSchema::Ptr CreateSchemaWithVectorIndex(\n      bool nullable = false, std::string name = \"demo\",\n      IndexParams::Ptr vector_index_params = nullptr);\n\n  static CollectionSchema::Ptr CreateSchemaWithMaxDocCount(uint64_t doc_count);\n\n  static std::string MakePK(const uint64_t doc_id);\n\n  static uint64_t ExtractDocId(const std::string &pk);\n\n  static Doc CreateDoc(const uint64_t doc_id, const CollectionSchema &schema,\n                       std::string pk = \"\");\n\n  static Doc CreateDocNull(const uint64_t doc_id,\n                           const CollectionSchema &schema, std::string pk = \"\");\n\n  static Status SegmentInsertDoc(const Segment::Ptr &segment,\n                                 const 
CollectionSchema &schema,\n                                 const uint64_t start_doc_id,\n                                 const uint64_t end_doc_id,\n                                 bool nullable = false, bool upsert = false,\n                                 bool batch = false);\n\n  static Status CollectionInsertDoc(const Collection::Ptr &collection,\n                                    const uint64_t start_doc_id,\n                                    const uint64_t end_doc_id,\n                                    bool nullable = false, bool upsert = false,\n                                    bool batch = false);\n\n  static Status CollectionUpsertDoc(const Collection::Ptr &collection,\n                                    const uint64_t start_doc_id,\n                                    const uint64_t end_doc_id,\n                                    bool nullable = false, bool batch = false);\n\n  static Segment::Ptr CreateSegmentWithDoc(\n      const std::string &col_path, const CollectionSchema &schema,\n      SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,\n      const DeleteStore::Ptr &delete_store,\n      const VersionManager::Ptr &version_manager, const SegmentOptions &options,\n      uint64_t start_doc_id, uint32_t doc_count, bool nullable = false,\n      bool upsert = false);\n\n  static Collection::Ptr CreateCollectionWithDoc(\n      const std::string &path, const CollectionSchema &schema,\n      const CollectionOptions &options, uint64_t start_doc_id,\n      uint32_t doc_count, bool nullable = false, bool upsert = false);\n\n\n  static arrow::Status WriteTestFile(const std::string &filepath,\n                                     FileFormat format,\n                                     uint32_t start_doc_id = 0,\n                                     uint32_t end_doc_id = 10,\n                                     uint32_t batch_size = 3);\n};\n\n\n}  // namespace zvec::test"
  },
  {
    "path": "tests/db/sqlengine/CMakeLists.txt",
    "content": "\ninclude(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\nif(APPLE)\n  set(APPLE_FRAMEWORK_LIBS\n    -framework CoreFoundation\n    -framework CoreGraphics\n    -framework CoreData\n    -framework CoreText\n    -framework Security\n    -framework Foundation\n    -Wl,-U,_MallocExtension_ReleaseFreeMemory\n    -Wl,-U,_ProfilerStart\n    -Wl,-U,_ProfilerStop\n    -Wl,-U,_RegisterThriftProtocol\n  )\nendif()\n\nfile(GLOB ALL_TEST_SRCS *_test.cc)\nforeach(CC_SRCS ${ALL_TEST_SRCS})\n  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)\n  cc_gmock(\n    NAME ${CC_TARGET} STRICT\n    LIBS zvec_common\n    zvec_proto\n    zvec_sqlengine\n    zvec_ailego\n    core_metric\n    core_utility\n    core_quantizer\n    core_knn_hnsw core_knn_hnsw_sparse sparsehash\n    core_knn_flat core_knn_flat_sparse core_knn_ivf\n    core_knn_hnsw_rabitq core_mix_reducer\n    ${CMAKE_THREAD_LIBS_INIT}\n    ${CMAKE_DL_LIBS}\n    SRCS ${CC_SRCS}\n    INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/db ${PROJECT_ROOT_DIR}/src/db/common\n    LDFLAGS ${APPLE_FRAMEWORK_LIBS}\n  )\n  cc_test_suite(zvec_sqlengine ${CC_TARGET})\nendforeach()\n"
  },
  {
    "path": "tests/db/sqlengine/contain_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include <cstdint>\n#include <cstdlib>\n#include <memory>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/api.h>\n#include <gtest/gtest.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/sqlengine.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/type.h\"\n#include \"test_helper.h\"\n\nnamespace zvec::sqlengine {\n\nstatic Doc create_doc(const uint64_t doc_id) {\n  Doc new_doc;\n  new_doc.set_pk(\"pk_\" + std::to_string(doc_id));\n  new_doc.set_doc_id(doc_id);\n\n  auto size = doc_id % 100;\n  if (size > 0) {\n    std::vector<std::string> str_array;\n    std::vector<int32_t> i32_array;\n    std::vector<int64_t> i64_array;\n    std::vector<uint32_t> u32_array;\n    std::vector<uint64_t> u64_array;\n    std::vector<float> fp32_array;\n    std::vector<double> fp64_array;\n    std::vector<bool> bool_array;\n\n    for (uint32_t i = 1; i <= size; i++) {\n      i32_array.push_back(i);\n      i64_array.push_back(i);\n      u32_array.push_back(i);\n      u64_array.push_back(i);\n      fp32_array.push_back(i);\n      fp64_array.push_back(i);\n      bool_array.push_back(i % 2 == 0);\n      str_array.push_back(\"name\" + std::to_string(i));\n    }\n    new_doc.set(\"i32_array\", i32_array);\n    new_doc.set(\"i64_array\", 
i64_array);\n    new_doc.set(\"u32_array\", u32_array);\n    new_doc.set(\"u64_array\", u64_array);\n    new_doc.set(\"fp32_array\", fp32_array);\n    new_doc.set(\"fp64_array\", fp64_array);\n    new_doc.set(\"bool_array\", bool_array);\n    new_doc.set(\"str_array\", str_array);\n  }\n  return new_doc;\n}\n\nclass ContainTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    FileHelper::RemoveDirectory(seg_path_);\n    FileHelper::CreateDirectory(seg_path_);\n\n    auto invert_params = std::make_shared<InvertIndexParams>(true);\n    collection_schema_ = std::make_shared<CollectionSchema>(\n        \"test_collection\",\n        std::vector<FieldSchema::Ptr>{\n            std::make_shared<FieldSchema>(\"str_array\", DataType::ARRAY_STRING,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"i32_array\", DataType::ARRAY_INT32,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"i64_array\", DataType::ARRAY_INT64,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"u32_array\", DataType::ARRAY_UINT32,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"u64_array\", DataType::ARRAY_UINT64,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"fp32_array\", DataType::ARRAY_FLOAT,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"fp64_array\", DataType::ARRAY_DOUBLE,\n                                          true, nullptr),\n            std::make_shared<FieldSchema>(\"bool_array\", DataType::ARRAY_BOOL,\n                                          true, nullptr),\n\n        });\n\n    auto segment = create_segment(seg_path_, *collection_schema_);\n    if (segment == nullptr) {\n      LOG_ERROR(\"create segment failed\");\n      
EXPECT_TRUE(segment != nullptr);\n      std::exit(EXIT_FAILURE);\n    }\n    auto status = InsertDoc(segment, 0, 10000, &create_doc);\n    if (!status.ok()) {\n      LOG_ERROR(\"insert doc failed: %s\", status.c_str());\n      EXPECT_TRUE(status.ok());\n      std::exit(EXIT_FAILURE);\n    }\n    segments_.push_back(segment);\n  }\n\n  static void TearDownTestSuite() {\n    segments_.clear();\n    FileHelper::RemoveDirectory(seg_path_);\n  }\n\n protected:\n  static inline std::string seg_path_ = \"./test_collection\";\n  static inline CollectionSchema::Ptr collection_schema_;\n  static inline std::vector<Segment::Ptr> segments_;\n};\n\n\nTEST_F(ContainTest, ContainAllInt32) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"i32_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllInt64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"i64_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << 
ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllUint32) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"u32_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllUint64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"u64_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllFp32) {\n  VectorQuery query;\n  
query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"fp32_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllFp64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"fp64_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAllString) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"str_array contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += \"'name\" + std::to_string(i) + \"'\";\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyInt32) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"i32_array contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyInt64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"i64_array contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyUint32) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"u32_array contain_any (98,99,100)\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyUint64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"u64_array contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyFp32) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"fp32_array contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyFp64) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"fp64_array contain_any (98,99,100)\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ContainTest, ContainAnyString) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n  query.filter_ = \"str_array contain_any ('name98','name99','name100')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "tests/db/sqlengine/forward_recall_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include <cstdint>\n#include <memory>\n#include <gtest/gtest.h>\n#include \"db/sqlengine/sqlengine.h\"\n#include \"zvec/db/schema.h\"\n#include \"recall_base.h\"\n\nnamespace zvec::sqlengine {\n\nclass ForwardRecallTest : public RecallTest {};\n\nTEST_F(ForwardRecallTest, Basic) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, BasicWithDocId) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.include_doc_id_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, 
segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(doc->doc_id(), i);\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, OutputNoFields) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>{};\n  query.topk_ = 200;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(doc->field_names().size(), 0);\n  }\n}\n\nTEST_F(ForwardRecallTest, DenseVector) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"dense\"};\n  query.topk_ = 200;\n  query.include_vector_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto dense = doc->get<std::vector<float>>(\"dense\");\n    
ASSERT_TRUE(dense.has_value());\n    EXPECT_EQ(dense.value().size(), 4);\n    for (auto v : dense.value()) {\n      EXPECT_FLOAT_EQ(v, (float)i);\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, SparseVector) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"sparse\"};\n  query.topk_ = 200;\n  query.include_vector_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    // EXPECT_EQ(doc->field_names().size(), 2);\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto sparse =\n        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            \"sparse\");\n    if (i % 100 == 0) {\n      // set with empty vector\n      ASSERT_FALSE(sparse.has_value());\n      continue;\n    }\n\n    ASSERT_TRUE(sparse.has_value());\n    const auto &[indices, values] = sparse.value();\n    EXPECT_EQ(indices.size(), i % 100);\n    EXPECT_EQ(values.size(), i % 100);\n    for (int j = 0; j < i % 100; j++) {\n      EXPECT_EQ(indices[j], j);\n      EXPECT_FLOAT_EQ(values[j], (float)i);\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, MultiSegment) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>();\n  query.topk_ = 200;\n  query.include_vector_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  std::vector<Segment::Ptr> segments = segments_;\n  segments.push_back(segments_[0]);\n  auto ret = engine->execute(collection_schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = 
ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto dense = doc->get<std::vector<float>>(\"dense\");\n    ASSERT_TRUE(dense.has_value());\n    EXPECT_EQ(dense.value().size(), 4);\n    for (auto v : dense.value()) {\n      EXPECT_FLOAT_EQ(v, (float)i);\n    }\n\n    auto sparse =\n        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            \"sparse\");\n    if (i % 100 == 0) {\n      // set with empty vector\n      ASSERT_FALSE(sparse.has_value());\n      continue;\n    }\n\n    ASSERT_TRUE(sparse.has_value());\n    const auto &[indices, values] = sparse.value();\n    EXPECT_EQ(indices.size(), i % 100);\n    EXPECT_EQ(values.size(), i % 100);\n    for (int j = 0; j < i % 100; j++) {\n      EXPECT_EQ(indices[j], j);\n      EXPECT_FLOAT_EQ(values[j], (float)i);\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, Eq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"age = 1\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, Gt) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id > 1000\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    auto i = j + 1001;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, Ge) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id >= 1000\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    auto i = j + 1000;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, Lt) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id < 100\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    
EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, Le) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id <= 100\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 101);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, And) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id <= 100 and id > 50\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 50);\n  for (int j = 0, i = 51; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  
}\n}\n\nTEST_F(ForwardRecallTest, Or) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"id < 100 or id > 200\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 200);\n  for (int j = 0; j < (int)docs.size(); j++) {\n    int i = j < 100 ? j : j + 101;\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, StrEq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"name = 'user_1'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, StrGe) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"name >= 'user_1'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = 
engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    if (i % 100 == 0) {\n      i += 1;\n    }\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, StrIn) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"name IN ('user_1', 'user_2')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 1) {\n      i += 1;\n    } else if (i % 100 == 2) {\n      i += 99;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, StrNotIn) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"name NOT IN ('user_1', 'user_2')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs 
= ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 0) {\n      i += 3;\n    } else {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, StrLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like 'user_9%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 9; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 9) {\n      i += 81;\n    } else if (i % 100 == 99) {\n      i += 10;\n    } else {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, IsNull) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"optional_age is null\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < 
(int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, IsNotNull) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"optional_age is not null\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    if (i % 100 == 0) {\n      i += 1;\n    }\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, IsNullNoResult) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"age is null\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 0);\n}\n\nTEST_F(ForwardRecallTest, ContainAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += 
std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, NotContainAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set not contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    // i % 100 == 0 has null category\n    while (i % 100 >= 32 || i % 100 == 0) {\n      i += 1;\n    }\n  
}\n}\n\nTEST_F(ForwardRecallTest, ContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, NotContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set not contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    // i % 100 == 0 has null category\n    while (i % 100 >= 98 || i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, BoolContainAll) {\n  
VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"bool_array contain_all (true, false)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 3;\n  }\n}\n\nTEST_F(ForwardRecallTest, BoolContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"bool_array contain_any (true)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    if (i % 3 == 2) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, ContainAllEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set contain_all ()\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, NotContainAllEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set not contain_all ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 0);\n}\n\nTEST_F(ForwardRecallTest, ContainAnyEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set contain_any ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 0);\n}\n\nTEST_F(ForwardRecallTest, NotContainAnyEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"category_set not contain_any ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  
ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, BoolEqTrue) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"bool = TRuE\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(ForwardRecallTest, BoolEqFalse) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"bool = false\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + 
std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    if (i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(ForwardRecallTest, ArrayLengthEq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"array_length(category_set) = 32\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 100;\n  }\n}\n\nTEST_F(ForwardRecallTest, ArrayLengthGe) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"array_length(category_set) >= 32\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = 
doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/invert_recall_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include <cstdint>\n#include <memory>\n#include <gtest/gtest.h>\n#include \"db/sqlengine/sqlengine.h\"\n#include \"zvec/db/schema.h\"\n#include \"recall_base.h\"\n\nnamespace zvec::sqlengine {\n\nclass InvertRecallTest : public RecallTest {};\n\nTEST_F(InvertRecallTest, Eq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_age = 1\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, Gt) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id > 1000\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = 
engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    auto i = j + 1001;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, Ge) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id >= 1000\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    auto i = j + 1000;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, Lt) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id < 100\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    
auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, Le) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id <= 100\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 101);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, And) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id <= 100 and invert_id > 50\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 50);\n  for (int j = 0, i = 51; j < (int)docs.size(); j++, i += 1) {\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, Or) {\n  VectorQuery 
query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_id < 100 or invert_id > 200\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 200);\n  for (int j = 0; j < (int)docs.size(); j++) {\n    int i = j < 100 ? j : j + 101;\n    auto &doc = docs[j];\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, StrEq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name = 'user_1'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, StrGe) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name >= 'user_1'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = 
engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    if (i % 100 == 0) {\n      i += 1;\n    }\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, StrIn) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name IN ('user_1', 'user_2')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 1) {\n      i += 1;\n    } else if (i % 100 == 2) {\n      i += 99;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, StrNotIn) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name NOT IN ('user_1', 'user_2')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n 
 auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 0) {\n      i += 3;\n    } else {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, StrLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name like 'user\\\\_9%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 9; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    if (i % 100 == 9) {\n      i += 81;\n    } else if (i % 100 == 99) {\n      i += 10;\n    } else {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, ContainAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = 
engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, NotContainAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set not contain_all (\";\n  for (int i = 1; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    // i % 100 == 0 has null category\n    while (i % 100 >= 32 || i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, ContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set 
contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 98; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 98) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, NotContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set not contain_any (98,99,100)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    // i % 100 == 0 has null category\n    while (i % 100 >= 98 || i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, BoolContainAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_bool_array contain_all (true, false)\";\n\n  auto engine = 
SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 3;\n  }\n}\n\nTEST_F(InvertRecallTest, BoolContainAny) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_bool_array contain_any (true)\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    if (i % 3 == 2) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, ContainAllEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set contain_all ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << 
ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, NotContainAllEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set not contain_all ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 0);\n}\n\nTEST_F(InvertRecallTest, ContainAnyEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set contain_any ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  ASSERT_EQ(docs.size(), 0);\n}\n\nTEST_F(InvertRecallTest, NotContainAnyEmptySet) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_category_set not contain_any ()\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < 
(int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 == 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, IsNull) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_optional_age is null\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, IsNotNull) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_optional_age is not null\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 1) {\n    if (i % 100 == 0) {\n      i += 1;\n    }\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, 
doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, BoolEqTrue) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_bool = TRuE\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 0; j < (int)docs.size(); j++, i += 100) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n  }\n}\n\nTEST_F(InvertRecallTest, BoolEqFalse) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_bool = false\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 1; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    if (i % 100 
== 0) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, ArrayLengthGe) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"array_length(invert_category_set) >= 32\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 200);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 1;\n    while (i % 100 < 32) {\n      i += 1;\n    }\n  }\n}\n\nTEST_F(InvertRecallTest, ArrayLengthEq) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  query.filter_ = \"array_length(invert_category_set) = 32\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (int j = 0, i = 32; j < (int)docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    EXPECT_EQ(i, doc->get<uint64_t>(\"id\"));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n\n    i += 100;\n  }\n}\n\nTEST_F(InvertRecallTest, MultiSegment) {\n  VectorQuery query;\n  query.output_fields_ = std::vector<std::string>();\n  
query.topk_ = 200;\n  query.include_vector_ = true;\n  query.filter_ = \"invert_id <= 5000\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  std::vector<Segment::Ptr> segments = segments_;\n  segments.push_back(segments_[0]);\n  auto ret = engine->execute(collection_schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto dense = doc->get<std::vector<float>>(\"dense\");\n    ASSERT_TRUE(dense.has_value());\n    EXPECT_EQ(dense.value().size(), 4);\n    for (auto v : dense.value()) {\n      EXPECT_FLOAT_EQ(v, (float)i);\n    }\n\n    auto sparse =\n        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n            \"sparse\");\n    if (i % 100 == 0) {\n      // set with empty vector\n      ASSERT_FALSE(sparse.has_value());\n      continue;\n    }\n\n    ASSERT_TRUE(sparse.has_value());\n    const auto &[indices, values] = sparse.value();\n    EXPECT_EQ(indices.size(), i % 100);\n    EXPECT_EQ(values.size(), i % 100);\n    for (int j = 0; j < i % 100; j++) {\n      EXPECT_EQ(indices[j], j);\n      EXPECT_FLOAT_EQ(values[j], (float)i);\n    }\n  }\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/like_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include <cstdint>\n#include <cstdlib>\n#include <iostream>\n#include <memory>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/api.h>\n#include <gtest/gtest.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/sqlengine.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/type.h\"\n#include \"test_helper.h\"\n\nnamespace zvec::sqlengine {\n\nstatic Doc create_doc(const uint64_t doc_id) {\n  Doc new_doc;\n  new_doc.set_pk(\"pk_\" + std::to_string(doc_id));\n  new_doc.set_doc_id(doc_id);\n\n  auto name = std::string(\"user-\");\n  if (doc_id >= 5000 && doc_id < 8000) {\n    name += \"%\";\n  } else if (doc_id >= 8000) {\n    name += '_';\n  }\n  name += std::to_string(doc_id % 100);\n  new_doc.set<std::string>(\"name\", name);\n  new_doc.set<std::string>(\"invert_name\", name);\n  new_doc.set<std::string>(\"extended_invert_name\", name);\n  return new_doc;\n}\n\nclass LikeTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    FileHelper::RemoveDirectory(seg_path_);\n    FileHelper::CreateDirectory(seg_path_);\n\n    auto invert_params = std::make_shared<InvertIndexParams>(true);\n    collection_schema_ = std::make_shared<CollectionSchema>(\n        
\"test_collection\",\n        std::vector<FieldSchema::Ptr>{\n            std::make_shared<FieldSchema>(\"name\", DataType::STRING, false,\n                                          nullptr),\n            std::make_shared<FieldSchema>(\n                \"invert_name\", DataType::STRING, false,\n                std::make_shared<InvertIndexParams>(false, false)),\n            std::make_shared<FieldSchema>(\n                \"extended_invert_name\", DataType::STRING, false,\n                std::make_shared<InvertIndexParams>(false, true)),\n        });\n    auto segment = create_segment(seg_path_, *collection_schema_);\n    if (segment == nullptr) {\n      LOG_ERROR(\"create segment failed\");\n      EXPECT_TRUE(segment != nullptr);\n      std::exit(EXIT_FAILURE);\n    }\n    auto status = InsertDoc(segment, 0, 10000, &create_doc);\n    if (!status.ok()) {\n      LOG_ERROR(\"insert doc failed: %s\", status.c_str());\n      EXPECT_TRUE(status.ok());\n      std::exit(EXIT_FAILURE);\n    }\n    segments_.push_back(segment);\n  }\n\n  static void TearDownTestSuite() {\n    segments_.clear();\n    FileHelper::RemoveDirectory(seg_path_);\n  }\n\n protected:\n  static inline std::string seg_path_ = \"./test_collection\";\n  static inline CollectionSchema::Ptr collection_schema_;\n  static inline std::vector<Segment::Ptr> segments_;\n};\n\n\nTEST_F(LikeTest, ForwardLikeAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like '%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n  }\n}\n\nTEST_F(LikeTest, InvertLikeAll) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  
query.filter_ = \"invert_name like '%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n  }\n}\n\nTEST_F(LikeTest, ForwardPrefixLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like 'user-22%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, InvertPrefixLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name like 'user-22%'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ForwardSuffixLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like '%ser-22'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n  
  int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, NotExtendedInvertSuffixLikeRunAsForward) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name like '%ser-22'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ExtendedInvertSuffixLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"extended_invert_name like '%ser-22'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ForwardMiddleLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like 'user%2'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    while (doc_id % 100 % 10 != 2) {\n      doc_id++;\n    }\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ExtendedInvertMiddleLike) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n 
 query.topk_ = 200;\n  query.filter_ = \"extended_invert_name like 'user%2'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    while (doc_id % 100 % 10 != 2) {\n      doc_id++;\n    }\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, UnderScore) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"name like 'user-_2'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    while (doc_id % 100 % 10 != 2 || doc_id % 100 < 10) {\n      doc_id++;\n    }\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, InvertUnderScoreRunAsForward) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = \"invert_name like 'user-_2'\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    while (doc_id % 100 % 10 != 2 || doc_id % 100 < 10) {\n      doc_id++;\n    }\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ForwardEscapePercent) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = R\"(name like 'user-\\%%')\";\n\n  auto 
engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 5000; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, InvertEscapePercent) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = R\"(invert_name like 'user-\\%%')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 5000; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, ForwardEscapeUnderscore) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = R\"(name like 'user-\\_%')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 8000; i < docs.size(); i++, doc_id++) {\n    auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, InvertEscapeUnderscore) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = R\"(invert_name like 'user-\\_%')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  ASSERT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0, doc_id = 8000; i < docs.size(); i++, doc_id++) {\n    
auto doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\nTEST_F(LikeTest, NoPercentRunAsEqual) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\"};\n  query.topk_ = 200;\n  query.filter_ = R\"(invert_name like 'user-22')\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  EXPECT_TRUE(ret.has_value()) << ret.error();\n  auto docs = std::move(ret.value());\n  for (size_t i = 0; i < docs.size(); i++) {\n    auto doc = docs[i];\n    int doc_id = i * 100 + 22;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n  }\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "tests/db/sqlengine/mock_segment.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <vector>\n#include <arrow/api.h>\n#include <arrow/compute/api.h>\n#include <gtest/gtest.h>\n#include <zvec/ailego/logger/logger.h>\n#include \"db/index/column/common/index_results.h\"\n#include \"db/index/column/vector_column/vector_column_indexer.h\"\n#include \"db/index/segment/segment.h\"\n#include \"index/column/inverted_column/inverted_column_indexer.h\"\n#include \"index/column/vector_column/vector_column_params.h\"\n#include \"index/common/index_filter.h\"\nnamespace zvec {\n\n\nclass MockIndexResult : public InvertedSearchResult {\n public:\n  MockIndexResult(const std::vector<idx_t> &doc_ids,\n                  const std::vector<float> &scores)\n      : doc_ids_(doc_ids), scores_(scores) {}\n\n  MockIndexResult(const std::vector<idx_t> &doc_ids,\n                  const std::vector<float> &scores,\n                  const std::vector<std::string> &groups)\n      : doc_ids_(doc_ids), scores_(scores), group_ids_(groups) {}\n\n  size_t count() const override {\n    return doc_ids_.size();\n  }\n\n  IteratorUPtr create_iterator() override {\n    return std::make_unique<MockIterator>(*this);\n  }\n\n private:\n  struct MockIterator : public IndexResults::Iterator {\n    MockIterator(MockIndexResult &parent) : parent_(parent) {}\n\n    idx_t doc_id() const override {\n      
return parent_.doc_ids_[current_index_];\n    }\n\n    float score() const override {\n      return parent_.scores_[current_index_];\n    }\n\n    void next() override {\n      ++current_index_;\n    }\n\n    bool valid() const override {\n      return current_index_ < parent_.count();\n    }\n\n    const std::string &group_id() const override {\n      return parent_.group_ids_[current_index_];\n    }\n\n    MockIndexResult &parent_;\n    size_t current_index_{0};\n  };\n\n  std::vector<idx_t> doc_ids_;\n  std::vector<float> scores_;\n  std::vector<std::string> group_ids_;\n};\n\nclass MockVectorIndexer : public CombinedVectorColumnIndexer {\n public:\n  //! Search results with query\n  Result<IndexResults::Ptr> Search(\n      const vector_column_params::VectorData &vector_data,\n      const vector_column_params::QueryParams &query_params) override {\n    // return tl::make_unexpected(Status::InternalError(\"err\"));\n    return std::make_shared<MockIndexResult>(\n        std::vector<idx_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},\n        std::vector<float>{0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F, 0.9F,\n                           1.0F},\n        std::vector<std::string>{\"group_0\", \"group_1\", \"group_2\", \"group_0\",\n                                 \"group_1\", \"group_2\", \"group_0\", \"group_1\",\n                                 \"group_2\", \"group_0\"});\n  }\n\n  Result<vector_column_params::VectorDataBuffer> Fetch(\n      uint32_t doc_id) const override {\n    // float f = doc_id;\n    // std::vector<float> v(4, f);\n    // std::string v_str = std::string(reinterpret_cast<char *>(v.data()),\n    //                                 v.size() * sizeof(float));\n    // return vector_column_params::VectorDataBuffer{\n    //     vector_column_params::DenseVectorBuffer{v_str}};\n\n    // sparse\n    uint32_t count = doc_id % 5;\n    std::vector<uint32_t> indices(count);\n    std::vector<float> values(count);\n    for (uint32_t i = 0; i < count; i++) {\n      
indices[i] = i;\n      values[i] = i / 100.0;\n    }\n    return vector_column_params::VectorDataBuffer{\n        vector_column_params::SparseVectorBuffer{\n            std::string(reinterpret_cast<char *>(indices.data()),\n                        indices.size() * sizeof(uint32_t)),\n            std::string(reinterpret_cast<char *>(values.data()),\n                        values.size() * sizeof(float))}};\n  }\n};\n\nclass MockInvertIndexer : public InvertedColumnIndexer {\n public:\n  MockInvertIndexer() : InvertedColumnIndexer(ctx) {}\n\n  InvertedSearchResult::Ptr search(const std::string &value,\n                                   CompareOp op) const override {\n    return std::make_shared<MockIndexResult>(\n        std::vector<idx_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},\n        std::vector<float>{0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F, 0.9F,\n                           1.0F});\n  }\n\n  InvertedSearchResult::Ptr search_null() const override {\n    return std::make_shared<MockIndexResult>(\n        std::vector<idx_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},\n        std::vector<float>{0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F, 0.9F,\n                           1.0F});\n  }\n\n  InvertedSearchResult::Ptr search_non_null() const override {\n    return std::make_shared<MockIndexResult>(\n        std::vector<idx_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},\n        std::vector<float>{0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F, 0.9F,\n                           1.0F});\n  }\n\n private:\n  RocksdbContext ctx;\n};\n\n//   std::make_shared<FieldSchema>(\"id\", DataType::INT32, false, 0, false,\n//                                 nullptr),\n//   std::make_shared<FieldSchema>(\"name\", DataType::STRING, false, 0,\n//                                 false, nullptr),\n//   std::make_shared<FieldSchema>(\"age\", DataType::INT64, false, 0,\n//                                 false, nullptr),\n//   std::make_shared<FieldSchema>(\"score\", DataType::DOUBLE, false, 0,\n//            
                     false, nullptr),\ninline arrow::Result<std::shared_ptr<arrow::Table>> CreateTable(\n    int count = 10000000) {\n  auto schema = arrow::schema({\n      arrow::field(\"id\", arrow::int32()),\n      arrow::field(\"name\", arrow::utf8()),\n      arrow::field(\"age\", arrow::int64()),\n      arrow::field(\"score\", arrow::float64()),\n      arrow::field(\"_zvec_uid_\", arrow::utf8()),\n      arrow::field(\"_zvec_row_id_\", arrow::uint64()),\n      arrow::field(\"_zvec_g_doc_id_\", arrow::uint64()),\n      arrow::field(\"tag_list\", arrow::list(arrow::int32())),\n  });\n  std::shared_ptr<arrow::Array> array_id;\n  std::shared_ptr<arrow::Array> array_name;\n  std::shared_ptr<arrow::Array> array_age;\n  std::shared_ptr<arrow::Array> array_score;\n  std::shared_ptr<arrow::Array> array_uid;\n  arrow::NumericBuilder<arrow::Int64Type> builder;\n  auto has_value = [](int i) { return i % 13 != 0; };\n  ARROW_RETURN_NOT_OK(builder.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    if (has_value(i)) {\n      ARROW_RETURN_NOT_OK((builder.Append(i)));\n    } else {\n      ARROW_RETURN_NOT_OK((builder.AppendNull()));\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder.Finish(&array_age));\n  builder.Reset();\n\n  arrow::NumericBuilder<arrow::Int32Type> builder_id;\n  ARROW_RETURN_NOT_OK(builder_id.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    if (has_value(i)) {\n      ARROW_RETURN_NOT_OK((builder_id.Append(i)));\n    } else {\n      ARROW_RETURN_NOT_OK((builder_id.AppendNull()));\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder_id.Finish(&array_id));\n\n  arrow::NumericBuilder<arrow::DoubleType> builder_score;\n  ARROW_RETURN_NOT_OK(builder_score.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    if (has_value(i)) {\n      ARROW_RETURN_NOT_OK((builder_score.Append(i / 100.0)));\n    } else {\n      ARROW_RETURN_NOT_OK((builder_score.AppendNull()));\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder_score.Finish(&array_score));\n\n\n  arrow::StringBuilder 
builder_d;\n  ARROW_RETURN_NOT_OK(builder_d.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    if (has_value(i)) {\n      ARROW_RETURN_NOT_OK((builder_d.Append(\"name_\" + std::to_string(i))));\n    } else {\n      ARROW_RETURN_NOT_OK((builder_d.AppendNull()));\n    }\n  }\n  ARROW_RETURN_NOT_OK(builder_d.Finish(&array_name));\n\n  arrow::StringBuilder builder_uid;\n  ARROW_RETURN_NOT_OK(builder_uid.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    ARROW_RETURN_NOT_OK((builder_uid.Append(\"uid_\" + std::to_string(i))));\n  }\n  ARROW_RETURN_NOT_OK(builder_uid.Finish(&array_uid));\n\n  arrow::NumericBuilder<arrow::UInt64Type> builder_row_id;\n  ARROW_RETURN_NOT_OK(builder_row_id.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    ARROW_RETURN_NOT_OK((builder_row_id.Append(i)));\n  }\n  std::shared_ptr<arrow::Array> array_row_id;\n  ARROW_RETURN_NOT_OK(builder_row_id.Finish(&array_row_id));\n\n  arrow::NumericBuilder<arrow::UInt64Type> builder_doc_id;\n  ARROW_RETURN_NOT_OK(builder_doc_id.Reserve(count));\n  for (int i = 0; i < count; i++) {\n    ARROW_RETURN_NOT_OK((builder_doc_id.Append(i)));\n  }\n  std::shared_ptr<arrow::Array> array_doc_id;\n  ARROW_RETURN_NOT_OK(builder_doc_id.Finish(&array_doc_id));\n\n  arrow::ListBuilder list_builder(arrow::default_memory_pool(),\n                                  std::make_shared<arrow::Int32Builder>());\n  auto *tag_value_builder =\n      static_cast<arrow::Int32Builder *>(list_builder.value_builder());\n\n  for (int i = 0; i < count; ++i) {\n    // 开始一个新的 list\n    ARROW_RETURN_NOT_OK(list_builder.Append());\n\n    int idx = i % 5;  // 对应模式\n    for (int j = 0; j < idx + 1; ++j) {\n      ARROW_RETURN_NOT_OK(tag_value_builder->Append(j + 1));\n    }\n  }\n  std::shared_ptr<arrow::Array> tag_list_array;\n  auto status = list_builder.Finish(&tag_list_array);\n  ;\n\n  return arrow::Table::Make(\n      schema, {array_id, array_name, array_age, array_score, array_uid,\n               array_row_id, 
array_doc_id, tag_list_array});\n}\n\nclass MockIndexFilter : public IndexFilter {\n public:\n  bool is_filtered(uint64_t id) const override {\n    return id % 2 == 1;\n  }\n};\n\ninline arrow::Result<std::shared_ptr<Table>> TakeRowsByIndices(\n    const std::shared_ptr<Table> &table, const std::vector<int> &row_indices) {\n  arrow::MemoryPool *pool = arrow::default_memory_pool();\n  arrow::Int32Builder indices_builder(pool);\n  ARROW_RETURN_NOT_OK(\n      indices_builder.AppendValues(row_indices.data(), row_indices.size()));\n  std::shared_ptr<arrow::Array> indices_array;\n  ARROW_RETURN_NOT_OK(indices_builder.Finish(&indices_array));\n\n\n  // 2. 对每一列执行 Take 操作\n  std::vector<std::shared_ptr<arrow::ChunkedArray>> new_columns;\n  for (const auto &column : table->columns()) {\n    // 使用 Take 提取指定索引的元素\n    ARROW_ASSIGN_OR_RAISE(auto taken_array, cp::Take(column, indices_array));\n    new_columns.emplace_back(taken_array.chunked_array());\n  }\n\n  // 3. 构造新的 Table\n  return arrow::Table::Make(table->schema(), new_columns, row_indices.size());\n}\n\n\nclass MockSegment : public Segment {\n public:\n  MockSegment() : Segment() {}\n\n  virtual ~MockSegment() = default;\n\n  SegmentID id() const override {\n    return 0;\n  }\n\n  TablePtr fetch(const std::vector<std::string> &columns,\n                 const std::vector<int> &indices) const override {\n    std::string s = \"\";\n    for (auto i : indices) {\n      s += std::to_string(i);\n      s += \",\";\n    }\n    LOG_INFO(\"Fetch indices: %s %s\", get_column_names(columns).c_str(),\n             s.c_str());\n    auto table = CreateTable(1000).MoveValueUnsafe();\n\n    auto res = TakeRowsByIndices(table, indices);\n    if (!res.ok()) {\n      LOG_ERROR(\"Take error: %s\", res.status().ToString().c_str());\n      return nullptr;\n    }\n    LOG_INFO(\"Take: %s\", res.ValueOrDie()->ToString().c_str());\n    return res.MoveValueUnsafe();\n  }\n\n  ExecBatchPtr fetch(const std::vector<std::string> &columns,\n          
           int index) const override {\n    LOG_ERROR(\"Not implemented\");\n    return nullptr;\n  }\n\n  static std::string get_column_names(const std::vector<std::string> &columns) {\n    std::string s = \"\";\n    for (auto i : columns) {\n      s += i;\n      s += \",\";\n    }\n    return s;\n  }\n\n  RecordBatchReaderPtr scan(\n      const std::vector<std::string> &columns) const override {\n    auto table = CreateTable(10000);\n    LOG_INFO(\"Scan return: %s %s\", get_column_names(columns).c_str(),\n             table.ValueOrDie()->ToString().c_str());\n    return std::make_shared<arrow::TableBatchReader>(table.ValueOrDie());\n  }\n\n  const IndexFilter::Ptr get_filter() override {\n    return std::make_shared<MockIndexFilter>();\n  }\n\n  CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(\n      const std::string &field_name) const override {\n    return std::make_shared<MockVectorIndexer>();\n  }\n\n  CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(\n      const std::string &field_name) const override {\n    return std::make_shared<MockVectorIndexer>();\n  }\n\n  InvertedColumnIndexer::Ptr get_scalar_indexer(\n      const std::string &field_name) const override {\n    return std::make_shared<MockInvertIndexer>();\n  }\n\n  SegmentMeta::Ptr meta() const override {\n    return nullptr;\n  }\n\n  uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) override {\n    return 0;\n  }\n\n  Status add_column(FieldSchema::Ptr column_schema,\n                    const std::string &expression,\n                    const AddColumnOptions &options) override {\n    return Status::InternalError();\n  }\n\n  Status alter_column(const std::string &column_name,\n                      const FieldSchema::Ptr &new_column_schema,\n                      const AlterColumnOptions &options) override {\n    return Status::InternalError();\n  }\n\n\n  Status drop_column(const std::string &column_name) override {\n    return Status::OK();\n  }\n\n  
Status create_all_vector_index(\n      int concurrency, SegmentMeta::Ptr *new_segmnet_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) override {\n    return Status::OK();\n  }\n\n  Status create_vector_index(\n      const std::string &column, const IndexParams::Ptr &index_params,\n      int concurrency, SegmentMeta::Ptr *new_segmnet_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *quant_vector_indexers) override {\n    return Status::OK();\n  }\n\n  Status drop_vector_index(\n      const std::string &column, SegmentMeta::Ptr *new_segmnet_meta,\n      std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          *vector_indexers) override {\n    return Status::OK();\n  }\n\n  Status reload_vector_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &vector_indexers,\n      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>\n          &quant_vector_indexers) override {\n    return Status::OK();\n  }\n\n  bool vector_index_ready(const std::string &column,\n                          const IndexParams::Ptr &index_params) const override {\n    return true;\n  }\n\n  bool all_vector_index_ready() const override {\n    return true;\n  }\n\n  Status create_scalar_index(\n      const std::vector<std::string> &columns,\n      const IndexParams::Ptr &index_params, SegmentMeta::Ptr *new_segment_meta,\n      InvertedIndexer::Ptr *new_scalar_indexer) override {\n    return Status::OK();\n  }\n\n  Status drop_scalar_index(const std::vector<std::string> &columns,\n                           SegmentMeta::Ptr *new_segment_meta,\n                           InvertedIndexer::Ptr 
*new_scalar_indexer) override {\n    return Status::OK();\n  }\n\n  Status reload_scalar_index(\n      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,\n      const InvertedIndexer::Ptr &scalar_indexer) override {\n    return Status::OK();\n  }\n\n  Status Insert(Doc &doc) override {\n    return Status::OK();\n  }\n\n  Status Upsert(Doc &doc) override {\n    return Status::OK();\n  }\n\n  Status Update(Doc &doc) override {\n    return Status::OK();\n  }\n\n  Status Delete(const std::string &pk) override {\n    return Status::OK();\n  }\n\n  Status Delete(uint64_t doc_id) override {\n    return Status::OK();\n  }\n\n  Doc::Ptr Fetch(uint64_t doc_id) override {\n    return nullptr;\n  }\n\n  std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(\n      const std::string &field_name) const override {\n    return {};\n  }\n\n  std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(\n      const std::string &field_name) const override {\n    return {};\n  }\n\n  Status flush() override {\n    return Status::OK();\n  }\n\n  Status dump() override {\n    return Status::OK();\n  }\n\n  Status destroy() override {\n    return Status::OK();\n  }\n};\n\n}  // namespace zvec\n"
  },
  {
    "path": "tests/db/sqlengine/optimizer_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <gtest/gtest.h>\n#include \"db/sqlengine/analyzer/query_info_helper.h\"\n#include \"db/sqlengine/sqlengine_impl.h\"\n#include \"zvec/db/index_params.h\"\n// #define private public\n#include <memory>\n#include \"db/sqlengine/planner/optimizer.h\"\n#include \"mock_segment.h\"\n// #undef private\n\n\nnamespace zvec::sqlengine {\n\nclass MockInvertCondOptimizer : public InvertCondOptimizer {\n public:\n  explicit MockInvertCondOptimizer(CollectionSchema *collection_schema)\n      : InvertCondOptimizer(collection_schema) {}\n\n public:\n  bool invert_rule(Segment *segment, QueryRelNode *invert_cond) override;\n};\n\nbool MockInvertCondOptimizer::invert_rule(Segment *segment,\n                                          QueryRelNode *invert_cond) {\n  if (invert_cond->op() == QueryNodeOp::Q_IN) {\n    return true;\n  }\n\n  std::string invert_value = invert_cond->right()->text();\n\n  std::string numeric_text{\"\"};\n  QueryInfoHelper::data_buf_2_text(invert_value, DataType::INT32,\n                                   &numeric_text);\n\n  int age = atoi(numeric_text.c_str());\n  std::cout << \"invert cond: age is \" << age << std::endl;\n\n  // invert cond as less than 100\n  if (age < 100) {\n    return true;\n  }\n\n  return false;\n}\n\nclass OptimizerTest : public testing::Test {\n public:\n  // Sets up the test fixture.\n  static void 
SetUpTestSuite() {\n    schema = std::make_shared<CollectionSchema>();\n    auto &collection_schema_ = *schema;\n    collection_schema_.set_name(\"collection\");\n\n    // feature field\n    auto column1 = std::make_shared<FieldSchema>();\n    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    column1->set_name(\"face_feature\");\n    column1->set_index_params(vector_params);\n    column1->set_dimension(4);\n    column1->set_data_type(DataType::VECTOR_FP32);\n    collection_schema_.add_field(column1);\n\n    // invert field\n    auto column2 = std::make_shared<FieldSchema>();\n    column2->set_name(\"age\");\n    column2->set_data_type(DataType::INT32);\n    column2->set_index_params(std::make_shared<InvertIndexParams>(false));\n    collection_schema_.add_field(column2);\n  }\n\n  // Tears down the test fixture.\n  static void TearDownTestSuite() {}\n\n protected:\n  inline static CollectionSchema::Ptr schema;\n  Profiler::Ptr profiler_{new Profiler};\n};\n\n\nTEST_F(OptimizerTest, Basic) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 200\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n}\n\n// case 1. 
invert subroot same as invert cond, do nothing\nTEST_F(OptimizerTest, Case1) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 12\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_FALSE(optimized);\n}\n\n// case 2.1 invert subroot is not found, all conds are forward cond\nTEST_F(OptimizerTest, Case2_1) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 100 and age > 101 or age > 102\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n}\n\n// case 2.2 invert subroot is not found, some conds are forward cond\n// while left invert cond cannot be invert cond any more\nTEST_F(OptimizerTest, Case2_2) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 100 or age > 90\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = 
engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_FALSE(optimized);\n}\n\n\n// case 3.1 subroot is found and be part of invert cond\nTEST_F(OptimizerTest, Case3_1) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 100 and age > 101 and age > 10\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n  ASSERT_TRUE(ret);\n}\n\n// case 3.2 subroot is found and be part of invert cond\nTEST_F(OptimizerTest, Case3_2) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 10 and age > 11 and age > 100\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n}\n\n// case 3.3 subroot is found 
and be part of invert cond\nTEST_F(OptimizerTest, Case3_3) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"(age > 10 or age > 11) and age > 100\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n}\n\n// case 3.4 subroot is found and be part of invert cond, but others also have\n// invert\nTEST_F(OptimizerTest, Case3_4) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age > 10 and (age > 101 and (age > 10 and age > 10))\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_FALSE(optimized);\n}\n\n\n// case 4, optimize with in expr\nTEST_F(OptimizerTest, Case4) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.filter_ = \"age in (10, 20)\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  
ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr query_info = ret.value();\n\n  Optimizer::Ptr optimizer =\n      std::make_shared<MockInvertCondOptimizer>(schema.get());\n\n  auto segment = std::make_shared<MockSegment>();\n\n  bool optimized = optimizer->optimize(segment.get(), query_info.get());\n  // in will not optimized\n  ASSERT_FALSE(optimized);\n\n  // in and optimizable, optimize optimizable\n  query.filter_ = \"age in (10, 20) and age > 100\";\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  query_info = ret.value();\n  optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_TRUE(optimized);\n\n  // in or optimizable, not optimized\n  query.filter_ = \"age in (10, 20) or age > 100\";\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  query_info = ret.value();\n  optimized = optimizer->optimize(segment.get(), query_info.get());\n  ASSERT_FALSE(optimized);\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/query_info_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <memory>\n#include <gmock/gmock-matchers.h>\n#include <gtest/gtest.h>\n#include \"db/sqlengine/sqlengine_impl.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/schema.h\"\n#include \"profiler.h\"\n\n\nnamespace zvec::sqlengine {\n\nclass QueryInfoTest : public testing::Test {\n public:\n  // Sets up the test fixture.\n  static void SetUpTestSuite() {\n    schema = std::make_shared<CollectionSchema>();\n    auto &param = *schema;\n    param.set_name(\"1collection\");\n\n    auto column1 = std::make_shared<FieldSchema>();\n    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    column1->set_name(\"face_feature\");\n    column1->set_index_params(vector_params);\n    column1->set_dimension(4);\n    column1->set_data_type(DataType::VECTOR_FP32);\n    param.add_field(column1);\n\n    auto column2 = std::make_shared<FieldSchema>();\n    column2->set_name(\"name\");\n    column2->set_data_type(DataType::UINT32);\n    param.add_field(column2);\n\n    auto column3 = std::make_shared<FieldSchema>();\n    column3->set_name(\"category\");\n    column3->set_data_type(DataType::STRING);\n    param.add_field(column3);\n\n    auto column4 = std::make_shared<FieldSchema>();\n    column4->set_name(\"face_feature\");\n    column4->set_dimension(4);\n    column4->set_data_type(DataType::VECTOR_FP32);\n    
param.add_field(column4);\n\n    auto column5 = std::make_shared<FieldSchema>();\n    column5->set_name(\"1-dash_score_field\");\n    column5->set_dimension(5);\n    column5->set_data_type(DataType::STRING);\n    param.add_field(column5);\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"name_array\");\n      column->set_data_type(DataType::ARRAY_UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"category_array\");\n      column->set_data_type(DataType::ARRAY_STRING);\n      param.add_field(column);\n    }\n  }\n\n  // Tears down the test fixture.\n  static void TearDownTestSuite() {}\n\n protected:\n  Profiler::Ptr profiler_{new Profiler};\n  inline static CollectionSchema::Ptr schema;\n};\n\n\nTEST_F(QueryInfoTest, BasicQueryRequest) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();\n  QueryInfo::Ptr new_query_info = ret.value();\n  auto &query_fields = new_query_info->query_fields();\n  EXPECT_EQ(query_fields.size(), 5);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(new_query_info->query_topn(), 11);\n  EXPECT_FALSE(new_query_info->filter_cond());\n  EXPECT_FALSE(new_query_info->invert_cond());\n  
EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond = new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());\n  EXPECT_EQ(query.query_sparse_indices_, vector_cond->vector_sparse_indices());\n  EXPECT_EQ(query.query_sparse_values_, vector_cond->vector_sparse_values());\n  EXPECT_EQ(query.query_params_, vector_cond->query_params());\n}\n\nTEST_F(QueryInfoTest, QueryRequestWithFilter) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n  query.filter_ = \"name<3 or name=4 or 1-dash_score_field='test'\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n  auto &query_fields = new_query_info->query_fields();\n  EXPECT_EQ(query_fields.size(), 5);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(new_query_info->query_topn(), 11);\n  EXPECT_TRUE(new_query_info->filter_cond());\n  EXPECT_FALSE(new_query_info->invert_cond());\n  EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond 
= new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());\n  EXPECT_EQ(query.query_sparse_indices_, vector_cond->vector_sparse_indices());\n  EXPECT_EQ(query.query_sparse_values_, vector_cond->vector_sparse_values());\n  EXPECT_EQ(query.query_params_, vector_cond->query_params());\n\n  EXPECT_TRUE(new_query_info->filter_cond());\n  // (nullptr) and (xxx)\n  auto filter_cond = new_query_info->filter_cond();\n  EXPECT_EQ(filter_cond->op_name(), \"and\");\n  EXPECT_FALSE(filter_cond->left());\n\n  // ((name<3) or (name=4)) or (1-dash_score_field=test)\n  auto right = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());\n  EXPECT_TRUE(right);\n  EXPECT_EQ(right->op_name(), \"or\");\n\n  // 1-dash_score_field=test\n  auto number_field_filter =\n      std::dynamic_pointer_cast<QueryNode>(right->right());\n  ASSERT_TRUE(number_field_filter);\n  EXPECT_EQ(number_field_filter->op_name(), \"=\");\n  auto left_key =\n      std::dynamic_pointer_cast<QueryIDNode>(number_field_filter->left());\n  EXPECT_EQ(left_key->op_name(), \"ID\");\n  EXPECT_EQ(left_key->value(), \"1-dash_score_field\");\n  auto right_const = std::dynamic_pointer_cast<QueryConstantNode>(\n      number_field_filter->right());\n  ASSERT_TRUE(right_const);\n  EXPECT_EQ(right_const->op_name(), \"STRING_VALUE\");\n  EXPECT_EQ(right_const->value(), \"test\");\n\n  // (name<3) or (name=4)\n  auto left = std::dynamic_pointer_cast<QueryNode>(right->left());\n  ASSERT_TRUE(left);\n  EXPECT_EQ(left->op_name(), \"or\");\n  auto or1 = std::dynamic_pointer_cast<QueryNode>(left->left());\n  EXPECT_EQ(or1->op_name(), \"<\");\n  auto id1 = std::dynamic_pointer_cast<QueryIDNode>(or1->left());\n  ASSERT_TRUE(id1);\n  EXPECT_EQ(id1->op_name(), \"ID\");\n  EXPECT_EQ(id1->value(), \"name\");\n  auto const1 = std::dynamic_pointer_cast<QueryConstantNode>(or1->right());\n  
ASSERT_TRUE(const1);\n  EXPECT_EQ(const1->op_name(), \"INT_VALUE\");\n  EXPECT_EQ(const1->value(), \"3\");\n  auto or2 = std::dynamic_pointer_cast<QueryNode>(left->right());\n  EXPECT_EQ(or2->op_name(), \"=\");\n}\n\nTEST_F(QueryInfoTest, QueryRequestWithIncludeVector) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n  query.include_vector_ = true;\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n  auto &query_fields = new_query_info->query_fields();\n  EXPECT_EQ(query_fields.size(), 6);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(query_fields[5]->field_name(), \"face_feature\");\n  EXPECT_EQ(new_query_info->query_topn(), 11);\n  EXPECT_FALSE(new_query_info->filter_cond());\n  EXPECT_FALSE(new_query_info->invert_cond());\n  EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond = new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());\n  EXPECT_EQ(query.query_sparse_indices_, vector_cond->vector_sparse_indices());\n  EXPECT_EQ(query.query_sparse_values_, 
vector_cond->vector_sparse_values());\n  EXPECT_EQ(query.query_params_, vector_cond->query_params());\n}\n\nTEST_F(QueryInfoTest, OR_ANCESTOR) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n  query.filter_ = \"name=1 and (name=2 or name=3)\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n}\n\nTEST_F(QueryInfoTest, QueryRequestWithInFilter) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 10;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n  query.filter_ =\n      \"name=3 or name in (1, 2, 3) or category not in (\\\"a\\\", \\\"b\\\", \\\"c\\\")\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n\n  auto &query_fields = new_query_info->query_fields();\n  EXPECT_EQ(query_fields.size(), 5);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(new_query_info->query_topn(), 10);\n\n  EXPECT_FALSE(new_query_info->invert_cond());\n  
EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond = new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n  std::vector<float> data{1.1, 2.2, 3.3, 4.4};\n  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());\n\n  EXPECT_TRUE(new_query_info->filter_cond());\n  // (nullptr) and (xxx)\n  auto filter_cond = new_query_info->filter_cond();\n  EXPECT_EQ(filter_cond->op_name(), \"and\");\n  EXPECT_FALSE(filter_cond->left());\n\n  // ((name=3) or (name in (1, 2, 3))) or (category not in (\"a\", \"b\", \"c\"))\n  auto right = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());\n  EXPECT_TRUE(right);\n  EXPECT_EQ(right->op_name(), \"or\");\n\n  // category in (\"a\", \"b\", \"c\")\n  auto category_filter = std::dynamic_pointer_cast<QueryNode>(right->right());\n  ASSERT_TRUE(category_filter);\n  EXPECT_EQ(category_filter->op_name(), \" in \");\n  auto left_key =\n      std::dynamic_pointer_cast<QueryIDNode>(category_filter->left());\n  EXPECT_EQ(left_key->op_name(), \"ID\");\n  EXPECT_EQ(left_key->value(), \"category\");\n  auto right_const =\n      std::dynamic_pointer_cast<QueryListNode>(category_filter->right());\n  ASSERT_TRUE(right_const);\n  EXPECT_EQ(right_const->op_name(), \"LIST_VALUE\");\n  EXPECT_EQ(right_const->text(), \"NOT (a, b, c)\");\n\n  // (name=3) or (name in (1, 2, 3))\n  auto left = std::dynamic_pointer_cast<QueryNode>(right->left());\n  ASSERT_TRUE(left);\n  EXPECT_EQ(left->op_name(), \"or\");\n  auto or1 = std::dynamic_pointer_cast<QueryNode>(left->left());\n  EXPECT_EQ(or1->op_name(), \"=\");\n  auto id1 = std::dynamic_pointer_cast<QueryIDNode>(or1->left());\n  ASSERT_TRUE(id1);\n  EXPECT_EQ(id1->op_name(), \"ID\");\n  EXPECT_EQ(id1->value(), \"name\");\n  auto const1 = 
std::dynamic_pointer_cast<QueryConstantNode>(or1->right());\n  ASSERT_TRUE(const1);\n  EXPECT_EQ(const1->op_name(), \"INT_VALUE\");\n  EXPECT_EQ(const1->value(), \"3\");\n\n  auto or2 = std::dynamic_pointer_cast<QueryNode>(left->right());\n  EXPECT_EQ(or2->op_name(), \" in \");\n  auto id2 = std::dynamic_pointer_cast<QueryIDNode>(or2->left());\n  ASSERT_TRUE(id2);\n  EXPECT_EQ(id2->op_name(), \"ID\");\n  EXPECT_EQ(id2->value(), \"name\");\n  auto const2 = std::dynamic_pointer_cast<QueryListNode>(or2->right());\n  ASSERT_TRUE(const2);\n  EXPECT_EQ(const2->op_name(), \"LIST_VALUE\");\n  EXPECT_EQ(const2->text(), \"(1, 2, 3)\");\n}\n\n\nTEST_F(QueryInfoTest, QueryRequestWithInFilterWrong) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n\n  query.filter_ = (\"name in ()\");\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n\n  query.filter_ = (\"name in (\\\"a\\\", 2, 3)\");\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n\n  query.filter_ = (\"name in (1.1, 2, 3)\");\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n\n  query.filter_ = (\"category in (1.1, \\\"b\\\")\");\n  ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n}\n\nTEST_F(QueryInfoTest, QueryRequestWithInFilterNum1024) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 10;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  
query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n\n  std::string filter_str;\n  for (int i = 0; i < 1024; i++) {\n    if (i != 0) {\n      filter_str += \" or \";\n    }\n    filter_str += \"name=\" + std::to_string(i);\n  }\n  query.filter_ = filter_str;\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n\n  auto &query_fields = new_query_info->query_fields();\n  EXPECT_EQ(query_fields.size(), 5);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(new_query_info->query_topn(), 10);\n\n  EXPECT_FALSE(new_query_info->invert_cond());\n  EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond = new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n}\n\n\nTEST_F(QueryInfoTest, QueryRequestWithFilter_contain) {\n  VectorQuery query;\n  query.output_fields_ = {\"*\"};\n  query.topk_ = 10;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"face_feature\";\n  query.include_vector_ = false;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n  query.filter_ =\n      R\"( name_array contain_all (1, 2, 3) and )\"\n      R\"( (name_array not contain_all (4, 5) or category_array contain_any\n      (\"a\", \"b\")) )\"\n      R\"( or category_array not 
contain_any (\"c\", \"d\", \"e\")\n      )\";\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_TRUE(ret.has_value());\n  QueryInfo::Ptr new_query_info = ret.value();\n  auto &query_fields = new_query_info->query_fields();\n  // pre-defined schema field\n  EXPECT_EQ(query_fields.size(), 5);\n  EXPECT_EQ(query_fields[0]->field_name(), \"name\");\n  EXPECT_EQ(query_fields[1]->field_name(), \"category\");\n  EXPECT_EQ(query_fields[2]->field_name(), \"1-dash_score_field\");\n  EXPECT_EQ(query_fields[3]->field_name(), \"name_array\");\n  EXPECT_EQ(query_fields[4]->field_name(), \"category_array\");\n  EXPECT_EQ(new_query_info->query_topn(), 10);\n\n  EXPECT_FALSE(new_query_info->invert_cond());\n  EXPECT_FALSE(new_query_info->post_filter_cond());\n  EXPECT_FALSE(new_query_info->post_invert_cond());\n\n  ASSERT_TRUE(new_query_info->vector_cond_info());\n  auto vector_cond = new_query_info->vector_cond_info();\n  EXPECT_EQ(1, vector_cond->batch());\n  EXPECT_EQ(\"face_feature\", vector_cond->vector_field_name());\n\n  EXPECT_TRUE(new_query_info->filter_cond());\n  /*\n                     _________________[and]__________________\n                   /                                         \\\n      [nullptr(vector_cond)]                            [filter condition]\n  */\n  // (nullptr) and (xxx)\n  auto filter_cond = new_query_info->filter_cond();\n  EXPECT_EQ(filter_cond->op_name(), \"and\");\n  EXPECT_FALSE(filter_cond->left());\n\n  /*\n                                _______________[or]_______________\n                               /                                   \\\n                _____________[and]_____________  [category_array not\n                contain_any\n (\"c\", \"d\", \"e\")]\n               /                               \\\n [name_array contain_all (1, 2, 3)]  ___________[or]______________\n                                  /                    
          \\\n                   [name_array not contain_all (4, 5)]    [category_array\n contain_any (\"a\", \"b\")]\n  */\n  // name_array contain_all (1, 2, 3) and\n  // (name_array not contain_all (4, 5) or category_array contain_any (\"a\",\n  // \"b\")) or category_array not contain_any (\"c\", \"d\", \"e\")\n  auto parent_node = std::dynamic_pointer_cast<QueryNode>(filter_cond);\n  auto cur_node = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \"or\");\n\n\n  // category_array not contain_any (\"c\", \"d\", \"e\")\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->right());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \" contain_any \");\n  {\n    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(cur_node->left());\n    EXPECT_EQ(left_key->op_name(), \"ID\");\n    EXPECT_EQ(left_key->value(), \"category_array\");\n    auto right_const =\n        std::dynamic_pointer_cast<QueryListNode>(cur_node->right());\n    ASSERT_TRUE(right_const);\n    EXPECT_EQ(right_const->op_name(), \"LIST_VALUE\");\n    EXPECT_EQ(right_const->text(), \"NOT (c, d, e)\");\n  }\n  cur_node = parent_node;\n\n  //  name_array contain_all (1, 2, 3) and\n  // (name_array not contain_all (4, 5) or category_array contain_any (\"a\",\n  // \"b\"))\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->left());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \"and\");\n\n  // the left side of 'and'\n  // name_array contain_all (1, 2, 3)\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->left());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \" contain_all \");\n  {\n    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(cur_node->left());\n    
EXPECT_EQ(left_key->op_name(), \"ID\");\n    EXPECT_EQ(left_key->value(), \"name_array\");\n    auto right_const =\n        std::dynamic_pointer_cast<QueryListNode>(cur_node->right());\n    ASSERT_TRUE(right_const);\n    EXPECT_EQ(right_const->op_name(), \"LIST_VALUE\");\n    EXPECT_EQ(right_const->text(), \"(1, 2, 3)\");\n  }\n  cur_node = parent_node;\n\n  // the right side of 'and'\n  // (name_array not contain_all (4, 5) or category_array contain_any (\"a\",\n  // \"b\"))\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->right());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \"or\");\n\n  // name_array not contain_all (4, 5)\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->left());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \" contain_all \");\n  {\n    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(cur_node->left());\n    EXPECT_EQ(left_key->op_name(), \"ID\");\n    EXPECT_EQ(left_key->value(), \"name_array\");\n    auto right_const =\n        std::dynamic_pointer_cast<QueryListNode>(cur_node->right());\n    ASSERT_TRUE(right_const);\n    EXPECT_EQ(right_const->op_name(), \"LIST_VALUE\");\n    EXPECT_EQ(right_const->text(), \"NOT (4, 5)\");\n  }\n  cur_node = parent_node;\n\n  // category_array contain_any (\"a\", \"b\"))\n  parent_node = std::dynamic_pointer_cast<QueryNode>(cur_node);\n  cur_node = std::dynamic_pointer_cast<QueryNode>(cur_node->right());\n  EXPECT_TRUE(cur_node);\n  EXPECT_EQ(cur_node->op_name(), \" contain_any \");\n  {\n    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(cur_node->left());\n    EXPECT_EQ(left_key->op_name(), \"ID\");\n    EXPECT_EQ(left_key->value(), \"category_array\");\n    auto right_const =\n        std::dynamic_pointer_cast<QueryListNode>(cur_node->right());\n    ASSERT_TRUE(right_const);\n    
EXPECT_EQ(right_const->op_name(), \"LIST_VALUE\");\n    EXPECT_EQ(right_const->text(), \"(a, b)\");\n  }\n  cur_node = parent_node;\n}\n\nTEST_F(QueryInfoTest, SelectNonExistField) {\n  VectorQuery query;\n  query.output_fields_ = {\"category_array\", \"not_exist_field\"};\n  query.topk_ = 11;\n  query.include_vector_ = false;\n\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(),\n              testing::HasSubstr(\"not defined in schema\"));\n}\n\nTEST_F(QueryInfoTest, ContainAllExceedLimit) {\n  VectorQuery query;\n  query.topk_ = 200;\n  query.filter_ = \"name_array not contain_all (\";\n  for (int i = 0; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(),\n              testing::HasSubstr(\n                  \"Contain_* rel expr only support list size no more than 32\"));\n}\n\nTEST_F(QueryInfoTest, ContainAnyExceedLimit) {\n  VectorQuery query;\n  query.topk_ = 200;\n  query.filter_ = \"name_array not contain_any (\";\n  for (int i = 0; i <= 32; i++) {\n    query.filter_ += std::to_string(i);\n    if (i < 32) {\n      query.filter_ += \", \";\n    }\n  }\n  query.filter_ += \")\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(),\n              testing::HasSubstr(\n                  \"Contain_* rel expr only support list size no more than 32\"));\n}\n\nTEST_F(QueryInfoTest, ArrayLengthNonExistField) {\n  VectorQuery query;\n  query.topk_ = 
200;\n  query.filter_ = \"array_length(not_exist_field) > 1\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(),\n              testing::HasSubstr(\"array_length argument not found in schema\"));\n}\n\nTEST_F(QueryInfoTest, ArrayLengthOnNonArrayField) {\n  VectorQuery query;\n  query.topk_ = 200;\n  query.filter_ = \"array_length(name) > 1\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(),\n              testing::HasSubstr(\"array_length only support array\"));\n}\n\nTEST_F(QueryInfoTest, ArrayLengthInvalidArgument) {\n  VectorQuery query;\n  query.topk_ = 200;\n  query.filter_ = \"array_length(name_array) > '1'\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(\n      ret.error().message(),\n      testing::HasSubstr(\"array_length right side only support integer\"));\n}\n\nTEST_F(QueryInfoTest, ArrayLengthInvalidOp) {\n  VectorQuery query;\n  query.topk_ = 200;\n  query.filter_ = \"array_length(name_array) like '%'\";\n  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());\n  auto ret = engine->parse_request(schema, query, nullptr);\n  ASSERT_FALSE(ret.has_value());\n  EXPECT_THAT(ret.error().message(), testing::HasSubstr(\"syntax error\"));\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/recall_base.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <cstdint>\n#include <cstdlib>\n#include <iostream>\n#include <memory>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/api.h>\n#include <gtest/gtest.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/segment/segment.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/type.h\"\n\nnamespace zvec {\n\ninline CollectionSchema::Ptr GetCollectionSchema() {\n  auto invert_params = std::make_shared<InvertIndexParams>(true);\n  auto collection_schema = std::make_shared<CollectionSchema>(\n      \"test_collection\",\n      std::vector<FieldSchema::Ptr>{\n          std::make_shared<FieldSchema>(\"id\", DataType::UINT64, false, nullptr),\n          std::make_shared<FieldSchema>(\"invert_id\", DataType::UINT64, false,\n                                        invert_params),\n\n          std::make_shared<FieldSchema>(\"bool\", DataType::BOOL, false, nullptr),\n          std::make_shared<FieldSchema>(\"invert_bool\", DataType::BOOL, false,\n                                        invert_params),\n\n          std::make_shared<FieldSchema>(\"bool_array\", DataType::ARRAY_BOOL,\n                                        false, nullptr),\n          std::make_shared<FieldSchema>(\n              \"invert_bool_array\", 
DataType::ARRAY_BOOL, false, invert_params),\n\n          std::make_shared<FieldSchema>(\"name\", DataType::STRING, false,\n                                        nullptr),\n          std::make_shared<FieldSchema>(\"invert_name\", DataType::STRING, false,\n                                        invert_params),\n\n          std::make_shared<FieldSchema>(\"age\", DataType::INT32, false, nullptr),\n          std::make_shared<FieldSchema>(\n              \"invert_age\", DataType::INT32, false,\n              std::make_shared<InvertIndexParams>(true)),\n\n          std::make_shared<FieldSchema>(\"score\", DataType::DOUBLE, false,\n                                        nullptr),\n\n          std::make_shared<FieldSchema>(\"optional_age\", DataType::UINT32, true,\n                                        nullptr),\n          std::make_shared<FieldSchema>(\"invert_optional_age\", DataType::UINT32,\n                                        true, invert_params),\n\n          std::make_shared<FieldSchema>(\"category_set\", DataType::ARRAY_INT32,\n                                        true, nullptr),\n          std::make_shared<FieldSchema>(\"invert_category_set\",\n                                        DataType::ARRAY_INT32, true,\n                                        invert_params),\n\n          // add vector field\n          std::make_shared<FieldSchema>(\n              \"dense\", DataType::VECTOR_FP32, 4, false,\n              std::make_shared<FlatIndexParams>(MetricType::L2)),\n\n          // add sparse vector\n          std::make_shared<FieldSchema>(\n              \"sparse\", DataType::SPARSE_VECTOR_FP32, 0, false,\n              std::make_shared<FlatIndexParams>(MetricType::IP)),\n      });\n\n  return collection_schema;\n}\n\ninline Doc CreateDoc(const uint64_t doc_id) {\n  Doc new_doc;\n  new_doc.set_pk(\"pk_\" + std::to_string(doc_id));\n  new_doc.set_doc_id(doc_id);\n\n  new_doc.set<uint64_t>(\"id\", doc_id);\n  new_doc.set<uint64_t>(\"invert_id\", 
doc_id);\n  new_doc.set<bool>(\"bool\", doc_id % 100 == 0);\n  new_doc.set<bool>(\"invert_bool\", doc_id % 100 == 0);\n  new_doc.set<int32_t>(\"age\", doc_id % 100);\n  new_doc.set<int32_t>(\"invert_age\", doc_id % 100);\n  if (uint32_t v = doc_id % 100; v) {\n    new_doc.set(\"optional_age\", v);\n    new_doc.set(\"invert_optional_age\", v);\n  }\n  auto name = \"user_\" + std::to_string(doc_id % 100);\n  new_doc.set<std::string>(\"name\", name);\n  new_doc.set<std::string>(\"invert_name\", name);\n  new_doc.set<double>(\"score\", static_cast<double>(rand() % 1000) / 10.0);\n\n  // vector\n  std::vector<float> vv;\n  for (uint32_t i = 0; i < 4; i++) {\n    vv.push_back(static_cast<float>(doc_id));\n  }\n  new_doc.set<std::vector<float>>(\"dense\", vv);\n\n  // sparse vector\n  {\n    std::vector<uint32_t> indices;\n    std::vector<float> values;\n    for (uint32_t i = 0; i < doc_id % 100; i++) {\n      indices.push_back(i);\n      values.push_back(static_cast<float>(doc_id));\n    }\n    new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(\n        \"sparse\", std::make_pair(indices, values));\n  }\n\n  auto category_size = doc_id % 100;\n  if (category_size > 0) {\n    std::vector<int32_t> category;\n    for (uint32_t i = 1; i <= category_size; i++) {\n      category.push_back(i);\n    }\n    new_doc.set(\"category_set\", category);\n    new_doc.set(\"invert_category_set\", category);\n  }\n\n  if (doc_id % 3 == 0) {\n    new_doc.set<std::vector<bool>>(\"bool_array\", {true, false, true});\n    new_doc.set<std::vector<bool>>(\"invert_bool_array\", {true, false, true});\n  } else if (doc_id % 3 == 1) {\n    new_doc.set<std::vector<bool>>(\"bool_array\", {true, true, true});\n    new_doc.set<std::vector<bool>>(\"invert_bool_array\", {true, true, true});\n  } else {\n    new_doc.set<std::vector<bool>>(\"bool_array\", {false, false, false});\n    new_doc.set<std::vector<bool>>(\"invert_bool_array\", {false, false, false});\n  }\n\n  return 
new_doc;\n}\n\ninline Status InsertDoc(const Segment::Ptr &segment,\n                        const uint64_t start_doc_id,\n                        const uint64_t end_doc_id) {\n  srand(time(NULL));\n  long long create_total = 0;\n  long long insert_total = 0;\n  for (auto doc_id = start_doc_id; doc_id < end_doc_id; doc_id++) {\n    if (segment) {\n      auto start = std::chrono::system_clock::now();\n      Doc new_doc = CreateDoc(doc_id);\n      auto end = std::chrono::system_clock::now();\n      auto create_cost =\n          std::chrono::duration_cast<std::chrono::microseconds>(end - start)\n              .count();\n      create_total += create_cost;\n\n      start = std::chrono::system_clock::now();\n      auto status = segment->Insert(new_doc);\n      if (!status.ok()) {\n        return status;\n      }\n      end = std::chrono::system_clock::now();\n      auto insert_cost =\n          std::chrono::duration_cast<std::chrono::microseconds>(end - start)\n              .count();\n      insert_total += insert_cost;\n    }\n  }\n  std::cout << \"pure create cost \" << create_total << \"us\" << std::endl;\n  std::cout << \"pure insert cost \" << insert_total << \"us\" << std::endl;\n  return Status::OK();\n}\n\nclass RecallTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    FileHelper::RemoveDirectory(seg_path_);\n    FileHelper::CreateDirectory(seg_path_);\n\n    collection_schema_ = GetCollectionSchema();\n    auto segment = create_segment();\n    if (segment == nullptr) {\n      LOG_ERROR(\"create segment failed\");\n      EXPECT_TRUE(segment != nullptr);\n      std::exit(EXIT_FAILURE);\n    }\n    auto status = InsertDoc(segment, 0, 10000);\n    if (!status.ok()) {\n      LOG_ERROR(\"insert doc failed: %s\", status.c_str());\n      EXPECT_TRUE(status.ok());\n      std::exit(EXIT_FAILURE);\n    }\n    segments_.push_back(segment);\n  }\n\n  static void TearDownTestSuite() {\n    segments_.clear();\n    
FileHelper::RemoveDirectory(seg_path_);\n  }\n\n public:\n  static std::string GetPath() {\n    return seg_path_;\n  }\n\n  static Segment::Ptr create_segment();\n\n protected:\n  static inline std::string seg_path_ = \"./test_collection\";\n  static inline CollectionSchema::Ptr collection_schema_;\n  static inline std::vector<Segment::Ptr> segments_;\n};\n\ninline Segment::Ptr RecallTest::create_segment() {\n  auto seg_path = GetPath();\n  auto segment_meta = std::make_shared<SegmentMeta>();\n  segment_meta->set_id(0);\n\n  auto id_map = IDMap::CreateAndOpen(\"test_collection\", GetPath() + \"/id_map\",\n                                     true, false);\n  auto delete_store = std::make_shared<DeleteStore>(\"test_collection\");\n\n  Version v1;\n  v1.set_schema(*collection_schema_);\n  std::string v_path = GetPath() + \"/test_manifest\";\n  FileHelper::CreateDirectory(v_path);\n  auto vm = VersionManager::Create(v_path, v1);\n  if (!vm.has_value()) {\n    LOG_ERROR(\"create version manager failed: %s\", vm.error().c_str());\n    return nullptr;\n  }\n\n  BlockMeta mem_block;\n  mem_block.id_ = 0;\n  mem_block.type_ = BlockType::SCALAR;\n  mem_block.min_doc_id_ = 0;\n  mem_block.max_doc_id_ = 0;\n  mem_block.doc_count_ = 0;\n  segment_meta->set_writing_forward_block(mem_block);\n\n  SegmentOptions options;\n  options.read_only_ = false;\n  options.enable_mmap_ = true;\n  options.max_buffer_size_ = 256 * 1024;\n\n  auto result =\n      Segment::CreateAndOpen(GetPath(), *collection_schema_, 0, 0, id_map,\n                             delete_store, vm.value(), options);\n\n  if (!result) {\n    LOG_ERROR(\"create segment failed: %s\", result.error().c_str());\n    return nullptr;\n  }\n  auto segment = result.value();\n  return segment;\n}\n\n}  // namespace zvec\n"
  },
  {
    "path": "tests/db/sqlengine/simple_rewriter_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"sqlengine/analyzer/simple_rewriter.h\"\n#include <gtest/gtest.h>\n#include \"db/sqlengine/analyzer/query_info.h\"\n#include \"db/sqlengine/sqlengine_impl.h\"\n#include \"zvec/db/doc.h\"\n#include \"zvec/db/schema.h\"\n\nnamespace zvec::sqlengine {\n\nclass SimpleRewriterTest : public testing::Test {\n public:\n  // Sets up the test fixture.\n  static void SetUpTestSuite() {\n    schema = std::make_shared<CollectionSchema>();\n    auto &param = *schema;\n    param.set_name(\"1collection\");\n\n    auto column1 = std::make_shared<FieldSchema>();\n    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    column1->set_name(\"face_feature\");\n    column1->set_index_params(vector_params);\n    column1->set_dimension(4);\n    column1->set_data_type(DataType::VECTOR_FP32);\n    param.add_field(column1);\n\n    auto column2 = std::make_shared<FieldSchema>();\n    column2->set_name(\"age\");\n    column2->set_data_type(DataType::UINT32);\n    param.add_field(column2);\n\n    auto column_gender = std::make_shared<FieldSchema>();\n    column_gender->set_name(\"gender\");\n    column_gender->set_data_type(DataType::UINT32);\n    param.add_field(column_gender);\n\n    auto column3 = std::make_shared<FieldSchema>();\n    column3->set_name(\"category\");\n    column3->set_data_type(DataType::STRING);\n    
param.add_field(column3);\n\n    auto column4 = std::make_shared<FieldSchema>();\n    column4->set_name(\"face_feature\");\n    column4->set_dimension(4);\n    column4->set_data_type(DataType::VECTOR_FP32);\n    param.add_field(column4);\n\n    auto column5 = std::make_shared<FieldSchema>();\n    column5->set_name(\"filename\");\n    column5->set_dimension(5);\n    column5->set_data_type(DataType::STRING);\n    param.add_field(column5);\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"loc\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"fid\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"agent_id\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"state\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"categoryId\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"passed_days\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"category_in\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"category_out\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = 
std::make_shared<FieldSchema>();\n      column->set_name(\"intAttr\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"intAttr\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"strAttr\");\n      column->set_data_type(DataType::STRING);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"partitionName\");\n      column->set_data_type(DataType::STRING);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"doc_id\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"a\");\n      column->set_data_type(DataType::UINT32);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"is_type1\");\n      column->set_data_type(DataType::BOOL);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"is_type2\");\n      column->set_data_type(DataType::BOOL);\n      param.add_field(column);\n    }\n\n    {\n      auto column = std::make_shared<FieldSchema>();\n      column->set_name(\"category_array\");\n      column->set_data_type(DataType::ARRAY_STRING);\n      param.add_field(column);\n    }\n  }\n\n  // Tears down the test fixture.\n  static void TearDownTestSuite() {}\n\n  QueryInfo::Ptr parse(const std::string &filter) {\n    VectorQuery query;\n    query.output_fields_ = {\"*\"};\n    query.topk_ = 11;\n    query.include_vector_ = false;\n    query.filter_ = filter;\n\n    auto engine = 
std::make_shared<SQLEngineImpl>(profiler_);\n    auto ret = engine->parse_request(schema, query, nullptr);\n\n    // ASSERT_TRUE(ret.has_value());\n    QueryInfo::Ptr new_query_info = ret.value();\n    return new_query_info;\n  }\n\n\n protected:\n  Profiler::Ptr profiler_{new Profiler};\n  inline static CollectionSchema::Ptr schema;\n};\n\nclass EqOrRewriteTest : public SimpleRewriterTest {};\n\nTEST_F(EqOrRewriteTest, SimpleEqOr) {\n  auto info = parse(\"age = 10 or age = 20 \");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(), \"age in (10, 20)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, SimpleManyEqOr) {\n  auto info = parse(\n      \"age = 1 or age = 2 or age = 3 or age = 4 \"\n      \"or age = 5 or age = 6 or age = 7 or age = 8 or age = 9 or age = 10 or \"\n      \"age = 11 or age = 12 or age = 13 or age = 14 or age = 15 or age = 16 or \"\n      \"age = 17 or age = 18 or age = 19 or age = 20 or age = 21 or age = 22 or \"\n      \"age = 23 or age = 24 or age = 25 or age = 26 or age = 27 or age = 28 or \"\n      \"age = 29 or age = 30 or age = 31 or age = 32 or age = 33 or age = 34 or \"\n      \"age = 35 or age = 36 or age = 37 or age = 38 or age = 39 or age = 40 or \"\n      \"age = 41 or age = 42 or age = 43 or age = 44 or age = 45 or age = 46 or \"\n      \"age = 47 or age = 48 or age = 49 or age = 50 or age = 51 or age = 52 or \"\n      \"age = 53 or age = 54 or age = 55 or age = 56 or age = 57 or age = 58 or \"\n      \"age = 59 or age = 60 or age = 61 or age = 62 or age = 63 or age = 64 or \"\n      \"age = 65 or age = 66 or age = 67 or age = 68 or age = 69 or age = 70 or \"\n      \"age = 71 or age = 72 or age = 73 or age = 74 or age = 75 or age = 76 or \"\n      \"age = 77 or age = 78 or age = 79 or age = 80 or age = 81 or age = 82 or \"\n      \"age = 83 or age = 84 or age = 85 or age = 86 or age = 87 or age = 88 or \"\n      \"age = 89 or age = 90 or age = 91 or age = 92 or age = 93 or age = 94 or \"\n      \"age = 95 or age = 96 
or age = 97 or age = 98 or age = 99 or age = 100\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(\n      info->filter_cond()->text(),\n      \"age in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, \"\n      \"19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, \"\n      \"37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, \"\n      \"55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, \"\n      \"73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, \"\n      \"91, 92, 93, 94, 95, 96, 97, 98, 99, 100)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, SimpleManyEqOrParas) {\n  auto info = parse(\n      \"age = 1 or age = 2 or age = 3 or age = 4 \"\n      \"or age = 5 or age = 6 or (age = 7 or age = 8 or age = 9 or age = 10 or \"\n      \"age = 11 or age = 12 or age = 13) or age = 14 or age = 15 or age = 16 \"\n      \"or \"\n      \"age = 17 or age = 18 or age = 19 or age = 20 or age = 21 or age = 22 or \"\n      \"age = 23 or age = 24 or age = 25 or age = 26 or age = 27 or age = 28 or \"\n      \"age = 29 or age = 30 or age = 31 or age = 32 or age = 33 or age = 34 or \"\n      \"age = 35 or age = 36 or age = 37 or (age = 38 or age = 39 or age = 40 \"\n      \"or \"\n      \"age = 41 or age = 42 or age = 43 or age = 44 or age = 45 or age = 46 or \"\n      \"age = 47 or age = 48 or age = 49 or age = 50 or age = 51 or age = 52 or \"\n      \"age = 53 or age = 54 or age = 55 or age = 56 or age = 57 or age = 58 or \"\n      \"age = 59 or age = 60 or age = 61 or age = 62 or age = 63 or age = 64 or \"\n      \"age = 65 or age = 66 or age = 67 or age = 68 or age = 69 or age = 70 or \"\n      \"age = 71 or age = 72 or age = 73 or age = 74 or age = 75 or age = 76 or \"\n      \"age = 77 or age = 78 or age = 79 or age = 80 or age = 81 or age = 82 or \"\n      \"age = 83 or age = 84 or age = 85) or age = 86 or age = 87 or age = 88 \"\n      \"or \"\n      \"age = 89 or age = 90 or age = 91 
or age = 92 or age = 93 or age = 94 or \"\n      \"age = 95 or age = 96 or age = 97 or (age = 98 or age = 99) or age = \"\n      \"100\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(\n      info->filter_cond()->text(),\n      \"age in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, \"\n      \"19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, \"\n      \"37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, \"\n      \"55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, \"\n      \"73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, \"\n      \"91, 92, 93, 94, 95, 96, 97, 98, 99, 100)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, SimpleNeOr) {\n  auto info = parse(\"age != 10 or age != 20 \");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(), \"age in NOT (10, 20)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, SimpleManyNeOr) {\n  auto info = parse(\n      \"age != 1 or age != 2 or age != 3 or age \"\n      \"!= 4 or age != 5 or age != 6 or age != 7 or age != 8 or age != 9 or age \"\n      \"!= 10 or age != 11 or age != 12 or age != 13 or age != 14 or age != 15 \"\n      \"or age != 16 or age != 17 or age != 18 or age != 19 or age != 20\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"age in NOT (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \"\n            \"16, 17, 18, \"\n            \"19, 20)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, EqAndNe) {\n  auto info = parse(\n      \"age != 10 or age != 20 or age = 30 or \"\n      \"age = 40\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(age in NOT (10, 20)(FORWARD)(OR_A)) or (age in (30, \"\n            \"40)(FORWARD)(OR_A))\");\n}\n\nTEST_F(EqOrRewriteTest, PreEqOr) {\n  {\n    auto info = parse(\n        \"gender =1 or age = 10 or age = 20 or \"\n        \"age = 30 or age = 40\");\n    ASSERT_NE(info, nullptr);\n    
EXPECT_EQ(info->filter_cond()->text(),\n              \"(gender=1(FORWARD)(OR_A)) or (age in (10, 20, 30, \"\n              \"40)(FORWARD)(OR_A))\");\n  }\n  {\n    auto info = parse(\n        \"gender =1 and age = 10 or age = 20 or \"\n        \"age = 30 or age = 40\");\n    ASSERT_NE(info, nullptr);\n    EXPECT_EQ(info->filter_cond()->text(),\n              \"((gender=1(FORWARD)(OR_A)) and (age=10(FORWARD)(OR_A))) or (age \"\n              \"in (20, 30, 40)(FORWARD)(OR_A))\");\n  }\n}\n\nTEST_F(EqOrRewriteTest, PostEqOr) {\n  {\n    auto info = parse(\n        \"age = 10 or age = 20 or \"\n        \"age = 30 or age = 40 or gender = 1\");\n    ASSERT_NE(info, nullptr);\n    EXPECT_EQ(info->filter_cond()->text(),\n              \"(age in (10, 20, 30, 40)(FORWARD)(OR_A)) or \"\n              \"(gender=1(FORWARD)(OR_A))\");\n  }\n  {\n    auto info = parse(\n        \"age = 10 or age = 20 or \"\n        \"age = 30 or age = 40 and gender = 1\");\n    ASSERT_NE(info, nullptr);\n    EXPECT_EQ(info->filter_cond()->text(),\n              \"(age in (10, 20, 30)(FORWARD)(OR_A)) or \"\n              \"((age=40(FORWARD)(OR_A)) and (gender=1(FORWARD)(OR_A)))\");\n  }\n}\n\nTEST_F(EqOrRewriteTest, PreEqAnd) {\n  auto info = parse(\n      \"gender =1 and (age = 10 or age = 20 or \"\n      \"age = 30 or age = 40)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(gender=1(FORWARD)) and (age in (10, 20, 30, 40)(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, PostEqAnd) {\n  auto info = parse(\n      \"(age = 10 or age = 20 or \"\n      \"age = 30 or age = 40) and gender=1\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(age in (10, 20, 30, 40)(FORWARD)) and (gender=1(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, PrePostEqAnd) {\n  auto info = parse(\n      \"gender =1 and (age = 10 or age = 20 or \"\n      \"age = 30 or age = 40) and loc != 3\");\n  ASSERT_NE(info, nullptr);\n  
EXPECT_EQ(info->filter_cond()->text(),\n            \"((gender=1(FORWARD)) and (age in (10, 20, 30, 40)(FORWARD))) and \"\n            \"(loc!=3(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases1) {\n  auto info = parse(\n      \"(agent_id=20) and state=1 and (fid=107 \"\n      \"or fid=174 or fid=593 or fid=602 or fid=592 or fid=134 or fid=135 or \"\n      \"fid=136 or fid=137 or fid=138 or fid=139 or fid=141 or fid=267 or \"\n      \"fid=271 or fid=176 or fid=177 or fid=178 or fid=179 or fid=180 or \"\n      \"fid=182 or fid=183 or fid=184 or fid=270 or fid=479 or fid=488 or \"\n      \"fid=502 or fid=508 or fid=522 or fid=553 or fid=554 or fid=557 or \"\n      \"fid=561 or fid=567 or fid=570 or fid=588 or fid=594 or fid=595 or \"\n      \"fid=596 or fid=597 or fid=598 or fid=603 or fid=604 or fid=605 or \"\n      \"fid=606 or fid=426 or fid=427 or fid=428 or fid=429 or fid=430 or \"\n      \"fid=431 or fid=432 or fid=433 or fid=434 or fid=435 or fid=436 or \"\n      \"fid=437 or fid=438 or fid=439 or fid=440 or fid=441 or fid=442 or \"\n      \"fid=443 or fid=444 or fid=445 or fid=446 or fid=447 or fid=448 or \"\n      \"fid=215 or fid=216 or fid=217 or fid=469 or fid=473 or fid=475 or \"\n      \"fid=476 or fid=477 or fid=478 or fid=524 or fid=528 or fid=529 or \"\n      \"fid=532 or fid=533 or fid=534 or fid=542 or fid=543 or fid=560 or \"\n      \"fid=243 or fid=244 or fid=245 or fid=246 or fid=247 or fid=496 or \"\n      \"fid=497 or fid=506 or fid=248 or fid=249 or fid=250 or fid=251 or \"\n      \"fid=252 or fid=494 or fid=495 or fid=507 or fid=535 or fid=536 or \"\n      \"fid=586 or fid=589 or fid=259 or fid=260 or fid=261 or fid=262 or \"\n      \"fid=263 or fid=264 or fid=265 or fid=491 or fid=492 or fid=493 or \"\n      \"fid=530 or fid=531 or fid=227 or fid=228 or fid=229 or fid=230 or \"\n      \"fid=231 or fid=232 or fid=233 or fid=235 or fid=472 or fid=487 or \"\n      \"fid=537 or fid=559 or fid=236 or fid=237 or fid=238 or fid=239 or \"\n    
  \"fid=240 or fid=241 or fid=242 or fid=273 or fid=546 or fid=587 or \"\n      \"fid=454 or fid=455 or fid=456 or fid=457 or fid=458 or fid=459 or \"\n      \"fid=460 or fid=461 or fid=449 or fid=450 or fid=451 or fid=452 or \"\n      \"fid=453 or fid=480 or fid=481 or fid=482 or fid=483 or fid=484 or \"\n      \"fid=489 or fid=490 or fid=538 or fid=539 or fid=540 or fid=545 or \"\n      \"fid=503 or fid=504 or fid=547 or fid=548 or fid=549 or fid=550 or \"\n      \"fid=509 or fid=510 or fid=511 or fid=512 or fid=513 or fid=523 or \"\n      \"fid=558 or fid=555 or fid=556 or fid=600 or fid=601 or fid=562 or \"\n      \"fid=563 or fid=564 or fid=565 or fid=566 or fid=591 or fid=568 or \"\n      \"fid=569 or fid=590 or fid=571 or fid=572 or fid=573 or fid=574 or \"\n      \"fid=575 or fid=701 or fid=711 or fid=713 or fid=616 or fid=617 or \"\n      \"fid=618 or fid=619 or fid=620 or fid=621 or fid=622 or fid=623 or \"\n      \"fid=624 or fid=625 or fid=626 or fid=629 or fid=672 or fid=607 or \"\n      \"fid=700 or fid=635 or fid=612 or fid=613 or fid=614 or fid=615 or \"\n      \"fid=679 or fid=670 or fid=680 or fid=681 or fid=702 or fid=706 or \"\n      \"fid=714 or fid=675 or fid=676 or fid=640 or fid=643 or fid=649 or \"\n      \"fid=653 or fid=655 or fid=657 or fid=662 or fid=703 or fid=704 or \"\n      \"fid=705 or fid=707 or fid=641 or fid=642 or fid=644 or fid=645 or \"\n      \"fid=646 or fid=647 or fid=648 or fid=709 or fid=650 or fid=651 or \"\n      \"fid=652 or fid=710 or fid=654 or fid=656 or fid=658 or fid=659 or \"\n      \"fid=660 or fid=661 or fid=663 or fid=664 or fid=665 or fid=666 or \"\n      \"fid=667 or fid=668 or fid=669 or fid=678)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(\n      info->filter_cond()->text(),\n      \"((agent_id=20(FORWARD)) and (state=1(FORWARD))) and (fid in (107, 174, \"\n      \"593, 602, 592, 134, 135, 136, 137, 138, 139, 141, 267, 271, 176, 177, \"\n      \"178, 179, 180, 182, 183, 184, 270, 479, 488, 502, 508, 
522, 553, 554, \"\n      \"557, 561, 567, 570, 588, 594, 595, 596, 597, 598, 603, 604, 605, 606, \"\n      \"426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, \"\n      \"440, 441, 442, 443, 444, 445, 446, 447, 448, 215, 216, 217, 469, 473, \"\n      \"475, 476, 477, 478, 524, 528, 529, 532, 533, 534, 542, 543, 560, 243, \"\n      \"244, 245, 246, 247, 496, 497, 506, 248, 249, 250, 251, 252, 494, 495, \"\n      \"507, 535, 536, 586, 589, 259, 260, 261, 262, 263, 264, 265, 491, 492, \"\n      \"493, 530, 531, 227, 228, 229, 230, 231, 232, 233, 235, 472, 487, 537, \"\n      \"559, 236, 237, 238, 239, 240, 241, 242, 273, 546, 587, 454, 455, 456, \"\n      \"457, 458, 459, 460, 461, 449, 450, 451, 452, 453, 480, 481, 482, 483, \"\n      \"484, 489, 490, 538, 539, 540, 545, 503, 504, 547, 548, 549, 550, 509, \"\n      \"510, 511, 512, 513, 523, 558, 555, 556, 600, 601, 562, 563, 564, 565, \"\n      \"566, 591, 568, 569, 590, 571, 572, 573, 574, 575, 701, 711, 713, 616, \"\n      \"617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 629, 672, 607, 700, \"\n      \"635, 612, 613, 614, 615, 679, 670, 680, 681, 702, 706, 714, 675, 676, \"\n      \"640, 643, 649, 653, 655, 657, 662, 703, 704, 705, 707, 641, 642, 644, \"\n      \"645, 646, 647, 648, 709, 650, 651, 652, 710, 654, 656, 658, 659, 660, \"\n      \"661, 663, 664, 665, 666, 667, 668, 669, 678)(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases2) {\n  auto info = parse(\n      \"partitionName = '114634' or \"\n      \"partitionName = '114632' or partitionName = '114635' or partitionName = \"\n      \"'114629' or partitionName = '114630' or partitionName = '114633' or \"\n      \"partitionName = '114636' or partitionName = '114637' or partitionName = \"\n      \"'114631'\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"partitionName in (114634, 114632, 114635, 114629, 114630, 114633, \"\n            \"114636, 114637, 
114631)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases3) {\n  auto info = parse(\n      \"(doc_id=1319620650600837120 or \"\n      \"doc_id=1319621497753739264 or doc_id=1319629144649367552 or \"\n      \"doc_id=1319630319721377793 or doc_id=1319667286769324032 or \"\n      \"doc_id=1319671157117808640 or doc_id=1319671403998793728 or \"\n      \"doc_id=2319684930499055617 or doc_id=1319685259995140096)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"doc_id in (1319620650600837120, 1319621497753739264, \"\n            \"1319629144649367552, 1319630319721377793, 1319667286769324032, \"\n            \"1319671157117808640, 1319671403998793728, 2319684930499055617, \"\n            \"1319685259995140096)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases4) {\n  auto info = parse(\n      \"(strAttr ='' or strAttr = 'prd') and \"\n      \"categoryId = 4\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(strAttr in (, prd)(FORWARD)) and (categoryId=4(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases5) {\n  auto info = parse(\n      \"intAttr = 1  OR intAttr = 5  OR intAttr \"\n      \"= 6  OR intAttr = 9  and categoryId = 1\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(intAttr in (1, 5, 6)(FORWARD)(OR_A)) or \"\n            \"((intAttr=9(FORWARD)(OR_A)) and (categoryId=1(FORWARD)(OR_A)))\");\n}\n\nTEST_F(EqOrRewriteTest, UserCases6) {\n  auto info = parse(\n      \"\"\n      \"filename='OhbVrpoi.pdf' or \"\n      \"filename='wRyoG4dB.pdf' or \"\n      \"filename='dJ3fawFf.pdf' or \"\n      \"filename='ZJS9dk3Q.pdf' or \"\n      \"filename='fY2JD8dL.pdf' or \"\n      \"filename='HnJpdoxC.pdf' or \"\n      \"filename='Hbxm1zvi.pdf' or \"\n      \"filename='r5Q8cxHu.pdf' or \"\n      \"filename='dwF9cZtI.pdf'\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"filename in (OhbVrpoi.pdf, \"\n            
\"wRyoG4dB.pdf, \"\n            \"dJ3fawFf.pdf, \"\n            \"ZJS9dk3Q.pdf, \"\n            \"fY2JD8dL.pdf, \"\n            \"HnJpdoxC.pdf, \"\n            \"Hbxm1zvi.pdf, \"\n            \"r5Q8cxHu.pdf, \"\n            \"dwF9cZtI.pdf)(FORWARD)\");\n}\n\nTEST_F(EqOrRewriteTest, NotChanged1) {\n  auto info = parse(\n      \"passed_days>3 and (loc >= \"\n      \"500 or age > 10)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(passed_days>3(FORWARD)) and ((loc>=500(FORWARD)(OR_A)) \"\n            \"or (age>10(FORWARD)(OR_A)))\");\n}\n\nTEST_F(EqOrRewriteTest, NotChanged2) {\n  auto info = parse(\n      \"strAttr=\\\"online_252\\\" AND (intAttr > \"\n      \"103775813 OR intAttr < 103775813) and categoryId = 88888888\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(\n      info->filter_cond()->text(),\n      \"((strAttr=online_252(FORWARD)) and ((intAttr>103775813(FORWARD)(OR_A)) \"\n      \"or (intAttr<103775813(FORWARD)(OR_A)))) and \"\n      \"(categoryId=88888888(FORWARD))\");\n}\n\nTEST_F(EqOrRewriteTest, NotChanged3) {\n  auto info = parse(\n      \"(is_type1 = true or is_type2 = \"\n      \"true)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(is_type1=true(FORWARD)(OR_A)) or (is_type2=true(FORWARD)(OR_A))\");\n}\n\nTEST_F(EqOrRewriteTest, NotChanged4) {\n  auto info = parse(\"(a = 1 or a != 2)\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"(a=1(FORWARD)(OR_A)) or (a!=2(FORWARD)(OR_A))\");\n}\n\nclass ContainRewriteTest : public SimpleRewriterTest {};\n\nTEST_F(ContainRewriteTest, ContainAllEmptySet) {\n  auto info = parse(\"category_array contain_all ()\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"category_array IS_NOT_NULL (FORWARD)\");\n}\n\nTEST_F(ContainRewriteTest, NotContainAllEmptySet) {\n  auto info = parse(\"category_array not contain_all ()\");\n  ASSERT_NE(info, nullptr);\n  
EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\nTEST_F(ContainRewriteTest, NotContainAnyEmptySet) {\n  auto info = parse(\"category_array not contain_any ()\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(),\n            \"category_array IS_NOT_NULL (FORWARD)\");\n}\n\nTEST_F(ContainRewriteTest, ContainAnyEmptySet) {\n  auto info = parse(\"category_array contain_any ()\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionAnd) {\n  auto info = parse(\"category_array not contain_all () and a = 1\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionMultiAnd) {\n  auto info = parse(\n      \"category_array not contain_all () and a > 1 and a > 2 and a > 3 and a > \"\n      \"4\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionOr) {\n  auto info = parse(\"category_array not contain_all () or a = 1\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->filter_cond()->text(), \"a=1(FORWARD)\");\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionMultiOr) {\n  auto info =\n      parse(\"category_array not contain_all () or a > 1 or a > 2 or a > 3\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(\n      info->filter_cond()->text(),\n      \"((a>1(FORWARD)(OR_A)) or (a>2(FORWARD)(OR_A))) or (a>3(FORWARD)(OR_A))\");\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionAndComplex) {\n  auto info = parse(\"(a > 1 or a < 0) and category_array contain_any () \");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\nTEST_F(ContainRewriteTest, AlwaysFalseConditionOrComplex) {\n  auto info = parse(\"(a > 1 or a < 0) or category_array contain_any () \");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), false);\n  
EXPECT_EQ(info->filter_cond()->text(),\n            \"(a>1(FORWARD)(OR_A)) or (a<0(FORWARD)(OR_A))\");\n}\n\nTEST_F(SimpleRewriterTest, MiscOr) {\n  auto info = parse(\"a = 1 or a = 2 or a = 3 or category_array contain_any ()\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), false);\n  EXPECT_EQ(info->filter_cond()->text(), \"a in (1, 2, 3)(FORWARD)\");\n}\n\nTEST_F(SimpleRewriterTest, MiscAnd) {\n  auto info =\n      parse(\"(a = 1 or a = 2 or a = 3) and category_array contain_any ()\");\n  ASSERT_NE(info, nullptr);\n  EXPECT_EQ(info->is_filter_unsatisfiable(), true);\n}\n\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/sqlengine_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"db/sqlengine/sqlengine.h\"\n#include <cstdint>\n#include <memory>\n#include <gtest/gtest.h>\n#include \"zvec/db//schema.h\"\n#include \"zvec/db/query_params.h\"\n#include \"zvec/db/type.h\"\n#include \"mock_segment.h\"\n\nnamespace zvec::sqlengine {\n\nclass SqlEngineTest : public testing::Test {\n public:\n  void SetUp() override {\n    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);\n    schema_ = std::make_shared<CollectionSchema>(\n        \"test_collection\",\n        std::vector<FieldSchema::Ptr>{\n            std::make_shared<FieldSchema>(\"id\", DataType::INT32, false, 0,\n                                          nullptr),\n            std::make_shared<FieldSchema>(\n                \"name\", DataType::STRING, false, 0,  // nullptr\n                std::make_shared<InvertIndexParams>(false)),\n            std::make_shared<FieldSchema>(\"age\", DataType::INT64, false, 0,\n                                          nullptr),\n            std::make_shared<FieldSchema>(\"score\", DataType::DOUBLE, false, 0,\n                                          nullptr),\n            std::make_shared<FieldSchema>(\"tag_list\", DataType::ARRAY_INT32,\n                                          false, 0, nullptr),\n            std::make_shared<FieldSchema>(\"vector\",\n                                          
DataType::SPARSE_VECTOR_FP32, false,\n                                          4, vector_params),\n        });\n  }\n\n protected:\n  CollectionSchema::Ptr schema_;\n};\n\nTEST_F(SqlEngineTest, Forward) {\n  std::vector<Segment::Ptr> segments = {std::make_shared<MockSegment>()};\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\", \"tag_list\"};\n  query.topk_ = 11;\n  // query.filter_ = \"id > 3 and score < 0.1\";\n  // query.filter_ = \"name like 'name_2%'\";\n  // query.filter_ = \"name not in ('name_2','name_4')\";\n  // query.filter_ = \"tag_list contain_all (1,2,3,4)\";\n  query.filter_ = \"tag_list is null\";\n  if (const char *env_var = std::getenv(\"FILTER\"); env_var != nullptr) {\n    query.filter_ = env_var;\n  }\n  query.include_vector_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  EXPECT_TRUE(ret.has_value());\n}\n\nTEST_F(SqlEngineTest, Vector) {\n  std::vector<Segment::Ptr> segments = {std::make_shared<MockSegment>()};\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"score\"};\n  query.topk_ = 11;\n  query.filter_ = \"id > 3 and score < 0.1\";\n  if (const char *env_var = std::getenv(\"FILTER\"); env_var != nullptr) {\n    query.filter_ = env_var;\n  }\n  // query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.query_sparse_indices_ = \"[0, 1, 2, 3]\";\n  query.query_sparse_values_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"vector\";\n  query.include_vector_ = true;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  
EXPECT_TRUE(ret.has_value());\n}\n\nTEST_F(SqlEngineTest, Invert) {\n  std::vector<Segment::Ptr> segments = {std::make_shared<MockSegment>()};\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"age\", \"score\"};\n  query.topk_ = 11;\n  // query.filter_ = \"name = 'test_name'\";\n  query.filter_ = \"name is not null\";\n  if (const char *env_var = std::getenv(\"FILTER\"); env_var != nullptr) {\n    query.filter_ = env_var;\n  }\n  query.include_vector_ = true;\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  EXPECT_TRUE(ret.has_value());\n}\n\nTEST_F(SqlEngineTest, MultiSegments) {\n  std::vector<Segment::Ptr> segments = {std::make_shared<MockSegment>(),\n                                        std::make_shared<MockSegment>()};\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\", \"score\"};\n  query.topk_ = 11;\n  query.query_vector_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"vector\";\n  // query.filter_ = \"name = 'test_name'\";\n  if (const char *env_var = std::getenv(\"FILTER\"); env_var != nullptr) {\n    query.filter_ = env_var;\n  }\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  EXPECT_TRUE(ret.has_value());\n}\n\nTEST_F(SqlEngineTest, GroupBy) {\n  std::vector<Segment::Ptr> segments = {std::make_shared<MockSegment>()};\n  GroupByVectorQuery query;\n  query.group_by_field_name_ = \"name\";\n  query.group_count_ = 3;\n  query.group_topk_ = 2;\n  query.output_fields_ = {\"id\", \"name\", \"score\"};\n  query.filter_ = \"id > 3 and score < 0.1\";\n  if (const char *env_var = std::getenv(\"FILTER\"); env_var != nullptr) {\n    query.filter_ = env_var;\n  }\n  // query.query_vector_ = \"[0.1, 0.2, 0.3, 
0.4]\";\n  query.query_sparse_indices_ = \"[0, 1, 2, 3]\";\n  query.query_sparse_values_ = \"[0.1, 0.2, 0.3, 0.4]\";\n  query.field_name_ = \"vector\";\n  query.include_vector_ = true;\n  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);\n  query.query_params_->set_radius(0.8F);\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute_group_by(schema_, query, segments);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  EXPECT_TRUE(ret.has_value());\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "tests/db/sqlengine/test_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#pragma once\n\n#include <cstdint>\n#include <cstdlib>\n#include <iostream>\n#include <memory>\n#include <arrow/api.h>\n#include <arrow/io/api.h>\n#include <arrow/ipc/api.h>\n#include <gtest/gtest.h>\n#include \"db/common/file_helper.h\"\n#include \"db/index/common/version_manager.h\"\n#include \"db/index/segment/segment.h\"\n#include \"db/sqlengine/sqlengine.h\"\n#include \"zvec/db/index_params.h\"\n#include \"zvec/db/schema.h\"\n#include \"zvec/db/type.h\"\n\nnamespace zvec::sqlengine {\n\nusing CreateDocFun = Doc (*)(const uint64_t doc_id);\n\ninline Status InsertDoc(const Segment::Ptr &segment,\n                        const uint64_t start_doc_id, const uint64_t end_doc_id,\n                        CreateDocFun create_doc) {\n  srand(time(NULL));\n  long long create_total = 0;\n  long long insert_total = 0;\n  for (auto doc_id = start_doc_id; doc_id < end_doc_id; doc_id++) {\n    if (segment) {\n      auto start = std::chrono::system_clock::now();\n      Doc new_doc = create_doc(doc_id);\n      auto end = std::chrono::system_clock::now();\n      auto create_cost =\n          std::chrono::duration_cast<std::chrono::microseconds>(end - start)\n              .count();\n      create_total += create_cost;\n\n      start = std::chrono::system_clock::now();\n      auto status = segment->Insert(new_doc);\n      if (!status.ok()) {\n        return 
status;\n      }\n      end = std::chrono::system_clock::now();\n      auto insert_cost =\n          std::chrono::duration_cast<std::chrono::microseconds>(end - start)\n              .count();\n      insert_total += insert_cost;\n    }\n  }\n  std::cout << \"pure create cost \" << create_total << \"us\" << std::endl;\n  std::cout << \"pure insert cost \" << insert_total << \"us\" << std::endl;\n  return Status::OK();\n}\n\ninline Segment::Ptr create_segment(const std::string &seg_path,\n                                   const CollectionSchema &schema) {\n  auto segment_meta = std::make_shared<SegmentMeta>();\n  segment_meta->set_id(0);\n\n  auto id_map = IDMap::CreateAndOpen(\"test_collection\", seg_path + \"/id_map\",\n                                     true, false);\n  auto delete_store = std::make_shared<DeleteStore>(\"test_collection\");\n\n  Version v1;\n  v1.set_schema(schema);\n  std::string v_path = seg_path + \"/test_manifest\";\n  FileHelper::CreateDirectory(v_path);\n  auto vm = VersionManager::Create(v_path, v1);\n  if (!vm.has_value()) {\n    LOG_ERROR(\"create version manager failed: %s\", vm.error().c_str());\n    return nullptr;\n  }\n\n  BlockMeta mem_block;\n  mem_block.id_ = 0;\n  mem_block.type_ = BlockType::SCALAR;\n  mem_block.min_doc_id_ = 0;\n  mem_block.max_doc_id_ = 0;\n  mem_block.doc_count_ = 0;\n  segment_meta->set_writing_forward_block(mem_block);\n\n  SegmentOptions options;\n  options.read_only_ = false;\n  options.enable_mmap_ = true;\n\n  auto result = Segment::CreateAndOpen(seg_path, schema, 0, 0, id_map,\n                                       delete_store, vm.value(), options);\n\n  if (!result) {\n    LOG_ERROR(\"create segment failed: %s\", result.error().c_str());\n    return nullptr;\n  }\n  auto segment = result.value();\n  return segment;\n}\n\n}  // namespace zvec::sqlengine"
  },
  {
    "path": "tests/db/sqlengine/vector_recall_test.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License\n\n#include <cstdint>\n#include <memory>\n#include <gtest/gtest.h>\n#include \"db/sqlengine/sqlengine.h\"\n#include \"recall_base.h\"\n\nnamespace zvec::sqlengine {\n\nclass VectorRecallTest : public RecallTest {};\n\nTEST_F(VectorRecallTest, Basic) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)i * i * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, HybridInvertFilter) {\n  VectorQuery query;\n  
query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.filter_ = \"invert_id >= 1\";\n  query.topk_ = 200;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    int i = j + 1;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)i * i * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, HybridInvertFilterBfByKeys) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.filter_ = \"invert_id < 199\";\n  query.topk_ = 199;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int i = 0; i < query.topk_; i++) {\n    auto &doc = docs[i];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = 
doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)i * i * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, HybridForwardFilter) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.filter_ = \"id >= 1\";\n  query.topk_ = 200;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n  for (int j = 0; j < query.topk_; j++) {\n    auto &doc = docs[j];\n    int i = j + 1;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(i));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), i % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(i % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)i * i * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, HybridInvertForwardFilter) {\n  VectorQuery query;\n  query.output_fields_ = {\"name\", \"age\"};\n  query.filter_ = \"invert_id >= 1 and id <= 100\";\n  query.topk_ = 200;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  
EXPECT_EQ(docs.size(), 100);\n  for (size_t j = 0; j < docs.size(); j++) {\n    auto &doc = docs[j];\n    int doc_id = j + 1;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), doc_id % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(doc_id % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)doc_id * doc_id * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, Sparse) {\n  VectorQuery query;\n  query.output_fields_ = {\"id\", \"name\", \"age\"};\n  query.topk_ = 200;\n  std::vector<float> feature(4, 1.0);\n  std::vector<uint32_t> indices{0, 1, 2, 3};\n  query.query_sparse_indices_.assign((const char *)indices.data(),\n                                     indices.size() * sizeof(uint32_t));\n  query.query_sparse_values_.assign((const char *)feature.data(),\n                                    feature.size() * sizeof(float));\n  query.field_name_ = \"sparse\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), query.topk_);\n\n  int doc_id = 9999;\n  for (size_t j = 0; j < docs.size(); j++) {\n    auto &doc = docs[j];\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), doc_id % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(doc_id % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)doc_id * 4);\n    doc_id--;\n    while (doc_id % 100 <= 3) {\n      doc_id--;\n    }\n  }\n}\n\nTEST_F(VectorRecallTest, DeleteFilter) {\n  // This test uses only one segment and thus we only operate on the first one\n 
 for (int i = 0; i < 4000; i++) {\n    segments_[0]->Delete(\"pk_\" + std::to_string(i));\n  }\n\n  VectorQuery query;\n  query.output_fields_ = {\"name\", \"age\"};\n  query.topk_ = 100;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 100);\n  for (size_t j = 0; j < docs.size(); j++) {\n    auto &doc = docs[j];\n    int doc_id = j + 4000;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), doc_id % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(doc_id % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)doc_id * doc_id * 4);\n  }\n}\n\nTEST_F(VectorRecallTest, HybridInvertForwardDeleteFilter) {\n  // In previous test, docs[0-4000) has been deleted\n  VectorQuery query;\n  query.output_fields_ = {\"name\", \"age\"};\n  query.filter_ = \"invert_id >= 6000 and id < 6080\";\n  query.topk_ = 100;\n  std::vector<float> feature(4, 0.0);\n  query.query_vector_.assign((const char *)feature.data(),\n                             feature.size() * sizeof(float));\n  query.field_name_ = \"dense\";\n\n  auto engine = SQLEngine::create(std::make_shared<Profiler>());\n  auto ret = engine->execute(collection_schema_, query, segments_);\n  if (!ret) {\n    LOG_ERROR(\"execute failed: [%s]\", ret.error().c_str());\n  }\n  ASSERT_TRUE(ret.has_value());\n  auto docs = ret.value();\n  EXPECT_EQ(docs.size(), 80);\n  for (size_t j = 0; j < docs.size(); j++) {\n    auto &doc = 
docs[j];\n    int doc_id = j + 6000;\n    EXPECT_EQ(doc->pk(), \"pk_\" + std::to_string(doc_id));\n    auto age = doc->get<int32_t>(\"age\");\n    EXPECT_EQ(age.value(), doc_id % 100);\n    auto name = doc->get<std::string>(\"name\");\n    ASSERT_TRUE(name);\n    EXPECT_EQ(name.value(), \"user_\" + std::to_string(doc_id % 100));\n    EXPECT_FLOAT_EQ(doc->score(), (float)doc_id * doc_id * 4);\n  }\n}\n\n}  // namespace zvec::sqlengine\n"
  },
  {
    "path": "thirdparty/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.1 FATAL_ERROR)\ncmake_policy(SET CMP0048 NEW)\nproject(thirdparty)\n\ninclude(${PROJECT_ROOT_DIR}/cmake/utils.cmake)\n\nset(CMAKE_MACOSX_RPATH ON)\nset(CMAKE_POSITION_INDEPENDENT_CODE ON)\nset(EXTERNAL_BINARY_DIR ${CMAKE_BINARY_DIR}/external)\nset(EXTERNAL_INC_DIR ${CMAKE_BINARY_DIR}/external/usr/local/include)\nset(EXTERNAL_LIB_DIR ${CMAKE_BINARY_DIR}/external/usr/local/lib)\nfile(MAKE_DIRECTORY ${EXTERNAL_INC_DIR})\nfile(MAKE_DIRECTORY ${EXTERNAL_LIB_DIR})\n\nadd_subdirectory(googletest googletest EXCLUDE_FROM_ALL)\nadd_subdirectory(gflags gflags EXCLUDE_FROM_ALL)\nadd_subdirectory(glog glog EXCLUDE_FROM_ALL)\nadd_subdirectory(sparsehash sparsehash EXCLUDE_FROM_ALL)\nadd_subdirectory(yaml-cpp yaml-cpp EXCLUDE_FROM_ALL)\nadd_subdirectory(protobuf protobuf EXCLUDE_FROM_ALL)\nadd_subdirectory(antlr antlr EXCLUDE_FROM_ALL)\nadd_subdirectory(lz4 lz4 EXCLUDE_FROM_ALL)\nadd_subdirectory(rocksdb rocksdb EXCLUDE_FROM_ALL)\nadd_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL)\nadd_subdirectory(arrow arrow EXCLUDE_FROM_ALL)\nadd_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL)\nadd_subdirectory(RaBitQ-Library RaBitQ-Library EXCLUDE_FROM_ALL)\n\n"
  },
  {
    "path": "thirdparty/CRoaring/CMakeLists.txt",
    "content": "set(ENABLE_ROARING_TESTS OFF CACHE BOOL \"Disable testing in CRoaring\" FORCE)\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(CRoaring-2.0.4)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    target_compile_options(roaring PRIVATE \n        -Wno-unused-parameter \n        -Wno-unused-but-set-variable\n    )\nendif()\n\nget_target_property(roaring_SOURCE_DIR roaring INTERFACE_SOURCE_DIR)\nset(ROARING_FOUND TRUE PARENT_SCOPE)\nset(ROARING_INCLUDE_DIR ${roaring_SOURCE_DIR}/include PARENT_SCOPE)\nset(ROARING_INCLUDE_DIRS ${roaring_SOURCE_DIR}/include PARENT_SCOPE)\nset(ROARING_LIBRARY $<TARGET_FILE:roaring> PARENT_SCOPE)\nset(ROARING_LIBRARIES $<TARGET_FILE:roaring> PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/RaBitQ-Library/CMakeLists.txt",
    "content": "add_library(rabitqlib INTERFACE)\ntarget_include_directories(\n    rabitqlib INTERFACE \"${CMAKE_CURRENT_SOURCE_DIR}/RaBitQ-Library-0.1/include\"\n  )\n"
  },
  {
    "path": "thirdparty/antlr/CMakeLists.txt",
    "content": "set(ANTLR_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/antlr4)\nset(ANTLR_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/antlr4.patch)\napply_patch_once(\"antlr4_fix\" \"${ANTLR_SRC_DIR}\" \"${ANTLR_PATCH}\")\n\nadd_subdirectory(antlr4/runtime/Cpp/)\n\nadd_library(antlr4 UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n  antlr4 PROPERTIES\n  INTERFACE_INCLUDE_DIRECTORIES \"${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp/runtime/src/\"\n  IMPORTED_LOCATION \"${EXTERNAL_LIB_DIR}/libantlr4-runtime.a\"\n  )\nadd_dependencies(antlr4 antlr4_static)\n\nif(CMAKE_CXX_COMPILER_ID MATCHES \"Clang\")\n    target_compile_options(antlr4_static PRIVATE -Wno-unknown-pragmas -Wno-unqualified-std-cast-call)\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    target_compile_options(antlr4_static PRIVATE -Wno-unknown-pragmas -Wno-unqualified-std-cast-call -Wno-attributes -Wno-implicit-fallthrough)\nendif()"
  },
  {
    "path": "thirdparty/antlr/antlr4.patch",
    "content": "diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt\nindex 390078151..213258ac8 100644\n--- a/runtime/Cpp/CMakeLists.txt\n+++ b/runtime/Cpp/CMakeLists.txt\n@@ -28,21 +28,21 @@ project(LIBANTLR4)\n if(CMAKE_VERSION VERSION_EQUAL \"3.0.0\" OR\n    CMAKE_VERSION VERSION_GREATER \"3.0.0\")\n   CMAKE_POLICY(SET CMP0026 NEW)\n-  CMAKE_POLICY(SET CMP0054 OLD)\n-  CMAKE_POLICY(SET CMP0045 OLD)\n-  CMAKE_POLICY(SET CMP0042 OLD)\n+  CMAKE_POLICY(SET CMP0054 NEW)\n+  CMAKE_POLICY(SET CMP0045 NEW)\n+  CMAKE_POLICY(SET CMP0042 NEW)\n endif()\n \n if(CMAKE_VERSION VERSION_EQUAL \"3.3.0\" OR\n    CMAKE_VERSION VERSION_GREATER \"3.3.0\")\n-  CMAKE_POLICY(SET CMP0059 OLD)\n-  CMAKE_POLICY(SET CMP0054 OLD)\n+  CMAKE_POLICY(SET CMP0059 NEW)\n+  CMAKE_POLICY(SET CMP0054 NEW)\n endif()\n \n-if(CMAKE_SYSTEM_NAME MATCHES \"Linux\")\n-  find_package(PkgConfig REQUIRED)\n-  pkg_check_modules(UUID REQUIRED uuid)\n-endif()\n+#if(CMAKE_SYSTEM_NAME MATCHES \"Linux\")\n+#  find_package(PkgConfig REQUIRED)\n+#  pkg_check_modules(UUID REQUIRED uuid)\n+#endif()\n if(APPLE)\n   find_library(COREFOUNDATION_LIBRARY CoreFoundation)\n endif()\ndiff --git a/runtime/Cpp/runtime/CMakeLists.txt b/runtime/Cpp/runtime/CMakeLists.txt\nindex 2c5e7376f..ae992f9cc 100644\n--- a/runtime/Cpp/runtime/CMakeLists.txt\n+++ b/runtime/Cpp/runtime/CMakeLists.txt\n@@ -25,7 +25,7 @@ file(GLOB libantlrcpp_SRC\n add_library(antlr4_shared SHARED ${libantlrcpp_SRC})\n add_library(antlr4_static STATIC ${libantlrcpp_SRC})\n \n-set(LIB_OUTPUT_DIR \"${CMAKE_HOME_DIRECTORY}/dist\") # put generated libraries here.\n+set(LIB_OUTPUT_DIR \"${EXTERNAL_LIB_DIR}\")\n message(STATUS \"Output libraries to ${LIB_OUTPUT_DIR}\")\n \n # make sure 'make' works fine even if ${LIB_OUTPUT_DIR} is deleted.\n"
  },
  {
    "path": "thirdparty/arrow/CMakeLists.txt",
    "content": "set(ARROW_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0)\nif(ANDROID)\n        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.android.patch)\n        apply_patch_once(\"arrow_android_fix\" \"${ARROW_SRC_DIR}\" \"${ARROW_PATCH}\")\nelse()\n        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.patch)\n        apply_patch_once(\"arrow_fix\" \"${ARROW_SRC_DIR}\" \"${ARROW_PATCH}\")\nendif()\n\ninclude(ExternalProject)\ninclude(ProcessorCount)\n\nProcessorCount(NPROC)\n\nset(LIB_PARQUET ${EXTERNAL_LIB_DIR}/libparquet.a)\nset(LIB_ARROW ${EXTERNAL_LIB_DIR}/libarrow.a)\nset(LIB_COMPUTE ${EXTERNAL_LIB_DIR}/libarrow_compute.a)\nset(LIB_ACERO ${EXTERNAL_LIB_DIR}/libarrow_acero.a)\nset(LIB_ARROW_DEPENDS ${EXTERNAL_LIB_DIR}/libarrow_bundled_dependencies.a)\nset(LIB_ARROW_DATASET ${EXTERNAL_LIB_DIR}/libarrow_dataset.a)\n\nset(CONFIGURE_ENV_LIST \"\")\nif(USE_OSS_MIRROR)\n        list(APPEND CONFIGURE_ENV_LIST\n                \"ARROW_BOOST_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/boost-1.88.0-cmake.tar.gz\"\n                \"ARROW_RAPIDJSON_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/rapidjson-232389d4f1012dddec4ef84861face2d2ba85709.tar.gz\"\n                \"ARROW_RE2_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/re2-2022-06-01.tar.gz\"\n                \"ARROW_THRIFT_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/thrift-0.22.0.tar.gz\"\n                \"ARROW_UTF8PROC_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/utf8proc-2.10.0.tar.gz\"\n                \"ARROW_XSIMD_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/xsimd-13.0.0.tar.gz\"\n                \"ARROW_ZLIB_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/zlib-1.3.1.tar.gz\"\n        )\n        message(STATUS \"Using OSS mirror for third-party downloads\")\nendif()\n\nif(ANDROID)\n        ExternalProject_Add(\n                ARROW.BUILD PREFIX arrow\n                SOURCE_DIR 
${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0\n                DOWNLOAD_COMMAND \"\"\n                BUILD_IN_SOURCE false\n                CONFIGURE_COMMAND env ${CONFIGURE_ENV_LIST} \"${CMAKE_COMMAND}\" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DANDROID_ABI=${ANDROID_ABI} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} -DARROW_WITH_MUSL=OFF \"<SOURCE_DIR>/cpp\"\n                BUILD_COMMAND \"${CMAKE_COMMAND}\" --build . --target all -- -j ${NPROC}\n                INSTALL_COMMAND \"${CMAKE_COMMAND}\" --install \"<BINARY_DIR>\" --prefix=${EXTERNAL_BINARY_DIR}/usr/local\n                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}\n                LOG_DOWNLOAD ON\n                LOG_CONFIGURE ON\n                LOG_BUILD ON\n                LOG_INSTALL ON\n        )\nelse()\n        ExternalProject_Add(\n                ARROW.BUILD PREFIX arrow\n                SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0\n                DOWNLOAD_COMMAND \"\"\n                BUILD_IN_SOURCE false\n                CONFIGURE_COMMAND env ${CONFIGURE_ENV_LIST} \"${CMAKE_COMMAND}\" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib \"<SOURCE_DIR>/cpp\"\n                BUILD_COMMAND \"${CMAKE_COMMAND}\" --build . 
--target all -- -j ${NPROC}\n                INSTALL_COMMAND \"${CMAKE_COMMAND}\" --install \"<BINARY_DIR>\" --prefix=${EXTERNAL_BINARY_DIR}/usr/local\n                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}\n                LOG_DOWNLOAD ON\n                LOG_CONFIGURE ON\n                LOG_BUILD ON\n                LOG_INSTALL ON\n        )\nendif()\n\nadd_library(arrow UNKNOWN IMPORTED GLOBAL)\nadd_dependencies(arrow ARROW.BUILD)\n\nset(Arrow_FOUND TRUE PARENT_SCOPE)\nset(Arrow_INCLUDE_DIR ${EXTERNAL_INC_DIR} PARENT_SCOPE)\nset(Arrow_LIBRARIES ${EXTERNAL_LIB_DIR}/libarrow.a PARENT_SCOPE)\nset(Arrow_DIR ${EXTERNAL_BINARY_DIR} PARENT_SCOPE)\nset(Arrow_LIBRARY_DIR ${EXTERNAL_BINARY_DIR} PARENT_SCOPE)\n\nadd_library(Arrow::arrow_depends UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::arrow_depends PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION \"${LIB_ARROW_DEPENDS}\"\n)\nadd_dependencies(Arrow::arrow_depends ARROW.BUILD)\n\n\nadd_library(Arrow::arrow_static UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::arrow_static PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION \"${LIB_ARROW}\"\n        INTERFACE_LINK_LIBRARIES \"Arrow::arrow_depends\"\n)\nadd_dependencies(Arrow::arrow_static ARROW.BUILD)\n\nadd_library(Arrow::parquet_static UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::parquet_static PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION \"${LIB_PARQUET}\"\n        INTERFACE_LINK_LIBRARIES \"Arrow::arrow_depends;Arrow::arrow_static\"\n)\nadd_dependencies(Arrow::parquet_static ARROW.BUILD)\n\nadd_library(Arrow::arrow_compute UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::arrow_compute PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION 
\"${LIB_COMPUTE}\"\n        INTERFACE_LINK_LIBRARIES \"Arrow::arrow_depends;Arrow::arrow_static\"\n)\nadd_dependencies(Arrow::arrow_compute ARROW.BUILD)\n\nadd_library(Arrow::arrow_acero UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::arrow_acero PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION \"${LIB_ACERO}\"\n        INTERFACE_LINK_LIBRARIES \"Arrow::arrow_depends;Arrow::arrow_static;Arrow::arrow_compute\"\n)\nadd_dependencies(Arrow::arrow_acero ARROW.BUILD)\n\nadd_library(Arrow::arrow_dataset UNKNOWN IMPORTED GLOBAL)\nset_target_properties(\n        Arrow::arrow_dataset PROPERTIES\n        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}\n        IMPORTED_LOCATION \"${LIB_ARROW_DATASET}\"\n        INTERFACE_LINK_LIBRARIES \"Arrow::arrow_depends;Arrow::arrow_static;Arrow::arrow_compute;Arrow::arrow_acero\"\n)\nadd_dependencies(Arrow::arrow_dataset ARROW.BUILD)\n"
  },
  {
    "path": "thirdparty/arrow/arrow.android.patch",
    "content": "diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake\nindex 7fa4b66d4b..78bcb6d47e 100644\n--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake\n+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake\n@@ -950,6 +950,13 @@ set(EP_COMMON_CMAKE_ARGS\n     # https://github.com/apache/arrow/issues/45985\n     -DCMAKE_POLICY_VERSION_MINIMUM=3.5)\n \n+if(ANDROID)\n+  list(APPEND EP_COMMON_CMAKE_ARGS \n+  -DANDROID_ABI=${ANDROID_ABI}\n+  -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL}\n+  -DANDROID_NDK=${ANDROID_NDK})\n+endif()\n+\n # if building with a toolchain file, pass that through\n if(CMAKE_TOOLCHAIN_FILE)\n   list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})\ndiff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc\nindex 0f58406760..cf68bfdcbe 100644\n--- a/cpp/src/arrow/acero/source_node.cc\n+++ b/cpp/src/arrow/acero/source_node.cc\n@@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {\n struct RecordBatchReaderSourceNode : public SourceNode {\n   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,\n                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)\n-      : SourceNode(plan, schema, generator) {}\n+      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}\n \n   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,\n                                 const ExecNodeOptions& options) {\ndiff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp\nindex 2cf6c62a84..9e64b62297 100644\n--- a/cpp/src/arrow/vendored/datetime/tz.cpp\n+++ b/cpp/src/arrow/vendored/datetime/tz.cpp\n@@ -605,7 +605,9 @@ tzdb_list\n create_tzdb()\n {\n     tzdb_list tz_db;\n+#if !defined(ANDROID) && !defined(__ANDROID__)\n     tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release());\n+#endif // !defined(ANDROID) && 
!defined(__ANDROID__)\n     return tz_db;\n }\n \n@@ -3900,7 +3902,9 @@ reload_tzdb()\n     if (!v.empty() && v == remote_version())\n         return get_tzdb_list().front();\n #endif  // AUTO_DOWNLOAD\n+#if !defined(ANDROID) && !defined(__ANDROID__)\n     tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release());\n+#endif  // !defined(ANDROID) && !defined(__ANDROID__)\n     return get_tzdb_list().front();\n }\n \ndiff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h\nindex 61ab3df106..d456d6765f 100644\n--- a/cpp/src/arrow/vendored/datetime/tz.h\n+++ b/cpp/src/arrow/vendored/datetime/tz.h\n@@ -858,7 +858,9 @@ private:\n     load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,\n                                  std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);\n # if defined(ANDROID) || defined(__ANDROID__)\n+public:\n     void parse_from_android_tzdata(std::ifstream& inf, const std::size_t off);\n+private:\n # endif // defined(ANDROID) || defined(__ANDROID__)\n #else  // !USE_OS_TZDB\n     DATE_API sys_info   get_info_impl(sys_seconds tp, int tz_int) const;\ndiff --git a/cpp/src/arrow/vendored/musl/strptime.c b/cpp/src/arrow/vendored/musl/strptime.c\nindex 41912fd1bb..9d0b4dc1bf 100644\n--- a/cpp/src/arrow/vendored/musl/strptime.c\n+++ b/cpp/src/arrow/vendored/musl/strptime.c\n@@ -17,7 +17,7 @@\n \n #undef HAVE_LANGINFO\n \n-#ifndef _WIN32\n+#if !defined(_WIN32) && !defined(__ANDROID__)\n #define HAVE_LANGINFO 1\n #endif\n \n"
  },
  {
    "path": "thirdparty/arrow/arrow.patch",
    "content": "diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc\nindex 0f5840676..cf68bfdcb 100644\n--- a/cpp/src/arrow/acero/source_node.cc\n+++ b/cpp/src/arrow/acero/source_node.cc\n@@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {\n struct RecordBatchReaderSourceNode : public SourceNode {\n   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,\n                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)\n-      : SourceNode(plan, schema, generator) {}\n+      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}\n \n   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,\n                                 const ExecNodeOptions& options) {\n"
  },
  {
    "path": "thirdparty/gflags/CMakeLists.txt",
    "content": "set(BUILD_TESTING OFF CACHE BOOL \"Disable Unit Tests\" FORCE)\nset(GFLAGS_BUILD_TESTING OFF CACHE BOOL \"Disable unittest in gflags\" FORCE)\nif(NOT BUILD_SHARED_LIBS)\n  set(GFLAGS_BUILD_SHARED_LIBS OFF)\n  set(GFLAGS_BUILD_STATIC_LIBS ON)\nelse()\n  set(GFLAGS_BUILD_SHARED_LIBS ON)\n  set(GFLAGS_BUILD_STATIC_LIBS OFF)\nendif()\n\nset(GFLAGS_IS_SUBPROJECT ON)\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(gflags-2.2.2)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nget_target_property(gflags_OUTPUT_NAME gflags OUTPUT_NAME)\nget_target_property(gflags_BINARY_DIR gflags BINARY_DIR)\nget_target_property(gflags_SOURCE_DIR gflags SOURCE_DIR)\nget_target_property(gflags_PREFIX gflags PREFIX)\nget_target_property(gflags_SUFFIX gflags SUFFIX)\n\nset(gflags_OUTPUT_DIR ${gflags_BINARY_DIR})\nif(NOT BUILD_SHARED_LIBS)\n  if (NOT gflags_PREFIX)\n    set(gflags_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})\n  endif()\n  if (NOT gflags_SUFFIX)\n    set(gflags_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})\n  endif()\n  if(CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n    set(gflags_OUTPUT_DIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\n  endif()\nelse()\n  if (NOT gflags_PREFIX)\n    set(gflags_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})\n  endif()\n  if (NOT gflags_SUFFIX)\n    set(gflags_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})\n  endif()\n  if(CMAKE_LIBRARY_OUTPUT_DIRECTORY)\n    set(gflags_OUTPUT_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})\n  endif()\nendif()\n\nset(gflags_FOUND TRUE PARENT_SCOPE)\nset(gflags_INCLUDE_DIR ${gflags_BINARY_DIR}/include PARENT_SCOPE)\nset(gflags_LIBRARIES ${gflags_OUTPUT_DIR}/${gflags_PREFIX}${gflags_OUTPUT_NAME}${gflags_SUFFIX} PARENT_SCOPE)\nset(gflags_DIR ${gflags_BINARY_DIR} PARENT_SCOPE)\n\nset(GFLAGS_FOUND TRUE PARENT_SCOPE)\nset(GFLAGS_INCLUDE_DIR ${gflags_BINARY_DIR}/include 
PARENT_SCOPE)\nset(GFLAGS_LIBRARIES ${gflags_OUTPUT_DIR}/${gflags_PREFIX}${gflags_OUTPUT_NAME}${gflags_SUFFIX} PARENT_SCOPE)\nset(GFLAGS_DIR ${gflags_BINARY_DIR} PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/glog/CMakeLists.txt",
    "content": "set(BUILD_TESTING OFF CACHE BOOL \"Disable Unit Tests\" FORCE)\nset(WITH_GFLAGS OFF CACHE BOOL \"Disable find_package(gflags) in glog\" FORCE)\nset(WITH_UNWIND OFF CACHE BOOL \"Disable find_package(unwind) in glog\" FORCE)\nset(HAVE_LIB_GFLAGS TRUE CACHE BOOL \"\")\n\nadd_compile_options(-Wno-deprecated-declarations)\n\nset(GLOG_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/glog-0.5.0)\nif (ANDROID)\n    set(GLOG_ANDROID_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.android.patch)\n    apply_patch_once(\"glog_android_fix\" \"${GLOG_SRC_DIR}\" \"${GLOG_ANDROID_PATCH}\")\nelse()\n    set(GLOG_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.patch)\n    apply_patch_once(\"glog_fix\" \"${GLOG_SRC_DIR}\" \"${GLOG_PATCH}\")\nendif()\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(glog-0.5.0)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nadd_dependencies(glog gflags)\n\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    target_compile_options(glog PRIVATE -Wno-sign-compare)\nendif()\n\nget_target_property(glog_BINARY_DIR glog BINARY_DIR)\nget_target_property(glog_SOURCE_DIR glog SOURCE_DIR)\n# get_target_property(GLOG_INCLUDE_DIRS glog INTERFACE_INCLUDE_DIRECTORIES)\n\nset(GLOG_INCLUDE_DIRS ${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src)\nset(GLOG_FOUND TRUE PARENT_SCOPE)\nset(GLOG_INCLUDE_DIR ${GLOG_INCLUDE_DIRS} PARENT_SCOPE)\nset(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIRS} PARENT_SCOPE)\nset(GLOG_LIBRARY $<TARGET_FILE:glog> PARENT_SCOPE)\nset(GLOG_LIBRARIES $<TARGET_FILE:glog> PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/glog/glog.android.patch",
    "content": "diff --git a/CMakeLists.txt b/CMakeLists.txt\nindex 62ebbcc..e17f67e 100644\n--- a/CMakeLists.txt\n+++ b/CMakeLists.txt\n@@ -17,7 +17,7 @@ set (CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})\n set (CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})\n set (CPACK_PACKAGE_VERSION ${PROJECT_VERSION})\n \n-option (BUILD_SHARED_LIBS \"Build shared libraries\" ON)\n+option (BUILD_STATIC_LIBS \"Build shared libraries\" ON)\n option (PRINT_UNSYMBOLIZED_STACK_TRACES\n   \"Print file offsets in traces instead of symbolizing\" OFF)\n option (WITH_CUSTOM_PREFIX \"Enable support for user-generated message prefixes\" OFF)\n@@ -802,12 +802,12 @@ if (BUILD_TESTING)\n     FIXTURES_REQUIRED \"cmake_package_config;cmake_package_config_working\")\n endif (BUILD_TESTING)\n \n-install (TARGETS glog\n-  EXPORT glog-targets\n-  RUNTIME DESTINATION ${_glog_CMake_BINDIR}\n-  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog\n-  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}\n-  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})\n+#install (TARGETS glog\n+#  EXPORT glog-targets\n+#  RUNTIME DESTINATION ${_glog_CMake_BINDIR}\n+#  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog\n+#  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}\n+#  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})\n \n if (WITH_PKGCONFIG)\n   install (\n@@ -840,8 +840,8 @@ write_basic_package_version_file (\n   ${CMAKE_CURRENT_BINARY_DIR}/glog-config-version.cmake\n   COMPATIBILITY SameMajorVersion)\n \n-export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)\n-export (PACKAGE glog)\n+#export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)\n+#export (PACKAGE glog)\n \n get_filename_component (_PREFIX \"${CMAKE_INSTALL_PREFIX}\" ABSOLUTE)\n \n@@ -885,5 +885,5 @@ install (DIRECTORY ${_glog_BINARY_CMake_DATADIR}\n   FILES_MATCHING PATTERN \"*.cmake\"\n )\n \n-install (EXPORT glog-targets NAMESPACE glog:: DESTINATION\n-  ${_glog_CMake_INSTALLDIR})\n+#install (EXPORT glog-targets NAMESPACE glog:: 
DESTINATION\n+#  ${_glog_CMake_INSTALLDIR})\ndiff --git a/src/stacktrace_generic-inl.h b/src/stacktrace_generic-inl.h\nindex fad81d3..67209ac 100644\n--- a/src/stacktrace_generic-inl.h\n+++ b/src/stacktrace_generic-inl.h\n@@ -39,21 +39,7 @@ _START_GOOGLE_NAMESPACE_\n \n // If you change this function, also change GetStackFrames below.\n int GetStackTrace(void** result, int max_depth, int skip_count) {\n-  static const int kStackLength = 64;\n-  void * stack[kStackLength];\n-  int size;\n-\n-  size = backtrace(stack, kStackLength);\n-  skip_count++;  // we want to skip the current frame as well\n-  int result_count = size - skip_count;\n-  if (result_count < 0)\n-    result_count = 0;\n-  if (result_count > max_depth)\n-    result_count = max_depth;\n-  for (int i = 0; i < result_count; i++)\n-    result[i] = stack[i + skip_count];\n-\n-  return result_count;\n+  return 0;\n }\n \n _END_GOOGLE_NAMESPACE_\n"
  },
  {
    "path": "thirdparty/glog/glog.patch",
    "content": "diff --git a/CMakeLists.txt b/CMakeLists.txt\nindex 62ebbcc..e17f67e 100644\n--- a/CMakeLists.txt\n+++ b/CMakeLists.txt\n@@ -17,7 +17,7 @@ set (CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})\n set (CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})\n set (CPACK_PACKAGE_VERSION ${PROJECT_VERSION})\n \n-option (BUILD_SHARED_LIBS \"Build shared libraries\" ON)\n+option (BUILD_STATIC_LIBS \"Build shared libraries\" ON)\n option (PRINT_UNSYMBOLIZED_STACK_TRACES\n   \"Print file offsets in traces instead of symbolizing\" OFF)\n option (WITH_CUSTOM_PREFIX \"Enable support for user-generated message prefixes\" OFF)\n@@ -802,12 +802,12 @@ if (BUILD_TESTING)\n     FIXTURES_REQUIRED \"cmake_package_config;cmake_package_config_working\")\n endif (BUILD_TESTING)\n \n-install (TARGETS glog\n-  EXPORT glog-targets\n-  RUNTIME DESTINATION ${_glog_CMake_BINDIR}\n-  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog\n-  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}\n-  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})\n+#install (TARGETS glog\n+#  EXPORT glog-targets\n+#  RUNTIME DESTINATION ${_glog_CMake_BINDIR}\n+#  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog\n+#  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}\n+#  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})\n \n if (WITH_PKGCONFIG)\n   install (\n@@ -840,8 +840,8 @@ write_basic_package_version_file (\n   ${CMAKE_CURRENT_BINARY_DIR}/glog-config-version.cmake\n   COMPATIBILITY SameMajorVersion)\n \n-export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)\n-export (PACKAGE glog)\n+#export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)\n+#export (PACKAGE glog)\n \n get_filename_component (_PREFIX \"${CMAKE_INSTALL_PREFIX}\" ABSOLUTE)\n \n@@ -885,5 +885,5 @@ install (DIRECTORY ${_glog_BINARY_CMake_DATADIR}\n   FILES_MATCHING PATTERN \"*.cmake\"\n )\n \n-install (EXPORT glog-targets NAMESPACE glog:: DESTINATION\n-  ${_glog_CMake_INSTALLDIR})\n+#install (EXPORT glog-targets NAMESPACE glog:: 
DESTINATION\n+#  ${_glog_CMake_INSTALLDIR})\n"
  },
  {
    "path": "thirdparty/googletest/CMakeLists.txt",
    "content": "add_compile_options(-Wno-deprecated-copy)\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    add_compile_options(-Wno-maybe-uninitialized)\nendif()\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(googletest-1.10.0)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nget_target_property(GTEST_INCLUDE_DIRS gtest INTERFACE_INCLUDE_DIRECTORIES)\n\nset(GTEST_FOUND TRUE PARENT_SCOPE)\nset(GTEST_INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} PARENT_SCOPE)\nset(GTEST_INCLUDE_DIR ${GTEST_INCLUDE_DIRS} PARENT_SCOPE)\nset(GTEST_LIBRARIES $<TARGET_FILE:gtest> PARENT_SCOPE)\nset(GTEST_LIBRARY ${GTEST_LIBRARIES} PARENT_SCOPE)\nset(GTEST_MAIN_LIBRARIES $<TARGET_FILE:gtest_main> PARENT_SCOPE)\nset(GTEST_MAIN_LIBRARY ${GTEST_MAIN_LIBRARIES} PARENT_SCOPE)\nset(GTEST_BOTH_LIBRARIES $<TARGET_FILE:gtest> $<TARGET_FILE:gtest_main> PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/lz4/CMakeLists.txt",
    "content": "set(lz4_INCLUDE_DIR \"${EXTERNAL_BINARY_DIR}/usr/local/include\")\nset(lz4_LIBRARY_DIR \"${EXTERNAL_BINARY_DIR}/usr/local/lib/\")\nfile(MAKE_DIRECTORY ${lz4_INCLUDE_DIR})\nfile(MAKE_DIRECTORY ${lz4_LIBRARY_DIR})\n\ninclude(ExternalProject)\n\nset(_lz4_env \"\")\nif(ANDROID)\n  string(REGEX REPLACE \"^android-([0-9]+)$\" \"\\\\1\" ANDROID_API_LEVEL \"${ANDROID_PLATFORM}\")\n\n  if(ANDROID_ABI STREQUAL \"arm64-v8a\")\n    set(TARGET_TRIPLE \"aarch64-linux-android\")\n  elseif(ANDROID_ABI STREQUAL \"armeabi-v7a\")\n    set(TARGET_TRIPLE \"armv7a-linux-androideabi\")\n  elseif(ANDROID_ABI STREQUAL \"x86\")\n    set(TARGET_TRIPLE \"i686-linux-android\")\n  elseif(ANDROID_ABI STREQUAL \"x86_64\")\n    set(TARGET_TRIPLE \"x86_64-linux-android\")\n  else()\n    message(FATAL_ERROR \"Unsupported ANDROID_ABI: ${ANDROID_ABI}\")\n  endif()\n\n  set(SYSROOT \"${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/sysroot\")\n  set(COMMON_FLAGS\n    \"--sysroot=${SYSROOT}\"\n    \"-target ${TARGET_TRIPLE}${ANDROID_API_LEVEL}\"\n    \"-fPIC\"\n    \"-D__ANDROID_API__=${ANDROID_API_LEVEL}\"\n  )\n\n  list(APPEND COMMON_FLAGS ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE}})\n\n  string(JOIN \" \" _lz4_cflags ${COMMON_FLAGS})\n\n  list(APPEND _lz4_env\n    \"CC=${CMAKE_C_COMPILER}\"\n    \"AR=${CMAKE_AR}\"\n    \"RANLIB=${CMAKE_RANLIB}\"\n    \"STRIP=${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/bin/llvm-strip\"\n    \"CFLAGS=${_lz4_cflags}\"\n  )\n\nelse()\n  list(APPEND _lz4_env \"CFLAGS=-fPIC\")\nendif()\n\nExternalProject_Add(\n  Lz4.BUILD\n  PREFIX lz4\n  URL \"${CMAKE_CURRENT_SOURCE_DIR}/lz4-1.9.4\"\n  CONFIGURE_COMMAND \"\"\n    BUILD_COMMAND env ${_lz4_env} BUILD_SHARED=no make -j\n  INSTALL_COMMAND make DESTDIR=${EXTERNAL_BINARY_DIR} BUILD_SHARED=no install\n  BUILD_IN_SOURCE ON\n  LOG_DOWNLOAD ON\n  LOG_CONFIGURE ON\n  LOG_BUILD ON\n  LOG_INSTALL ON\n  BUILD_BYPRODUCTS ${lz4_LIBRARY_DIR}/liblz4.a\n)\n\nadd_library(lz4 STATIC 
IMPORTED GLOBAL)\nset_target_properties(\n  lz4 PROPERTIES\n  INTERFACE_INCLUDE_DIRECTORIES \"${lz4_INCLUDE_DIR}\"\n  IMPORTED_LOCATION \"${lz4_LIBRARY_DIR}/liblz4.a\"\n)\nadd_dependencies(lz4 Lz4.BUILD)\n\nset(lz4_FOUND TRUE PARENT_SCOPE)\nset(lz4_LIBRARY ${lz4_LIBRARY_DIR}/liblz4.a PARENT_SCOPE)\nset(lz4_LIBRARIES ${lz4_LIBRARY_DIR}/liblz4.a PARENT_SCOPE)\nset(lz4_INCLUDE_DIR \"${EXTERNAL_BINARY_DIR}/usr/local/include\" PARENT_SCOPE)\nset(lz4_INCLUDE_DIRS \"${EXTERNAL_BINARY_DIR}/usr/local/include\" PARENT_SCOPE)\nset(lz4_VERSION 1.9.4 PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/magic_enum/CMakeLists.txt",
    "content": "add_library(magic_enum INTERFACE)\ntarget_include_directories(\n    magic_enum INTERFACE \"${CMAKE_CURRENT_SOURCE_DIR}/magic_enum-0.9.7/include/\"\n  )\n"
  },
  {
    "path": "thirdparty/protobuf/CMakeLists.txt",
    "content": "set(protobuf_BUILD_TESTS OFF CACHE BOOL \"Disable testing in protobuf\" FORCE)\nset(protobuf_WITH_ZLIB ON CACHE BOOL \"Disable zlib support in protobuf\" FORCE)\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(protobuf-3.21.12/cmake protobuf-3.21.12)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nif(CMAKE_CXX_COMPILER_ID MATCHES \"Clang\")\n    target_compile_options(libprotobuf PRIVATE \n        -Wno-deprecated-declarations \n        -Wno-invalid-noreturn \n        -Wno-unused-function\n    )\n    target_compile_options(libprotoc PRIVATE \n        -Wno-unused-private-field \n        -Wno-unused-function\n    )\n    target_compile_options(protoc PRIVATE \n        -Wno-unused-private-field \n        -Wno-unused-function\n    )\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    target_compile_options(libprotobuf PRIVATE \n        -Wno-deprecated-declarations \n        -Wno-unused-function\n        -Wno-maybe-uninitialized\n        -Wno-sign-compare\n        -Wno-return-type\n        -Wno-stringop-overflow\n    )\n    target_compile_options(libprotoc PRIVATE \n        -Wno-unused-private-field \n        -Wno-unused-function\n        -Wno-unused-but-set-variable\n        -Wno-sign-compare\n    )\n    target_compile_options(protoc PRIVATE \n        -Wno-unused-private-field \n        -Wno-unused-function\n        -Wno-unused-but-set-variable\n        -Wno-sign-compare\n    )\nendif()\n\nget_target_property(libprotobuf_SOURCE_DIR libprotobuf SOURCE_DIR)\nget_filename_component(libprotobuf_INCLUDE_DIR ${libprotobuf_SOURCE_DIR}/../src ABSOLUTE)\n\nset(PROTOBUF_FOUND TRUE PARENT_SCOPE)\nset(PROTOBUF_INCLUDE_DIR ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE)\nset(PROTOBUF_INCLUDE_DIRS ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE)\n\nset(PROTOBUF_LIBRARY $<TARGET_FILE:libprotobuf> 
PARENT_SCOPE)\nset(PROTOBUF_LIBRARIES $<TARGET_FILE:libprotobuf> PARENT_SCOPE)\n\nset(PROTOBUF_LITE_LIBRARY $<TARGET_FILE:libprotobuf-lite> PARENT_SCOPE)\nset(PROTOBUF_LITE_LIBRARIES $<TARGET_FILE:libprotobuf-lite> PARENT_SCOPE)\n\nset(PROTOBUF_PROTOC_LIBRARY $<TARGET_FILE:libprotoc> PARENT_SCOPE)\nset(PROTOBUF_PROTOC_LIBRARIES $<TARGET_FILE:libprotoc> PARENT_SCOPE)\nset(PROTOBUF_PROTOC_EXECUTABLE $<TARGET_FILE:protoc> PARENT_SCOPE)\n"
  },
  {
    "path": "thirdparty/rocksdb/CMakeLists.txt",
    "content": "set(ROCKSDB_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb-8.1.1)\nif (ANDROID)\n    set(ROCKSDB_ANDROID_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb.android.patch)\n    apply_patch_once(\"rocksdb_android_fix\" \"${ROCKSDB_SRC_DIR}\" \"${ROCKSDB_ANDROID_PATCH}\")\nendif()\n\nset(ROCKSDB_BUILD_SHARED OFF CACHE BOOL \"Disable install in rocksdb\" FORCE)\n\nset(WITH_TESTS OFF CACHE BOOL \"Disable testing in rocksdb\" FORCE)\nset(WITH_ALL_TESTS OFF CACHE BOOL \"Build all test, rather than a small subset\" FORCE)\nset(WITH_BENCHMARK_TOOLS OFF CACHE BOOL \"Disable benchmarks in rocksdb\" FORCE)\nset(WITH_CORE_TOOLS OFF CACHE BOOL \"build with ldb and sst_dump\" FORCE)\nset(WITH_TOOLS OFF CACHE BOOL \"build with tools\" FORCE)\nset(WITH_LZ4 ON CACHE BOOL \"build with lz4\" FORCE)\nset(USE_RTTI ON CACHE BOOL \"build with RTTI\" FORCE)\nset(FAIL_ON_WARNINGS OFF CACHE BOOL \"build with no Werror\" FORCE)\nset(PORTABLE ON CACHE BOOL \"build a portable lib\" FORCE)\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(rocksdb-8.1.1)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nget_target_property(rocksdb_SOURCE_DIR rocksdb SOURCE_DIR)\nset(ROCKSDB_INCLUDE_DIR ${rocksdb_SOURCE_DIR}/include)\n\ntarget_include_directories(rocksdb PUBLIC $<BUILD_INTERFACE:${ROCKSDB_INCLUDE_DIR}>)\nadd_dependencies(rocksdb Lz4.BUILD)\n"
  },
  {
    "path": "thirdparty/rocksdb/rocksdb.android.patch",
    "content": "diff --git a/env/io_posix.cc b/env/io_posix.cc\nindex 0ec0e9c83..a78ac5a13 100644\n--- a/env/io_posix.cc\n+++ b/env/io_posix.cc\n@@ -27,7 +27,7 @@\n #include <sys/mman.h>\n #include <sys/stat.h>\n #include <sys/types.h>\n-#ifdef OS_LINUX\n+#if defined(OS_LINUX) || defined(OS_ANDROID)\n #include <sys/statfs.h>\n #include <sys/sysmacros.h>\n #endif\n"
  },
  {
    "path": "thirdparty/sparsehash/CMakeLists.txt",
    "content": "set(SPARSE_SRC \"${CMAKE_CURRENT_SOURCE_DIR}/sparseconfig.h\")\nset(SPARSE_DST \"${CMAKE_CURRENT_SOURCE_DIR}/sparsehash-2.0.4/src/sparsehash/internal/sparseconfig.h\")\nget_filename_component(DESTINATION_DIR \"${SPARSE_DST}\" DIRECTORY)\nif(NOT EXISTS \"${SPARSE_DST}\")\n    file(COPY \"${SPARSE_SRC}\" DESTINATION \"${DESTINATION_DIR}\")\nendif()\n\nadd_library(sparsehash INTERFACE)\ntarget_include_directories(\n    sparsehash INTERFACE \"${CMAKE_CURRENT_SOURCE_DIR}/sparsehash-2.0.4/src/\"\n  )\n"
  },
  {
    "path": "thirdparty/sparsehash/sparseconfig.h",
    "content": "/*\n * NOTE: This file is for internal use only.\n *       Do not use these #defines in your own program!\n */\n\n/* Namespace for Google classes */\n#define GOOGLE_NAMESPACE ::google\n\n/* the location of the header defining hash functions */\n#define HASH_FUN_H <functional>\n\n/* the namespace of the hash<> function */\n#define HASH_NAMESPACE std\n\n/* Define to 1 if you have the <inttypes.h> header file. */\n#define HAVE_INTTYPES_H 1\n\n/* Define to 1 if the system has the type `long long'. */\n#define HAVE_LONG_LONG 1\n\n/* Define to 1 if you have the `memcpy' function. */\n#define HAVE_MEMCPY 1\n\n/* Define to 1 if you have the <stdint.h> header file. */\n#define HAVE_STDINT_H 1\n\n/* Define to 1 if you have the <sys/types.h> header file. */\n#define HAVE_SYS_TYPES_H 1\n\n/* Define to 1 if the system has the type `uint16_t'. */\n#define HAVE_UINT16_T 1\n\n/* Define to 1 if the system has the type `u_int16_t'. */\n#define HAVE_U_INT16_T 1\n\n/* Define to 1 if the system has the type `__uint16'. */\n/* #undef HAVE___UINT16 */\n\n/* The system-provided hash function including the namespace. */\n#define SPARSEHASH_HASH HASH_NAMESPACE::hash\n\n/* Stops putting the code inside the Google namespace */\n#define _END_GOOGLE_NAMESPACE_ }\n\n/* Puts following code inside the Google namespace */\n#define _START_GOOGLE_NAMESPACE_ namespace google {\n"
  },
  {
    "path": "thirdparty/yaml-cpp/CMakeLists.txt",
    "content": "set(YAML_CPP_BUILD_TESTS OFF CACHE BOOL \"Disable testing in yaml-cpp\" FORCE)\nset(YAML_CPP_BUILD_TOOLS OFF CACHE BOOL \"Disable parse tools in yaml-cpp\" FORCE)\nset(YAML_CPP_BUILD_CONTRIB OFF CACHE BOOL \"Disable contrib stuff in yaml-cpp\" FORCE)\n\nset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})\nadd_subdirectory(yaml-cpp-0.6.3)\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})\nunset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)\n\nif(CMAKE_CXX_COMPILER_ID MATCHES \"Clang\")\n    target_compile_options(yaml-cpp PRIVATE -Wno-shadow)\nelseif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n    target_compile_options(yaml-cpp PRIVATE -Wno-effc++)\nendif()"
  },
  {
    "path": "tools/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\n# Retrieve version from git repository\ngit_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})\n\n# Add repository\ncc_directory(core)"
  },
  {
    "path": "tools/core/CMakeLists.txt",
    "content": "include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)\ninclude(${PROJECT_ROOT_DIR}/cmake/option.cmake)\n\ncc_binary(\n    NAME txt2vecs\n    STRICT PACKED\n    SRCS txt2vecs.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags core_framework zvec_ailego\n  )\n\ncc_binary(\n    NAME local_builder\n    STRICT PACKED\n    SRCS local_builder.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf core_interface\n  )\n\ncc_binary(\n    NAME recall\n    STRICT PACKED\n    SRCS recall.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf roaring core_interface\n  )\n\ncc_binary(\n    NAME bench\n    STRICT PACKED\n    SRCS bench.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf roaring core_interface\n)\n\n\ncc_binary(\n    NAME recall_original\n    STRICT PACKED\n    SRCS recall_original.cc flow.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf roaring core_interface\n)\n\ncc_binary(\n    NAME bench_original\n    STRICT PACKED\n    SRCS bench_original.cc flow.cc\n    INCS ${PROJECT_ROOT_DIR}/src/core/\n    LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw 
core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf roaring core_interface\n)\n\ncc_binary(\n        NAME local_builder_original\n        STRICT PACKED\n        SRCS local_builder_original.cc\n        INCS ${PROJECT_ROOT_DIR}/src/core/\n        LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq core_knn_cluster core_knn_ivf core_interface\n)\n"
  },
  {
    "path": "tools/core/README.md",
    "content": "\n# Benchmarking scripts\n\nThis directory contains benchmarking scripts and reproducing steps.\n\n## COHERE experiments\n\n### Getting COHERE Data\n\nPlease download the COHERE 10M dataset to cohere_large_10m as follows:\n\n```bash\n... ...           \nneighbors.parquet    \nshuffle_train-00-of-10.parquet     \nshuffle_train-01-of-10.parquet          \nshuffle_train-02-of-10.parquet  \nshuffle_train-03-of-10.parquet \nshuffle_train-04-of-10.parquet  \nshuffle_train-05-of-10.parquet \nshuffle_train-06-of-10.parquet\nshuffle_train-07-of-10.parquet\nshuffle_train-08-of-10.parquet\nshuffle_train-09-of-10.parquet\nscalar_labels.parquet     \ntest.parquet      \n```\n\nFor convenience, we prepared a docker image with cohere bench datasets: registry.cn-hongkong.cr.aliyuncs.com/zvec/cohere-bench-data. \n\nYou can run a container as follows:\n\n```bash\ndocker run -it --net=host -d -e DEBUG_MODE=true  --user root --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /home/zvec/:/home/zvec/  -w /home/zvec --name=cohere_bench zvec-registry.cn-hongkong.cr.aliyuncs.com/zvec/cohere-bench-data:0.0.1 bash\n\ndocker exec -it cohere_bench bash\n```\n\nThe datasets locate at /tmp/cohere/\n\n### Preparing Environment \nClone code and init:\n```bash\n$ git clone git@github.com:alibaba/zvec.git\n$ cd zvec\n$ git submodule update --init\n```\n\nBuild source code:\n```\n$ cd /home/zvec/workspace/zvec\n$ mkdir build\n$ cd build  \n$ cmake -DENABLE_SKYLAKE=ON -DCMAKE_BUILD_TYPE=Release ..\n```\n\n### Converting Dataset \nExport vector data using python script:\n```bash\n$ mkdir 10m.output\n$ python3 convert_cohere_parquet.py\n```\n\nConvert vector data to binary formatted file.\n```bash\n/home/zvec/workspace/zvec/bin/txt2vecs -input=cohere_train_vector_10m.txt --output=cohere_train_vector_10m.zvec.vecs --dimension=768\n```\n\nWe've also prepared preprocessed binary formatted files, which can be found in the container 
below:\n\n```bash\nroot@iZj6caifjouj5yu8xgsiysZ:/home/zvec# ls -al /tmp/cohere/*zvec \n/tmp/cohere/cohere_large_10m_zvec:\ntotal 30204572\ndrwxr-xr-x 2 root root        4096 Feb  5 13:12 .\ndrwxr-xr-x 6 root root        4096 Feb  6 03:38 ..\n-rw-r--r-- 1 root root     8664837 Feb  5 13:06 cohere_test_vector_10m.1000.new.txt\n-rw-r--r-- 1 root root 30920004295 Feb  5 13:04 cohere_train_vector_10m.new.zvec.vecs\n-rw-r--r-- 1 root root      792835 Feb  5 13:05 neighbors.txt\n\n/tmp/cohere/cohere_medium_1m_zvec:\ntotal 3028688\ndrwxr-xr-x 2 root root       4096 Feb  5 13:14 .\ndrwxr-xr-x 6 root root       4096 Feb  6 03:38 ..\n-rw-r--r-- 1 root root    8661108 Feb  5 13:07 cohere_test_vector_1m.1000.new.txt\n-rw-r--r-- 1 root root 3092004295 Feb  5 13:08 cohere_train_vector_1m.new.zvec.vecs\n-rw-r--r-- 1 root root     692969 Feb  5 13:08 neighbors.txt\n```\n\n### Preparing Bench Config \nPrepare Build Config\n\n```yaml\nBuilderCommon:\n    BuilderClass: HnswStreamer\n    BuildFile: /tmp/cohere/cohere_large_10m_zvec/cohere_train_vector_10m.zvec.vecs\n    NeedTrain: true \n    TrainFile: /tmp/cohere/cohere_large_10m_zvec/cohere_train_vector_10m.zvec.vecs\n    DumpPath:  /home/zvec/bench/config/cohere_train_vector_10m.dump.index\n    IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.index\n\n    ConverterName: CosineInt8Converter\n    MetricName: Cosine\n\n    ThreadCount: 16\n\nBuilderParams: \n    proxima.general.builder.thread_count: !!int 16\n    proxima.hnsw.builder.thread_count: !!int 16\n```\n\nPrepare Search Config\n\n```yaml\nSearcherCommon:\n    SearcherClass: HnswStreamer\n    IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.index\n    TopK: 1,10,50,100 \n    QueryFile: /tmp/cohere/cohere_large_10m_zvec/cohere_test_vector_1000.new.txt\n    QueryType: float \n    QueryFirstSep: \";\" \n    QuerySecondSep: \" \"\n    GroundTruthFile: /tmp/cohere/cohere_large_10m_zvec/neighbors.txt\n    RecallThreadCount: 1\n    BenchThreadCount: 16 \n    
BenchIterCount: 1000000000 \n    CompareById: true\n\nSearcherParams: \n    proxima.hnsw.streamer.ef: !!int 250\n```\n\n### Building Index \nConduct Build \n```bash\n$ /home/zvec/workspace/zvec/build/bin/local_builder_original ./build.yaml \n```\n\n### Performing Bench\nConduct Recall \n```bash\n$ /home/zvec/workspace/zvec/build/bin/recall_original ./search.yaml\n```\n\nConduct Bench \n```bash\n$ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml\n```\n\n\n"
  },
  {
    "path": "tools/core/bench.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"bench_result.h\"\n#include \"helper.h\"\n\nstatic bool g_debug_mode = 0;\n\n//------------------------------------------------------------\n// Bench\n//------------------------------------------------------------\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nenum FilterMode { FM_UNDEFINED = 0, FM_NONE = 1, FM_TAG = 2 };\n\ntemplate <typename T>\nclass Bench {\n public:\n  Bench(size_t threads, size_t bench_secs, size_t batch_count,\n        RetrievalMode &retrieval_mode, FilterMode filter_mode)\n      : threads_(threads),\n        bench_secs_(bench_secs),\n        batch_count_(batch_count),\n        retrieval_mode_{retrieval_mode},\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(false);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, false);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. 
press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    vector<vector<T>> queries;\n    vector<SparseData<T>> sparse_data;\n    vector<vector<uint64_t>> taglists;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, queries,\n                           sparse_data, taglists)) {\n      LOG_ERROR(\"Load query error\");\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      batch_queries_ = queries;\n\n      for (size_t i = 0; i < sparse_data.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(sparse_data[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(sparse_data[i].indices);\n        batch_sparse_features_.push_back(sparse_data[i].features);\n      }\n\n      for (size_t i = 0; i < taglists.size(); ++i) {\n        vector<vector<uint64_t>> new_taglists;\n        new_taglists.push_back(taglists[i]);\n\n        batch_taglists_.push_back(std::move(new_taglists));\n      }\n    } else {\n      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<T> batch_query;\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n        vector<vector<uint64_t>> batch_taglists;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          for (size_t k = 0; k < queries[idx].size(); ++k) {\n            batch_query.push_back(queries[idx][k]);\n          }\n\n          batch_sparse_count.push_back(sparse_data[idx].count);\n\n          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);\n          }\n\n          for 
(size_t k = 0; k < sparse_data[idx].features.size(); ++k) {\n            batch_sparse_feature.push_back(sparse_data[idx].features[k]);\n          }\n\n          if (taglists.size() > idx) {\n            batch_taglists.push_back(taglists[idx]);\n          }\n\n          idx = (idx + 1) % queries.size();\n        }\n\n        batch_queries_.push_back(batch_query);\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n        batch_taglists_.push_back(batch_taglists);\n      }\n    }\n\n    dim_ = queries[0].size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);\n    } else {\n      LOG_ERROR(\"unsupported type\");\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  void run(core_interface::Index::Pointer index,\n           core_interface::BaseIndexQueryParam::Pointer query_param,\n           int max_iter, int topk) {\n    // Check\n    if (batch_queries_.size() == 0) {\n      return;\n    }\n\n    query_param_ = query_param;\n    query_param_->topk = topk;\n    query_param_->is_linear = false;\n\n    // Do bench\n    signal(SIGINT, stop);\n    bench_result_.mark_start();\n    auto start_time = Monotime::MilliSeconds();\n    for (size_t i = 0; i < threads_; ++i) {\n      pool_->execute(this, &Bench<T>::start_bench, index, max_iter, &STOP_NOW);\n    }\n\n    while (!pool_->is_finished()) {\n      this_thread::sleep_for(chrono::milliseconds(1));\n      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {\n        STOP_NOW = true;\n      }\n    }\n\n    pool_->wait_finish();\n\n    bench_result_.mark_end();\n    bench_result_.print();\n  }\n\n  void set_tag_lists(const std::vector<std::vector<uint64_t>> 
&id_to_tags_list,\n                     const std::vector<uint64_t> &tag_key_list) {\n    id_to_tags_list_ = id_to_tags_list;\n    tag_key_list_ = tag_key_list;\n  }\n\n private:\n  void start_bench(core_interface::Index::Pointer index, size_t max_iter,\n                   const bool *is_stop) {\n    size_t thread_index = pool_->indexof_this();\n\n    size_t i = thread_index;\n    for (; i < max_iter && !*is_stop; i += threads_) {\n      int idx = i % batch_queries_.size();\n\n      // prefilter\n      FilterResultCache filter_cache;\n      std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n      if (filter_mode_ == FM_TAG) {\n        if (batch_taglists_[idx].size() != 1) {\n          LOG_ERROR(\"query tag list not equal to one!\");\n          return;\n        }\n\n        int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],\n                                      tag_key_list_);\n        if (ret != 0) {\n          LOG_ERROR(\"prefilter failed, idx: %d\", idx);\n          return;\n        }\n\n        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n        filter_ptr = std::make_shared<IndexFilter>();\n        filter_ptr->set(filterFunc);\n      }\n\n      auto query_param = query_param_->Clone();\n      query_param->filter = filter_ptr;\n\n\n      // Do knn_search\n      uint64_t start = Monotime::MicroSeconds();\n      int ret;\n      if (retrieval_mode_ == RM_DENSE) {\n        if (batch_count_ == 1) {\n          ret = do_knn_search<T>(index, batch_queries_[idx], query_param);\n        } else {\n          ret = do_knn_search_batch<T>(index, batch_queries_[idx], query_param);\n        }\n\n        if (ret != 0) {\n          LOG_ERROR(\"Failed to knn search, ret=%d %s\", ret,\n                    IndexError::What(ret));\n          return;\n        }\n      } else {\n        std::string mode = retrieval_mode_ == 1 ? 
\"Dense\" : \"Sparse\";\n        LOG_ERROR(\"unsupported retrieval mode: %s\", mode.c_str());\n      }\n\n      uint64_t end = Monotime::MicroSeconds();\n\n      // Do sample\n      bench_result_.add_time(batch_count_, end - start);\n    }\n  }\n\n  template <typename U>\n  typename std::enable_if<\n      std::is_same<float, U>::value || std::is_same<int8_t, U>::value ||\n          std::is_same<uint32_t, U>::value || std::is_same<uint64_t, U>::value,\n      int>::type\n  do_knn_search(core_interface::Index::Pointer index, const vector<U> &query,\n                core_interface::BaseIndexQueryParam::Pointer query_param) {\n    core_interface::DenseVector dense_query;\n    dense_query.data = query.data();\n    core_interface::VectorData query_data;\n    query_data.vector = dense_query;\n\n    core_interface::SearchResult search_result;\n    int ret = index->Search(query_data, query_param, &search_result);\n    if (ret < 0) {\n      return ret;\n    }\n\n    if (search_result.doc_list_.empty()) {\n      LOG_ERROR(\"Search results is empty\");\n    }\n\n    return 0;\n  }\n\n  template <typename U>\n  typename std::enable_if<\n      std::is_same<float, U>::value || std::is_same<int8_t, U>::value ||\n          std::is_same<uint32_t, U>::value || std::is_same<uint64_t, U>::value,\n      int>::type\n  do_knn_search_batch(\n      core_interface::Index::Pointer index, const vector<U> &query,\n      core_interface::BaseIndexQueryParam::Pointer query_param) {\n    // For batch search, we search each query separately\n    size_t qnum = query.size() / dim_;\n    for (size_t i = 0; i < qnum; ++i) {\n      core_interface::DenseVector dense_query;\n      dense_query.data = query.data() + i * dim_;\n      core_interface::VectorData query_data;\n      query_data.vector = dense_query;\n\n      core_interface::SearchResult search_result;\n      int ret = index->Search(query_data, query_param, &search_result);\n      if (ret < 0) {\n        return ret;\n      }\n\n      if 
(search_result.doc_list_.empty()) {\n        LOG_ERROR(\"Search results is empty for batch query %zu\", i);\n      }\n    }\n\n    return 0;\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  size_t bench_secs_;\n  size_t batch_count_;\n  size_t dim_;\n  shared_ptr<ThreadPool> pool_;\n  core_interface::BaseIndexQueryParam::Pointer query_param_;\n\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  // Tag lists for filtering\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n\n  BenchResult bench_result_;\n  RetrievalMode retrieval_mode_{RM_UNDEFINED};\n  FilterMode filter_mode_{FM_NONE};\n  static bool STOP_NOW;\n};\n\ntemplate <typename T>\nbool Bench<T>::STOP_NOW = false;\n\n//------------------------------------------------------------\n// Sparse Bench\n//------------------------------------------------------------\ntemplate <typename T>\nclass SparseBench {\n public:\n  SparseBench(size_t threads, size_t bench_secs, size_t batch_count,\n              FilterMode filter_mode)\n      : threads_(threads),\n        bench_secs_(bench_secs),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(false);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, false);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. 
press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    vector<vector<T>> queries;\n    vector<SparseData<T>> sparse_data;\n    vector<vector<uint64_t>> taglists;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, queries,\n                           sparse_data, taglists)) {\n      LOG_ERROR(\"Load query error\");\n      return false;\n    }\n\n    linear_sparse_data_ = sparse_data;\n\n    if (batch_count_ == 1) {\n      for (size_t i = 0; i < sparse_data.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(sparse_data[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(sparse_data[i].indices);\n        batch_sparse_features_.push_back(sparse_data[i].features);\n      }\n\n      for (size_t i = 0; i < taglists.size(); ++i) {\n        vector<vector<uint64_t>> new_taglists;\n        new_taglists.push_back(taglists[i]);\n\n        batch_taglists_.push_back(std::move(new_taglists));\n      }\n    } else {\n      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n        vector<vector<uint64_t>> batch_taglists;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          batch_sparse_count.push_back(sparse_data[idx].count);\n\n          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < sparse_data[idx].features.size(); ++k) {\n            batch_sparse_feature.push_back(sparse_data[idx].features[k]);\n          }\n\n      
    if (taglists.size() > idx) {\n            batch_taglists.push_back(taglists[idx]);\n          }\n\n          idx = (idx + 1) % queries.size();\n        }\n\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n        batch_taglists_.push_back(batch_taglists);\n      }\n    }\n\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);\n    } else {\n      LOG_ERROR(\"unsupported type\");\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  void run(core_interface::Index::Pointer index,\n           core_interface::BaseIndexQueryParam::Pointer query_param,\n           int max_iter, int topk) {\n    // Check\n    if (batch_sparse_counts_.size() == 0) {\n      return;\n    }\n\n    query_param_ = query_param;\n    query_param_->topk = topk;\n    query_param_->is_linear = false;\n\n    // Do bench\n    signal(SIGINT, stop);\n    bench_result_.mark_start();\n    auto start_time = Monotime::MilliSeconds();\n    for (size_t i = 0; i < threads_; ++i) {\n      pool_->execute(this, &SparseBench<T>::start_bench, index, max_iter,\n                     &STOP_NOW);\n    }\n\n    while (!pool_->is_finished()) {\n      this_thread::sleep_for(chrono::milliseconds(1));\n      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {\n        STOP_NOW = true;\n      }\n    }\n\n    pool_->wait_finish();\n\n    bench_result_.mark_end();\n    bench_result_.print();\n  }\n\n  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,\n                     const std::vector<uint64_t> &tag_key_list) {\n    id_to_tags_list_ = id_to_tags_list;\n    tag_key_list_ = tag_key_list;\n  }\n\n private:\n  void 
start_bench(core_interface::Index::Pointer index, size_t max_iter,\n                   const bool *is_stop) {\n    size_t thread_index = pool_->indexof_this();\n\n    size_t i = thread_index;\n    size_t sparse_query_size = batch_sparse_indices_.size();\n    for (; i < max_iter && !*is_stop; i += threads_) {\n      int idx = i % sparse_query_size;\n\n      // prefilter\n      FilterResultCache filter_cache;\n      std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n      if (filter_mode_ == FM_TAG) {\n        if (batch_taglists_[idx].size() != 1) {\n          LOG_ERROR(\"query tag list not equal to one!\");\n          return;\n        }\n\n        int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],\n                                      tag_key_list_);\n        if (ret != 0) {\n          LOG_ERROR(\"prefilter failed, idx: %d\", idx);\n          return;\n        }\n\n        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n        filter_ptr = std::make_shared<IndexFilter>();\n        filter_ptr->set(filterFunc);\n      }\n\n      auto query_param = query_param_->Clone();\n      query_param->filter = filter_ptr;\n\n      // Do knn_search\n      uint64_t start = Monotime::MicroSeconds();\n      int ret;\n      if (batch_count_ == 1) {\n        if (batch_sparse_counts_[idx].size() != 1) {\n          LOG_ERROR(\"Sparse count size should be 1, since batch count is 1\");\n          return;\n        }\n        ret = do_knn_search<T>(index, batch_sparse_counts_[idx][0],\n                               batch_sparse_indices_[idx],\n                               batch_sparse_features_[idx], query_param);\n      } else {\n        ret = do_knn_search_batch<T>(\n            index, batch_sparse_counts_[idx], batch_sparse_indices_[idx],\n            batch_sparse_features_[idx], idx, query_param);\n      }\n\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to sparse knn search, ret=%d %s\", ret,\n                  
IndexError::What(ret));\n        return;\n      }\n\n      uint64_t end = Monotime::MicroSeconds();\n\n      // Do sample\n      bench_result_.add_time(batch_count_, end - start);\n    }\n  }\n\n  // sparse search - single query\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(core_interface::Index::Pointer index,\n                const uint32_t sparse_count,\n                const vector<uint32_t> &sparse_indices,\n                const vector<U> &sparse_feature,\n                core_interface::BaseIndexQueryParam::Pointer query_param) {\n    core_interface::SparseVector sparse_query;\n    sparse_query.count = sparse_count;\n    sparse_query.indices = sparse_indices.data();\n    sparse_query.values = sparse_feature.data();\n    core_interface::VectorData query_data;\n    query_data.vector = sparse_query;\n\n    core_interface::SearchResult search_result;\n    int ret = index->Search(query_data, query_param, &search_result);\n    if (ret < 0) {\n      return ret;\n    }\n\n    if (search_result.doc_list_.empty()) {\n      LOG_ERROR(\"Search results is empty\");\n    }\n\n    return 0;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value ||\n                              std::is_same<uint32_t, U>::value ||\n                              std::is_same<uint64_t, U>::value,\n                          int>::type\n  do_knn_search(core_interface::Index::Pointer /*index*/,\n                const uint32_t /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/,\n                core_interface::BaseIndexQueryParam::Pointer /*query_param*/) {\n    return IndexError_Unsupported;\n  }\n\n  // sparse search - batch\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search_batch(\n      core_interface::Index::Pointer index,\n      const vector<uint32_t> 
&sparse_count,\n      const vector<uint32_t> & /*sparse_indices*/,\n      const vector<U> & /*sparse_feature*/, size_t batch_idx,\n      core_interface::BaseIndexQueryParam::Pointer query_param) {\n    // For batch search, search each query separately\n    for (size_t i = 0; i < sparse_count.size(); ++i) {\n      size_t query_idx = batch_idx * batch_count_ + i;\n      if (query_idx >= linear_sparse_data_.size()) {\n        break;\n      }\n\n      const auto &single_sparse = linear_sparse_data_[query_idx];\n      core_interface::SparseVector sparse_query;\n      sparse_query.count = single_sparse.count;\n      sparse_query.indices = single_sparse.indices.data();\n      sparse_query.values = single_sparse.features.data();\n      core_interface::VectorData query_data;\n      query_data.vector = sparse_query;\n\n      core_interface::SearchResult search_result;\n      int ret = index->Search(query_data, query_param, &search_result);\n      if (ret < 0) {\n        return ret;\n      }\n\n      if (search_result.doc_list_.empty()) {\n        LOG_ERROR(\"Search results is empty for batch query %zu\", i);\n      }\n    }\n\n    return 0;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value ||\n                              std::is_same<uint32_t, U>::value ||\n                              std::is_same<uint64_t, U>::value,\n                          int>::type\n  do_knn_search_batch(\n      core_interface::Index::Pointer /*index*/,\n      const vector<uint32_t> & /*sparse_count*/,\n      const vector<uint32_t> & /*sparse_indices*/,\n      const vector<U> & /*sparse_feature*/, size_t /*batch_idx*/,\n      core_interface::BaseIndexQueryParam::Pointer /*query_param*/) {\n    return IndexError_Unsupported;\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  size_t bench_secs_;\n  size_t batch_count_;\n  core_interface::BaseIndexQueryParam::Pointer query_param_;\n  shared_ptr<ThreadPool> pool_;\n\n  vector<SparseData<T>> 
linear_sparse_data_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  // Tag lists for filtering\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n\n  FilterMode filter_mode_{FM_NONE};\n  BenchResult bench_result_;\n  static bool STOP_NOW;\n};\ntemplate <typename T>\nbool SparseBench<T>::STOP_NOW = false;\n\nbool check_config(YAML::Node &config_node) {\n  auto common = config_node[\"IndexCommon\"];\n  if (!common) {\n    LOG_ERROR(\"Can not find [IndexCommon] in config\");\n    return false;\n  }\n  if (!common[\"IndexConfig\"]) {\n    LOG_ERROR(\"Can not find [IndexConfig] in config\");\n    return false;\n  }\n  if (!common[\"IndexPath\"]) {\n    LOG_ERROR(\"Can not find [IndexPath] in config\");\n    return false;\n  }\n  if (!common[\"TopK\"]) {\n    LOG_ERROR(\"Can not find [TopK] in config\");\n    return false;\n  }\n  if (!common[\"QueryFile\"]) {\n    LOG_ERROR(\"Can not find [QueryFile] in config\");\n    return false;\n  }\n\n  auto query_config = config_node[\"QueryConfig\"];\n  if (!query_config) {\n    LOG_ERROR(\"Can not find [QueryConfig] in config\");\n    return false;\n  }\n  if (!query_config[\"QueryParam\"]) {\n    LOG_ERROR(\"Can not find [QueryConfig.QueryParam] in config\");\n    return false;\n  }\n\n\n  return true;\n}\n\nvoid usage(void) {\n  cout << \"Usage: bench CONFIG.yaml [plugin file path]\" << endl;\n}\n\n\nint bench(std::string &query_type, size_t thread_count, size_t batch_count,\n          size_t top_k, string query_file, string &first_sep,\n          string &second_sep, size_t bench_secs, size_t iter_count,\n          core_interface::Index::Pointer index,\n          core_interface::BaseIndexQueryParam::Pointer query_param,\n          string &index_dir, RetrievalMode retrieval_mode,\n          FilterMode filter_mode) {\n  if 
(filter_mode == FM_TAG && batch_count > 1) {\n    LOG_ERROR(\"filter mode can not be run in batch mode\");\n    return -1;\n  }\n\n  std::vector<std::vector<uint64_t>> id_to_tags_list;\n  std::vector<uint64_t> tag_key_list;\n  // Load tag lists if available\n  load_taglists(index_dir, id_to_tags_list, tag_key_list);\n\n  if (query_type == \"float\") {\n    Bench<float> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                       filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else if (query_type == \"int8\") {\n    Bench<int8_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                        filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else if (query_type == \"binary\") {\n    Bench<uint32_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                          filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else if (query_type == \"binary64\") {\n    Bench<uint64_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                          filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else {\n    LOG_ERROR(\"Can not recognize type: %s\", query_type.c_str());\n  }\n\n  return 0;\n}\n\nint bench_sparse(std::string &query_type, size_t thread_count,\n                 size_t batch_count, size_t top_k, string query_file,\n                 string &first_sep, string &second_sep, size_t bench_secs,\n                 size_t iter_count, 
core_interface::Index::Pointer index,\n                 core_interface::BaseIndexQueryParam::Pointer query_param,\n                 string &index_dir, FilterMode filter_mode) {\n  if (filter_mode == FM_TAG && batch_count > 1) {\n    LOG_ERROR(\"filter mode can not be run in batch mode\");\n    return -1;\n  }\n\n  std::vector<std::vector<uint64_t>> id_to_tags_list;\n  std::vector<uint64_t> tag_key_list;\n  // Load tag lists if available\n  load_taglists(index_dir, id_to_tags_list, tag_key_list);\n\n  if (query_type == \"float\") {\n    SparseBench<float> bench(thread_count, bench_secs, batch_count,\n                             filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else if (query_type == \"int8\") {\n    SparseBench<int8_t> bench(thread_count, bench_secs, batch_count,\n                              filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    bench.set_tag_lists(id_to_tags_list, tag_key_list);\n    bench.run(index, query_param, iter_count, top_k);\n  } else {\n    LOG_ERROR(\"Can not recognize type: %s\", query_type.c_str());\n  }\n\n  return 0;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      LOG_ERROR(\"Failed to load plugin: %s (%s)\", argv[i], error.c_str());\n      return -1;\n    }\n  }\n\n  YAML::Node config_node;\n  try {\n    config_node = YAML::LoadFile(argv[1]);\n  } catch (...) 
{\n    LOG_ERROR(\"Load YAML file[%s] failed!\", argv[1]);\n    return -1;\n  }\n\n  if (!check_config(config_node)) {\n    return -1;\n  }\n  auto config_common = config_node[\"IndexCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n  string log_level = config_common[\"LogLevel\"]\n                         ? config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  // Calculate Bench\n  size_t thread_count = config_common[\"BenchThreadCount\"]\n                            ? config_common[\"BenchThreadCount\"].as<uint64_t>()\n                            : 0;\n  size_t iter_count = config_common[\"BenchIterCount\"]\n                          ? config_common[\"BenchIterCount\"].as<uint64_t>()\n                          : 10000;\n  size_t batch_count = config_common[\"BenchBatchCount\"]\n                           ? config_common[\"BenchBatchCount\"].as<uint64_t>()\n                           : 0;\n  g_debug_mode = config_common[\"DebugMode\"]\n                     ? 
config_common[\"DebugMode\"].as<bool>()\n                     : false;\n  string topk_str = config_common[\"TopK\"].as<string>();\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  FilterMode filter_mode{FM_NONE};\n  if (config_common[\"FilterMode\"]) {\n    std::string filter_mode_str = config_common[\"FilterMode\"].as<string>();\n    if (filter_mode_str == \"tag\") {\n      filter_mode = FM_TAG;\n    }\n  }\n\n  vector<int32_t> topk_values;\n  StringHelper::Split(topk_str, \",\", &topk_values);\n  size_t top_k = *topk_values.rbegin();\n  string query_file = config_common[\"QueryFile\"].as<string>();\n  string first_sep = config_common[\"QueryFirstSep\"]\n                         ? config_common[\"QueryFirstSep\"].as<string>()\n                         : \";\";\n  string second_sep = config_common[\"QuerySecondSep\"]\n                          ? config_common[\"QuerySecondSep\"].as<string>()\n                          : \" \";\n  string query_type = config_common[\"QueryType\"]\n                          ? config_common[\"QueryType\"].as<string>()\n                          : \"float\";\n  size_t bench_secs = config_common[\"BenchSecs\"]\n                          ? 
config_common[\"BenchSecs\"].as<uint64_t>()\n                          : 60;\n\n  string index_dir = config_common[\"IndexPath\"].as<string>();\n\n  core_interface::Index::Pointer index;\n  core_interface::BaseIndexQueryParam::Pointer query_param;\n  if (0 !=\n      parse_and_load_index_param(config_node, index_dir, index, query_param)) {\n    LOG_ERROR(\"Failed to parse and load index param\");\n    return -1;\n  }\n\n  if (retrieval_mode == RM_SPARSE) {\n    bench_sparse(query_type, thread_count, batch_count, top_k, query_file,\n                 first_sep, second_sep, bench_secs, iter_count, index,\n                 query_param, index_dir, filter_mode);\n\n    cout << \"Bench Sparse done.\" << endl;\n  } else {\n    bench(query_type, thread_count, batch_count, top_k, query_file, first_sep,\n          second_sep, bench_secs, iter_count, index, query_param, index_dir,\n          retrieval_mode, filter_mode);\n\n    cout << \"Bench done.\" << endl;\n  }\n\n  // Cleanup\n  index->Close();\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/bench_original.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <signal.h>\n#include <iostream>\n#include <ailego/container/bitmap.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"zvec/ailego/utility/string_helper.h\"\n#include \"zvec/core/framework/index_plugin.h\"\n#include \"zvec/core/interface/index_factory.h\"\n#include \"zvec/core/interface/index_param.h\"\n#include \"bench_result.h\"\n#include \"filter_result_cache.h\"\n#include \"flow.h\"\n#include \"txt_input_reader.h\"\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wshadow\"\n#pragma clang diagnostic ignored \"-Wdeprecated-declarations\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n#endif\n\n#include <yaml-cpp/yaml.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n\nusing namespace std;\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\nusing Flow = Flow;\n\nstatic bool g_debug_mode = 0;\n\n//------------------------------------------------------------\n// Bench\n//------------------------------------------------------------\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nenum FilterMode { FM_UNDEFINED = 0, FM_NONE = 1, FM_TAG 
= 2 };\n\ntemplate <typename T>\nclass Bench {\n public:\n  Bench(size_t threads, size_t bench_secs, size_t batch_count,\n        RetrievalMode &retrieval_mode, FilterMode filter_mode)\n      : threads_(threads),\n        bench_secs_(bench_secs),\n        batch_count_(batch_count),\n        retrieval_mode_{retrieval_mode},\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(false);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, false);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    vector<vector<T>> queries;\n    vector<SparseData<T>> sparse_data;\n    vector<vector<uint64_t>> taglists;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, queries,\n                           sparse_data, taglists)) {\n      cerr << \"Load query error\" << endl;\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      batch_queries_ = queries;\n\n      for (size_t i = 0; i < sparse_data.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(sparse_data[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(sparse_data[i].indices);\n        batch_sparse_features_.push_back(sparse_data[i].features);\n      }\n\n      for (size_t i = 0; i < taglists.size(); ++i) {\n        vector<vector<uint64_t>> 
new_taglists;\n        new_taglists.push_back(taglists[i]);\n\n        batch_taglists_.push_back(std::move(new_taglists));\n      }\n    } else {\n      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<T> batch_query;\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n        vector<vector<uint64_t>> batch_taglists;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          for (size_t k = 0; k < queries[idx].size(); ++k) {\n            batch_query.push_back(queries[idx][k]);\n          }\n\n          batch_sparse_count.push_back(sparse_data[idx].count);\n\n          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < sparse_data[idx].features.size(); ++k) {\n            batch_sparse_feature.push_back(sparse_data[idx].features[k]);\n          }\n\n          if (taglists.size() > idx) {\n            batch_taglists.push_back(taglists[idx]);\n          }\n\n          idx = (idx + 1) % queries.size();\n        }\n\n        batch_queries_.push_back(batch_query);\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n        batch_taglists_.push_back(batch_taglists);\n      }\n    }\n\n    size_t dim = queries[0].size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim);\n    } else {\n      cerr << \"unsupported type\";\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  void run(Flow *flower, int max_iter, 
int topk) {\n    // Check\n    if (batch_queries_.size() == 0) {\n      return;\n    }\n\n    for (size_t i = 0; i < threads_; i++) {\n      contexts_.emplace_back(flower->create_context());\n      contexts_[i]->set_topk(topk);\n      contexts_[i]->set_debug_mode(g_debug_mode);\n    }\n\n    // Do bench\n    signal(SIGINT, stop);\n    bench_result_.mark_start();\n    auto start_time = Monotime::MilliSeconds();\n    for (size_t i = 0; i < threads_; ++i) {\n      pool_->execute(this, &Bench<T>::start_bench, flower, max_iter, &STOP_NOW);\n    }\n\n    while (!pool_->is_finished()) {\n      this_thread::sleep_for(chrono::milliseconds(1));\n      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {\n        STOP_NOW = true;\n      }\n    }\n\n    pool_->wait_finish();\n\n    bench_result_.mark_end();\n    bench_result_.print();\n\n    // for (size_t i = 0; i < threads_; i++) {\n    //   if (contexts_[i]->flow_context() != nullptr) {\n    //     std::cout << \"context id: \" << i << \": \\n\" <<\n    //     contexts_[i]->flow_context()->searcher_context()->profiler().display();\n    //   }\n    // }\n  }\n\n private:\n  void start_bench(Flow *flower, size_t max_iter, const bool *is_stop) {\n    size_t thread_index = pool_->indexof_this();\n\n    size_t i = thread_index;\n    for (; i < max_iter && !*is_stop; i += threads_) {\n      int idx = i % batch_queries_.size();\n\n      // prefilter\n      FilterResultCache filter_cache;\n      if (filter_mode_ == FM_TAG) {\n        if (batch_taglists_[idx].size() != 1) {\n          cerr << \"query tag list not equal to one!\" << endl;\n          return;\n        }\n\n        int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                      batch_taglists_[idx][0],\n                                      flower->tag_key_list());\n        if (ret != 0) {\n          cerr << \"prefilter failed, idx: \" << idx << std::endl;\n\n          return;\n        }\n\n        auto filterFunc = 
[&](uint64_t key) { return filter_cache.find(key); };\n\n        contexts_[thread_index]->set_filter(filterFunc);\n      }\n\n      // Do knn_search\n      uint64_t start = Monotime::MicroSeconds();\n      int ret;\n      if (retrieval_mode_ == RM_DENSE) {\n        if (batch_count_ == 1) {\n          ret = do_knn_search<T>(flower, contexts_[thread_index],\n                                 batch_queries_[idx]);\n        } else {\n          ret = do_knn_search<T>(flower, contexts_[thread_index],\n                                 batch_queries_[idx], batch_count_);\n        }\n\n        if (ret != 0) {\n          cerr << \"Failed to knn search, ret=\" << ret << endl;\n          return;\n        }\n      } else {\n        std::string mode = retrieval_mode_ == 1 ? \"Dense\" : \"Sparse\";\n        cerr << \"unsupported retrieval mode: \" << mode << endl;\n      }\n\n      uint64_t end = Monotime::MicroSeconds();\n\n      // Check result\n      auto &result = contexts_[thread_index]->result();\n      if (result.empty()) {\n        cerr << \"Search results is small than queries\" << endl;\n      }\n\n      // Do sample\n      bench_result_.add_time(batch_count_, end - start);\n    }\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n      
          const vector<U> &query, size_t count) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn search\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  size_t bench_secs_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n  vector<Flow::Context::Pointer> contexts_;\n\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> 
batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  BenchResult bench_result_;\n  RetrievalMode retrieval_mode_{RM_UNDEFINED};\n  FilterMode filter_mode_{FM_NONE};\n  static bool STOP_NOW;\n};\n\ntemplate <typename T>\nbool Bench<T>::STOP_NOW = false;\n\n//------------------------------------------------------------\n// Sparse Bench\n//------------------------------------------------------------\ntemplate <typename T>\nclass SparseBench {\n public:\n  SparseBench(size_t threads, size_t bench_secs, size_t batch_count,\n              FilterMode filter_mode)\n      : threads_(threads),\n        bench_secs_(bench_secs),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(false);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, false);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. 
press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    vector<vector<T>> queries;\n    vector<SparseData<T>> sparse_data;\n    vector<vector<uint64_t>> taglists;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, queries,\n                           sparse_data, taglists)) {\n      cerr << \"Load query error\" << endl;\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      for (size_t i = 0; i < sparse_data.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(sparse_data[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(sparse_data[i].indices);\n        batch_sparse_features_.push_back(sparse_data[i].features);\n      }\n    } else {\n      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          batch_sparse_count.push_back(sparse_data[idx].count);\n\n          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < sparse_data[idx].features.size(); ++k) {\n            batch_sparse_feature.push_back(sparse_data[idx].features[k]);\n          }\n\n          idx = (idx + 1) % queries.size();\n        }\n\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n      }\n    }\n\n    if (typeid(T) == typeid(float)) {\n      
qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);\n    } else {\n      cerr << \"unsupported type\";\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  void run(SparseFlow *flower, int max_iter, int topk) {\n    for (size_t i = 0; i < threads_; i++) {\n      contexts_.emplace_back(flower->create_context());\n      contexts_[i]->set_topk(topk);\n      contexts_[i]->set_debug_mode(g_debug_mode);\n    }\n\n    // Do bench\n    signal(SIGINT, stop);\n    bench_result_.mark_start();\n    auto start_time = Monotime::MilliSeconds();\n    for (size_t i = 0; i < threads_; ++i) {\n      pool_->execute(this, &SparseBench<T>::start_bench, flower, max_iter,\n                     &STOP_NOW);\n    }\n\n    while (!pool_->is_finished()) {\n      this_thread::sleep_for(chrono::milliseconds(1));\n      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {\n        STOP_NOW = true;\n      }\n    }\n\n    pool_->wait_finish();\n\n    bench_result_.mark_end();\n    bench_result_.print();\n  }\n\n private:\n  void start_bench(SparseFlow *flower, size_t max_iter, const bool *is_stop) {\n    size_t thread_index = pool_->indexof_this();\n\n    size_t i = thread_index;\n    size_t sparse_query_size = batch_sparse_indices_.size();\n    for (; i < max_iter && !*is_stop; i += threads_) {\n      int idx = i % sparse_query_size;\n\n      // prefilter\n      FilterResultCache filter_cache;\n      if (filter_mode_ == FM_TAG) {\n        if (batch_taglists_[idx].size() != 1) {\n          cerr << \"query tag list not equal to one!\" << endl;\n          return;\n        }\n\n        int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                      batch_taglists_[idx][0],\n                                      flower->tag_key_list());\n        if (ret != 0) {\n          cerr << \"prefilter failed, idx: \" 
<< idx << std::endl;\n\n          return;\n        }\n\n        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n        contexts_[thread_index]->set_filter(filterFunc);\n      }\n\n      // Do knn_search\n      uint64_t start = Monotime::MicroSeconds();\n      int ret;\n      if (batch_count_ == 1) {\n        if (batch_sparse_counts_[idx].size() != 1) {\n          cerr << \"Sparse count size should be 1, since batch count is 1\"\n               << endl;\n          return;\n        }\n        ret = do_knn_search<T>(\n            flower, contexts_[thread_index], batch_sparse_counts_[idx][0],\n            batch_sparse_indices_[idx], batch_sparse_features_[idx]);\n      } else {\n        ret = do_knn_search<T>(flower, contexts_[thread_index],\n                               batch_sparse_counts_[idx],\n                               batch_sparse_indices_[idx],\n                               batch_sparse_features_[idx], batch_count_);\n      }\n\n      if (ret != 0) {\n        cerr << \"Failed to sparse knn search, ret=\" << ret << endl;\n        return;\n      }\n\n      uint64_t end = Monotime::MicroSeconds();\n\n      // Check result\n      auto &result = contexts_[thread_index]->result();\n      if (result.empty()) {\n        cerr << \"Search results is small than queries\" << endl;\n      }\n\n      // Do sample\n      bench_result_.add_time(batch_count_, end - start);\n    }\n  }\n\n  // sparse search\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                const vector<uint32_t> &sparse_count,\n                const vector<uint32_t> &sparse_indices,\n                const vector<U> &sparse_feature, size_t count) {\n    // Do sparse knn search\n    return flower->search_impl(sparse_count.data(), sparse_indices.data(),\n                               sparse_feature.data(), qmeta_, count, context);\n  }\n\n  
template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const vector<uint32_t> & /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/, size_t /*count*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const vector<uint32_t> & /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/, size_t /*count*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const vector<uint32_t> & /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/, size_t /*count*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                const uint32_t sparse_count,\n                const vector<uint32_t> &sparse_indices,\n                const vector<U> &sparse_feature) {\n    // Do sparse knn search\n    return flower->search_impl(sparse_count, sparse_indices.data(),\n                               sparse_feature.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, 
int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const uint32_t /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const uint32_t /*parse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(SparseFlow * /*flower*/,\n                SparseFlow::Context::Pointer & /*context*/,\n                const uint32_t /*sparse_count*/,\n                const vector<uint32_t> & /*sparse_indices*/,\n                const vector<U> & /*sparse_feature*/) {\n    // Do sparse knn search\n    return IndexError_Unsupported;\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  size_t bench_secs_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n  vector<SparseFlow::Context::Pointer> contexts_;\n\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  FilterMode filter_mode_{FM_NONE};\n  BenchResult bench_result_;\n  static bool STOP_NOW;\n};\ntemplate <typename T>\nbool SparseBench<T>::STOP_NOW = false;\n\n// do\nbool prepare_params(YAML::Node &&config_params, Params &params) {\n  cout << \"Parse params as blow:\" << endl;\n  for (auto it = config_params.begin(); it != config_params.end(); ++it) {\n    string 
tag = it->second.Tag();\n    if (tag == \"tag:yaml.org,2002:int\") {\n      int64_t val = it->second.as<int64_t>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:float\") {\n      float val = it->second.as<float>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:bool\") {\n      bool val = it->second.as<bool>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else {\n      if (it->second.IsScalar()) {\n        string val = it->second.as<string>();\n        params.set(it->first.as<string>(), val);\n        cout << it->first.as<string>() << \"=\" << val << endl;\n      } else if (it->second.IsMap()) {\n        Params sub_params;\n        auto sub_node = it->second;\n        if (!prepare_params(std::move(sub_node), sub_params)) {\n          cerr << \"parse params error with key[\" << it->first.as<string>()\n               << \"]\" << endl;\n          return false;\n        }\n        params.set(it->first.as<string>(), sub_params);\n      }\n    }\n  }\n  return true;\n}\n\nbool check_config(YAML::Node &config_node) {\n  auto common = config_node[\"SearcherCommon\"];\n  if (!common) {\n    cerr << \"Can not find [SearcherCommon] in config\" << endl;\n    return false;\n  }\n  if (!common[\"SearcherClass\"] && !common[\"SearcherConfig\"]) {\n    cerr << \"Can not find [SearcherClass] or [SearcherConfig] in config\"\n         << endl;\n    return false;\n  }\n  if (!common[\"IndexPath\"]) {\n    cerr << \"Can not find [IndexPath] in config\" << endl;\n    return false;\n  }\n  if (!common[\"TopK\"]) {\n    cerr << \"Can not find [TopK] in config\" << endl;\n    return false;\n  }\n  if (!common[\"QueryFile\"]) {\n    cerr << \"Can not find [QueryFile] in config\" << endl;\n    return 
false;\n  }\n  return true;\n}\n\nvoid usage(void) {\n  cout << \"Usage: bench CONFIG.yaml [plugin file path]\" << endl;\n}\n\nbool load_index(Flow &flower, string &index_dir) {\n  int ret = flower.load(index_dir);\n  if (0 != ret) {\n    cerr << \"Flow load failed with ret \" << ret << endl;\n    return false;\n  }\n  cout << \"Load index done!\" << endl;\n  return true;\n};\n\nint bench(std::string &query_type, size_t thread_count, size_t batch_count,\n          size_t top_k, string query_file, string &first_sep,\n          string &second_sep, size_t bench_secs, size_t iter_count,\n          Flow &flower, string &index_dir, RetrievalMode retrieval_mode,\n          FilterMode filter_mode) {\n  if (filter_mode == FM_TAG && batch_count > 1) {\n    cerr << \"filter mode can not be run in batch mode\" << endl;\n    return -1;\n  }\n\n  if (query_type == \"float\") {\n    Bench<float> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                       filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"int8\") {\n    Bench<int8_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                        filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"binary\") {\n    Bench<uint32_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n                          filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"binary64\") {\n    Bench<uint64_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,\n 
                         filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else {\n    cerr << \"Can not recognize type: \" << query_type << endl;\n  }\n\n  return 0;\n}\n\nbool load_index(SparseFlow &flower, string &index_dir) {\n  int ret = flower.load(index_dir);\n  if (0 != ret) {\n    cerr << \"Flow load failed with ret \" << ret << endl;\n    return false;\n  }\n  cout << \"Load index done!\" << endl;\n  return true;\n};\n\nint bench_sparse(std::string &query_type, size_t thread_count,\n                 size_t batch_count, size_t top_k, string query_file,\n                 string &first_sep, string &second_sep, size_t bench_secs,\n                 size_t iter_count, SparseFlow &flower, string &index_dir,\n                 FilterMode filter_mode) {\n  if (filter_mode == FM_TAG && batch_count > 1) {\n    cerr << \"filter mode can not be run in batch mode\" << endl;\n    return -1;\n  }\n\n  if (query_type == \"float\") {\n    SparseBench<float> bench(thread_count, bench_secs, batch_count,\n                             filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"int8\") {\n    SparseBench<int8_t> bench(thread_count, bench_secs, batch_count,\n                              filter_mode);\n    bench.load_query(query_file, first_sep, second_sep);\n    if (load_index(flower, index_dir)) {\n      bench.run(&flower, iter_count, top_k);\n    } else {\n      return -1;\n    }\n  } else {\n    cerr << \"Can not recognize type: \" << query_type << endl;\n  }\n\n  return 0;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < 
argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      cerr << \"Failed to load plugin: \" << argv[i] << \" (\" << error << \")\"\n           << endl;\n      return -1;\n    }\n  }\n\n  YAML::Node config_node;\n  try {\n    config_node = YAML::LoadFile(argv[1]);\n  } catch (...) {\n    cerr << \"Load YAML file[\" << argv[1] << \"] failed!\" << endl;\n    return -1;\n  }\n\n  if (!check_config(config_node)) {\n    return -1;\n  }\n  auto config_common = config_node[\"SearcherCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n  string log_level = config_common[\"LogLevel\"]\n                         ? config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  // Calculate Bench\n  size_t thread_count = config_common[\"BenchThreadCount\"]\n                            ? config_common[\"BenchThreadCount\"].as<uint64_t>()\n                            : 0;\n  size_t iter_count = config_common[\"BenchIterCount\"]\n                          ? config_common[\"BenchIterCount\"].as<uint64_t>()\n                          : 10000;\n  size_t batch_count = config_common[\"BenchBatchCount\"]\n                           ? config_common[\"BenchBatchCount\"].as<uint64_t>()\n                           : 0;\n  g_debug_mode = config_common[\"DebugMode\"]\n                     ? 
config_common[\"DebugMode\"].as<bool>()\n                     : false;\n  string topk_str = config_common[\"TopK\"].as<string>();\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  FilterMode filter_mode{FM_NONE};\n  if (config_common[\"FilterMode\"]) {\n    std::string filter_mode_str = config_common[\"FilterMode\"].as<string>();\n    if (filter_mode_str == \"tag\") {\n      filter_mode = FM_TAG;\n    }\n  }\n\n  vector<int32_t> topk_values;\n  StringHelper::Split(topk_str, \",\", &topk_values);\n  size_t top_k = *topk_values.rbegin();\n  string query_file = config_common[\"QueryFile\"].as<string>();\n  string first_sep = config_common[\"QueryFirstSep\"]\n                         ? config_common[\"QueryFirstSep\"].as<string>()\n                         : \";\";\n  string second_sep = config_common[\"QuerySecondSep\"]\n                          ? config_common[\"QuerySecondSep\"].as<string>()\n                          : \" \";\n  string query_type = config_common[\"QueryType\"]\n                          ? config_common[\"QueryType\"].as<string>()\n                          : \"float\";\n  string container_type = config_common[\"ContainerType\"]\n                              ? config_common[\"ContainerType\"].as<string>()\n                              : \"MMapFileStorage\";\n  size_t bench_secs = config_common[\"BenchSecs\"]\n                          ? 
config_common[\"BenchSecs\"].as<uint64_t>()\n                          : 60;\n\n  if (retrieval_mode == RM_SPARSE) {\n    SparseFlow flower;\n\n    // Create container params\n    Params container_params;\n    if (config_node[\"ContainerParams\"]) {\n      // Get index params of Searcher in flower object\n      if (!prepare_params(config_node[\"ContainerParams\"], container_params)) {\n        return -1;\n      }\n      cout << \"Created index params of a container in flower object \" << endl;\n    }\n\n    container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n    // Create a container\n    int ret = flower.set_container(container_type, container_params);\n    if (0 != ret) {\n      cerr << \"Create \" << container_type << \" failed.\" << endl;\n      return -1;\n    }\n\n    if (config_common[\"SearcherClass\"]) {\n      Params params;\n      if (config_node[\"SearcherParams\"]) {\n        // Get index params of Searcher in flower object\n        if (!prepare_params(config_node[\"SearcherParams\"], params)) {\n          return -1;\n        }\n        cout << \"Created index params of a searcher in flower object \" << endl;\n      }\n\n      // Set a Searcher\n      string searcher_class = config_common[\"SearcherClass\"].as<string>();\n      ret = flower.set_searcher(searcher_class, params);\n      if (0 != ret) {\n        cerr << \"Failed to create searcher \" << searcher_class << endl;\n        return -1;\n      }\n      cout << \"Created searcher \" << searcher_class << endl;\n    } else {  // SearcherConfig\n      std::cout << config_common[\"SearcherConfig\"].as<string>() << std::endl;\n      auto params =\n          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(\n              config_common[\"SearcherConfig\"].as<string>());\n\n      auto index =\n          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);\n\n      flower.set_searcher(index->index_searcher());\n    }\n\n    string index_dir = 
config_common[\"IndexPath\"].as<string>();\n\n    bench_sparse(query_type, thread_count, batch_count, top_k, query_file,\n                 first_sep, second_sep, bench_secs, iter_count, flower,\n                 index_dir, filter_mode);\n\n    cout << \"Bench Sparse done.\" << endl;\n  } else {\n    Flow flower;\n\n    // Create container params\n    Params container_params;\n    if (config_node[\"ContainerParams\"]) {\n      // Get index params of Searcher in flower object\n      if (!prepare_params(config_node[\"ContainerParams\"], container_params)) {\n        return -1;\n      }\n      cout << \"Created index params of a container in flower object \" << endl;\n    }\n\n    container_params.set(\"proxima.mmap_file.container.memory_warmup\", true);\n    // Create a container\n    int ret = flower.set_container(container_type, container_params);\n    if (0 != ret) {\n      cerr << \"Create \" << container_type << \" failed.\" << endl;\n      return -1;\n    }\n\n    // Set a Searcher\n    if (config_common[\"SearcherClass\"]) {\n      Params params;\n      if (config_node[\"SearcherParams\"]) {\n        // Get index params of Searcher in flower object\n        if (!prepare_params(config_node[\"SearcherParams\"], params)) {\n          return -1;\n        }\n        cout << \"Created index params of a searcher in flower object \" << endl;\n      }\n\n      string searcher_class = config_common[\"SearcherClass\"].as<string>();\n      ret = flower.set_searcher(searcher_class, params);\n      if (0 != ret) {\n        cerr << \"Failed to create searcher \" << searcher_class << endl;\n        return -1;\n      }\n      cout << \"Created searcher \" << searcher_class << endl;\n    } else {  // SearcherConfig\n      std::cout << config_common[\"SearcherConfig\"].as<string>() << std::endl;\n      auto params =\n          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(\n              config_common[\"SearcherConfig\"].as<string>());\n\n      auto index =\n  
        zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);\n\n      flower.set_searcher(index->index_searcher());\n    }\n\n    string index_dir = config_common[\"IndexPath\"].as<string>();\n\n    bench(query_type, thread_count, batch_count, top_k, query_file, first_sep,\n          second_sep, bench_secs, iter_count, flower, index_dir, retrieval_mode,\n          filter_mode);\n\n    flower.unload();\n\n    cout << \"Bench done.\" << endl;\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/bench_result.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <sys/time.h>\n#include <stdio.h>\n#include <string.h>\n#include <limits>\n#include <map>\n#include <ailego/parallel/lock.h>\n\nnamespace zvec {\nnamespace core {\n\nclass BenchResult {\n public:\n  BenchResult() {\n    total_query_count_ = 0;\n    total_process_time_by_us_ = 0;\n    min_time_by_us_ = std::numeric_limits<long>::max();\n    max_time_by_us_ = 0;\n  }\n  ~BenchResult() {}\n\n  void add_time(int query_count, long time_by_us) {\n    lock_.lock();\n    total_query_count_ += query_count;\n    total_process_time_by_us_ += time_by_us;\n    long time_val = time_by_us / 100;\n    if (process_time_map_.find(time_val) != process_time_map_.end()) {\n      ++process_time_map_[time_val];\n    } else {\n      process_time_map_[time_val] = 1;\n    }\n    if (time_by_us < min_time_by_us_) {\n      min_time_by_us_ = time_by_us;\n    } else if (time_by_us > max_time_by_us_) {\n      max_time_by_us_ = time_by_us;\n    }\n    lock_.unlock();\n  }\n  void mark_start() {\n    gettimeofday(&start_, NULL);\n  }\n  void mark_end() {\n    gettimeofday(&end_, NULL);\n  }\n  long get_duration_by_ms() {\n    long duration = (end_.tv_sec - start_.tv_sec) * 1000 +\n                    (end_.tv_usec - start_.tv_usec) / 1000;\n    return duration;\n  }\n  long get_total_query_count() {\n    return total_query_count_;\n  }\n  
std::map<long, long> &get_process_time_map() {\n    return process_time_map_;\n  }\n  long get_total_process_time_by_ms() {\n    return total_process_time_by_us_ / 1000;\n  }\n  void print() {\n    fprintf(stdout,\n            \"Process query: %ld, total process time: %ldms, \"\n            \"duration: %ldms, max: %ldms, min:%ldms\\n\",\n            get_total_query_count(), get_total_process_time_by_ms(),\n            get_duration_by_ms(), max_time_by_us_ / 1000,\n            min_time_by_us_ / 1000);\n    fprintf(stdout, \"Avg latency: %0.1fms qps: %0.1f\\n\",\n            ((float)get_total_process_time_by_ms()) / get_total_query_count(),\n            get_total_query_count() / ((float)get_duration_by_ms() / 1000));\n\n    int tot_num = 0;\n    int percent[] = {25, 50, 75, 90, 95, 99};\n    int index = 0;\n    float max_time = 0.0;\n    int last_num = 0;\n\n    for (auto element : process_time_map_) {\n      tot_num += element.second;\n      if (tot_num >= total_query_count_ * percent[index] / 100) {\n        if (last_num != tot_num) {\n          max_time = (float)element.first / 10;\n          last_num = tot_num;\n        }\n        fprintf(stdout, \"%d Percentile:\\t\\t %.1f ms\\n\", percent[index],\n                max_time);\n        index++;\n        if (index >= 6) {\n          break;\n        }\n      }\n    }\n    for (; index < 6; index++) {\n      fprintf(stdout, \"%d Percentile:\\t\\t %.1f ms\\n\", percent[index], max_time);\n    }\n    fprintf(stdout, \"\\n\");\n  }\n\n private:\n  long total_query_count_;\n  long total_process_time_by_us_;\n  long min_time_by_us_;\n  long max_time_by_us_;\n  struct timeval start_;\n  struct timeval end_;\n  ailego::SpinMutex lock_;\n  std::map<long, long> process_time_map_;  // <processTimeBy100us, count>\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "tools/core/convert_cohere_parquet.py",
    "content": "from __future__ import annotations\n\nimport logging\nimport os\nimport pathlib\nfrom pathlib import Path\n\nimport numpy as np\nimport pandas as pd\nimport polars as pl\n\nto_append = True\n\n\ndef write_neighbors_file(data_frame, neighbors_file):\n    id_list = np.stack(data_frame[\"id\"])\n    neighbors_list = np.stack(data_frame[\"neighbors_id\"])\n\n    id_list.tolist()\n    neighbors_list.tolist()\n\n    if len(id_list) != len(neighbors_list):\n        logger.error(\"list size not equal: %d, %d\", len(id_list), len(neighbors_list))\n        os._exit(1)\n\n    for i in range(len(id_list)):\n        id_int = id_list[i]\n        line = str(id_int) + \";\"\n\n        neighbors = neighbors_list[i]\n        # for j in range(len(neighbors)):\n        for j in range(100):\n            neighbor_id = neighbors[j]\n\n            line += str(neighbor_id)\n            if j != 99:\n                line += \" \"\n            else:\n                line += \"\\n\"\n\n        neighbors_file.write(line)\n\n    logger.info(\"Output neighbors file done. Total lines: %d\", len(id_list))\n\n\ndef write_vector_file(data_frame, vector_file):\n    test_embedding_list = np.stack(data_frame[\"emb\"])\n    test_embedding_list.tolist()\n\n    test_id_list = np.stack(data_frame[\"id\"])\n    test_id_list.tolist()\n\n    if len(test_id_list) != len(test_embedding_list):\n        logger.info(\n            \"id list not matched with embedding list! 
: %d, %d\",\n            len(test_id_list),\n            len(test_embedding_list),\n        )\n        return\n\n    for case_id in range(len(test_id_list)):\n        idx = test_id_list[case_id]\n        vector = test_embedding_list[case_id]\n\n        vector_line = str(idx) + \";\"\n\n        for i in range(len(vector)):\n            vector_line += str(round(vector[i], 16))\n            if i != len(vector) - 1:\n                vector_line += \" \"\n            else:\n                vector_line += \";\"\n\n        vector_line += \"\\n\"\n\n        vector_file.write(vector_line)\n\n        if case_id != 0 and case_id % 10000 == 0:\n            logger.info(\"output lines: %d\", case_id)\n\n    logger.info(\"Output vector file done. Total lines: %d\", len(test_id_list))\n\n\ndef read_parquet_file(file_name: str) -> pd.DataFrame:\n    parquet_file = pathlib.Path(file_name)\n    if not parquet_file.exists():\n        logger.error(\"open error!\")\n        return pd.DataFrame()\n\n    try:\n        return pl.read_parquet(parquet_file)\n    except Exception:\n        logger.error(\"open error! error file: %s\", file_name)\n        return pd.DataFrame()\n\n\ndef gen_vector_files(input_dir, input_file_pattern, output_dir, output_file_name):\n    input_file_list = list(Path(input_dir).rglob(input_file_pattern))\n\n    output_file_name_full = pathlib.Path(output_dir, output_file_name)\n\n    if not to_append and output_file_name_full.exists():\n        logger.error(\"File exists! File name: %s\", output_file_name_full)\n        os._exit(1)\n\n    write_flag = \"a\" if to_append else \"w\"\n\n    with Path.open(output_file_name_full.resolve(), write_flag) as vector_file:\n        for input_file in input_file_list:\n            input_file_name = input_file.resolve()\n\n            logger.info(\n                \"Load the entire file into memory. 
File name: %s\", input_file_name\n            )\n            data_set = read_parquet_file(input_file.resolve())\n            logger.info(\"Read parquet file done. File name: %s\", input_file_name)\n\n            if len(data_set) > 0:\n                logger.info(\"Process parquet file. File name: %s\", input_file_name)\n                write_vector_file(data_set, vector_file)\n                logger.info(\"Process parquet file done. File name: %s\", input_file_name)\n\n\ndef gen_neighbor_files(input_dir, input_file_pattern, output_dir, output_file_name):\n    input_file_list = list(Path(input_dir).rglob(input_file_pattern))\n\n    output_file_name_full = pathlib.Path(output_dir, output_file_name)\n\n    if not to_append and output_file_name_full.exists():\n        logger.error(\"File already exists. File name: %s\", output_file_name_full)\n        os._exit(1)\n\n    write_flag = \"a\" if to_append else \"w\"\n\n    with Path.open(output_file_name_full.resolve(), write_flag) as neighbor_file:\n        for input_file in input_file_list:\n            input_file_name = input_file.resolve()\n\n            logger.info(\n                \"Load the entire file into memory. File name: %s\", input_file_name\n            )\n            data_set = read_parquet_file(input_file.resolve())\n            logger.info(\"Read parquet file done. File name: %s\", input_file_name)\n\n            if len(data_set) > 0:\n                logger.info(\"Write parquet file. File name: %s\", input_file_name)\n                write_neighbors_file(data_set, neighbor_file)\n                logger.info(\"Write parquet file done. 
File name: %s\", input_file_name)\n\n\nif __name__ == \"__main__\":\n    logger = logging.getLogger(\"convert_log\")\n    logger.setLevel(logging.DEBUG)\n\n    console_handler = logging.StreamHandler()\n    console_handler.setLevel(logging.DEBUG)\n\n    formatter = logging.Formatter(\n        fmt=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%Y-%m-%d %H:%M:%S\"\n    )\n\n    console_handler.setFormatter(formatter)\n    logger.addHandler(console_handler)\n\n    input_dir = \"./cohere/10m\"\n    output_dir = \"./10m.output\"\n\n    logger.info(\"Generate test vector files\")\n    input_file_pattern = \"test.parquet\"\n    output_file_name = \"cohere_test_vector_1000.new.txt\"\n\n    to_append = False\n    gen_vector_files(input_dir, input_file_pattern, output_dir, output_file_name)\n\n    logger.info(\"Generate neighbor files\")\n    input_file_pattern = \"neighbors.parquet\"\n    output_file_name = \"neighbors.txt\"\n\n    to_append = False\n    gen_neighbor_files(input_dir, input_file_pattern, output_dir, output_file_name)\n\n    logger.info(\"Generate train vector files\")\n    output_file_name = \"cohere_768_10m_vector.train.txt\"\n    to_append = True\n    for i in range(10):\n        input_file_pattern = \"shuffle_train-0\" + str(i) + \"-of-10.parquet\"\n        gen_vector_files(input_dir, input_file_pattern, output_dir, output_file_name)\n"
  },
  {
    "path": "tools/core/filter_result_cache.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <vector>\n#include <roaring/roaring.h>\n\nnamespace zvec {\nnamespace core {\n\nstruct FilterResultCache {\n public:\n  FilterResultCache() {\n    bitmap_ = roaring_bitmap_create();\n  }\n\n  FilterResultCache(uint32_t capacity_hint) {\n    bitmap_ = roaring_bitmap_create_with_capacity(capacity_hint);\n  }\n\n  ~FilterResultCache() {\n    roaring_bitmap_free(bitmap_);\n    bitmap_ = nullptr;\n  }\n\n  bool find(uint64_t key) const {\n    return !roaring_bitmap_contains(bitmap_, key);\n  }\n\n  void set(uint64_t key) const {\n    roaring_bitmap_add(bitmap_, key);\n  }\n\n  int filter(const std::vector<std::vector<uint64_t>> &id_to_tags_list,\n             const std::vector<uint64_t> &query_tag_list,\n             const std::vector<uint64_t> &id_to_key_list) {\n    for (size_t i = 0; i < id_to_tags_list.size(); ++i) {\n      auto &id_tag_list = id_to_tags_list[i];\n\n      size_t t_i = 0;\n      size_t q_i = 0;\n      while (t_i < id_tag_list.size() && q_i < query_tag_list.size()) {\n        if (id_tag_list[t_i] == query_tag_list[q_i]) {\n          uint64_t key = id_to_key_list[i];\n\n          set(key);\n\n          break;\n        } else if (id_tag_list[t_i] < query_tag_list[q_i]) {\n          ++t_i;\n        } else {\n          ++q_i;\n        }\n      }\n    }\n\n    return 0;\n  }\n\n public:\n  roaring_bitmap_t 
*bitmap_{nullptr};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "tools/core/flow.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n#pragma once\n\n#include \"zvec/core/framework/index_flow.h\"\n#include \"meta_segment_common.h\"\n\nusing namespace std;\n\nnamespace zvec {\nnamespace core {\n\n#define SEARCH_DENSE_BATCH(_FUNC_NAME)                                         \\\n  int _FUNC_NAME(const void *query, const IndexQueryMeta &qmeta,               \\\n                 uint32_t count, Context::Pointer &context) const {            \\\n    if (streamer_) {                                                           \\\n      if (reformer_) {                                                         \\\n        std::string ovec;                                                      \\\n        IndexQueryMeta ometa;                                                  \\\n        int ret = reformer_->convert(query, qmeta, count, &ovec, &ometa);      \\\n        if (ret != 0) {                                                        \\\n          return ret;                                                          \\\n        }                                                                      \\\n        return streamer_->_FUNC_NAME(ovec.data(), ometa, count,                \\\n                                     context->context());                      \\\n      } else {                                                                 \\\n        return 
streamer_->_FUNC_NAME(query, qmeta, count, context->context()); \\\n      }                                                                        \\\n    } else {                                                                   \\\n      return flow_._FUNC_NAME(query, qmeta, count, context->flow_context());   \\\n    }                                                                          \\\n  }\n\n#define SEARCH_DENSE(_FUNC_NAME)                                              \\\n  int _FUNC_NAME(const void *query, const IndexQueryMeta &qmeta,              \\\n                 Context::Pointer &context) const {                           \\\n    if (streamer_) {                                                          \\\n      if (reformer_) {                                                        \\\n        std::string ovec;                                                     \\\n        IndexQueryMeta ometa;                                                 \\\n        int ret = reformer_->convert(query, qmeta, &ovec, &ometa);            \\\n        if (ret != 0) {                                                       \\\n          return ret;                                                         \\\n        }                                                                     \\\n        return streamer_->_FUNC_NAME(ovec.data(), ometa, context->context()); \\\n      } else {                                                                \\\n        return streamer_->_FUNC_NAME(query, qmeta, context->context());       \\\n      }                                                                       \\\n    } else {                                                                  \\\n      return flow_._FUNC_NAME(query, qmeta, context->flow_context());         \\\n    }                                                                         \\\n  }\n\n#define SEARCH_SPRASE_BATCH(_FUNC_NAME)                                        \\\n  int _FUNC_NAME(const 
uint32_t *sparse_count, const uint32_t *sparse_indices, \\\n                 const void *sparse_query, const IndexQueryMeta &qmeta,        \\\n                 uint32_t count, Context::Pointer &context) const {            \\\n    if (streamer_) {                                                           \\\n      if (reformer_) {                                                         \\\n        LOG_ERROR(\"reformer not supported in sparse search\");                  \\\n        return IndexError_Runtime;                                             \\\n      } else {                                                                 \\\n        return streamer_->_FUNC_NAME(sparse_count, sparse_indices,             \\\n                                     sparse_query, qmeta, count,               \\\n                                     context->context());                      \\\n      }                                                                        \\\n    } else {                                                                   \\\n      return flow_._FUNC_NAME(sparse_count, sparse_indices, sparse_query,      \\\n                              qmeta, count, context->flow_context());          \\\n    }                                                                          \\\n  }\n\n#define SEARCH_SPARSE(_FUNC_NAME)                                              \\\n  int _FUNC_NAME(const uint32_t sparse_count, const uint32_t *sparse_indices,  \\\n                 const void *sparse_query, const IndexQueryMeta &qmeta,        \\\n                 Context::Pointer &context) const {                            \\\n    if (streamer_) {                                                           \\\n      if (reformer_) {                                                         \\\n        LOG_ERROR(\"reformer not supported in sparse search\");                  \\\n        return IndexError_Runtime;                                             \\\n      } else {      
                                                           \\\n        return streamer_->_FUNC_NAME(sparse_count, sparse_indices,             \\\n                                     sparse_query, qmeta, context->context()); \\\n      }                                                                        \\\n    } else {                                                                   \\\n      return flow_._FUNC_NAME(sparse_count, sparse_indices, sparse_query,      \\\n                              qmeta, context->flow_context());                 \\\n    }                                                                          \\\n  }\n\nclass Flow {\n public:\n  class Context {\n   public:\n    typedef std::unique_ptr<Context> Pointer;\n\n    Context(IndexContext::Pointer &ctx, IndexFlow::Context::Pointer &flow_ctx)\n        : ctx_(std::move(ctx)), flow_ctx_(std::move(flow_ctx)) {}\n\n    void set_debug_mode(bool debug_mode) {\n      ctx_ ? ctx_->set_debug_mode(debug_mode)\n           : flow_ctx_->set_debug_mode(debug_mode);\n    }\n\n    std::string debug_string() {\n      return ctx_ ? ctx_->debug_string() : flow_ctx_->debug_string();\n    }\n\n    void set_topk(uint32_t topk) {\n      ctx_ ? ctx_->set_topk(topk) : flow_ctx_->set_topk(topk);\n    }\n\n    template <typename T>\n    void set_filter(T &&func) {\n      ctx_ ? ctx_->set_filter(func) : flow_ctx_->set_filter(func);\n    }\n\n    const IndexDocumentList &result(void) const {\n      return ctx_ ? ctx_->result() : flow_ctx_->result();\n    }\n\n    const IndexDocumentList &result(size_t index) const {\n      return ctx_ ? 
ctx_->result(index) : flow_ctx_->result(index);\n    }\n\n   public:\n    friend class Flow;\n\n    IndexFlow::Context::Pointer &flow_context(void) {\n      return flow_ctx_;\n    }\n\n    IndexContext::Pointer &context(void) {\n      return ctx_;\n    }\n\n   private:\n    IndexContext::Pointer ctx_;\n    IndexFlow::Context::Pointer flow_ctx_;\n  };\n\n  Context::Pointer create_context(void) const {\n    IndexContext::Pointer ctx;\n    IndexFlow::Context::Pointer flow_ctx;\n    if (streamer_) {\n      ctx = streamer_->create_context();\n    } else {\n      flow_ctx = flow_.create_context();\n    }\n    return Context::Pointer(new (std::nothrow) Context(ctx, flow_ctx));\n  }\n\n  int set_container(const std::string &name, const ailego::Params &params) {\n    return flow_.set_storage(name, params);\n  }\n\n  int load_taglists(const std::string &path) {\n    // load tag lists\n    auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n\n    int ret = storage->open(path, false);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to load index with storage %s\",\n                storage->name().c_str());\n      return ret;\n    }\n\n    auto segment_taglist_header = storage->get(TAGLIST_HEADER_SEGMENT_NAME);\n    if (!segment_taglist_header) {\n      LOG_INFO(\"No Tag Lists Found!\");\n\n      return 0;\n    }\n\n    TagListHeader taglist_header;\n    void *data_ptr;\n    if (segment_taglist_header->read(0, (const void **)(&data_ptr),\n                                     sizeof(TagListHeader)) !=\n        sizeof(TagListHeader)) {\n      LOG_ERROR(\"Read tag list meta failed\");\n      return IndexError_ReadData;\n    }\n\n    memcpy(&taglist_header, data_ptr, sizeof(TagListHeader));\n\n    auto segment_taglist_key = storage->get(TAGLIST_KEY_SEGMENT_NAME);\n    if (!segment_taglist_key) {\n      LOG_ERROR(\"IndexStorage get segment %s failed\",\n                TAGLIST_KEY_SEGMENT_NAME.c_str());\n      return IndexError_InvalidValue;\n    }\n\n    size_t 
offset = 0;\n    for (size_t i = 0; i < taglist_header.num_vecs; ++i) {\n      if (segment_taglist_key->read(offset, (const void **)(&data_ptr),\n                                    sizeof(uint64_t)) != sizeof(uint64_t)) {\n        LOG_ERROR(\"Read tag list key failed\");\n        return IndexError_ReadData;\n      }\n\n      uint64_t key = *reinterpret_cast<const uint64_t *>(data_ptr);\n      tag_key_list_.push_back(key);\n\n      offset += sizeof(uint64_t);\n    }\n\n    auto segment_taglist_data = storage->get(TAGLIST_DATA_SEGMENT_NAME);\n    if (!segment_taglist_data) {\n      LOG_ERROR(\"IndexStorage get segment %s failed\",\n                TAGLIST_DATA_SEGMENT_NAME.c_str());\n      return IndexError_InvalidValue;\n    }\n\n    std::vector<uint64_t> taglist_offsets;\n    offset = 0;\n    for (size_t i = 0; i < taglist_header.num_vecs; ++i) {\n      if (segment_taglist_data->read(offset, (const void **)(&data_ptr),\n                                     sizeof(uint64_t)) != sizeof(uint64_t)) {\n        LOG_ERROR(\"Read tag list data failed\");\n        return IndexError_ReadData;\n      }\n\n      uint64_t tag_offset = *reinterpret_cast<const uint64_t *>(data_ptr);\n      taglist_offsets.push_back(tag_offset);\n\n      offset += sizeof(uint64_t);\n    }\n\n    offset = taglist_header.num_vecs * sizeof(uint64_t);\n    for (size_t i = 0; i < taglist_header.num_vecs; ++i) {\n      if (segment_taglist_data->read(offset, (const void **)(&data_ptr),\n                                     sizeof(uint64_t)) != sizeof(uint64_t)) {\n        LOG_ERROR(\"Read tag list data failed\");\n        return IndexError_ReadData;\n      }\n      offset += sizeof(uint64_t);\n\n      uint64_t tag_count = *reinterpret_cast<const uint64_t *>(data_ptr);\n\n      if (segment_taglist_data->read(offset, (const void **)(&data_ptr),\n                                     tag_count * sizeof(uint64_t)) !=\n          tag_count * sizeof(uint64_t)) {\n        LOG_ERROR(\"Read tag list data 
failed\");\n        return IndexError_ReadData;\n      }\n      offset += tag_count * sizeof(uint64_t);\n\n      std::vector<uint64_t> tag_list;\n      for (size_t j = 0; j < tag_count; ++j) {\n        uint64_t tag_id = *(reinterpret_cast<const uint64_t *>(data_ptr) + j);\n        tag_list.push_back(tag_id);\n      }\n\n      // order tags\n      sort(tag_list.begin(), tag_list.end());\n\n      id_to_tags_list_.push_back(std::move(tag_list));\n    }\n\n    storage->cleanup();\n    storage = nullptr;\n\n    return 0;\n  }\n\n  int load(const std::string &path) {\n    int ret = load_taglists(path);\n    if (ret != 0) {\n      LOG_ERROR(\"Failed to load tag lists\");\n      return ret;\n    }\n\n    if (streamer_) {\n      stg_ = IndexFactory::CreateStorage(\"MMapFileStorage\");\n      if (!stg_) {\n        return IndexError_NoExist;\n      }\n      ailego::Params params;\n      params.set(\"proxima.mmap_file.storage.memory_warmup\", true);\n      ret = stg_->init(params);\n      if (ret != 0) {\n        return ret;\n      }\n      ret = stg_->open(path, true);\n      if (ret != 0) {\n        return ret;\n      }\n\n      if (!inited_) {\n        IndexMeta meta;\n        ret = IndexHelper::DeserializeFromStorage(stg_.get(), &meta);\n        if (ret != 0) {\n          LOG_ERROR(\"Failed to get IndexMeta from Storage\");\n          return ret;\n        }\n        ret = streamer_->init(meta, searcher_params_);\n        if (ret != 0) {\n          return ret;\n        }\n\n        if (!meta.reformer_name().empty()) {\n          reformer_ = IndexFactory::CreateReformer(meta.reformer_name());\n          if (!reformer_) {\n            LOG_ERROR(\"Failed to create reformer %s\",\n                      meta.reformer_name().c_str());\n            return IndexError_NoExist;\n          }\n          reformer_->init(meta.reformer_params());\n        }\n      }\n\n      return streamer_->open(stg_);\n    } else {\n      return flow_.load(path);\n    }\n  }\n\n  int unload(void) {\n   
 if (streamer_) {\n      streamer_->close();\n      return stg_->close();\n    } else {\n      return flow_.unload();\n    }\n  }\n\n  int set_searcher(const std::string &name, const ailego::Params &params) {\n    //! If the searcher is streamer, create it\n    streamer_ = IndexFactory::CreateStreamer(name);\n    if (!streamer_) {\n      return flow_.set_searcher(name, params);\n    }\n    searcher_params_ = params;\n    return 0;\n  }\n\n  int set_searcher(IndexStreamer::Pointer streamer) {\n    streamer_ = streamer;\n\n    inited_ = true;\n\n    return 0;\n  }\n\n  const std::vector<std::vector<uint64_t>> &id_to_tags_list() const {\n    return id_to_tags_list_;\n  }\n\n  const std::vector<uint64_t> &tag_key_list() const {\n    return tag_key_list_;\n  }\n\n  SEARCH_DENSE_BATCH(search_impl);\n  SEARCH_DENSE(search_impl);\n  SEARCH_DENSE_BATCH(search_bf_impl);\n  SEARCH_DENSE(search_bf_impl);\n\n private:\n  IndexFlow flow_{};\n\n  IndexStreamer::Pointer streamer_{};\n  IndexReformer::Pointer reformer_{};\n\n  bool inited_{false};\n\n  IndexStorage::Pointer stg_{};\n  ailego::Params searcher_params_{};\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n};\n\nclass SparseFlow {\n public:\n  class Context {\n   public:\n    typedef std::unique_ptr<Context> Pointer;\n\n    Context(IndexContext::Pointer &ctx,\n            IndexSparseFlow::Context::Pointer &flow_ctx)\n        : ctx_(std::move(ctx)), flow_ctx_(std::move(flow_ctx)) {}\n\n    void set_debug_mode(bool debug_mode) {\n      ctx_ ? ctx_->set_debug_mode(debug_mode)\n           : flow_ctx_->set_debug_mode(debug_mode);\n    }\n\n    std::string debug_string() {\n      return ctx_ ? ctx_->debug_string() : flow_ctx_->debug_string();\n    }\n\n    template <typename T>\n    void set_filter(T &&func) {\n      ctx_ ? ctx_->set_filter(func) : flow_ctx_->set_filter(func);\n    }\n\n    void set_topk(uint32_t topk) {\n      ctx_ ? 
ctx_->set_topk(topk) : flow_ctx_->set_topk(topk);\n    }\n\n    const IndexDocumentList &result(void) const {\n      return ctx_ ? ctx_->result() : flow_ctx_->result();\n    }\n\n    const IndexDocumentList &result(size_t index) const {\n      return ctx_ ? ctx_->result(index) : flow_ctx_->result(index);\n    }\n\n   private:\n    friend class SparseFlow;\n\n    IndexSparseFlow::Context::Pointer &flow_context(void) {\n      return flow_ctx_;\n    }\n\n    IndexContext::Pointer &context(void) {\n      return ctx_;\n    }\n\n\n   private:\n    IndexContext::Pointer ctx_;\n    IndexSparseFlow::Context::Pointer flow_ctx_;\n  };\n\n  Context::Pointer create_context(void) const {\n    IndexContext::Pointer ctx;\n    IndexSparseFlow::Context::Pointer flow_ctx;\n    if (streamer_) {\n      ctx = streamer_->create_context();\n    } else {\n      flow_ctx = flow_.create_context();\n    }\n    return Context::Pointer(new (std::nothrow) Context(ctx, flow_ctx));\n  }\n\n  int set_container(const std::string &name, const ailego::Params &params) {\n    return flow_.set_storage(name, params);\n  }\n\n  int load(const std::string &path) {\n    if (streamer_) {\n      stg_ = IndexFactory::CreateStorage(\"MMapFileStorage\");\n      if (!stg_) {\n        return IndexError_NoExist;\n      }\n      ailego::Params params;\n      params.set(\"proxima.mmap_file.storage.memory_warmup\", true);\n      int ret = stg_->init(params);\n      if (ret != 0) {\n        return ret;\n      }\n      ret = stg_->open(path, true);\n      if (ret != 0) {\n        return ret;\n      }\n\n      if (!inited_) {\n        IndexMeta meta;\n        ret = IndexHelper::DeserializeFromStorage(stg_.get(), &meta);\n        if (ret != 0) {\n          LOG_ERROR(\"Failed to get IndexMeta from Storage\");\n          return ret;\n        }\n\n        ret = streamer_->init(meta, searcher_params_);\n        if (ret != 0) {\n          return ret;\n        }\n\n        if (!meta.reformer_name().empty()) {\n          
reformer_ = IndexFactory::CreateReformer(meta.reformer_name());\n          if (!reformer_) {\n            LOG_ERROR(\"Failed to create reformer %s\",\n                      meta.reformer_name().c_str());\n            return IndexError_NoExist;\n          }\n          reformer_->init(meta.reformer_params());\n        }\n      }\n\n      return streamer_->open(stg_);\n    } else {\n      return flow_.load(path);\n    }\n\n    return 0;\n  }\n\n  int unload(void) {\n    if (streamer_) {\n      streamer_->close();\n      return stg_->close();\n    } else {\n      return flow_.unload();\n    }\n  }\n\n  int set_searcher(const std::string &name, const ailego::Params &params) {\n    //! If the searcher is streamer, create it\n    streamer_ = IndexFactory::CreateStreamer(name);\n    if (!streamer_) {\n      return flow_.set_searcher(name, params);\n    }\n    searcher_params_ = params;\n    return 0;\n  }\n\n  int set_searcher(IndexStreamer::Pointer streamer) {\n    streamer_ = streamer;\n\n    inited_ = true;\n\n    return 0;\n  }\n\n  const std::vector<std::vector<uint64_t>> &id_to_tags_list() const {\n    return id_to_tags_list_;\n  }\n\n  const std::vector<uint64_t> &tag_key_list() const {\n    return tag_key_list_;\n  }\n\n  SEARCH_SPRASE_BATCH(search_impl);\n  SEARCH_SPARSE(search_impl);\n  SEARCH_SPRASE_BATCH(search_bf_impl);\n  SEARCH_SPARSE(search_bf_impl);\n\n private:\n  IndexSparseFlow flow_{};\n\n  IndexStreamer::Pointer streamer_{};\n  IndexReformer::Pointer reformer_{};\n\n  bool inited_{false};\n\n  IndexStorage::Pointer stg_{};\n  ailego::Params searcher_params_{};\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n};\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "tools/core/helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <sys/stat.h>\n#include <signal.h>\n#include <cstring>\n#include <iomanip>\n#include <iostream>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <vector>\n#include <ailego/container/bitmap.h>\n#include <ailego/parallel/lock.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/logger/logger.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_plugin.h\"\n#include \"zvec/core/framework/index_storage.h\"\n#include \"zvec/core/interface/index.h\"\n#include \"zvec/core/interface/index_factory.h\"\n#include \"zvec/core/interface/index_param.h\"\n#include \"filter_result_cache.h\"\n#include \"meta_segment_common.h\"\n#include \"txt_input_reader.h\"\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wshadow\"\n#pragma clang diagnostic ignored \"-Wdeprecated-declarations\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n#endif\n\n#include <yaml-cpp/yaml.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n\nusing namespace std;\nusing namespace zvec;\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\n\n// Builds and opens the index described by `config_node`.\n//\n// IndexCommon.IndexConfig (JSON index params) is required: the index created\n// from it is opened read-only from `index_dir` via MMAP storage and stored in\n// `index`.\n//\n// QueryConfig is optional. QueryConfig.QueryParam is deserialized into\n// `query_param`. QueryConfig.RefinerConfig requires ScaleFactor and\n// ReferenceIndex.{Config,Path}; the reference index is opened the same way and\n// attached to `query_param`, which must therefore be non-null at that point.\n//\n// Returns 0 on success, -1 when a required key is missing or a step fails.\nint parse_and_load_index_param(\n    YAML::Node &config_node, string &index_dir,\n    core_interface::Index::Pointer &index,\n    core_interface::BaseIndexQueryParam::Pointer &query_param) {\n  // Create Index from config\n\n  if (auto index_config = config_node[\"IndexCommon\"][\"IndexConfig\"]) {\n    std::cout << \"IndexConfig: \" << index_config.as<string>() << std::endl;\n    auto params = core_interface::IndexFactory::DeserializeIndexParamFromJson(\n        index_config.as<string>());\n    if (!params) {\n      LOG_ERROR(\"Failed to deserialize index params\");\n      return -1;\n    }\n    index = core_interface::IndexFactory::CreateAndInitIndex(*params);\n    if (!index) {\n      LOG_ERROR(\"Failed to create index\");\n      return -1;\n    }\n    core_interface::StorageOptions storage_options;\n    storage_options.type = core_interface::StorageOptions::StorageType::kMMAP;\n    storage_options.create_new = false;\n    storage_options.read_only = true;\n\n    int ret = index->Open(index_dir, storage_options);\n    if (0 != ret) {\n      LOG_ERROR(\"Index open failed with ret %d\", ret);\n      return -1;\n    }\n\n    cout << \"Load index done!\" << endl;\n  } else {\n    LOG_ERROR(\"IndexCommon.IndexConfig is required\");\n    return -1;\n  }\n\n  /*\n      QueryConfig:\n      QueryParam: '{\"ef_search\":100,\"index_type\":\"kHNSW\"}'\n      RefinerConfig:\n        ScaleFactor: !!int 2\n        ReferenceIndex:\n          Config:\n     '{\"use_id_map\":false,\"data_type\":\"DT_FP32\",\"dimension\":768,\"index_type\":\"kHNSW\",\"metric_type\":\"kCosine\"}'\n          Path: ./cohere_train_vector_1m.2.index\n  */\n\n  // QUERY PARAM\n  if (auto query_config = config_node[\"QueryConfig\"]; query_config) {\n    // QueryConfig.QueryParam\n    if (auto query_param_config = query_config[\"QueryParam\"];\n        query_param_config) {\n      std::cout << \"QueryParam: \" << query_param_config.as<string>()\n                << std::endl;\n      query_param = core_interface::IndexFactory::QueryParamDeserializeFromJson<\n          core_interface::BaseIndexQueryParam>(\n          query_param_config.as<std::string>());\n      if (!query_param) {\n        LOG_ERROR(\"Failed to deserialize query params\");\n        return -1;\n      }\n    }\n\n    // QueryConfig.RefinerConfig\n    if (auto refiner_config = query_config[\"RefinerConfig\"]; refiner_config) {\n      // The refiner is attached to query_param at the end of this block, so\n      // one must exist (from QueryParam above or supplied by the caller).\n      if (!query_param) {\n        LOG_ERROR(\n            \"QueryConfig.QueryParam is required when RefinerConfig is set\");\n        return -1;\n      }\n\n      core_interface::Index::Pointer reference_index = nullptr;\n      auto refiner_param = std::make_shared<core_interface::RefinerParam>();\n\n      // QueryConfig.RefinerConfig.ScaleFactor\n      if (auto scale_factor_config = refiner_config[\"ScaleFactor\"];\n          scale_factor_config) {\n        auto scale_factor = scale_factor_config.as<float>();\n        refiner_param->scale_factor_ = scale_factor;\n      } else {\n        LOG_ERROR(\"QueryConfig.RefinerConfig.ScaleFactor config is required\");\n        return -1;\n      }\n\n      // QueryConfig.RefinerConfig.ReferenceIndex\n      if (auto reference_index_config = refiner_config[\"ReferenceIndex\"];\n          reference_index_config) {\n        // QueryConfig.RefinerConfig.ReferenceIndex.Config\n        if (auto reference_index_config_config =\n                reference_index_config[\"Config\"];\n            reference_index_config_config) {\n          auto params =\n              core_interface::IndexFactory::DeserializeIndexParamFromJson(\n                  reference_index_config_config.as<std::string>());\n          if (!params) {\n            LOG_ERROR(\"Failed to deserialize reference index params\");\n            return -1;\n          }\n\n          reference_index =\n              core_interface::IndexFactory::CreateAndInitIndex(*params);\n          if (!reference_index) {\n            LOG_ERROR(\"Failed to create reference index\");\n            return -1;\n          }\n        } else {\n          LOG_ERROR(\n              \"QueryConfig.RefinerConfig.ReferenceIndex.Config config is \"\n              \"required\");\n          return -1;\n        }\n\n        // QueryConfig.RefinerConfig.ReferenceIndex.Path\n        if (auto reference_index_path_config = reference_index_config[\"Path\"];\n            reference_index_path_config) {\n          auto reference_index_path =\n              reference_index_path_config.as<std::string>();\n          core_interface::StorageOptions storage_options;\n          storage_options.type =\n              core_interface::StorageOptions::StorageType::kMMAP;\n          storage_options.create_new = false;\n          storage_options.read_only = true;\n\n          int ret =\n              reference_index->Open(reference_index_path, storage_options);\n          if (0 != ret) {\n            LOG_ERROR(\"Index open failed with ret %d\", ret);\n            return -1;\n          }\n\n          cout << \"Load reference index done!\" << endl;\n        } else {\n          LOG_ERROR(\n              \"QueryConfig.RefinerConfig.ReferenceIndex.Path is required\");\n          return -1;\n        }\n        refiner_param->reference_index = reference_index;\n      } else {\n        LOG_ERROR(\n            \"QueryConfig.RefinerConfig.ReferenceIndex section is required\");\n        return -1;\n      }  // QueryConfig.RefinerConfig.ReferenceIndex\n\n      query_param->refiner_param = refiner_param;\n    }  // QueryConfig.RefinerConfig\n  }  // QUERY PARAM\n  return 0;\n}\n\n//--------------------------------------------------\n// Helper functions for loading tag lists\n//--------------------------------------------------\n// Reads the tag list segments of the file at `path`.\n//\n// For each of the `num_vecs` vectors recorded in the tag list header, one key\n// is appended to `tag_key_list` and one vector of tags is appended to\n// `id_to_tags_list`.\n//\n// Returns 0 on success, and also when the file has no tag list header segment\n// (both outputs are then left untouched); otherwise an error code.\nint load_taglists(const std::string &path,\n                  std::vector<std::vector<uint64_t>> &id_to_tags_list,\n                  std::vector<uint64_t> &tag_key_list) {\n  // Load tag lists\n  auto storage = IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n  if (!storage) {\n    LOG_ERROR(\"Failed to create storage MMapFileReadStorage\");\n    return IndexError_NoExist;\n  }\n\n  int ret = storage->open(path, false);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to load index with storage %s\", storage->name().c_str());\n    return ret;\n  }\n\n  auto segment_taglist_header = storage->get(TAGLIST_HEADER_SEGMENT_NAME);\n  if (!segment_taglist_header) {\n    LOG_INFO(\"No Tag Lists Found!\");\n    return 0;\n  }\n\n  TagListHeader taglist_header;\n  // Segment::read() hands back a pointer to the bytes it read.\n  const void *data_ptr = nullptr;\n  if (segment_taglist_header->read(0, &data_ptr, sizeof(TagListHeader)) !=\n      sizeof(TagListHeader)) {\n    LOG_ERROR(\"Read tag list meta failed\");\n    return IndexError_ReadData;\n  }\n\n  memcpy(&taglist_header, data_ptr, sizeof(TagListHeader));\n\n  auto segment_taglist_key = storage->get(TAGLIST_KEY_SEGMENT_NAME);\n  if (!segment_taglist_key) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              TAGLIST_KEY_SEGMENT_NAME.c_str());\n    return IndexError_InvalidValue;\n  }\n\n  // The key segment holds one uint64 key per vector.\n  size_t offset = 0;\n  for (size_t i = 0; i < taglist_header.num_vecs; ++i) {\n    if (segment_taglist_key->read(offset, &data_ptr, sizeof(uint64_t)) !=\n        sizeof(uint64_t)) {\n      LOG_ERROR(\"Read tag list key failed\");\n      return IndexError_ReadData;\n    }\n\n    uint64_t key = *reinterpret_cast<const uint64_t *>(data_ptr);\n    tag_key_list.push_back(key);\n\n    offset += sizeof(uint64_t);\n  }\n\n  auto segment_taglist_data = storage->get(TAGLIST_DATA_SEGMENT_NAME);\n  if (!segment_taglist_data) {\n    LOG_ERROR(\"IndexStorage get segment %s failed\",\n              TAGLIST_DATA_SEGMENT_NAME.c_str());\n    return IndexError_InvalidValue;\n  }\n\n  // The data segment begins with one uint64 offset per vector. The tag lists\n  // are read sequentially from right after that table, so the table itself\n  // is skipped rather than loaded.\n  offset = taglist_header.num_vecs * sizeof(uint64_t);\n  for (size_t i = 0; i < taglist_header.num_vecs; ++i) {\n    // Each record is a uint64 tag count followed by that many uint64 tags.\n    if (segment_taglist_data->read(offset, &data_ptr, sizeof(uint64_t)) !=\n        sizeof(uint64_t)) {\n      LOG_ERROR(\"Read tag list data failed\");\n      return IndexError_ReadData;\n    }\n    offset += sizeof(uint64_t);\n\n    uint64_t tag_count = *reinterpret_cast<const uint64_t *>(data_ptr);\n\n    if (segment_taglist_data->read(offset, &data_ptr,\n                                   tag_count * sizeof(uint64_t)) !=\n        tag_count * sizeof(uint64_t)) {\n      LOG_ERROR(\"Read tag list data failed\");\n      return IndexError_ReadData;\n    }\n    offset += tag_count * sizeof(uint64_t);\n\n    const uint64_t *tags = reinterpret_cast<const uint64_t *>(data_ptr);\n    id_to_tags_list.emplace_back(tags, tags + tag_count);\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/index_meta_helper.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdio>\n#include <iostream>\n#include <string>\n#include <utility>\n#include \"zvec/core/framework/index_meta.h\"\n\nnamespace zvec {\nnamespace core {\n\n// Static helpers that print an IndexMeta and fill one in from textual\n// type / method names.\nclass IndexMetaHelper {\n public:\n  // Returns the display name of `type`; values not listed below yield\n  // \"NotSupportedType\".\n  static std::string to_string(IndexMeta::DataType type) {\n    switch (type) {\n      case IndexMeta::DataType::DT_FP32:\n        return std::string(\"FP32\");\n      case IndexMeta::DataType::DT_FP64:\n        return std::string(\"FP64\");\n      case IndexMeta::DataType::DT_INT16:\n        return std::string(\"INT16\");\n      case IndexMeta::DataType::DT_INT8:\n        return std::string(\"INT8\");\n      case IndexMeta::DataType::DT_BINARY32:\n        return std::string(\"Binary\");\n      case IndexMeta::DataType::DT_BINARY64:\n        return std::string(\"Binary64\");\n      case IndexMeta::DataType::DT_FP16:\n        return std::string(\"FP16\");\n      default:\n        return std::string(\"NotSupportedType\");\n    }\n  }\n\n  // Formats the data type, metric name, dimension and element size of `meta`\n  // on a single line.\n  static std::string to_string(IndexMeta meta) {\n    char buffer[1024];\n    snprintf(buffer, sizeof(buffer),\n             \"IndexMeta: type[%s] method[%s] dimension[%u] element_size[%u]\",\n             to_string(meta.data_type()).c_str(), meta.metric_name().c_str(),\n             meta.dimension(), meta.element_size());\n    return std::string(buffer);\n  }\n\n  // Same as the overload below with the dimension set to 0.\n  static bool parse_from(const std::string &type, const std::string &method,\n                         const std::string &vector_type, IndexMeta &meta) {\n    return parse_from(type, method, 0, vector_type, meta);\n  }\n\n  // Fills `meta` from textual settings.\n  //\n  //   type         float | double | int16 | int8 | binary | binary64\n  //   method       L2 | IP | Cosine for float and int8 data,\n  //                HAMMING for binary and binary64 data\n  //   dimension    forwarded to IndexMeta::set_meta\n  //   vector_type  must be \"dense\" or \"sparse\"; it is only validated here\n  //\n  // Returns true on success. Otherwise a message is written to std::cerr,\n  // false is returned and `meta` is left unmodified.\n  static bool parse_from(const std::string &type, const std::string &method,\n                         const size_t dimension, const std::string &vector_type,\n                         IndexMeta &meta) {\n    if (vector_type != \"dense\" && vector_type != \"sparse\") {\n      std::cerr << \"vector type should be dense or sparse!!!\" << std::endl;\n      return false;\n    }\n\n    auto feature_type = IndexMeta::DataType::DT_UNDEFINED;\n    if (type == \"float\") {\n      feature_type = IndexMeta::DataType::DT_FP32;\n    } else if (type == \"double\") {\n      feature_type = IndexMeta::DataType::DT_FP64;\n    } else if (type == \"int16\") {\n      feature_type = IndexMeta::DataType::DT_INT16;\n    } else if (type == \"int8\") {\n      feature_type = IndexMeta::DataType::DT_INT8;\n    } else if (type == \"binary\") {\n      feature_type = IndexMeta::DataType::DT_BINARY32;\n    } else if (type == \"binary64\") {\n      feature_type = IndexMeta::DataType::DT_BINARY64;\n    } else {\n      std::cerr << \"Not supported type: \" << type << std::endl;\n      return false;\n    }\n\n    // DT_FP16 stays in the accepted set although no type string above maps\n    // to it yet.\n    const bool is_numeric = feature_type == IndexMeta::DataType::DT_FP32 ||\n                            feature_type == IndexMeta::DataType::DT_INT8 ||\n                            feature_type == IndexMeta::DataType::DT_FP16;\n    const bool is_binary = feature_type == IndexMeta::DataType::DT_BINARY32 ||\n                           feature_type == IndexMeta::DataType::DT_BINARY64;\n\n    // Resolve the metric before touching `meta`, so that a rejected\n    // type/method combination does not leave it half-updated.\n    const char *metric_name = nullptr;\n    const char *method_label = nullptr;  // method as spelled in the error text\n    bool supported = false;\n    if (method == \"L2\") {\n      metric_name = \"SquaredEuclidean\";\n      method_label = \"L2\";\n      supported = is_numeric;\n    } else if (method == \"IP\") {\n      metric_name = \"InnerProduct\";\n      method_label = \"IP\";\n      supported = is_numeric;\n    } else if (method == \"Cosine\") {\n      metric_name = \"Cosine\";\n      method_label = \"Cosine\";\n      supported = is_numeric;\n    } else if (method == \"HAMMING\") {\n      metric_name = \"Hamming\";\n      method_label = \"hamming\";\n      supported = is_binary;\n    } else {\n      std::cerr << \"Not supported method: \" << method << std::endl;\n      return false;\n    }\n\n    if (!supported) {\n      std::cerr << \"Not supported type(\" << type << \") for \" << method_label\n                << std::endl;\n      return false;\n    }\n\n    meta.set_meta(feature_type, dimension);\n    ailego::Params params;\n    meta.set_metric(metric_name, 0, std::move(params));\n    return true;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "tools/core/local_builder.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <libgen.h>\n#include <signal.h>\n#include <iostream>\n#include <memory>\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"algorithm/flat/flat_utility.h\"\n#include \"algorithm/hnsw_rabitq/hnsw_rabitq_params.h\"\n#if RABITQ_SUPPORTED\n#include \"algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h\"\n#include \"algorithm/hnsw_rabitq/rabitq_converter.h\"\n#endif\n#include \"algorithm/hnsw/hnsw_params.h\"\n#include \"zvec/ailego/logger/logger.h\"\n#include \"zvec/core/framework/index_dumper.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_plugin.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n#include \"index_meta_helper.h\"\n#include \"meta_segment_common.h\"\n#include \"vecs_index_holder.h\"\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wshadow\"\n#pragma clang diagnostic ignored \"-Wdeprecated-declarations\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n#endif\n\n#include 
<yaml-cpp/yaml.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n\nusing namespace std;\nusing namespace zvec::core;\nusing namespace zvec;\n\nbool g_disable_id_map = false;\n\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nVecsIndexHolder::Pointer holder;\nVecsIndexSparseHolder::Pointer sparse_holder;\n\nbool stop_now = false;\nvoid stop(int signo) {\n  if (stop_now) {\n    exit(signo);\n  }\n  stop_now = true;\n  cout << \"\\rTrying to stop. press [Ctrl+C] again kill immediately.\" << endl\n       << flush;\n  if (holder) {\n    holder->stop();\n  }\n}\n\nvoid usage(void) {\n  cout << \"Usage: local_builder CONFIG.yaml [plugin file path]\" << endl;\n}\n\nbool prepare_params(YAML::Node &&config_params, ailego::Params &params) {\n  cout << \"Parse params as blow:\" << endl;\n  for (auto it = config_params.begin(); it != config_params.end(); ++it) {\n    string tag = it->second.Tag();\n    if (tag == \"tag:yaml.org,2002:int\") {\n      int64_t val = it->second.as<int64_t>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:float\") {\n      float val = it->second.as<float>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:bool\") {\n      bool val = it->second.as<bool>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else {\n      if (it->second.IsScalar()) {\n        string val = it->second.as<string>();\n        params.set(it->first.as<string>(), val);\n        cout << it->first.as<string>() << \"=\" << val << endl;\n      } else if (it->second.IsMap()) {\n        ailego::Params sub_params;\n        auto sub_node = it->second;\n        if (!prepare_params(std::move(sub_node), 
sub_params)) {\n          LOG_ERROR(\"parse params error with key[%s]\",\n                    it->first.as<string>().c_str());\n          return false;\n        }\n        params.set(it->first.as<string>(), sub_params);\n      }\n    }\n  }\n  return true;\n}\n\nint setup_hnsw_rabitq_streamer(const IndexStreamer::Pointer &streamer,\n                               const IndexMeta &meta, YAML::Node &config_root,\n                               const std::string &converter_name,\n                               IndexHolder::Pointer *build_holder) {\n#if RABITQ_SUPPORTED\n  RabitqConverter rabitq_converter;\n  ailego::Params rabitq_converter_params;\n  if (config_root[\"RabitqConverterParams\"]) {\n    auto rabitq_params_node = config_root[\"RabitqConverterParams\"];\n    if (!prepare_params(std::move(rabitq_params_node),\n                        rabitq_converter_params)) {\n      cerr << \"Failed to prepare rabitq converter params\" << endl;\n      return -1;\n    }\n  }\n  if (rabitq_converter.init(meta, rabitq_converter_params) != 0) {\n    cerr << \"rabitq converter init failed\" << std::endl;\n    return -1;\n  }\n  if (rabitq_converter.train(*build_holder) != 0) {\n    cerr << \"rabitq converter train failed\" << std::endl;\n    return -1;\n  }\n  IndexReformer::Pointer rabitq_reformer;\n  rabitq_converter.to_reformer(&rabitq_reformer);\n  HnswRabitqStreamer *hnsw_rabitq_streamer =\n      dynamic_cast<HnswRabitqStreamer *>(streamer.get());\n  hnsw_rabitq_streamer->set_reformer(std::move(rabitq_reformer));\n  IndexProvider::Pointer provider;\n  if (converter_name.empty()) {\n    // build_holder is VecsIndexHolder\n    provider = std::dynamic_pointer_cast<IndexProvider>(*build_holder);\n  } else {\n    // build_holder is ordinary IndexHolder, need to convert\n    provider = convert_holder_to_provider(*build_holder);\n    // reuse provider to release memory\n    *build_holder = provider;\n  }\n\n  if (!provider) {\n    cerr << \"Failed to cast build holder to 
provider\" << endl;\n    return -1;\n  }\n  hnsw_rabitq_streamer->set_provider(provider);\n  return 0;\n#else\n  cerr << \"HNSW RaBitQ is not supported on this platform\" << endl;\n  return -1;\n#endif\n}\n\nbool check_config(YAML::Node &config_root) {\n  auto common = config_root[\"BuilderCommon\"];\n  if (!common) {\n    LOG_ERROR(\"Can not find [BuilderClass] in config\");\n    return false;\n  }\n  if (!common[\"BuilderClass\"]) {\n    LOG_ERROR(\"Can not find [BuilderClass] in config\");\n    return false;\n  }\n  if (!common[\"BuildFile\"]) {\n    LOG_ERROR(\"Can not find [BuildFile] in config\");\n    return false;\n  }\n  if (common[\"NeedTrain\"] && common[\"NeedTrain\"].as<bool>()) {\n    if (!common[\"TrainFile\"]) {\n      LOG_ERROR(\"Can not find [TrainFile] in config\");\n      return false;\n    }\n  }\n  if (common[\"UseTrainer\"]) {\n    if (!common[\"TrainerIndexPath\"]) {\n      LOG_ERROR(\"Can not find [TrainerIndexPath] in config\");\n      return false;\n    }\n    if (!config_root[\"TrainerParams\"]) {\n      LOG_ERROR(\"Can not find [TrainerParams] in config\");\n      return false;\n    }\n  }\n  if (!common[\"DumpPath\"]) {\n    LOG_ERROR(\"Can not find [DumpPath] in config\");\n    return false;\n  }\n  if (!config_root[\"BuilderParams\"]) {\n    LOG_ERROR(\"Can not find [BuilderParams] in config\");\n    return false;\n  }\n  return true;\n}\n\nstatic inline size_t AlignSize(size_t size) {\n  return (size + 0x1F) & (~0x1F);\n}\n\nbool dump_meta_segment(const IndexDumper::Pointer &dumper,\n                       const std::string &segment_id, const void *data,\n                       size_t size, size_t &writes) {\n  size_t len = dumper->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect: %lu, actual: %lu\",\n              segment_id.c_str(), size, len);\n    return false;\n  }\n\n  size_t padding_size = AlignSize(size) - size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, 
'\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      LOG_ERROR(\"Append padding failed, size %lu\", padding_size);\n      return false;\n    }\n  }\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  int ret = dumper->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return false;\n  }\n\n  writes = len + padding_size;\n\n  return true;\n}\n\nint dump_taglist(IndexDumper::Pointer dumper, size_t num_vecs,\n                 const void *key_base, const void *taglist_data,\n                 uint64_t taglist_size) {\n  TagListHeader taglist_header;\n\n  taglist_header.num_vecs = num_vecs;\n\n  size_t total_writes;\n\n  bool ret =\n      dump_meta_segment(dumper, TAGLIST_HEADER_SEGMENT_NAME, &taglist_header,\n                        sizeof(TagListHeader), total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist meta failed\");\n    return IndexError_WriteData;\n  }\n\n  ret = dump_meta_segment(dumper, TAGLIST_KEY_SEGMENT_NAME, key_base,\n                          num_vecs * sizeof(uint64_t), total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist key failed\");\n    return IndexError_WriteData;\n  }\n\n  ret = dump_meta_segment(dumper, TAGLIST_DATA_SEGMENT_NAME, taglist_data,\n                          taglist_size, total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist data failed\");\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint do_build_sparse_by_streamer(IndexStreamer::Pointer &streamer,\n                                uint32_t thread_count) {\n  int ret;\n  ailego::ThreadPool pool(thread_count, false);\n  std::atomic<size_t> finished{0};\n  int errcode = 0;\n  std::mutex mutex;\n  std::atomic_bool error{false};\n  std::condition_variable cond{};\n\n  auto meta = streamer->meta();\n  IndexReformer::Pointer reformer;\n  if (!meta.reformer_name().empty()) {\n    reformer = 
IndexFactory::CreateReformer(meta.reformer_name());\n    if (!reformer) {\n      LOG_ERROR(\"Failed to create reformer %s\", meta.reformer_name().c_str());\n      return IndexError_NoExist;\n    }\n    reformer->init(meta.reformer_params());\n  }\n\n  IndexQueryMeta qmeta(sparse_holder->data_type());\n  uint32_t keep_docs = sparse_holder->count() - sparse_holder->start_cursor();\n\n\n  std::function<int(uint64_t, const uint32_t, const uint32_t *, const void *,\n                    const IndexQueryMeta &, IndexContext::Pointer &)>\n      add_to_streamer_sparse = [&](uint64_t pkey, const uint32_t sparse_count,\n                                   const uint32_t *sparse_indices,\n                                   const void *sparse_query,\n                                   const IndexQueryMeta &qmeta,\n                                   IndexContext::Pointer &context) -> int {\n    return streamer->add_impl(pkey, sparse_count, sparse_indices, sparse_query,\n                              qmeta, context);\n  };\n  if (g_disable_id_map) {\n    add_to_streamer_sparse = [&](uint64_t pkey, const uint32_t sparse_count,\n                                 const uint32_t *sparse_indices,\n                                 const void *sparse_query,\n                                 const IndexQueryMeta &qmeta,\n                                 IndexContext::Pointer &context) -> int {\n      return streamer->add_with_id_impl(static_cast<uint32_t>(pkey),\n                                        sparse_count, sparse_indices,\n                                        sparse_query, qmeta, context);\n    };\n  }\n\n  auto do_build = [&](size_t idx) {\n    AILEGO_DEFER([&]() {\n      std::lock_guard<std::mutex> latch(mutex);\n      cond.notify_one();\n    });\n    auto ctx = streamer->create_context();\n    if (!ctx) {\n      if (!error.exchange(true)) {\n        LOG_ERROR(\"Failed to create streamer context\");\n        errcode = IndexError_NoMemory;\n      }\n      return;\n    }\n    
std::string ovec;\n    IndexQueryMeta ometa;\n    for (uint32_t id = idx; id < sparse_holder->count() && !stop_now;\n         id += thread_count) {\n      uint64_t key = sparse_holder->get_key(id);\n      if (reformer) {\n        std::string new_vec;\n        IndexQueryMeta new_meta;\n        ret = reformer->convert(sparse_holder->get_sparse_count(id),\n                                sparse_holder->get_sparse_indices(id),\n                                sparse_holder->get_sparse_data(id), qmeta,\n                                &new_vec, &new_meta);\n        if (ret != 0) {\n          LOG_ERROR(\"Failed to convert sparse vector for %s\",\n                    IndexError::What(ret));\n          errcode = ret;\n          return;\n        }\n        ret = add_to_streamer_sparse(key, sparse_holder->get_sparse_count(id),\n                                     sparse_holder->get_sparse_indices(id),\n                                     new_vec.data(), new_meta, ctx);\n      } else {\n        ret = add_to_streamer_sparse(key, sparse_holder->get_sparse_count(id),\n                                     sparse_holder->get_sparse_indices(id),\n                                     sparse_holder->get_sparse_data(id), qmeta,\n                                     ctx);\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        if (!error.exchange(true)) {\n          LOG_ERROR(\"streamer all_impl failed\\n\");\n          errcode = ret;\n        }\n        return;\n      }\n      if (id >= keep_docs) {\n        ret =\n            streamer->remove_impl(sparse_holder->get_key(id - keep_docs), ctx);\n        if (ailego_unlikely(ret != 0)) {\n          if (!error.exchange(true)) {\n            LOG_ERROR(\"streamer remove_impl failed\\n\");\n            errcode = ret;\n          }\n          return;\n        }\n      }\n      finished++;\n    }\n    return;\n  };\n\n  for (size_t i = 0; i < pool.count(); ++i) {\n    pool.execute(do_build, i);\n  }\n\n  while (!pool.is_finished()) {\n    
std::unique_lock<std::mutex> lk(mutex);\n    cond.wait_until(\n        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));\n    if (error.load(std::memory_order_acquire)) {\n      LOG_ERROR(\"Failed to build index while waiting finish\");\n      return errcode;\n    }\n    LOG_INFO(\"Built cnt %zu, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / sparse_holder->count());\n  }\n  if (error.load(std::memory_order_acquire)) {\n    LOG_ERROR(\"Failed to build index while waiting finish\");\n    return errcode;\n  }\n  pool.wait_finish();\n\n  return 0;\n}\n\nint build_sparse_by_streamer(IndexStreamer::Pointer &streamer,\n                             YAML::Node &config_common) {\n  if (!config_common[\"IndexPath\"]) {\n    LOG_ERROR(\"Miss params IndexPath for Streamer\");\n    return IndexError_InvalidArgument;\n  }\n  string path = config_common[\"IndexPath\"].as<string>();\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  if (!storage) {\n    LOG_ERROR(\"Failed to create storage\");\n    return IndexError_NoExist;\n  }\n  ailego::Params params;\n  int ret = storage->init(params);\n  if (ret != 0) {\n    LOG_ERROR(\"Storage Failed init\");\n    return IndexError_Runtime;\n  }\n  ret = storage->open(path, true);\n  if (ret != 0) {\n    LOG_ERROR(\"Storage Failed to open\");\n    return IndexError_Runtime;\n  }\n  ret = streamer->open(storage);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open storage\");\n    return IndexError_Runtime;\n  }\n\n  size_t thread_count = config_common[\"ThreadCount\"]\n                            ? 
config_common[\"ThreadCount\"].as<uint64_t>()\n                            : std::thread::hardware_concurrency();\n\n  auto meta = streamer->meta();\n\n  LOG_DEBUG(\"thread count: %zu, retrieval_mode: sparse\", thread_count);\n  do_build_sparse_by_streamer(streamer, thread_count);\n\n  return 0;\n}\n\nint do_build_by_streamer(IndexStreamer::Pointer &streamer,\n                         uint32_t thread_count, RetrievalMode retrieval_mode) {\n  int ret;\n  ailego::ThreadPool pool(thread_count, false);\n  std::atomic<size_t> finished{0};\n  int errcode = 0;\n  std::mutex mutex;\n  std::atomic_bool error{false};\n  std::condition_variable cond{};\n\n  auto meta = streamer->meta();\n  IndexReformer::Pointer reformer;\n  if (!meta.reformer_name().empty()) {\n    if (retrieval_mode != RM_DENSE) {\n      LOG_ERROR(\"Reformer not supported\");\n      return IndexError_Runtime;\n    } else {\n      reformer = IndexFactory::CreateReformer(meta.reformer_name());\n      if (!reformer) {\n        LOG_ERROR(\"Failed to create reformer %s\", meta.reformer_name().c_str());\n        return IndexError_NoExist;\n      }\n      reformer->init(meta.reformer_params());\n    }\n  }\n\n  IndexQueryMeta qmeta(holder->data_type(), holder->dimension());\n  uint32_t keep_docs = holder->count() - holder->start_cursor();\n\n  std::function<int(uint64_t, const void *, const IndexQueryMeta &,\n                    IndexContext::Pointer &)>\n      add_to_streamer = [&](uint64_t pkey, const void *query,\n                            const IndexQueryMeta &qmeta,\n                            IndexContext::Pointer &context) -> int {\n    return streamer->add_impl(pkey, query, qmeta, context);\n  };\n  if (g_disable_id_map) {\n    add_to_streamer = [&](uint64_t pkey, const void *query,\n                          const IndexQueryMeta &qmeta,\n                          IndexStreamer::Context::Pointer &context) -> int {\n      return streamer->add_with_id_impl(static_cast<uint32_t>(pkey), query,\n             
                           qmeta, context);\n    };\n  }\n\n  auto do_build = [&](size_t idx) {\n    AILEGO_DEFER([&]() {\n      std::lock_guard<std::mutex> latch(mutex);\n      cond.notify_one();\n    });\n    auto ctx = streamer->create_context();\n    if (!ctx) {\n      if (!error.exchange(true)) {\n        LOG_ERROR(\"Failed to create streamer context\");\n        errcode = IndexError_NoMemory;\n      }\n      return;\n    }\n    std::string ovec;\n    IndexQueryMeta ometa;\n    for (uint32_t id = idx; id < holder->count() && !stop_now;\n         id += thread_count) {\n      uint64_t key = holder->get_key(id);\n      if (retrieval_mode == RM_DENSE) {\n        if (reformer) {\n          ret = reformer->convert(holder->get_vector_by_index(id), qmeta, &ovec,\n                                  &ometa);\n          if (ret != 0) {\n            LOG_ERROR(\"Failed to convert vector for %s\", IndexError::What(ret));\n            errcode = ret;\n            return;\n          }\n          ret = add_to_streamer(key, ovec.data(), ometa, ctx);\n        } else {\n          ret =\n              add_to_streamer(key, holder->get_vector_by_index(id), qmeta, ctx);\n        }\n      } else {\n        LOG_ERROR(\"Retrieval mode not supported\");\n        errcode = IndexError_Unsupported;\n        return;\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        if (!error.exchange(true)) {\n          LOG_ERROR(\"streamer add_impl failed\");\n          errcode = ret;\n        }\n        return;\n      }\n      if (id >= keep_docs) {\n        ret = streamer->remove_impl(holder->get_key(id - keep_docs), ctx);\n        if (ailego_unlikely(ret != 0)) {\n          if (!error.exchange(true)) {\n            LOG_ERROR(\"streamer remove_impl failed\");\n            errcode = ret;\n          }\n          return;\n        }\n      }\n      finished++;\n    }\n    return;\n  };\n\n  for (size_t i = 0; i < pool.count(); ++i) {\n    pool.execute(do_build, i);\n  }\n\n  while 
(!pool.is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex);\n    cond.wait_until(\n        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));\n    if (error.load(std::memory_order_acquire)) {\n      LOG_ERROR(\"Failed to build index while waiting finish\");\n      return errcode;\n    }\n    LOG_INFO(\"Built cnt %zu, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / holder->count());\n  }\n  if (error.load(std::memory_order_acquire)) {\n    LOG_ERROR(\"Failed to build index while waiting finish\");\n    return errcode;\n  }\n  pool.wait_finish();\n\n  return 0;\n}\n\nint build_by_streamer(IndexStreamer::Pointer &streamer,\n                      YAML::Node &config_common) {\n  if (!config_common[\"IndexPath\"]) {\n    LOG_ERROR(\"Miss params IndexPath for Streamer\");\n    return IndexError_InvalidArgument;\n  }\n  string path = config_common[\"IndexPath\"].as<string>();\n\n  ailego::File::RemovePath(path);\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  if (!storage) {\n    LOG_ERROR(\"Failed to create storage\");\n    return IndexError_NoExist;\n  }\n  ailego::Params params;\n  int ret = storage->init(params);\n  if (ret != 0) {\n    LOG_ERROR(\"Storage Failed init\");\n    return IndexError_Runtime;\n  }\n  ret = storage->open(path, true);\n  if (ret != 0) {\n    LOG_ERROR(\"Storage Failed to open\");\n    return IndexError_Runtime;\n  }\n  ret = streamer->open(storage);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to open storage\");\n    return IndexError_Runtime;\n  }\n\n  size_t thread_count = config_common[\"ThreadCount\"]\n                            ? 
config_common[\"ThreadCount\"].as<uint64_t>()\n                            : std::thread::hardware_concurrency();\n\n  auto meta = streamer->meta();\n\n  RetrievalMode retrieval_mode = RM_UNDEFINED;\n  if (meta.dimension() > 0) {\n    retrieval_mode = RM_DENSE;\n  } else {\n    retrieval_mode = RM_SPARSE;\n  }\n\n  LOG_DEBUG(\"thread count: %zu, retrieval mode: %s\", thread_count,\n            retrieval_mode == 1 ? \"Dense\" : \"Sparse\");\n  do_build_by_streamer(streamer, thread_count, retrieval_mode);\n\n  return 0;\n}\n\nIndexSparseHolder::Pointer convert_sparse_holder(\n    const std::string &name, const ailego::Params &params,\n    VecsIndexSparseHolder::Pointer &in_holder, IndexMeta &index_meta) {\n  IndexSparseHolder::Pointer cast_holder =\n      std::dynamic_pointer_cast<IndexSparseHolder>(in_holder);\n  if (name.empty()) {\n    return cast_holder;\n  }\n\n  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);\n  if (!converter) {\n    LOG_ERROR(\"Failed to create sparse converter %s\", name.c_str());\n    return IndexSparseHolder::Pointer();\n  }\n\n  int ret = converter->init(in_holder->index_meta(), params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init converter %d\", ret);\n    return IndexSparseHolder::Pointer();\n  }\n\n  ret = converter->train(cast_holder);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to train sparse converter %d\", ret);\n    return IndexSparseHolder::Pointer();\n  }\n\n  ret = converter->transform(cast_holder);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to transform converter %d\", ret);\n    return IndexSparseHolder::Pointer();\n  }\n\n  index_meta = converter->meta();\n\n  return converter->sparse_result();\n}\n\nIndexHolder::Pointer convert_holder(const std::string &name,\n                                    const ailego::Params &params,\n                                    VecsIndexHolder::Pointer &in_holder,\n                                    IndexMeta &index_meta) {\n  IndexHolder::Pointer 
cast_holder =\n      std::dynamic_pointer_cast<IndexHolder>(in_holder);\n  if (name.empty()) {\n    return cast_holder;\n  }\n\n  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);\n  if (!converter) {\n    LOG_ERROR(\"Failed to create converter %s\", name.c_str());\n    return IndexHolder::Pointer();\n  }\n\n  int ret = converter->init(in_holder->index_meta(), params);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to init converter %d\", ret);\n    return IndexHolder::Pointer();\n  }\n\n  ret = converter->train(cast_holder);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to train converter %d\", ret);\n    return IndexHolder::Pointer();\n  }\n\n  ret = converter->transform(cast_holder);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to transform converter %d\", ret);\n    return IndexHolder::Pointer();\n  }\n\n  index_meta = converter->meta();\n\n  return converter->result();\n}\n\nint do_build_sparse(YAML::Node &config_root, YAML::Node &config_common) {\n  string build_file = config_common[\"BuildFile\"].as<string>();\n  VecsIndexSparseHolder::Pointer build_holder(new VecsIndexSparseHolder);\n  if (!build_holder->load(build_file)) {\n    LOG_ERROR(\"Load input error: %s\", build_file.c_str());\n    return -1;\n  }\n  IndexMeta meta;\n  meta = build_holder->index_meta();\n\n  std::string metric_name;\n  ailego::Params metric_params;\n  if (config_common[\"MetricName\"] &&\n      !config_common[\"MetricName\"].as<string>().empty()) {\n    metric_name = config_common[\"MetricName\"].as<string>();\n    if (config_root[\"MetricParams\"] &&\n        !prepare_params(config_root[\"MetricParams\"], metric_params)) {\n      LOG_ERROR(\"Failed to prepare metric params\");\n      return -1;\n    }\n    build_holder->set_metric(metric_name, metric_params);\n    meta.set_metric(metric_name, 0, metric_params);\n  }\n\n  string converter_name;\n  ailego::Params converter_params;\n  if (config_common[\"ConverterName\"] &&\n      
!config_common[\"ConverterName\"].as<string>().empty()) {\n    converter_name = config_common[\"ConverterName\"].as<string>();\n    if (config_root[\"ConverterParams\"] &&\n        !prepare_params(config_root[\"ConverterParams\"], converter_params)) {\n      LOG_ERROR(\"Failed to prepare converter params\");\n      return -1;\n    }\n  }\n\n  if (config_common[\"MaxDocs\"] && config_common[\"MaxDocs\"].as<uint32_t>()) {\n    auto max_docs = config_common[\"MaxDocs\"].as<uint32_t>();\n    build_holder->set_max_doc_count(max_docs);\n  }\n  if (config_common[\"KeepDocs\"] && config_common[\"KeepDocs\"].as<uint32_t>()) {\n    auto keep_docs = config_common[\"KeepDocs\"].as<uint32_t>();\n    if (keep_docs < build_holder->count()) {\n      build_holder->set_start_cursor(build_holder->count() - keep_docs);\n    }\n  }\n\n  // Create a Builder\n  string builder_class = config_common[\"BuilderClass\"].as<string>();\n  IndexStreamer::Pointer streamer;\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(builder_class.c_str());\n  if (!builder) {\n    streamer = IndexFactory::CreateStreamer(builder_class.c_str());\n  }\n  if (!builder && !streamer) {\n    LOG_ERROR(\"Failed to create builder %s\", builder_class.c_str());\n    return -1;\n  }\n  cout << \"Created builder \" << builder_class << endl;\n\n  IndexSparseHolder::Pointer cv_build_holder = convert_sparse_holder(\n      converter_name, converter_params, build_holder, meta);\n  if (!cv_build_holder) {\n    LOG_ERROR(\"Convert holder failed.\");\n    return -1;\n  }\n\n  ailego::Params params;\n  if (!prepare_params(config_root[\"BuilderParams\"], params)) {\n    LOG_ERROR(\"Failed to prepare params\");\n    return -1;\n  }\n\n  // INIT\n  int ret =\n      builder ? 
builder->init(meta, params) : streamer->init(meta, params);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to init builder, ret=%d\", ret);\n    return -1;\n  }\n  ailego::ElapsedTime timer;\n\n  // TRAIN\n  if (builder && config_common[\"NeedTrain\"] &&\n      config_common[\"NeedTrain\"].as<bool>()) {\n    string train_file = config_common[\"TrainFile\"].as<string>();\n    VecsIndexSparseHolder::Pointer train_holder(new VecsIndexSparseHolder);\n    if (!train_holder->load(train_file)) {\n      LOG_ERROR(\"Load input error: %s\", train_file.c_str());\n      return -1;\n    }\n\n    if (!metric_name.empty()) {\n      train_holder->set_metric(metric_name, metric_params);\n    }\n\n    IndexSparseHolder::Pointer cv_train_holder = convert_sparse_holder(\n        converter_name, converter_params, train_holder, meta);\n    if (!cv_train_holder) {\n      LOG_ERROR(\"Convert train holder failed.\");\n      return -1;\n    }\n\n    std::cout << \"Prepare train data done!\" << std::endl;\n    timer.reset();\n    ret = builder->train(std::move(cv_train_holder));\n    size_t train_time = timer.milli_seconds();\n\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to train in builder, ret=%d\", ret);\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else {\n    cout << \"Skip train procedure\" << endl;\n  }\n\n  // BUILD\n  sparse_holder = build_holder;\n  signal(SIGINT, stop);\n  timer.reset();\n  if (builder != nullptr) {\n    ret = builder->build(std::move(cv_build_holder));\n  } else {\n    ret = build_sparse_by_streamer(streamer, config_common);\n  }\n  size_t build_time = timer.milli_seconds();\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to build in builder, ret=%d\", ret);\n    return -1;\n  }\n  cout << \"Build finished, consume \" << build_time << \"ms.\" << endl;\n  signal(SIGINT, SIG_DFL);\n\n  // DUMP\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  if (!dumper) {\n    LOG_ERROR(\"Failed to 
create FileDumper.\");\n    return -1;\n  }\n  string dump_prefix = config_common[\"DumpPath\"].as<string>();\n  ret = dumper->create(dump_prefix);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to create in dumper, ret=%d\", ret);\n    return -1;\n  }\n  timer.reset();\n  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);\n  size_t dump_time = timer.milli_seconds();\n  if (ret == IndexError_NotImplemented) {\n    LOG_WARN(\"Dump index not implemented\");\n  } else if (ret < 0) {\n    LOG_ERROR(\"Failed to dump in builder, ret=%d\", ret);\n    return -1;\n  }\n\n  if (build_holder->has_taglist()) {\n    size_t taglist_size{0};\n    const void *taglist_data = build_holder->get_taglist_data(taglist_size);\n    const void *key_base = build_holder->get_key_base();\n\n    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,\n                 taglist_size);\n  }\n\n  ret = dumper->close();\n  if (ret != 0) {\n    LOG_ERROR(\"Dumper failed to close, ret=%d\", ret);\n    return -1;\n  }\n  std::cout << \"Dump to [\" << dump_prefix << \"] finished, consume \" << dump_time\n            << \"ms.\" << std::endl;\n\n  if (builder) {\n    auto &stats =\n        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << stats.trained_count()\n              << \"]\\n\\tBuilt count[\" << stats.built_count()\n              << \"]\\n\\tDump count[\" << stats.dumped_count()\n              << \"]\\n\\tDiscarded count[\" << stats.discarded_count() << \"]\\n\";\n  } else {\n    auto &stats = streamer->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << 0 << \"]\\n\\tBuilt count[\"\n              << stats.added_count() << \"]\\n\\tDump size [\"\n              << stats.dumped_size() << \"]\\n\\tDiscarded count[\"\n              << stats.discarded_count() << \"]\\n\";\n  }\n\n  // CLEANUP\n  builder ? 
builder->cleanup() : streamer->cleanup();\n\n  return 0;\n}\n\nint do_build(YAML::Node &config_root, YAML::Node &config_common) {\n  string build_file = config_common[\"BuildFile\"].as<string>();\n  VecsIndexHolder::Pointer build_holder(new VecsIndexHolder);\n  if (!build_holder->load(build_file)) {\n    LOG_ERROR(\"Load input error: %s\", build_file.c_str());\n    return -1;\n  }\n  IndexMeta meta;\n  meta = build_holder->index_meta();\n\n  std::string metric_name;\n  ailego::Params metric_params;\n  if (config_common[\"MetricName\"] &&\n      !config_common[\"MetricName\"].as<string>().empty()) {\n    metric_name = config_common[\"MetricName\"].as<string>();\n    if (config_root[\"MetricParams\"] &&\n        !prepare_params(config_root[\"MetricParams\"], metric_params)) {\n      LOG_ERROR(\"Failed to prepare metric params\");\n      return -1;\n    }\n    build_holder->set_metric(metric_name, metric_params);\n    meta.set_metric(metric_name, 0, metric_params);\n  }\n  IndexMeta input_meta = meta;\n  string converter_name;\n  ailego::Params converter_params;\n  if (config_common[\"ConverterName\"] &&\n      !config_common[\"ConverterName\"].as<string>().empty()) {\n    converter_name = config_common[\"ConverterName\"].as<string>();\n    if (config_root[\"ConverterParams\"] &&\n        !prepare_params(config_root[\"ConverterParams\"], converter_params)) {\n      LOG_ERROR(\"Failed to prepare converter params\");\n      return -1;\n    }\n  }\n  IndexMeta::MajorOrder order = IndexMeta::MO_UNDEFINED;\n  if (config_common[\"MajorOrder\"]) {\n    std::string order_str = config_common[\"MajorOrder\"].as<string>();\n    if (order_str == \"row\") {\n      order = IndexMeta::MajorOrder::MO_ROW;\n    } else {\n      order = IndexMeta::MajorOrder::MO_COLUMN;\n    }\n  }\n\n  if (config_common[\"MaxDocs\"] && config_common[\"MaxDocs\"].as<uint32_t>()) {\n    auto max_docs = config_common[\"MaxDocs\"].as<uint32_t>();\n    build_holder->set_max_doc_count(max_docs);\n  }\n  if 
(config_common[\"KeepDocs\"] && config_common[\"KeepDocs\"].as<uint32_t>()) {\n    auto keep_docs = config_common[\"KeepDocs\"].as<uint32_t>();\n    if (keep_docs < build_holder->count()) {\n      build_holder->set_start_cursor(build_holder->count() - keep_docs);\n    }\n  }\n\n  // Create a Builder\n  string builder_class = config_common[\"BuilderClass\"].as<string>();\n  IndexStreamer::Pointer streamer;\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(builder_class.c_str());\n  if (!builder) {\n    streamer = IndexFactory::CreateStreamer(builder_class.c_str());\n  }\n  if (!builder && !streamer) {\n    LOG_ERROR(\"Failed to create builder %s\", builder_class.c_str());\n    return -1;\n  }\n  cout << \"Created builder \" << builder_class << endl;\n\n\n  IndexHolder::Pointer cv_build_holder =\n      convert_holder(converter_name, converter_params, build_holder, meta);\n  if (!cv_build_holder) {\n    LOG_ERROR(\"Convert holder failed.\");\n    return -1;\n  }\n  meta.set_major_order(order);\n  cout << IndexMetaHelper::to_string(meta) << endl;\n  cout << \"Prepare data done!\" << endl;\n\n  ailego::Params params;\n  if (!prepare_params(config_root[\"BuilderParams\"], params)) {\n    LOG_ERROR(\"Failed to prepare params\");\n    return -1;\n  }\n  std::vector<std::string> id_map_param_list = {\n      PARAM_HNSW_STREAMER_USE_ID_MAP,\n      PARAM_FLAT_USE_ID_MAP,\n      PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP,\n  };\n  for (auto &param : id_map_param_list) {\n    params.set(param, !g_disable_id_map);\n  }\n\n  // INIT\n  int ret =\n      builder ? 
builder->init(meta, params) : streamer->init(meta, params);\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to init builder, ret=%d\", ret);\n    return -1;\n  }\n  ailego::ElapsedTime timer;\n\n  // TRAIN\n  if (config_common[\"UseTrainer\"] && config_common[\"UseTrainer\"].as<bool>()) {\n    ailego::Params trainer_params;\n    if (!prepare_params(config_root[\"TrainerParams\"], trainer_params)) {\n      LOG_ERROR(\"Failed to prepare trainer params\");\n      return -1;\n    }\n\n    string train_index_path;\n    if (config_common[\"TrainerIndexPath\"]) {\n      train_index_path = config_common[\"TrainerIndexPath\"].as<string>();\n      if (train_index_path.empty()) {\n        LOG_ERROR(\"invalid TrainerIndexPath format\");\n        return -1;\n      }\n      cout << \"Trainer index path: \" << train_index_path << \"\\n\";\n    } else {\n      LOG_ERROR(\"Need [TrainerIndexPath] config\");\n      return -1;\n    }\n\n    IndexTrainer::Pointer trainer =\n        IndexFactory::CreateTrainer(\"StratifiedClusterTrainer\");\n    if (trainer->init(meta, trainer_params) != 0) {\n      LOG_ERROR(\"trainer init failed\");\n      return -1;\n    }\n\n    if (ailego::File::IsExist(train_index_path)) {\n      IndexStorage::Pointer container =\n          IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n      if (!container) {\n        LOG_ERROR(\"Failed to create MMapFileReadStorage\");\n        return -1;\n      }\n      container->init(ailego::Params());\n      if (container->open(train_index_path, false) != 0) {\n        LOG_ERROR(\"MMapFileReadStorage failed to load %s\",\n                  train_index_path.c_str());\n        return -1;\n      }\n      if (trainer->load(container) != 0) {\n        LOG_ERROR(\"Trainer failed to load container\");\n        return -1;\n      };\n    } else {\n      std::cout << \"Prepare trainer data...\" << std::endl;\n      string train_file = config_common[\"TrainFile\"].as<string>();\n      VecsIndexHolder::Pointer train_holder(new 
VecsIndexHolder);\n      if (!train_holder->load(train_file)) {\n        LOG_ERROR(\"Load input error: %s\", train_file.c_str());\n        return -1;\n      }\n      if (!metric_name.empty()) {\n        train_holder->set_metric(metric_name, metric_params);\n      }\n\n      // support fp16 convert\n\n      IndexHolder::Pointer cv_train_holder =\n          convert_holder(converter_name, converter_params, train_holder, meta);\n      if (!cv_train_holder) {\n        LOG_ERROR(\"Convert train holder failed.\");\n        return -1;\n      }\n\n      std::cout << \"Prepare trainer data done!\" << std::endl;\n      std::cout << \"Prepare train data!\" << std::endl;\n\n      ret = trainer->train(cv_train_holder);\n      if (ret != 0) {\n        LOG_ERROR(\"trainer train_index failed with %d\", ret);\n        return -1;\n      }\n\n      std::cout << \"train data done!\" << std::endl;\n      IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n      if (!dumper) {\n        LOG_ERROR(\"Failed to create FileDumper.\");\n        return -1;\n      }\n      if (dumper->init(ailego::Params()) != 0) {\n        LOG_ERROR(\"Failed to init FileDumper.\");\n        return -1;\n      }\n      ret = dumper->create(train_index_path);\n      if (ret != 0) {\n        LOG_ERROR(\"Failed to create in dumper, ret=%d\", ret);\n        return -1;\n      }\n      if (trainer->dump(dumper) != 0) {\n        LOG_ERROR(\"trainer dump_index failed\");\n        return -1;\n      }\n      dumper->close();\n    }\n\n    ret = builder->train(trainer);\n    size_t train_time = timer.milli_seconds();\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to train in builder, ret=%d\", ret);\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else if (builder && config_common[\"NeedTrain\"] &&\n             config_common[\"NeedTrain\"].as<bool>()) {\n    string train_file = config_common[\"TrainFile\"].as<string>();\n    
VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);\n    if (!train_holder->load(train_file)) {\n      LOG_ERROR(\"Load input error: %s\", train_file.c_str());\n      return -1;\n    }\n\n    if (!metric_name.empty()) {\n      train_holder->set_metric(metric_name, metric_params);\n    }\n    IndexHolder::Pointer cv_train_holder =\n        convert_holder(converter_name, converter_params, train_holder, meta);\n    if (!cv_train_holder) {\n      LOG_ERROR(\"Convert train holder failed.\");\n      return -1;\n    }\n\n    std::cout << \"Prepare train data done!\" << std::endl;\n    timer.reset();\n    ret = builder->train(std::move(cv_train_holder));\n    size_t train_time = timer.milli_seconds();\n    if (ret < 0) {\n      LOG_ERROR(\"Failed to train in builder, ret=%d\", ret);\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else {\n    cout << \"Skip train procedure\" << endl;\n  }\n\n  if (builder_class == \"HnswRabitqStreamer\") {\n    if (setup_hnsw_rabitq_streamer(streamer, input_meta, config_root,\n                                   converter_name, &cv_build_holder) != 0) {\n      return -1;\n    }\n  } else if (builder_class == \"HnswRabitqBuilder\" && !converter_name.empty()) {\n    cv_build_holder = convert_holder_to_provider(cv_build_holder);\n  }\n\n  // BUILD\n  holder = build_holder;\n  signal(SIGINT, stop);\n  timer.reset();\n  if (builder != nullptr) {\n    ret = builder->build(std::move(cv_build_holder));\n  } else {\n    std::string retrieval_mode = \"dense\";\n    if (meta.dimension() > 0) {\n      retrieval_mode = \"sparse\";\n    } else {\n      retrieval_mode = \"dense\";\n    }\n\n    ret = build_by_streamer(streamer, config_common);\n  }\n  size_t build_time = timer.milli_seconds();\n  if (ret < 0) {\n    LOG_ERROR(\"Failed to build in builder, ret=%d\", ret);\n    return -1;\n  }\n  cout << \"Build finished, consume \" << build_time << \"ms.\" << endl;\n  signal(SIGINT, 
SIG_DFL);\n\n  // DUMP\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  if (!dumper) {\n    LOG_ERROR(\"Failed to create FileDumper.\");\n    return -1;\n  }\n  string dump_prefix = config_common[\"DumpPath\"].as<string>();\n  ret = dumper->create(dump_prefix);\n  if (ret != 0) {\n    LOG_ERROR(\"Failed to create in dumper, ret=%d\", ret);\n    return -1;\n  }\n  timer.reset();\n  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);\n  size_t dump_time = timer.milli_seconds();\n  if (ret == IndexError_NotImplemented) {\n    LOG_WARN(\"Dump index not implemented\");\n  } else if (ret < 0) {\n    LOG_ERROR(\"Failed to dump in builder, ret=%d\", ret);\n    return -1;\n  }\n\n  if (build_holder->has_taglist()) {\n    size_t taglist_size{0};\n    const void *taglist_data = build_holder->get_taglist_data(taglist_size);\n    const void *key_base = build_holder->get_key_base();\n\n    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,\n                 taglist_size);\n  }\n\n  ret = dumper->close();\n  if (ret != 0) {\n    LOG_ERROR(\"Dumper failed to close, ret=%d\", ret);\n    return -1;\n  }\n  std::cout << \"Dump to [\" << dump_prefix << \"] finished, consume \" << dump_time\n            << \"ms.\" << std::endl;\n\n  if (builder) {\n    auto &stats =\n        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << stats.trained_count()\n              << \"]\\n\\tBuilt count[\" << stats.built_count()\n              << \"]\\n\\tDump count[\" << stats.dumped_count()\n              << \"]\\n\\tDiscarded count[\" << stats.discarded_count() << \"]\\n\";\n  } else {\n    auto &stats = streamer->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << 0 << \"]\\n\\tBuilt count[\"\n              << stats.added_count() << \"]\\n\\tDump size [\"\n              << stats.dumped_size() << \"]\\n\\tDiscarded count[\"\n              << 
stats.discarded_count() << \"]\\n\";\n  }\n\n  // CLEANUP\n  builder ? builder->cleanup() : streamer->cleanup();\n\n  return 0;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      LOG_ERROR(\"Failed to load plugin: %s (%s)\", argv[i], error.c_str());\n      return -1;\n    }\n  }\n  YAML::Node config_root;\n  try {\n    config_root = YAML::LoadFile(argv[1]);\n  } catch (...) {\n    LOG_ERROR(\"Load YAML file[%s] failed!\", argv[1]);\n    return -1;\n  }\n  if (!check_config(config_root)) {\n    return -1;\n  }\n  auto config_common = config_root[\"BuilderCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n\n  string log_level = config_common[\"LogLevel\"]\n                         ? 
config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  if (config_common[\"DisableIdMap\"]) {\n    g_disable_id_map = config_common[\"DisableIdMap\"].as<bool>();\n    if (g_disable_id_map) {\n      cout << \"Disable ID map\" << endl;\n    } else {\n      cout << \"Enable ID map\" << endl;\n    }\n  }\n\n  if (retrieval_mode == RM_SPARSE) {\n    return do_build_sparse(config_root, config_common);\n  } else {\n    return do_build(config_root, config_common);\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/local_builder_original.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <libgen.h>\n#include <signal.h>\n#include <iostream>\n#include <memory>\n#include <ailego/pattern/defer.h>\n#include <zvec/ailego/container/params.h>\n#include <zvec/ailego/utility/time_helper.h>\n#if RABITQ_SUPPORTED\n#include \"algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h\"\n#include \"algorithm/hnsw_rabitq/rabitq_converter.h\"\n#include \"algorithm/hnsw_rabitq/rabitq_reformer.h\"\n#endif\n#include \"zvec/core/framework/index_dumper.h\"\n#include \"zvec/core/framework/index_factory.h\"\n#include \"zvec/core/framework/index_logger.h\"\n#include \"zvec/core/framework/index_plugin.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_reformer.h\"\n#include \"zvec/core/framework/index_streamer.h\"\n#include \"index_meta_helper.h\"\n#include \"meta_segment_common.h\"\n#include \"vecs_index_holder.h\"\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wshadow\"\n#pragma clang diagnostic ignored \"-Wdeprecated-declarations\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n#endif\n\n#include <yaml-cpp/yaml.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic 
pop\n#endif\n\nusing namespace std;\nusing namespace zvec::core;\nusing namespace zvec;\n\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nVecsIndexHolder::Pointer holder;\nVecsIndexSparseHolder::Pointer sparse_holder;\n\nbool stop_now = false;\nvoid stop(int signo) {\n  if (stop_now) {\n    exit(signo);\n  }\n  stop_now = true;\n  cout << \"\\rTrying to stop. press [Ctrl+C] again kill immediately.\" << endl\n       << flush;\n  if (holder) {\n    holder->stop();\n  }\n}\n\nvoid usage(void) {\n  cout << \"Usage: local_builder CONFIG.yaml [plugin file path]\" << endl;\n}\n\nbool prepare_params(YAML::Node &&config_params, ailego::Params &params) {\n  cout << \"Parse params as blow:\" << endl;\n  for (auto it = config_params.begin(); it != config_params.end(); ++it) {\n    string tag = it->second.Tag();\n    if (tag == \"tag:yaml.org,2002:int\") {\n      int64_t val = it->second.as<int64_t>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:float\") {\n      float val = it->second.as<float>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:bool\") {\n      bool val = it->second.as<bool>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else {\n      if (it->second.IsScalar()) {\n        string val = it->second.as<string>();\n        params.set(it->first.as<string>(), val);\n        cout << it->first.as<string>() << \"=\" << val << endl;\n      } else if (it->second.IsMap()) {\n        ailego::Params sub_params;\n        auto sub_node = it->second;\n        if (!prepare_params(std::move(sub_node), sub_params)) {\n          cerr << \"parse params error with key[\" << it->first.as<string>()\n               << \"]\" << endl;\n          return false;\n        }\n        
params.set(it->first.as<string>(), sub_params);\n      }\n    }\n  }\n  return true;\n}\n\nint setup_hnsw_rabitq_streamer(const IndexStreamer::Pointer &streamer,\n                               const IndexMeta &meta, YAML::Node &config_root,\n                               const std::string &converter_name,\n                               IndexHolder::Pointer *build_holder) {\n#if RABITQ_SUPPORTED\n  RabitqConverter rabitq_converter;\n  ailego::Params rabitq_converter_params;\n  if (config_root[\"RabitqConverterParams\"] &&\n      !prepare_params(std::move(config_root[\"RabitqConverterParams\"]),\n                      rabitq_converter_params)) {\n    cerr << \"Failed to prepare rabitq converter params\" << endl;\n    return -1;\n  }\n  if (rabitq_converter.init(meta, rabitq_converter_params) != 0) {\n    cerr << \"rabitq converter init failed\" << std::endl;\n    return -1;\n  }\n  if (rabitq_converter.train(*build_holder) != 0) {\n    cerr << \"rabitq converter train failed\" << std::endl;\n    return -1;\n  }\n  IndexReformer::Pointer rabitq_reformer;\n  rabitq_converter.to_reformer(&rabitq_reformer);\n  HnswRabitqStreamer *hnsw_rabitq_streamer =\n      dynamic_cast<HnswRabitqStreamer *>(streamer.get());\n  hnsw_rabitq_streamer->set_reformer(std::move(rabitq_reformer));\n  IndexProvider::Pointer provider;\n  if (converter_name.empty()) {\n    // build_holder is VecsIndexHolder\n    provider = std::dynamic_pointer_cast<IndexProvider>(*build_holder);\n  } else {\n    // build_holder is ordinary IndexHolder, need to convert\n    provider = convert_holder_to_provider(*build_holder);\n    // reuse provider to release memory\n    *build_holder = provider;\n  }\n\n  if (!provider) {\n    cerr << \"Failed to cast build holder to provider\" << endl;\n    return -1;\n  }\n  hnsw_rabitq_streamer->set_provider(provider);\n  return 0;\n#else\n  (void)streamer;\n  (void)meta;\n  (void)config_root;\n  (void)converter_name;\n  (void)build_holder;\n  cerr << \"HNSW RaBitQ is not 
supported on this platform\" << endl;\n  return -1;\n#endif\n}\n\nbool check_config(YAML::Node &config_root) {\n  auto common = config_root[\"BuilderCommon\"];\n  if (!common) {\n    cerr << \"Can not find [BuilderClass] in config\" << endl;\n    return false;\n  }\n  if (!common[\"BuilderClass\"]) {\n    cerr << \"Can not find [BuilderClass] in config\" << endl;\n    return false;\n  }\n  if (!common[\"BuildFile\"]) {\n    cerr << \"Can not find [BuildFile] in config\" << endl;\n    return false;\n  }\n  if (common[\"NeedTrain\"] && common[\"NeedTrain\"].as<bool>()) {\n    if (!common[\"TrainFile\"]) {\n      cerr << \"Can not find [TrainFile] in config\" << endl;\n      return false;\n    }\n  }\n  if (common[\"UseTrainer\"]) {\n    if (!common[\"TrainerIndexPath\"]) {\n      cerr << \"Can not find [TrainerIndexPath] in config\" << endl;\n      return false;\n    }\n    if (!config_root[\"TrainerParams\"]) {\n      cerr << \"Can not find [TrainerParams] in config\" << endl;\n      return false;\n    }\n  }\n  if (!common[\"DumpPath\"]) {\n    cerr << \"Can not find [DumpPath] in config\" << endl;\n    return false;\n  }\n  if (!config_root[\"BuilderParams\"]) {\n    cerr << \"Can not find [BuilderParams] in config\" << endl;\n    return false;\n  }\n  return true;\n}\n\nstatic inline size_t AlignSize(size_t size) {\n  return (size + 0x1F) & (~0x1F);\n}\n\nint64_t dump_meta_segment(const IndexDumper::Pointer &dumper,\n                          const std::string &segment_id, const void *data,\n                          size_t size, size_t &writes) {\n  size_t len = dumper->write(data, size);\n  if (len != size) {\n    LOG_ERROR(\"Dump segment %s data failed, expect: %lu, actual: %lu\",\n              segment_id.c_str(), size, len);\n    return false;\n  }\n\n  size_t padding_size = AlignSize(size) - size;\n  if (padding_size > 0) {\n    std::string padding(padding_size, '\\0');\n    if (dumper->write(padding.data(), padding_size) != padding_size) {\n      
LOG_ERROR(\"Append padding failed, size %lu\", padding_size);\n      return false;\n    }\n  }\n\n  uint32_t crc = ailego::Crc32c::Hash(data, size);\n  int ret = dumper->append(segment_id, size, padding_size, crc);\n  if (ret != 0) {\n    LOG_ERROR(\"Dump segment %s meta failed, ret=%d\", segment_id.c_str(), ret);\n    return false;\n  }\n\n  writes = len + padding_size;\n\n  return true;\n}\n\nint dump_taglist(IndexDumper::Pointer dumper, size_t num_vecs,\n                 const void *key_base, const void *taglist_data,\n                 uint64_t taglist_size) {\n  TagListHeader taglist_header;\n\n  taglist_header.num_vecs = num_vecs;\n\n  size_t total_writes;\n\n  bool ret =\n      dump_meta_segment(dumper, TAGLIST_HEADER_SEGMENT_NAME, &taglist_header,\n                        sizeof(TagListHeader), total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist meta failed\");\n    return IndexError_WriteData;\n  }\n\n  ret = dump_meta_segment(dumper, TAGLIST_KEY_SEGMENT_NAME, key_base,\n                          num_vecs * sizeof(uint64_t), total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist key failed\");\n    return IndexError_WriteData;\n  }\n\n  ret = dump_meta_segment(dumper, TAGLIST_DATA_SEGMENT_NAME, taglist_data,\n                          taglist_size, total_writes);\n  if (ret == false) {\n    LOG_ERROR(\"dump taglist data failed\");\n    return IndexError_WriteData;\n  }\n\n  return 0;\n}\n\nint do_build_sparse_by_streamer(IndexStreamer::Pointer &streamer,\n                                uint32_t thread_count) {\n  int ret;\n  ailego::ThreadPool pool(thread_count, false);\n  std::atomic<size_t> finished{0};\n  int errcode = 0;\n  std::mutex mutex;\n  std::atomic_bool error{false};\n  std::condition_variable cond{};\n\n  auto meta = streamer->meta();\n  IndexReformer::Pointer reformer;\n  if (!meta.reformer_name().empty()) {\n    reformer = IndexFactory::CreateReformer(meta.reformer_name());\n    if (!reformer) {\n      
LOG_ERROR(\"Failed to create reformer %s\", meta.reformer_name().c_str());\n      return IndexError_NoExist;\n    }\n    reformer->init(meta.reformer_params());\n  }\n\n  IndexQueryMeta qmeta(sparse_holder->data_type());\n  uint32_t keep_docs = sparse_holder->count() - sparse_holder->start_cursor();\n\n  auto do_build = [&](size_t idx) {\n    AILEGO_DEFER([&]() {\n      std::lock_guard<std::mutex> latch(mutex);\n      cond.notify_one();\n    });\n    auto ctx = streamer->create_context();\n    if (!ctx) {\n      if (!error.exchange(true)) {\n        cerr << \"Failed to create streamer context\";\n        errcode = IndexError_NoMemory;\n      }\n      return;\n    }\n    std::string ovec;\n    IndexQueryMeta ometa;\n    for (uint32_t id = idx; id < sparse_holder->count() && !stop_now;\n         id += thread_count) {\n      uint64_t key = sparse_holder->get_key(id);\n      if (reformer) {\n        std::string new_vec;\n        IndexQueryMeta new_meta;\n        ret = reformer->convert(sparse_holder->get_sparse_count(id),\n                                sparse_holder->get_sparse_indices(id),\n                                sparse_holder->get_sparse_data(id), qmeta,\n                                &new_vec, &new_meta);\n        if (ret != 0) {\n          LOG_ERROR(\"Failed to convert sparse vector for %s\",\n                    IndexError::What(ret));\n          errcode = ret;\n          return;\n        }\n        ret = streamer->add_impl(key, sparse_holder->get_sparse_count(id),\n                                 sparse_holder->get_sparse_indices(id),\n                                 new_vec.data(), new_meta, ctx);\n      } else {\n        ret =\n            streamer->add_impl(key, sparse_holder->get_sparse_count(id),\n                               sparse_holder->get_sparse_indices(id),\n                               sparse_holder->get_sparse_data(id), qmeta, ctx);\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        if (!error.exchange(true)) {\n          
LOG_ERROR(\"streamer add_impl failed\\n\");\n          errcode = ret;\n        }\n        return;\n      }\n      if (id >= keep_docs) {\n        ret =\n            streamer->remove_impl(sparse_holder->get_key(id - keep_docs), ctx);\n        if (ailego_unlikely(ret != 0)) {\n          if (!error.exchange(true)) {\n            LOG_ERROR(\"streamer remove_impl failed\\n\");\n            errcode = ret;\n          }\n          return;\n        }\n      }\n      finished++;\n    }\n    return;\n  };\n\n  for (size_t i = 0; i < pool.count(); ++i) {\n    pool.execute(do_build, i);\n  }\n\n  while (!pool.is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex);\n    cond.wait_until(\n        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));\n    if (error.load(std::memory_order_acquire)) {\n      cerr << \"Failed to build index while waiting finish\\n\";\n      return errcode;\n    }\n    LOG_INFO(\"Built cnt %zu, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / sparse_holder->count());\n  }\n  if (error.load(std::memory_order_acquire)) {\n    cerr << \"Failed to build index while waiting finish\\n\";\n    return errcode;\n  }\n  pool.wait_finish();\n\n  return 0;\n}\n\nint build_sparse_by_streamer(IndexStreamer::Pointer &streamer,\n                             YAML::Node &config_common) {\n  if (!config_common[\"IndexPath\"]) {\n    cerr << \"Miss params IndexPath for Streamer\\n\";\n    return IndexError_InvalidArgument;\n  }\n  string path = config_common[\"IndexPath\"].as<string>();\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  if (!storage) {\n    cerr << \"Failed to create storage\\n\";\n    return IndexError_NoExist;\n  }\n  ailego::Params params;\n  int ret = storage->init(params);\n  if (ret != 0) {\n    cerr << \"Storage Failed init\";\n    return IndexError_Runtime;\n  }\n  ret = storage->open(path, true);\n  if (ret != 0) {\n    cerr << \"Storage Failed to open\";\n    return 
IndexError_Runtime;\n  }\n  ret = streamer->open(storage);\n  if (ret != 0) {\n    cerr << \"Failed to open storage\";\n    return IndexError_Runtime;\n  }\n\n  size_t thread_count = config_common[\"ThreadCount\"]\n                            ? config_common[\"ThreadCount\"].as<uint64_t>()\n                            : std::thread::hardware_concurrency();\n\n  auto meta = streamer->meta();\n\n  LOG_DEBUG(\"thread count: %zu, retrieval_mode: sparse\", thread_count);\n  do_build_sparse_by_streamer(streamer, thread_count);\n\n  return 0;\n}\n\nint do_build_by_streamer(IndexStreamer::Pointer &streamer,\n                         uint32_t thread_count, RetrievalMode retrieval_mode) {\n  int ret;\n  ailego::ThreadPool pool(thread_count, false);\n  std::atomic<size_t> finished{0};\n  int errcode = 0;\n  std::mutex mutex;\n  std::atomic_bool error{false};\n  std::condition_variable cond{};\n\n  auto meta = streamer->meta();\n  IndexReformer::Pointer reformer;\n  if (!meta.reformer_name().empty()) {\n    if (retrieval_mode != RM_DENSE) {\n      cerr << \"Reformer not supported\";\n      return IndexError_Runtime;\n    } else {\n      reformer = IndexFactory::CreateReformer(meta.reformer_name());\n      if (!reformer) {\n        LOG_ERROR(\"Failed to create reformer %s\", meta.reformer_name().c_str());\n        return IndexError_NoExist;\n      }\n      reformer->init(meta.reformer_params());\n    }\n  }\n\n  IndexQueryMeta qmeta(holder->data_type(), holder->dimension());\n  uint32_t keep_docs = holder->count() - holder->start_cursor();\n\n  auto do_build = [&](size_t idx) {\n    AILEGO_DEFER([&]() {\n      std::lock_guard<std::mutex> latch(mutex);\n      cond.notify_one();\n    });\n    auto ctx = streamer->create_context();\n    if (!ctx) {\n      if (!error.exchange(true)) {\n        cerr << \"Failed to create streamer context\";\n        errcode = IndexError_NoMemory;\n      }\n      return;\n    }\n    std::string ovec;\n    IndexQueryMeta ometa;\n    for (uint32_t id = 
idx; id < holder->count() && !stop_now;\n         id += thread_count) {\n      uint64_t key = holder->get_key(id);\n      if (retrieval_mode == RM_DENSE) {\n        if (reformer) {\n          ret = reformer->convert(holder->get_vector_by_index(id), qmeta, &ovec,\n                                  &ometa);\n          if (ret != 0) {\n            LOG_ERROR(\"Failed to convert vector for %s\", IndexError::What(ret));\n            errcode = ret;\n            return;\n          }\n          ret = streamer->add_impl(key, ovec.data(), ometa, ctx);\n        } else {\n          ret = streamer->add_impl(key, holder->get_vector_by_index(id), qmeta,\n                                   ctx);\n        }\n      } else {\n        cerr << \"Retrieval mode not supported\";\n        errcode = IndexError_Unsupported;\n        return;\n      }\n\n      if (ailego_unlikely(ret != 0)) {\n        if (!error.exchange(true)) {\n          LOG_ERROR(\"streamer add_impl failed\\n\");\n          errcode = ret;\n        }\n        return;\n      }\n      if (id >= keep_docs) {\n        ret = streamer->remove_impl(holder->get_key(id - keep_docs), ctx);\n        if (ailego_unlikely(ret != 0)) {\n          if (!error.exchange(true)) {\n            LOG_ERROR(\"streamer remove_impl failed\\n\");\n            errcode = ret;\n          }\n          return;\n        }\n      }\n      finished++;\n    }\n    return;\n  };\n\n  for (size_t i = 0; i < pool.count(); ++i) {\n    pool.execute(do_build, i);\n  }\n\n  while (!pool.is_finished()) {\n    std::unique_lock<std::mutex> lk(mutex);\n    cond.wait_until(\n        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));\n    if (error.load(std::memory_order_acquire)) {\n      cerr << \"Failed to build index while waiting finish\\n\";\n      return errcode;\n    }\n    LOG_INFO(\"Built cnt %zu, finished percent %.3f%%\", finished.load(),\n             finished.load() * 100.0f / holder->count());\n  }\n  if (error.load(std::memory_order_acquire)) 
{\n    cerr << \"Failed to build index while waiting finish\\n\";\n    return errcode;\n  }\n  pool.wait_finish();\n\n  return 0;\n}\n\nint build_by_streamer(IndexStreamer::Pointer &streamer,\n                      YAML::Node &config_common) {\n  if (!config_common[\"IndexPath\"]) {\n    cerr << \"Miss params IndexPath for Streamer\\n\";\n    return IndexError_InvalidArgument;\n  }\n  string path = config_common[\"IndexPath\"].as<string>();\n\n  auto storage = IndexFactory::CreateStorage(\"MMapFileStorage\");\n  if (!storage) {\n    cerr << \"Failed to create storage\\n\";\n    return IndexError_NoExist;\n  }\n  ailego::Params params;\n  int ret = storage->init(params);\n  if (ret != 0) {\n    cerr << \"Storage Failed init\";\n    return IndexError_Runtime;\n  }\n  ret = storage->open(path, true);\n  if (ret != 0) {\n    cerr << \"Storage Failed to open\";\n    return IndexError_Runtime;\n  }\n  ret = streamer->open(storage);\n  if (ret != 0) {\n    cerr << \"Failed to open storage\";\n    return IndexError_Runtime;\n  }\n\n  size_t thread_count = config_common[\"ThreadCount\"]\n                            ? config_common[\"ThreadCount\"].as<uint64_t>()\n                            : std::thread::hardware_concurrency();\n\n  auto meta = streamer->meta();\n\n  RetrievalMode retrieval_mode = RM_UNDEFINED;\n  if (meta.dimension() > 0) {\n    retrieval_mode = RM_DENSE;\n  } else {\n    retrieval_mode = RM_SPARSE;\n  }\n\n  LOG_DEBUG(\"thread count: %zu, retrieval mode: %s\", thread_count,\n            retrieval_mode == 1 ? 
\"Dense\" : \"Sparse\");\n  do_build_by_streamer(streamer, thread_count, retrieval_mode);\n\n  return 0;\n}\n\nIndexSparseHolder::Pointer convert_sparse_holder(\n    const std::string &name, const ailego::Params &params,\n    VecsIndexSparseHolder::Pointer &in_holder, IndexMeta &index_meta) {\n  IndexSparseHolder::Pointer cast_holder =\n      std::dynamic_pointer_cast<IndexSparseHolder>(in_holder);\n  if (name.empty()) {\n    return cast_holder;\n  }\n\n  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);\n  if (!converter) {\n    cerr << \"Failed to create sparse converter \" << name << endl;\n    return IndexSparseHolder::Pointer();\n  }\n\n  int ret = converter->init(in_holder->index_meta(), params);\n  if (ret != 0) {\n    cerr << \"Failed to init converter \" << ret << endl;\n    return IndexSparseHolder::Pointer();\n  }\n\n  ret = converter->train(cast_holder);\n  if (ret != 0) {\n    cerr << \"Failed to train sparse converter \" << ret << endl;\n    return IndexSparseHolder::Pointer();\n  }\n\n  ret = converter->transform(cast_holder);\n  if (ret != 0) {\n    cerr << \"Failed to transform converter \" << ret << endl;\n    return IndexSparseHolder::Pointer();\n  }\n\n  index_meta = converter->meta();\n\n  return converter->sparse_result();\n}\n\nIndexHolder::Pointer convert_holder(const std::string &name,\n                                    const ailego::Params &params,\n                                    VecsIndexHolder::Pointer &in_holder,\n                                    IndexMeta &index_meta) {\n  IndexHolder::Pointer cast_holder =\n      std::dynamic_pointer_cast<IndexHolder>(in_holder);\n  if (name.empty()) {\n    return cast_holder;\n  }\n\n  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);\n  if (!converter) {\n    cerr << \"Failed to create converter \" << name << endl;\n    return IndexHolder::Pointer();\n  }\n\n  int ret = converter->init(in_holder->index_meta(), params);\n  if (ret != 0) {\n    
cerr << \"Failed to init converter \" << ret << endl;\n    return IndexHolder::Pointer();\n  }\n\n  ret = converter->train(cast_holder);\n  if (ret != 0) {\n    cerr << \"Failed to train converter \" << ret << endl;\n    return IndexHolder::Pointer();\n  }\n\n  ret = converter->transform(cast_holder);\n  if (ret != 0) {\n    cerr << \"Failed to transform converter \" << ret << endl;\n    return IndexHolder::Pointer();\n  }\n\n  index_meta = converter->meta();\n\n  return converter->result();\n}\n\nint do_build_sparse(YAML::Node &config_root, YAML::Node &config_common) {\n  string build_file = config_common[\"BuildFile\"].as<string>();\n  VecsIndexSparseHolder::Pointer build_holder(new VecsIndexSparseHolder);\n  if (!build_holder->load(build_file)) {\n    cerr << \"Load input error: \" << build_file << endl;\n    return -1;\n  }\n  IndexMeta meta;\n  meta = build_holder->index_meta();\n\n  std::string metric_name;\n  ailego::Params metric_params;\n  if (config_common[\"MetricName\"] &&\n      !config_common[\"MetricName\"].as<string>().empty()) {\n    metric_name = config_common[\"MetricName\"].as<string>();\n    if (config_root[\"MetricParams\"] &&\n        !prepare_params(config_root[\"MetricParams\"], metric_params)) {\n      cerr << \"Failed to prepare metric params\" << endl;\n      return -1;\n    }\n    build_holder->set_metric(metric_name, metric_params);\n    meta.set_metric(metric_name, 0, metric_params);\n  }\n\n  string converter_name;\n  ailego::Params converter_params;\n  if (config_common[\"ConverterName\"] &&\n      !config_common[\"ConverterName\"].as<string>().empty()) {\n    converter_name = config_common[\"ConverterName\"].as<string>();\n    if (config_root[\"ConverterParams\"] &&\n        !prepare_params(config_root[\"ConverterParams\"], converter_params)) {\n      cerr << \"Failed to prepare converter params\" << endl;\n      return -1;\n    }\n  }\n\n  if (config_common[\"MaxDocs\"] && config_common[\"MaxDocs\"].as<uint32_t>()) {\n    auto 
max_docs = config_common[\"MaxDocs\"].as<uint32_t>();\n    build_holder->set_max_doc_count(max_docs);\n  }\n  if (config_common[\"KeepDocs\"] && config_common[\"KeepDocs\"].as<uint32_t>()) {\n    auto keep_docs = config_common[\"KeepDocs\"].as<uint32_t>();\n    if (keep_docs < build_holder->count()) {\n      build_holder->set_start_cursor(build_holder->count() - keep_docs);\n    }\n  }\n\n  // Create a Builder\n  string builder_class = config_common[\"BuilderClass\"].as<string>();\n  IndexStreamer::Pointer streamer;\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(builder_class.c_str());\n  if (!builder) {\n    streamer = IndexFactory::CreateStreamer(builder_class.c_str());\n  }\n  if (!builder && !streamer) {\n    cerr << \"Failed to create builder \" << builder_class << endl;\n    return -1;\n  }\n  cout << \"Created builder \" << builder_class << endl;\n\n  IndexSparseHolder::Pointer cv_build_holder = convert_sparse_holder(\n      converter_name, converter_params, build_holder, meta);\n  if (!cv_build_holder) {\n    cerr << \"Convert holder failed.\" << endl;\n    return -1;\n  }\n\n  ailego::Params params;\n  if (!prepare_params(config_root[\"BuilderParams\"], params)) {\n    cerr << \"Failed to prepare params\" << endl;\n    return -1;\n  }\n\n  // INIT\n  int ret =\n      builder ? 
builder->init(meta, params) : streamer->init(meta, params);\n  if (ret < 0) {\n    cerr << \"Failed to init builder, ret=\" << ret << endl;\n    return -1;\n  }\n  ailego::ElapsedTime timer;\n\n  // TRAIN\n  if (builder && config_common[\"NeedTrain\"] &&\n      config_common[\"NeedTrain\"].as<bool>()) {\n    string train_file = config_common[\"TrainFile\"].as<string>();\n    VecsIndexSparseHolder::Pointer train_holder(new VecsIndexSparseHolder);\n    if (!train_holder->load(train_file)) {\n      cerr << \"Load input error: \" << train_file << endl;\n      return -1;\n    }\n\n    if (!metric_name.empty()) {\n      train_holder->set_metric(metric_name, metric_params);\n    }\n\n    IndexSparseHolder::Pointer cv_train_holder = convert_sparse_holder(\n        converter_name, converter_params, train_holder, meta);\n    if (!cv_train_holder) {\n      cerr << \"Convert train holder failed.\" << endl;\n      return -1;\n    }\n\n    std::cout << \"Prepare train data done!\" << std::endl;\n    timer.reset();\n    ret = builder->train(std::move(cv_train_holder));\n    size_t train_time = timer.milli_seconds();\n\n    if (ret < 0) {\n      cerr << \"Failed to train in builder, ret=\" << ret << endl;\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else {\n    cout << \"Skip train procedure\" << endl;\n  }\n\n  // BUILD\n  sparse_holder = build_holder;\n  signal(SIGINT, stop);\n  timer.reset();\n  if (builder != nullptr) {\n    ret = builder->build(std::move(cv_build_holder));\n  } else {\n    ret = build_sparse_by_streamer(streamer, config_common);\n  }\n  size_t build_time = timer.milli_seconds();\n  if (ret < 0) {\n    cerr << \"Failed to build in builder, ret=\" << ret << endl;\n    return -1;\n  }\n  cout << \"Build finished, consume \" << build_time << \"ms.\" << endl;\n  signal(SIGINT, SIG_DFL);\n\n  // DUMP\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  if (!dumper) {\n    cerr 
<< \"Failed to create FileDumper.\" << endl;\n    return -1;\n  }\n  string dump_prefix = config_common[\"DumpPath\"].as<string>();\n  ret = dumper->create(dump_prefix);\n  if (ret != 0) {\n    cerr << \"Failed to create in dumper, ret=\" << ret << endl;\n    return -1;\n  }\n  timer.reset();\n  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);\n  size_t dump_time = timer.milli_seconds();\n  if (ret == IndexError_NotImplemented) {\n    LOG_WARN(\"Dump index not implemented\");\n  } else if (ret < 0) {\n    cerr << \"Failed to dump in builder, ret=\" << ret << endl;\n    return -1;\n  }\n\n  if (build_holder->has_taglist()) {\n    size_t taglist_size{0};\n    const void *taglist_data = build_holder->get_taglist_data(taglist_size);\n    const void *key_base = build_holder->get_key_base();\n\n    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,\n                 taglist_size);\n  }\n\n  ret = dumper->close();\n  if (ret != 0) {\n    cerr << \"Dumper failed to close, ret=\" << ret << endl;\n    return -1;\n  }\n  std::cout << \"Dump to [\" << dump_prefix << \"] finished, consume \" << dump_time\n            << \"ms.\" << std::endl;\n\n  if (builder) {\n    auto &stats =\n        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << stats.trained_count()\n              << \"]\\n\\tBuilt count[\" << stats.built_count()\n              << \"]\\n\\tDump count[\" << stats.dumped_count()\n              << \"]\\n\\tDiscarded count[\" << stats.discarded_count() << \"]\\n\";\n  } else {\n    auto &stats = streamer->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << 0 << \"]\\n\\tBuilt count[\"\n              << stats.added_count() << \"]\\n\\tDump size [\"\n              << stats.dumped_size() << \"]\\n\\tDiscarded count[\"\n              << stats.discarded_count() << \"]\\n\";\n  }\n\n  // CLEANUP\n  builder ? 
builder->cleanup() : streamer->cleanup();\n\n  return 0;\n}\n\nint do_build(YAML::Node &config_root, YAML::Node &config_common) {\n  string build_file = config_common[\"BuildFile\"].as<string>();\n  VecsIndexHolder::Pointer build_holder(new VecsIndexHolder);\n  if (!build_holder->load(build_file)) {\n    cerr << \"Load input error: \" << build_file << endl;\n    return -1;\n  }\n  IndexMeta meta;\n  meta = build_holder->index_meta();\n\n  std::string metric_name;\n  ailego::Params metric_params;\n  if (config_common[\"MetricName\"] &&\n      !config_common[\"MetricName\"].as<string>().empty()) {\n    metric_name = config_common[\"MetricName\"].as<string>();\n    if (config_root[\"MetricParams\"] &&\n        !prepare_params(config_root[\"MetricParams\"], metric_params)) {\n      cerr << \"Failed to prepare metric params\" << endl;\n      return -1;\n    }\n    build_holder->set_metric(metric_name, metric_params);\n    meta.set_metric(metric_name, 0, metric_params);\n  }\n  IndexMeta input_meta = meta;\n  string converter_name;\n  ailego::Params converter_params;\n  if (config_common[\"ConverterName\"] &&\n      !config_common[\"ConverterName\"].as<string>().empty()) {\n    converter_name = config_common[\"ConverterName\"].as<string>();\n    if (config_root[\"ConverterParams\"] &&\n        !prepare_params(config_root[\"ConverterParams\"], converter_params)) {\n      cerr << \"Failed to prepare converter params\" << endl;\n      return -1;\n    }\n  }\n  IndexMeta::MajorOrder order = IndexMeta::MO_UNDEFINED;\n  if (config_common[\"MajorOrder\"]) {\n    std::string order_str = config_common[\"MajorOrder\"].as<string>();\n    if (order_str == \"row\") {\n      order = IndexMeta::MajorOrder::MO_ROW;\n    } else {\n      order = IndexMeta::MajorOrder::MO_COLUMN;\n    }\n  }\n\n  if (config_common[\"MaxDocs\"] && config_common[\"MaxDocs\"].as<uint32_t>()) {\n    auto max_docs = config_common[\"MaxDocs\"].as<uint32_t>();\n    build_holder->set_max_doc_count(max_docs);\n  
}\n  if (config_common[\"KeepDocs\"] && config_common[\"KeepDocs\"].as<uint32_t>()) {\n    auto keep_docs = config_common[\"KeepDocs\"].as<uint32_t>();\n    if (keep_docs < build_holder->count()) {\n      build_holder->set_start_cursor(build_holder->count() - keep_docs);\n    }\n  }\n\n  // Create a Builder\n  string builder_class = config_common[\"BuilderClass\"].as<string>();\n  IndexStreamer::Pointer streamer;\n  IndexBuilder::Pointer builder =\n      IndexFactory::CreateBuilder(builder_class.c_str());\n  if (!builder) {\n    streamer = IndexFactory::CreateStreamer(builder_class.c_str());\n  }\n  if (!builder && !streamer) {\n    cerr << \"Failed to create builder \" << builder_class << endl;\n    return -1;\n  }\n  cout << \"Created builder \" << builder_class << endl;\n\n\n  IndexHolder::Pointer cv_build_holder =\n      convert_holder(converter_name, converter_params, build_holder, meta);\n  if (!cv_build_holder) {\n    cerr << \"Convert holder failed.\" << endl;\n    return -1;\n  }\n  meta.set_major_order(order);\n  cout << IndexMetaHelper::to_string(meta) << endl;\n  cout << \"Prepare data done!\" << endl;\n\n  ailego::Params params;\n  if (!prepare_params(config_root[\"BuilderParams\"], params)) {\n    cerr << \"Failed to prepare params\" << endl;\n    return -1;\n  }\n\n  // INIT\n  int ret =\n      builder ? 
builder->init(meta, params) : streamer->init(meta, params);\n  if (ret < 0) {\n    cerr << \"Failed to init builder, ret=\" << ret << endl;\n    return -1;\n  }\n  ailego::ElapsedTime timer;\n\n  // TRAIN\n  if (config_common[\"UseTrainer\"] && config_common[\"UseTrainer\"].as<bool>()) {\n    ailego::Params trainer_params;\n    if (!prepare_params(config_root[\"TrainerParams\"], trainer_params)) {\n      cerr << \"Failed to prepare trainer params\" << endl;\n      return -1;\n    }\n\n    string train_index_path;\n    if (config_common[\"TrainerIndexPath\"]) {\n      train_index_path = config_common[\"TrainerIndexPath\"].as<string>();\n      if (train_index_path.empty()) {\n        cerr << \"invalid TrainerIndexPath format\" << std::endl;\n        return -1;\n      }\n      cout << \"Trainer index path: \" << train_index_path << \"\\n\";\n    } else {\n      cerr << \"Need [TrainerIndexPath] config\" << std::endl;\n      return -1;\n    }\n\n    IndexTrainer::Pointer trainer =\n        IndexFactory::CreateTrainer(\"StratifiedClusterTrainer\");\n    if (trainer->init(meta, trainer_params) != 0) {\n      cerr << \"trainer init failed\" << std::endl;\n      return -1;\n    }\n\n    if (ailego::File::IsExist(train_index_path)) {\n      IndexStorage::Pointer container =\n          IndexFactory::CreateStorage(\"MMapFileReadStorage\");\n      if (!container) {\n        cerr << \"Failed to create MMapFileReadStorage\" << endl;\n        return -1;\n      }\n      container->init(ailego::Params());\n      if (container->open(train_index_path, false) != 0) {\n        cerr << \"MMapFileReadStorage failed to load \"\n             << train_index_path.c_str() << endl;\n        return -1;\n      }\n      if (trainer->load(container) != 0) {\n        cerr << \"Trainer failed to load container\" << endl;\n        return -1;\n      };\n    } else {\n      std::cout << \"Prepare trainer data...\" << std::endl;\n      string train_file = config_common[\"TrainFile\"].as<string>();\n     
 VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);\n      if (!train_holder->load(train_file)) {\n        cerr << \"Load input error: \" << train_file << endl;\n        return -1;\n      }\n      if (!metric_name.empty()) {\n        train_holder->set_metric(metric_name, metric_params);\n      }\n\n      // support fp16 convert\n\n      IndexHolder::Pointer cv_train_holder =\n          convert_holder(converter_name, converter_params, train_holder, meta);\n      if (!cv_train_holder) {\n        cerr << \"Convert train holder failed.\" << endl;\n        return -1;\n      }\n\n      std::cout << \"Prepare trainer data done!\" << std::endl;\n      std::cout << \"Prepare train data!\" << std::endl;\n\n      ret = trainer->train(cv_train_holder);\n      if (ret != 0) {\n        cerr << \"trainer train_index failed with \" << ret << std::endl;\n        return -1;\n      }\n\n      std::cout << \"train data done!\" << std::endl;\n      IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n      if (!dumper) {\n        cerr << \"Failed to create FileDumper.\" << endl;\n        return -1;\n      }\n      if (dumper->init(ailego::Params()) != 0) {\n        cerr << \"Failed to init FileDumper.\" << endl;\n        return -1;\n      }\n      ret = dumper->create(train_index_path);\n      if (ret != 0) {\n        cerr << \"Failed to create in dumper, ret=\" << ret << endl;\n        return -1;\n      }\n      if (trainer->dump(dumper) != 0) {\n        cerr << \"trainer dump_index failed\" << std::endl;\n        return -1;\n      }\n      dumper->close();\n    }\n\n    ret = builder->train(trainer);\n    size_t train_time = timer.milli_seconds();\n    if (ret < 0) {\n      cerr << \"Failed to train in builder, ret=\" << ret << endl;\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else if (builder && config_common[\"NeedTrain\"] &&\n             config_common[\"NeedTrain\"].as<bool>()) {\n    
string train_file = config_common[\"TrainFile\"].as<string>();\n    VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);\n    if (!train_holder->load(train_file)) {\n      cerr << \"Load input error: \" << train_file << endl;\n      return -1;\n    }\n\n    if (!metric_name.empty()) {\n      train_holder->set_metric(metric_name, metric_params);\n    }\n    IndexHolder::Pointer cv_train_holder =\n        convert_holder(converter_name, converter_params, train_holder, meta);\n    if (!cv_train_holder) {\n      cerr << \"Convert train holder failed.\" << endl;\n      return -1;\n    }\n\n    std::cout << \"Prepare train data done!\" << std::endl;\n    timer.reset();\n    ret = builder->train(std::move(cv_train_holder));\n    size_t train_time = timer.milli_seconds();\n    if (ret < 0) {\n      cerr << \"Failed to train in builder, ret=\" << ret << endl;\n      return -1;\n    }\n    cout << \"Train finished, consume \" << train_time << \"ms.\" << endl;\n  } else {\n    cout << \"Skip train procedure\" << endl;\n  }\n\n  if (builder_class == \"HnswRabitqStreamer\") {\n    if (setup_hnsw_rabitq_streamer(streamer, input_meta, config_root,\n                                   converter_name, &cv_build_holder) != 0) {\n      return -1;\n    }\n  } else if (builder_class == \"HnswRabitqBuilder\" && !converter_name.empty()) {\n    cv_build_holder = convert_holder_to_provider(cv_build_holder);\n  }\n\n  // BUILD\n  holder = build_holder;\n  signal(SIGINT, stop);\n  timer.reset();\n  if (builder != nullptr) {\n    ret = builder->build(std::move(cv_build_holder));\n  } else {\n    std::string retrieval_mode = \"dense\";\n    if (meta.dimension() > 0) {\n      retrieval_mode = \"sparse\";\n    } else {\n      retrieval_mode = \"dense\";\n    }\n\n    ret = build_by_streamer(streamer, config_common);\n  }\n  size_t build_time = timer.milli_seconds();\n  if (ret < 0) {\n    cerr << \"Failed to build in builder, ret=\" << ret << endl;\n    return -1;\n  }\n  cout << \"Build 
finished, consume \" << build_time << \"ms.\" << endl;\n  signal(SIGINT, SIG_DFL);\n\n  // DUMP\n  IndexDumper::Pointer dumper = IndexFactory::CreateDumper(\"FileDumper\");\n  if (!dumper) {\n    cerr << \"Failed to create FileDumper.\" << endl;\n    return -1;\n  }\n  string dump_prefix = config_common[\"DumpPath\"].as<string>();\n  ret = dumper->create(dump_prefix);\n  if (ret != 0) {\n    cerr << \"Failed to create in dumper, ret=\" << ret << endl;\n    return -1;\n  }\n  timer.reset();\n  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);\n  size_t dump_time = timer.milli_seconds();\n  if (ret == IndexError_NotImplemented) {\n    LOG_WARN(\"Dump index not implemented\");\n  } else if (ret < 0) {\n    cerr << \"Failed to dump in builder, ret=\" << ret << endl;\n    return -1;\n  }\n\n  if (build_holder->has_taglist()) {\n    size_t taglist_size{0};\n    const void *taglist_data = build_holder->get_taglist_data(taglist_size);\n    const void *key_base = build_holder->get_key_base();\n\n    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,\n                 taglist_size);\n  }\n\n  ret = dumper->close();\n  if (ret != 0) {\n    cerr << \"Dumper failed to close, ret=\" << ret << endl;\n    return -1;\n  }\n  std::cout << \"Dump to [\" << dump_prefix << \"] finished, consume \" << dump_time\n            << \"ms.\" << std::endl;\n\n  if (builder) {\n    auto &stats =\n        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << stats.trained_count()\n              << \"]\\n\\tBuilt count[\" << stats.built_count()\n              << \"]\\n\\tDump count[\" << stats.dumped_count()\n              << \"]\\n\\tDiscarded count[\" << stats.discarded_count() << \"]\\n\";\n  } else {\n    auto &stats = streamer->stats();\n    std::cout << \"STATS: \\n\\tTrained count[\" << 0 << \"]\\n\\tBuilt count[\"\n              << stats.added_count() << \"]\\n\\tDump size [\"\n            
  << stats.dumped_size() << \"]\\n\\tDiscarded count[\"\n              << stats.discarded_count() << \"]\\n\";\n  }\n\n  // CLEANUP\n  builder ? builder->cleanup() : streamer->cleanup();\n\n  return 0;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      cerr << \"Failed to load plugin: \" << argv[i] << \" (\" << error << \")\"\n           << endl;\n      return -1;\n    }\n  }\n  YAML::Node config_root;\n  try {\n    config_root = YAML::LoadFile(argv[1]);\n  } catch (...) {\n    cerr << \"Load YAML file[\" << argv[1] << \"] failed!\" << endl;\n    return -1;\n  }\n  if (!check_config(config_root)) {\n    return -1;\n  }\n  auto config_common = config_root[\"BuilderCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n\n  string log_level = config_common[\"LogLevel\"]\n                         ? 
config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  if (retrieval_mode == RM_SPARSE) {\n    return do_build_sparse(config_root, config_common);\n  } else {\n    return do_build(config_root, config_common);\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/meta_segment_common.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <zvec/ailego/utility/type_helper.h>\n\nnamespace zvec {\nnamespace core {\n\nconst static std::string TAGLIST_HEADER_SEGMENT_NAME(\"local_taglists_header\");\nconst static std::string TAGLIST_KEY_SEGMENT_NAME(\"local_taglists_key\");\nconst static std::string TAGLIST_DATA_SEGMENT_NAME(\"local_taglists_data\");\n\n#pragma pack(4)\nstruct TagListHeader {\n  uint64_t num_vecs;\n  uint8_t meta_buf[252];\n};\n#pragma pack()\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "tools/core/recall.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"helper.h\"\n\nmutex recall_lock;\nbool g_compare_by_id = false;\nfloat g_recall_precision;\n\n\n//--------------------------------------------------\n// Recall\n//--------------------------------------------------\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nenum FilterMode { FM_UNDEFINED = 0, FM_NONE = 1, FM_TAG = 2 };\n\ntemplate <typename T>\nclass Recall {\n public:\n  Recall(size_t threads, const string &output, size_t batch_count,\n         FilterMode filter_mode)\n      : threads_(threads),\n        output_(output),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(true);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, true);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n      call_batch_api_ = false;\n    } else {\n      call_batch_api_ = true;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. 
press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  void run_dense(core_interface::Index::Pointer index,\n                 core_interface::BaseIndexQueryParam::Pointer query_param,\n                 const string &recall_tops, size_t gt_count) {\n    StringHelper::Split(recall_tops, \",\", &topk_ids_);\n    std::sort(topk_ids_.begin(), topk_ids_.end());\n\n    for (auto i : topk_ids_) {\n      recall_res_[i] = 0.0f;\n    }\n    size_t topk = recall_res_.rbegin()->first;\n\n    gt_count = topk < gt_count ? gt_count : topk;\n\n    if (external_gt_file_enabled_) {\n      cout << \"Internal ground truth file NOT used since external ground truth \"\n              \"file has been loaded\"\n           << endl;\n    } else {\n      cout << \"Loading internal ground truth file\" << endl;\n\n      if (!load_gt_dense(index, gt_count)) {\n        LOG_ERROR(\"Load ground truth file failed!\");\n        return;\n      }\n    }\n\n    if (batch_queries_.size() < threads_) {\n      threads_ = batch_queries_.size();\n      pool_ = make_shared<ThreadPool>(true, threads_);\n      cout << \"Query size too small, resize thread pool count[\" << threads_\n           << \"]\" << endl;\n    }\n\n    // Prepare file handler\n    vector<pair<fstream *, fstream *>> output_fs;\n    if (!output_.empty()) {\n      string cmd = \"mkdir -p \" + output_;\n      int ret = system(cmd.c_str());\n      if (ret != 0) {\n        LOG_ERROR(\"execute cmd %s failed, ret=%d\", cmd.c_str(), ret);\n        return;\n      }\n      struct stat sb;\n      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {\n        cout << \"logs output to : \" << output_ << endl;\n        for (size_t i = 0; i < threads_; ++i) {\n          fstream *fs_k = new fstream();\n          fs_k->open(output_ + \"/t\" + to_string(i) + \".knn\", ios::out);\n          fstream *fs_l = new fstream();\n          fs_l->open(output_ + \"/t\" + to_string(i) + \".linear\", ios::out);\n          
output_fs.push_back(make_pair(fs_k, fs_l));\n        }\n      }\n    }\n\n    signal(SIGINT, stop);\n    size_t i = 0;\n    for (; !STOP_NOW && i < batch_queries_.size();) {\n      if (pool_->pending_count() >= pool_->count()) {\n        this_thread::sleep_for(chrono::microseconds(1));\n        continue;\n      }\n\n      Closure::Pointer task =\n          Closure::New(this, &Recall::recall_one_dense, index, query_param,\n                       topk, i, output_fs);\n      pool_->enqueue_and_wake(task);\n\n      i++;\n    }\n    pool_->wait_finish();\n\n    for (auto fs : output_fs) {\n      fs.first->close();\n      fs.second->close();\n      delete fs.first;\n      delete fs.second;\n    }\n    cout << \"Process query: \" << i << endl;\n    for (auto it : recall_res_) {\n      cout << \"Recall@\" << it.first << \": \"\n           << it.second / linear_queries_.size() << endl;\n    }\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,\n                           linear_sparse_data_, linear_taglists_)) {\n      LOG_ERROR(\"Load query error\");\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      batch_queries_ = linear_queries_;\n\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(linear_sparse_data_[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);\n        batch_sparse_features_.push_back(linear_sparse_data_[i].features);\n      }\n\n      for (size_t i = 0; i < linear_taglists_.size(); ++i) {\n        vector<vector<uint64_t>> new_taglists;\n        new_taglists.push_back(linear_taglists_[i]);\n\n        batch_taglists_.push_back(std::move(new_taglists));\n      }\n    } else {\n 
     size_t num_batch =\n          (linear_queries_.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<T> batch_query;\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n        vector<vector<uint64_t>> batch_taglists;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          for (size_t k = 0; k < linear_queries_[idx].size(); ++k) {\n            batch_query.push_back(linear_queries_[idx][k]);\n          }\n\n          batch_sparse_count.push_back(linear_sparse_data_[idx].count);\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();\n               ++k) {\n            batch_sparse_feature.push_back(\n                linear_sparse_data_[idx].features[k]);\n          }\n\n          idx = (idx + 1) % linear_queries_.size();\n        }\n\n        batch_queries_.push_back(batch_query);\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n        batch_taglists_.push_back(batch_taglists);\n      }\n    }\n\n    dim_ = linear_queries_[0].size();\n    total_querys_ = linear_queries_.size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);\n    } else {\n      LOG_ERROR(\"unsupported type\");\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  bool load_external_gt_file(const std::string &external_gt_file,\n                             const std::string &first_sep,\n   
                          const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    bool ret =\n        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);\n    if (ret) {\n      cout << \"Load external ground truth file[\"\n           << File::BaseName(external_gt_file) << \"] done!\" << endl;\n      external_gt_file_enabled_ = true;\n    } else {\n      LOG_ERROR(\"Failed to load ground truth file!\");\n    }\n\n    return ret;\n  }\n\n private:\n  std::string compute_crc(size_t gt_count) {\n    uint32_t crc = 0u;\n    // dense\n    if (batch_queries_.size() > 0) {\n      size_t one_size = dim_ * sizeof(T);\n      size_t data_size = total_querys_ * one_size + sizeof(size_t);\n      char *data = new char[data_size];\n      size_t q = 0;\n      char *p = data;\n      for (; q < batch_queries_.size(); ++q) {\n        memcpy(p, batch_queries_[q].data(),\n               batch_queries_[q].size() * sizeof(T));\n        p += batch_queries_[q].size() * sizeof(T);\n      }\n      memcpy(p, &gt_count, sizeof(size_t));\n      crc = Crc32c::Hash(data, data_size, crc);\n      delete[] data;\n    }\n\n    // sparse\n    if (linear_sparse_data_.size() > 0) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        crc = Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),\n                           crc);\n        crc =\n            Crc32c::Hash(linear_sparse_data_[i].indices.data(),\n                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);\n        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),\n                           linear_sparse_data_[i].count * sizeof(T), crc);\n      }\n    }\n\n    char crc_str[64];\n    snprintf(crc_str, sizeof(crc_str), \"%X\", crc);\n\n    return std::string(crc_str);\n  }\n\n  bool load_gt_dense(core_interface::Index::Pointer index, size_t gt_count) {\n    std::string crc_str = compute_crc(gt_count);\n\n    string gt_file = string(\"gt.\") + crc_str;\n\n   
 File gtf;\n    if (!gtf.IsRegular(gt_file.c_str())) {\n      cout << \"Ground truth file[\" << gt_file << \"] not exist, try to create it\"\n           << endl;\n      ElapsedTime timer;\n\n      size_t size = sizeof(uint64_t) + sizeof(float);\n      size_t file_size =\n          linear_queries_.size() * (sizeof(int) + size * gt_count);\n\n      std::string gt_file_temp = gt_file + \".tmp\";\n      gtf.create(gt_file_temp.c_str(), file_size);\n\n      gt_.resize(linear_queries_.size());\n\n      atomic_bool error(false);\n      size_t count = 0;\n      float s = linear_queries_.size() / 100.0;\n      size_t pc = 0;\n      SpinMutex spin_lock;\n\n      function<void(size_t)> fun = [&](size_t i) {\n        spin_lock.lock();\n        count++;\n        size_t process = (size_t)ceil(count / s);\n        if (process > pc) {\n          pc = process;\n          stringstream msg;\n          msg << \"\\r\" << setw(3) << setfill(' ') << process << \"% \" << left\n              << setfill('=') << setw(process / 2 + 1) << \"[\" << right\n              << setfill(' ') << setw(51 - process / 2) << \"]\";\n          cout << msg.str() << flush;\n        }\n        spin_lock.unlock();\n\n        auto query = linear_queries_[i];\n\n        FilterResultCache filter_cache;\n        std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n        if (filter_mode_ == FM_TAG) {\n          if (batch_taglists_[i].size() != 1) {\n            LOG_ERROR(\"query tag list not equal to one!\");\n            return;\n          }\n\n          int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[i][0],\n                                        tag_key_list_);\n          if (ret != 0) {\n            LOG_ERROR(\"prefilter failed, idx: %zu\", i);\n            return;\n          }\n\n          auto filterFunc = [&](uint64_t key) {\n            return filter_cache.find(key);\n          };\n\n          filter_ptr = std::make_shared<IndexFilter>();\n          filter_ptr->set(filterFunc);\n        
}\n\n        core_interface::DenseVector dense_query;\n        dense_query.data = query.data();\n        core_interface::VectorData query_data;\n        query_data.vector = dense_query;\n\n        auto query_param = std::make_shared<core_interface::FlatQueryParam>();\n        query_param->topk = gt_count;\n        query_param->is_linear = true;\n        query_param->filter = filter_ptr;\n\n        core_interface::SearchResult search_result;\n        int ret = index->Search(query_data, query_param, &search_result);\n        if (ret < 0) {\n          LOG_ERROR(\"Failed to linear search, ret=%d %s\", ret,\n                    IndexError::What(ret));\n          error.exchange(true);\n          return;\n        }\n        auto &result = search_result.doc_list_;\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (auto knn : result) {\n          one_gt.emplace_back(knn.key(), knn.score());\n        }\n        gt_[i] = one_gt;\n      };\n      for (size_t i = 0; i < linear_queries_.size(); ++i) {\n        if (error) {\n          break;\n        }\n        pool_->enqueue_and_wake(Closure::New(fun, i));\n      }\n      pool_->wait_finish();\n\n      if (error) {\n        cout << endl\n             << \"Ground truth file[\" << gt_file << \"] create failed!\" << endl;\n        gtf.close();\n        remove(gt_file.c_str());\n        return false;\n      }\n\n      for (size_t i = 0; i < gt_.size(); ++i) {\n        auto &gt = gt_[i];\n\n        gtf.write(&gt_count, sizeof(int));\n\n        for (size_t j = 0; j < gt.size(); j++) {\n          auto &one_gt = gt[j];\n\n          gtf.write(&one_gt.first, sizeof(uint64_t));\n          gtf.write(&one_gt.second, sizeof(float));\n        }\n\n        // if ground truth is less than gt count, fill it up\n        if (gt.size() != gt_count) {\n          std::cout\n              << \"WARN: GT result count less than GT expected count, index: \"\n              << i << \", expected GT count: \" << 
gt_count\n              << \", actual GT count: \" << gt.size() << std::endl;\n\n          uint64_t key{-1LLU};\n          float score{std::nanf(\"\")};\n\n          for (size_t j = gt.size(); j < gt_count; ++j) {\n            gtf.write(&key, sizeof(uint64_t));\n            gtf.write(&score, sizeof(float));\n          }\n        }\n      }\n\n      gtf.close();\n\n      if (!File::Rename(gt_file_temp, gt_file)) {\n        LOG_ERROR(\"failed to rename ground truth file, src: %s, dst: %s\",\n                  gt_file_temp.c_str(), gt_file.c_str());\n\n        return false;\n      }\n\n      cout << endl\n           << \"Ground truth file create successful in \"\n           << timer.milli_seconds() / 1000 << \"s.\" << endl;\n    } else {\n      if (!gtf.open(gt_file.c_str(), true)) {\n        LOG_ERROR(\"Failed to open ground truth file[%s]\", gt_file.c_str());\n        return false;\n      }\n      size_t file_size = gtf.size();\n\n      constexpr size_t LENGTH = 10240;\n      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);\n\n      char *buffer = new char[LENGTH];\n      gtf.read(buffer, sizeof(int));\n\n      size_t gt_count_input = (size_t) * (int *)buffer;\n      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;\n\n      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {\n        LOG_ERROR(\"Ground truth file[%s] content error!\", gt_file.c_str());\n        gtf.close();\n        return false;\n      }\n\n      size_t query_num = file_size / one_query_line_size;\n      if (one_query_line_size > LENGTH) {\n        delete[] buffer;\n        buffer = new char[one_query_line_size];\n      }\n\n      for (size_t n = 0; n < query_num; ++n) {\n        gtf.read(n * one_query_line_size, buffer, one_query_line_size);\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (size_t i = 0; i < gt_count; ++i) {\n          uint64_t key = *(uint64_t *)(buffer + sizeof(int) + 
GT_PAIR_SIZE * i);\n          float score = *(float *)(buffer + sizeof(int) + GT_PAIR_SIZE * i +\n                                   sizeof(uint64_t));\n\n          if (key != -1LLU) {\n            one_gt.emplace_back(key, score);\n          }\n        }\n        gt_.emplace_back(one_gt);\n      }\n      delete[] buffer;\n      cout << \"Load ground truth file[\" << gt_file << \"] done!\" << endl;\n    }\n\n    return true;\n  }\n\n\n  void recall_one_dense(\n      core_interface::Index::Pointer index,\n      core_interface::BaseIndexQueryParam::Pointer query_param, size_t topk,\n      size_t idx,\n      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {\n    const auto &query = batch_queries_[idx];\n\n    size_t thread_index = pool_->indexof_this();\n    fstream *knn_fs = nullptr;\n    fstream *linear_fs = nullptr;\n    if (output_fs.size() > thread_index) {\n      knn_fs = output_fs[thread_index].first;\n      linear_fs = output_fs[thread_index].second;\n    }\n\n    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,\n                                size_t query_idx) {\n      vector<IndexDocument> linear_res;\n\n      size_t result_size = std::min(topk, gt_[query_idx].size());\n      if (result_size == 0) {\n        return;\n      }\n\n      for (size_t i = 0; i < result_size; ++i) {\n        auto gt_node = gt_[query_idx][i];\n\n        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);\n      }\n\n\n      if (knn_fs) {\n        for (auto knn : knn_res) {\n          string str = \"query[\" + to_string(query_idx) + \"]\\tkey[\" +\n                       to_string(knn.key()) + \"], dist[\" +\n                       to_string(knn.score()) + \"]\\n\";\n          knn_fs->write(str.c_str(), str.size());\n        }\n      }\n      size_t match = 0;\n      bool asc =\n          (linear_res.size() > 1 &&\n           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))\n              ? 
false\n              : true;\n\n      map<int32_t, size_t> topk_matchs;\n      if (g_compare_by_id) {\n        for (size_t i = 0; i < topk_ids_.size(); ++i) {\n          topk_matchs[topk_ids_[i]] = 0;\n        }\n      }\n      for (size_t i = 0, j = 0; i < linear_res.size();) {\n        bool m = false;       // if current doc matched in max topk\n        bool changed = true;  // if i changed\n        if (g_compare_by_id) {\n          for (size_t k = 0; k < topk_ids_.size(); ++k) {\n            size_t dynamic_size = (size_t)topk_ids_[k];\n            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {\n              if (fabs(knn_res[dynamic_size - 1].score() -\n                       knn_res[dynamic_size].score()) >=\n                  numeric_limits<float>::epsilon()) {\n                break;\n              }\n            }\n            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {\n              if (linear_res[i].key() == knn_res[l].key()) {\n                topk_matchs[topk_ids_[k]]++;\n                if (k == topk_ids_.size() - 1) {\n                  m = true;\n                }\n                break;\n              }\n            }\n          }\n          ++i;\n          auto it = recall_res_.find(i);\n          if (it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * topk_matchs[i] / i;\n          }\n        } else {\n          size_t cur_topk = i + 1;\n          if (j < knn_res.size()) {\n            if (fabs(linear_res[i].score() - knn_res[j].score()) <\n                g_recall_precision) {\n              ++j;\n              ++i;\n              match++;\n              m = true;\n            } else {\n              if ((asc && linear_res[i].score() < knn_res[j].score()) ||\n                  (!asc && linear_res[i].score() > knn_res[j].score())) {\n                ++i;\n              } else {\n                changed = false;\n                ++j;\n              }\n    
        }\n          } else {\n            ++i;\n          }\n          auto it = recall_res_.find(cur_topk);\n          if (changed && it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * match / cur_topk;\n          }\n        }\n        if (linear_fs && changed) {\n          string str = string(m ? \"    HIT\" : \"NOT HIT\") + \"  query[\" +\n                       to_string(idx) + \"]\\tkey[\" +\n                       to_string(linear_res[i - 1].key()) + \"], dist[\" +\n                       to_string(linear_res[i - 1].score()) + \"]\\n\";\n          linear_fs->write(str.c_str(), str.size());\n        }\n      }\n    };\n\n    // prefilter\n    FilterResultCache filter_cache;\n    std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n    if (filter_mode_ == FM_TAG) {\n      if (batch_taglists_[idx].size() != 1) {\n        LOG_ERROR(\"query tag list not equal to one!\");\n        return;\n      }\n\n      int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],\n                                    tag_key_list_);\n      if (ret != 0) {\n        LOG_ERROR(\"prefilter failed, idx: %zu\", idx);\n        return;\n      }\n\n      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n      filter_ptr = std::make_shared<core::IndexFilter>();\n      filter_ptr->set(filterFunc);\n    }\n\n    core_interface::DenseVector dense_query;\n    dense_query.data = query.data();\n    core_interface::VectorData query_data;\n    query_data.vector = dense_query;\n\n    // query_param is required in the config, so it should not be nullptr\n    auto query_param_clone = query_param->Clone();\n    query_param_clone->topk = topk;\n    query_param_clone->filter = filter_ptr;\n    query_param_clone->is_linear = false;\n\n    if (call_batch_api_) {\n      size_t qnum = query.size() / dim_;\n      // For batch search, we need to search each query separately\n      // since Index::Search doesn't 
support batch natively in the same way\n      for (size_t i = 0; i < qnum; ++i) {\n        size_t query_idx = idx * batch_count_ + i;\n        if (query_idx >= linear_queries_.size()) {\n          break;\n        }\n\n        const auto &single_query = linear_queries_[query_idx];\n        core_interface::DenseVector single_dense_query;\n        single_dense_query.data = single_query.data();\n        core_interface::VectorData single_query_data;\n        single_query_data.vector = single_dense_query;\n\n        core_interface::SearchResult search_result;\n        int ret =\n            index->Search(single_query_data, query_param_clone, &search_result);\n        if (ret < 0) {\n          LOG_ERROR(\"Failed to knn_search batch, ret=%d %s\", ret,\n                    IndexError::What(ret));\n          return;\n        }\n        auto &knn_res = search_result.doc_list_;\n        cal_recall(knn_res, query_idx);\n      }\n    } else {\n      core_interface::SearchResult search_result;\n      int ret = index->Search(query_data, query_param_clone, &search_result);\n      if (ret < 0) {\n        LOG_ERROR(\"Failed to knn_search, ret=%d %s\", ret,\n                  IndexError::What(ret));\n        return;\n      }\n      auto &knn_res = search_result.doc_list_;\n      cal_recall(knn_res, idx);\n    }\n\n    // std::cout << \"id: \" << index << \": \\n\" <<\n    // knn_context->flow_context()->searcher_context()->profiler().display();\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  bool call_batch_api_;\n  string output_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n\n  // for gt\n  vector<vector<T>> linear_queries_;\n  vector<SparseData<T>> linear_sparse_data_;\n  vector<vector<uint64_t>> linear_taglists_;\n\n  // for recall\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> 
batch_taglists_;\n\n  size_t dim_;\n  size_t total_querys_;\n\n  map<size_t, float> recall_res_;\n  vector<int32_t> topk_ids_;\n  vector<vector<pair<uint64_t, float>>> gt_;\n\n  bool external_gt_file_enabled_{false};\n\n  FilterMode filter_mode_{FM_NONE};\n\n  static bool STOP_NOW;\n\n  // Tag lists for filtering\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n\n public:\n  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,\n                     const std::vector<uint64_t> &tag_key_list) {\n    id_to_tags_list_ = id_to_tags_list;\n    tag_key_list_ = tag_key_list;\n  }\n};\n\ntemplate <typename T>\nbool Recall<T>::STOP_NOW = false;\n\n//--------------------------------------------------\n// Sparse Recall\n//--------------------------------------------------\ntemplate <typename T>\nclass SparseRecall {\n public:\n  SparseRecall(size_t threads, const string &output, size_t batch_count,\n               FilterMode filter_mode)\n      : threads_(threads),\n        output_(output),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(true);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, true);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n      call_batch_api_ = false;\n    } else {\n      call_batch_api_ = true;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. 
press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  int transform_queries_without_hybrid_scale(\n      const vector<vector<T>> &queries,\n      const vector<vector<T>> &sparse_features,\n      vector<vector<T>> *queries_output,\n      vector<vector<T>> *sparse_features_output) {\n    if (!queries_output || !sparse_features_output) {\n      LOG_ERROR(\"input should not be empty in transfrom queries\");\n\n      return -1;\n    }\n\n    queries_output->clear();\n    sparse_features_output->clear();\n\n    for (size_t i = 0; i < queries.size(); ++i) {\n      vector<T> query_output;\n      vector<T> sparse_feature_output;\n\n      transform_query_without_hybrid_scale(queries[i], sparse_features[i],\n                                           &query_output,\n                                           &sparse_feature_output);\n\n      queries_output->push_back(query_output);\n      sparse_features_output->push_back(sparse_feature_output);\n    }\n\n    return 0;\n  }\n\n  void run_sparse(core_interface::Index::Pointer index,\n                  core_interface::BaseIndexQueryParam::Pointer query_param,\n                  const string &recall_tops, size_t gt_count) {\n    StringHelper::Split(recall_tops, \",\", &topk_ids_);\n    std::sort(topk_ids_.begin(), topk_ids_.end());\n\n    for (auto i : topk_ids_) {\n      recall_res_[i] = 0.0f;\n    }\n    size_t topk = recall_res_.rbegin()->first;\n\n    gt_count = topk < gt_count ? 
gt_count : topk;\n\n    if (external_gt_file_enabled_) {\n      cout << \"Internal ground truth file NOT used since external ground truth \"\n              \"file has been loaded\"\n           << endl;\n    } else {\n      cout << \"Loading internal ground truth file\" << endl;\n\n      if (!load_gt_sparse(index, gt_count)) {\n        LOG_ERROR(\"Load ground truth file failed!\");\n        return;\n      }\n    }\n\n    if (batch_sparse_counts_.size() < threads_) {\n      threads_ = batch_sparse_counts_.size();\n      pool_ = make_shared<ThreadPool>(true, threads_);\n      cout << \"Query size too small, resize thread pool count[\" << threads_\n           << \"]\" << endl;\n    }\n\n    // Prepare file handler\n    vector<pair<fstream *, fstream *>> output_fs;\n    if (!output_.empty()) {\n      string cmd = \"mkdir -p \" + output_;\n      int ret = system(cmd.c_str());\n      if (ret != 0) {\n        LOG_ERROR(\"execute cmd %s failed, ret=%d\", cmd.c_str(), ret);\n        return;\n      }\n      struct stat sb;\n      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {\n        cout << \"logs output to : \" << output_ << endl;\n        for (size_t i = 0; i < threads_; ++i) {\n          fstream *fs_k = new fstream();\n          fs_k->open(output_ + \"/t\" + to_string(i) + \".knn\", ios::out);\n          fstream *fs_l = new fstream();\n          fs_l->open(output_ + \"/t\" + to_string(i) + \".linear\", ios::out);\n          output_fs.push_back(make_pair(fs_k, fs_l));\n        }\n      }\n    }\n\n    signal(SIGINT, stop);\n    size_t i = 0;\n    for (; !STOP_NOW && i < batch_sparse_counts_.size();) {\n      if (pool_->pending_count() >= pool_->count()) {\n        this_thread::sleep_for(chrono::microseconds(1));\n        continue;\n      }\n\n      Closure::Pointer task =\n          Closure::New(this, &SparseRecall::recall_one_sparse, index,\n                       query_param, topk, i, output_fs);\n      pool_->enqueue_and_wake(task);\n\n      i++;\n    
}\n    pool_->wait_finish();\n\n    for (auto fs : output_fs) {\n      fs.first->close();\n      fs.second->close();\n      delete fs.first;\n      delete fs.second;\n    }\n    cout << \"Process query: \" << i << endl;\n    for (auto it : recall_res_) {\n      cout << \"Recall@\" << it.first << \": \"\n           << it.second / linear_queries_.size() << endl;\n    }\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,\n                           linear_sparse_data_, linear_taglists_)) {\n      LOG_ERROR(\"Load query error\");\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(linear_sparse_data_[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);\n        batch_sparse_features_.push_back(linear_sparse_data_[i].features);\n      }\n    } else {\n      size_t num_batch =\n          (linear_queries_.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          batch_sparse_count.push_back(linear_sparse_data_[idx].count);\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();\n               ++k) {\n            batch_sparse_feature.push_back(\n                
linear_sparse_data_[idx].features[k]);\n          }\n\n          idx = (idx + 1) % linear_queries_.size();\n        }\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n      }\n    }\n\n    total_querys_ = linear_queries_.size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);\n    } else {\n      LOG_ERROR(\"unsupported type\");\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  bool load_gt_sparse(core_interface::Index::Pointer index, size_t gt_count) {\n    std::string crc_str = compute_crc();\n\n    string gt_file = string(\"gt.\") + crc_str;\n\n    File gtf;\n    if (!gtf.IsRegular(gt_file.c_str())) {\n      cout << \"Ground truth file[\" << gt_file << \"] not exist, try to create it\"\n           << endl;\n      ElapsedTime timer;\n      size_t size = sizeof(uint64_t) + sizeof(float);\n      size_t file_size =\n          linear_sparse_data_.size() * (sizeof(int) + size * gt_count);\n\n      std::string gt_file_temp = gt_file + \".tmp\";\n      gtf.create(gt_file_temp.c_str(), file_size);\n\n      gt_.resize(linear_sparse_data_.size());\n\n      atomic_bool error(false);\n      size_t count = 0;\n      float s = linear_sparse_data_.size() / 100.0;\n      size_t pc = 0;\n      SpinMutex spin_lock;\n\n      function<void(size_t)> fun = [&](size_t i) {\n        spin_lock.lock();\n        count++;\n        size_t process = (size_t)ceil(count / s);\n        if (process > pc) {\n          pc = process;\n          stringstream msg;\n          msg << \"\\r\" << setw(3) << setfill(' ') << process << \"% \" << left\n              << setfill('=') << setw(process / 2 + 1) << \"[\" << right\n              << setfill(' ') << setw(51 
- process / 2) << \"]\";\n          cout << msg.str() << flush;\n        }\n        spin_lock.unlock();\n\n        SparseData<T> sparse_data = linear_sparse_data_[i];\n\n        // prefilter\n        FilterResultCache filter_cache;\n        std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n        if (filter_mode_ == FM_TAG) {\n          if (batch_taglists_[i].size() != 1) {\n            LOG_ERROR(\"query tag list not equal to one!\");\n            return;\n          }\n\n          int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[i][0],\n                                        tag_key_list_);\n          if (ret != 0) {\n            LOG_ERROR(\"prefilter failed, idx: %zu\", i);\n            return;\n          }\n\n          auto filterFunc = [&](uint64_t key) {\n            return filter_cache.find(key);\n          };\n\n          filter_ptr = std::make_shared<IndexFilter>();\n          filter_ptr->set(filterFunc);\n        }\n\n        core_interface::SparseVector sparse_query;\n        sparse_query.count = sparse_data.count;\n        sparse_query.indices = sparse_data.indices.data();\n        sparse_query.values = sparse_data.features.data();\n        core_interface::VectorData query_data;\n        query_data.vector = sparse_query;\n\n        auto query_param = std::make_shared<core_interface::FlatQueryParam>();\n        query_param->topk = gt_count;\n        query_param->is_linear = true;\n        query_param->filter = filter_ptr;\n\n        core_interface::SearchResult search_result;\n        int ret = index->Search(query_data, query_param, &search_result);\n        if (ret < 0) {\n          LOG_ERROR(\"Failed to sparse linear search, ret=%d\", ret);\n          error.exchange(true);\n          return;\n        }\n        auto &result = search_result.doc_list_;\n\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (auto knn : result) {\n          one_gt.emplace_back(knn.key(), knn.score());\n        
}\n        gt_[i] = one_gt;\n      };\n\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        if (error) {\n          break;\n        }\n        pool_->enqueue_and_wake(Closure::New(fun, i));\n      }\n      pool_->wait_finish();\n\n      if (error) {\n        cout << endl\n             << \"Ground truth file[\" << gt_file << \"] create failed!\" << endl;\n        gtf.close();\n        remove(gt_file.c_str());\n        return false;\n      }\n\n      for (size_t i = 0; i < gt_.size(); ++i) {\n        auto &gt = gt_[i];\n\n        gtf.write(&gt_count, sizeof(int));\n\n        for (size_t j = 0; j < gt.size(); j++) {\n          auto &one_gt = gt[j];\n\n          gtf.write(&one_gt.first, sizeof(uint64_t));\n          gtf.write(&one_gt.second, sizeof(float));\n        }\n\n        // if ground truth is less than gt count, fill it up\n        if (gt.size() != gt_count) {\n          std::cout\n              << \"WARN: GT result count less than GT expected count, index: \"\n              << i << \", expected GT count: \" << gt_count\n              << \", actual GT count: \" << gt.size() << std::endl;\n\n          uint64_t key{-1LLU};\n          float score{std::nanf(\"\")};\n\n          for (size_t j = gt.size(); j < gt_count; ++j) {\n            gtf.write(&key, sizeof(uint64_t));\n            gtf.write(&score, sizeof(float));\n          }\n        }\n      }\n      gtf.close();\n\n      if (!File::Rename(gt_file_temp, gt_file)) {\n        LOG_ERROR(\"failed to rename ground truth file, src: %s, dst: %s\",\n                  gt_file_temp.c_str(), gt_file.c_str());\n\n        return false;\n      }\n\n      cout << endl\n           << \"Ground truth file create successful in \"\n           << timer.milli_seconds() / 1000 << \"s.\" << endl;\n    } else {\n      if (!gtf.open(gt_file.c_str(), true)) {\n        LOG_ERROR(\"Failed to open ground truth file[%s]\", gt_file.c_str());\n        return false;\n      }\n      size_t file_size = gtf.size();\n\n     
 constexpr size_t LENGTH = 10240;\n      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);\n\n      char *buffer = new char[LENGTH];\n      gtf.read(buffer, sizeof(int));\n\n      size_t gt_count_input = (size_t) * (int *)buffer;\n      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;\n\n      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {\n        LOG_ERROR(\"Ground truth file[%s] content error!\", gt_file.c_str());\n        gtf.close();\n        return false;\n      }\n\n      size_t query_num = file_size / one_query_line_size;\n      if (one_query_line_size > LENGTH) {\n        delete[] buffer;\n        buffer = new char[one_query_line_size];\n      }\n\n      for (size_t n = 0; n < query_num; ++n) {\n        gtf.read(n * one_query_line_size, buffer, one_query_line_size);\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (size_t i = 0; i < gt_count; ++i) {\n          uint64_t key = *(uint64_t *)(buffer + sizeof(int) + GT_PAIR_SIZE * i);\n          float score = *(float *)(buffer + sizeof(int) + GT_PAIR_SIZE * i +\n                                   sizeof(uint64_t));\n\n          if (key != -1LLU) {\n            one_gt.emplace_back(key, score);\n          }\n        }\n\n        gt_.emplace_back(one_gt);\n      }\n\n      delete[] buffer;\n      cout << \"Load ground truth file[\" << gt_file << \"] done!\" << endl;\n    }\n\n    return true;\n  }\n\n  bool load_external_gt_file(const std::string &external_gt_file,\n                             const std::string &first_sep,\n                             const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    bool ret =\n        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);\n    if (ret) {\n      cout << \"Load external ground truth file[\"\n           << File::BaseName(external_gt_file) << \"] done!\" << endl;\n      external_gt_file_enabled_ = true;\n    } else 
{\n      LOG_ERROR(\"Failed to load ground truth file!\");\n    }\n\n    return ret;\n  }\n\n private:\n  std::string compute_crc() {\n    uint32_t crc = 0u;\n    // sparse\n    if (linear_sparse_data_.size() > 0) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        crc = Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),\n                           crc);\n        crc =\n            Crc32c::Hash(linear_sparse_data_[i].indices.data(),\n                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);\n        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),\n                           linear_sparse_data_[i].count * sizeof(T), crc);\n      }\n    }\n\n    char crc_str[64];\n    snprintf(crc_str, sizeof(crc_str), \"%X\", crc);\n\n    return std::string(crc_str);\n  }\n\n\n  void recall_one_sparse(\n      core_interface::Index::Pointer index,\n      core_interface::BaseIndexQueryParam::Pointer query_param, size_t topk,\n      size_t idx,\n      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {\n    const auto &sparse_count = batch_sparse_counts_[idx];\n    const auto &sparse_index = batch_sparse_indices_[idx];\n    const auto &sparse_feature = batch_sparse_features_[idx];\n\n    size_t thread_index = pool_->indexof_this();\n    fstream *knn_fs = nullptr;\n    fstream *linear_fs = nullptr;\n    if (output_fs.size() > thread_index) {\n      knn_fs = output_fs[thread_index].first;\n      linear_fs = output_fs[thread_index].second;\n    }\n\n    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,\n                                size_t query_idx) {\n      vector<IndexDocument> linear_res;\n\n      size_t result_size = std::min(topk, gt_[query_idx].size());\n      if (result_size == 0) {\n        return;\n      }\n\n      for (size_t i = 0; i < result_size; ++i) {\n        auto gt_node = gt_[query_idx][i];\n\n        linear_res.emplace_back(gt_node.first, gt_node.second, 
gt_node.first);\n      }\n\n      if (knn_fs) {\n        for (auto knn : knn_res) {\n          string str = \"query[\" + to_string(query_idx) + \"]\\tkey[\" +\n                       to_string(knn.key()) + \"], dist[\" +\n                       to_string(knn.score()) + \"]\\n\";\n          knn_fs->write(str.c_str(), str.size());\n        }\n      }\n\n      size_t match = 0;\n      bool asc =\n          (linear_res.size() > 1 &&\n           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))\n              ? false\n              : true;\n\n      map<int32_t, size_t> topk_matchs;\n      if (g_compare_by_id) {\n        for (size_t i = 0; i < topk_ids_.size(); ++i) {\n          topk_matchs[topk_ids_[i]] = 0;\n        }\n      }\n\n      for (size_t i = 0, j = 0; i < linear_res.size();) {\n        bool m = false;       // if current doc matched in max topk\n        bool changed = true;  // if i changed\n        if (g_compare_by_id) {\n          for (size_t k = 0; k < topk_ids_.size(); ++k) {\n            size_t dynamic_size = (size_t)topk_ids_[k];\n            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {\n              if (fabs(knn_res[dynamic_size - 1].score() -\n                       knn_res[dynamic_size].score()) >=\n                  numeric_limits<float>::epsilon()) {\n                break;\n              }\n            }\n            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {\n              if (linear_res[i].key() == knn_res[l].key()) {\n                topk_matchs[topk_ids_[k]]++;\n                if (k == topk_ids_.size() - 1) {\n                  m = true;\n                }\n                break;\n              }\n            }\n          }\n          ++i;\n\n          auto it = recall_res_.find(i);\n          if (it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * topk_matchs[i] / i;\n          }\n        } else {\n          size_t cur_topk = 
i + 1;\n          if (j < knn_res.size()) {\n            if (fabs(linear_res[i].score() - knn_res[j].score()) <\n                g_recall_precision) {\n              ++j;\n              ++i;\n              match++;\n              m = true;\n            } else {\n              if ((asc && linear_res[i].score() < knn_res[j].score()) ||\n                  (!asc && linear_res[i].score() > knn_res[j].score())) {\n                ++i;\n              } else {\n                changed = false;\n                ++j;\n              }\n            }\n          } else {\n            ++i;\n          }\n\n          auto it = recall_res_.find(cur_topk);\n          if (changed && it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * match / cur_topk;\n          }\n        }\n\n        if (linear_fs && changed) {\n          string str = string(m ? \"    HIT\" : \"NOT HIT\") + \"  query[\" +\n                       to_string(idx) + \"]\\tkey[\" +\n                       to_string(linear_res[i - 1].key()) + \"], dist[\" +\n                       to_string(linear_res[i - 1].score()) + \"]\\n\";\n          linear_fs->write(str.c_str(), str.size());\n        }\n      }\n    };\n\n    FilterResultCache filter_cache;\n    std::shared_ptr<IndexFilter> filter_ptr = nullptr;\n    if (filter_mode_ == FM_TAG) {\n      if (batch_taglists_[idx].size() != 1) {\n        LOG_ERROR(\"query tag list not equal to one!\");\n        return;\n      }\n\n      int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],\n                                    tag_key_list_);\n      if (ret != 0) {\n        LOG_ERROR(\"prefilter failed, idx: %zu\", idx);\n        return;\n      }\n\n      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n      filter_ptr = std::make_shared<core::IndexFilter>();\n      filter_ptr->set(filterFunc);\n    }\n\n    core_interface::SparseVector sparse_query;\n    sparse_query.count = 
sparse_count[0];\n    sparse_query.indices = sparse_index.data();\n    sparse_query.values = sparse_feature.data();\n    core_interface::VectorData query_data;\n    query_data.vector = sparse_query;\n\n    auto query_param_clone = query_param->Clone();\n    query_param_clone->topk = topk;\n    query_param_clone->filter = filter_ptr;\n    query_param_clone->is_linear = true;\n\n    if (call_batch_api_) {\n      // For batch search, we need to search each query separately\n      for (size_t i = 0; i < sparse_count.size(); ++i) {\n        size_t query_idx = idx * batch_count_ + i;\n        if (query_idx >= linear_sparse_data_.size()) {\n          break;\n        }\n\n        const auto &single_sparse = linear_sparse_data_[query_idx];\n        core_interface::SparseVector single_sparse_query;\n        single_sparse_query.count = single_sparse.count;\n        single_sparse_query.indices = single_sparse.indices.data();\n        single_sparse_query.values = single_sparse.features.data();\n        core_interface::VectorData single_query_data;\n        single_query_data.vector = single_sparse_query;\n\n        core_interface::SearchResult search_result;\n        int ret =\n            index->Search(single_query_data, query_param_clone, &search_result);\n        if (ret < 0) {\n          LOG_ERROR(\"Failed to sparse_knn_search batch, ret=%d %s\", ret,\n                    IndexError::What(ret));\n          return;\n        }\n        auto &knn_res = search_result.doc_list_;\n        cal_recall(knn_res, query_idx);\n      }\n    } else {\n      core_interface::SearchResult search_result;\n      int ret = index->Search(query_data, query_param_clone, &search_result);\n      if (ret < 0) {\n        LOG_ERROR(\"Failed to sparse_knn_search, ret=%d %s\", ret,\n                  IndexError::What(ret));\n        return;\n      }\n      auto &knn_res = search_result.doc_list_;\n      cal_recall(knn_res, idx);\n    }\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  
bool call_batch_api_;\n  string output_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n\n  // for gt\n  vector<vector<T>> linear_queries_;\n  vector<SparseData<T>> linear_sparse_data_;\n  vector<uint32_t> linear_partitions_;\n  vector<vector<uint64_t>> linear_taglists_;\n\n  std::map<std::string, vector<vector<T>>> linear_queries_scaled_;\n  std::map<std::string, vector<vector<T>>> linear_sparse_features_scaled_;\n\n  // for recall\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<uint32_t>> batch_partitions_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  std::map<std::string, vector<vector<T>>> batch_queries_scaled_;\n  std::map<std::string, vector<vector<T>>> batch_sparse_features_scaled_;\n\n  size_t total_querys_;\n\n  map<size_t, float> recall_res_;\n  vector<int32_t> topk_ids_;\n  vector<vector<pair<uint64_t, float>>> gt_;\n\n  map<string, vector<vector<pair<uint64_t, float>>>> gt_hybrid_;\n  bool external_gt_file_enabled_{false};\n\n  FilterMode filter_mode_{FM_NONE};\n\n  // Tag lists for filtering\n  std::vector<std::vector<uint64_t>> id_to_tags_list_;\n  std::vector<uint64_t> tag_key_list_;\n\n public:\n  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,\n                     const std::vector<uint64_t> &tag_key_list) {\n    id_to_tags_list_ = id_to_tags_list;\n    tag_key_list_ = tag_key_list;\n  }\n\n  static bool STOP_NOW;\n};\n\ntemplate <typename T>\nbool SparseRecall<T>::STOP_NOW = false;\n\nbool check_config(YAML::Node &config_node) {\n  auto common = config_node[\"IndexCommon\"];\n  if (!common) {\n    LOG_ERROR(\"Can not find [IndexCommon] in config\");\n    return false;\n  }\n  if (!common[\"IndexConfig\"]) {\n    LOG_ERROR(\"Can not find [IndexConfig] in config\");\n    return false;\n  }\n  if (!common[\"IndexPath\"]) {\n    LOG_ERROR(\"Can not 
find [IndexPath] in config\");\n    return false;\n  }\n  if (!common[\"TopK\"]) {\n    LOG_ERROR(\"Can not find [TopK] in config\");\n    return false;\n  }\n  if (!common[\"QueryFile\"]) {\n    LOG_ERROR(\"Can not find [QueryFile] in config\");\n    return false;\n  }\n\n  auto query_config = config_node[\"QueryConfig\"];\n  if (!query_config) {\n    LOG_ERROR(\"Can not find [QueryConfig] in config\");\n    return false;\n  }\n  if (!query_config[\"QueryParam\"]) {\n    LOG_ERROR(\"Can not find [QueryConfig.QueryParam] in config\");\n    return false;\n  }\n  return true;\n}\n\nvoid usage(void) {\n  cout << \"Usage: recall CONFIG.yaml [plugin file path]\" << endl;\n}\n\nint recall_dense(std::string &query_type, size_t thread_count,\n                 size_t batch_count, string top_k, size_t gt_count,\n                 string query_file, string &first_sep, string &second_sep,\n                 string &ground_truth_file, string &ground_truth_first_sep,\n                 string ground_truth_second_sep,\n                 core_interface::Index::Pointer index,\n                 core_interface::BaseIndexQueryParam::Pointer query_param,\n                 string &index_dir, string &log_dir, FilterMode filter_mode) {\n  std::vector<std::vector<uint64_t>> id_to_tags_list;\n  std::vector<uint64_t> tag_key_list;\n  // Load tag lists if available\n  load_taglists(index_dir, id_to_tags_list, tag_key_list);\n\n  if (query_type == \"float\") {\n    Recall<float> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    recall.set_tag_lists(id_to_tags_list, tag_key_list);\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    recall.run_dense(index, query_param, top_k, 
gt_count);\n  } else if (query_type == \"int8\") {\n    Recall<int8_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    recall.set_tag_lists(id_to_tags_list, tag_key_list);\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    recall.run_dense(index, query_param, top_k, gt_count);\n  } else if (query_type == \"binary\") {\n    Recall<uint32_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    recall.set_tag_lists(id_to_tags_list, tag_key_list);\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    recall.run_dense(index, query_param, top_k, gt_count);\n  } else if (query_type == \"binary64\") {\n    Recall<uint64_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    recall.set_tag_lists(id_to_tags_list, tag_key_list);\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    recall.run_dense(index, query_param, top_k, gt_count);\n  } else {\n    LOG_ERROR(\"Can not recognize type: %s\", query_type.c_str());\n  }\n\n  return 0;\n}\n\nint recall_sparse(std::string &query_type, size_t thread_count,\n        
          size_t batch_count, string top_k, size_t gt_count,\n                  string &query_file, string &first_sep, string &second_sep,\n                  string &ground_truth_file, string &ground_truth_first_sep,\n                  string &ground_truth_second_sep,\n                  core_interface::Index::Pointer index,\n                  core_interface::BaseIndexQueryParam::Pointer query_param,\n                  string &index_dir, string &log_dir, FilterMode filter_mode) {\n  if (query_type == \"float\") {\n    SparseRecall<float> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    std::vector<std::vector<uint64_t>> id_to_tags_list;\n    std::vector<uint64_t> tag_key_list;\n    // Load tag lists if available\n    if (load_taglists(index_dir, id_to_tags_list, tag_key_list) != 0) {\n      LOG_ERROR(\"Failed to load tag lists\");\n      return -1;\n    }\n\n    recall.set_tag_lists(id_to_tags_list, tag_key_list);\n\n    recall.run_sparse(index, query_param, top_k, gt_count);\n  } else {\n    LOG_ERROR(\"Can not recognize type: %s\", query_type.c_str());\n  }\n\n  return 0;\n}\n\nint get_recall_precision(string &recall_precision_string) {\n  constexpr float DEFAULT_RECALL_PRECISION = 1e-6;\n\n  if (recall_precision_string == \"\") {\n    g_recall_precision = DEFAULT_RECALL_PRECISION;\n    return true;\n  }\n\n  try {\n    g_recall_precision = std::stof(recall_precision_string);\n    std::cout << \"Recall Score Precesion: \" << g_recall_precision << std::endl;\n  } catch (const std::invalid_argument &e) {\n    LOG_ERROR(\"Exeception in getting recall precision: %s, value: %s\", e.what(),\n          
    recall_precision_string.c_str());\n    return -1;\n  } catch (const std::out_of_range &e) {\n    LOG_ERROR(\n        \"Out of range exception in getting recall precision: %s, value: %s\",\n        e.what(), recall_precision_string.c_str());\n    return -1;\n  }\n\n  return true;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      LOG_ERROR(\"Failed to load plugin: %s (%s)\", argv[i], error.c_str());\n      return -1;\n    }\n  }\n\n  YAML::Node config_node;\n  try {\n    config_node = YAML::LoadFile(argv[1]);\n  } catch (...) {\n    LOG_ERROR(\"Load YAML file[%s] failed!\", argv[1]);\n    return -1;\n  }\n  if (!check_config(config_node)) {\n    return -1;\n  }\n  auto config_common = config_node[\"IndexCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n  string log_level = config_common[\"LogLevel\"]\n                         ? config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  // Calculate Recall\n  string log_dir = \"\";\n  if (config_common[\"RecallLogDir\"]) {\n    log_dir = config_common[\"RecallLogDir\"].as<string>();\n  }\n  size_t thread_count = config_common[\"RecallThreadCount\"]\n                            ? 
config_common[\"RecallThreadCount\"].as<uint64_t>()\n                            : 0;\n  size_t gt_count = config_common[\"RecallGTCount\"]\n                        ? config_common[\"RecallGTCount\"].as<uint64_t>()\n                        : 100;\n  size_t batch_count = config_common[\"RecallBatchCount\"]\n                           ? config_common[\"RecallBatchCount\"].as<uint64_t>()\n                           : 0;\n  g_compare_by_id = config_common[\"CompareById\"]\n                        ? config_common[\"CompareById\"].as<bool>()\n                        : 0;\n  string top_k = config_common[\"TopK\"].as<string>();\n\n  string recall_precision_string =\n      config_common[\"RecallScorePrecision\"]\n          ? config_common[\"RecallScorePrecision\"].as<string>()\n          : \"\";\n\n  if (!get_recall_precision(recall_precision_string)) {\n    LOG_ERROR(\"Get recall precision failed, value: %s\",\n              recall_precision_string.c_str());\n    return -1;\n  }\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  FilterMode filter_mode{FM_NONE};\n  if (config_common[\"FilterMode\"]) {\n    std::string filter_mode_str = config_common[\"FilterMode\"].as<string>();\n    if (filter_mode_str == \"tag\") {\n      filter_mode = FM_TAG;\n    }\n  }\n\n  string query_file = config_common[\"QueryFile\"].as<string>();\n\n  string first_sep = config_common[\"QueryFirstSep\"]\n                         ? config_common[\"QueryFirstSep\"].as<string>()\n                         : \";\";\n  string second_sep = config_common[\"QuerySecondSep\"]\n                          ? 
config_common[\"QuerySecondSep\"].as<string>()\n                          : \" \";\n  string query_type = config_common[\"QueryType\"]\n                          ? config_common[\"QueryType\"].as<string>()\n                          : \"float\";\n  string container_type = config_common[\"ContainerType\"]\n                              ? config_common[\"ContainerType\"].as<string>()\n                              : \"MMapFileStorage\";\n\n  string ground_truth_file = \"\";\n  string ground_truth_first_sep = \";\";\n  string ground_truth_second_sep = \" \";\n\n  if (config_common[\"GroundTruthFile\"]) {\n    ground_truth_file = config_common[\"GroundTruthFile\"].as<string>();\n\n    if (config_common[\"GroundTruthFirstSep\"]) {\n      ground_truth_first_sep =\n          config_common[\"GroundTruthFirstSep\"].as<string>();\n    }\n\n    if (config_common[\"GroundTruthSecondSep\"]) {\n      ground_truth_second_sep =\n          config_common[\"GroundTruthSecondSep\"].as<string>();\n    }\n  }\n\n  string index_dir = config_common[\"IndexPath\"].as<string>();\n\n  core_interface::Index::Pointer index;\n  core_interface::BaseIndexQueryParam::Pointer query_param;\n  if (parse_and_load_index_param(config_node, index_dir, index, query_param) !=\n      0) {\n    LOG_ERROR(\"Failed to parse and load index param\");\n    return -1;\n  }\n\n  if (retrieval_mode == RM_DENSE) {\n    recall_dense(query_type, thread_count, batch_count, top_k, gt_count,\n                 query_file, first_sep, second_sep, ground_truth_file,\n                 ground_truth_first_sep, ground_truth_second_sep, index,\n                 query_param, index_dir, log_dir, filter_mode);\n  } else if (retrieval_mode == RM_SPARSE) {\n    recall_sparse(query_type, thread_count, batch_count, top_k, gt_count,\n                  query_file, first_sep, second_sep, ground_truth_file,\n                  ground_truth_first_sep, ground_truth_second_sep, index,\n                  query_param, index_dir, log_dir, 
filter_mode);\n  } else {\n    std::string mode = retrieval_mode == 1 ? \"Dense\" : \"Sparse\";\n    LOG_ERROR(\"unsupported retrieval mode: %s\", mode.c_str());\n    return -1;\n  }\n\n  // Cleanup\n  index->Close();\n\n  cout << \"Recall done.\" << endl;\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/recall_original.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <sys/stat.h>\n#include <signal.h>\n#include <iomanip>\n#include <iostream>\n#include <mutex>\n#include <ailego/parallel/lock.h>\n#include <zvec/ailego/hash/crc32c.h>\n#include <zvec/ailego/io/file.h>\n#include <zvec/ailego/parallel/thread_pool.h>\n#include <zvec/ailego/utility/string_helper.h>\n#include <zvec/ailego/utility/time_helper.h>\n#include \"zvec/core/framework/index_plugin.h\"\n#include \"zvec/core/interface/index_factory.h\"\n#include \"zvec/core/interface/index_param.h\"\n#include \"filter_result_cache.h\"\n#include \"flow.h\"\n#include \"txt_input_reader.h\"\n\n#ifdef __clang__\n#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wshadow\"\n#pragma clang diagnostic ignored \"-Wdeprecated-declarations\"\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wshadow\"\n#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n#endif\n\n#include <yaml-cpp/yaml.h>\n\n#ifdef __clang__\n#pragma clang diagnostic pop\n#elif defined(__GNUC__) || defined(__GNUG__)\n#pragma GCC diagnostic pop\n#endif\n\nusing namespace std;\nusing namespace zvec::core;\nusing namespace zvec::ailego;\n\nusing Flow = Flow;\nusing SparseFlow = SparseFlow;\n\nmutex recall_lock;\nbool g_compare_by_id = false;\nfloat 
g_recall_precision;\n\n//--------------------------------------------------\n// Recall\n//--------------------------------------------------\nenum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };\n\nenum FilterMode { FM_UNDEFINED = 0, FM_NONE = 1, FM_TAG = 2 };\n\ntemplate <typename T>\nclass Recall {\n public:\n  Recall(size_t threads, const string &output, size_t batch_count,\n         FilterMode filter_mode)\n      : threads_(threads),\n        output_(output),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(true);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, true);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n      call_batch_api_ = false;\n    } else {\n      call_batch_api_ = true;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  void run_dense(Flow *flower, const string &recall_tops, size_t gt_count) {\n    StringHelper::Split(recall_tops, \",\", &topk_ids_);\n    std::sort(topk_ids_.begin(), topk_ids_.end());\n\n    for (auto i : topk_ids_) {\n      recall_res_[i] = 0.0f;\n    }\n    size_t topk = recall_res_.rbegin()->first;\n\n    gt_count = topk < gt_count ? 
gt_count : topk;\n\n    if (external_gt_file_enabled_) {\n      cout << \"Internal ground truth file NOT used since external ground truth \"\n              \"file has been loaded\"\n           << endl;\n    } else {\n      cout << \"Loading internal ground truth file\" << endl;\n\n      if (!load_gt_dense(flower, gt_count)) {\n        cerr << \"Load ground truth file failed!\" << endl;\n        return;\n      }\n    }\n\n    if (batch_queries_.size() < threads_) {\n      threads_ = batch_queries_.size();\n      pool_ = make_shared<ThreadPool>(true, threads_);\n      cout << \"Query size too small, resize thread pool count[\" << threads_\n           << \"]\" << endl;\n    }\n\n    // Prepare file handler\n    vector<pair<fstream *, fstream *>> output_fs;\n    if (!output_.empty()) {\n      string cmd = \"mkdir -p \" + output_;\n      int ret = system(cmd.c_str());\n      if (ret != 0) {\n        std::cerr << \"execute cmd \" << cmd << \" failed\" << std::endl;\n        return;\n      }\n      struct stat sb;\n      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {\n        cout << \"logs output to : \" << output_ << endl;\n        for (size_t i = 0; i < threads_; ++i) {\n          fstream *fs_k = new fstream();\n          fs_k->open(output_ + \"/t\" + to_string(i) + \".knn\", ios::out);\n          fstream *fs_l = new fstream();\n          fs_l->open(output_ + \"/t\" + to_string(i) + \".linear\", ios::out);\n          output_fs.push_back(make_pair(fs_k, fs_l));\n        }\n      }\n    }\n\n    signal(SIGINT, stop);\n    size_t i = 0;\n    for (; !STOP_NOW && i < batch_queries_.size();) {\n      if (pool_->pending_count() >= pool_->count()) {\n        this_thread::sleep_for(chrono::microseconds(1));\n        continue;\n      }\n\n      Closure::Pointer task = Closure::New(this, &Recall::recall_one_dense,\n                                           flower, topk, i, output_fs);\n      pool_->enqueue_and_wake(task);\n\n      i++;\n    }\n    
pool_->wait_finish();\n\n    for (auto fs : output_fs) {\n      fs.first->close();\n      fs.second->close();\n      delete fs.first;\n      delete fs.second;\n    }\n    cout << \"Process query: \" << i << endl;\n    for (auto it : recall_res_) {\n      cout << \"Recall@\" << it.first << \": \"\n           << it.second / linear_queries_.size() << endl;\n    }\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,\n                           linear_sparse_data_, linear_taglists_)) {\n      cerr << \"Load query error\" << endl;\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      batch_queries_ = linear_queries_;\n\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(linear_sparse_data_[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);\n        batch_sparse_features_.push_back(linear_sparse_data_[i].features);\n      }\n\n      for (size_t i = 0; i < linear_taglists_.size(); ++i) {\n        vector<vector<uint64_t>> new_taglists;\n        new_taglists.push_back(linear_taglists_[i]);\n\n        batch_taglists_.push_back(std::move(new_taglists));\n      }\n    } else {\n      size_t num_batch =\n          (linear_queries_.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<T> batch_query;\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n        vector<vector<uint64_t>> batch_taglists;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          for (size_t k = 0; k < linear_queries_[idx].size(); ++k) {\n            
batch_query.push_back(linear_queries_[idx][k]);\n          }\n\n          batch_sparse_count.push_back(linear_sparse_data_[idx].count);\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();\n               ++k) {\n            batch_sparse_feature.push_back(\n                linear_sparse_data_[idx].features[k]);\n          }\n\n          idx = (idx + 1) % linear_queries_.size();\n        }\n\n        batch_queries_.push_back(batch_query);\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n        batch_taglists_.push_back(batch_taglists);\n      }\n    }\n\n    dim_ = linear_queries_[0].size();\n    total_querys_ = linear_queries_.size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);\n    } else {\n      cerr << \"unsupported type\";\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  bool load_external_gt_file(const std::string &external_gt_file,\n                             const std::string &first_sep,\n                             const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    bool ret =\n        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);\n    if (ret) {\n      cout << \"Load external ground truth file[\"\n           << File::BaseName(external_gt_file) << \"] done!\" << endl;\n      external_gt_file_enabled_ = true;\n    } else {\n      cerr << \"Failed to load ground truth file!\" << endl;\n    }\n\n    return ret;\n  }\n\n private:\n  std::string compute_crc(size_t 
gt_count) {\n    uint32_t crc = 0u;\n    // dense\n    if (batch_queries_.size() > 0) {\n      size_t one_size = dim_ * sizeof(T);\n      size_t data_size = total_querys_ * one_size + sizeof(size_t);\n      char *data = new char[data_size];\n      size_t q = 0;\n      char *p = data;\n      for (; q < batch_queries_.size(); ++q) {\n        memcpy(p, batch_queries_[q].data(),\n               batch_queries_[q].size() * sizeof(T));\n        p += batch_queries_[q].size() * sizeof(T);\n      }\n      memcpy(p, &gt_count, sizeof(size_t));\n      crc = Crc32c::Hash(data, data_size, crc);\n      delete[] data;\n    }\n\n    // sparse\n    if (linear_sparse_data_.size() > 0) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        crc = Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),\n                           crc);\n        crc =\n            Crc32c::Hash(linear_sparse_data_[i].indices.data(),\n                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);\n        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),\n                           linear_sparse_data_[i].count * sizeof(T), crc);\n      }\n    }\n\n    char crc_str[64];\n    snprintf(crc_str, sizeof(crc_str), \"%X\", crc);\n\n    return std::string(crc_str);\n  }\n\n  bool load_gt_dense(Flow *flower, size_t gt_count) {\n    std::string crc_str = compute_crc(gt_count);\n\n    string gt_file = string(\"gt.\") + crc_str;\n\n    File gtf;\n    if (!gtf.IsRegular(gt_file.c_str())) {\n      cout << \"Ground truth file[\" << gt_file << \"] not exist, try to create it\"\n           << endl;\n      ElapsedTime timer;\n\n      size_t size = sizeof(uint64_t) + sizeof(float);\n      size_t file_size =\n          linear_queries_.size() * (sizeof(int) + size * gt_count);\n\n      std::string gt_file_temp = gt_file + \".tmp\";\n      gtf.create(gt_file_temp.c_str(), file_size);\n\n      gt_.resize(linear_queries_.size());\n\n      atomic_bool error(false);\n      
size_t count = 0;\n      float s = linear_queries_.size() / 100.0;\n      size_t pc = 0;\n      SpinMutex spin_lock;\n\n      function<void(size_t)> fun = [&](size_t i) {\n        spin_lock.lock();\n        count++;\n        size_t process = (size_t)ceil(count / s);\n        if (process > pc) {\n          pc = process;\n          stringstream msg;\n          msg << \"\\r\" << setw(3) << setfill(' ') << process << \"% \" << left\n              << setfill('=') << setw(process / 2 + 1) << \"[\" << right\n              << setfill(' ') << setw(51 - process / 2) << \"]\";\n          cout << msg.str() << flush;\n        }\n        spin_lock.unlock();\n\n        auto query = linear_queries_[i];\n        Flow::Context::Pointer context = flower->create_context();\n        if (!context) {\n          cerr << \"Failed to create search context\" << endl;\n          return;\n        }\n\n        FilterResultCache filter_cache;\n        if (filter_mode_ == FM_TAG) {\n          if (batch_taglists_[i].size() != 1) {\n            cerr << \"query tag list not equal to one!\" << endl;\n            return;\n          }\n\n          int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                        batch_taglists_[i][0],\n                                        flower->tag_key_list());\n          if (ret != 0) {\n            cerr << \"prefilter failed, idx: \" << i << std::endl;\n\n            return;\n          }\n\n          auto filterFunc = [&](uint64_t key) {\n            return filter_cache.find(key);\n          };\n\n          context->set_filter(filterFunc);\n        }\n\n        context->set_topk(gt_count);\n        int ret = do_linear_search<T>(flower, context, query);\n        if (ret < 0) {\n          cerr << \"Failed to linear search, ret=\" << ret << endl;\n          error.exchange(true);\n          return;\n        }\n        auto result = context->result();\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n 
       for (auto knn : result) {\n          one_gt.emplace_back(knn.key(), knn.score());\n        }\n        gt_[i] = one_gt;\n      };\n      for (size_t i = 0; i < linear_queries_.size(); ++i) {\n        if (error) {\n          break;\n        }\n        pool_->enqueue_and_wake(Closure::New(fun, i));\n      }\n      pool_->wait_finish();\n\n      if (error) {\n        cout << endl\n             << \"Ground truth file[\" << gt_file << \"] create failed!\" << endl;\n        gtf.close();\n        remove(gt_file.c_str());\n        return false;\n      }\n\n      for (size_t i = 0; i < gt_.size(); ++i) {\n        auto &gt = gt_[i];\n\n        gtf.write(&gt_count, sizeof(int));\n\n        for (size_t j = 0; j < gt.size(); j++) {\n          auto &one_gt = gt[j];\n\n          gtf.write(&one_gt.first, sizeof(uint64_t));\n          gtf.write(&one_gt.second, sizeof(float));\n        }\n\n        // if ground truth is less than gt count, fill it up\n        if (gt.size() != gt_count) {\n          std::cout\n              << \"WARN: GT result count less than GT expected count, index: \"\n              << i << \", expected GT count: \" << gt_count\n              << \", actual GT count: \" << gt.size() << std::endl;\n\n          uint64_t key{-1LLU};\n          float score{std::nanf(\"\")};\n\n          for (size_t j = gt.size(); j < gt_count; ++j) {\n            gtf.write(&key, sizeof(uint64_t));\n            gtf.write(&score, sizeof(float));\n          }\n        }\n      }\n\n      gtf.close();\n\n      if (!File::Rename(gt_file_temp, gt_file)) {\n        cerr << \"failed to rename ground truth file, src: \" << gt_file_temp\n             << \", dst: \" << gt_file << endl;\n\n        return false;\n      }\n\n      cout << endl\n           << \"Ground truth file create successful in \"\n           << timer.milli_seconds() / 1000 << \"s.\" << endl;\n    } else {\n      if (!gtf.open(gt_file.c_str(), true)) {\n        cerr << \"Failed to open ground truth file[\" << gt_file << 
\"]\" << endl;\n        return false;\n      }\n      size_t file_size = gtf.size();\n\n      constexpr size_t LENGTH = 10240;\n      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);\n\n      char *buffer = new char[LENGTH];\n      gtf.read(buffer, sizeof(int));\n\n      size_t gt_count_input = (size_t) * (int *)buffer;\n      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;\n\n      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {\n        cerr << \"Ground truth file[\" << gt_file << \"] content error!\" << endl;\n        gtf.close();\n        return false;\n      }\n\n      size_t query_num = file_size / one_query_line_size;\n      if (one_query_line_size > LENGTH) {\n        delete[] buffer;\n        buffer = new char[one_query_line_size];\n      }\n\n      for (size_t n = 0; n < query_num; ++n) {\n        gtf.read(n * one_query_line_size, buffer, one_query_line_size);\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (size_t i = 0; i < gt_count; ++i) {\n          uint64_t key = *(uint64_t *)(buffer + sizeof(int) + GT_PAIR_SIZE * i);\n          float score = *(float *)(buffer + sizeof(int) + GT_PAIR_SIZE * i +\n                                   sizeof(uint64_t));\n\n          if (key != -1LLU) {\n            one_gt.emplace_back(key, score);\n          }\n        }\n        gt_.emplace_back(one_gt);\n      }\n      delete[] buffer;\n      cout << \"Load ground truth file[\" << gt_file << \"] done!\" << endl;\n    }\n\n    return true;\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32,\n    // query.size() / count * sizeof(float), count);\n    return flower->search_impl(query.data(), qmeta_, count, 
context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8,\n    // query.size() / count, count);\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() / count * sizeof(uint32_t), count);\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query, size_t count) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() / count * sizeof(uint32_t), count);\n    return flower->search_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32,\n    // query.size() * sizeof(float), 1);\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8,\n    // 
query.size() , 1);\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() * sizeof(uint32_t), 1);\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_knn_search(Flow *flower, Flow::Context::Pointer &context,\n                const vector<U> &query) {\n    // Do knn_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() * sizeof(uint32_t), 1);\n    return flower->search_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32,\n    // query.size() * sizeof(float), 1);\n    return flower->search_bf_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8,\n    // query.size() , 1);\n    return flower->search_bf_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n 
   // query.size() * sizeof(uint32_t), 1);\n    return flower->search_bf_impl(query.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() * sizeof(uint32_t), 1);\n    return flower->search_bf_impl(query.data(), qmeta_, context);\n  }\n\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query, size_t count) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32,\n    // query.size() / count * sizeof(float), count);\n    return flower->search_bf_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query, size_t count) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8,\n    // query.size()  / count, count);\n    return flower->search_bf_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type\n  do_linear_search(Flow *flower, Flow::Context::Pointer &context,\n                   const vector<U> &query, size_t count) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() / count * sizeof(uint32_t), count);\n    return flower->search_bf_impl(query.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type\n  do_linear_search(Flow *flower, 
Flow::Context::Pointer &context,\n                   const vector<U> &query, size_t count) {\n    // Do linear_search\n    // IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32,\n    // query.size() / count * sizeof(uint32_t), count);\n    return flower->search_bf_impl(query.data(), qmeta_, count, context);\n  }\n\n  void recall_one_dense(\n      Flow *flower, size_t topk, size_t index,\n      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {\n    const auto &query = batch_queries_[index];\n\n    size_t thread_index = pool_->indexof_this();\n    fstream *knn_fs = nullptr;\n    fstream *linear_fs = nullptr;\n    if (output_fs.size() > thread_index) {\n      knn_fs = output_fs[thread_index].first;\n      linear_fs = output_fs[thread_index].second;\n    }\n\n    Flow::Context::Pointer knn_context = flower->create_context();\n    if (!knn_context) {\n      cerr << \"Failed to create search context\" << endl;\n      return;\n    }\n    knn_context->set_topk(topk);\n\n    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,\n                                size_t idx) {\n      vector<IndexDocument> linear_res;\n\n      size_t result_size = std::min(topk, gt_[idx].size());\n      if (result_size == 0) {\n        return;\n      }\n\n      for (size_t i = 0; i < result_size; ++i) {\n        auto gt_node = gt_[idx][i];\n\n        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);\n      }\n\n\n      if (knn_fs) {\n        for (auto knn : knn_res) {\n          string str = \"query[\" + to_string(idx) + \"]\\tkey[\" +\n                       to_string(knn.key()) + \"], dist[\" +\n                       to_string(knn.score()) + \"]\\n\";\n          knn_fs->write(str.c_str(), str.size());\n        }\n      }\n      size_t match = 0;\n      bool asc =\n          (linear_res.size() > 1 &&\n           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))\n              ? 
false\n              : true;\n\n      map<int32_t, size_t> topk_matchs;\n      if (g_compare_by_id) {\n        for (size_t i = 0; i < topk_ids_.size(); ++i) {\n          topk_matchs[topk_ids_[i]] = 0;\n        }\n      }\n      for (size_t i = 0, j = 0; i < linear_res.size();) {\n        bool m = false;       // if current doc matched in max topk\n        bool changed = true;  // if i changed\n        if (g_compare_by_id) {\n          for (size_t k = 0; k < topk_ids_.size(); ++k) {\n            size_t dynamic_size = (size_t)topk_ids_[k];\n            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {\n              if (fabs(knn_res[dynamic_size - 1].score() -\n                       knn_res[dynamic_size].score()) >=\n                  numeric_limits<float>::epsilon()) {\n                break;\n              }\n            }\n            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {\n              if (linear_res[i].key() == knn_res[l].key()) {\n                topk_matchs[topk_ids_[k]]++;\n                if (k == topk_ids_.size() - 1) {\n                  m = true;\n                }\n                break;\n              }\n            }\n          }\n          ++i;\n          auto it = recall_res_.find(i);\n          if (it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * topk_matchs[i] / i;\n          }\n        } else {\n          size_t cur_topk = i + 1;\n          if (j < knn_res.size()) {\n            if (fabs(linear_res[i].score() - knn_res[j].score()) <\n                g_recall_precision) {\n              ++j;\n              ++i;\n              match++;\n              m = true;\n            } else {\n              if ((asc && linear_res[i].score() < knn_res[j].score()) ||\n                  (!asc && linear_res[i].score() > knn_res[j].score())) {\n                ++i;\n              } else {\n                changed = false;\n                ++j;\n              }\n    
        }\n          } else {\n            ++i;\n          }\n          auto it = recall_res_.find(cur_topk);\n          if (changed && it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * match / cur_topk;\n          }\n        }\n        if (linear_fs && changed) {\n          string str = string(m ? \"    HIT\" : \"NOT HIT\") + \"  query[\" +\n                       to_string(idx) + \"]\\tkey[\" +\n                       to_string(linear_res[i - 1].key()) + \"], dist[\" +\n                       to_string(linear_res[i - 1].score()) + \"]\\n\";\n          linear_fs->write(str.c_str(), str.size());\n        }\n      }\n    };\n\n    // prefilter\n    FilterResultCache filter_cache;\n    if (filter_mode_ == FM_TAG) {\n      if (batch_taglists_[index].size() != 1) {\n        cerr << \"query tag list not equal to one!\" << endl;\n        return;\n      }\n\n      int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                    batch_taglists_[index][0],\n                                    flower->tag_key_list());\n      if (ret != 0) {\n        cerr << \"prefilter failed, idx: \" << index << std::endl;\n\n        return;\n      }\n\n      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n      knn_context->set_filter(filterFunc);\n    }\n\n    if (call_batch_api_) {\n      size_t qnum = query.size() / dim_;\n      int ret = do_knn_search<T>(flower, knn_context, query, qnum);\n      if (ret < 0) {\n        cerr << \"Failed to knn_search batch, ret=\" << ret << \" \"\n             << IndexError::What(ret) << endl;\n        return;\n      }\n      for (size_t i = 0; i < qnum; ++i) {\n        size_t idx = index * batch_count_ + i;\n        if (idx >= linear_queries_.size()) {\n          break;\n        }\n\n        auto &knn_res = knn_context->result(i);\n        cal_recall(knn_res, idx);\n      }\n    } else {\n      int ret = do_knn_search<T>(flower, 
knn_context, query);\n      if (ret < 0) {\n        cerr << \"Failed to knn_search, ret=\" << ret << \" \"\n             << IndexError::What(ret) << endl;\n        return;\n      }\n      auto &knn_res = knn_context->result();\n      cal_recall(knn_res, index);\n    }\n\n    // std::cout << \"id: \" << index << \": \\n\" <<\n    // knn_context->flow_context()->searcher_context()->profiler().display();\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  bool call_batch_api_;\n  string output_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n\n  // for gt\n  vector<vector<T>> linear_queries_;\n  vector<SparseData<T>> linear_sparse_data_;\n  vector<vector<uint64_t>> linear_taglists_;\n\n  // for recall\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  size_t dim_;\n  size_t total_querys_;\n\n  map<size_t, float> recall_res_;\n  vector<int32_t> topk_ids_;\n  vector<vector<pair<uint64_t, float>>> gt_;\n\n  bool external_gt_file_enabled_{false};\n\n  FilterMode filter_mode_{FM_NONE};\n\n  static bool STOP_NOW;\n};\n\ntemplate <typename T>\nbool Recall<T>::STOP_NOW = false;\n\n//--------------------------------------------------\n// Sparse Recall\n//--------------------------------------------------\ntemplate <typename T>\nclass SparseRecall {\n public:\n  SparseRecall(size_t threads, const string &output, size_t batch_count,\n               FilterMode filter_mode)\n      : threads_(threads),\n        output_(output),\n        batch_count_(batch_count),\n        filter_mode_{filter_mode} {\n    if (threads_ == 0) {\n      pool_ = make_shared<ThreadPool>(true);\n      threads_ = pool_->count();\n      cout << \"Using cpu count as thread pool count[\" << threads_ << \"]\"\n           << endl;\n    } else {\n      pool_ = make_shared<ThreadPool>(threads_, 
true);\n      cout << \"Using thread pool count[\" << threads_ << \"]\" << endl;\n    }\n    if (batch_count_ < 1) {\n      batch_count_ = 1;\n      call_batch_api_ = false;\n    } else {\n      call_batch_api_ = true;\n    }\n  }\n\n  static void stop(int signo) {\n    if (STOP_NOW) {\n      exit(signo);\n    }\n    STOP_NOW = true;\n    cout << \"\\rTrying to stop. press [Ctrl+C] again kill immediately.\" << endl\n         << flush;\n  }\n\n  int transform_queries_without_hybrid_scale(\n      const vector<vector<T>> &queries,\n      const vector<vector<T>> &sparse_features,\n      vector<vector<T>> *queries_output,\n      vector<vector<T>> *sparse_features_output) {\n    if (!queries_output || !sparse_features_output) {\n      std::cerr << \"input should not be empty in transfrom queries\"\n                << std::endl;\n\n      return -1;\n    }\n\n    queries_output->clear();\n    sparse_features_output->clear();\n\n    for (size_t i = 0; i < queries.size(); ++i) {\n      vector<T> query_output;\n      vector<T> sparse_feature_output;\n\n      transform_query_without_hybrid_scale(queries[i], sparse_features[i],\n                                           &query_output,\n                                           &sparse_feature_output);\n\n      queries_output->push_back(query_output);\n      sparse_features_output->push_back(sparse_feature_output);\n    }\n\n    return 0;\n  }\n\n  void run_sparse(SparseFlow *flower, const string &recall_tops,\n                  size_t gt_count) {\n    StringHelper::Split(recall_tops, \",\", &topk_ids_);\n    std::sort(topk_ids_.begin(), topk_ids_.end());\n\n    for (auto i : topk_ids_) {\n      recall_res_[i] = 0.0f;\n    }\n    size_t topk = recall_res_.rbegin()->first;\n\n    gt_count = topk < gt_count ? 
gt_count : topk;\n\n    if (external_gt_file_enabled_) {\n      cout << \"Internal ground truth file NOT used since external ground truth \"\n              \"file has been loaded\"\n           << endl;\n    } else {\n      cout << \"Loading internal ground truth file\" << endl;\n\n      if (!load_gt_sparse(flower, gt_count)) {\n        cerr << \"Load ground truth file failed!\" << endl;\n        return;\n      }\n    }\n\n    if (batch_sparse_counts_.size() < threads_) {\n      threads_ = batch_sparse_counts_.size();\n      pool_ = make_shared<ThreadPool>(true, threads_);\n      cout << \"Query size too small, resize thread pool count[\" << threads_\n           << \"]\" << endl;\n    }\n\n    // Prepare file handler\n    vector<pair<fstream *, fstream *>> output_fs;\n    if (!output_.empty()) {\n      string cmd = \"mkdir -p \" + output_;\n      int ret = system(cmd.c_str());\n      if (ret != 0) {\n        std::cerr << \"execute cmd \" << cmd << \" failed\" << std::endl;\n        return;\n      }\n      struct stat sb;\n      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {\n        cout << \"logs output to : \" << output_ << endl;\n        for (size_t i = 0; i < threads_; ++i) {\n          fstream *fs_k = new fstream();\n          fs_k->open(output_ + \"/t\" + to_string(i) + \".knn\", ios::out);\n          fstream *fs_l = new fstream();\n          fs_l->open(output_ + \"/t\" + to_string(i) + \".linear\", ios::out);\n          output_fs.push_back(make_pair(fs_k, fs_l));\n        }\n      }\n    }\n\n    signal(SIGINT, stop);\n    size_t i = 0;\n    for (; !STOP_NOW && i < batch_sparse_counts_.size();) {\n      if (pool_->pending_count() >= pool_->count()) {\n        this_thread::sleep_for(chrono::microseconds(1));\n        continue;\n      }\n\n      Closure::Pointer task = Closure::New(\n          this, &SparseRecall::recall_one_sparse, flower, topk, i, output_fs);\n      pool_->enqueue_and_wake(task);\n\n      i++;\n    }\n    
pool_->wait_finish();\n\n    for (auto fs : output_fs) {\n      fs.first->close();\n      fs.second->close();\n      delete fs.first;\n      delete fs.second;\n    }\n    cout << \"Process query: \" << i << endl;\n    for (auto it : recall_res_) {\n      cout << \"Recall@\" << it.first << \": \"\n           << it.second / linear_queries_.size() << endl;\n    }\n  }\n\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep) {\n    TxtInputReader<T> reader;\n\n    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,\n                           linear_sparse_data_, linear_taglists_)) {\n      cerr << \"Load query error\" << endl;\n      return false;\n    }\n\n    if (batch_count_ == 1) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        vector<uint32_t> sparse_count;\n        sparse_count.push_back(linear_sparse_data_[i].count);\n\n        batch_sparse_counts_.push_back(sparse_count);\n        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);\n        batch_sparse_features_.push_back(linear_sparse_data_[i].features);\n      }\n    } else {\n      size_t num_batch =\n          (linear_queries_.size() + batch_count_ - 1) / batch_count_;\n      size_t idx = 0;\n      for (size_t n = 0; n < num_batch; ++n) {\n        vector<uint32_t> batch_sparse_count;\n        vector<uint32_t> batch_sparse_indices;\n        vector<T> batch_sparse_feature;\n\n        for (size_t i = 0; i < batch_count_; ++i) {\n          batch_sparse_count.push_back(linear_sparse_data_[idx].count);\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {\n            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);\n          }\n\n          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();\n               ++k) {\n            batch_sparse_feature.push_back(\n                
linear_sparse_data_[idx].features[k]);\n          }\n\n          idx = (idx + 1) % linear_queries_.size();\n        }\n        batch_sparse_counts_.push_back(batch_sparse_count);\n        batch_sparse_indices_.push_back(batch_sparse_indices);\n        batch_sparse_features_.push_back(batch_sparse_feature);\n      }\n    }\n\n    total_querys_ = linear_queries_.size();\n    if (typeid(T) == typeid(float)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);\n    } else if (typeid(T) == typeid(int8_t)) {\n      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);\n    } else {\n      cerr << \"unsupported type\";\n      return false;\n    }\n\n    cout << \"Load query done!\" << endl;\n    return true;\n  }\n\n  bool load_gt_sparse(SparseFlow *flower, size_t gt_count) {\n    std::string crc_str = compute_crc();\n\n    string gt_file = string(\"gt.\") + crc_str;\n\n    File gtf;\n    if (!gtf.IsRegular(gt_file.c_str())) {\n      cout << \"Ground truth file[\" << gt_file << \"] not exist, try to create it\"\n           << endl;\n      ElapsedTime timer;\n      size_t size = sizeof(uint64_t) + sizeof(float);\n      size_t file_size =\n          linear_sparse_data_.size() * (sizeof(int) + size * gt_count);\n\n      std::string gt_file_temp = gt_file + \".tmp\";\n      gtf.create(gt_file_temp.c_str(), file_size);\n\n      gt_.resize(linear_sparse_data_.size());\n\n      atomic_bool error(false);\n      size_t count = 0;\n      float s = linear_sparse_data_.size() / 100.0;\n      size_t pc = 0;\n      SpinMutex spin_lock;\n\n      function<void(size_t)> fun = [&](size_t i) {\n        spin_lock.lock();\n        count++;\n        size_t process = (size_t)ceil(count / s);\n        if (process > pc) {\n          pc = process;\n          stringstream msg;\n          msg << \"\\r\" << setw(3) << setfill(' ') << process << \"% \" << left\n              << setfill('=') << setw(process / 2 + 1) << \"[\" << right\n              << setfill(' ') << setw(51 - process / 2) << 
\"]\";\n          cout << msg.str() << flush;\n        }\n        spin_lock.unlock();\n\n        SparseFlow::Context::Pointer context = flower->create_context();\n        if (!context) {\n          cerr << \"Failed to create search context\" << endl;\n          return;\n        }\n\n        context->set_topk(gt_count);\n        SparseData<T> sparse_data = linear_sparse_data_[i];\n\n        // prefilter\n        FilterResultCache filter_cache;\n        if (filter_mode_ == FM_TAG) {\n          if (batch_taglists_[i].size() != 1) {\n            cerr << \"query tag list not equal to one!\" << endl;\n            return;\n          }\n\n          int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                        batch_taglists_[i][0],\n                                        flower->tag_key_list());\n          if (ret != 0) {\n            cerr << \"prefilter failed, idx: \" << i << std::endl;\n\n            return;\n          }\n\n          auto filterFunc = [&](uint64_t key) {\n            return filter_cache.find(key);\n          };\n\n          context->set_filter(filterFunc);\n        }\n\n        int ret =\n            do_linear_search<T>(flower, context, sparse_data.count,\n                                sparse_data.indices, sparse_data.features);\n        if (ret < 0) {\n          cerr << \"Failed to sparse linear search, ret=\" << ret << endl;\n          error.exchange(true);\n          return;\n        }\n        auto result = context->result();\n\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (auto knn : result) {\n          one_gt.emplace_back(knn.key(), knn.score());\n        }\n        gt_[i] = one_gt;\n      };\n\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        if (error) {\n          break;\n        }\n        pool_->enqueue_and_wake(Closure::New(fun, i));\n      }\n      pool_->wait_finish();\n\n      if (error) {\n        cout << endl\n             
<< \"Ground truth file[\" << gt_file << \"] create failed!\" << endl;\n        gtf.close();\n        remove(gt_file.c_str());\n        return false;\n      }\n\n      for (size_t i = 0; i < gt_.size(); ++i) {\n        auto &gt = gt_[i];\n\n        gtf.write(&gt_count, sizeof(int));\n\n        for (size_t j = 0; j < gt.size(); j++) {\n          auto &one_gt = gt[j];\n\n          gtf.write(&one_gt.first, sizeof(uint64_t));\n          gtf.write(&one_gt.second, sizeof(float));\n        }\n\n        // if ground truth is less than gt count, fill it up\n        if (gt.size() != gt_count) {\n          std::cout\n              << \"WARN: GT result count less than GT expected count, index: \"\n              << i << \", expected GT count: \" << gt_count\n              << \", actual GT count: \" << gt.size() << std::endl;\n\n          uint64_t key{-1LLU};\n          float score{std::nanf(\"\")};\n\n          for (size_t j = gt.size(); j < gt_count; ++j) {\n            gtf.write(&key, sizeof(uint64_t));\n            gtf.write(&score, sizeof(float));\n          }\n        }\n      }\n      gtf.close();\n\n      if (!File::Rename(gt_file_temp, gt_file)) {\n        cerr << \"failed to rename ground truth file, src: \" << gt_file_temp\n             << \", dst: \" << gt_file << endl;\n\n        return false;\n      }\n\n      cout << endl\n           << \"Ground truth file create successful in \"\n           << timer.milli_seconds() / 1000 << \"s.\" << endl;\n    } else {\n      if (!gtf.open(gt_file.c_str(), true)) {\n        cerr << \"Failed to open ground truth file[\" << gt_file << \"]\" << endl;\n        return false;\n      }\n      size_t file_size = gtf.size();\n\n      constexpr size_t LENGTH = 10240;\n      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);\n\n      char *buffer = new char[LENGTH];\n      gtf.read(buffer, sizeof(int));\n\n      size_t gt_count_input = (size_t) * (int *)buffer;\n      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * 
gt_count_input;\n\n      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {\n        cerr << \"Ground truth file[\" << gt_file << \"] content error!\" << endl;\n        gtf.close();\n        return false;\n      }\n\n      size_t query_num = file_size / one_query_line_size;\n      if (one_query_line_size > LENGTH) {\n        delete[] buffer;\n        buffer = new char[one_query_line_size];\n      }\n\n      for (size_t n = 0; n < query_num; ++n) {\n        gtf.read(n * one_query_line_size, buffer, one_query_line_size);\n        vector<pair<uint64_t, float>> one_gt;\n        one_gt.reserve(gt_count);\n\n        for (size_t i = 0; i < gt_count; ++i) {\n          uint64_t key = *(uint64_t *)(buffer + sizeof(int) + GT_PAIR_SIZE * i);\n          float score = *(float *)(buffer + sizeof(int) + GT_PAIR_SIZE * i +\n                                   sizeof(uint64_t));\n\n          if (key != -1LLU) {\n            one_gt.emplace_back(key, score);\n          }\n        }\n\n        gt_.emplace_back(one_gt);\n      }\n\n      delete[] buffer;\n      cout << \"Load ground truth file[\" << gt_file << \"] done!\" << endl;\n    }\n\n    return true;\n  }\n\n  bool load_external_gt_file(const std::string &external_gt_file,\n                             const std::string &first_sep,\n                             const std::string &second_sep) {\n    TxtInputReader<T> reader;\n    bool ret =\n        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);\n    if (ret) {\n      cout << \"Load external ground truth file[\"\n           << File::BaseName(external_gt_file) << \"] done!\" << endl;\n      external_gt_file_enabled_ = true;\n    } else {\n      cerr << \"Failed to load ground truth file!\" << endl;\n    }\n\n    return ret;\n  }\n\n private:\n  std::string compute_crc() {\n    uint32_t crc = 0u;\n    // sparse\n    if (linear_sparse_data_.size() > 0) {\n      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {\n        crc = 
Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),\n                           crc);\n        crc =\n            Crc32c::Hash(linear_sparse_data_[i].indices.data(),\n                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);\n        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),\n                           linear_sparse_data_[i].count * sizeof(T), crc);\n      }\n    }\n\n    char crc_str[64];\n    snprintf(crc_str, sizeof(crc_str), \"%X\", crc);\n\n    return std::string(crc_str);\n  }\n\n  // sparse search\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                const vector<uint32_t> &sparse_count,\n                const vector<uint32_t> &sparse_indices,\n                const vector<U> &sparse_feature, size_t count) {\n    return flower->search_impl(sparse_count.data(), sparse_indices.data(),\n                               sparse_feature.data(), qmeta_, count, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                const uint32_t sparse_count,\n                const vector<uint32_t> &sparse_indices,\n                const vector<U> &sparse_feature) {\n    return flower->search_impl(sparse_count, sparse_indices.data(),\n                               sparse_feature.data(), qmeta_, context);\n  }\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_linear_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                   const vector<uint32_t> &sparse_count,\n                   const vector<uint32_t> &sparse_indices,\n                   const vector<U> &sparse_feature, size_t count) {\n    return flower->search_bf_impl(sparse_count.data(), sparse_indices.data(),\n              
                    sparse_feature.data(), qmeta_, count,\n                                  context);\n  }\n\n\n  template <typename U>\n  typename std::enable_if<std::is_same<float, U>::value, int>::type\n  do_linear_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,\n                   const uint32_t sparse_count,\n                   const vector<uint32_t> &sparse_indices,\n                   const vector<U> &sparse_feature) {\n    return flower->search_bf_impl(sparse_count, sparse_indices.data(),\n                                  sparse_feature.data(), qmeta_, context);\n  }\n\n  void recall_one_sparse(\n      SparseFlow *flower, size_t topk, size_t index,\n      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {\n    const auto &sparse_count = batch_sparse_counts_[index];\n    const auto &sparse_index = batch_sparse_indices_[index];\n    const auto &sparse_feature = batch_sparse_features_[index];\n\n    size_t thread_index = pool_->indexof_this();\n    fstream *knn_fs = nullptr;\n    fstream *linear_fs = nullptr;\n    if (output_fs.size() > thread_index) {\n      knn_fs = output_fs[thread_index].first;\n      linear_fs = output_fs[thread_index].second;\n    }\n\n    SparseFlow::Context::Pointer knn_context = flower->create_context();\n    if (!knn_context) {\n      cerr << \"Failed to create search context\" << endl;\n      return;\n    }\n    knn_context->set_topk(topk);\n\n    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,\n                                size_t idx) {\n      vector<IndexDocument> linear_res;\n\n      size_t result_size = std::min(topk, gt_[idx].size());\n      if (result_size == 0) {\n        return;\n      }\n\n      for (size_t i = 0; i < result_size; ++i) {\n        auto gt_node = gt_[idx][i];\n\n        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);\n      }\n\n      if (knn_fs) {\n        for (auto knn : knn_res) {\n          string str = \"query[\" + 
to_string(idx) + \"]\\tkey[\" +\n                       to_string(knn.key()) + \"], dist[\" +\n                       to_string(knn.score()) + \"]\\n\";\n          knn_fs->write(str.c_str(), str.size());\n        }\n      }\n\n      size_t match = 0;\n      bool asc =\n          (linear_res.size() > 1 &&\n           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))\n              ? false\n              : true;\n\n      map<int32_t, size_t> topk_matchs;\n      if (g_compare_by_id) {\n        for (size_t i = 0; i < topk_ids_.size(); ++i) {\n          topk_matchs[topk_ids_[i]] = 0;\n        }\n      }\n\n      for (size_t i = 0, j = 0; i < linear_res.size();) {\n        bool m = false;       // if current doc matched in max topk\n        bool changed = true;  // if i changed\n        if (g_compare_by_id) {\n          for (size_t k = 0; k < topk_ids_.size(); ++k) {\n            size_t dynamic_size = (size_t)topk_ids_[k];\n            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {\n              if (fabs(knn_res[dynamic_size - 1].score() -\n                       knn_res[dynamic_size].score()) >=\n                  numeric_limits<float>::epsilon()) {\n                break;\n              }\n            }\n            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {\n              if (linear_res[i].key() == knn_res[l].key()) {\n                topk_matchs[topk_ids_[k]]++;\n                if (k == topk_ids_.size() - 1) {\n                  m = true;\n                }\n                break;\n              }\n            }\n          }\n          ++i;\n\n          auto it = recall_res_.find(i);\n          if (it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * topk_matchs[i] / i;\n          }\n        } else {\n          size_t cur_topk = i + 1;\n          if (j < knn_res.size()) {\n            if (fabs(linear_res[i].score() - knn_res[j].score()) <\n              
  g_recall_precision) {\n              ++j;\n              ++i;\n              match++;\n              m = true;\n            } else {\n              if ((asc && linear_res[i].score() < knn_res[j].score()) ||\n                  (!asc && linear_res[i].score() > knn_res[j].score())) {\n                ++i;\n              } else {\n                changed = false;\n                ++j;\n              }\n            }\n          } else {\n            ++i;\n          }\n\n          auto it = recall_res_.find(cur_topk);\n          if (changed && it != recall_res_.end()) {\n            lock_guard<mutex> lock(recall_lock);\n            it->second += 100.0 * match / cur_topk;\n          }\n        }\n\n        if (linear_fs && changed) {\n          string str = string(m ? \"    HIT\" : \"NOT HIT\") + \"  query[\" +\n                       to_string(idx) + \"]\\tkey[\" +\n                       to_string(linear_res[i - 1].key()) + \"], dist[\" +\n                       to_string(linear_res[i - 1].score()) + \"]\\n\";\n          linear_fs->write(str.c_str(), str.size());\n        }\n      }\n    };\n\n    FilterResultCache filter_cache;\n    if (filter_mode_ == FM_TAG) {\n      if (batch_taglists_[index].size() != 1) {\n        cerr << \"query tag list not equal to one!\" << endl;\n        return;\n      }\n\n      int ret = filter_cache.filter(flower->id_to_tags_list(),\n                                    batch_taglists_[index][0],\n                                    flower->tag_key_list());\n      if (ret != 0) {\n        cerr << \"prefilter failed, idx: \" << index << std::endl;\n\n        return;\n      }\n\n      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };\n\n      knn_context->set_filter(filterFunc);\n    }\n\n    if (call_batch_api_) {\n      // size_t qnum = sparse_count.size() / dim_;\n      // int ret = do_knn_search<T>(flower, knn_context, sparse_count,\n      // sparse_index, sparse_feature, qnum); if (ret < 0) {\n      //   cerr << 
\"Failed to sparse_knn_search batch, ret=\" << ret << \" \"\n      //       << IndexError::What(ret) << endl;\n      //   return;\n      // }\n      // for (size_t i = 0; i < qnum; ++i) {\n      //   size_t idx = index * batch_count_ + i;\n\n      //   if (idx >= linear_queries_.size()) {\n      //     break;\n      //   }\n\n      //   auto &knn_res = knn_context->result(i);\n      //   cal_recall(knn_res, idx);\n      // }\n    } else {\n      int ret = do_knn_search<T>(flower, knn_context, sparse_count[0],\n                                 sparse_index, sparse_feature);\n      if (ret < 0) {\n        cerr << \"Failed to sparse_knn_search, ret=\" << ret << \" \"\n             << IndexError::What(ret) << endl;\n        return;\n      }\n      auto &knn_res = knn_context->result();\n      cal_recall(knn_res, index);\n    }\n  }\n\n private:\n  IndexQueryMeta qmeta_{};\n  size_t threads_;\n  bool call_batch_api_;\n  string output_;\n  size_t batch_count_;\n  shared_ptr<ThreadPool> pool_;\n\n  // for gt\n  vector<vector<T>> linear_queries_;\n  vector<SparseData<T>> linear_sparse_data_;\n  vector<uint32_t> linear_partitions_;\n  vector<vector<uint64_t>> linear_taglists_;\n\n  std::map<std::string, vector<vector<T>>> linear_queries_scaled_;\n  std::map<std::string, vector<vector<T>>> linear_sparse_features_scaled_;\n\n  // for recall\n  vector<vector<T>> batch_queries_;\n  vector<vector<uint32_t>> batch_sparse_counts_;\n  vector<vector<uint32_t>> batch_sparse_indices_;\n  vector<vector<T>> batch_sparse_features_;\n  vector<vector<uint32_t>> batch_partitions_;\n  vector<vector<vector<uint64_t>>> batch_taglists_;\n\n  std::map<std::string, vector<vector<T>>> batch_queries_scaled_;\n  std::map<std::string, vector<vector<T>>> batch_sparse_features_scaled_;\n\n  size_t total_querys_;\n\n  map<size_t, float> recall_res_;\n  vector<int32_t> topk_ids_;\n  vector<vector<pair<uint64_t, float>>> gt_;\n\n  map<string, vector<vector<pair<uint64_t, float>>>> gt_hybrid_;\n  bool 
external_gt_file_enabled_{false};\n\n  FilterMode filter_mode_{FM_NONE};\n  static bool STOP_NOW;\n};\n\ntemplate <typename T>\nbool SparseRecall<T>::STOP_NOW = false;\n\nbool prepare_params(YAML::Node &&config_params, Params &params) {\n  cout << \"Parse params as blow:\" << endl;\n  for (auto it = config_params.begin(); it != config_params.end(); ++it) {\n    string tag = it->second.Tag();\n    if (tag == \"tag:yaml.org,2002:int\") {\n      int64_t val = it->second.as<int64_t>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:float\") {\n      float val = it->second.as<float>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else if (tag == \"tag:yaml.org,2002:bool\") {\n      bool val = it->second.as<bool>();\n      params.set(it->first.as<string>(), val);\n      cout << it->first.as<string>() << \"=\" << val << endl;\n    } else {\n      if (it->second.IsScalar()) {\n        string val = it->second.as<string>();\n        params.set(it->first.as<string>(), val);\n        cout << it->first.as<string>() << \"=\" << val << endl;\n      } else if (it->second.IsMap()) {\n        Params sub_params;\n        auto sub_node = it->second;\n        if (!prepare_params(std::move(sub_node), sub_params)) {\n          cerr << \"parse params error with key[\" << it->first.as<string>()\n               << \"]\" << endl;\n          return false;\n        }\n        params.set(it->first.as<string>(), sub_params);\n      }\n    }\n  }\n  return true;\n}\n\nbool check_config(YAML::Node &config_node) {\n  auto common = config_node[\"SearcherCommon\"];\n  if (!common) {\n    cerr << \"Can not find [SearcherCommon] in config\" << endl;\n    return false;\n  }\n  if (!common[\"SearcherClass\"] && !common[\"SearcherConfig\"]) {\n    cerr << \"Can not find [SearcherClass] or [SearcherConfig] in config\"\n         
<< endl;\n    return false;\n  }\n  if (!common[\"IndexPath\"]) {\n    cerr << \"Can not find [IndexPath] in config\" << endl;\n    return false;\n  }\n  if (!common[\"TopK\"]) {\n    cerr << \"Can not find [TopK] in config\" << endl;\n    return false;\n  }\n  if (!common[\"QueryFile\"]) {\n    cerr << \"Can not find [QueryFile] in config\" << endl;\n    return false;\n  }\n  return true;\n}\n\nvoid usage(void) {\n  cout << \"Usage: recall CONFIG.yaml [plugin file path]\" << endl;\n}\n\nbool load_index(Flow &flower, string &index_dir) {\n  int ret = flower.load(index_dir);\n  if (0 != ret) {\n    cerr << \"Flow load failed with ret \" << ret << endl;\n    return false;\n  }\n  cout << \"Load index done!\" << endl;\n  return true;\n};\n\nint recall_dense(std::string &query_type, size_t thread_count,\n                 size_t batch_count, string top_k, size_t gt_count,\n                 string query_file, string &first_sep, string &second_sep,\n                 string &ground_truth_file, string &ground_truth_first_sep,\n                 string ground_truth_second_sep, Flow &flower,\n                 string &index_dir, string &log_dir, FilterMode filter_mode) {\n  if (query_type == \"float\") {\n    Recall<float> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    if (load_index(flower, index_dir)) {\n      recall.run_dense(&flower, top_k, gt_count);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"int8\") {\n    Recall<int8_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if 
(ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    if (load_index(flower, index_dir)) {\n      recall.run_dense(&flower, top_k, gt_count);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"binary\") {\n    Recall<uint32_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    if (load_index(flower, index_dir)) {\n      recall.run_dense(&flower, top_k, gt_count);\n    } else {\n      return -1;\n    }\n  } else if (query_type == \"binary64\") {\n    Recall<uint64_t> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    if (load_index(flower, index_dir)) {\n      recall.run_dense(&flower, top_k, gt_count);\n    } else {\n      return -1;\n    }\n  } else {\n    cerr << \"Can not recognize type: \" << query_type << endl;\n  }\n\n  return 0;\n}\n\nbool load_sparse_index(SparseFlow &flower, string &index_dir) {\n  int ret = flower.load(index_dir);\n  if (0 != ret) {\n    cerr << \"Flow load failed with ret \" << ret << endl;\n    return false;\n  }\n  cout << \"Load index done!\" << endl;\n\n  return 
true;\n};\n\nint recall_sparse(std::string &query_type, size_t thread_count,\n                  size_t batch_count, string top_k, size_t gt_count,\n                  string &query_file, string &first_sep, string &second_sep,\n                  string &ground_truth_file, string &ground_truth_first_sep,\n                  string &ground_truth_second_sep, SparseFlow &flower,\n                  string &index_dir, string &log_dir, FilterMode filter_mode) {\n  if (query_type == \"float\") {\n    SparseRecall<float> recall(thread_count, log_dir, batch_count, filter_mode);\n    if (!recall.load_query(query_file, first_sep, second_sep)) {\n      return -1;\n    }\n\n    if (ground_truth_file != \"\") {\n      if (!recall.load_external_gt_file(ground_truth_file,\n                                        ground_truth_first_sep,\n                                        ground_truth_second_sep)) {\n        return -1;\n      }\n    }\n\n    if (load_sparse_index(flower, index_dir)) {\n      recall.run_sparse(&flower, top_k, gt_count);\n    } else {\n      return -1;\n    }\n  } else {\n    cerr << \"Can not recognize type: \" << query_type << endl;\n  }\n\n  return 0;\n}\n\nint get_recall_precision(string &recall_precision_string) {\n  constexpr float DEFAULT_RECALL_PRECISION = 1e-6;\n\n  if (recall_precision_string == \"\") {\n    g_recall_precision = DEFAULT_RECALL_PRECISION;\n    return true;\n  }\n\n  try {\n    g_recall_precision = std::stof(recall_precision_string);\n    std::cout << \"Recall Score Precesion: \" << g_recall_precision << std::endl;\n  } catch (const std::invalid_argument &e) {\n    std::cerr << \"Exeception in getting recall precision: \" << e.what()\n              << \", value: \" << recall_precision_string << std::endl;\n    return false;\n  } catch (const std::out_of_range &e) {\n    std::cerr << \"Out of range exception in getting recall precision: \"\n              << e.what() << \", value: \" << recall_precision_string\n              << std::endl;\n    
return false;\n  }\n\n  return true;\n}\n\nint main(int argc, char *argv[]) {\n  if (argc < 2) {\n    usage();\n    return -1;\n  }\n\n  IndexPluginBroker broker;\n  std::string error;\n  for (int i = 2; i < argc; ++i) {\n    if (!broker.emplace(argv[i], &error)) {\n      cerr << \"Failed to load plugin: \" << argv[i] << \" (\" << error << \")\"\n           << endl;\n      return -1;\n    }\n  }\n\n  YAML::Node config_node;\n  try {\n    config_node = YAML::LoadFile(argv[1]);\n  } catch (...) {\n    cerr << \"Load YAML file[\" << argv[1] << \"] failed!\" << endl;\n    return -1;\n  }\n  if (!check_config(config_node)) {\n    return -1;\n  }\n  auto config_common = config_node[\"SearcherCommon\"];\n\n  map<string, int> LOG_LEVEL = {{\"debug\", IndexLogger::LEVEL_DEBUG},\n                                {\"info\", IndexLogger::LEVEL_INFO},\n                                {\"warn\", IndexLogger::LEVEL_WARN},\n                                {\"error\", IndexLogger::LEVEL_ERROR},\n                                {\"fatal\", IndexLogger::LEVEL_FATAL}};\n  string log_level = config_common[\"LogLevel\"]\n                         ? config_common[\"LogLevel\"].as<string>()\n                         : \"debug\";\n  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);\n  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {\n    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);\n  }\n\n  // Calculate Recall\n  string log_dir = \"\";\n  if (config_common[\"RecallLogDir\"]) {\n    log_dir = config_common[\"RecallLogDir\"].as<string>();\n  }\n  size_t thread_count = config_common[\"RecallThreadCount\"]\n                            ? config_common[\"RecallThreadCount\"].as<uint64_t>()\n                            : 0;\n  size_t gt_count = config_common[\"RecallGTCount\"]\n                        ? 
config_common[\"RecallGTCount\"].as<uint64_t>()\n                        : 100;\n  size_t batch_count = config_common[\"RecallBatchCount\"]\n                           ? config_common[\"RecallBatchCount\"].as<uint64_t>()\n                           : 0;\n  g_compare_by_id = config_common[\"CompareById\"]\n                        ? config_common[\"CompareById\"].as<bool>()\n                        : 0;\n  string top_k = config_common[\"TopK\"].as<string>();\n\n  string recall_precision_string =\n      config_common[\"RecallScorePrecision\"]\n          ? config_common[\"RecallScorePrecision\"].as<string>()\n          : \"\";\n\n  if (!get_recall_precision(recall_precision_string)) {\n    cerr << \"Get recall precision failed, value: \" << recall_precision_string\n         << endl;\n    return -1;\n  }\n\n  RetrievalMode retrieval_mode{RM_DENSE};\n  if (config_common[\"RetrievalMode\"]) {\n    std::string retrieval_mode_str =\n        config_common[\"RetrievalMode\"].as<string>();\n    if (retrieval_mode_str == \"dense\") {\n      retrieval_mode = RM_DENSE;\n    } else if (retrieval_mode_str == \"sparse\") {\n      retrieval_mode = RM_SPARSE;\n    }\n  }\n\n  FilterMode filter_mode{FM_NONE};\n  if (config_common[\"FilterMode\"]) {\n    std::string filter_mode_str = config_common[\"FilterMode\"].as<string>();\n    if (filter_mode_str == \"tag\") {\n      filter_mode = FM_TAG;\n    }\n  }\n\n  string query_file = config_common[\"QueryFile\"].as<string>();\n\n  string first_sep = config_common[\"QueryFirstSep\"]\n                         ? config_common[\"QueryFirstSep\"].as<string>()\n                         : \";\";\n  string second_sep = config_common[\"QuerySecondSep\"]\n                          ? config_common[\"QuerySecondSep\"].as<string>()\n                          : \" \";\n  string query_type = config_common[\"QueryType\"]\n                          ? 
config_common[\"QueryType\"].as<string>()\n                          : \"float\";\n  string container_type = config_common[\"ContainerType\"]\n                              ? config_common[\"ContainerType\"].as<string>()\n                              : \"MMapFileStorage\";\n\n  string ground_truth_file = \"\";\n  string ground_truth_first_sep = \";\";\n  string ground_truth_second_sep = \" \";\n\n  if (config_common[\"GroundTruthFile\"]) {\n    ground_truth_file = config_common[\"GroundTruthFile\"].as<string>();\n\n    if (config_common[\"GroundTruthFirstSep\"]) {\n      ground_truth_first_sep =\n          config_common[\"GroundTruthFirstSep\"].as<string>();\n    }\n\n    if (config_common[\"GroundTruthSecondSep\"]) {\n      ground_truth_second_sep =\n          config_common[\"GroundTruthSecondSep\"].as<string>();\n    }\n  }\n\n  if (retrieval_mode == RM_SPARSE) {\n    SparseFlow flower;\n    Params container_params;\n    if (config_node[\"ContainerParams\"]) {\n      // Get index params of Searcher in flower object\n      if (!prepare_params(config_node[\"ContainerParams\"], container_params)) {\n        return -1;\n      }\n      cout << \"Created index params of a container in flower object \" << endl;\n    }\n\n    int ret = flower.set_container(container_type, container_params);\n    if (0 != ret) {\n      cerr << \"Create\" << container_type << \"failed.\" << endl;\n      return -1;\n    }\n\n    // Set a Searcher\n    if (config_common[\"SearcherClass\"]) {\n      Params params;\n      if (config_node[\"SearcherParams\"]) {\n        // Get index params of Searcher in flower object\n        if (!prepare_params(config_node[\"SearcherParams\"], params)) {\n          return -1;\n        }\n        cout << \"Created index params of a searcher in flower object \" << endl;\n      }\n\n      string searcher_class = config_common[\"SearcherClass\"].as<string>();\n      ret = flower.set_searcher(searcher_class, params);\n      if (0 != ret) {\n        cerr << 
\"Failed to create searcher \" << searcher_class << endl;\n        return -1;\n      }\n      cout << \"Created searcher \" << searcher_class << endl;\n    } else {  // SearcherConfig\n      std::cout << config_common[\"SearcherConfig\"].as<string>() << std::endl;\n      auto params =\n          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(\n              config_common[\"SearcherConfig\"].as<string>());\n\n      auto index =\n          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);\n\n      flower.set_searcher(index->index_searcher());\n    }\n\n    string index_dir = config_common[\"IndexPath\"].as<string>();\n    recall_sparse(query_type, thread_count, batch_count, top_k, gt_count,\n                  query_file, first_sep, second_sep, ground_truth_file,\n                  ground_truth_first_sep, ground_truth_second_sep, flower,\n                  index_dir, log_dir, filter_mode);\n\n    flower.unload();\n\n    cout << \"Recall done.\" << endl;\n  } else {\n    Flow flower;\n    Params container_params;\n    if (config_node[\"ContainerParams\"]) {\n      // Get index params of Searcher in flower object\n      if (!prepare_params(config_node[\"ContainerParams\"], container_params)) {\n        return -1;\n      }\n      cout << \"Created index params of a container in flower object \" << endl;\n    }\n\n    int ret = flower.set_container(container_type, container_params);\n    if (0 != ret) {\n      cerr << \"Create\" << container_type << \"failed.\" << endl;\n      return -1;\n    }\n\n    // Set a Searcher\n    if (config_common[\"SearcherClass\"]) {\n      Params params;\n      if (config_node[\"SearcherParams\"]) {\n        // Get index params of Searcher in flower object\n        if (!prepare_params(config_node[\"SearcherParams\"], params)) {\n          return -1;\n        }\n        cout << \"Created index params of a searcher in flower object \" << endl;\n      }\n\n      string searcher_class = 
config_common[\"SearcherClass\"].as<string>();\n      ret = flower.set_searcher(searcher_class, params);\n      if (0 != ret) {\n        cerr << \"Failed to create searcher \" << searcher_class << endl;\n        return -1;\n      }\n      cout << \"Created searcher \" << searcher_class << endl;\n    } else {  // SearcherConfig\n      std::cout << config_common[\"SearcherConfig\"].as<string>() << std::endl;\n      auto params =\n          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(\n              config_common[\"SearcherConfig\"].as<string>());\n\n      auto index =\n          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);\n\n      flower.set_searcher(index->index_searcher());\n    }\n\n    string index_dir = config_common[\"IndexPath\"].as<string>();\n    if (retrieval_mode == RM_DENSE) {\n      recall_dense(query_type, thread_count, batch_count, top_k, gt_count,\n                   query_file, first_sep, second_sep, ground_truth_file,\n                   ground_truth_first_sep, ground_truth_second_sep, flower,\n                   index_dir, log_dir, filter_mode);\n    } else {\n      std::string mode = retrieval_mode == 1 ? \"Dense\" : \"Sparse\";\n      cerr << \"unsupported retrieval mode: \" << mode << endl;\n\n      return -1;\n    }\n\n    // Cleanup\n    flower.unload();\n\n    cout << \"Recall done.\" << endl;\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/txt2vecs.cc",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <iostream>\n#include <set>\n#include \"gflags/gflags.h\"\n#include \"zvec/core/framework/index_meta.h\"\n#include \"index_meta_helper.h\"\n#include \"txt_input_reader.h\"\n#include \"vecs_common.h\"\n\nusing namespace std;\nusing namespace zvec::core;\n\nDEFINE_string(input, \"input.txt\", \"txt input file\");\nDEFINE_string(input_first_sep, \";\", \"input first sep\");\nDEFINE_string(input_second_sep, \" \", \"input second sep\");\nDEFINE_string(output, \"output.vecs\", \"vecs output file\");\nDEFINE_string(type, \"float\",\n              \"available type: float, double, int16, int8, binary\");\nDEFINE_string(method, \"L2\", \"available method: L2, IP, HAMMING\");\nDEFINE_int32(dimension, 256, \"data dimension\");\nDEFINE_string(vector_type, \"dense\", \"available type: dense, hybrid, sparse\");\n\nbool write_header_output(VecsHeader header, const IndexMeta &meta,\n                         size_t &total_writes, FILE *wfp) {\n  // write header\n  std::cout << \"Begin to Write Header Section...\" << std::endl;\n\n  std::string meta_buf;\n  meta.serialize(&meta_buf);\n  header.meta_size = meta_buf.size();\n  size_t wret = fwrite(&header, sizeof(header), 1, wfp);\n  if (wret != 1) {\n    cerr << \"Write header error\" << endl;\n    fclose(wfp);\n    return false;\n  }\n\n  total_writes += sizeof(header);\n  std::cout << \"Total Writes 
after Header Section: \" << total_writes\n            << std::endl\n            << std::endl;\n\n  // write meta\n  std::cout << \"Begin to Write Meta Section...\" << std::endl;\n  wret = fwrite(meta_buf.c_str(), meta_buf.size(), 1, wfp);\n  if (wret != 1) {\n    cerr << \"Write header meta_buf error\" << endl;\n    fclose(wfp);\n    return false;\n  }\n\n  total_writes += meta_buf.size();\n  std::cout << \"Total Writes after Meta Buf: \" << total_writes << std::endl\n            << std::endl;\n\n  return true;\n}\n\nbool write_header_output_sparse(VecsHeader header, const IndexMeta &meta,\n                                size_t &total_writes, FILE *wfp) {\n  // write header\n  std::cout << \"Begin to Write Header Section...\" << std::endl;\n  std::string meta_buf;\n  meta.serialize(&meta_buf);\n  header.meta_size = meta_buf.size();\n  size_t wret = fwrite(&header, sizeof(header), 1, wfp);\n  if (wret != 1) {\n    cerr << \"Write header error\" << endl;\n    fclose(wfp);\n    return false;\n  }\n\n  total_writes += sizeof(header);\n  std::cout << \"Total Writes after Header Section: \" << total_writes\n            << std::endl\n            << std::endl;\n\n  // write meta\n  std::cout << \"Begin to Write Sparse Meta Section...\" << std::endl;\n  wret = fwrite(meta_buf.c_str(), meta_buf.size(), 1, wfp);\n  if (wret != 1) {\n    cerr << \"Write header meta buf error\" << endl;\n    fclose(wfp);\n    return false;\n  }\n\n  total_writes += meta_buf.size();\n  std::cout << \"Total Writes after Meta Buf: \" << total_writes << std::endl\n            << std::endl;\n\n  return true;\n}\n\ntemplate <typename T>\nbool write_features_output(size_t vec_num, const vector<vector<T>> &features,\n                           size_t &total_writes, FILE *wfp) {\n  // write dense vector\n  std::cout << \"Begin to Write Dense Vector Section...\" << std::endl;\n  for (size_t i = 0; i < vec_num; ++i) {\n    auto &feature = features[i];\n    size_t wret = fwrite(&feature[0], sizeof(T), 
feature.size(), wfp);\n    if (wret != feature.size()) {\n      cerr << \"Write feature error. \" << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += feature.size() * sizeof(T);\n  }\n\n  std::cout << \"Total Writes after Dense Vector: \" << total_writes << std::endl\n            << std::endl;\n\n  return true;\n}\n\nbool write_keys_output(size_t vec_num, const vector<uint64_t> &keys,\n                       size_t &total_writes, FILE *wfp) {\n  std::cout << \"Begin to Write Key Section...\" << std::endl;\n  for (size_t i = 0; i < vec_num; ++i) {\n    uint64_t key = keys[i];\n    size_t wret = fwrite(&key, sizeof(key), 1, wfp);\n    if (wret != 1) {\n      cerr << \"Write key error. key:\" << key << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += sizeof(uint64_t);\n  }\n\n  std::cout << \"Total Writes after Key Section: \" << total_writes << std::endl\n            << std::endl;\n\n  return true;\n}\n\ntemplate <typename T>\nbool write_sparse_features_output(size_t vec_num,\n                                  const vector<SparseData<T>> &sparse_data,\n                                  size_t &total_writes, FILE *wfp) {\n  std::set<uint32_t> sparse_dims;\n  uint32_t sparse_max_count = 0;\n  uint32_t sparse_min_count = -1U;\n  uint32_t sparse_total_count = 0;\n\n  // write sparse meta\n  std::cout << \"Begin to Write Sparse Meta Section...\" << std::endl;\n  size_t wret;\n  uint64_t offset = 0;\n  for (size_t i = 0; i < vec_num; ++i) {\n    wret = fwrite(&offset, sizeof(uint64_t), 1, wfp);\n    if (wret != 1) {\n      cerr << \"Write sparse feature len error. 
\" << endl;\n      fclose(wfp);\n      return false;\n    }\n    offset += sparse_data[i].get_len();\n\n    total_writes += sizeof(size_t);\n  }\n  std::cout << \"Total Writes after Sparse Meta Section: \" << total_writes\n            << std::endl\n            << std::endl;\n\n  std::cout << \"Begin to Write Sparse Vector Section...\" << std::endl;\n  for (size_t i = 0; i < vec_num; ++i) {\n    auto &sparse_one_data = sparse_data[i];\n\n    wret = fwrite(&(sparse_one_data.count), sizeof(uint32_t), 1, wfp);\n    if (wret != 1) {\n      cerr << \"Write sparse feature count error. \" << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += sizeof(uint32_t);\n\n    wret = fwrite(&sparse_one_data.indices[0], sizeof(uint32_t),\n                  sparse_one_data.indices.size(), wfp);\n    if (wret != sparse_one_data.indices.size()) {\n      cerr << \"Write feature error. \" << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += sizeof(uint32_t) * sparse_one_data.indices.size();\n    // do some stat\n    for (size_t s = 0; s < sparse_one_data.indices.size(); ++s) {\n      sparse_dims.insert(sparse_one_data.indices[s]);\n    }\n\n    if (sparse_one_data.indices.size() > sparse_max_count) {\n      sparse_max_count = sparse_one_data.indices.size();\n    }\n\n    if (sparse_one_data.indices.size() < sparse_min_count) {\n      sparse_min_count = sparse_one_data.indices.size();\n    }\n\n    sparse_total_count += sparse_one_data.indices.size();\n    // //done\n\n    wret = fwrite(&sparse_one_data.features[0], sizeof(T),\n                  sparse_one_data.features.size(), wfp);\n    if (wret != sparse_one_data.features.size()) {\n      cerr << \"Write feature error. 
\" << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += sizeof(T) * sparse_one_data.features.size();\n  }\n\n  std::cout << \"Total Writes after Sparse Vector Section: \" << total_writes\n            << std::endl\n            << std::endl;\n  // for (auto itr=sparse_dims.begin(); itr!=sparse_dims.end(); ++itr) {\n  //   std::cout << (*itr) << \",\";\n  // }\n  // std::cout << std::endl;\n\n  std::cout << \"Max Sparse Dimension Count: \" << sparse_max_count << std::endl;\n  std::cout << \"Min Sparse Dimension Count: \" << sparse_min_count << std::endl;\n  std::cout << \"Avg Sparse Dimension Count: \" << sparse_total_count / vec_num\n            << std::endl;\n\n  return true;\n}\n\nbool write_taglists_output(size_t vec_num,\n                           const vector<vector<uint64_t>> &taglists,\n                           size_t &total_writes, FILE *wfp) {\n  std::cout << \"Begin to Write Tag List Section...\" << std::endl;\n\n  // write tag list meta\n  std::cout << \"Begin to Write Tag List Meta Section...\" << std::endl;\n  size_t wret;\n  uint64_t offset = 0;\n  for (size_t i = 0; i < vec_num; ++i) {\n    wret = fwrite(&offset, sizeof(uint64_t), 1, wfp);\n    if (wret != 1) {\n      cerr << \"Write tag list meta error. Rec no: \" << i << endl;\n      fclose(wfp);\n      return false;\n    }\n    offset += taglists[i].size() * sizeof(uint64_t);\n\n    total_writes += sizeof(size_t);\n  }\n  std::cout << \"Total Writes after Tag Meta Section: \" << total_writes\n            << std::endl\n            << std::endl;\n\n  for (size_t i = 0; i < vec_num; ++i) {\n    std::vector<uint64_t> taglist = taglists[i];\n    uint64_t taglist_size = taglist.size();\n    wret = fwrite(&taglist_size, sizeof(uint64_t), 1, wfp);\n    if (wret != 1) {\n      cerr << \"Write tag list size error. 
Rec no: \" << i << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    wret = fwrite(&(taglist[0]), sizeof(uint64_t), taglist.size(), wfp);\n    if (wret != taglist.size()) {\n      cerr << \"Write tag list error. Rec no: \" << i << endl;\n      fclose(wfp);\n      return false;\n    }\n\n    total_writes += sizeof(uint64_t) * taglist.size() + sizeof(uint64_t);\n  }\n\n  std::cout << \"Total Writes after Tag List Section: \" << total_writes\n            << std::endl\n            << std::endl;\n\n  return true;\n}\n\ntemplate <typename T>\nbool write_vecs_output_sparse(VecsHeader &header, const IndexMeta &meta,\n                              const vector<uint64_t> &keys,\n                              const vector<SparseData<T>> &sparse_data,\n                              const vector<vector<uint64_t>> &taglists) {\n  if (keys.empty()) {\n    cerr << \"keys is empty.\" << endl;\n    return false;\n  }\n\n  if (keys.size() != sparse_data.size()) {\n    cerr << \"keys's size(\" << keys.size()\n         << \") is not equal to sparse data's size(\" << sparse_data.size()\n         << \").\" << endl;\n    return false;\n  }\n\n  size_t vec_num = keys.size();\n\n  FILE *wfp = fopen(FLAGS_output.c_str(), \"wb\");\n  if (!wfp) {\n    cerr << \"Open file error. \" << FLAGS_output << endl;\n    return false;\n  }\n\n  size_t total_writes = 0;\n\n  std::cout << \"------------------------\" << std::endl;\n  std::cout << \" Output Process         \" << std::endl;\n  std::cout << \"------------------------\" << std::endl;\n\n  // write sparse header\n  bool ret = write_header_output_sparse(header, meta, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write header error! \" << endl;\n\n    return false;\n  }\n\n  // write keys\n  ret = write_keys_output(vec_num, keys, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write keys error! 
\" << endl;\n\n    return false;\n  }\n\n  // write sparse features\n  ret = write_sparse_features_output(vec_num, sparse_data, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write sparse features error! \" << endl;\n\n    return false;\n  }\n\n  if ((header.bitmap & (1ULL << BITMAP_INDEX_TAGLIST)) != 0) {\n    // write tag lists features\n    ret = write_taglists_output(vec_num, taglists, total_writes, wfp);\n    if (!ret) {\n      cerr << \"write tag lists error! \" << endl;\n\n      return false;\n    }\n  }\n\n  std::cout << \"------------------------\" << std::endl;\n  std::cout << \" Output Done            \" << std::endl;\n  std::cout << \"------------------------\" << std::endl;\n\n  fclose(wfp);\n  return true;\n}\n\ntemplate <typename T>\nbool write_vecs_output(VecsHeader &header, const IndexMeta &meta,\n                       const vector<uint64_t> &keys,\n                       const vector<vector<T>> &features,\n                       const vector<SparseData<T>> &sparse_data,\n                       const vector<vector<uint64_t>> &taglists) {\n  if (keys.empty()) {\n    cerr << \"keys is empty.\" << endl;\n    return false;\n  }\n\n  if (keys.size() != features.size()) {\n    cerr << \"keys's size(\" << keys.size()\n         << \") is not equal to features's size(\" << features.size() << \").\"\n         << endl;\n    return false;\n  }\n\n\n  size_t vec_num = header.num_vecs;\n\n  FILE *wfp = fopen(FLAGS_output.c_str(), \"wb\");\n  if (!wfp) {\n    cerr << \"Open file error. \" << FLAGS_output << endl;\n    return false;\n  }\n\n  size_t total_writes = 0;\n\n  std::cout << \"------------------------\" << std::endl;\n  std::cout << \" Output Process         \" << std::endl;\n  std::cout << \"------------------------\" << std::endl;\n\n  // write header\n  bool ret = write_header_output(header, meta, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write header error! 
\" << endl;\n\n    return false;\n  }\n\n  // write features\n  ret = write_features_output(vec_num, features, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write features error! \" << endl;\n\n    return false;\n  }\n\n  // write keys\n  ret = write_keys_output(vec_num, keys, total_writes, wfp);\n  if (!ret) {\n    cerr << \"write keys error! \" << endl;\n\n    return false;\n  }\n\n  // write sparse features\n  if ((header.bitmap & (1ULL << BITMAP_INDEX_SPARSE)) != 0) {\n    ret = write_sparse_features_output(vec_num, sparse_data, total_writes, wfp);\n    if (!ret) {\n      cerr << \"write sparse features error! \" << endl;\n\n      return false;\n    }\n  }\n\n  if ((header.bitmap & (1ULL << BITMAP_INDEX_TAGLIST)) != 0) {\n    // write tag lists features\n    ret = write_taglists_output(vec_num, taglists, total_writes, wfp);\n    if (!ret) {\n      cerr << \"write tag lists error! \" << endl;\n\n      return false;\n    }\n  }\n\n  std::cout << \"------------------------\" << std::endl;\n  std::cout << \" Output Done            \" << std::endl;\n  std::cout << \"------------------------\" << std::endl;\n\n  fclose(wfp);\n  return true;\n}\n\ntemplate <typename T>\nbool compute_offset(uint64_t num_vecs, const IndexMeta &meta,\n                    const vector<uint64_t> & /*keys*/,\n                    const vector<vector<T>> & /*features*/,\n                    const vector<SparseData<T>> &sparse_data,\n                    const vector<std::vector<uint64_t>> &taglists,\n                    uint64_t &key_offset, uint64_t &feature_offset,\n                    uint64_t &sparse_offset, uint64_t &taglist_offset,\n                    uint64_t &key_size, uint64_t &feature_size,\n                    uint64_t &sparse_size, uint64_t &taglist_size) {\n  size_t total_offset = 0;\n\n  feature_offset = 0;\n  feature_size = num_vecs * meta.element_size();\n  total_offset += feature_size;\n\n  key_offset = total_offset;\n  key_size = num_vecs * sizeof(uint64_t);\n  
total_offset += key_size;\n\n  if (sparse_data.size() != 0) {\n    sparse_offset = total_offset;\n\n    size_t data_offset = num_vecs * sizeof(uint64_t);\n    for (size_t i = 0; i < sparse_data.size(); ++i) {\n      data_offset += sizeof(uint32_t) +\n                     sparse_data[i].count * (sizeof(uint32_t) + sizeof(T));\n    }\n\n    sparse_size = data_offset;\n\n    total_offset += sparse_size;\n  } else {\n    sparse_offset = -1LLU;\n    sparse_size = 0;\n  }\n\n  if (taglists.size() != 0) {\n    taglist_offset = total_offset;\n\n    size_t data_offset = num_vecs * sizeof(uint64_t);\n    for (size_t i = 0; i < taglists.size(); ++i) {\n      data_offset += sizeof(uint64_t) + taglists[i].size() * sizeof(uint64_t);\n    }\n\n    taglist_size = data_offset;\n  } else {\n    taglist_offset = -1LLU;\n    taglist_size = 0;\n  }\n\n  return true;\n}\n\ntemplate <typename T>\nbool compute_sparse_offset(uint64_t num_vecs, const IndexMeta & /*meta*/,\n                           const vector<uint64_t> & /*keys*/,\n                           const vector<SparseData<T>> &sparse_data,\n                           const vector<std::vector<uint64_t>> &taglists,\n                           uint64_t &key_offset, uint64_t &sparse_offset,\n                           uint64_t &taglist_offset, uint64_t &key_size,\n                           uint64_t &sparse_size, uint64_t &taglist_size) {\n  size_t total_offset = 0;\n\n  key_offset = 0;\n  key_size = num_vecs * sizeof(uint64_t);\n  total_offset += num_vecs * sizeof(uint64_t);\n\n  sparse_offset = total_offset;\n  size_t data_offset = num_vecs * sizeof(uint64_t);\n  for (size_t i = 0; i < sparse_data.size(); ++i) {\n    data_offset += sizeof(uint32_t) +\n                   sparse_data[i].count * (sizeof(uint32_t) + sizeof(T));\n  }\n\n  sparse_size = data_offset;\n  total_offset += sparse_size;\n\n  if (taglists.size() != 0) {\n    taglist_offset = total_offset;\n\n    data_offset = num_vecs * sizeof(uint64_t);\n    for (size_t i = 
0; i < taglists.size(); ++i) {\n      data_offset += sizeof(uint64_t) + taglists[i].size() * sizeof(uint64_t);\n    }\n\n    taglist_size = data_offset;\n  } else {\n    taglist_offset = -1LLU;\n    taglist_size = 0;\n  }\n\n  return true;\n}\n\ntemplate <typename T>\nbool process(void) {\n  if (FLAGS_vector_type == \"sparse\") {\n    std::cout << \"------------------------\" << std::endl;\n    std::cout << \" Vector Type: sparse    \" << std::endl;\n    std::cout << \"------------------------\" << std::endl;\n\n    IndexMeta meta;\n    if (!IndexMetaHelper::parse_from(FLAGS_type, FLAGS_method,\n                                     FLAGS_vector_type, meta)) {\n      cerr << \"Index meta parse error.\" << endl;\n      return false;\n    }\n    cerr << IndexMetaHelper::to_string(meta) << endl;\n\n    TxtInputReader<T> reader;\n    vector<uint64_t> keys;\n    vector<SparseData<T>> sparse_data;\n    vector<std::vector<uint64_t>> taglists;\n\n    bool ret = reader.load_record_sparse(FLAGS_input, FLAGS_input_first_sep,\n                                         FLAGS_input_second_sep, keys,\n                                         sparse_data, taglists);\n    if (!ret) {\n      cerr << \"Read record failed\" << endl;\n      return false;\n    }\n\n    if (sparse_data.size() == 0) {\n      cerr << \"empty sparse data!\" << endl;\n      return false;\n    }\n\n    uint64_t num_vecs = keys.size();\n\n    uint64_t key_offset{-1LLU}, sparse_offset{-1LLU}, taglist_offset{-1LLU};\n    uint64_t key_size{0}, sparse_size{0}, taglist_size{0};\n\n    compute_sparse_offset(num_vecs, meta, keys, sparse_data, taglists,\n                          key_offset, sparse_offset, taglist_offset, key_size,\n                          sparse_size, taglist_size);\n\n    VecsHeader header;\n    header.num_vecs = keys.size();\n    header.meta_size_v1 = 0;\n    header.version = 1;\n    header.bitmap = 0;\n    header.key_offset = key_offset;\n    header.dense_offset = -1LLU;\n    header.sparse_offset 
= sparse_offset;\n    header.taglist_offset = taglist_offset;\n    header.key_size = key_size;\n    header.dense_size = 0;\n    header.sparse_size = sparse_size;\n    header.taglist_size = taglist_size;\n\n    header.bitmap |= (1 << BITMAP_INDEX_KEY);\n    header.bitmap |= (1 << BITMAP_INDEX_SPARSE);\n\n    if (taglist_offset != -1LLU) {\n      header.bitmap |= (1 << BITMAP_INDEX_TAGLIST);\n    }\n\n    ret = write_vecs_output_sparse(header, meta, keys, sparse_data, taglists);\n    if (!ret) {\n      cerr << \"write vecs output failed\" << endl;\n      return false;\n    }\n  } else {\n    std::cout << \"------------------------\" << std::endl;\n    std::cout << \" Vector Type:     \" << FLAGS_vector_type << std::endl;\n    std::cout << \"------------------------\" << std::endl;\n\n    IndexMeta meta;\n    if (!IndexMetaHelper::parse_from(FLAGS_type, FLAGS_method, FLAGS_dimension,\n                                     FLAGS_vector_type, meta)) {\n      cerr << \"Index meta parse error.\" << endl;\n      return false;\n    }\n    cerr << IndexMetaHelper::to_string(meta) << endl;\n\n    TxtInputReader<T> reader;\n    vector<uint64_t> keys;\n    vector<vector<T>> features;\n    vector<SparseData<T>> sparse_data;\n    vector<std::vector<uint64_t>> taglists;\n\n    bool ret = reader.load_record(FLAGS_input, FLAGS_input_first_sep,\n                                  FLAGS_input_second_sep, FLAGS_dimension, keys,\n                                  features, sparse_data, taglists);\n    if (!ret) {\n      cerr << \"Read record failed\" << endl;\n      return false;\n    }\n\n    uint64_t num_vecs = keys.size();\n\n    uint64_t key_offset{-1LLU}, features_offset{-1LLU}, sparse_offset{-1LLU},\n        taglist_offset{-1LLU};\n    uint64_t key_size{0}, feature_size{0}, sparse_size{0}, taglist_size{0};\n\n    compute_offset(num_vecs, meta, keys, features, sparse_data, taglists,\n                   key_offset, features_offset, sparse_offset, taglist_offset,\n                   
key_size, feature_size, sparse_size, taglist_size);\n\n    VecsHeader header;\n    header.num_vecs = num_vecs;\n    header.meta_size_v1 = 0;\n    header.version = 1;\n    header.bitmap = 0;\n    header.key_offset = key_offset;\n    header.dense_offset = features_offset;\n    header.sparse_offset = sparse_offset;\n    header.taglist_offset = taglist_offset;\n    header.key_size = key_size;\n    header.dense_size = feature_size;\n    header.sparse_size = sparse_size;\n    header.taglist_size = taglist_size;\n\n    header.bitmap |= (1 << BITMAP_INDEX_KEY);\n    header.bitmap |= (1 << BITMAP_INDEX_DENSE);\n\n    if (sparse_offset != -1LLU) {\n      header.bitmap |= (1 << BITMAP_INDEX_SPARSE);\n    }\n\n    if (taglist_offset != -1LLU) {\n      header.bitmap |= (1 << BITMAP_INDEX_TAGLIST);\n    }\n\n    ret =\n        write_vecs_output(header, meta, keys, features, sparse_data, taglists);\n    if (!ret) {\n      cerr << \"write vecs output failed\" << endl;\n      return false;\n    }\n  }\n\n  return true;\n}\n\nint main(int argc, char *argv[]) {\n  // gflags\n  gflags::SetUsageMessage(\"Usage: txt2vecs [options]\");\n  gflags::ParseCommandLineFlags(&argc, &argv, true);\n\n  if (FLAGS_type == \"float\") {\n    if (!process<float>()) {\n      return -1;\n    }\n  } else if (FLAGS_type == \"double\") {\n    if (!process<double>()) {\n      return -1;\n    }\n  } else if (FLAGS_type == \"int16\") {\n    if (!process<int16_t>()) {\n      return -1;\n    }\n  } else if (FLAGS_type == \"int8\") {\n    if (!process<int8_t>()) {\n      return -1;\n    }\n  } else if (FLAGS_type == \"binary\") {\n    if (!process<uint32_t>()) {\n      return -1;\n    }\n  } else if (FLAGS_type == \"binary64\") {\n    if (!process<uint64_t>()) {\n      return -1;\n    }\n  } else {\n    cerr << \"Can not recognize type: \" << FLAGS_type << endl;\n    return -1;\n  }\n  return 0;\n}\n"
  },
  {
    "path": "tools/core/txt_input_reader.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string.h>\n#include <fstream>\n#include <iostream>\n#include <string>\n#include <vector>\n#include <zvec/ailego/utility/string_helper.h>\n\nnamespace zvec {\nnamespace core {\n\ntemplate <typename T>\nstruct SparseData {\n public:\n  SparseData(uint32_t count_in, std::vector<uint32_t> &indices_in,\n             std::vector<T> &features_in)\n      : count(count_in),\n        indices(std::move(indices_in)),\n        features(std::move(features_in)) {}\n\n  SparseData(uint32_t count_in, std::vector<uint32_t> &&indices_in,\n             std::vector<T> &&features_in)\n      : count(count_in),\n        indices(std::move(indices_in)),\n        features(std::move(features_in)) {}\n\n public:\n  uint64_t get_len() const {\n    return sizeof(uint32_t) + sizeof(uint32_t) * indices.size() +\n           sizeof(T) * features.size();\n  }\n\n public:\n  uint32_t count;\n  std::vector<uint32_t> indices;\n  std::vector<T> features;\n};\n\n// support type: float, binary, int16, int8\ntemplate <typename T>\nclass TxtInputReader {\n public:\n  bool load_query(const std::string &query_file, const std::string &first_sep,\n                  const std::string &second_sep,\n                  std::vector<std::vector<T>> &features,\n                  std::vector<SparseData<T>> &sparse_data,\n                  
std::vector<std::vector<uint64_t>> &taglists) {\n    std::fstream qf(query_file, std::ios::in);\n\n    if (!qf.is_open()) {\n      std::cerr << \"open query file failed! [\" << query_file << \"]\"\n                << std::endl;\n      return false;\n    }\n\n    bool ret;\n    std::string buffer;\n    while (getline(qf, buffer)) {\n      buffer.erase(buffer.find_last_not_of('\\n') + 1);\n      if (buffer.empty()) {\n        continue;\n      }\n      std::vector<std::string> res;\n      ailego::StringHelper::Split(buffer, first_sep, &res);\n      if (res.empty()) {\n        continue;\n      }\n      std::string feature_str = res[0];\n      if (res.size() > 1) {\n        feature_str = res[1];\n      }\n      std::vector<T> feature;\n      size_t dimension = 0;\n      ret = load_from_string(feature_str, second_sep, feature, &dimension);\n      if (!ret) {\n        return false;\n      }\n\n      features.emplace_back(feature);\n\n      uint64_t key = atol(res[0].c_str());\n\n      // load sparse feature\n      uint32_t sparse_count = 0;\n      std::vector<uint32_t> sparse_indices;\n      std::vector<T> sparse_feature;\n\n      if (res.size() >= 3) {\n        ret = load_from_string_sparse(key, res[2], second_sep, sparse_indices,\n                                      sparse_feature, &sparse_count);\n        if (!ret) {\n          std::cerr << \"load sparse failed for key: \" << key << std::endl;\n          return false;\n        }\n      }\n\n      sparse_data.emplace_back(sparse_count, std::move(sparse_indices),\n                               std::move(sparse_feature));\n\n      if (res.size() >= 4) {\n        std::vector<uint64_t> taglist;\n        size_t tag_count = 0;\n\n        ret = load_tags_from_string(res[4], second_sep, taglist, &tag_count);\n        if (!ret) {\n          std::cerr << \"load tags failed for key: \" << key << std::endl;\n          return false;\n        }\n\n        taglists.emplace_back(taglist);\n      }\n    }\n\n    qf.close();\n    if 
(features.size() == 0) {\n      std::cerr << \"Read query size is 0\" << std::endl;\n      return false;\n    }\n    return true;\n  }\n\n\n  bool load_record(const std::string &input, const std::string &first_sep,\n                   const std::string &second_sep, const size_t dimension,\n                   std::vector<uint64_t> &keys,\n                   std::vector<std::vector<T>> &features,\n                   std::vector<SparseData<T>> &sparse_data,\n                   std::vector<std::vector<uint64_t>> &taglists) {\n    std::fstream qf(input, std::ios::in);\n\n    if (!qf.is_open()) {\n      std::cerr << \"open file failed! [\" << input << \"]\" << std::endl;\n      return false;\n    }\n\n    bool ret;\n    uint32_t count = 0;\n    std::string buffer;\n\n    while (getline(qf, buffer)) {\n      buffer.erase(buffer.find_last_not_of('\\n') + 1);\n      if (buffer.empty()) {\n        continue;\n      }\n      std::vector<std::string> res;\n      ailego::StringHelper::Split(buffer, first_sep, &res);\n      if (res.size() < 2) {\n        std::cerr << \"skip record : \" << buffer << std::endl;\n        continue;\n      }\n\n      std::vector<T> feature;\n      size_t real_dim = 0;\n\n      // load sparse feature\n      uint32_t sparse_count = 0;\n      std::vector<uint32_t> sparse_indices;\n      std::vector<T> sparse_feature;\n\n      uint64_t key = atol(res[0].c_str());\n\n      // load dense feature\n      ret = load_from_string(res[1], second_sep, feature, &real_dim);\n      if (!ret) {\n        return false;\n      }\n\n      if (real_dim != dimension) {\n        std::cerr << \"real dim (\" << real_dim << \") is not equal to dimension(\"\n                  << dimension << \") key : \" << res[0] << std::endl;\n        continue;\n      }\n\n      features.emplace_back(feature);\n      keys.emplace_back(key);\n\n      if (res.size() >= 3) {\n        ret = load_from_string_sparse(key, res[2], second_sep, sparse_indices,\n                                      
sparse_feature, &sparse_count);\n        if (!ret) {\n          std::cerr << \"load sparse failed for key: \" << key << std::endl;\n          return false;\n        }\n\n        sparse_data.emplace_back(sparse_count, std::move(sparse_indices),\n                                 std::move(sparse_feature));\n      }\n\n      if (res.size() >= 4) {\n        std::vector<uint64_t> taglist;\n        size_t tag_count = 0;\n\n        ret = load_tags_from_string(res[3], second_sep, taglist, &tag_count);\n        if (!ret) {\n          std::cerr << \"load tags failed for key: \" << key << std::endl;\n          return false;\n        }\n\n        taglists.emplace_back(taglist);\n      }\n\n      count++;\n      if (count % 1000000 == 0) {\n        std::cout << \"processed \" << count << \" records!\" << std::endl;\n      }\n    }\n\n    qf.close();\n\n    if (keys.size() == 0) {\n      std::cerr << \"Reading nothing from input\" << std::endl;\n      return false;\n    }\n\n    return true;\n  }\n\n  bool load_record_sparse(const std::string &input,\n                          const std::string &first_sep,\n                          const std::string &second_sep,\n                          std::vector<uint64_t> &keys,\n                          std::vector<SparseData<T>> &sparse_data,\n                          std::vector<std::vector<uint64_t>> &taglists) {\n    std::fstream qf(input, std::ios::in);\n\n    if (!qf.is_open()) {\n      std::cerr << \"open file failed! 
[\" << input << \"]\" << std::endl;\n      return false;\n    }\n\n    bool ret;\n    uint32_t count = 0;\n    std::string buffer;\n\n    while (getline(qf, buffer)) {\n      buffer.erase(buffer.find_last_not_of('\\n') + 1);\n      if (buffer.empty()) {\n        continue;\n      }\n      std::vector<std::string> res;\n      ailego::StringHelper::Split(buffer, first_sep, &res);\n      if (res.size() < 2) {\n        std::cerr << \"skip record : \" << buffer << std::endl;\n        continue;\n      }\n\n      uint64_t key = atol(res[0].c_str());\n\n      // load sparse feature\n      uint32_t sparse_count = 0;\n      std::vector<uint32_t> sparse_indices;\n      std::vector<T> sparse_feature;\n\n      if (res.size() <= 2) {\n        std::cerr << \"field error, key: \" << key << std::endl;\n        continue;\n      }\n\n      ret = load_from_string_sparse(key, res[2], second_sep, sparse_indices,\n                                    sparse_feature, &sparse_count);\n      if (!ret) {\n        std::cerr << \"load sparse failed for key: \" << key << std::endl;\n        return false;\n      }\n\n      keys.emplace_back(key);\n\n      sparse_data.emplace_back(sparse_count, std::move(sparse_indices),\n                               std::move(sparse_feature));\n\n      if (res.size() >= 4) {\n        std::vector<uint64_t> taglist;\n        size_t tag_count;\n\n        ret = load_tags_from_string(res[3], second_sep, taglist, &tag_count);\n        if (!ret) {\n          std::cerr << \"load tags failed for key: \" << key << std::endl;\n          return false;\n        }\n\n        taglists.emplace_back(taglist);\n      }\n\n      count++;\n      if (count % 1000000 == 0) {\n        std::cout << \"processed \" << count << \" records!\" << std::endl;\n      }\n    }\n\n    qf.close();\n\n    if (keys.size() == 0) {\n      std::cerr << \"Reading nothing from input\" << std::endl;\n      return false;\n    }\n\n    return true;\n  }\n\n  template <typename U>\n  bool 
load_from_string(const std::string &record,\n                        const std::string &second_sep, std::vector<U> &data,\n                        size_t *count) {\n    ailego::StringHelper::Split(record, second_sep, &data, true);\n    *count = data.size();\n\n    return true;\n  }\n\n  bool load_scores_from_string(const std::string &record,\n                               const std::string &second_sep,\n                               std::vector<float> &data, size_t *count) {\n    ailego::StringHelper::Split(record, second_sep, &data, true);\n    *count = data.size();\n\n    return true;\n  }\n\n  bool load_ids_from_string(const std::string &record,\n                            const std::string &second_sep,\n                            std::vector<uint64_t> &data, size_t *count) {\n    ailego::StringHelper::Split(record, second_sep, &data, true);\n    *count = data.size();\n\n    return true;\n  }\n\n  bool load_tags_from_string(const std::string &record,\n                             const std::string &second_sep,\n                             std::vector<uint64_t> &tags, size_t *count) {\n    ailego::StringHelper::Split(record, second_sep, &tags, true);\n    *count = tags.size();\n\n    // order tags\n    sort(tags.begin(), tags.end());\n\n    return true;\n  }\n\n  // overloading for binary\n  bool load_from_string(const std::string &record,\n                        const std::string &second_sep,\n                        std::vector<uint32_t> &data, size_t *count) {\n    // fetch split value from text file\n    std::vector<uint8_t> vec;\n    ailego::StringHelper::Split(record, second_sep, &vec, true);\n    if (vec.size() == 0) {\n      std::cerr << \"Binary vector size is 0\" << std::endl;\n      return false;\n    }\n    if (vec.size() % 32 != 0) {\n      std::cerr << \"Binary vector size must be 32_X\" << std::endl;\n      return false;\n    }\n    // compact into uint32_t\n    size_t sz = vec.size();\n    std::vector<uint8_t> tmp;\n    for (size_t i = 0; i 
< sz; i += 8) {\n      uint8_t v = 0;\n      v |= (vec[i] & 0x01) << 7;\n      v |= (vec[i + 1] & 0x01) << 6;\n      v |= (vec[i + 2] & 0x01) << 5;\n      v |= (vec[i + 3] & 0x01) << 4;\n      v |= (vec[i + 4] & 0x01) << 3;\n      v |= (vec[i + 5] & 0x01) << 2;\n      v |= (vec[i + 6] & 0x01) << 1;\n      v |= (vec[i + 7] & 0x01) << 0;\n      tmp.push_back(v);\n    }\n    data.resize(sz / 32);\n    memcpy(&data[0], &tmp[0], tmp.size());\n    *count = sz;\n\n    return true;\n  }\n\n  // overloading for binary\n  bool load_from_string(const std::string &record,\n                        const std::string &second_sep,\n                        std::vector<uint64_t> &data, size_t *count) {\n    // fetch split value from text file\n    std::vector<uint8_t> vec;\n    ailego::StringHelper::Split(record, second_sep, &vec);\n    if (vec.size() == 0) {\n      std::cerr << \"Binary vector size is 0\" << std::endl;\n      return false;\n    }\n    if (vec.size() % 64 != 0) {\n      std::cerr << \"Binary vector size must be 64_X\" << std::endl;\n      return false;\n    }\n    // compact into uint64_t\n    size_t sz = vec.size();\n    std::vector<uint8_t> tmp;\n    for (size_t i = 0; i < sz; i += 8) {\n      uint8_t v = 0;\n      v |= (vec[i] & 0x01) << 7;\n      v |= (vec[i + 1] & 0x01) << 6;\n      v |= (vec[i + 2] & 0x01) << 5;\n      v |= (vec[i + 3] & 0x01) << 4;\n      v |= (vec[i + 4] & 0x01) << 3;\n      v |= (vec[i + 5] & 0x01) << 2;\n      v |= (vec[i + 6] & 0x01) << 1;\n      v |= (vec[i + 7] & 0x01) << 0;\n      tmp.push_back(v);\n    }\n    data.resize(sz / 64);\n    memcpy(&data[0], &tmp[0], tmp.size());\n    *count = sz;\n\n    return true;\n  }\n\n  bool load_from_string_sparse(uint64_t key, const std::string &record,\n                               const std::string &second_sep,\n                               std::vector<uint32_t> &sparse_indices,\n                               std::vector<T> &sparse_feature,\n                               uint32_t 
*sparse_count) {\n    const std::string sparse_sep = \":\";\n    std::vector<std::string> res;\n    ailego::StringHelper::Split(record, sparse_sep, &res);\n\n    if (res.size() == 2) {\n      ailego::StringHelper::Split(res[0], second_sep, &sparse_indices);\n      ailego::StringHelper::Split(res[1], second_sep, &sparse_feature);\n\n      uint32_t index_count = sparse_indices.size();\n      uint32_t feature_count = sparse_feature.size();\n\n      if (feature_count == index_count) {\n        *sparse_count = feature_count;\n      } else {\n        std::cerr << \"sparse feature count (\" << feature_count\n                  << \") is not equal with sparse index count(\" << index_count\n                  << \") key : \" << key << std::endl;\n        *sparse_count = 0;\n\n        return false;\n      }\n\n      // check order\n      for (size_t i = 1; i < sparse_indices.size(); ++i) {\n        if (sparse_indices[i - 1] >= sparse_indices[i]) {\n          std::cerr << \"sparse indices not ordered, key : \" << key\n                    << \", dim info: [\" << sparse_indices[i - 1] << \", \"\n                    << sparse_indices[i] << \"]\" << std::endl;\n\n          return false;\n        }\n      }\n    }\n\n    return true;\n  }\n\n  // LINE FORMAT is as follows:\n  //      key:key0 key1 key2 ... keyN:score0 score1 score2 ... scoreN\n  bool load_external_gt(\n      const std::string &input, const std::string &first_sep,\n      const std::string &second_sep,\n      std::vector<std::vector<std::pair<uint64_t, float>>> &ground_truth) {\n    std::fstream gf(input, std::ios::in);\n\n    if (!gf.is_open()) {\n      std::cerr << \"open file failed! 
[\" << input << \"]\" << std::endl;\n      return false;\n    }\n\n    uint32_t count = 0;\n    std::string buffer;\n    while (getline(gf, buffer)) {\n      buffer.erase(buffer.find_last_not_of('\\n') + 1);\n      if (buffer.empty()) {\n        continue;\n      }\n      std::vector<std::string> res;\n      ailego::StringHelper::Split(buffer, first_sep, &res);\n      if (res.size() < 2) {\n        std::cerr << \"skip record : \" << buffer << std::endl;\n        continue;\n      }\n\n      // uint64_t main_key = std::strtoll(res[0].c_str(), NULL, 10);\n      if (res.size() == 2) {\n        std::vector<uint64_t> keys;\n        size_t key_num = 0;\n        load_ids_from_string(res[1], second_sep, keys, &key_num);\n\n        std::vector<std::pair<uint64_t, float>> one_groud_truth;\n        for (size_t i = 0; i < keys.size(); ++i) {\n          one_groud_truth.push_back(std::make_pair(keys[i], 0.0f));\n        }\n\n        ground_truth.push_back(std::move(one_groud_truth));\n      } else {\n        std::vector<uint64_t> keys;\n        size_t key_num = 0;\n        load_ids_from_string(res[1], second_sep, keys, &key_num);\n\n        std::vector<float> scores;\n        size_t score_num = 0;\n        load_scores_from_string(res[2], second_sep, scores, &score_num);\n\n        if (key_num != score_num) {\n          std::cerr << \"key num (\" << key_num << \") is not equal to (\"\n                    << score_num << \"), line data:\" << buffer << std::endl;\n          continue;\n        }\n\n        std::vector<std::pair<uint64_t, float>> one_groud_truth;\n        for (size_t i = 0; i < keys.size(); ++i) {\n          one_groud_truth.push_back(std::make_pair(keys[i], scores[i]));\n        }\n\n        ground_truth.push_back(std::move(one_groud_truth));\n      }\n\n      count++;\n      if (count % 1000000 == 0) {\n        std::cout << \"processed \" << count << \" records!\" << std::endl;\n      }\n    }\n    gf.close();\n    if (ground_truth.size() == 0) {\n      std::cerr << 
\"Reading nothing from input\" << std::endl;\n      return false;\n    }\n\n    return true;\n  }\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "tools/core/vecs_common.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <cstdint>\nnamespace zvec {\nnamespace core {\n\nenum VecsBitMapIndex {\n  BITMAP_INDEX_KEY = 0,\n  BITMAP_INDEX_DENSE = 1,\n  BITMAP_INDEX_SPARSE = 2,\n  BITMAP_INDEX_TAGLIST = 4\n};\n\n#pragma pack(4)\nstruct VecsHeader {\n  uint64_t num_vecs;\n  uint16_t meta_size_v1;\n  uint16_t version;\n  uint32_t meta_size;\n  uint64_t bitmap;            // set for data section\n  uint64_t key_offset;        // offset for key\n  uint64_t key_size;          // size for key\n  uint64_t dense_offset;      // offset for dense\n  uint64_t dense_size;        // size for dense\n  uint64_t sparse_offset;     // offset for sparse\n  uint64_t sparse_size;       // size for sparse\n  uint64_t partition_offset;  // offset for partition\n  uint64_t partition_size;    // size for partition\n  uint64_t taglist_offset;    // offset for taglist\n  uint64_t taglist_size;      // size for taglist\n  uint8_t meta_buf[0];\n};\n#pragma pack()\n\n}  // namespace core\n}  // namespace zvec\n"
  },
  {
    "path": "tools/core/vecs_index_holder.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <string>\n#include <unordered_map>\n#include <zvec/ailego/container/params.h>\n#include \"zvec/core/framework/index_error.h\"\n#include \"zvec/core/framework/index_holder.h\"\n#include \"zvec/core/framework/index_provider.h\"\n#include \"zvec/core/framework/index_storage.h\"\n#include \"vecs_reader.h\"\n\nnamespace zvec {\nnamespace core {\n\n/*!\n * Vecs Index Holder\n *  framwork will use IndexHolder in this way:\n *  for (iter = create_iterator(); iter->is_valid(); iter->next()) {\n *      key = iter->key();\n *      data = iter->data();\n *  }\n */\nclass VecsIndexHolder : public IndexProvider {\n public:\n  typedef std::shared_ptr<VecsIndexHolder> Pointer;\n\n  bool load(const std::string &file_path) {\n    if (!vecs_reader_.load(file_path)) {\n      return false;\n    }\n    build_key_index_map();\n    return true;\n  }\n\n  const IndexMeta &index_meta(void) const {\n    return vecs_reader_.index_meta();\n  }\n\n  void set_metric(const std::string &name, const ailego::Params &params) {\n    vecs_reader_.set_metric(name, params);\n  }\n\n  /*!\n   * Index Holder Iterator\n   */\n  class Iterator : public IndexHybridHolder::Iterator {\n   public:\n    //! 
Constructor\n    Iterator(const VecsIndexHolder &holder, uint32_t cursor)\n        : cursor_(cursor),\n          vecs_reader_(holder.vecs_reader_),\n          stop_(holder.stop_) {}\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return !stop_ && cursor_ < vecs_reader_.num_vecs();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return vecs_reader_.get_key(cursor_);\n    }\n\n    //! Retrieve pointer of data\n    virtual const void *data() const override {\n      return vecs_reader_.get_vector(cursor_);\n    }\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const override {\n      return vecs_reader_.get_sparse_count(cursor_);\n    }\n\n    //! Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const override {\n      return vecs_reader_.get_sparse_indices(cursor_);\n    }\n\n    //! Retrieve pointer of sparse data\n    virtual const void *sparse_data() const override {\n      return vecs_reader_.get_sparse_data(cursor_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      ++cursor_;\n    }\n\n    //! Reset the iterator\n    virtual void reset(void) {\n      cursor_ = 0;\n    }\n\n   private:\n    size_t cursor_;\n    const VecsReader &vecs_reader_;\n    const bool &stop_;\n  };\n\n  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {\n    // make sure iter has value when create_iterator finished\n    IndexHolder::Iterator::Pointer iter(\n        new VecsIndexHolder::Iterator(*this, start_cursor_));\n    return iter;\n  }\n\n  virtual IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) {\n    // make sure iter has value when create_iterator finished\n    IndexHybridHolder::Iterator::Pointer iter(\n        new VecsIndexHolder::Iterator(*this, start_cursor_));\n    return iter;\n  }\n\n  //! 
Retrieve count of elements in holder\n  virtual size_t count(void) const override {\n    return max_doc_count_ != 0\n               ? std::min(max_doc_count_, vecs_reader_.num_vecs())\n               : vecs_reader_.num_vecs();\n  }\n\n  //! Retrieve dimension\n  virtual size_t dimension(void) const override {\n    return vecs_reader_.index_meta().dimension();\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const override {\n    return vecs_reader_.index_meta().data_type();\n  }\n\n  //! Retrieve element size in bytes\n  virtual size_t element_size(void) const override {\n    return vecs_reader_.index_meta().element_size();\n  }\n\n  //! Retrieve if it can multi-pass\n  virtual bool multipass(void) const override {\n    return true;\n  }\n\n  void stop(void) {\n    stop_ = true;\n  }\n\n  uint64_t get_num_vecs() const {\n    return vecs_reader_.num_vecs();\n  }\n\n  uint64_t get_key(size_t idx) const {\n    return vecs_reader_.get_key(idx);\n  }\n\n  uint32_t get_sparse_count(size_t idx) const {\n    return vecs_reader_.get_sparse_count(idx);\n  }\n\n  const uint32_t *get_sparse_indices(size_t idx) const {\n    return vecs_reader_.get_sparse_indices(idx);\n  }\n\n  const void *get_sparse_data(size_t idx) const {\n    return vecs_reader_.get_sparse_data(idx);\n  }\n\n  void set_start_cursor(uint32_t index) {\n    start_cursor_ = index;\n  }\n\n  void set_max_doc_count(size_t value) {\n    max_doc_count_ = value;\n  }\n\n  uint32_t start_cursor() const {\n    return start_cursor_;\n  }\n\n  size_t total_sparse_count(void) const {\n    return vecs_reader_.get_total_sparse_count();\n  }\n\n  bool has_taglist() const {\n    return vecs_reader_.has_taglist();\n  }\n\n  uint64_t get_taglist_count(size_t index) const {\n    return vecs_reader_.get_taglist_count(index);\n  }\n\n  const void *get_taglist(size_t index) const {\n    return vecs_reader_.get_taglist(index);\n  }\n\n  const void *get_taglist_data(size_t &size) const {\n    
return vecs_reader_.get_taglist_data(size);\n  }\n\n  const void *get_key_base() const {\n    return vecs_reader_.key_base();\n  }\n\n  const void *get_vector_by_index(size_t idx) const {\n    return vecs_reader_.get_vector(idx);\n  }\n\n public:  // IndexProvider interface implementation\n  //! Retrieve a vector using a primary key\n  const void *get_vector(const uint64_t key) const override {\n    auto it = key_to_index_map_.find(key);\n    if (it == key_to_index_map_.end()) {\n      return nullptr;\n    }\n    return vecs_reader_.get_vector(it->second);\n  }\n\n  //! Retrieve a vector using a primary key\n  virtual int get_vector(const uint64_t key,\n                         IndexStorage::MemoryBlock &block) const override {\n    const void *vector = get_vector(key);\n    if (vector == nullptr) {\n      return IndexError_NoExist;\n    }\n    block.reset((void *)vector);\n    return 0;\n  }\n\n  //! Retrieve the owner class\n  virtual const std::string &owner_class(void) const override {\n    static std::string owner_class_name = \"VecsIndexHolder\";\n    return owner_class_name;\n  }\n\n private:\n  //! 
Build key to index mapping\n  void build_key_index_map() {\n    key_to_index_map_.clear();\n    size_t num_vecs = vecs_reader_.num_vecs();\n    for (size_t i = 0; i < num_vecs; ++i) {\n      uint64_t key = vecs_reader_.get_key(i);\n      key_to_index_map_[key] = i;\n    }\n  }\n\n  bool stop_{false};\n  uint32_t start_cursor_{0};\n  VecsReader vecs_reader_;\n  size_t max_doc_count_{0};\n  std::unordered_map<uint64_t, size_t> key_to_index_map_;\n};\n\n\n/*!\n * Vecs Index Sparse Holder\n *  framework will use IndexHolder in this way:\n *  for (iter = create_iterator(); iter->is_valid(); iter->next()) {\n *      key = iter->key();\n *      data = iter->sparse_data();\n *  }\n */\nclass VecsIndexSparseHolder : public IndexSparseHolder {\n public:\n  typedef std::shared_ptr<VecsIndexSparseHolder> Pointer;\n\n  bool load(const std::string &file_path) {\n    return vecs_reader_.load(file_path);\n  }\n\n  const IndexMeta &index_meta(void) const {\n    return vecs_reader_.index_meta();\n  }\n\n  void set_metric(const std::string &name, const ailego::Params &params) {\n    vecs_reader_.set_metric(name, params);\n  }\n\n  /*!\n   * Index Holder Iterator\n   */\n  class Iterator : public IndexSparseHolder::Iterator {\n   public:\n    //! Constructor\n    Iterator(const VecsIndexSparseHolder &holder, uint32_t cursor)\n        : cursor_(cursor),\n          vecs_reader_(holder.vecs_reader_),\n          stop_(holder.stop_) {}\n\n    //! Test if the iterator is valid\n    virtual bool is_valid(void) const override {\n      return !stop_ && cursor_ < vecs_reader_.num_vecs();\n    }\n\n    //! Retrieve primary key\n    virtual uint64_t key(void) const override {\n      return vecs_reader_.get_key(cursor_);\n    }\n\n    //! Retrieve sparse count\n    virtual uint32_t sparse_count() const override {\n      return vecs_reader_.get_sparse_count(cursor_);\n    }\n\n    //! 
Retrieve sparse indices\n    virtual const uint32_t *sparse_indices() const override {\n      return vecs_reader_.get_sparse_indices(cursor_);\n    }\n\n    //! Retrieve pointer of sparse data\n    virtual const void *sparse_data() const override {\n      return vecs_reader_.get_sparse_data(cursor_);\n    }\n\n    //! Next iterator\n    virtual void next(void) override {\n      ++cursor_;\n    }\n\n    //! Reset the iterator\n    virtual void reset(void) {\n      cursor_ = 0;\n    }\n\n   private:\n    size_t cursor_;\n    const SparseVecsReader &vecs_reader_;\n    const bool &stop_;\n  };\n\n  virtual IndexSparseHolder::Iterator::Pointer create_iterator(void) override {\n    // make sure iter has value when create_iterator finished\n    IndexSparseHolder::Iterator::Pointer iter(\n        new VecsIndexSparseHolder::Iterator(*this, start_cursor_));\n    return iter;\n  }\n\n  //! Retrieve count of elements in holder\n  virtual size_t count(void) const override {\n    return max_doc_count_ != 0\n               ? std::min(max_doc_count_, vecs_reader_.num_vecs())\n               : vecs_reader_.num_vecs();\n  }\n\n  //! Retrieve type information\n  virtual IndexMeta::DataType data_type(void) const override {\n    return vecs_reader_.index_meta().data_type();\n  }\n\n  //! 
Retrieve if it can multi-pass\n  virtual bool multipass(void) const override {\n    return true;\n  }\n\n  void stop(void) {\n    stop_ = true;\n  }\n\n  uint64_t get_key(size_t idx) const {\n    return vecs_reader_.get_key(idx);\n  }\n\n  uint32_t get_sparse_count(size_t idx) const {\n    return vecs_reader_.get_sparse_count(idx);\n  }\n\n  const uint32_t *get_sparse_indices(size_t idx) const {\n    return vecs_reader_.get_sparse_indices(idx);\n  }\n\n  const void *get_sparse_data(size_t idx) const {\n    return vecs_reader_.get_sparse_data(idx);\n  }\n\n  void set_start_cursor(uint32_t index) {\n    start_cursor_ = index;\n  }\n\n  void set_max_doc_count(size_t value) {\n    max_doc_count_ = value;\n  }\n\n  uint64_t get_num_vecs() const {\n    return vecs_reader_.num_vecs();\n  }\n\n  uint32_t start_cursor() const {\n    return start_cursor_;\n  }\n\n  size_t total_sparse_count(void) const override {\n    return vecs_reader_.get_total_sparse_count();\n  }\n\n  bool has_taglist() const {\n    return vecs_reader_.has_taglist();\n  }\n\n  uint64_t get_taglist_count(size_t index) const {\n    return vecs_reader_.get_taglist_count(index);\n  }\n\n  const void *get_taglist(size_t index) const {\n    return vecs_reader_.get_taglist(index);\n  }\n\n  const void *get_taglist_data(size_t &size) const {\n    return vecs_reader_.get_taglist_data(size);\n  }\n\n  const void *get_key_base() const {\n    return vecs_reader_.key_base();\n  }\n\n private:\n  bool stop_{false};\n  uint32_t start_cursor_{0};\n  SparseVecsReader vecs_reader_;\n  size_t max_doc_count_{0};\n};\n\n}  // namespace core\n}  // namespace zvec"
  },
  {
    "path": "tools/core/vecs_reader.h",
    "content": "// Copyright 2025-present the zvec project\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <iostream>\n#include <zvec/ailego/io/mmap_file.h>\n#include \"zvec/core/framework/index_meta.h\"\n#include \"vecs_common.h\"\n\nnamespace zvec {\nnamespace core {\n\nclass VecsReader {\n public:\n  VecsReader()\n      : mmap_file_(),\n        index_meta_(),\n        num_vecs_(0),\n        vector_base_(nullptr),\n        key_base_(nullptr),\n        sparse_base_meta_{nullptr},\n        sparse_base_data_{nullptr},\n        partition_base_{nullptr},\n        taglist_base_meta_{nullptr},\n        taglist_base_data_{nullptr},\n        taglist_size_{0} {}\n\n  void set_metric(const std::string &name, const ailego::Params &params) {\n    index_meta_.set_metric(name, 0, params);\n  }\n\n  bool load(const std::string &fname) {\n    return load(fname.c_str());\n  }\n\n  bool load(const char *fname) {\n    if (!fname) {\n      std::cerr << \"Load fname is nullptr\" << std::endl;\n      return false;\n    }\n    if (!mmap_file_.open(fname, true)) {\n      std::cerr << \"Open file error: \" << fname << std::endl;\n      return false;\n    }\n\n    return load();\n  }\n\n  bool load() {\n    const VecsHeader *header =\n        reinterpret_cast<const VecsHeader *>(mmap_file_.region());\n    // check\n    num_vecs_ = header->num_vecs;\n\n    // deserialize\n    bool bret = index_meta_.deserialize(&header->meta_buf, 
header->meta_size);\n    if (!bret) {\n      std::cerr << \"deserialize index meta error.\" << std::endl;\n      return false;\n    }\n\n    const char *data_base_ptr =\n        reinterpret_cast<const char *>(header + 1) + header->meta_size;\n\n    vector_base_ = reinterpret_cast<const char *>(data_base_ptr);\n    key_base_ = reinterpret_cast<const uint64_t *>(\n        vector_base_ + num_vecs_ * index_meta_.element_size());\n\n    if (header->sparse_offset != -1LLU) {\n      sparse_base_meta_ = data_base_ptr + header->sparse_offset;\n      sparse_base_data_ = sparse_base_meta_ + num_vecs_ * sizeof(uint64_t);\n    }\n\n    if (header->partition_offset != -1LLU) {\n      partition_base_ = reinterpret_cast<const uint32_t *>(\n          data_base_ptr + header->partition_offset);\n    }\n\n    if (header->taglist_offset != -1LLU) {\n      taglist_base_meta_ = data_base_ptr + header->taglist_offset;\n      taglist_base_data_ = taglist_base_meta_ + num_vecs_;\n      taglist_size_ = header->taglist_size;\n    }\n\n    return true;\n  }\n\n  size_t num_vecs() const {\n    return num_vecs_;\n  }\n\n  const void *vector_base() const {\n    return vector_base_;\n  }\n\n  const uint64_t *key_base() const {\n    return key_base_;\n  }\n\n  const IndexMeta &index_meta() const {\n    return index_meta_;\n  }\n\n  uint64_t get_key(size_t index) const {\n    return key_base_[index];\n  }\n\n  const void *get_vector(size_t index) const {\n    return vector_base_ + index * index_meta_.element_size();\n  }\n\n  uint32_t get_sparse_count(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset = *((uint64_t *)sparse_data_meta);\n    uint32_t sparse_count = *((uint32_t *)(sparse_base_data_ + sparse_offset));\n\n    return sparse_count;\n\n    return 0;\n  }\n\n  const uint32_t *get_sparse_indices(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset 
= *((uint64_t *)sparse_data_meta);\n    uint32_t *sparse_indices =\n        (uint32_t *)(sparse_base_data_ + sparse_offset + sizeof(uint32_t));\n\n    return sparse_indices;\n\n    return nullptr;\n  }\n\n  const void *get_sparse_data(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset = *((uint64_t *)sparse_data_meta);\n    uint32_t sparse_count = *((uint32_t *)(sparse_base_data_ + sparse_offset));\n    void *sparse_data =\n        (uint32_t *)(sparse_base_data_ + sparse_offset + sizeof(uint32_t) +\n                     sparse_count * sizeof(uint32_t));\n\n    return sparse_data;\n  }\n\n  size_t get_total_sparse_count(void) const {\n    size_t total_sparse_count = 0;\n    for (size_t i = 0; i < num_vecs_; ++i) {\n      total_sparse_count += get_sparse_count(i);\n    }\n\n    return total_sparse_count;\n  }\n\n  bool has_taglist(void) const {\n    return taglist_base_meta_ != nullptr;\n  }\n\n  uint64_t get_taglist_count(size_t index) const {\n    if (!taglist_base_data_ || !taglist_base_meta_) {\n      return 0;\n    }\n\n    uint64_t taglist_count = *reinterpret_cast<const uint64_t *>(\n        taglist_base_data_ + taglist_base_meta_[index]);\n    return taglist_count;\n  }\n\n  const uint64_t *get_taglist(size_t index) const {\n    if (!taglist_base_data_ || !taglist_base_meta_) {\n      return nullptr;\n    }\n\n    return reinterpret_cast<const uint64_t *>(taglist_base_data_ +\n                                              taglist_base_meta_[index]) +\n           1;\n  }\n\n  const void *get_taglist_data(size_t &size) const {\n    size = taglist_size_;\n\n    return taglist_base_meta_;\n  }\n\n private:\n  ailego::MMapFile mmap_file_;\n  IndexMeta index_meta_;\n  size_t num_vecs_;\n  const char *vector_base_;\n  const uint64_t *key_base_;\n  const char *sparse_base_meta_;\n  const char *sparse_base_data_;\n  const uint32_t *partition_base_;\n  const char *taglist_base_meta_;\n  const 
char *taglist_base_data_;\n  uint64_t taglist_size_;\n};\n\nclass SparseVecsReader {\n public:\n  SparseVecsReader()\n      : mmap_file_(),\n        index_meta_(),\n        num_vecs_(0),\n        key_base_(nullptr),\n        sparse_base_meta_(nullptr),\n        sparse_base_data_{nullptr},\n        partition_base_{nullptr},\n        taglist_base_meta_{nullptr},\n        taglist_base_data_{nullptr},\n        taglist_size_{0} {}\n\n  void set_metric(const std::string &name, const ailego::Params &params) {\n    index_meta_.set_metric(name, 0, params);\n  }\n\n  bool load(const std::string &fname) {\n    return load(fname.c_str());\n  }\n\n\n  bool load(const char *fname) {\n    if (!fname) {\n      std::cerr << \"Load fname is nullptr\" << std::endl;\n      return false;\n    }\n    if (!mmap_file_.open(fname, true)) {\n      std::cerr << \"Open file error: \" << fname << std::endl;\n      return false;\n    }\n\n    return load();\n  }\n\n  bool load() {\n    const VecsHeader *header =\n        reinterpret_cast<const VecsHeader *>(mmap_file_.region());\n\n    // check\n    num_vecs_ = header->num_vecs;\n\n    // deserialize\n    bool bret = index_meta_.deserialize(&header->meta_buf, header->meta_size);\n    if (!bret) {\n      std::cerr << \"deserialize index meta error.\" << std::endl;\n      return false;\n    }\n\n    const char *data_base_ptr =\n        reinterpret_cast<const char *>(header + 1) + header->meta_size;\n\n    key_base_ = reinterpret_cast<const uint64_t *>(\n        reinterpret_cast<const char *>(header + 1) + header->meta_size);\n    sparse_base_meta_ = reinterpret_cast<const char *>(key_base_ + num_vecs_);\n    sparse_base_data_ = reinterpret_cast<const char *>(\n        sparse_base_meta_ + num_vecs_ * sizeof(uint64_t));\n\n    if (header->partition_offset != -1LLU) {\n      partition_base_ = reinterpret_cast<const uint32_t *>(\n          data_base_ptr + header->partition_offset);\n    }\n\n    if (header->taglist_offset != -1LLU) {\n      
taglist_base_meta_ = data_base_ptr + header->taglist_offset;\n      taglist_base_data_ = taglist_base_meta_ + num_vecs_;\n      taglist_size_ = header->taglist_size;\n    }\n\n    return true;\n  }\n\n  size_t num_vecs() const {\n    return num_vecs_;\n  }\n\n  const void *sparse_meta_base() const {\n    return sparse_base_meta_;\n  }\n\n  const uint64_t *key_base() const {\n    return key_base_;\n  }\n\n  const IndexMeta &index_meta() const {\n    return index_meta_;\n  }\n\n  uint64_t get_key(size_t index) const {\n    return key_base_[index];\n  }\n\n  uint32_t get_sparse_count(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset = *((uint64_t *)sparse_data_meta);\n    uint32_t sparse_count = *((uint32_t *)(sparse_base_data_ + sparse_offset));\n\n    return sparse_count;\n\n    return 0;\n  }\n\n  const uint32_t *get_sparse_indices(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset = *((uint64_t *)sparse_data_meta);\n    uint32_t *sparse_indices =\n        (uint32_t *)(sparse_base_data_ + sparse_offset + sizeof(uint32_t));\n\n    return sparse_indices;\n\n    return nullptr;\n  }\n\n  const void *get_sparse_data(size_t index) const {\n    auto sparse_data_meta = sparse_base_meta_ + index * sizeof(uint64_t);\n    uint64_t sparse_offset = *((uint64_t *)sparse_data_meta);\n    uint32_t sparse_count = *((uint32_t *)(sparse_base_data_ + sparse_offset));\n    void *sparse_data =\n        (uint32_t *)(sparse_base_data_ + sparse_offset + sizeof(uint32_t) +\n                     sparse_count * sizeof(uint32_t));\n\n    return sparse_data;\n  }\n\n  size_t get_total_sparse_count(void) const {\n    size_t total_sparse_count = 0;\n    for (size_t i = 0; i < num_vecs_; ++i) {\n      total_sparse_count += get_sparse_count(i);\n    }\n\n    return total_sparse_count;\n  }\n\n  bool has_taglist(void) const {\n    return 
taglist_base_meta_ != nullptr;\n  }\n\n  uint64_t get_taglist_count(size_t index) const {\n    uint64_t taglist_count = *reinterpret_cast<const uint64_t *>(\n        taglist_base_data_ + taglist_base_meta_[index]);\n    return taglist_count;\n  }\n\n  const uint64_t *get_taglist(size_t index) const {\n    return reinterpret_cast<const uint64_t *>(taglist_base_data_ +\n                                              taglist_base_meta_[index]) +\n           1;\n  }\n\n  const void *get_taglist_data(size_t &size) const {\n    size = taglist_size_;\n    return taglist_base_meta_;\n  }\n\n private:\n  ailego::MMapFile mmap_file_;\n  IndexMeta index_meta_;\n  size_t num_vecs_;\n  const uint64_t *key_base_;\n  const char *sparse_base_meta_;\n  const char *sparse_base_data_;\n  const uint32_t *partition_base_;\n  const char *taglist_base_meta_;\n  const char *taglist_base_data_;\n  uint64_t taglist_size_;\n};\n\n}  // namespace core\n}  // namespace zvec"
  }
]